diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index ca88c4706f..e9c7d7b270 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -202,8 +202,7 @@ struct acpi_device_flags { u32 coherent_dma:1; u32 cca_seen:1; u32 enumeration_by_parent:1; - u32 honor_deps:1; - u32 reserved:18; + u32 reserved:19; }; /* File System */ @@ -279,14 +278,12 @@ struct acpi_device_power { int state; /* Current state */ struct acpi_device_power_flags flags; struct acpi_device_power_state states[ACPI_D_STATE_COUNT]; /* Power states (D0-D3Cold) */ - u8 state_for_enumeration; /* Deepest power state for enumeration */ }; struct acpi_dep_data { struct list_head node; acpi_handle supplier; acpi_handle consumer; - bool honor_dep; }; /* Performance Management */ @@ -360,7 +357,6 @@ struct acpi_gpio_mapping; /* Device */ struct acpi_device { - u32 pld_crc; int device_type; acpi_handle handle; /* no handle for fixed hardware */ struct fwnode_handle fwnode; @@ -508,7 +504,6 @@ extern int unregister_acpi_notifier(struct notifier_block *); */ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device); -struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); int acpi_bus_get_status(struct acpi_device *device); @@ -575,6 +570,7 @@ struct acpi_bus_type { bool (*match)(struct device *dev); struct acpi_device * (*find_companion)(struct device *); void (*setup)(struct device *); + void (*cleanup)(struct device *); }; int register_acpi_bus_type(struct acpi_bus_type *); int unregister_acpi_bus_type(struct acpi_bus_type *); @@ -618,33 +614,12 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev); #ifdef CONFIG_X86 bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); -bool acpi_quirk_skip_acpi_ac_and_battery(void); #else static inline bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status) { return false; } -static inline bool acpi_quirk_skip_acpi_ac_and_battery(void) -{ - return false; -} -#endif - -#if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) -bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev); -int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip); -#else -static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev) -{ - return false; -} -static inline int -acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip) -{ - *skip = false; - return 0; -} #endif #ifdef CONFIG_PM @@ -719,7 +694,6 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev) bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2); void acpi_dev_clear_dependencies(struct acpi_device *supplier); -bool acpi_dev_ready_for_enumeration(const struct acpi_device *device); struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier); struct acpi_device * acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv); diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h index b5f594754a..68e4d80c1b 100644 --- a/include/acpi/acpi_numa.h +++ b/include/acpi/acpi_numa.h @@ -3,6 +3,7 @@ #define __ACPI_NUMA_H #ifdef CONFIG_ACPI_NUMA +#include #include /* Proximity bitmap length */ diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 7417731472..fa02e3ff0f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA 
subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20211217 +#define ACPI_CA_VERSION 0x20210730 #include #include @@ -454,11 +454,9 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status * ACPI table load/unload interfaces */ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION - acpi_install_table(struct acpi_table_header *table)) + acpi_install_table(acpi_physical_address address, + u8 physical)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION - acpi_install_physical_table(acpi_physical_address - address)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_load_table(struct acpi_table_header *table, u32 *table_idx)) diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 16847c8d9d..a47b32a5cb 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -24,7 +24,6 @@ * file. Useful because they make it more difficult to inadvertently type in * the wrong signature. */ -#define ACPI_SIG_AGDI "AGDI" /* Arm Generic Diagnostic Dump and Reset Device Interface */ #define ACPI_SIG_BDAT "BDAT" /* BIOS Data ACPI Table */ #define ACPI_SIG_IORT "IORT" /* IO Remapping Table */ #define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */ @@ -36,7 +35,6 @@ #define ACPI_SIG_MSCT "MSCT" /* Maximum System Characteristics Table */ #define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */ #define ACPI_SIG_NFIT "NFIT" /* NVDIMM Firmware Interface Table */ -#define ACPI_SIG_NHLT "NHLT" /* Non HD Audio Link Table */ #define ACPI_SIG_PCCT "PCCT" /* Platform Communications Channel Table */ #define ACPI_SIG_PDTT "PDTT" /* Platform Debug Trigger Table */ #define ACPI_SIG_PHAT "PHAT" /* Platform Health Assessment Table */ @@ -48,8 +46,8 @@ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ #define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */ #define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */ +#define ACPI_SIG_NHLT "NHLT" /* Non-HDAudio Link Table */ #define ACPI_SIG_SVKL "SVKL" /* Storage Volume Key Location Table */ -#define ACPI_SIG_TDEL "TDEL" /* TD Event Log Table */ /* * All tables must be byte-packed to match the ACPI specification, since @@ -156,7 +154,7 @@ typedef struct acpi_aest_processor_tlb { /* 2R: Processor Generic Resource Substructure */ typedef struct acpi_aest_processor_generic { - u32 resource; + u8 *resource; } acpi_aest_processor_generic; @@ -239,25 +237,6 @@ typedef struct acpi_aest_node_interrupt { #define ACPI_AEST_NODE_ERROR_RECOVERY 1 #define ACPI_AEST_XRUPT_RESERVED 2 /* 2 and above are reserved */ -/******************************************************************************* - * AGDI - Arm Generic Diagnostic Dump and Reset Device Interface - * - * Conforms to "ACPI for Arm Components 1.1, Platform Design Document" - * ARM DEN0093 v1.1 - * - ******************************************************************************/ -struct acpi_table_agdi { - struct acpi_table_header header; /* Common ACPI table header */ - u8 flags; - u8 reserved[3]; - u32 sdei_event; - u32 gsiv; -}; - -/* Mask for Flags field above */ - -#define ACPI_AGDI_SIGNALING_MODE (1) - /******************************************************************************* * * BDAT - BIOS Data ACPI Table @@ -999,7 +978,6 @@ struct acpi_madt_multiproc_wakeup_mailbox { /* MADT Local APIC flags */ #define ACPI_MADT_ENABLED (1) /* 00: Processor is usable if set */ -#define ACPI_MADT_ONLINE_CAPABLE (2) /* 01: System HW supports enabling processor at runtime */ /* MADT MPS INTI flags (inti_flags) */ @@ -1431,269 +1409,6 @@ struct 
nfit_device_handle { #define ACPI_NFIT_GET_NODE_ID(handle) \ (((handle) & ACPI_NFIT_NODE_ID_MASK) >> ACPI_NFIT_NODE_ID_OFFSET) -/******************************************************************************* - * - * NHLT - Non HD Audio Link Table - * - * Conforms to: Intel Smart Sound Technology NHLT Specification - * Version 0.8.1, January 2020. - * - ******************************************************************************/ - -/* Main table */ - -struct acpi_table_nhlt { - struct acpi_table_header header; /* Common ACPI table header */ - u8 endpoint_count; -}; - -struct acpi_nhlt_endpoint { - u32 descriptor_length; - u8 link_type; - u8 instance_id; - u16 vendor_id; - u16 device_id; - u16 revision_id; - u32 subsystem_id; - u8 device_type; - u8 direction; - u8 virtual_bus_id; -}; - -/* Types for link_type field above */ - -#define ACPI_NHLT_RESERVED_HD_AUDIO 0 -#define ACPI_NHLT_RESERVED_DSP 1 -#define ACPI_NHLT_PDM 2 -#define ACPI_NHLT_SSP 3 -#define ACPI_NHLT_RESERVED_SLIMBUS 4 -#define ACPI_NHLT_RESERVED_SOUNDWIRE 5 -#define ACPI_NHLT_TYPE_RESERVED 6 /* 6 and above are reserved */ - -/* All other values above are reserved */ - -/* Values for device_id field above */ - -#define ACPI_NHLT_PDM_DMIC 0xAE20 -#define ACPI_NHLT_BT_SIDEBAND 0xAE30 -#define ACPI_NHLT_I2S_TDM_CODECS 0xAE23 - -/* Values for device_type field above */ - -/* SSP Link */ - -#define ACPI_NHLT_LINK_BT_SIDEBAND 0 -#define ACPI_NHLT_LINK_FM 1 -#define ACPI_NHLT_LINK_MODEM 2 -/* 3 is reserved */ -#define ACPI_NHLT_LINK_SSP_ANALOG_CODEC 4 - -/* PDM Link */ - -#define ACPI_NHLT_PDM_ON_CAVS_1P8 0 -#define ACPI_NHLT_PDM_ON_CAVS_1P5 1 - -/* Values for Direction field above */ - -#define ACPI_NHLT_DIR_RENDER 0 -#define ACPI_NHLT_DIR_CAPTURE 1 -#define ACPI_NHLT_DIR_RENDER_LOOPBACK 2 -#define ACPI_NHLT_DIR_RENDER_FEEDBACK 3 -#define ACPI_NHLT_DIR_RESERVED 4 /* 4 and above are reserved */ - -struct acpi_nhlt_device_specific_config { - u32 capabilities_size; - u8 virtual_slot; - u8 config_type; -}; - -struct acpi_nhlt_device_specific_config_a { - u32 capabilities_size; - u8 virtual_slot; - u8 config_type; - u8 array_type; -}; - -/* Values for Config Type above */ - -#define ACPI_NHLT_CONFIG_TYPE_GENERIC 0x00 -#define ACPI_NHLT_CONFIG_TYPE_MIC_ARRAY 0x01 -#define ACPI_NHLT_CONFIG_TYPE_RENDER_FEEDBACK 0x03 -#define ACPI_NHLT_CONFIG_TYPE_RESERVED 0x04 /* 4 and above are reserved */ - -struct acpi_nhlt_device_specific_config_b { - u32 capabilities_size; -}; - -struct acpi_nhlt_device_specific_config_c { - u32 capabilities_size; - u8 virtual_slot; -}; - -struct acpi_nhlt_render_device_specific_config { - u32 capabilities_size; - u8 virtual_slot; -}; - -struct acpi_nhlt_wave_extensible { - u16 format_tag; - u16 channel_count; - u32 samples_per_sec; - u32 avg_bytes_per_sec; - u16 block_align; - u16 bits_per_sample; - u16 extra_format_size; - u16 valid_bits_per_sample; - u32 channel_mask; - u8 sub_format_guid[16]; -}; - -/* Values for channel_mask above */ - -#define ACPI_NHLT_SPKR_FRONT_LEFT 0x1 -#define ACPI_NHLT_SPKR_FRONT_RIGHT 0x2 -#define ACPI_NHLT_SPKR_FRONT_CENTER 0x4 -#define ACPI_NHLT_SPKR_LOW_FREQ 0x8 -#define ACPI_NHLT_SPKR_BACK_LEFT 0x10 -#define ACPI_NHLT_SPKR_BACK_RIGHT 0x20 -#define ACPI_NHLT_SPKR_FRONT_LEFT_OF_CENTER 0x40 -#define ACPI_NHLT_SPKR_FRONT_RIGHT_OF_CENTER 0x80 -#define ACPI_NHLT_SPKR_BACK_CENTER 0x100 -#define ACPI_NHLT_SPKR_SIDE_LEFT 0x200 -#define ACPI_NHLT_SPKR_SIDE_RIGHT 0x400 -#define ACPI_NHLT_SPKR_TOP_CENTER 0x800 -#define ACPI_NHLT_SPKR_TOP_FRONT_LEFT 0x1000 -#define 
ACPI_NHLT_SPKR_TOP_FRONT_CENTER 0x2000 -#define ACPI_NHLT_SPKR_TOP_FRONT_RIGHT 0x4000 -#define ACPI_NHLT_SPKR_TOP_BACK_LEFT 0x8000 -#define ACPI_NHLT_SPKR_TOP_BACK_CENTER 0x10000 -#define ACPI_NHLT_SPKR_TOP_BACK_RIGHT 0x20000 - -struct acpi_nhlt_format_config { - struct acpi_nhlt_wave_extensible format; - u32 capability_size; - u8 capabilities[]; -}; - -struct acpi_nhlt_formats_config { - u8 formats_count; -}; - -struct acpi_nhlt_device_specific_hdr { - u8 virtual_slot; - u8 config_type; -}; - -/* Types for config_type above */ - -#define ACPI_NHLT_GENERIC 0 -#define ACPI_NHLT_MIC 1 -#define ACPI_NHLT_RENDER 3 - -struct acpi_nhlt_mic_device_specific_config { - struct acpi_nhlt_device_specific_hdr device_config; - u8 array_type_ext; -}; - -/* Values for array_type_ext above */ - -#define ACPI_NHLT_ARRAY_TYPE_RESERVED 0x09 // 9 and below are reserved -#define ACPI_NHLT_SMALL_LINEAR_2ELEMENT 0x0A -#define ACPI_NHLT_BIG_LINEAR_2ELEMENT 0x0B -#define ACPI_NHLT_FIRST_GEOMETRY_LINEAR_4ELEMENT 0x0C -#define ACPI_NHLT_PLANAR_LSHAPED_4ELEMENT 0x0D -#define ACPI_NHLT_SECOND_GEOMETRY_LINEAR_4ELEMENT 0x0E -#define ACPI_NHLT_VENDOR_DEFINED 0x0F -#define ACPI_NHLT_ARRAY_TYPE_MASK 0x0F -#define ACPI_NHLT_ARRAY_TYPE_EXT_MASK 0x10 - -#define ACPI_NHLT_NO_EXTENSION 0x0 -#define ACPI_NHLT_MIC_SNR_SENSITIVITY_EXT (1<<4) - -struct acpi_nhlt_vendor_mic_count { - u8 microphone_count; -}; - -struct acpi_nhlt_vendor_mic_config { - u8 type; - u8 panel; - u16 speaker_position_distance; // mm - u16 horizontal_offset; // mm - u16 vertical_offset; // mm - u8 frequency_low_band; // 5*hz - u8 frequency_high_band; // 500*hz - u16 direction_angle; // -180 - + 180 - u16 elevation_angle; // -180 - + 180 - u16 work_vertical_angle_begin; // -180 - + 180 with 2 deg step - u16 work_vertical_angle_end; // -180 - + 180 with 2 deg step - u16 work_horizontal_angle_begin; // -180 - + 180 with 2 deg step - u16 work_horizontal_angle_end; // -180 - + 180 with 2 deg step -}; - -/* Values for Type field above */ - -#define ACPI_NHLT_MIC_OMNIDIRECTIONAL 0 -#define ACPI_NHLT_MIC_SUBCARDIOID 1 -#define ACPI_NHLT_MIC_CARDIOID 2 -#define ACPI_NHLT_MIC_SUPER_CARDIOID 3 -#define ACPI_NHLT_MIC_HYPER_CARDIOID 4 -#define ACPI_NHLT_MIC_8_SHAPED 5 -#define ACPI_NHLT_MIC_RESERVED6 6 // 6 is reserved -#define ACPI_NHLT_MIC_VENDOR_DEFINED 7 -#define ACPI_NHLT_MIC_RESERVED 8 // 8 and above are reserved - -/* Values for Panel field above */ - -#define ACPI_NHLT_MIC_POSITION_TOP 0 -#define ACPI_NHLT_MIC_POSITION_BOTTOM 1 -#define ACPI_NHLT_MIC_POSITION_LEFT 2 -#define ACPI_NHLT_MIC_POSITION_RIGHT 3 -#define ACPI_NHLT_MIC_POSITION_FRONT 4 -#define ACPI_NHLT_MIC_POSITION_BACK 5 -#define ACPI_NHLT_MIC_POSITION_RESERVED 6 // 6 and above are reserved - -struct acpi_nhlt_vendor_mic_device_specific_config { - struct acpi_nhlt_mic_device_specific_config mic_array_device_config; - u8 number_of_microphones; - struct acpi_nhlt_vendor_mic_config mic_config[]; // indexed by number_of_microphones -}; - -/* Microphone SNR and Sensitivity extension */ - -struct acpi_nhlt_mic_snr_sensitivity_extension { - u32 SNR; - u32 sensitivity; -}; - -/* Render device with feedback */ - -struct acpi_nhlt_render_feedback_device_specific_config { - u8 feedback_virtual_slot; // render slot in case of capture - u16 feedback_channels; // informative only - u16 feedback_valid_bits_per_sample; -}; - -/* Linux-specific structures */ - -struct acpi_nhlt_linux_specific_count { - u8 structure_count; -}; - -struct acpi_nhlt_linux_specific_data { - u8 device_id[16]; - u8 device_instance_id; - u8 
device_port_id; -}; - -struct acpi_nhlt_linux_specific_data_b { - u8 specific_data[18]; -}; - -struct acpi_nhlt_table_terminator { - u32 terminator_value; - u32 terminator_signature; -}; - /******************************************************************************* * * PCCT - Platform Communications Channel Table (ACPI 5.0) @@ -2491,22 +2206,6 @@ enum acpi_svkl_format { ACPI_SVKL_FORMAT_RESERVED = 1 /* 1 and greater are reserved */ }; -/******************************************************************************* - * - * TDEL - TD-Event Log - * From: "Guest-Host-Communication Interface (GHCI) for Intel - * Trust Domain Extensions (Intel TDX)". - * September 2020 - * - ******************************************************************************/ - -struct acpi_table_tdel { - struct acpi_table_header header; /* Common ACPI table header */ - u32 reserved; - u64 log_area_minimum_length; - u64 log_area_start_address; -}; - /* Reset to default packing */ #pragma pack() diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index edbf1ad820..9125e2f163 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -191,8 +191,7 @@ enum acpi_srat_type { ACPI_SRAT_TYPE_GICC_AFFINITY = 3, ACPI_SRAT_TYPE_GIC_ITS_AFFINITY = 4, /* ACPI 6.2 */ ACPI_SRAT_TYPE_GENERIC_AFFINITY = 5, /* ACPI 6.3 */ - ACPI_SRAT_TYPE_GENERIC_PORT_AFFINITY = 6, /* ACPI 6.4 */ - ACPI_SRAT_TYPE_RESERVED = 7 /* 7 and greater are reserved */ + ACPI_SRAT_TYPE_RESERVED = 6 /* 5 and greater are reserved */ }; /* @@ -273,11 +272,7 @@ struct acpi_srat_gic_its_affinity { u32 its_id; }; -/* - * Common structure for SRAT subtable types: - * 5: ACPI_SRAT_TYPE_GENERIC_AFFINITY - * 6: ACPI_SRAT_TYPE_GENERIC_PORT_AFFINITY - */ +/* 5: Generic Initiator Affinity Structure (ACPI 6.3) */ struct acpi_srat_generic_affinity { struct acpi_subtable_header header; diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 69e89d572b..cefbb7ad25 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -509,6 +509,7 @@ typedef u64 acpi_integer; #define ACPI_TO_POINTER(i) ACPI_CAST_PTR (void, (acpi_size) (i)) #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p, (void *) 0) #define ACPI_OFFSET(d, f) ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0) +#define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) /* Optimizations for 4-character (32-bit) acpi_name manipulation */ @@ -1103,14 +1104,6 @@ struct acpi_connection_info { u8 access_length; }; -/* Special Context data for PCC Opregion (ACPI 6.3) */ - -struct acpi_pcc_info { - u8 subspace_id; - u16 length; - u8 *internal_buffer; -}; - typedef acpi_status (*acpi_adr_space_setup) (acpi_handle region_handle, u32 function, @@ -1228,10 +1221,6 @@ struct acpi_mem_space_context { struct acpi_mem_mapping *first_mm; }; -struct acpi_data_table_space_context { - void *pointer; -}; - /* * struct acpi_memory_list is used only if the ACPICA local cache is enabled */ @@ -1298,7 +1287,6 @@ typedef enum { #define ACPI_OSI_WIN_10_RS4 0x12 #define ACPI_OSI_WIN_10_RS5 0x13 #define ACPI_OSI_WIN_10_19H1 0x14 -#define ACPI_OSI_WIN_10_20H1 0x15 /* Definitions of getopt */ diff --git a/include/acpi/apei.h b/include/acpi/apei.h index ece0a8af2b..680f80960c 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -37,6 +37,9 @@ void __init acpi_hest_init(void); static inline void acpi_hest_init(void) { return; } #endif +typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); +int apei_hest_parse(apei_hest_func_t func, void *data); + int 
erst_write(const struct cper_record_header *record); ssize_t erst_get_record_count(void); int erst_get_record_id_begin(int *pos); diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 92b7ea8d8f..bc159a9b4a 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -138,7 +138,6 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); -extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); extern bool acpi_cpc_valid(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); @@ -163,10 +162,6 @@ static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { return -ENOTSUPP; } -static inline int cppc_set_enable(int cpu, bool enable) -{ - return -ENOTSUPP; -} static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) { return -ENOTSUPP; diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 73e806fe7c..4dec4ed138 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -9,27 +9,18 @@ #include #include -struct pcc_mbox_chan { - struct mbox_chan *mchan; - u64 shmem_base_addr; - u64 shmem_size; - u32 latency; - u32 max_access_rate; - u16 min_turnaround_time; -}; - #define MAX_PCC_SUBSPACES 256 #ifdef CONFIG_PCC -extern struct pcc_mbox_chan * -pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id); -extern void pcc_mbox_free_channel(struct pcc_mbox_chan *chan); +extern struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, + int subspace_id); +extern void pcc_mbox_free_channel(struct mbox_chan *chan); #else -static inline struct pcc_mbox_chan * -pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) +static inline struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, + int subspace_id) { return ERR_PTR(-ENODEV); } -static inline void pcc_mbox_free_channel(struct pcc_mbox_chan *chan) { } +static inline void pcc_mbox_free_channel(struct mbox_chan *chan) { } #endif #endif /* _PCC_H */ diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index 33ad282bd3..20ecb004f5 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -10,12 +10,25 @@ #ifndef __ACGCC_H__ #define __ACGCC_H__ +/* + * Use compiler specific is a good practice for even when + * -nostdinc is specified (i.e., ACPI_USE_STANDARD_HEADERS undefined. + */ #ifndef va_arg +#ifdef ACPI_USE_BUILTIN_STDARG +typedef __builtin_va_list va_list; +#define va_start(v, l) __builtin_va_start(v, l) +#define va_end(v) __builtin_va_end(v) +#define va_arg(v, l) __builtin_va_arg(v, l) +#define va_copy(d, s) __builtin_va_copy(d, s) +#else #ifdef __KERNEL__ #include #else +/* Used to build acpi tools */ #include #endif /* __KERNEL__ */ +#endif /* ACPI_USE_BUILTIN_STDARG */ #endif /* ! 
va_arg */ #define ACPI_INLINE __inline__ diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 1940273719..683e124ad5 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -2,16 +2,11 @@ #ifndef __ACPI_PROCESSOR_H #define __ACPI_PROCESSOR_H +#include #include #include #include -#include -#include -#include #include -#include -#include - #include #define ACPI_PROCESSOR_CLASS "processor" diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index fd7e8fbaee..640f09479b 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -14,38 +14,12 @@ #ifndef __ASSEMBLY__ #include -#include #include #ifndef nop #define nop() asm volatile ("nop") #endif -/* - * Architectures that want generic instrumentation can define __ prefixed - * variants of all barriers. - */ - -#ifdef __mb -#define mb() do { kcsan_mb(); __mb(); } while (0) -#endif - -#ifdef __rmb -#define rmb() do { kcsan_rmb(); __rmb(); } while (0) -#endif - -#ifdef __wmb -#define wmb() do { kcsan_wmb(); __wmb(); } while (0) -#endif - -#ifdef __dma_rmb -#define dma_rmb() do { kcsan_rmb(); __dma_rmb(); } while (0) -#endif - -#ifdef __dma_wmb -#define dma_wmb() do { kcsan_wmb(); __dma_wmb(); } while (0) -#endif - /* * Force strict CPU ordering. And yes, this is required on UP too when we're * talking to devices. @@ -88,15 +62,15 @@ #ifdef CONFIG_SMP #ifndef smp_mb -#define smp_mb() do { kcsan_mb(); __smp_mb(); } while (0) +#define smp_mb() __smp_mb() #endif #ifndef smp_rmb -#define smp_rmb() do { kcsan_rmb(); __smp_rmb(); } while (0) +#define smp_rmb() __smp_rmb() #endif #ifndef smp_wmb -#define smp_wmb() do { kcsan_wmb(); __smp_wmb(); } while (0) +#define smp_wmb() __smp_wmb() #endif #else /* !CONFIG_SMP */ @@ -149,19 +123,19 @@ do { \ #ifdef CONFIG_SMP #ifndef smp_store_mb -#define smp_store_mb(var, value) do { kcsan_mb(); __smp_store_mb(var, value); } while (0) +#define smp_store_mb(var, value) __smp_store_mb(var, value) #endif #ifndef smp_mb__before_atomic -#define smp_mb__before_atomic() do { kcsan_mb(); __smp_mb__before_atomic(); } while (0) +#define smp_mb__before_atomic() __smp_mb__before_atomic() #endif #ifndef smp_mb__after_atomic -#define smp_mb__after_atomic() do { kcsan_mb(); __smp_mb__after_atomic(); } while (0) +#define smp_mb__after_atomic() __smp_mb__after_atomic() #endif #ifndef smp_store_release -#define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0) +#define smp_store_release(p, v) __smp_store_release(p, v) #endif #ifndef smp_load_acquire @@ -204,13 +178,13 @@ do { \ #endif /* CONFIG_SMP */ /* Barriers for virtual machine guests when talking to an SMP host */ -#define virt_mb() do { kcsan_mb(); __smp_mb(); } while (0) -#define virt_rmb() do { kcsan_rmb(); __smp_rmb(); } while (0) -#define virt_wmb() do { kcsan_wmb(); __smp_wmb(); } while (0) -#define virt_store_mb(var, value) do { kcsan_mb(); __smp_store_mb(var, value); } while (0) -#define virt_mb__before_atomic() do { kcsan_mb(); __smp_mb__before_atomic(); } while (0) -#define virt_mb__after_atomic() do { kcsan_mb(); __smp_mb__after_atomic(); } while (0) -#define virt_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0) +#define virt_mb() __smp_mb() +#define virt_rmb() __smp_rmb() +#define virt_wmb() __smp_wmb() +#define virt_store_mb(var, value) __smp_store_mb(var, value) +#define virt_mb__before_atomic() __smp_mb__before_atomic() +#define virt_mb__after_atomic() __smp_mb__after_atomic() +#define virt_store_release(p, v) 
__smp_store_release(p, v) #define virt_load_acquire(p) __smp_load_acquire(p) /** @@ -277,16 +251,5 @@ do { \ #define pmem_wmb() wmb() #endif -/* - * ioremap_wc() maps I/O memory as memory with write-combining attributes. For - * this kind of memory accesses, the CPU may wait for prior accesses to be - * merged with subsequent ones. In some situation, such wait is bad for the - * performance. io_stop_wc() can be used to prevent the merging of - * write-combining memory accesses before this macro with those after it. - */ -#ifndef io_stop_wc -#define io_stop_wc() do { } while (0) -#endif - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h index a47b8a71d6..df9b5bc3d2 100644 --- a/include/asm-generic/bitops.h +++ b/include/asm-generic/bitops.h @@ -20,6 +20,7 @@ #include #include #include +#include #ifndef _LINUX_BITOPS_H #error only can be included directly diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 998d4d544f..835f959a25 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -1,6 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_BITOPS_FIND_H_ #define _ASM_GENERIC_BITOPS_FIND_H_ +extern unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert, unsigned long le); +extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region @@ -11,8 +19,52 @@ * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -extern unsigned long find_next_bit(const unsigned long *addr, unsigned long - size, unsigned long offset); +static inline +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 0); +} +#endif + +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr1 & *addr2 & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); +} #endif #ifndef find_next_zero_bit @@ -25,12 +77,27 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long * Returns the bit number of the next zero bit * If no bits are zero, returns @size. 
*/ -extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned - long size, unsigned long offset); +static inline +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); +} #endif #ifdef CONFIG_GENERIC_FIND_FIRST_BIT +#ifndef find_first_bit /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at @@ -39,9 +106,20 @@ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ -extern unsigned long find_first_bit(const unsigned long *addr, - unsigned long size); +static inline +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + return val ? __ffs(val) : size; + } + + return _find_first_bit(addr, size); +} +#endif + +#ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at @@ -50,13 +128,66 @@ extern unsigned long find_first_bit(const unsigned long *addr, * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ -extern unsigned long find_first_zero_bit(const unsigned long *addr, - unsigned long size); +static inline +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr | ~GENMASK(size - 1, 0); + + return val == ~0UL ? size : ffz(val); + } + + return _find_first_zero_bit(addr, size); +} +#endif + #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#ifndef find_first_bit #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#endif +#ifndef find_first_zero_bit #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +#endif #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#ifndef find_last_bit +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The number of bits to search + * + * Returns the bit number of the last set bit, or size. + */ +static inline +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __fls(val) : size; + } + + return _find_last_bit(addr, size); +} +#endif + +/** + * find_next_clump8 - find next 8-bit clump with set bits in a memory region + * @clump: location to store copy of found clump + * @addr: address to base the search on + * @size: bitmap size in number of bits + * @offset: bit offset at which to start searching + * + * Returns the bit offset for the next set clump; the found clump value is + * copied to the location pointed by @clump. If no bits are set, returns @size. 
+ */ +extern unsigned long find_next_clump8(unsigned long *clump, + const unsigned long *addr, + unsigned long size, unsigned long offset); + +#define find_first_clump8(clump, bits, size) \ + find_next_clump8((clump), (bits), (size), 0) + #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h index c90192b1c7..81915dcd4b 100644 --- a/include/asm-generic/bitops/instrumented-atomic.h +++ b/include/asm-generic/bitops/instrumented-atomic.h @@ -67,7 +67,6 @@ static inline void change_bit(long nr, volatile unsigned long *addr) */ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_set_bit(nr, addr); } @@ -81,7 +80,6 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_clear_bit(nr, addr); } @@ -95,7 +93,6 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_change_bit(nr, addr); } diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h index eb64bd4f11..75ef606f71 100644 --- a/include/asm-generic/bitops/instrumented-lock.h +++ b/include/asm-generic/bitops/instrumented-lock.h @@ -22,7 +22,6 @@ */ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) { - kcsan_release(); instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_clear_bit_unlock(nr, addr); } @@ -38,7 +37,6 @@ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) */ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { - kcsan_release(); instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___clear_bit_unlock(nr, addr); } @@ -73,7 +71,6 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) { - kcsan_release(); instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); return arch_clear_bit_unlock_is_negative_byte(nr, addr); } diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h index d51beff603..5a28629cbf 100644 --- a/include/asm-generic/bitops/le.h +++ b/include/asm-generic/bitops/le.h @@ -2,19 +2,83 @@ #ifndef _ASM_GENERIC_BITOPS_LE_H_ #define _ASM_GENERIC_BITOPS_LE_H_ +#include #include #include +#include #if defined(__LITTLE_ENDIAN) #define BITOP_LE_SWIZZLE 0 +static inline unsigned long find_next_zero_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_zero_bit(addr, size, offset); +} + +static inline unsigned long find_next_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_bit(addr, size, offset); +} + +static inline unsigned long find_first_zero_bit_le(const void *addr, + unsigned long size) +{ + return find_first_zero_bit(addr, size); +} + #elif defined(__BIG_ENDIAN) #define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +#ifndef find_next_zero_bit_le +static inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, 
unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); +} #endif +#ifndef find_next_bit_le +static inline +unsigned long find_next_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 1); +} +#endif + +#ifndef find_first_zero_bit_le +#define find_first_zero_bit_le(addr, size) \ + find_next_zero_bit_le((addr), (size), 0) +#endif + +#else +#error "Please fix " +#endif static inline int test_bit_le(int nr, const void *addr) { diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index fbca56bd9c..7ddd9dc10c 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -20,7 +20,7 @@ struct pt_regs; #ifdef CONFIG_FUNCTION_ERROR_INJECTION /* - * Whitelist generating macro. Specify functions which can be + * Whitelist ganerating macro. Specify functions which can be * error-injectable using this macro. */ #define ALLOW_ERROR_INJECTION(fname, _etype) \ @@ -29,7 +29,7 @@ static struct error_injection_entry __used \ _eil_addr_##fname = { \ .addr = (unsigned long)fname, \ .etype = EI_ETYPE_##_etype, \ - } + }; void override_function_with_return(struct pt_regs *regs); #else diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index 2a19215baa..f4c3470480 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -6,22 +6,15 @@ #include #include -#ifndef futex_atomic_cmpxchg_inatomic #ifndef CONFIG_SMP /* * The following implementation only for uniprocessor machines. * It relies on preempt_disable() ensuring mutual exclusion. * */ -#define futex_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval) \ - futex_atomic_cmpxchg_inatomic_local(uval, uaddr, oldval, newval) -#define arch_futex_atomic_op_inuser(op, oparg, oval, uaddr) \ - futex_atomic_op_inuser_local(op, oparg, oval, uaddr) -#endif /* CONFIG_SMP */ -#endif /** - * futex_atomic_op_inuser_local() - Atomic arithmetic operation with constant + * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant * argument and comparison of the previous * futex value with another constant. * @@ -35,7 +28,7 @@ * -ENOSYS - Operation not supported */ static inline int -futex_atomic_op_inuser_local(int op, u32 oparg, int *oval, u32 __user *uaddr) +arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) { int oldval, ret; u32 tmp; @@ -82,7 +75,7 @@ futex_atomic_op_inuser_local(int op, u32 oparg, int *oval, u32 __user *uaddr) } /** - * futex_atomic_cmpxchg_inatomic_local() - Compare and exchange the content of the + * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the * uaddr with newval if the current value is * oldval. 
* @uval: pointer to store content of @uaddr @@ -94,9 +87,10 @@ futex_atomic_op_inuser_local(int op, u32 oparg, int *oval, u32 __user *uaddr) * 0 - On success * -EFAULT - User access resulted in a page fault * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG) */ static inline int -futex_atomic_cmpxchg_inatomic_local(u32 *uval, u32 __user *uaddr, +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { u32 val; @@ -118,4 +112,19 @@ futex_atomic_cmpxchg_inatomic_local(u32 *uval, u32 __user *uaddr, return 0; } +#else +static inline int +arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) +{ + return -ENOSYS; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + return -ENOSYS; +} + +#endif /* CONFIG_SMP */ #endif diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 8f97c2927b..56348a541c 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -158,7 +158,6 @@ struct ms_hyperv_tsc_page { #define HVCALL_RETARGET_INTERRUPT 0x007e #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 -#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db /* Extended hypercalls */ #define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 @@ -540,6 +539,39 @@ enum hv_interrupt_source { HV_INTERRUPT_SOURCE_IOAPIC, }; +union hv_msi_address_register { + u32 as_uint32; + struct { + u32 reserved1:2; + u32 destination_mode:1; + u32 redirection_hint:1; + u32 reserved2:8; + u32 destination_id:8; + u32 msi_base:12; + }; +} __packed; + +union hv_msi_data_register { + u32 as_uint32; + struct { + u32 vector:8; + u32 delivery_mode:3; + u32 reserved1:3; + u32 level_assert:1; + u32 trigger_mode:1; + u32 reserved2:16; + }; +} __packed; + +/* HvRetargetDeviceInterrupt hypercall */ +union hv_msi_entry { + u64 as_uint64; + struct { + union hv_msi_address_register address; + union hv_msi_data_register data; + } __packed; +}; + union hv_ioapic_rte { u64 as_uint64; diff --git a/include/asm-generic/logic_io.h b/include/asm-generic/logic_io.h index 8a59b6e567..a53116b8c5 100644 --- a/include/asm-generic/logic_io.h +++ b/include/asm-generic/logic_io.h @@ -34,7 +34,7 @@ void __iomem *ioremap(phys_addr_t offset, size_t size); #define iounmap iounmap -void iounmap(void volatile __iomem *addr); +void iounmap(void __iomem *addr); #define __raw_readb __raw_readb u8 __raw_readb(const volatile void __iomem *addr); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c08758b6b3..d3eae6cdba 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -35,26 +35,15 @@ struct ms_hyperv_info { u32 max_vp_index; u32 max_lp_index; u32 isolation_config_a; - union { - u32 isolation_config_b; - struct { - u32 cvm_type : 4; - u32 reserved1 : 1; - u32 shared_gpa_boundary_active : 1; - u32 shared_gpa_boundary_bits : 6; - u32 reserved2 : 20; - }; - }; - u64 shared_gpa_boundary; + u32 isolation_config_b; }; extern struct ms_hyperv_info ms_hyperv; -extern void * __percpu *hyperv_pcpu_input_arg; -extern void * __percpu *hyperv_pcpu_output_arg; +extern void __percpu **hyperv_pcpu_input_arg; +extern void __percpu **hyperv_pcpu_output_arg; extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); -extern bool 
hv_isolation_type_snp(void); /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ static inline int hv_result(u64 status) @@ -265,21 +254,12 @@ bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); enum hv_isolation_type hv_get_isolation_type(void); bool hv_is_isolation_supported(void); -bool hv_isolation_type_snp(void); -u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); -void *hv_map_memory(void *addr, unsigned long size); -void hv_unmap_memory(void *addr); #else /* CONFIG_HYPERV */ static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } static inline void hyperv_cleanup(void) {} -static inline bool hv_is_isolation_supported(void) { return false; } -static inline enum hv_isolation_type hv_get_isolation_type(void) -{ - return HV_ISOLATION_TYPE_NONE; -} #endif /* CONFIG_HYPERV */ #endif diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 977bea16cf..02932efad3 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -147,15 +147,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 -static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - gfp_t gfp = GFP_PGTABLE_USER; - - if (mm == &init_mm) - gfp = GFP_PGTABLE_KERNEL; - return (pud_t *)get_zeroed_page(gfp); -} - #ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** * pud_alloc_one - allocate a page for PUD-level page table @@ -168,23 +159,20 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) */ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return __pud_alloc_one(mm, addr); + gfp_t gfp = GFP_PGTABLE_USER; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + return (pud_t *)get_zeroed_page(gfp); } #endif -static inline void __pud_free(struct mm_struct *mm, pud_t *pud) +static inline void pud_free(struct mm_struct *mm, pud_t *pud) { BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); free_page((unsigned long)pud); } -#ifndef __HAVE_ARCH_PUD_FREE -static inline void pud_free(struct mm_struct *mm, pud_t *pud) -{ - __pud_free(mm, pud); -} -#endif - #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #ifndef __HAVE_ARCH_PGD_FREE diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 690f741764..d16302d3eb 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -64,6 +64,36 @@ extern __visible const void __nosave_begin, __nosave_end; #define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif +/* random extra sections (if any). Override + * in asm/sections.h */ +#ifndef arch_is_kernel_text +static inline int arch_is_kernel_text(unsigned long addr) +{ + return 0; +} +#endif + +#ifndef arch_is_kernel_data +static inline int arch_is_kernel_data(unsigned long addr) +{ + return 0; +} +#endif + +/* + * Check if an address is part of freed initmem. This is needed on architectures + * with virt == phys kernel mapping, for code that wants to check if an address + * is part of a static object within [_stext, _end]. After initmem is freed, + * memory can be allocated from it, and such allocations would then have + * addresses within the range [_stext, _end]. 
+ */ +#ifndef arch_is_kernel_initmem_freed +static inline int arch_is_kernel_initmem_freed(unsigned long addr) +{ + return 0; +} +#endif + /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region @@ -128,28 +158,6 @@ static inline bool init_section_intersects(void *virt, size_t size) return memory_intersects(__init_begin, __init_end, virt, size); } -/** - * is_kernel_core_data - checks if the pointer address is located in the - * .data or .bss section - * - * @addr: address to check - * - * Returns: true if the address is located in .data or .bss, false otherwise. - * Note: On some archs it may return true for core RODATA, and false - * for others. But will always be true for core RW data. - */ -static inline bool is_kernel_core_data(unsigned long addr) -{ - if (addr >= (unsigned long)_sdata && addr < (unsigned long)_edata) - return true; - - if (addr >= (unsigned long)__bss_start && - addr < (unsigned long)__bss_stop) - return true; - - return false; -} - /** * is_kernel_rodata - checks if the pointer address is located in the * .rodata section @@ -164,51 +172,4 @@ static inline bool is_kernel_rodata(unsigned long addr) addr < (unsigned long)__end_rodata; } -/** - * is_kernel_inittext - checks if the pointer address is located in the - * .init.text section - * - * @addr: address to check - * - * Returns: true if the address is located in .init.text, false otherwise. - */ -static inline bool is_kernel_inittext(unsigned long addr) -{ - return addr >= (unsigned long)_sinittext && - addr < (unsigned long)_einittext; -} - -/** - * __is_kernel_text - checks if the pointer address is located in the - * .text section - * - * @addr: address to check - * - * Returns: true if the address is located in .text, false otherwise. - * Note: an internal helper, only check the range of _stext to _etext. - */ -static inline bool __is_kernel_text(unsigned long addr) -{ - return addr >= (unsigned long)_stext && - addr < (unsigned long)_etext; -} - -/** - * __is_kernel - checks if the pointer address is located in the kernel range - * - * @addr: address to check - * - * Returns: true if the address is located in the kernel range, false otherwise. - * Note: an internal helper, check the range of _stext to _end, - * and range from __init_begin to __init_end, which can be outside - * of the _stext to _end range. - */ -static inline bool __is_kernel(unsigned long addr) -{ - return ((addr >= (unsigned long)_stext && - addr < (unsigned long)_end) || - (addr >= (unsigned long)__init_begin && - addr < (unsigned long)__init_end)); -} - #endif /* _ASM_GENERIC_SECTIONS_H_ */ diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 81695eb02a..524218ae38 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -117,6 +117,22 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args); +/** + * syscall_set_arguments - change system call parameter value + * @task: task of interest, must be in system call entry tracing + * @regs: task_pt_regs() of @task + * @args: array of argument values to store + * + * Changes 6 arguments to the system call. + * The first argument gets value @args[0], and so on. + * + * It's only valid to call this when @task is stopped for tracing on + * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. 
+ */ +void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, + const unsigned long *args); + /** * syscall_get_arch - return the AUDIT_ARCH for the current system call * @task: task of interest, must be blocked diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 42f3866bca..f2984af2b8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -164,22 +164,16 @@ * Need to also make ftrace_stub_graph point to ftrace_stub * so that the same stub location may have different protocols * and not mess up with C verifiers. - * - * ftrace_ops_list_func will be defined as arch_ftrace_ops_list_func - * as some archs will have a different prototype for that function - * but ftrace_ops_list_func() will have a single prototype. */ #define MCOUNT_REC() . = ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ KEEP(*(__patchable_function_entries)) \ __stop_mcount_loc = .; \ - ftrace_stub_graph = ftrace_stub; \ - ftrace_ops_list_func = arch_ftrace_ops_list_func; + ftrace_stub_graph = ftrace_stub; #else # ifdef CONFIG_FUNCTION_TRACER -# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; \ - ftrace_ops_list_func = arch_ftrace_ops_list_func; +# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; # else # define MCOUNT_REC() # endif @@ -476,7 +470,13 @@ __end_pci_fixups_suspend_late = .; \ } \ \ - FW_LOADER_BUILT_IN_DATA \ + /* Built-in firmware blobs */ \ + .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) ALIGN(8) { \ + __start_builtin_fw = .; \ + KEEP(*(.builtin_fw)) \ + __end_builtin_fw = .; \ + } \ + \ TRACEDATA \ \ PRINTK_INDEX \ @@ -869,11 +869,10 @@ KEEP(*(.orc_unwind)) \ __stop_orc_unwind = .; \ } \ - text_size = _etext - _stext; \ . = ALIGN(4); \ .orc_lookup : AT(ADDR(.orc_lookup) - LOAD_OFFSET) { \ orc_lookup = .; \ - . += (((text_size + LOOKUP_BLOCK_SIZE - 1) / \ + . += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) / \ LOOKUP_BLOCK_SIZE) + 1) * 4; \ orc_lookup_end = .; \ } @@ -881,18 +880,6 @@ #define ORC_UNWIND_TABLE #endif -/* Built-in firmware blobs */ -#ifdef CONFIG_FW_LOADER -#define FW_LOADER_BUILT_IN_DATA \ - .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) ALIGN(8) { \ - __start_builtin_fw = .; \ - KEEP(*(.builtin_fw)) \ - __end_builtin_fw = .; \ - } -#else -#define FW_LOADER_BUILT_IN_DATA -#endif - #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . 
= ALIGN(4); \ diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 14db3bee05..5af914c1ab 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -8,10 +8,9 @@ #ifndef _CRYPTO_AEAD_H #define _CRYPTO_AEAD_H -#include #include +#include #include -#include /** * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API @@ -74,7 +73,6 @@ */ struct crypto_aead; -struct scatterlist; /** * struct aead_request - AEAD request diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index f76ec723ce..5f6841c73e 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -7,11 +7,9 @@ #ifndef _CRYPTO_ALGAPI_H #define _CRYPTO_ALGAPI_H -#include #include -#include #include -#include +#include /* * Maximum values for blocksize and alignmask, used to allocate @@ -26,7 +24,6 @@ struct crypto_aead; struct crypto_instance; struct module; -struct notifier_block; struct rtattr; struct seq_file; struct sk_buff; diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h index 0c01762853..18875f16f8 100644 --- a/include/crypto/blake2b.h +++ b/include/crypto/blake2b.h @@ -5,6 +5,7 @@ #include #include +#include #include enum blake2b_lengths { diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index f9ffd39194..bc3fb59442 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -7,8 +7,8 @@ #define _CRYPTO_BLAKE2S_H #include -#include #include +#include #include enum blake2s_lengths { @@ -101,4 +101,7 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, blake2s_final(&state, out); } +void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, + const size_t keylen); + #endif /* _CRYPTO_BLAKE2S_H */ diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index b3ea73b819..dabaee6987 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -47,19 +47,12 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) hchacha_block_generic(state, out, nrounds); } -enum chacha_constants { /* expand 32-byte k */ - CHACHA_CONSTANT_EXPA = 0x61707865U, - CHACHA_CONSTANT_ND_3 = 0x3320646eU, - CHACHA_CONSTANT_2_BY = 0x79622d32U, - CHACHA_CONSTANT_TE_K = 0x6b206574U -}; - static inline void chacha_init_consts(u32 *state) { - state[0] = CHACHA_CONSTANT_EXPA; - state[1] = CHACHA_CONSTANT_ND_3; - state[2] = CHACHA_CONSTANT_2_BY; - state[3] = CHACHA_CONSTANT_TE_K; + state[0] = 0x61707865; /* "expa" */ + state[1] = 0x3320646e; /* "nd 3" */ + state[2] = 0x79622d32; /* "2-by" */ + state[3] = 0x6b206574; /* "te k" */ } void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index af5ad51d3e..c416512693 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -105,12 +105,6 @@ struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; -enum drbg_seed_state { - DRBG_SEED_STATE_UNSEEDED, - DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */ - DRBG_SEED_STATE_FULL, -}; - struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ @@ -133,15 +127,16 @@ struct drbg_state { struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ - enum drbg_seed_state seeded; /* DRBG fully seeded? */ - unsigned long last_seed_time; + bool seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? 
*/ unsigned char *prev; /* FIPS 140-2 continuous test value */ + struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; + struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) diff --git a/include/crypto/engine.h b/include/crypto/engine.h index ae133e98d8..26cac19b0f 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -9,18 +9,13 @@ #include #include +#include #include -#include -#include - #include #include #include #include #include -#include - -struct device; #define ENGINE_NAME_LEN 30 /* @@ -101,8 +96,6 @@ int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine, struct akcipher_request *req); int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine, struct ahash_request *req); -int crypto_transfer_kpp_request_to_engine(struct crypto_engine *engine, - struct kpp_request *req); int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine, struct skcipher_request *req); void crypto_finalize_aead_request(struct crypto_engine *engine, @@ -111,8 +104,6 @@ void crypto_finalize_akcipher_request(struct crypto_engine *engine, struct akcipher_request *req, int err); void crypto_finalize_hash_request(struct crypto_engine *engine, struct ahash_request *req, int err); -void crypto_finalize_kpp_request(struct crypto_engine *engine, - struct kpp_request *req, int err); void crypto_finalize_skcipher_request(struct crypto_engine *engine, struct skcipher_request *req, int err); int crypto_engine_start(struct crypto_engine *engine); diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 52363eee2b..8e50d48750 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -11,11 +11,11 @@ #include #include -void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, +void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, size_t nblocks, const u32 inc); -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc); +void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, + size_t nblocks, const u32 inc); bool blake2s_selftest(void); @@ -24,11 +24,14 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) state->f[0] = -1; } +typedef void (*blake2s_compress_t)(struct blake2s_state *state, + const u8 *block, size_t nblocks, u32 inc); + /* Helper functions for BLAKE2s shared by the library and shash APIs */ -static __always_inline void -__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, - bool force_generic) +static inline void __blake2s_update(struct blake2s_state *state, + const u8 *in, size_t inlen, + blake2s_compress_t compress) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; @@ -36,12 +39,7 @@ __blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, return; if (inlen > fill) { memcpy(state->buf + state->buflen, in, fill); - if (force_generic) - blake2s_compress_generic(state, state->buf, 1, - BLAKE2S_BLOCK_SIZE); - else - blake2s_compress(state, state->buf, 1, - BLAKE2S_BLOCK_SIZE); + (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); state->buflen = 0; in += fill; inlen -= fill; @@ -49,12 +47,7 @@ __blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, if (inlen > BLAKE2S_BLOCK_SIZE) { const size_t nblocks = 
DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); /* Hash one less (full) block than strictly possible */ - if (force_generic) - blake2s_compress_generic(state, in, nblocks - 1, - BLAKE2S_BLOCK_SIZE); - else - blake2s_compress(state, in, nblocks - 1, - BLAKE2S_BLOCK_SIZE); + (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); } @@ -62,16 +55,13 @@ __blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, state->buflen += inlen; } -static __always_inline void -__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) +static inline void __blake2s_final(struct blake2s_state *state, u8 *out, + blake2s_compress_t compress) { blake2s_set_lastblock(state); memset(state->buf + state->buflen, 0, BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - if (force_generic) - blake2s_compress_generic(state, state->buf, 1, state->buflen); - else - blake2s_compress(state, state->buf, 1, state->buflen); + (*compress)(state, state->buf, 1, state->buflen); cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); memcpy(out, state->h, state->outlen); } @@ -109,20 +99,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) static inline int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, unsigned int inlen, - bool force_generic) + blake2s_compress_t compress) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_update(state, in, inlen, force_generic); + __blake2s_update(state, in, inlen, compress); return 0; } static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, - bool force_generic) + blake2s_compress_t compress) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_final(state, out, force_generic); + __blake2s_final(state, out, compress); return 0; } diff --git a/include/crypto/pcrypt.h b/include/crypto/pcrypt.h index 234d7cf3cf..b9bc343619 100644 --- a/include/crypto/pcrypt.h +++ b/include/crypto/pcrypt.h @@ -9,8 +9,8 @@ #ifndef _CRYPTO_PCRYPT_H #define _CRYPTO_PCRYPT_H -#include #include +#include #include struct pcrypt_request { diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 6407b4b613..7af08174a7 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -12,9 +12,8 @@ #define _CRYPTO_SCATTERWALK_H #include - #include -#include +#include #include static inline void scatterwalk_crypto_chain(struct scatterlist *head, diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 39f5b67c30..ef0fc9ed43 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -8,13 +8,9 @@ #ifndef _CRYPTO_SKCIPHER_H #define _CRYPTO_SKCIPHER_H -#include #include +#include #include -#include -#include - -struct scatterlist; /** * struct skcipher_request - Symmetric key cipher request diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 90b69270f2..0f66a0d9f0 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -62,7 +62,6 @@ enum amd_asic_type { CHIP_DIMGREY_CAVEFISH, /* 33 */ CHIP_BEIGE_GOBY, /* 34 */ CHIP_YELLOW_CARP, /* 35 */ - CHIP_IP_DISCOVERY, /* 36 */ CHIP_LAST, }; diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 5457b16851..e1466106b3 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -718,14 +718,6 @@ enum drm_bridge_ops { * this flag shall implement the &drm_bridge_funcs->get_modes callback. 
*/ DRM_BRIDGE_OP_MODES = BIT(3), - /** - * @DRM_BRIDGE_OP_UPSTREAM_FIRST: The bridge can require - * that the upstream node pre_enable is called before its pre_enable, - * and conversely for post_disables. This is most frequently a - * requirement for DSI devices which need the host to be initialised - * before them. - */ - DRM_BRIDGE_OP_UPSTREAM_FIRST = BIT(4), }; /** @@ -769,6 +761,14 @@ struct drm_bridge { * modes. */ bool interlace_allowed; + /** + * @pre_enable_upstream_first: The bridge requires that the upstream + * bridge @pre_enable function is called before its @pre_enable, + * and conversely for post_disable. This is most frequently a + * requirement for DSI devices which need the host to be initialised + * before the peripheral. + */ + bool pre_enable_upstream_first; /** * @ddc: Associated I2C adapter for DDC access, if any. */ @@ -798,19 +798,11 @@ drm_priv_to_bridge(struct drm_private_obj *priv) void drm_bridge_add(struct drm_bridge *bridge); void drm_bridge_remove(struct drm_bridge *bridge); +struct drm_bridge *of_drm_find_bridge(struct device_node *np); int drm_bridge_attach(struct drm_encoder *encoder, struct drm_bridge *bridge, struct drm_bridge *previous, enum drm_bridge_attach_flags flags); -#ifdef CONFIG_OF -struct drm_bridge *of_drm_find_bridge(struct device_node *np); -#else -static inline struct drm_bridge *of_drm_find_bridge(struct device_node *np) -{ - return NULL; -} -#endif - /** * drm_bridge_get_next_bridge() - Get the next bridge in the chain * @bridge: bridge object @@ -930,17 +922,4 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge); #endif -#if defined(CONFIG_OF) && defined(CONFIG_DRM_PANEL_BRIDGE) -struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node, - u32 port, u32 endpoint); -#else -static inline struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, - struct device_node *node, - u32 port, - u32 endpoint) -{ - return ERR_PTR(-ENODEV); -} -#endif - #endif diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index b501d0bada..1647960c9e 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -41,7 +40,6 @@ struct drm_encoder; struct drm_property; struct drm_property_blob; struct drm_printer; -struct drm_privacy_screen; struct edid; struct i2c_adapter; @@ -322,30 +320,6 @@ struct drm_monitor_range_info { u8 max_vfreq; }; -/** - * enum drm_privacy_screen_status - privacy screen status - * - * This enum is used to track and control the state of the integrated privacy - * screen present on some display panels, via the "privacy-screen sw-state" - * and "privacy-screen hw-state" properties. Note the _LOCKED enum values - * are only valid for the "privacy-screen hw-state" property. 
- * - * @PRIVACY_SCREEN_DISABLED: - * The privacy-screen on the panel is disabled - * @PRIVACY_SCREEN_ENABLED: - * The privacy-screen on the panel is enabled - * @PRIVACY_SCREEN_DISABLED_LOCKED: - * The privacy-screen on the panel is disabled and locked (cannot be changed) - * @PRIVACY_SCREEN_ENABLED_LOCKED: - * The privacy-screen on the panel is enabled and locked (cannot be changed) - */ -enum drm_privacy_screen_status { - PRIVACY_SCREEN_DISABLED = 0, - PRIVACY_SCREEN_ENABLED, - PRIVACY_SCREEN_DISABLED_LOCKED, - PRIVACY_SCREEN_ENABLED_LOCKED, -}; - /* * This is a consolidated colorimetry list supported by HDMI and * DP protocol standard. The respective connectors will register @@ -616,18 +590,6 @@ struct drm_display_info { * @monitor_range: Frequency range supported by monitor range descriptor */ struct drm_monitor_range_info monitor_range; - - /** - * @mso_stream_count: eDP Multi-SST Operation (MSO) stream count from - * the DisplayID VESA vendor block. 0 for conventional Single-Stream - * Transport (SST), or 2 or 4 MSO streams. - */ - u8 mso_stream_count; - - /** - * @mso_pixel_overlap: eDP MSO segment pixel overlap, 0-8 pixels. - */ - u8 mso_pixel_overlap; }; int drm_display_info_set_bus_formats(struct drm_display_info *info, @@ -819,12 +781,6 @@ struct drm_connector_state { */ u8 max_bpc; - /** - * @privacy_screen_sw_state: See :ref:`Standard Connector - * Properties` - */ - enum drm_privacy_screen_status privacy_screen_sw_state; - /** * @hdr_output_metadata: * DRM blob property for HDR output metadata @@ -1128,14 +1084,6 @@ struct drm_connector_funcs { */ void (*atomic_print_state)(struct drm_printer *p, const struct drm_connector_state *state); - - /** - * @oob_hotplug_event: - * - * This will get called when a hotplug-event for a drm-connector - * has been received from a source outside the display driver / device. - */ - void (*oob_hotplug_event)(struct drm_connector *connector); }; /** @@ -1280,14 +1228,6 @@ struct drm_connector { struct device *kdev; /** @attr: sysfs attributes */ struct device_attribute *attr; - /** - * @fwnode: associated fwnode supplied by platform firmware - * - * Drivers can set this to associate a fwnode with a connector, drivers - * are expected to get a reference on the fwnode when setting this. - * drm_connector_cleanup() will call fwnode_handle_put() on this. - */ - struct fwnode_handle *fwnode; /** * @head: @@ -1299,14 +1239,6 @@ struct drm_connector { */ struct list_head head; - /** - * @global_connector_list_entry: - * - * Connector entry in the global connector-list, used by - * drm_connector_find_by_fwnode(). - */ - struct list_head global_connector_list_entry; - /** @base: base KMS object */ struct drm_mode_object base; @@ -1453,24 +1385,6 @@ struct drm_connector { */ struct drm_property *max_bpc_property; - /** @privacy_screen: drm_privacy_screen for this connector, or NULL. */ - struct drm_privacy_screen *privacy_screen; - - /** @privacy_screen_notifier: privacy-screen notifier_block */ - struct notifier_block privacy_screen_notifier; - - /** - * @privacy_screen_sw_state_property: Optional atomic property for the - * connector to control the integrated privacy screen. - */ - struct drm_property *privacy_screen_sw_state_property; - - /** - * @privacy_screen_hw_state_property: Optional atomic property for the - * connector to report the actual integrated privacy screen state. 
- */ - struct drm_property *privacy_screen_hw_state_property; - #define DRM_CONNECTOR_POLL_HPD (1 << 0) #define DRM_CONNECTOR_POLL_CONNECT (1 << 1) #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2) @@ -1736,7 +1650,6 @@ drm_connector_is_unregistered(struct drm_connector *connector) DRM_CONNECTOR_UNREGISTERED; } -void drm_connector_oob_hotplug_event(struct fwnode_handle *connector_fwnode); const char *drm_get_connector_type_name(unsigned int connector_type); const char *drm_get_connector_status_name(enum drm_connector_status status); const char *drm_get_subpixel_order_name(enum subpixel_order order); @@ -1794,11 +1707,6 @@ int drm_connector_set_panel_orientation_with_quirk( int width, int height); int drm_connector_attach_max_bpc_property(struct drm_connector *connector, int min, int max); -void drm_connector_create_privacy_screen_properties(struct drm_connector *conn); -void drm_connector_attach_privacy_screen_properties(struct drm_connector *conn); -void drm_connector_attach_privacy_screen_provider( - struct drm_connector *connector, struct drm_privacy_screen *priv); -void drm_connector_update_privacy_screen(const struct drm_connector_state *connector_state); /** * struct drm_tile_group - Tile group metadata diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 9923c7a688..604b1d1b2d 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -6,13 +6,16 @@ #include #include -#include +#include #include struct drm_driver; struct drm_minor; struct drm_master; +struct drm_device_dma; struct drm_vblank_crtc; +struct drm_sg_mem; +struct drm_local_map; struct drm_vma_offset_manager; struct drm_vram_mm; struct drm_fb_helper; diff --git a/include/drm/drm_displayid.h b/include/drm/drm_displayid.h index 7ffbd9f7bf..ec64d141f5 100644 --- a/include/drm/drm_displayid.h +++ b/include/drm/drm_displayid.h @@ -23,71 +23,38 @@ #define DRM_DISPLAYID_H #include -#include struct edid; -#define VESA_IEEE_OUI 0x3a0292 +#define DATA_BLOCK_PRODUCT_ID 0x00 +#define DATA_BLOCK_DISPLAY_PARAMETERS 0x01 +#define DATA_BLOCK_COLOR_CHARACTERISTICS 0x02 +#define DATA_BLOCK_TYPE_1_DETAILED_TIMING 0x03 +#define DATA_BLOCK_TYPE_2_DETAILED_TIMING 0x04 +#define DATA_BLOCK_TYPE_3_SHORT_TIMING 0x05 +#define DATA_BLOCK_TYPE_4_DMT_TIMING 0x06 +#define DATA_BLOCK_VESA_TIMING 0x07 +#define DATA_BLOCK_CEA_TIMING 0x08 +#define DATA_BLOCK_VIDEO_TIMING_RANGE 0x09 +#define DATA_BLOCK_PRODUCT_SERIAL_NUMBER 0x0a +#define DATA_BLOCK_GP_ASCII_STRING 0x0b +#define DATA_BLOCK_DISPLAY_DEVICE_DATA 0x0c +#define DATA_BLOCK_INTERFACE_POWER_SEQUENCING 0x0d +#define DATA_BLOCK_TRANSFER_CHARACTERISTICS 0x0e +#define DATA_BLOCK_DISPLAY_INTERFACE 0x0f +#define DATA_BLOCK_STEREO_DISPLAY_INTERFACE 0x10 +#define DATA_BLOCK_TILED_DISPLAY 0x12 +#define DATA_BLOCK_CTA 0x81 -/* DisplayID Structure versions */ -#define DISPLAY_ID_STRUCTURE_VER_12 0x12 -#define DISPLAY_ID_STRUCTURE_VER_20 0x20 +#define DATA_BLOCK_VENDOR_SPECIFIC 0x7f -/* DisplayID Structure v1r2 Data Blocks */ -#define DATA_BLOCK_PRODUCT_ID 0x00 -#define DATA_BLOCK_DISPLAY_PARAMETERS 0x01 -#define DATA_BLOCK_COLOR_CHARACTERISTICS 0x02 -#define DATA_BLOCK_TYPE_1_DETAILED_TIMING 0x03 -#define DATA_BLOCK_TYPE_2_DETAILED_TIMING 0x04 -#define DATA_BLOCK_TYPE_3_SHORT_TIMING 0x05 -#define DATA_BLOCK_TYPE_4_DMT_TIMING 0x06 -#define DATA_BLOCK_VESA_TIMING 0x07 -#define DATA_BLOCK_CEA_TIMING 0x08 -#define DATA_BLOCK_VIDEO_TIMING_RANGE 0x09 -#define DATA_BLOCK_PRODUCT_SERIAL_NUMBER 0x0a -#define DATA_BLOCK_GP_ASCII_STRING 0x0b -#define DATA_BLOCK_DISPLAY_DEVICE_DATA 0x0c 
-#define DATA_BLOCK_INTERFACE_POWER_SEQUENCING 0x0d -#define DATA_BLOCK_TRANSFER_CHARACTERISTICS 0x0e -#define DATA_BLOCK_DISPLAY_INTERFACE 0x0f -#define DATA_BLOCK_STEREO_DISPLAY_INTERFACE 0x10 -#define DATA_BLOCK_TILED_DISPLAY 0x12 -#define DATA_BLOCK_VENDOR_SPECIFIC 0x7f -#define DATA_BLOCK_CTA 0x81 - -/* DisplayID Structure v2r0 Data Blocks */ -#define DATA_BLOCK_2_PRODUCT_ID 0x20 -#define DATA_BLOCK_2_DISPLAY_PARAMETERS 0x21 -#define DATA_BLOCK_2_TYPE_7_DETAILED_TIMING 0x22 -#define DATA_BLOCK_2_TYPE_8_ENUMERATED_TIMING 0x23 -#define DATA_BLOCK_2_TYPE_9_FORMULA_TIMING 0x24 -#define DATA_BLOCK_2_DYNAMIC_VIDEO_TIMING 0x25 -#define DATA_BLOCK_2_DISPLAY_INTERFACE_FEATURES 0x26 -#define DATA_BLOCK_2_STEREO_DISPLAY_INTERFACE 0x27 -#define DATA_BLOCK_2_TILED_DISPLAY_TOPOLOGY 0x28 -#define DATA_BLOCK_2_CONTAINER_ID 0x29 -#define DATA_BLOCK_2_VENDOR_SPECIFIC 0x7e -#define DATA_BLOCK_2_CTA_DISPLAY_ID 0x81 - -/* DisplayID Structure v1r2 Product Type */ -#define PRODUCT_TYPE_EXTENSION 0 -#define PRODUCT_TYPE_TEST 1 -#define PRODUCT_TYPE_PANEL 2 -#define PRODUCT_TYPE_MONITOR 3 -#define PRODUCT_TYPE_TV 4 -#define PRODUCT_TYPE_REPEATER 5 -#define PRODUCT_TYPE_DIRECT_DRIVE 6 - -/* DisplayID Structure v2r0 Display Product Primary Use Case (~Product Type) */ -#define PRIMARY_USE_EXTENSION 0 -#define PRIMARY_USE_TEST 1 -#define PRIMARY_USE_GENERIC 2 -#define PRIMARY_USE_TV 3 -#define PRIMARY_USE_DESKTOP_PRODUCTIVITY 4 -#define PRIMARY_USE_DESKTOP_GAMING 5 -#define PRIMARY_USE_PRESENTATION 6 -#define PRIMARY_USE_HEAD_MOUNTED_VR 7 -#define PRIMARY_USE_HEAD_MOUNTED_AR 8 +#define PRODUCT_TYPE_EXTENSION 0 +#define PRODUCT_TYPE_TEST 1 +#define PRODUCT_TYPE_PANEL 2 +#define PRODUCT_TYPE_MONITOR 3 +#define PRODUCT_TYPE_TV 4 +#define PRODUCT_TYPE_REPEATER 5 +#define PRODUCT_TYPE_DIRECT_DRIVE 6 struct displayid_header { u8 rev; @@ -129,16 +96,6 @@ struct displayid_detailed_timing_block { struct displayid_detailed_timings_1 timings[]; }; -#define DISPLAYID_VESA_MSO_OVERLAP GENMASK(3, 0) -#define DISPLAYID_VESA_MSO_MODE GENMASK(6, 5) - -struct displayid_vesa_vendor_specific_block { - struct displayid_block base; - u8 oui[3]; - u8 data_structure_type; - u8 mso; -} __packed; - /* DisplayID iteration */ struct displayid_iter { const struct edid *edid; diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 30359e434c..1d5b3dbb6e 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -453,7 +453,6 @@ struct drm_panel; # define DP_FEC_UNCORR_BLK_ERROR_COUNT_CAP (1 << 1) # define DP_FEC_CORR_BLK_ERROR_COUNT_CAP (1 << 2) # define DP_FEC_BIT_ERROR_COUNT_CAP (1 << 3) -#define DP_FEC_CAPABILITY_1 0x091 /* 2.0 */ /* DP-HDMI2.1 PCON DSC ENCODER SUPPORT */ #define DP_PCON_DSC_ENCODER_CAP_SIZE 0xC /* 0x9E - 0x92 */ @@ -538,9 +537,6 @@ struct drm_panel; #define DP_DSC_BRANCH_OVERALL_THROUGHPUT_1 0x0a1 #define DP_DSC_BRANCH_MAX_LINE_WIDTH 0x0a2 -/* DFP Capability Extension */ -#define DP_DFP_CAPABILITY_EXTENSION_SUPPORT 0x0a3 /* 2.0 */ - /* Link Configuration */ #define DP_LINK_BW_SET 0x100 # define DP_LINK_RATE_TABLE 0x00 /* eDP 1.4 */ @@ -692,7 +688,6 @@ struct drm_panel; #define DP_DSC_ENABLE 0x160 /* DP 1.4 */ # define DP_DECOMPRESSION_EN (1 << 0) -#define DP_DSC_CONFIGURATION 0x161 /* DP 2.0 */ #define DP_PSR_EN_CFG 0x170 /* XXX 1.2? 
*/ # define DP_PSR_ENABLE BIT(0) @@ -748,7 +743,6 @@ struct drm_panel; # define DP_RECEIVE_PORT_0_STATUS (1 << 0) # define DP_RECEIVE_PORT_1_STATUS (1 << 1) # define DP_STREAM_REGENERATION_STATUS (1 << 2) /* 2.0 */ -# define DP_INTRA_HOP_AUX_REPLY_INDICATION (1 << 3) /* 2.0 */ #define DP_ADJUST_REQUEST_LANE0_1 0x206 #define DP_ADJUST_REQUEST_LANE2_3 0x207 @@ -871,8 +865,6 @@ struct drm_panel; # define DP_PHY_TEST_PATTERN_80BIT_CUSTOM 0x4 # define DP_PHY_TEST_PATTERN_CP2520 0x5 -#define DP_PHY_SQUARE_PATTERN 0x249 - #define DP_TEST_HBR2_SCRAMBLER_RESET 0x24A #define DP_TEST_80BIT_CUSTOM_PATTERN_7_0 0x250 #define DP_TEST_80BIT_CUSTOM_PATTERN_15_8 0x251 @@ -1114,27 +1106,8 @@ struct drm_panel; # define DP_UHBR20 (1 << 1) # define DP_UHBR13_5 (1 << 2) -#define DP_128B132B_TRAINING_AUX_RD_INTERVAL 0x2216 /* 2.0 */ -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK 0x7f -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_400_US 0x00 -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_4_MS 0x01 -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_8_MS 0x02 -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_12_MS 0x03 -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_16_MS 0x04 -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_32_MS 0x05 -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_64_MS 0x06 - -#define DP_TEST_264BIT_CUSTOM_PATTERN_7_0 0x2230 -#define DP_TEST_264BIT_CUSTOM_PATTERN_263_256 0x2250 - -/* DSC Extended Capability Branch Total DSC Resources */ -#define DP_DSC_SUPPORT_AND_DSC_DECODER_COUNT 0x2260 /* 2.0 */ -# define DP_DSC_DECODER_COUNT_MASK (0b111 << 5) -# define DP_DSC_DECODER_COUNT_SHIFT 5 -#define DP_DSC_MAX_SLICE_COUNT_AND_AGGREGATION_0 0x2270 /* 2.0 */ -# define DP_DSC_DECODER_0_MAXIMUM_SLICE_COUNT_MASK (1 << 0) -# define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_MASK (0b111 << 1) -# define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_SHIFT 1 +#define DP_128B132B_TRAINING_AUX_RD_INTERVAL 0x2216 /* 2.0 */ +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK 0x7f /* Protocol Converter Extension */ /* HDMI CEC tunneling over AUX DP 1.3 section 5.3.3.3.1 DPCD 1.4+ */ @@ -1346,10 +1319,6 @@ struct drm_panel; #define DP_MAX_LANE_COUNT_PHY_REPEATER 0xf0004 /* 1.4a */ #define DP_Repeater_FEC_CAPABILITY 0xf0004 /* 1.4 */ #define DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT 0xf0005 /* 1.4a */ -#define DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER 0xf0006 /* 2.0 */ -# define DP_PHY_REPEATER_128B132B_SUPPORTED (1 << 0) -/* See DP_128B132B_SUPPORTED_LINK_RATES for values */ -#define DP_PHY_REPEATER_128B132B_RATES 0xf0007 /* 2.0 */ enum drm_dp_phy { DP_PHY_DPRX, @@ -1396,11 +1365,6 @@ enum drm_dp_phy { # define DP_VOLTAGE_SWING_LEVEL_3_SUPPORTED BIT(0) # define DP_PRE_EMPHASIS_LEVEL_3_SUPPORTED BIT(1) -#define DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 0xf0022 /* 2.0 */ -#define DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER(dp_phy) \ - DP_LTTPR_REG(dp_phy, DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1) -/* see DP_128B132B_TRAINING_AUX_RD_INTERVAL for values */ - #define DP_LANE0_1_STATUS_PHY_REPEATER1 0xf0030 /* 1.3 */ #define DP_LANE0_1_STATUS_PHY_REPEATER(dp_phy) \ DP_LTTPR_REG(dp_phy, DP_LANE0_1_STATUS_PHY_REPEATER1) @@ -1526,8 +1490,6 @@ u8 drm_dp_get_adjust_request_voltage(const u8 link_status[DP_LINK_STATUS_SIZE], int lane); u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SIZE], int lane); -u8 drm_dp_get_adjust_tx_ffe_preset(const u8 link_status[DP_LINK_STATUS_SIZE], - int lane); u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZE], unsigned int lane); @@ 
-1539,11 +1501,6 @@ u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZ #define DP_LTTPR_COMMON_CAP_SIZE 8 #define DP_LTTPR_PHY_CAP_SIZE 3 -int drm_dp_read_clock_recovery_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE], - enum drm_dp_phy dp_phy, bool uhbr); -int drm_dp_read_channel_eq_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE], - enum drm_dp_phy dp_phy, bool uhbr); - void drm_dp_link_train_clock_recovery_delay(const struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE]); void drm_dp_lttpr_link_train_clock_recovery_delay(void); @@ -1783,13 +1740,6 @@ drm_dp_tps3_supported(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) dpcd[DP_MAX_LANE_COUNT] & DP_TPS3_SUPPORTED; } -static inline bool -drm_dp_max_downspread(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) -{ - return dpcd[DP_DPCD_REV] >= 0x11 || - dpcd[DP_MAX_DOWNSPREAD] & DP_MAX_DOWNSPREAD_0_5; -} - static inline bool drm_dp_tps4_supported(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { @@ -1875,7 +1825,7 @@ drm_dp_sink_can_do_video_without_timing_msa(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) * * Note that currently this function will return %false for panels which support various DPCD * backlight features but which require the brightness be set through PWM, and don't support setting - * the brightness level via the DPCD. + * the brightness level via the DPCD. This is a TODO. * * Returns: %True if @edp_dpcd indicates that VESA backlight controls are supported, %false * otherwise @@ -1883,7 +1833,8 @@ drm_dp_sink_can_do_video_without_timing_msa(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) static inline bool drm_edp_backlight_supported(const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]) { - return !!(edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP); + return (edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP) && + (edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP); } /* @@ -2244,7 +2195,6 @@ drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) * @max: The maximum backlight level that may be set * @lsb_reg_used: Do we also write values to the DP_EDP_BACKLIGHT_BRIGHTNESS_LSB register? * @aux_enable: Does the panel support the AUX enable cap? - * @aux_set: Does the panel support setting the brightness through AUX? * * This structure contains various data about an eDP backlight, which can be populated by using * drm_edp_backlight_init(). 
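The drm_edp_backlight_supported() hunk above tightens what "supported" means: with the aux_set flag dropped from struct drm_edp_backlight_info, the helper itself now also demands DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP in edp_dpcd[2]. As a minimal sketch of how a panel driver would consume the helper, assuming <drm/drm_dp_helper.h> is included; the my_panel_init_backlight() name and the bare-bones error handling are illustrative assumptions, while drm_dp_dpcd_read(), DP_EDP_DPCD_REV and EDP_DISPLAY_CTL_CAP_SIZE are the existing drm_dp_helper.h interfaces:

static int my_panel_init_backlight(struct drm_dp_aux *aux)
{
	u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE];
	ssize_t ret;

	/* Cache the eDP display control capability bytes, DPCD 0x700-0x702. */
	ret = drm_dp_dpcd_read(aux, DP_EDP_DPCD_REV, edp_dpcd,
			       EDP_DISPLAY_CTL_CAP_SIZE);
	if (ret != EDP_DISPLAY_CTL_CAP_SIZE)
		return ret < 0 ? ret : -EIO;

	/* With the hunk above, this also requires the AUX set capability. */
	if (!drm_edp_backlight_supported(edp_dpcd))
		return -ENODEV;

	return 0;	/* proceed with drm_edp_backlight_init() and friends */
}
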
@@ -2256,7 +2206,6 @@ struct drm_edp_backlight_info { bool lsb_reg_used : 1; bool aux_enable : 1; - bool aux_set : 1; }; int diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index 78044ac5b5..ddb9231d03 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -554,8 +554,6 @@ struct drm_dp_mst_topology_state { struct drm_private_state base; struct list_head vcpis; struct drm_dp_mst_topology_mgr *mgr; - u8 total_avail_slots; - u8 start_slot; }; #define to_dp_mst_topology_mgr(x) container_of(x, struct drm_dp_mst_topology_mgr, base) @@ -808,7 +806,6 @@ int drm_dp_mst_get_vcpi_slots(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp void drm_dp_mst_reset_vcpi_slots(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port); -void drm_dp_mst_update_slots(struct drm_dp_mst_topology_state *mst_state, uint8_t link_encoding_cap); void drm_dp_mst_deallocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port); @@ -818,7 +815,7 @@ int drm_dp_find_vcpi_slots(struct drm_dp_mst_topology_mgr *mgr, int pbn); -int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr, int start_slot); +int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr); int drm_dp_update_payload_part2(struct drm_dp_mst_topology_mgr *mgr); diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index f6159acb88..0cd95953cd 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -291,9 +291,8 @@ struct drm_driver { /** * @gem_create_object: constructor for gem objects * - * Hook for allocating the GEM object struct, for use by the CMA - * and SHMEM GEM helpers. Returns a GEM object on success, or an - * ERR_PTR()-encoded error code otherwise. + * Hook for allocating the GEM object struct, for use by the CMA and + * SHMEM GEM helpers. */ struct drm_gem_object *(*gem_create_object)(struct drm_device *dev, size_t size); @@ -346,14 +345,11 @@ struct drm_driver { * mmap hook for GEM drivers, used to implement dma-buf mmap in the * PRIME helpers. * - * This hook only exists for historical reasons. Drivers must use - * drm_gem_prime_mmap() to implement it. - * - * FIXME: Convert all drivers to implement mmap in struct - * &drm_gem_object_funcs and inline drm_gem_prime_mmap() into - * its callers. This hook should be removed afterwards. + * FIXME: There's way too much duplication going on here, and also moved + * to &drm_gem_object_funcs. */ - int (*gem_prime_mmap)(struct drm_gem_object *obj, struct vm_area_struct *vma); + int (*gem_prime_mmap)(struct drm_gem_object *obj, + struct vm_area_struct *vma); /** * @dumb_create: @@ -602,6 +598,5 @@ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev) int drm_dev_set_unique(struct drm_device *dev, const char *name); -extern bool drm_firmware_drivers_only(void); #endif diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 144c495b99..1c639e81de 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -508,52 +508,6 @@ static inline u8 drm_eld_get_conn_type(const uint8_t *eld) return eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_CONN_TYPE_MASK; } -/** - * drm_edid_encode_panel_id - Encode an ID for matching against drm_edid_get_panel_id() - * @vend_chr_0: First character of the vendor string. - * @vend_chr_1: Second character of the vendor string. - * @vend_chr_2: Third character of the vendor string. - * @product_id: The 16-bit product ID. - * - * This is a macro so that it can be calculated at compile time and used - * as an initializer. 
- * - * For instance: - * drm_edid_encode_panel_id('B', 'O', 'E', 0x2d08) => 0x09e52d08 - * - * Return: a 32-bit ID per panel. - */ -#define drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, product_id) \ - ((((u32)(vend_chr_0) - '@') & 0x1f) << 26 | \ - (((u32)(vend_chr_1) - '@') & 0x1f) << 21 | \ - (((u32)(vend_chr_2) - '@') & 0x1f) << 16 | \ - ((product_id) & 0xffff)) - -/** - * drm_edid_decode_panel_id - Decode a panel ID from drm_edid_encode_panel_id() - * @panel_id: The panel ID to decode. - * @vend: A 4-byte buffer to store the 3-letter vendor string plus a '\0' - * termination - * @product_id: The product ID will be returned here. - * - * For instance, after: - * drm_edid_decode_panel_id(0x09e52d08, vend, &product_id) - * These will be true: - * vend[0] = 'B' - * vend[1] = 'O' - * vend[2] = 'E' - * vend[3] = '\0' - * product_id = 0x2d08 - */ -static inline void drm_edid_decode_panel_id(u32 panel_id, char vend[4], u16 *product_id) -{ - *product_id = (u16)(panel_id & 0xffff); - vend[0] = '@' + ((panel_id >> 26) & 0x1f); - vend[1] = '@' + ((panel_id >> 21) & 0x1f); - vend[2] = '@' + ((panel_id >> 16) & 0x1f); - vend[3] = '\0'; -} - bool drm_probe_ddc(struct i2c_adapter *adapter); struct edid *drm_do_get_edid(struct drm_connector *connector, int (*get_edid_block)(void *data, u8 *buf, unsigned int block, @@ -561,7 +515,6 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, void *data); struct edid *drm_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter); -u32 drm_edid_get_panel_id(struct i2c_adapter *adapter); struct edid *drm_get_edid_switcheroo(struct drm_connector *connector, struct i2c_adapter *adapter); struct edid *drm_edid_duplicate(const struct edid *edid); diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index b30ed5de0a..4e0258a613 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -6,41 +6,34 @@ #ifndef __LINUX_DRM_FORMAT_HELPER_H #define __LINUX_DRM_FORMAT_HELPER_H -struct drm_format_info; struct drm_framebuffer; struct drm_rect; -unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format, - const struct drm_rect *clip); +void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb, + struct drm_rect *clip); +void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, + struct drm_framebuffer *fb, + struct drm_rect *clip); +void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb, + struct drm_rect *clip, bool cached); +void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr, + struct drm_framebuffer *fb, + struct drm_rect *clip, bool swab); +void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch, + void *vaddr, struct drm_framebuffer *fb, + struct drm_rect *clip, bool swab); +void drm_fb_xrgb8888_to_rgb888_dstclip(void __iomem *dst, unsigned int dst_pitch, + void *vaddr, struct drm_framebuffer *fb, + struct drm_rect *clip); +void drm_fb_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb, + struct drm_rect *clip); -void drm_fb_memcpy(void *dst, unsigned int dst_pitch, const void *vaddr, - const struct drm_framebuffer *fb, const struct drm_rect *clip); -void drm_fb_memcpy_toio(void __iomem *dst, unsigned int dst_pitch, const void *vaddr, - const struct drm_framebuffer *fb, const struct drm_rect *clip); -void drm_fb_swab(void *dst, unsigned int dst_pitch, const void *src, - const struct drm_framebuffer *fb, const struct drm_rect *clip, - bool cached); -void 
drm_fb_xrgb8888_to_rgb332(void *dst, unsigned int dst_pitch, const void *vaddr, - const struct drm_framebuffer *fb, const struct drm_rect *clip); -void drm_fb_xrgb8888_to_rgb565(void *dst, unsigned int dst_pitch, const void *vaddr, - const struct drm_framebuffer *fb, const struct drm_rect *clip, - bool swab); -void drm_fb_xrgb8888_to_rgb565_toio(void __iomem *dst, unsigned int dst_pitch, - const void *vaddr, const struct drm_framebuffer *fb, - const struct drm_rect *clip, bool swab); -void drm_fb_xrgb8888_to_rgb888(void *dst, unsigned int dst_pitch, const void *src, - const struct drm_framebuffer *fb, const struct drm_rect *clip); -void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch, - const void *vaddr, const struct drm_framebuffer *fb, - const struct drm_rect *clip); -void drm_fb_xrgb8888_to_xrgb2101010_toio(void __iomem *dst, unsigned int dst_pitch, - const void *vaddr, const struct drm_framebuffer *fb, - const struct drm_rect *clip); -void drm_fb_xrgb8888_to_gray8(void *dst, unsigned int dst_pitch, const void *vaddr, - const struct drm_framebuffer *fb, const struct drm_rect *clip); - -int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_format, - const void *vmap, const struct drm_framebuffer *fb, - const struct drm_rect *rect); +int drm_fb_blit_rect_dstclip(void __iomem *dst, unsigned int dst_pitch, + uint32_t dst_format, void *vmap, + struct drm_framebuffer *fb, + struct drm_rect *rect); +int drm_fb_blit_dstclip(void __iomem *dst, unsigned int dst_pitch, + uint32_t dst_format, void *vmap, + struct drm_framebuffer *fb); #endif /* __LINUX_DRM_FORMAT_HELPER_H */ diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h index 0b1e2dd2ac..48222a1078 100644 --- a/include/drm/drm_gem_atomic_helper.h +++ b/include/drm/drm_gem_atomic_helper.h @@ -22,24 +22,6 @@ int drm_gem_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, * Helpers for planes with shadow buffers */ -/** - * DRM_SHADOW_PLANE_MAX_WIDTH - Maximum width of a plane's shadow buffer in pixels - * - * For drivers with shadow planes, the maximum width of the framebuffer is - * usually independent from hardware limitations. Drivers can initialize struct - * drm_mode_config.max_width from DRM_SHADOW_PLANE_MAX_WIDTH. - */ -#define DRM_SHADOW_PLANE_MAX_WIDTH (4096u) - -/** - * DRM_SHADOW_PLANE_MAX_HEIGHT - Maximum height of a plane's shadow buffer in scanlines - * - * For drivers with shadow planes, the maximum height of the framebuffer is - * usually independent from hardware limitations. Drivers can initialize struct - * drm_mode_config.max_height from DRM_SHADOW_PLANE_MAX_HEIGHT. 
- */ -#define DRM_SHADOW_PLANE_MAX_HEIGHT (4096u) - /** * struct drm_shadow_plane_state - plane state for planes with shadow buffers * diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index adb507a9db..cd13508acb 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -32,108 +32,42 @@ struct drm_gem_cma_object { #define to_drm_gem_cma_obj(gem_obj) \ container_of(gem_obj, struct drm_gem_cma_object, base) -struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, - size_t size); -void drm_gem_cma_free(struct drm_gem_cma_object *cma_obj); -void drm_gem_cma_print_info(const struct drm_gem_cma_object *cma_obj, - struct drm_printer *p, unsigned int indent); -struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_cma_object *cma_obj); -int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, struct dma_buf_map *map); -int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct *vma); - -extern const struct vm_operations_struct drm_gem_cma_vm_ops; - -/* - * GEM object functions - */ +#ifndef CONFIG_MMU +#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ + .get_unmapped_area = drm_gem_cma_get_unmapped_area, +#else +#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS +#endif /** - * drm_gem_cma_object_free - GEM object function for drm_gem_cma_free() - * @obj: GEM object to free + * DEFINE_DRM_GEM_CMA_FOPS() - macro to generate file operations for CMA drivers + * @name: name for the generated structure * - * This function wraps drm_gem_cma_free_object(). Drivers that employ the CMA helpers - * should use it as their &drm_gem_object_funcs.free handler. + * This macro autogenerates a suitable &struct file_operations for CMA based + * drivers, which can be assigned to &drm_driver.fops. Note that this structure + * cannot be shared between drivers, because it contains a reference to the + * current module using THIS_MODULE. + * + * Note that the declaration is already marked as static - if you need a + * non-static version of this you're probably doing it wrong and will break the + * THIS_MODULE reference by accident. */ -static inline void drm_gem_cma_object_free(struct drm_gem_object *obj) -{ - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); +#define DEFINE_DRM_GEM_CMA_FOPS(name) \ + static const struct file_operations name = {\ + .owner = THIS_MODULE,\ + .open = drm_open,\ + .release = drm_release,\ + .unlocked_ioctl = drm_ioctl,\ + .compat_ioctl = drm_compat_ioctl,\ + .poll = drm_poll,\ + .read = drm_read,\ + .llseek = noop_llseek,\ + .mmap = drm_gem_mmap,\ + DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ + } - drm_gem_cma_free(cma_obj); -} - -/** - * drm_gem_cma_object_print_info() - Print &drm_gem_cma_object info for debugfs - * @p: DRM printer - * @indent: Tab indentation level - * @obj: GEM object - * - * This function wraps drm_gem_cma_print_info(). Drivers that employ the CMA helpers - * should use this function as their &drm_gem_object_funcs.print_info handler. - */ -static inline void drm_gem_cma_object_print_info(struct drm_printer *p, unsigned int indent, - const struct drm_gem_object *obj) -{ - const struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); - - drm_gem_cma_print_info(cma_obj, p, indent); -} - -/** - * drm_gem_cma_object_get_sg_table - GEM object function for drm_gem_cma_get_sg_table() - * @obj: GEM object - * - * This function wraps drm_gem_cma_get_sg_table(). Drivers that employ the CMA helpers should - * use it as their &drm_gem_object_funcs.get_sg_table handler. 
- * - * Returns: - * A pointer to the scatter/gather table of pinned pages or NULL on failure. - */ -static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_object *obj) -{ - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); - - return drm_gem_cma_get_sg_table(cma_obj); -} - -/* - * drm_gem_cma_object_vmap - GEM object function for drm_gem_cma_vmap() - * @obj: GEM object - * @map: Returns the kernel virtual address of the CMA GEM object's backing store. - * - * This function wraps drm_gem_cma_vmap(). Drivers that employ the CMA helpers should - * use it as their &drm_gem_object_funcs.vmap handler. - * - * Returns: - * 0 on success or a negative error code on failure. - */ -static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) -{ - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); - - return drm_gem_cma_vmap(cma_obj, map); -} - -/** - * drm_gem_cma_object_mmap - GEM object function for drm_gem_cma_mmap() - * @obj: GEM object - * @vma: VMA for the area to be mapped - * - * This function wraps drm_gem_cma_mmap(). Drivers that employ the cma helpers should - * use it as their &drm_gem_object_funcs.mmap handler. - * - * Returns: - * 0 on success or a negative error code on failure. - */ -static inline int drm_gem_cma_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) -{ - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); - - return drm_gem_cma_mmap(cma_obj, vma); -} - -/* - * Driver ops - */ +/* free GEM object */ +void drm_gem_cma_free_object(struct drm_gem_object *gem_obj); /* create memory region for DRM framebuffer */ int drm_gem_cma_dumb_create_internal(struct drm_file *file_priv, @@ -145,10 +79,30 @@ int drm_gem_cma_dumb_create(struct drm_file *file_priv, struct drm_device *drm, struct drm_mode_create_dumb *args); +/* allocate physical memory */ +struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, + size_t size); + +extern const struct vm_operations_struct drm_gem_cma_vm_ops; + +#ifndef CONFIG_MMU +unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags); +#endif + +void drm_gem_cma_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj); + +struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object * drm_gem_cma_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); +int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); +int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); /** * DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE - CMA GEM driver operations @@ -231,47 +185,4 @@ drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm, struct dma_buf_attachment *attach, struct sg_table *sgt); -/* - * File ops - */ - -#ifndef CONFIG_MMU -unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags); -#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ - .get_unmapped_area = drm_gem_cma_get_unmapped_area, -#else -#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS -#endif - -/** - * DEFINE_DRM_GEM_CMA_FOPS() - macro to generate file operations for CMA drivers - * @name: name for the generated structure - * - * This macro autogenerates a suitable &struct file_operations for CMA based - * drivers, which can be assigned to &drm_driver.fops. 
Note that this structure - * cannot be shared between drivers, because it contains a reference to the - * current module using THIS_MODULE. - * - * Note that the declaration is already marked as static - if you need a - * non-static version of this you're probably doing it wrong and will break the - * THIS_MODULE reference by accident. - */ -#define DEFINE_DRM_GEM_CMA_FOPS(name) \ - static const struct file_operations name = {\ - .owner = THIS_MODULE,\ - .open = drm_open,\ - .release = drm_release,\ - .unlocked_ioctl = drm_ioctl,\ - .compat_ioctl = drm_compat_ioctl,\ - .poll = drm_poll,\ - .read = drm_read,\ - .llseek = noop_llseek,\ - .mmap = drm_gem_mmap,\ - DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ - } - #endif /* __DRM_GEM_CMA_HELPER_H__ */ diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 311d66c9cf..434328d8a0 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -107,17 +107,16 @@ struct drm_gem_shmem_object { container_of(obj, struct drm_gem_shmem_object, base) struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); -void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); +void drm_gem_shmem_free_object(struct drm_gem_object *obj); int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem); -void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); -void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); -int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); +int drm_gem_shmem_pin(struct drm_gem_object *obj); +void drm_gem_shmem_unpin(struct drm_gem_object *obj); +int drm_gem_shmem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); +void drm_gem_shmem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); +int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv); static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) { @@ -126,156 +125,29 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem !shmem->base.dma_buf && !shmem->base.import_attach; } -void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem); -bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem); +void drm_gem_shmem_purge_locked(struct drm_gem_object *obj); +bool drm_gem_shmem_purge(struct drm_gem_object *obj); -struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem); -struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem); +struct drm_gem_shmem_object * +drm_gem_shmem_create_with_handle(struct drm_file *file_priv, + struct drm_device *dev, size_t size, + uint32_t *handle); -void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, - struct drm_printer *p, unsigned int indent); +int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, + struct drm_mode_create_dumb *args); -/* - * GEM object functions - */ +int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); -/** - * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free() - * @obj: GEM object to free - * - * This function wraps drm_gem_shmem_free(). 
Drivers that employ the shmem helpers - * should use it as their &drm_gem_object_funcs.free handler. - */ -static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - drm_gem_shmem_free(shmem); -} - -/** - * drm_gem_shmem_object_print_info() - Print &drm_gem_shmem_object info for debugfs - * @p: DRM printer - * @indent: Tab indentation level - * @obj: GEM object - * - * This function wraps drm_gem_shmem_print_info(). Drivers that employ the shmem helpers should - * use this function as their &drm_gem_object_funcs.print_info handler. - */ -static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, - const struct drm_gem_object *obj) -{ - const struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - drm_gem_shmem_print_info(shmem, p, indent); -} - -/** - * drm_gem_shmem_object_pin - GEM object function for drm_gem_shmem_pin() - * @obj: GEM object - * - * This function wraps drm_gem_shmem_pin(). Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.pin handler. - */ -static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - return drm_gem_shmem_pin(shmem); -} - -/** - * drm_gem_shmem_object_unpin - GEM object function for drm_gem_shmem_unpin() - * @obj: GEM object - * - * This function wraps drm_gem_shmem_unpin(). Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.unpin handler. - */ -static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - drm_gem_shmem_unpin(shmem); -} - -/** - * drm_gem_shmem_object_get_sg_table - GEM object function for drm_gem_shmem_get_sg_table() - * @obj: GEM object - * - * This function wraps drm_gem_shmem_get_sg_table(). Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.get_sg_table handler. - * - * Returns: - * A pointer to the scatter/gather table of pinned pages or NULL on failure. - */ -static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - return drm_gem_shmem_get_sg_table(shmem); -} - -/* - * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap() - * @obj: GEM object - * @map: Returns the kernel virtual address of the SHMEM GEM object's backing store. - * - * This function wraps drm_gem_shmem_vmap(). Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.vmap handler. - * - * Returns: - * 0 on success or a negative error code on failure. - */ -static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - return drm_gem_shmem_vmap(shmem, map); -} - -/* - * drm_gem_shmem_object_vunmap - GEM object function for drm_gem_shmem_vunmap() - * @obj: GEM object - * @map: Kernel virtual address where the SHMEM GEM object was mapped - * - * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.vunmap handler. 
- */ -static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - drm_gem_shmem_vunmap(shmem, map); -} - -/** - * drm_gem_shmem_object_mmap - GEM object function for drm_gem_shmem_mmap() - * @obj: GEM object - * @vma: VMA for the area to be mapped - * - * This function wraps drm_gem_shmem_mmap(). Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.mmap handler. - * - * Returns: - * 0 on success or a negative error code on failure. - */ -static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - - return drm_gem_shmem_mmap(shmem, vma); -} - -/* - * Driver ops - */ +void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj); +struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object * drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); -int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, - struct drm_mode_create_dumb *args); + +struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj); /** * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations diff --git a/include/drm/drm_gem_ttm_helper.h b/include/drm/drm_gem_ttm_helper.h index 78040f6cc6..c1aa02bd4c 100644 --- a/include/drm/drm_gem_ttm_helper.h +++ b/include/drm/drm_gem_ttm_helper.h @@ -3,7 +3,7 @@ #ifndef DRM_GEM_TTM_HELPER_H #define DRM_GEM_TTM_HELPER_H -#include +#include #include #include diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index b4ce27a727..d3cf06c9af 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -11,8 +11,8 @@ #include #include -#include #include +#include /* for container_of() */ struct drm_mode_create_dumb; struct drm_plane; diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h index fce2ef3fdf..bb95ff011b 100644 --- a/include/drm/drm_hashtab.h +++ b/include/drm/drm_hashtab.h @@ -49,17 +49,17 @@ struct drm_open_hash { u8 order; }; -extern int drm_ht_create(struct drm_open_hash *ht, unsigned int order); -extern int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item); -extern int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add); -extern int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item); +int drm_ht_create(struct drm_open_hash *ht, unsigned int order); +int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item); +int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, + unsigned long seed, int bits, int shift, + unsigned long add); +int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item); -extern void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key); -extern int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key); -extern int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item); -extern void drm_ht_remove(struct drm_open_hash *ht); +void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key); +int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key); +int 
drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item); +void drm_ht_remove(struct drm_open_hash *ht); /* * RCU-safe interface diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h index 6ed61c371f..afb27cb6a7 100644 --- a/include/drm/drm_ioctl.h +++ b/include/drm/drm_ioctl.h @@ -167,6 +167,7 @@ struct drm_ioctl_desc { .name = #ioctl \ } +int drm_ioctl_permit(u32 flags, struct drm_file *file_priv); long drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long drm_ioctl_kernel(struct file *, drm_ioctl_t, void *, u32); #ifdef CONFIG_COMPAT diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h index 0fc85418aa..58dc8d8cc9 100644 --- a/include/drm/drm_legacy.h +++ b/include/drm/drm_legacy.h @@ -37,6 +37,7 @@ #include #include +#include struct drm_device; struct drm_driver; @@ -50,20 +51,6 @@ struct pci_driver; * you're doing it terribly wrong. */ -/* - * Hash-table Support - */ - -struct drm_hash_item { - struct hlist_node head; - unsigned long key; -}; - -struct drm_open_hash { - struct hlist_head *table; - u8 order; -}; - /** * DMA buffer. */ diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 3de54f15be..eea8d86e06 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -241,13 +241,9 @@ struct mipi_dsi_device * mipi_dsi_device_register_full(struct mipi_dsi_host *host, const struct mipi_dsi_device_info *info); void mipi_dsi_device_unregister(struct mipi_dsi_device *dsi); -struct mipi_dsi_device * -devm_mipi_dsi_device_register_full(struct device *dev, struct mipi_dsi_host *host, - const struct mipi_dsi_device_info *info); struct mipi_dsi_device *of_find_mipi_dsi_device_by_node(struct device_node *np); int mipi_dsi_attach(struct mipi_dsi_device *dsi); int mipi_dsi_detach(struct mipi_dsi_device *dsi); -int devm_mipi_dsi_attach(struct device *dev, struct mipi_dsi_device *dsi); int mipi_dsi_shutdown_peripheral(struct mipi_dsi_device *dsi); int mipi_dsi_turn_on_peripheral(struct mipi_dsi_device *dsi); int mipi_dsi_set_maximum_return_packet_size(struct mipi_dsi_device *dsi, diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18..9b4292f229 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -39,15 +39,13 @@ */ #include #include -#include +#include #include #include #include #ifdef CONFIG_DRM_DEBUG_MM #include #endif -#include - #include #ifdef CONFIG_DRM_DEBUG_MM diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 91ca575a78..1ddf7783fd 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -103,13 +103,14 @@ struct drm_mode_config_funcs { * Callback used by helpers to inform the driver of output configuration * changes. * - * Drivers implementing fbdev emulation use drm_kms_helper_hotplug_event() - * to call this hook to inform the fbdev helper of output changes. + * Drivers implementing fbdev emulation with the helpers can call + * drm_fb_helper_hotplug_changed from this hook to inform the fbdev + * helper of output changes. * - * This hook is deprecated, drivers should instead use - * drm_fbdev_generic_setup() which takes care of any necessary - * hotplug event forwarding already without further involvement by - * the driver. + * FIXME: + * + * Except that there's no vtable for device-level helper callbacks + * there's no reason this is a core function. */ void (*output_poll_changed)(struct drm_device *dev); @@ -359,19 +360,6 @@ struct drm_mode_config_funcs { * Core mode resource tracking structure. 
All CRTC, encoders, and connectors * enumerated by the driver are added here, as are global properties. Some * global restrictions are also here, e.g. dimension restrictions. - * - * Framebuffer sizes refer to the virtual screen that can be displayed by - * the CRTC. This can be different from the physical resolution programmed. - * The minimum width and height, stored in @min_width and @min_height, - * describe the smallest size of the framebuffer. It correlates to the - * minimum programmable resolution. - * The maximum width, stored in @max_width, is typically limited by the - * maximum pitch between two adjacent scanlines. The maximum height, stored - * in @max_height, is usually only limited by the amount of addressable video - * memory. For hardware that has no real maximum, drivers should pick a - * reasonable default. - * - * See also @DRM_SHADOW_PLANE_MAX_WIDTH and @DRM_SHADOW_PLANE_MAX_HEIGHT. */ struct drm_mode_config { /** diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index b84693fbd2..aafd07388e 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -24,8 +24,6 @@ #ifndef DRM_MODESET_LOCK_H_ #define DRM_MODESET_LOCK_H_ -#include /* stackdepot.h is not self-contained */ -#include #include struct drm_modeset_lock; @@ -53,12 +51,6 @@ struct drm_modeset_acquire_ctx { */ struct drm_modeset_lock *contended; - /* - * Stack depot for debugging when a contended lock was not backed off - * from. - */ - depot_stack_handle_t stack_depot; - /* * list of held locks (drm_modeset_lock) */ diff --git a/include/drm/drm_of.h b/include/drm/drm_of.h index 99f79ac8b4..b9b093add9 100644 --- a/include/drm/drm_of.h +++ b/include/drm/drm_of.h @@ -49,7 +49,6 @@ int drm_of_find_panel_or_bridge(const struct device_node *np, struct drm_bridge **bridge); int drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1, const struct device_node *port2); -int drm_of_lvds_get_data_mapping(const struct device_node *port); #else static inline uint32_t drm_of_crtc_port_mask(struct drm_device *dev, struct device_node *port) @@ -99,12 +98,6 @@ drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1, { return -EINVAL; } - -static inline int -drm_of_lvds_get_data_mapping(const struct device_node *port) -{ - return -EINVAL; -} #endif /* diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 830b4d0586..99eb628d7d 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -24,6 +24,7 @@ #ifndef __DRM_PANEL_H__ #define __DRM_PANEL_H__ +#include #include #include #include @@ -35,8 +36,6 @@ struct drm_device; struct drm_panel; struct display_timing; -enum drm_panel_orientation; - /** * struct drm_panel_funcs - perform operations on a given panel * @@ -179,6 +178,16 @@ struct drm_panel { * Panel entry in registry. */ struct list_head list; + + /** + * @prepare_upstream_first: + * + * The upstream controller should be prepared first, before the prepare + * for the panel is called. This is largely required for DSI panels + * where the DSI host controller should be initialised to LP-11 before + * the panel is powered up. 
+ */ + bool prepare_upstream_first; }; void drm_panel_init(struct drm_panel *panel, struct device *dev, diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 0c1102dc4d..fed97e3562 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -43,7 +43,7 @@ enum drm_scaling_filter { /** * struct drm_plane_state - mutable plane state * - * Please note that the destination coordinates @crtc_x, @crtc_y, @crtc_h and + * Please note that the destination coordinates @crtc_x, @crtc_y, @crtc_h and * @crtc_w and the source coordinates @src_x, @src_y, @src_h and @src_w are the * raw coordinates provided by userspace. Drivers should use * drm_atomic_helper_check_plane_state() and only use the derived rectangles in diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 22fabdeed2..15a089a87c 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -340,8 +340,6 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, /** * DRM_DEV_ERROR() - Error output. * - * NOTE: this is deprecated in favor of drm_err() or dev_err(). - * * @dev: device pointer * @fmt: printf() like format string. */ @@ -351,9 +349,6 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, /** * DRM_DEV_ERROR_RATELIMITED() - Rate limited error output. * - * NOTE: this is deprecated in favor of drm_err_ratelimited() or - * dev_err_ratelimited(). - * * @dev: device pointer * @fmt: printf() like format string. * @@ -369,11 +364,9 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, DRM_DEV_ERROR(dev, fmt, ##__VA_ARGS__); \ }) -/* NOTE: this is deprecated in favor of drm_info() or dev_info(). */ #define DRM_DEV_INFO(dev, fmt, ...) \ drm_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_info_once() or dev_info_once(). */ #define DRM_DEV_INFO_ONCE(dev, fmt, ...) \ ({ \ static bool __print_once __read_mostly; \ @@ -386,8 +379,6 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, /** * DRM_DEV_DEBUG() - Debug output for generic drm code * - * NOTE: this is deprecated in favor of drm_dbg_core(). - * * @dev: device pointer * @fmt: printf() like format string. */ @@ -396,8 +387,6 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, /** * DRM_DEV_DEBUG_DRIVER() - Debug output for vendor specific part of the driver * - * NOTE: this is deprecated in favor of drm_dbg() or dev_dbg(). - * * @dev: device pointer * @fmt: printf() like format string. */ @@ -406,8 +395,6 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, /** * DRM_DEV_DEBUG_KMS() - Debug output for modesetting code * - * NOTE: this is deprecated in favor of drm_dbg_kms(). - * * @dev: device pointer * @fmt: printf() like format string. */ @@ -493,63 +480,47 @@ void __drm_err(const char *format, ...); #define _DRM_PRINTK(once, level, fmt, ...) \ printk##once(KERN_##level "[" DRM_NAME "] " fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_info(). */ #define DRM_INFO(fmt, ...) \ _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_notice(). */ #define DRM_NOTE(fmt, ...) \ _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_warn(). */ #define DRM_WARN(fmt, ...) \ _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_info_once(). */ #define DRM_INFO_ONCE(fmt, ...) 
\ _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_notice_once(). */ #define DRM_NOTE_ONCE(fmt, ...) \ _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_warn_once(). */ #define DRM_WARN_ONCE(fmt, ...) \ _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_err(). */ #define DRM_ERROR(fmt, ...) \ __drm_err(fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of pr_err_ratelimited(). */ #define DRM_ERROR_RATELIMITED(fmt, ...) \ DRM_DEV_ERROR_RATELIMITED(NULL, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_core(NULL, ...). */ #define DRM_DEBUG(fmt, ...) \ __drm_dbg(DRM_UT_CORE, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg(NULL, ...). */ #define DRM_DEBUG_DRIVER(fmt, ...) \ __drm_dbg(DRM_UT_DRIVER, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_kms(NULL, ...). */ #define DRM_DEBUG_KMS(fmt, ...) \ __drm_dbg(DRM_UT_KMS, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_prime(NULL, ...). */ #define DRM_DEBUG_PRIME(fmt, ...) \ __drm_dbg(DRM_UT_PRIME, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_atomic(NULL, ...). */ #define DRM_DEBUG_ATOMIC(fmt, ...) \ __drm_dbg(DRM_UT_ATOMIC, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_vbl(NULL, ...). */ #define DRM_DEBUG_VBL(fmt, ...) \ __drm_dbg(DRM_UT_VBL, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_lease(NULL, ...). */ #define DRM_DEBUG_LEASE(fmt, ...) \ __drm_dbg(DRM_UT_LEASE, fmt, ##__VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_dp(NULL, ...). */ #define DRM_DEBUG_DP(fmt, ...) \ __drm_dbg(DRM_UT_DP, fmt, ## __VA_ARGS__) @@ -565,7 +536,6 @@ void __drm_err(const char *format, ...); #define drm_dbg_kms_ratelimited(drm, fmt, ...) \ __DRM_DEFINE_DBG_RATELIMITED(KMS, drm, fmt, ## __VA_ARGS__) -/* NOTE: this is deprecated in favor of drm_dbg_kms_ratelimited(NULL, ...). */ #define DRM_DEBUG_KMS_RATELIMITED(fmt, ...) 
drm_dbg_kms_ratelimited(NULL, fmt, ## __VA_ARGS__) /* diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h index 48300aa6ca..04c57564c3 100644 --- a/include/drm/drm_probe_helper.h +++ b/include/drm/drm_probe_helper.h @@ -20,7 +20,6 @@ void drm_kms_helper_poll_fini(struct drm_device *dev); bool drm_helper_hpd_irq_event(struct drm_device *dev); bool drm_connector_helper_hpd_irq_event(struct drm_connector *connector); void drm_kms_helper_hotplug_event(struct drm_device *dev); -void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector); void drm_kms_helper_poll_disable(struct drm_device *dev); void drm_kms_helper_poll_enable(struct drm_device *dev); diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h index 6273cac44e..d454ef617b 100644 --- a/include/drm/drm_sysfs.h +++ b/include/drm/drm_sysfs.h @@ -11,7 +11,6 @@ int drm_class_device_register(struct device *dev); void drm_class_device_unregister(struct device *dev); void drm_sysfs_hotplug_event(struct drm_device *dev); -void drm_sysfs_connector_hotplug_event(struct drm_connector *connector); void drm_sysfs_connector_status_event(struct drm_connector *connector, struct drm_property *property); #endif diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index bbc22fad8d..88ae7f331b 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -27,13 +27,9 @@ #include #include #include -#include -#include #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) -struct drm_gem_object; - struct drm_gpu_scheduler; struct drm_sched_rq; @@ -54,147 +50,56 @@ enum drm_sched_priority { /** * struct drm_sched_entity - A wrapper around a job queue (typically * attached to the DRM file_priv). * + * @list: used to append this struct to the list of entities in the + * runqueue. + * @rq: runqueue on which this entity is currently scheduled. + * @sched_list: A list of schedulers (drm_gpu_schedulers). + * Jobs from this entity can be scheduled on any scheduler + * on this list. + * @num_sched_list: number of drm_gpu_schedulers in the sched_list. + * @priority: priority of the entity + * @rq_lock: lock to modify the runqueue to which this entity belongs. + * @job_queue: the list of jobs of this entity. + * @fence_seq: a linearly increasing seqno incremented with each + * new &drm_sched_fence which is part of the entity. + * @fence_context: a unique context for all the fences which belong + * to this entity. + * The &drm_sched_fence.scheduled uses the + * fence_context but &drm_sched_fence.finished uses + * fence_context + 1. + * @dependency: the dependency fence of the job which is on the top + * of the job queue. + * @cb: callback for the dependency fence above. + * @guilty: points to ctx's guilty. + * @fini_status: contains the exit status in case the process was signalled. + * @last_scheduled: points to the finished fence of the last scheduled job. + * @last_user: last group leader pushing a job into the entity. + * @stopped: Marks the entity as removed from rq and destined for termination. + * @entity_idle: Signals when entity is not in use + * * Entities will emit jobs in order to their corresponding hardware * ring, and the scheduler will alternate between entities based on * scheduling policy. */ struct drm_sched_entity { - /** - * @list: - * - * Used to append this struct to the list of entities in the runqueue - * @rq under &drm_sched_rq.entities. - * - * Protected by &drm_sched_rq.lock of @rq. 
- */ struct list_head list; - - /** - * @rq: - * - * Runqueue on which this entity is currently scheduled. - * - * FIXME: Locking is very unclear for this. Writers are protected by - * @rq_lock, but readers are generally lockless and seem to just race - * with not even a READ_ONCE. - */ struct drm_sched_rq *rq; - - /** - * @sched_list: - * - * A list of schedulers (struct drm_gpu_scheduler). Jobs from this entity can - * be scheduled on any scheduler on this list. - * - * This can be modified by calling drm_sched_entity_modify_sched(). - * Locking is entirely up to the driver, see the above function for more - * details. - * - * This will be set to NULL if &num_sched_list equals 1 and @rq has been - * set already. - * - * FIXME: This means priority changes through - * drm_sched_entity_set_priority() will be lost henceforth in this case. - */ struct drm_gpu_scheduler **sched_list; - - /** - * @num_sched_list: - * - * Number of drm_gpu_schedulers in the @sched_list. - */ unsigned int num_sched_list; - - /** - * @priority: - * - * Priority of the entity. This can be modified by calling - * drm_sched_entity_set_priority(). Protected by &rq_lock. - */ enum drm_sched_priority priority; - - /** - * @rq_lock: - * - * Lock to modify the runqueue to which this entity belongs. - */ spinlock_t rq_lock; - /** - * @job_queue: the list of jobs of this entity. - */ struct spsc_queue job_queue; - /** - * @fence_seq: - * - * A linearly increasing seqno incremented with each new - * &drm_sched_fence which is part of the entity. - * - * FIXME: Callers of drm_sched_job_arm() need to ensure correct locking, - * this doesn't need to be atomic. - */ atomic_t fence_seq; - - /** - * @fence_context: - * - * A unique context for all the fences which belong to this entity. The - * &drm_sched_fence.scheduled uses the fence_context but - * &drm_sched_fence.finished uses fence_context + 1. - */ uint64_t fence_context; - /** - * @dependency: - * - * The dependency fence of the job which is on the top of the job queue. - */ struct dma_fence *dependency; - - /** - * @cb: - * - * Callback for the dependency fence above. - */ struct dma_fence_cb cb; - - /** - * @guilty: - * - * Points to entities' guilty. - */ atomic_t *guilty; - - /** - * @last_scheduled: - * - * Points to the finished fence of the last scheduled job. Only written - * by the scheduler thread, can be accessed locklessly from - * drm_sched_job_arm() iff the queue is empty. - */ struct dma_fence *last_scheduled; - - /** - * @last_user: last group leader pushing a job into the entity. - */ struct task_struct *last_user; - - /** - * @stopped: - * - * Marks the enity as removed from rq and destined for - * termination. This is set by calling drm_sched_entity_flush() and by - * drm_sched_fini(). - */ bool stopped; - - /** - * @entity_idle: - * - * Signals when entity is not in use, used to sequence entity cleanup in - * drm_sched_entity_fini(). - */ struct completion entity_idle; }; @@ -287,32 +192,12 @@ struct drm_sched_job { struct list_head list; struct drm_gpu_scheduler *sched; struct drm_sched_fence *s_fence; - - /* - * work is used only after finish_cb has been used and will not be - * accessed anymore. 
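A hedged sketch of the entity/job lifecycle the member docs above describe, using a single scheduler; the my_* names and the owner cookie are hypothetical, and the drm_sched_entity_init()/drm_sched_job_init() signatures are assumed to match this header's era:

#include <linux/kernel.h>
#include <drm/gpu_scheduler.h>

static int my_entity_open(struct drm_sched_entity *entity,
                          struct drm_gpu_scheduler *sched)
{
        struct drm_gpu_scheduler *list[] = { sched };

        /* num_sched_list == 1: the entity binds directly to sched's rq. */
        return drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_NORMAL,
                                     list, ARRAY_SIZE(list), NULL);
}

static int my_submit(struct drm_sched_job *job,
                     struct drm_sched_entity *entity, void *owner)
{
        int ret;

        ret = drm_sched_job_init(job, entity, owner);
        if (ret)
                return ret;

        /* job->s_fence is now valid and may be handed to userspace. */
        drm_sched_entity_push_job(job, entity);
        return 0;
}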
- */ - union { - struct dma_fence_cb finish_cb; - struct irq_work work; - }; - + struct dma_fence_cb finish_cb; uint64_t id; atomic_t karma; enum drm_sched_priority s_priority; struct drm_sched_entity *entity; struct dma_fence_cb cb; - /** - * @dependencies: - * - * Contains the dependencies as struct dma_fence for this job, see - * drm_sched_job_add_dependency() and - * drm_sched_job_add_implicit_dependencies(). - */ - struct xarray dependencies; - - /** @last_dependency: tracks @dependencies as they signal */ - unsigned long last_dependency; }; static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, @@ -335,15 +220,9 @@ enum drm_gpu_sched_stat { */ struct drm_sched_backend_ops { /** - * @dependency: - * - * Called when the scheduler is considering scheduling this job next, to - * get another struct dma_fence for this job to block on. Once it - * returns NULL, run_job() may be called. - * - * If a driver exclusively uses drm_sched_job_add_dependency() and - * drm_sched_job_add_implicit_dependencies() this can be ommitted and - * left as NULL. + * @dependency: Called when the scheduler is considering scheduling + * this job next, to get another struct dma_fence for this job to + * block on. Once it returns NULL, run_job() may be called. */ struct dma_fence *(*dependency)(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity); @@ -469,14 +348,6 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched); int drm_sched_job_init(struct drm_sched_job *job, struct drm_sched_entity *entity, void *owner); -void drm_sched_job_arm(struct drm_sched_job *job); -int drm_sched_job_add_dependency(struct drm_sched_job *job, - struct dma_fence *fence); -int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, - struct drm_gem_object *obj, - bool write); - - void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, struct drm_gpu_scheduler **sched_list, unsigned int num_sched_list); @@ -510,17 +381,14 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity); void drm_sched_entity_destroy(struct drm_sched_entity *entity); void drm_sched_entity_select_rq(struct drm_sched_entity *entity); struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity); -void drm_sched_entity_push_job(struct drm_sched_job *sched_job); +void drm_sched_entity_push_job(struct drm_sched_job *sched_job, + struct drm_sched_entity *entity); void drm_sched_entity_set_priority(struct drm_sched_entity *entity, enum drm_sched_priority priority); bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); -struct drm_sched_fence *drm_sched_fence_alloc( +struct drm_sched_fence *drm_sched_fence_create( struct drm_sched_entity *s_entity, void *owner); -void drm_sched_fence_init(struct drm_sched_fence *fence, - struct drm_sched_entity *entity); -void drm_sched_fence_free(struct drm_sched_fence *fence); - void drm_sched_fence_scheduled(struct drm_sched_fence *fence); void drm_sched_fence_finished(struct drm_sched_fence *fence); diff --git a/include/drm/gud.h b/include/drm/gud.h index c52a8ba4ae..0b46b54fe5 100644 --- a/include/drm/gud.h +++ b/include/drm/gud.h @@ -246,12 +246,10 @@ struct gud_state_req { /* Get supported pixel formats as a byte array of GUD_PIXEL_FORMAT_* */ #define GUD_REQ_GET_FORMATS 0x40 #define GUD_FORMATS_MAX_NUM 32 - #define GUD_PIXEL_FORMAT_R1 0x01 /* 1-bit monochrome */ - #define GUD_PIXEL_FORMAT_R8 0x08 /* 8-bit greyscale */ + /* R1 is a 1-bit monochrome transfer format presented to userspace as XRGB8888 */ + #define GUD_PIXEL_FORMAT_R1 0x01 
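To make the R1 comment above concrete, a hedged sketch of the 1-bit packing a host driver might perform after reducing XRGB8888 to 8-bit greyscale; the 50% threshold, MSB-first bit order, and my_gray8_to_r1() name are all assumptions, and width is taken to be a multiple of 8:

#include <linux/bits.h>
#include <linux/string.h>
#include <linux/types.h>

static void my_gray8_to_r1(u8 *dst, const u8 *gray, unsigned int width)
{
        unsigned int x;

        memset(dst, 0, width / 8);
        for (x = 0; x < width; x++)
                if (gray[x] >= 128)                     /* >= 50% luminance */
                        dst[x / 8] |= BIT(7 - (x % 8)); /* bit order assumed */
}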
#define GUD_PIXEL_FORMAT_XRGB1111 0x20 - #define GUD_PIXEL_FORMAT_RGB332 0x30 #define GUD_PIXEL_FORMAT_RGB565 0x40 - #define GUD_PIXEL_FORMAT_RGB888 0x50 #define GUD_PIXEL_FORMAT_XRGB8888 0x80 #define GUD_PIXEL_FORMAT_ARGB8888 0x81 diff --git a/include/drm/i915_component.h b/include/drm/i915_component.h index c1e2a43d2d..55c3b12358 100644 --- a/include/drm/i915_component.h +++ b/include/drm/i915_component.h @@ -29,7 +29,6 @@ enum i915_component_type { I915_COMPONENT_AUDIO = 1, I915_COMPONENT_HDCP, - I915_COMPONENT_PXP }; /* MAX_PORT is the number of port diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index baf3d1d3d5..eee18fa53b 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -632,16 +632,18 @@ INTEL_VGA_DEVICE(0x4905, info), \ INTEL_VGA_DEVICE(0x4906, info), \ INTEL_VGA_DEVICE(0x4907, info), \ - INTEL_VGA_DEVICE(0x4908, info), \ - INTEL_VGA_DEVICE(0x4909, info) + INTEL_VGA_DEVICE(0x4908, info) /* ADL-S */ #define INTEL_ADLS_IDS(info) \ INTEL_VGA_DEVICE(0x4680, info), \ + INTEL_VGA_DEVICE(0x4681, info), \ INTEL_VGA_DEVICE(0x4682, info), \ + INTEL_VGA_DEVICE(0x4683, info), \ INTEL_VGA_DEVICE(0x4688, info), \ - INTEL_VGA_DEVICE(0x468A, info), \ + INTEL_VGA_DEVICE(0x4689, info), \ INTEL_VGA_DEVICE(0x4690, info), \ + INTEL_VGA_DEVICE(0x4691, info), \ INTEL_VGA_DEVICE(0x4692, info), \ INTEL_VGA_DEVICE(0x4693, info) @@ -666,13 +668,4 @@ INTEL_VGA_DEVICE(0x46C2, info), \ INTEL_VGA_DEVICE(0x46C3, info) -/* RPL-S */ -#define INTEL_RPLS_IDS(info) \ - INTEL_VGA_DEVICE(0xA780, info), \ - INTEL_VGA_DEVICE(0xA781, info), \ - INTEL_VGA_DEVICE(0xA782, info), \ - INTEL_VGA_DEVICE(0xA783, info), \ - INTEL_VGA_DEVICE(0xA788, info), \ - INTEL_VGA_DEVICE(0xA789, info) - #endif /* _I915_PCIIDS_H */ diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 67530bfef1..abfefaaf89 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -4,11 +4,9 @@ #ifndef _DRM_INTEL_GTT_H #define _DRM_INTEL_GTT_H -#include - -struct agp_bridge_data; -struct pci_dev; -struct sg_table; +#include +#include +#include void intel_gtt_get(u64 *gtt_total, phys_addr_t *mappable_base, diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index c17b2df917..36f7eb9d06 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -32,6 +32,7 @@ #define _TTM_BO_API_H_ #include +#include #include #include #include @@ -263,6 +264,18 @@ static inline int ttm_bo_wait_ctx(struct ttm_buffer_object *bo, struct ttm_opera return ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu); } +/** + * ttm_bo_mem_compat - Check if proposed placement is compatible with a bo + * + * @placement: Proposed placement for the buffer object + * @mem: The struct ttm_resource indicating the region where the bo resides + * @new_flags: Describes compatible placement found + * + * Returns true if the placement is compatible + */ +bool ttm_bo_mem_compat(struct ttm_placement *placement, struct ttm_resource *mem, + uint32_t *new_flags); + /** * ttm_bo_validate * @@ -350,10 +363,9 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * @bo: Pointer to a ttm_buffer_object to be initialized. * @size: Requested size of buffer object. * @type: Requested type of buffer object. - * @placement: Initial placement for buffer object. + * @flags: Initial placement flags. * @page_alignment: Data alignment in pages. * @ctx: TTM operation context for memory allocation. - * @sg: Scatter-gather table. * @resv: Pointer to a dma_resv, or NULL to let ttm allocate one. 
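Returning to ttm_bo_mem_compat() declared above, a hedged usage sketch in the style of a driver pin path; my_bo_pin() is hypothetical and bo->resource is assumed to be the current backing resource in this header's era:

#include <drm/ttm/ttm_bo_api.h>

static int my_bo_pin(struct ttm_buffer_object *bo,
                     struct ttm_placement *placement,
                     struct ttm_operation_ctx *ctx)
{
        uint32_t new_flags;

        /* Already placed compatibly? Then skip the move entirely. */
        if (ttm_bo_mem_compat(placement, bo->resource, &new_flags))
                return 0;

        return ttm_bo_validate(bo, placement, ctx);
}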
* @destroy: Destroy function. Use NULL for kfree(). * @@ -394,7 +406,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, * @bo: Pointer to a ttm_buffer_object to be initialized. * @size: Requested size of buffer object. * @type: Requested type of buffer object. - * @placement: Initial placement for buffer object. + * @flags: Initial placement flags. * @page_alignment: Data alignment in pages. * @interruptible: If needing to sleep to wait for GPU resources, * sleep interruptible. @@ -402,7 +414,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, * holds a pointer to a persistent shmem object. Typically, this would * point to the shmem object backing a GEM object if TTM is used to back a * GEM user interface. - * @sg: Scatter-gather table. * @resv: Pointer to a dma_resv, or NULL to let ttm allocate one. * @destroy: Destroy function. Use NULL for kfree(). * diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 5f08757519..68d6069572 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -322,7 +322,7 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); -void ttm_move_memcpy(bool clear, +void ttm_move_memcpy(struct ttm_buffer_object *bo, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter); diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h index 235a743d90..3c9dd65f5a 100644 --- a/include/drm/ttm/ttm_caching.h +++ b/include/drm/ttm/ttm_caching.h @@ -27,26 +27,9 @@ #define TTM_NUM_CACHING_TYPES 3 -/** - * enum ttm_caching - CPU caching and BUS snooping behavior. - */ enum ttm_caching { - /** - * @ttm_uncached: Most defensive option for device mappings, - * don't even allow write combining. - */ ttm_uncached, - - /** - * @ttm_write_combined: Don't cache read accesses, but allow at least - * writes to be combined. - */ ttm_write_combined, - - /** - * @ttm_cached: Fully cached like normal system memory, requires that - * devices snoop the CPU cache on accesses. - */ ttm_cached }; diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 0a4ddec78d..cd592f8e94 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -39,23 +39,31 @@ struct ttm_operation_ctx; /** * struct ttm_global - Buffer object driver global data. + * + * @dummy_read_page: Pointer to a dummy page used for mapping requests + * of unpopulated pages. + * @shrink: A shrink callback object used for buffer object swap. + * @device_list_mutex: Mutex protecting the device list. + * This mutex is held while traversing the device list for pm options. + * @lru_lock: Spinlock protecting the bo subsystem lru lists. + * @device_list: List of buffer object devices. + * @swap_lru: Lru list of buffer objects used for swapping. */ extern struct ttm_global { /** - * @dummy_read_page: Pointer to a dummy page used for mapping requests - * of unpopulated pages. Constant after init. + * Constant after init. */ + struct page *dummy_read_page; /** - * @device_list: List of buffer object devices. Protected by - * ttm_global_mutex. + * Protected by ttm_global_mutex. */ struct list_head device_list; /** - * @bo_count: Number of buffer objects allocated by devices. + * Internal protection. */ atomic_t bo_count; } ttm_glob; @@ -65,7 +73,7 @@ struct ttm_device_funcs { * ttm_tt_create * * @bo: The buffer object to create the ttm for. - * @page_flags: Page flags as identified by TTM_TT_FLAG_XX flags. 
+ * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags. * * Create a struct ttm_tt to back data with system memory pages. * No pages are actually allocated. @@ -222,64 +230,49 @@ struct ttm_device_funcs { /** * struct ttm_device - Buffer object driver device-specific data. + * + * @device_list: Our entry in the global device list. + * @funcs: Function table for the device. + * @sysman: Resource manager for the system domain. + * @man_drv: An array of resource_managers. + * @vma_manager: Address space manager. + * @pool: page pool for the device. + * @dev_mapping: A pointer to the struct address_space representing the + * device address space. + * @wq: Work queue structure for the delayed delete workqueue. */ struct ttm_device { - /** - * @device_list: Our entry in the global device list. + /* * Constant after bo device init */ struct list_head device_list; - - /** - * @funcs: Function table for the device. - * Constant after bo device init - */ struct ttm_device_funcs *funcs; - /** - * @sysman: Resource manager for the system domain. + /* * Access via ttm_manager_type. */ struct ttm_resource_manager sysman; - - /** - * @man_drv: An array of resource_managers, one per resource type. - */ struct ttm_resource_manager *man_drv[TTM_NUM_MEM_TYPES]; - /** - * @vma_manager: Address space manager for finding BOs to mmap. + /* + * Protected by internal locks. */ struct drm_vma_offset_manager *vma_manager; - - /** - * @pool: page pool for the device. - */ struct ttm_pool pool; - /** - * @lru_lock: Protection for the per manager LRU and ddestroy lists. + /* + * Protection for the per manager LRU and ddestroy lists. */ spinlock_t lru_lock; - - /** - * @ddestroy: Destroyed but not yet cleaned up buffer objects. - */ struct list_head ddestroy; - /** - * @pinned: Buffer objects which are pinned and so not on any LRU list. - */ - struct list_head pinned; - - /** - * @dev_mapping: A pointer to the struct address_space for invalidating - * CPU mappings on buffer move. Protected by load/unload sync. + /* + * Protected by load / firstopen / lastclose /unload sync. */ struct address_space *dev_mapping; - /** - * @wq: Work queue structure for the delayed delete workqueue. + /* + * Internal protection. */ struct delayed_work wq; }; @@ -291,15 +284,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) { - BUILD_BUG_ON(__builtin_constant_p(mem_type) - && mem_type >= TTM_NUM_MEM_TYPES); return bdev->man_drv[mem_type]; } static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type, struct ttm_resource_manager *manager) { - BUILD_BUG_ON(__builtin_constant_p(type) && type >= TTM_NUM_MEM_TYPES); bdev->man_drv[type] = manager; } @@ -308,6 +298,5 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs, struct drm_vma_offset_manager *vma_manager, bool use_dma_alloc, bool use_dma32); void ttm_device_fini(struct ttm_device *bdev); -void ttm_device_clear_dma_mappings(struct ttm_device *bdev); #endif diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h index 8074d0f6ca..8995c9e4ec 100644 --- a/include/drm/ttm/ttm_placement.h +++ b/include/drm/ttm/ttm_placement.h @@ -35,17 +35,6 @@ /* * Memory regions for data placement. - * - * Buffers placed in TTM_PL_SYSTEM are considered under TTMs control and can - * be swapped out whenever TTMs thinks it is a good idea. 
- * In cases where drivers would like to use TTM_PL_SYSTEM as a valid - * placement they need to be able to handle the issues that arise due to the - * above manually. - * - * For BO's which reside in system memory but for which the accelerator - * requires direct access (i.e. their usage needs to be synchronized - * between the CPU and accelerator via fences) a new, driver private - * placement that can handle such scenarios is a good idea. */ #define TTM_PL_SYSTEM 0 @@ -69,7 +58,6 @@ * * @fpfn: first valid page frame number to put the object * @lpfn: last valid page frame number to put the object - * @mem_type: One of TTM_PL_* where the resource should be allocated from. * @flags: memory domain and caching flags for the object * * Structure indicating a possible place to put an object. diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h index ef09b23d29..4321728bdd 100644 --- a/include/drm/ttm/ttm_pool.h +++ b/include/drm/ttm/ttm_pool.h @@ -37,7 +37,7 @@ struct ttm_pool; struct ttm_operation_ctx; /** - * struct ttm_pool_type - Pool for a certain memory type + * ttm_pool_type - Pool for a certain memory type * * @pool: the pool we belong to, might be NULL for the global ones * @order: the allocation order our pages have @@ -58,9 +58,8 @@ struct ttm_pool_type { }; /** - * struct ttm_pool - Pool for all caching and orders + * ttm_pool - Pool for all caching and orders * - * @dev: the device we allocate pages for * @use_dma_alloc: if coherent DMA allocations should be used * @use_dma32: if GFP_DMA32 should be used * @caching: pools for each caching/order diff --git a/include/drm/ttm/ttm_range_manager.h b/include/drm/ttm/ttm_range_manager.h index 7963b957e9..22b6fa42ac 100644 --- a/include/drm/ttm/ttm_range_manager.h +++ b/include/drm/ttm/ttm_range_manager.h @@ -4,7 +4,6 @@ #define _TTM_RANGE_MANAGER_H_ #include -#include #include /** @@ -34,23 +33,10 @@ to_ttm_range_mgr_node(struct ttm_resource *res) return container_of(res, struct ttm_range_mgr_node, base); } -int ttm_range_man_init_nocheck(struct ttm_device *bdev, +int ttm_range_man_init(struct ttm_device *bdev, unsigned type, bool use_tt, unsigned long p_size); -int ttm_range_man_fini_nocheck(struct ttm_device *bdev, +int ttm_range_man_fini(struct ttm_device *bdev, unsigned type); -static __always_inline int ttm_range_man_init(struct ttm_device *bdev, - unsigned int type, bool use_tt, - unsigned long p_size) -{ - BUILD_BUG_ON(__builtin_constant_p(type) && type >= TTM_NUM_MEM_TYPES); - return ttm_range_man_init_nocheck(bdev, type, use_tt, p_size); -} -static __always_inline int ttm_range_man_fini(struct ttm_device *bdev, - unsigned int type) -{ - BUILD_BUG_ON(__builtin_constant_p(type) && type >= TTM_NUM_MEM_TYPES); - return ttm_range_man_fini_nocheck(bdev, type); -} #endif diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 5952051091..140b6b9a8b 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -40,7 +40,6 @@ struct ttm_resource_manager; struct ttm_resource; struct ttm_place; struct ttm_buffer_object; -struct ttm_placement; struct dma_buf_map; struct io_mapping; struct sg_table; @@ -103,7 +102,10 @@ struct ttm_resource_manager_func { * struct ttm_resource_manager * * @use_type: The memory type is enabled. - * @use_tt: If a TT object should be used for the backing store. + * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory + * managed by this memory type. 
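Following the TTM_PL_SYSTEM caveats above, a hedged sketch of a driver-private placement table that prefers VRAM and falls back to the TT domain, leaving TTM_PL_SYSTEM under TTM's own control; the my_* names are hypothetical:

#include <linux/kernel.h>
#include <drm/ttm/ttm_placement.h>

static const struct ttm_place my_places[] = {
        { .fpfn = 0, .lpfn = 0, .mem_type = TTM_PL_VRAM, .flags = 0 },
        { .fpfn = 0, .lpfn = 0, .mem_type = TTM_PL_TT, .flags = 0 },
};

static const struct ttm_placement my_placement = {
        .num_placement = ARRAY_SIZE(my_places),
        .placement = my_places,
        /* same list reused when the preferred domains are contended */
        .num_busy_placement = ARRAY_SIZE(my_places),
        .busy_placement = my_places,
};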
+ * @gpu_offset: If used, the GPU offset of the first managed page of + * fixed memory or the first managed location in an aperture. * @size: Size of the managed region. * @func: structure pointer implementing the range manager. See above * @move_lock: lock for move fence @@ -141,7 +143,6 @@ struct ttm_resource_manager { * @addr: mapped virtual address * @offset: physical addr * @is_iomem: is this io memory ? - * @caching: See enum ttm_caching * * Structure indicating the bus placement of an object. */ @@ -265,8 +266,6 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_resource **res); void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res); -bool ttm_resource_compat(struct ttm_resource *res, - struct ttm_placement *placement); void ttm_resource_manager_init(struct ttm_resource_manager *man, unsigned long p_size); diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index f208321398..b20e89d321 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -38,70 +38,36 @@ struct ttm_resource; struct ttm_buffer_object; struct ttm_operation_ctx; +#define TTM_PAGE_FLAG_SWAPPED (1 << 4) +#define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 6) +#define TTM_PAGE_FLAG_SG (1 << 8) +#define TTM_PAGE_FLAG_NO_RETRY (1 << 9) + +#define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) + /** - * struct ttm_tt - This is a structure holding the pages, caching- and aperture - * binding status for a buffer object that isn't backed by fixed (VRAM / AGP) + * struct ttm_tt + * + * @pages: Array of pages backing the data. + * @page_flags: see TTM_PAGE_FLAG_* + * @num_pages: Number of pages in the page array. + * @sg: for SG objects via dma-buf + * @dma_address: The DMA (bus) addresses of the pages + * @swap_storage: Pointer to shmem struct file for swap storage. + * @pages_list: used by some page allocation backend + * @caching: The current caching state of the pages. + * + * This is a structure holding the pages, caching- and aperture binding + * status for a buffer object that isn't backed by fixed (VRAM / AGP) * memory. */ struct ttm_tt { - /** @pages: Array of pages backing the data. */ struct page **pages; - /** - * @page_flags: The page flags. - * - * Supported values: - * - * TTM_TT_FLAG_SWAPPED: Set by TTM when the pages have been unpopulated - * and swapped out by TTM. Calling ttm_tt_populate() will then swap the - * pages back in, and unset the flag. Drivers should in general never - * need to touch this. - * - * TTM_TT_FLAG_ZERO_ALLOC: Set if the pages will be zeroed on - * allocation. - * - * TTM_TT_FLAG_EXTERNAL: Set if the underlying pages were allocated - * externally, like with dma-buf or userptr. This effectively disables - * TTM swapping out such pages. Also important is to prevent TTM from - * ever directly mapping these pages. - * - * Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable - * this flag. - * - * TTM_TT_FLAG_EXTERNAL_MAPPABLE: Same behaviour as - * TTM_TT_FLAG_EXTERNAL, but with the reduced restriction that it is - * still valid to use TTM to map the pages directly. This is useful when - * implementing a ttm_tt backend which still allocates driver owned - * pages underneath(say with shmem). - * - * Note that since this also implies TTM_TT_FLAG_EXTERNAL, the usage - * here should always be: - * - * page_flags = TTM_TT_FLAG_EXTERNAL | - * TTM_TT_FLAG_EXTERNAL_MAPPABLE; - * - * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. 
This is - * set by TTM after ttm_tt_populate() has successfully returned, and is - * then unset when TTM calls ttm_tt_unpopulate(). - */ -#define TTM_TT_FLAG_SWAPPED (1 << 0) -#define TTM_TT_FLAG_ZERO_ALLOC (1 << 1) -#define TTM_TT_FLAG_EXTERNAL (1 << 2) -#define TTM_TT_FLAG_EXTERNAL_MAPPABLE (1 << 3) - -#define TTM_TT_FLAG_PRIV_POPULATED (1 << 31) uint32_t page_flags; - /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; - /** @sg: for SG objects via dma-buf. */ struct sg_table *sg; - /** @dma_address: The DMA (bus) addresses of the pages. */ dma_addr_t *dma_address; - /** @swap_storage: Pointer to shmem struct file for swap storage. */ struct file *swap_storage; - /** - * @caching: The current caching state of the pages, see enum - * ttm_caching. - */ enum ttm_caching caching; }; @@ -119,7 +85,7 @@ struct ttm_kmap_iter_tt { static inline bool ttm_tt_is_populated(struct ttm_tt *tt) { - return tt->page_flags & TTM_TT_FLAG_PRIV_POPULATED; + return tt->page_flags & TTM_PAGE_FLAG_PRIV_POPULATED; } /** @@ -138,7 +104,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc); * * @ttm: The struct ttm_tt. * @bo: The buffer object we create the ttm for. - * @page_flags: Page flags as identified by TTM_TT_FLAG_XX flags. + * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags. * @caching: the desired caching state of the pages * * Create a struct ttm_tt to back data with system memory pages. @@ -161,15 +127,21 @@ int ttm_sg_tt_init(struct ttm_tt *ttm_dma, struct ttm_buffer_object *bo, void ttm_tt_fini(struct ttm_tt *ttm); /** - * ttm_tt_destroy: + * ttm_tt_destroy: * - * @bdev: the ttm_device this object belongs to * @ttm: The struct ttm_tt. * * Unbind, unpopulate and destroy common struct ttm_tt. */ void ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm); +/** + * ttm_tt_destroy_common: + * + * Called from driver to destroy common path. + */ +void ttm_tt_destroy_common(struct ttm_device *bdev, struct ttm_tt *ttm); + /** * ttm_tt_swapin: * @@ -184,19 +156,15 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm, /** * ttm_tt_populate - allocate pages for a ttm * - * @bdev: the ttm_device this object belongs to * @ttm: Pointer to the ttm_tt structure - * @ctx: operation context for populating the tt object. * * Calls the driver method to allocate pages for a ttm */ -int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, - struct ttm_operation_ctx *ctx); +int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_operation_ctx *ctx); /** * ttm_tt_unpopulate - free pages from a ttm * - * @bdev: the ttm_device this object belongs to * @ttm: Pointer to the ttm_tt structure * * Calls the driver method to free all pages from a ttm @@ -213,7 +181,7 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm); */ static inline void ttm_tt_mark_for_clear(struct ttm_tt *ttm) { - ttm->page_flags |= TTM_TT_FLAG_ZERO_ALLOC; + ttm->page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; } void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages); @@ -229,7 +197,7 @@ struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt, * * @bo: Buffer object we allocate the ttm for. * @bridge: The agp bridge this device is sitting on. - * @page_flags: Page flags as identified by TTM_TT_FLAG_XX flags. + * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags. 
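A hedged sketch of a driver ttm_tt backend using the TTM_PAGE_FLAG_* names restored above; my_ttm_tt_create() is hypothetical and the four-argument ttm_tt_init() signature is assumed from this header's era:

#include <linux/slab.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_tt.h>

static struct ttm_tt *my_ttm_tt_create(struct ttm_buffer_object *bo,
                                       uint32_t page_flags)
{
        struct ttm_tt *tt = kzalloc(sizeof(*tt), GFP_KERNEL);

        if (!tt)
                return NULL;

        /* dma-buf imports carry their own pages; keep TTM's hands off. */
        if (bo->type == ttm_bo_type_sg)
                page_flags |= TTM_PAGE_FLAG_SG;

        if (ttm_tt_init(tt, bo, page_flags, ttm_cached)) {
                kfree(tt);
                return NULL;
        }
        return tt;
}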
* * * Create a TTM backend that uses the indicated AGP bridge as an aperture diff --git a/include/dt-bindings/clock/am4.h b/include/dt-bindings/clock/am4.h index 4be6c5961f..d961e7cb36 100644 --- a/include/dt-bindings/clock/am4.h +++ b/include/dt-bindings/clock/am4.h @@ -158,7 +158,6 @@ #define AM4_L3S_VPFE0_CLKCTRL AM4_L3S_CLKCTRL_INDEX(0x68) #define AM4_L3S_VPFE1_CLKCTRL AM4_L3S_CLKCTRL_INDEX(0x70) #define AM4_L3S_GPMC_CLKCTRL AM4_L3S_CLKCTRL_INDEX(0x220) -#define AM4_L3S_ADC1_CLKCTRL AM4_L3S_CLKCTRL_INDEX(0x230) #define AM4_L3S_MCASP0_CLKCTRL AM4_L3S_CLKCTRL_INDEX(0x238) #define AM4_L3S_MCASP1_CLKCTRL AM4_L3S_CLKCTRL_INDEX(0x240) #define AM4_L3S_MMC3_CLKCTRL AM4_L3S_CLKCTRL_INDEX(0x248) diff --git a/include/dt-bindings/clock/dra7.h b/include/dt-bindings/clock/dra7.h index 29ff6b8958..7d57063b8a 100644 --- a/include/dt-bindings/clock/dra7.h +++ b/include/dt-bindings/clock/dra7.h @@ -84,10 +84,17 @@ #define DRA7_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) +/* iva clocks */ +#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) +#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) + /* dss clocks */ #define DRA7_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) +/* gpu clocks */ +#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) + /* l3init clocks */ #define DRA7_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) @@ -260,17 +267,10 @@ #define DRA7_L3INSTR_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3INSTR_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -/* iva clocks */ -#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) - /* dss clocks */ #define DRA7_DSS_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_DSS_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -/* gpu clocks */ -#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) - /* l3init clocks */ #define DRA7_L3INIT_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_L3INIT_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) diff --git a/include/dt-bindings/clock/exynos4.h b/include/dt-bindings/clock/exynos4.h index acbfbab875..88ec3968b9 100644 --- a/include/dt-bindings/clock/exynos4.h +++ b/include/dt-bindings/clock/exynos4.h @@ -209,7 +209,6 @@ #define CLK_ACLK400_MCUISP 395 /* Exynos4x12 only */ #define CLK_MOUT_HDMI 396 #define CLK_MOUT_MIXER 397 -#define CLK_MOUT_VPLLSRC 398 /* gate clocks - ppmu */ #define CLK_PPMULEFT 400 @@ -237,10 +236,9 @@ #define CLK_DIV_C2C 458 /* Exynos4x12 only */ #define CLK_DIV_GDL 459 #define CLK_DIV_GDR 460 -#define CLK_DIV_CORE2 461 /* must be greater than maximal clock id */ -#define CLK_NR_CLKS 462 +#define CLK_NR_CLKS 461 /* Exynos4x12 ISP clocks */ #define CLK_ISP_FIMC_ISP 1 diff --git a/include/dt-bindings/clock/exynos5250.h b/include/dt-bindings/clock/exynos5250.h index 4680da7357..e259cc01f2 100644 --- a/include/dt-bindings/clock/exynos5250.h +++ b/include/dt-bindings/clock/exynos5250.h @@ -19,7 +19,6 @@ #define CLK_FOUT_EPLL 7 #define CLK_FOUT_VPLL 8 #define CLK_ARM_CLK 9 -#define CLK_DIV_ARM2 10 /* gate for special clocks (sclk) */ #define CLK_SCLK_CAM_BAYER 128 @@ -175,9 +174,8 @@ #define CLK_MOUT_ACLK300_DISP1_SUB 1027 #define CLK_MOUT_APLL 1028 #define CLK_MOUT_MPLL 1029 -#define CLK_MOUT_VPLLSRC 1030 /* must be greater than maximal clock id */ -#define CLK_NR_CLKS 1031 +#define CLK_NR_CLKS 1030 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_5250_H */ diff --git a/include/dt-bindings/clock/imx8mp-clock.h 
b/include/dt-bindings/clock/imx8mp-clock.h index 235c7a00d3..43927a1b9e 100644 --- a/include/dt-bindings/clock/imx8mp-clock.h +++ b/include/dt-bindings/clock/imx8mp-clock.h @@ -117,6 +117,7 @@ #define IMX8MP_CLK_AUDIO_AHB 108 #define IMX8MP_CLK_MIPI_DSI_ESC_RX 109 #define IMX8MP_CLK_IPG_ROOT 110 +#define IMX8MP_CLK_IPG_AUDIO_ROOT 111 #define IMX8MP_CLK_DRAM_ALT 112 #define IMX8MP_CLK_DRAM_APB 113 #define IMX8MP_CLK_VPU_G1 114 diff --git a/include/dt-bindings/clock/jz4740-cgu.h b/include/dt-bindings/clock/jz4740-cgu.h index 43153d3e9b..e82d770285 100644 --- a/include/dt-bindings/clock/jz4740-cgu.h +++ b/include/dt-bindings/clock/jz4740-cgu.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This header provides clock numbers for the ingenic,jz4740-cgu DT binding. * @@ -33,5 +34,6 @@ #define JZ4740_CLK_ADC 19 #define JZ4740_CLK_I2C 20 #define JZ4740_CLK_AIC 21 +#define JZ4740_CLK_TCU 22 #endif /* __DT_BINDINGS_CLOCK_JZ4740_CGU_H__ */ diff --git a/include/dt-bindings/clock/jz4780-cgu.h b/include/dt-bindings/clock/jz4780-cgu.h index 467165e3cf..85cf8eb508 100644 --- a/include/dt-bindings/clock/jz4780-cgu.h +++ b/include/dt-bindings/clock/jz4780-cgu.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This header provides clock numbers for the ingenic,jz4780-cgu DT binding. * @@ -11,78 +12,80 @@ #ifndef __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ #define __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ -#define JZ4780_CLK_EXCLK 0 -#define JZ4780_CLK_RTCLK 1 -#define JZ4780_CLK_APLL 2 -#define JZ4780_CLK_MPLL 3 -#define JZ4780_CLK_EPLL 4 -#define JZ4780_CLK_VPLL 5 -#define JZ4780_CLK_OTGPHY 6 -#define JZ4780_CLK_SCLKA 7 -#define JZ4780_CLK_CPUMUX 8 -#define JZ4780_CLK_CPU 9 -#define JZ4780_CLK_L2CACHE 10 -#define JZ4780_CLK_AHB0 11 -#define JZ4780_CLK_AHB2PMUX 12 -#define JZ4780_CLK_AHB2 13 -#define JZ4780_CLK_PCLK 14 -#define JZ4780_CLK_DDR 15 -#define JZ4780_CLK_VPU 16 -#define JZ4780_CLK_I2SPLL 17 -#define JZ4780_CLK_I2S 18 +#define JZ4780_CLK_EXCLK 0 +#define JZ4780_CLK_RTCLK 1 +#define JZ4780_CLK_APLL 2 +#define JZ4780_CLK_MPLL 3 +#define JZ4780_CLK_EPLL 4 +#define JZ4780_CLK_VPLL 5 +#define JZ4780_CLK_OTGPHY 6 +#define JZ4780_CLK_SCLKA 7 +#define JZ4780_CLK_CPUMUX 8 +#define JZ4780_CLK_CPU 9 +#define JZ4780_CLK_L2CACHE 10 +#define JZ4780_CLK_AHB0 11 +#define JZ4780_CLK_AHB2PMUX 12 +#define JZ4780_CLK_AHB2 13 +#define JZ4780_CLK_PCLK 14 +#define JZ4780_CLK_DDR 15 +#define JZ4780_CLK_VPU 16 +#define JZ4780_CLK_I2SPLL 17 +#define JZ4780_CLK_I2S 18 #define JZ4780_CLK_LCD0PIXCLK 19 #define JZ4780_CLK_LCD1PIXCLK 20 -#define JZ4780_CLK_MSCMUX 21 -#define JZ4780_CLK_MSC0 22 -#define JZ4780_CLK_MSC1 23 -#define JZ4780_CLK_MSC2 24 -#define JZ4780_CLK_UHC 25 -#define JZ4780_CLK_SSIPLL 26 -#define JZ4780_CLK_SSI 27 -#define JZ4780_CLK_CIMMCLK 28 -#define JZ4780_CLK_PCMPLL 29 -#define JZ4780_CLK_PCM 30 -#define JZ4780_CLK_GPU 31 -#define JZ4780_CLK_HDMI 32 -#define JZ4780_CLK_BCH 33 -#define JZ4780_CLK_NEMC 34 -#define JZ4780_CLK_OTG0 35 -#define JZ4780_CLK_SSI0 36 -#define JZ4780_CLK_SMB0 37 -#define JZ4780_CLK_SMB1 38 -#define JZ4780_CLK_SCC 39 -#define JZ4780_CLK_AIC 40 -#define JZ4780_CLK_TSSI0 41 -#define JZ4780_CLK_OWI 42 -#define JZ4780_CLK_KBC 43 -#define JZ4780_CLK_SADC 44 -#define JZ4780_CLK_UART0 45 -#define JZ4780_CLK_UART1 46 -#define JZ4780_CLK_UART2 47 -#define JZ4780_CLK_UART3 48 -#define JZ4780_CLK_SSI1 49 -#define JZ4780_CLK_SSI2 50 -#define JZ4780_CLK_PDMA 51 -#define JZ4780_CLK_GPS 52 -#define JZ4780_CLK_MAC 53 -#define JZ4780_CLK_SMB2 54 -#define JZ4780_CLK_CIM 55 -#define 
JZ4780_CLK_LCD 56 -#define JZ4780_CLK_TVE 57 -#define JZ4780_CLK_IPU 58 -#define JZ4780_CLK_DDR0 59 -#define JZ4780_CLK_DDR1 60 -#define JZ4780_CLK_SMB3 61 -#define JZ4780_CLK_TSSI1 62 -#define JZ4780_CLK_COMPRESS 63 -#define JZ4780_CLK_AIC1 64 -#define JZ4780_CLK_GPVLC 65 -#define JZ4780_CLK_OTG1 66 -#define JZ4780_CLK_UART4 67 -#define JZ4780_CLK_AHBMON 68 -#define JZ4780_CLK_SMB4 69 -#define JZ4780_CLK_DES 70 -#define JZ4780_CLK_X2D 71 -#define JZ4780_CLK_CORE1 72 +#define JZ4780_CLK_MSCMUX 21 +#define JZ4780_CLK_MSC0 22 +#define JZ4780_CLK_MSC1 23 +#define JZ4780_CLK_MSC2 24 +#define JZ4780_CLK_UHC 25 +#define JZ4780_CLK_SSIPLL 26 +#define JZ4780_CLK_SSI 27 +#define JZ4780_CLK_CIMMCLK 28 +#define JZ4780_CLK_PCMPLL 29 +#define JZ4780_CLK_PCM 30 +#define JZ4780_CLK_GPU 31 +#define JZ4780_CLK_HDMI 32 +#define JZ4780_CLK_BCH 33 +#define JZ4780_CLK_NEMC 34 +#define JZ4780_CLK_OTG0 35 +#define JZ4780_CLK_SSI0 36 +#define JZ4780_CLK_SMB0 37 +#define JZ4780_CLK_SMB1 38 +#define JZ4780_CLK_SCC 39 +#define JZ4780_CLK_AIC 40 +#define JZ4780_CLK_TSSI0 41 +#define JZ4780_CLK_OWI 42 +#define JZ4780_CLK_KBC 43 +#define JZ4780_CLK_SADC 44 +#define JZ4780_CLK_UART0 45 +#define JZ4780_CLK_UART1 46 +#define JZ4780_CLK_UART2 47 +#define JZ4780_CLK_UART3 48 +#define JZ4780_CLK_SSI1 49 +#define JZ4780_CLK_SSI2 50 +#define JZ4780_CLK_PDMA 51 +#define JZ4780_CLK_GPS 52 +#define JZ4780_CLK_MAC 53 +#define JZ4780_CLK_SMB2 54 +#define JZ4780_CLK_CIM 55 +#define JZ4780_CLK_LCD 56 +#define JZ4780_CLK_TVE 57 +#define JZ4780_CLK_IPU 58 +#define JZ4780_CLK_DDR0 59 +#define JZ4780_CLK_DDR1 60 +#define JZ4780_CLK_SMB3 61 +#define JZ4780_CLK_TSSI1 62 +#define JZ4780_CLK_COMPRESS 63 +#define JZ4780_CLK_AIC1 64 +#define JZ4780_CLK_GPVLC 65 +#define JZ4780_CLK_OTG1 66 +#define JZ4780_CLK_UART4 67 +#define JZ4780_CLK_AHBMON 68 +#define JZ4780_CLK_SMB4 69 +#define JZ4780_CLK_DES 70 +#define JZ4780_CLK_X2D 71 +#define JZ4780_CLK_CORE1 72 +#define JZ4780_CLK_EXCLK_DIV512 73 +#define JZ4780_CLK_RTC 74 #endif /* __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ */ diff --git a/include/dt-bindings/clock/meson8b-clkc.h b/include/dt-bindings/clock/meson8b-clkc.h index 78aa07fd7c..f33781338e 100644 --- a/include/dt-bindings/clock/meson8b-clkc.h +++ b/include/dt-bindings/clock/meson8b-clkc.h @@ -105,16 +105,6 @@ #define CLKID_PERIPH 126 #define CLKID_AXI 128 #define CLKID_L2_DRAM 130 -#define CLKID_HDMI_PLL_HDMI_OUT 132 -#define CLKID_VID_PLL_FINAL_DIV 137 -#define CLKID_VCLK_IN_SEL 138 -#define CLKID_VCLK2_IN_SEL 149 -#define CLKID_CTS_ENCT 161 -#define CLKID_CTS_ENCP 163 -#define CLKID_CTS_ENCI 165 -#define CLKID_HDMI_TX_PIXEL 167 -#define CLKID_CTS_ENCL 169 -#define CLKID_CTS_VDAC0 171 #define CLKID_HDMI_SYS 174 #define CLKID_VPU 190 #define CLKID_VDEC_1 196 diff --git a/include/dt-bindings/clock/qcom,gcc-msm8994.h b/include/dt-bindings/clock/qcom,gcc-msm8994.h index f6836f430b..507b8d6eff 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8994.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8994.h @@ -148,18 +148,6 @@ #define GCC_USB30_SLEEP_CLK 138 #define GCC_USB_HS_AHB_CLK 139 #define GCC_USB_PHY_CFG_AHB2PHY_CLK 140 -#define CONFIG_NOC_CLK_SRC 141 -#define PERIPH_NOC_CLK_SRC 142 -#define SYSTEM_NOC_CLK_SRC 143 -#define GPLL0_OUT_MMSSCC 144 -#define GPLL0_OUT_MSSCC 145 -#define PCIE_0_PHY_LDO 146 -#define PCIE_1_PHY_LDO 147 -#define UFS_PHY_LDO 148 -#define USB_SS_PHY_LDO 149 -#define GCC_BOOT_ROM_AHB_CLK 150 -#define GCC_PRNG_AHB_CLK 151 -#define GCC_USB3_PHY_PIPE_CLK 152 /* GDSCs */ #define PCIE_GDSC 0 @@ -174,6 +162,5 @@ #define 
PCIE_PHY_0_RESET 2 #define PCIE_PHY_1_RESET 3 #define QUSB2_PHY_RESET 4 -#define MSS_RESET 5 #endif diff --git a/include/dt-bindings/clock/qcom,rpmcc.h b/include/dt-bindings/clock/qcom,rpmcc.h index fb624ff392..aa834d5162 100644 --- a/include/dt-bindings/clock/qcom,rpmcc.h +++ b/include/dt-bindings/clock/qcom,rpmcc.h @@ -159,11 +159,5 @@ #define RPM_SMD_SNOC_PERIPH_A_CLK 113 #define RPM_SMD_SNOC_LPASS_CLK 114 #define RPM_SMD_SNOC_LPASS_A_CLK 115 -#define RPM_SMD_HWKM_CLK 116 -#define RPM_SMD_HWKM_A_CLK 117 -#define RPM_SMD_PKA_CLK 118 -#define RPM_SMD_PKA_A_CLK 119 -#define RPM_SMD_CPUSS_GNOC_CLK 120 -#define RPM_SMD_CPUSS_GNOC_A_CLK 121 #endif diff --git a/include/dt-bindings/clock/sun50i-a64-ccu.h b/include/dt-bindings/clock/sun50i-a64-ccu.h index 175892189e..318eb15c41 100644 --- a/include/dt-bindings/clock/sun50i-a64-ccu.h +++ b/include/dt-bindings/clock/sun50i-a64-ccu.h @@ -113,7 +113,7 @@ #define CLK_USB_OHCI0 91 #define CLK_USB_OHCI1 93 -#define CLK_DRAM 94 + #define CLK_DRAM_VE 95 #define CLK_DRAM_CSI 96 #define CLK_DRAM_DEINTERLACE 97 diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h index 5d4ada2c22..30d2d15373 100644 --- a/include/dt-bindings/clock/sun8i-h3-ccu.h +++ b/include/dt-bindings/clock/sun8i-h3-ccu.h @@ -126,7 +126,7 @@ #define CLK_USB_OHCI1 93 #define CLK_USB_OHCI2 94 #define CLK_USB_OHCI3 95 -#define CLK_DRAM 96 + #define CLK_DRAM_VE 97 #define CLK_DRAM_CSI 98 #define CLK_DRAM_DEINTERLACE 99 diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index 8d7e66e1b6..2c82072950 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -4,31 +4,11 @@ #ifndef DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H #define DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H -/** - * @file - * @defgroup bpmp_clock_ids Clock ID's - * @{ - */ -/** - * @brief controls the EMC clock frequency. - * @details Doing a clk_set_rate on this clock will select the - * appropriate clock source, program the source rate and execute a - * specific sequence to switch to the new clock source for both memory - * controllers. This can be used to control the balance between memory - * throughput and memory controller power. 
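The EMC description above reduces to an ordinary clk consumer pattern on the driver side; a hedged sketch, where the "emc" lookup name and the my_* naming are assumptions:

#include <linux/clk.h>
#include <linux/err.h>

static int my_set_memory_bandwidth(struct device *dev, unsigned long rate)
{
        struct clk *emc = devm_clk_get(dev, "emc");

        if (IS_ERR(emc))
                return PTR_ERR(emc);

        /* BPMP selects the source and reprograms both memory controllers. */
        return clk_set_rate(emc, rate);
}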
- */ -#define TEGRA234_CLK_EMC 31U /** @brief output of gate CLK_ENB_FUSE */ -#define TEGRA234_CLK_FUSE 40U +#define TEGRA234_CLK_FUSE 40 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */ -#define TEGRA234_CLK_SDMMC4 123U +#define TEGRA234_CLK_SDMMC4 123 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */ -#define TEGRA234_CLK_UARTA 155U -/** @brief CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC_LEGACY_TM switch divider output */ -#define TEGRA234_CLK_SDMMC_LEGACY_TM 219U -/** @brief PLL controlled by CLK_RST_CONTROLLER_PLLC4_BASE */ -#define TEGRA234_CLK_PLLC4 237U -/** @brief 32K input clock provided by PMIC */ -#define TEGRA234_CLK_CLK_32K 289U +#define TEGRA234_CLK_UARTA 155 #endif diff --git a/include/dt-bindings/gpio/msc313-gpio.h b/include/dt-bindings/gpio/msc313-gpio.h index 5458c6580a..2dd56683d3 100644 --- a/include/dt-bindings/gpio/msc313-gpio.h +++ b/include/dt-bindings/gpio/msc313-gpio.h @@ -50,75 +50,4 @@ #define MSC313_GPIO_SPI0_DI (MSC313_GPIO_SPI0 + 2) #define MSC313_GPIO_SPI0_DO (MSC313_GPIO_SPI0 + 3) -/* SSD20x */ -#define SSD20XD_GPIO_FUART 0 -#define SSD20XD_GPIO_FUART_RX (SSD20XD_GPIO_FUART + 0) -#define SSD20XD_GPIO_FUART_TX (SSD20XD_GPIO_FUART + 1) -#define SSD20XD_GPIO_FUART_CTS (SSD20XD_GPIO_FUART + 2) -#define SSD20XD_GPIO_FUART_RTS (SSD20XD_GPIO_FUART + 3) - -#define SSD20XD_GPIO_SD (SSD20XD_GPIO_FUART_RTS + 1) -#define SSD20XD_GPIO_SD_CLK (SSD20XD_GPIO_SD + 0) -#define SSD20XD_GPIO_SD_CMD (SSD20XD_GPIO_SD + 1) -#define SSD20XD_GPIO_SD_D0 (SSD20XD_GPIO_SD + 2) -#define SSD20XD_GPIO_SD_D1 (SSD20XD_GPIO_SD + 3) -#define SSD20XD_GPIO_SD_D2 (SSD20XD_GPIO_SD + 4) -#define SSD20XD_GPIO_SD_D3 (SSD20XD_GPIO_SD + 5) - -#define SSD20XD_GPIO_UART0 (SSD20XD_GPIO_SD_D3 + 1) -#define SSD20XD_GPIO_UART0_RX (SSD20XD_GPIO_UART0 + 0) -#define SSD20XD_GPIO_UART0_TX (SSD20XD_GPIO_UART0 + 1) - -#define SSD20XD_GPIO_UART1 (SSD20XD_GPIO_UART0_TX + 1) -#define SSD20XD_GPIO_UART1_RX (SSD20XD_GPIO_UART1 + 0) -#define SSD20XD_GPIO_UART1_TX (SSD20XD_GPIO_UART1 + 1) - -#define SSD20XD_GPIO_TTL (SSD20XD_GPIO_UART1_TX + 1) -#define SSD20XD_GPIO_TTL0 (SSD20XD_GPIO_TTL + 0) -#define SSD20XD_GPIO_TTL1 (SSD20XD_GPIO_TTL + 1) -#define SSD20XD_GPIO_TTL2 (SSD20XD_GPIO_TTL + 2) -#define SSD20XD_GPIO_TTL3 (SSD20XD_GPIO_TTL + 3) -#define SSD20XD_GPIO_TTL4 (SSD20XD_GPIO_TTL + 4) -#define SSD20XD_GPIO_TTL5 (SSD20XD_GPIO_TTL + 5) -#define SSD20XD_GPIO_TTL6 (SSD20XD_GPIO_TTL + 6) -#define SSD20XD_GPIO_TTL7 (SSD20XD_GPIO_TTL + 7) -#define SSD20XD_GPIO_TTL8 (SSD20XD_GPIO_TTL + 8) -#define SSD20XD_GPIO_TTL9 (SSD20XD_GPIO_TTL + 9) -#define SSD20XD_GPIO_TTL10 (SSD20XD_GPIO_TTL + 10) -#define SSD20XD_GPIO_TTL11 (SSD20XD_GPIO_TTL + 11) -#define SSD20XD_GPIO_TTL12 (SSD20XD_GPIO_TTL + 12) -#define SSD20XD_GPIO_TTL13 (SSD20XD_GPIO_TTL + 13) -#define SSD20XD_GPIO_TTL14 (SSD20XD_GPIO_TTL + 14) -#define SSD20XD_GPIO_TTL15 (SSD20XD_GPIO_TTL + 15) -#define SSD20XD_GPIO_TTL16 (SSD20XD_GPIO_TTL + 16) -#define SSD20XD_GPIO_TTL17 (SSD20XD_GPIO_TTL + 17) -#define SSD20XD_GPIO_TTL18 (SSD20XD_GPIO_TTL + 18) -#define SSD20XD_GPIO_TTL19 (SSD20XD_GPIO_TTL + 19) -#define SSD20XD_GPIO_TTL20 (SSD20XD_GPIO_TTL + 20) -#define SSD20XD_GPIO_TTL21 (SSD20XD_GPIO_TTL + 21) -#define SSD20XD_GPIO_TTL22 (SSD20XD_GPIO_TTL + 22) -#define SSD20XD_GPIO_TTL23 (SSD20XD_GPIO_TTL + 23) -#define SSD20XD_GPIO_TTL24 (SSD20XD_GPIO_TTL + 24) -#define SSD20XD_GPIO_TTL25 (SSD20XD_GPIO_TTL + 25) -#define SSD20XD_GPIO_TTL26 (SSD20XD_GPIO_TTL + 26) -#define SSD20XD_GPIO_TTL27 (SSD20XD_GPIO_TTL + 27) - -#define 
SSD20XD_GPIO_GPIO (SSD20XD_GPIO_TTL27 + 1) -#define SSD20XD_GPIO_GPIO0 (SSD20XD_GPIO_GPIO + 0) -#define SSD20XD_GPIO_GPIO1 (SSD20XD_GPIO_GPIO + 1) -#define SSD20XD_GPIO_GPIO2 (SSD20XD_GPIO_GPIO + 2) -#define SSD20XD_GPIO_GPIO3 (SSD20XD_GPIO_GPIO + 3) -#define SSD20XD_GPIO_GPIO4 (SSD20XD_GPIO_GPIO + 4) -#define SSD20XD_GPIO_GPIO5 (SSD20XD_GPIO_GPIO + 5) -#define SSD20XD_GPIO_GPIO6 (SSD20XD_GPIO_GPIO + 6) -#define SSD20XD_GPIO_GPIO7 (SSD20XD_GPIO_GPIO + 7) -#define SSD20XD_GPIO_GPIO10 (SSD20XD_GPIO_GPIO + 8) -#define SSD20XD_GPIO_GPIO11 (SSD20XD_GPIO_GPIO + 9) -#define SSD20XD_GPIO_GPIO12 (SSD20XD_GPIO_GPIO + 10) -#define SSD20XD_GPIO_GPIO13 (SSD20XD_GPIO_GPIO + 11) -#define SSD20XD_GPIO_GPIO14 (SSD20XD_GPIO_GPIO + 12) -#define SSD20XD_GPIO_GPIO85 (SSD20XD_GPIO_GPIO + 13) -#define SSD20XD_GPIO_GPIO86 (SSD20XD_GPIO_GPIO + 14) -#define SSD20XD_GPIO_GPIO90 (SSD20XD_GPIO_GPIO + 15) - #endif /* _DT_BINDINGS_MSC313_GPIO_H */ diff --git a/include/dt-bindings/leds/common.h b/include/dt-bindings/leds/common.h index 3be89a7c20..52b619d44b 100644 --- a/include/dt-bindings/leds/common.h +++ b/include/dt-bindings/leds/common.h @@ -60,13 +60,6 @@ #define LED_FUNCTION_MICMUTE "micmute" #define LED_FUNCTION_MUTE "mute" -/* Used for player LEDs as found on game controllers from e.g. Nintendo, Sony. */ -#define LED_FUNCTION_PLAYER1 "player-1" -#define LED_FUNCTION_PLAYER2 "player-2" -#define LED_FUNCTION_PLAYER3 "player-3" -#define LED_FUNCTION_PLAYER4 "player-4" -#define LED_FUNCTION_PLAYER5 "player-5" - /* Miscellaneous functions. Use functions above if you can. */ #define LED_FUNCTION_ACTIVITY "activity" #define LED_FUNCTION_ALARM "alarm" diff --git a/include/dt-bindings/mailbox/qcom-ipcc.h b/include/dt-bindings/mailbox/qcom-ipcc.h index 9296d0bb5f..eb91a6c05b 100644 --- a/include/dt-bindings/mailbox/qcom-ipcc.h +++ b/include/dt-bindings/mailbox/qcom-ipcc.h @@ -8,7 +8,6 @@ /* Signal IDs for MPROC protocol */ #define IPCC_MPROC_SIGNAL_GLINK_QMP 0 -#define IPCC_MPROC_SIGNAL_TZ 1 #define IPCC_MPROC_SIGNAL_SMP2P 2 #define IPCC_MPROC_SIGNAL_PING 3 @@ -30,7 +29,6 @@ #define IPCC_CLIENT_PCIE1 14 #define IPCC_CLIENT_PCIE2 15 #define IPCC_CLIENT_SPSS 16 -#define IPCC_CLIENT_TME 23 #define IPCC_CLIENT_WPSS 24 #endif diff --git a/include/dt-bindings/mux/ti-serdes.h b/include/dt-bindings/mux/ti-serdes.h index d3116c52ab..d417b9268b 100644 --- a/include/dt-bindings/mux/ti-serdes.h +++ b/include/dt-bindings/mux/ti-serdes.h @@ -95,26 +95,4 @@ #define AM64_SERDES0_LANE0_PCIE0 0x0 #define AM64_SERDES0_LANE0_USB 0x1 -/* J721S2 */ - -#define J721S2_SERDES0_LANE0_EDP_LANE0 0x0 -#define J721S2_SERDES0_LANE0_PCIE1_LANE0 0x1 -#define J721S2_SERDES0_LANE0_IP3_UNUSED 0x2 -#define J721S2_SERDES0_LANE0_IP4_UNUSED 0x3 - -#define J721S2_SERDES0_LANE1_EDP_LANE1 0x0 -#define J721S2_SERDES0_LANE1_PCIE1_LANE1 0x1 -#define J721S2_SERDES0_LANE1_USB 0x2 -#define J721S2_SERDES0_LANE1_IP4_UNUSED 0x3 - -#define J721S2_SERDES0_LANE2_EDP_LANE2 0x0 -#define J721S2_SERDES0_LANE2_PCIE1_LANE2 0x1 -#define J721S2_SERDES0_LANE2_IP3_UNUSED 0x2 -#define J721S2_SERDES0_LANE2_IP4_UNUSED 0x3 - -#define J721S2_SERDES0_LANE3_EDP_LANE3 0x0 -#define J721S2_SERDES0_LANE3_PCIE1_LANE3 0x1 -#define J721S2_SERDES0_LANE3_USB 0x2 -#define J721S2_SERDES0_LANE3_IP4_UNUSED 0x3 - #endif /* _DT_BINDINGS_MUX_TI_SERDES */ diff --git a/include/dt-bindings/phy/phy-cadence.h b/include/dt-bindings/phy/phy-cadence.h index 0671991208..4652bcb862 100644 --- a/include/dt-bindings/phy/phy-cadence.h +++ b/include/dt-bindings/phy/phy-cadence.h @@ -6,18 +6,15 @@ #ifndef 
_DT_BINDINGS_CADENCE_SERDES_H #define _DT_BINDINGS_CADENCE_SERDES_H -#define CDNS_SERDES_NO_SSC 0 -#define CDNS_SERDES_EXTERNAL_SSC 1 -#define CDNS_SERDES_INTERNAL_SSC 2 - /* Torrent */ +#define TORRENT_SERDES_NO_SSC 0 +#define TORRENT_SERDES_EXTERNAL_SSC 1 +#define TORRENT_SERDES_INTERNAL_SSC 2 + #define CDNS_TORRENT_REFCLK_DRIVER 0 -#define CDNS_TORRENT_DERIVED_REFCLK 1 -#define CDNS_TORRENT_RECEIVED_REFCLK 2 /* Sierra */ #define CDNS_SIERRA_PLL_CMNLC 0 #define CDNS_SIERRA_PLL_CMNLC1 1 -#define CDNS_SIERRA_DERIVED_REFCLK 2 #endif /* _DT_BINDINGS_CADENCE_SERDES_H */ diff --git a/include/dt-bindings/pinctrl/k3.h b/include/dt-bindings/pinctrl/k3.h index 63e038e36c..e085f102b2 100644 --- a/include/dt-bindings/pinctrl/k3.h +++ b/include/dt-bindings/pinctrl/k3.h @@ -38,7 +38,4 @@ #define AM64X_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) #define AM64X_MCU_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) -#define J721S2_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) -#define J721S2_WKUP_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) - #endif diff --git a/include/dt-bindings/pinctrl/mt65xx.h b/include/dt-bindings/pinctrl/mt65xx.h index f5934abcd1..7e16e58fe1 100644 --- a/include/dt-bindings/pinctrl/mt65xx.h +++ b/include/dt-bindings/pinctrl/mt65xx.h @@ -16,15 +16,6 @@ #define MTK_PUPD_SET_R1R0_10 102 #define MTK_PUPD_SET_R1R0_11 103 -#define MTK_PULL_SET_RSEL_000 200 -#define MTK_PULL_SET_RSEL_001 201 -#define MTK_PULL_SET_RSEL_010 202 -#define MTK_PULL_SET_RSEL_011 203 -#define MTK_PULL_SET_RSEL_100 204 -#define MTK_PULL_SET_RSEL_101 205 -#define MTK_PULL_SET_RSEL_110 206 -#define MTK_PULL_SET_RSEL_111 207 - #define MTK_DRIVE_2mA 2 #define MTK_DRIVE_4mA 4 #define MTK_DRIVE_6mA 6 diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h index 950970634d..b1832506b9 100644 --- a/include/dt-bindings/pinctrl/samsung.h +++ b/include/dt-bindings/pinctrl/samsung.h @@ -36,10 +36,7 @@ #define EXYNOS5260_PIN_DRV_LV4 2 #define EXYNOS5260_PIN_DRV_LV6 3 -/* - * Drive strengths for Exynos5410, Exynos542x, Exynos5800 and Exynos850 (except - * GPIO_HSI block) - */ +/* Drive strengths for Exynos5410, Exynos542x and Exynos5800 */ #define EXYNOS5420_PIN_DRV_LV1 0 #define EXYNOS5420_PIN_DRV_LV2 1 #define EXYNOS5420_PIN_DRV_LV3 2 @@ -59,14 +56,6 @@ #define EXYNOS5433_PIN_DRV_SLOW_SR5 0xc #define EXYNOS5433_PIN_DRV_SLOW_SR6 0xf -/* Drive strengths for Exynos850 GPIO_HSI block */ -#define EXYNOS850_HSI_PIN_DRV_LV1 0 /* 1x */ -#define EXYNOS850_HSI_PIN_DRV_LV1_5 1 /* 1.5x */ -#define EXYNOS850_HSI_PIN_DRV_LV2 2 /* 2x */ -#define EXYNOS850_HSI_PIN_DRV_LV2_5 3 /* 2.5x */ -#define EXYNOS850_HSI_PIN_DRV_LV3 4 /* 3x */ -#define EXYNOS850_HSI_PIN_DRV_LV4 5 /* 4x */ - #define EXYNOS_PIN_FUNC_INPUT 0 #define EXYNOS_PIN_FUNC_OUTPUT 1 #define EXYNOS_PIN_FUNC_2 2 diff --git a/include/dt-bindings/power/imx8mm-power.h b/include/dt-bindings/power/imx8mm-power.h index 648938f24c..fc9c2e16aa 100644 --- a/include/dt-bindings/power/imx8mm-power.h +++ b/include/dt-bindings/power/imx8mm-power.h @@ -19,13 +19,4 @@ #define IMX8MM_POWER_DOMAIN_DISPMIX 10 #define IMX8MM_POWER_DOMAIN_MIPI 11 -#define IMX8MM_VPUBLK_PD_G1 0 -#define IMX8MM_VPUBLK_PD_G2 1 -#define IMX8MM_VPUBLK_PD_H1 2 - -#define IMX8MM_DISPBLK_PD_CSI_BRIDGE 0 -#define IMX8MM_DISPBLK_PD_LCDIF 1 -#define IMX8MM_DISPBLK_PD_MIPI_DSI 2 -#define IMX8MM_DISPBLK_PD_MIPI_CSI 3 - #endif diff --git a/include/dt-bindings/power/imx8mn-power.h b/include/dt-bindings/power/imx8mn-power.h index 
eedd0e5819..102ee85a9b 100644 --- a/include/dt-bindings/power/imx8mn-power.h +++ b/include/dt-bindings/power/imx8mn-power.h @@ -12,9 +12,4 @@ #define IMX8MN_POWER_DOMAIN_DISPMIX 3 #define IMX8MN_POWER_DOMAIN_MIPI 4 -#define IMX8MN_DISPBLK_PD_MIPI_DSI 0 -#define IMX8MN_DISPBLK_PD_MIPI_CSI 1 -#define IMX8MN_DISPBLK_PD_LCDIF 2 -#define IMX8MN_DISPBLK_PD_ISI 3 - #endif diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index edfc1ff2ac..4533dbbf99 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -20,14 +20,6 @@ #define SDX55_MX 1 #define SDX55_CX 2 -/* SM6350 Power Domain Indexes */ -#define SM6350_CX 0 -#define SM6350_GFX 1 -#define SM6350_LCX 2 -#define SM6350_LMX 3 -#define SM6350_MSS 4 -#define SM6350_MX 5 - /* SM8150 Power Domain Indexes */ #define SM8150_MSS 0 #define SM8150_EBI 1 @@ -68,21 +60,6 @@ #define SM8350_MXC_AO 11 #define SM8350_MSS 12 -/* SM8450 Power Domain Indexes */ -#define SM8450_CX 0 -#define SM8450_CX_AO 1 -#define SM8450_EBI 2 -#define SM8450_GFX 3 -#define SM8450_LCX 4 -#define SM8450_LMX 5 -#define SM8450_MMCX 6 -#define SM8450_MMCX_AO 7 -#define SM8450_MX 8 -#define SM8450_MX_AO 9 -#define SM8450_MXC 10 -#define SM8450_MXC_AO 11 -#define SM8450_MSS 12 - /* SC7180 Power Domain Indexes */ #define SC7180_CX 0 #define SC7180_CX_AO 1 @@ -156,15 +133,6 @@ #define MSM8916_VDDMX 3 #define MSM8916_VDDMX_AO 4 -/* MSM8953 Power Domain Indexes */ -#define MSM8953_VDDMD 0 -#define MSM8953_VDDMD_AO 1 -#define MSM8953_VDDCX 2 -#define MSM8953_VDDCX_AO 3 -#define MSM8953_VDDCX_VFL 4 -#define MSM8953_VDDMX 5 -#define MSM8953_VDDMX_AO 6 - /* MSM8976 Power Domain Indexes */ #define MSM8976_VDDCX 0 #define MSM8976_VDDCX_AO 1 @@ -234,24 +202,6 @@ #define SM6115_VDD_LPI_CX 6 #define SM6115_VDD_LPI_MX 7 -/* SM6125 Power Domains */ -#define SM6125_VDDCX 0 -#define SM6125_VDDCX_AO 1 -#define SM6125_VDDCX_VFL 2 -#define SM6125_VDDMX 3 -#define SM6125_VDDMX_AO 4 -#define SM6125_VDDMX_VFL 5 - -/* QCM2290 Power Domains */ -#define QCM2290_VDDCX 0 -#define QCM2290_VDDCX_AO 1 -#define QCM2290_VDDCX_VFL 2 -#define QCM2290_VDDMX 3 -#define QCM2290_VDDMX_AO 4 -#define QCM2290_VDDMX_VFL 5 -#define QCM2290_VDD_LPI_CX 6 -#define QCM2290_VDD_LPI_MX 7 - /* RPM SMD Power Domain performance levels */ #define RPM_SMD_LEVEL_RETENTION 16 #define RPM_SMD_LEVEL_RETENTION_PLUS 32 diff --git a/include/dt-bindings/reset/mt8173-resets.h b/include/dt-bindings/reset/mt8173-resets.h index 6a60c7cecc..ba8636eda5 100644 --- a/include/dt-bindings/reset/mt8173-resets.h +++ b/include/dt-bindings/reset/mt8173-resets.h @@ -27,8 +27,6 @@ #define MT8173_INFRA_GCE_FAXI_RST 40 #define MT8173_INFRA_MMIOMMURST 47 -/* MMSYS resets */ -#define MT8173_MMSYS_SW0_RST_B_DISP_DSI0 25 /* PERICFG resets */ #define MT8173_PERI_UART0_SW_RST 0 diff --git a/include/dt-bindings/reset/tegra234-reset.h b/include/dt-bindings/reset/tegra234-reset.h index 50e13bced6..b3c63be06d 100644 --- a/include/dt-bindings/reset/tegra234-reset.h +++ b/include/dt-bindings/reset/tegra234-reset.h @@ -4,15 +4,7 @@ #ifndef DT_BINDINGS_RESET_TEGRA234_RESET_H #define DT_BINDINGS_RESET_TEGRA234_RESET_H -/** - * @file - * @defgroup bpmp_reset_ids Reset ID's - * @brief Identifiers for Resets controllable by firmware - * @{ - */ -#define TEGRA234_RESET_SDMMC4 85U -#define TEGRA234_RESET_UARTA 100U - -/** @} */ +#define TEGRA234_RESET_SDMMC4 85 +#define TEGRA234_RESET_UARTA 100 #endif diff --git a/include/dt-bindings/sound/qcom,lpass.h b/include/dt-bindings/sound/qcom,lpass.h index 
a9404c3b88..7b0b80b386 100644 --- a/include/dt-bindings/sound/qcom,lpass.h +++ b/include/dt-bindings/sound/qcom,lpass.h @@ -10,37 +10,6 @@ #define LPASS_DP_RX 5 -#define LPASS_CDC_DMA_RX0 6 -#define LPASS_CDC_DMA_RX1 7 -#define LPASS_CDC_DMA_RX2 8 -#define LPASS_CDC_DMA_RX3 9 -#define LPASS_CDC_DMA_RX4 10 -#define LPASS_CDC_DMA_RX5 11 -#define LPASS_CDC_DMA_RX6 12 -#define LPASS_CDC_DMA_RX7 13 -#define LPASS_CDC_DMA_RX8 14 -#define LPASS_CDC_DMA_RX9 15 - -#define LPASS_CDC_DMA_TX0 16 -#define LPASS_CDC_DMA_TX1 17 -#define LPASS_CDC_DMA_TX2 18 -#define LPASS_CDC_DMA_TX3 19 -#define LPASS_CDC_DMA_TX4 20 -#define LPASS_CDC_DMA_TX5 21 -#define LPASS_CDC_DMA_TX6 22 -#define LPASS_CDC_DMA_TX7 23 -#define LPASS_CDC_DMA_TX8 24 - -#define LPASS_CDC_DMA_VA_TX0 25 -#define LPASS_CDC_DMA_VA_TX1 26 -#define LPASS_CDC_DMA_VA_TX2 27 -#define LPASS_CDC_DMA_VA_TX3 28 -#define LPASS_CDC_DMA_VA_TX4 29 -#define LPASS_CDC_DMA_VA_TX5 30 -#define LPASS_CDC_DMA_VA_TX6 31 -#define LPASS_CDC_DMA_VA_TX7 32 -#define LPASS_CDC_DMA_VA_TX8 33 - #define LPASS_MCLK0 0 #endif /* __DT_QCOM_LPASS_H */ diff --git a/include/dt-bindings/sound/qcom,q6afe.h b/include/dt-bindings/sound/qcom,q6afe.h index 9d5d89cfab..66c21ab03e 100644 --- a/include/dt-bindings/sound/qcom,q6afe.h +++ b/include/dt-bindings/sound/qcom,q6afe.h @@ -2,8 +2,207 @@ #ifndef __DT_BINDINGS_Q6_AFE_H__ #define __DT_BINDINGS_Q6_AFE_H__ -/* This file exists due to backward compatibility reasons, Please do not DELETE! */ +/* Audio Front End (AFE) virtual ports IDs */ +#define HDMI_RX 1 +#define SLIMBUS_0_RX 2 +#define SLIMBUS_0_TX 3 +#define SLIMBUS_1_RX 4 +#define SLIMBUS_1_TX 5 +#define SLIMBUS_2_RX 6 +#define SLIMBUS_2_TX 7 +#define SLIMBUS_3_RX 8 +#define SLIMBUS_3_TX 9 +#define SLIMBUS_4_RX 10 +#define SLIMBUS_4_TX 11 +#define SLIMBUS_5_RX 12 +#define SLIMBUS_5_TX 13 +#define SLIMBUS_6_RX 14 +#define SLIMBUS_6_TX 15 +#define PRIMARY_MI2S_RX 16 +#define PRIMARY_MI2S_TX 17 +#define SECONDARY_MI2S_RX 18 +#define SECONDARY_MI2S_TX 19 +#define TERTIARY_MI2S_RX 20 +#define TERTIARY_MI2S_TX 21 +#define QUATERNARY_MI2S_RX 22 +#define QUATERNARY_MI2S_TX 23 +#define PRIMARY_TDM_RX_0 24 +#define PRIMARY_TDM_TX_0 25 +#define PRIMARY_TDM_RX_1 26 +#define PRIMARY_TDM_TX_1 27 +#define PRIMARY_TDM_RX_2 28 +#define PRIMARY_TDM_TX_2 29 +#define PRIMARY_TDM_RX_3 30 +#define PRIMARY_TDM_TX_3 31 +#define PRIMARY_TDM_RX_4 32 +#define PRIMARY_TDM_TX_4 33 +#define PRIMARY_TDM_RX_5 34 +#define PRIMARY_TDM_TX_5 35 +#define PRIMARY_TDM_RX_6 36 +#define PRIMARY_TDM_TX_6 37 +#define PRIMARY_TDM_RX_7 38 +#define PRIMARY_TDM_TX_7 39 +#define SECONDARY_TDM_RX_0 40 +#define SECONDARY_TDM_TX_0 41 +#define SECONDARY_TDM_RX_1 42 +#define SECONDARY_TDM_TX_1 43 +#define SECONDARY_TDM_RX_2 44 +#define SECONDARY_TDM_TX_2 45 +#define SECONDARY_TDM_RX_3 46 +#define SECONDARY_TDM_TX_3 47 +#define SECONDARY_TDM_RX_4 48 +#define SECONDARY_TDM_TX_4 49 +#define SECONDARY_TDM_RX_5 50 +#define SECONDARY_TDM_TX_5 51 +#define SECONDARY_TDM_RX_6 52 +#define SECONDARY_TDM_TX_6 53 +#define SECONDARY_TDM_RX_7 54 +#define SECONDARY_TDM_TX_7 55 +#define TERTIARY_TDM_RX_0 56 +#define TERTIARY_TDM_TX_0 57 +#define TERTIARY_TDM_RX_1 58 +#define TERTIARY_TDM_TX_1 59 +#define TERTIARY_TDM_RX_2 60 +#define TERTIARY_TDM_TX_2 61 +#define TERTIARY_TDM_RX_3 62 +#define TERTIARY_TDM_TX_3 63 +#define TERTIARY_TDM_RX_4 64 +#define TERTIARY_TDM_TX_4 65 +#define TERTIARY_TDM_RX_5 66 +#define TERTIARY_TDM_TX_5 67 +#define TERTIARY_TDM_RX_6 68 +#define TERTIARY_TDM_TX_6 69 +#define TERTIARY_TDM_RX_7 70 +#define TERTIARY_TDM_TX_7 71 
+#define QUATERNARY_TDM_RX_0 72 +#define QUATERNARY_TDM_TX_0 73 +#define QUATERNARY_TDM_RX_1 74 +#define QUATERNARY_TDM_TX_1 75 +#define QUATERNARY_TDM_RX_2 76 +#define QUATERNARY_TDM_TX_2 77 +#define QUATERNARY_TDM_RX_3 78 +#define QUATERNARY_TDM_TX_3 79 +#define QUATERNARY_TDM_RX_4 80 +#define QUATERNARY_TDM_TX_4 81 +#define QUATERNARY_TDM_RX_5 82 +#define QUATERNARY_TDM_TX_5 83 +#define QUATERNARY_TDM_RX_6 84 +#define QUATERNARY_TDM_TX_6 85 +#define QUATERNARY_TDM_RX_7 86 +#define QUATERNARY_TDM_TX_7 87 +#define QUINARY_TDM_RX_0 88 +#define QUINARY_TDM_TX_0 89 +#define QUINARY_TDM_RX_1 90 +#define QUINARY_TDM_TX_1 91 +#define QUINARY_TDM_RX_2 92 +#define QUINARY_TDM_TX_2 93 +#define QUINARY_TDM_RX_3 94 +#define QUINARY_TDM_TX_3 95 +#define QUINARY_TDM_RX_4 96 +#define QUINARY_TDM_TX_4 97 +#define QUINARY_TDM_RX_5 98 +#define QUINARY_TDM_TX_5 99 +#define QUINARY_TDM_RX_6 100 +#define QUINARY_TDM_TX_6 101 +#define QUINARY_TDM_RX_7 102 +#define QUINARY_TDM_TX_7 103 +#define DISPLAY_PORT_RX 104 +#define WSA_CODEC_DMA_RX_0 105 +#define WSA_CODEC_DMA_TX_0 106 +#define WSA_CODEC_DMA_RX_1 107 +#define WSA_CODEC_DMA_TX_1 108 +#define WSA_CODEC_DMA_TX_2 109 +#define VA_CODEC_DMA_TX_0 110 +#define VA_CODEC_DMA_TX_1 111 +#define VA_CODEC_DMA_TX_2 112 +#define RX_CODEC_DMA_RX_0 113 +#define TX_CODEC_DMA_TX_0 114 +#define RX_CODEC_DMA_RX_1 115 +#define TX_CODEC_DMA_TX_1 116 +#define RX_CODEC_DMA_RX_2 117 +#define TX_CODEC_DMA_TX_2 118 +#define RX_CODEC_DMA_RX_3 119 +#define TX_CODEC_DMA_TX_3 120 +#define RX_CODEC_DMA_RX_4 121 +#define TX_CODEC_DMA_TX_4 122 +#define RX_CODEC_DMA_RX_5 123 +#define TX_CODEC_DMA_TX_5 124 +#define RX_CODEC_DMA_RX_6 125 +#define RX_CODEC_DMA_RX_7 126 +#define QUINARY_MI2S_RX 127 +#define QUINARY_MI2S_TX 128 -#include +#define LPASS_CLK_ID_PRI_MI2S_IBIT 1 +#define LPASS_CLK_ID_PRI_MI2S_EBIT 2 +#define LPASS_CLK_ID_SEC_MI2S_IBIT 3 +#define LPASS_CLK_ID_SEC_MI2S_EBIT 4 +#define LPASS_CLK_ID_TER_MI2S_IBIT 5 +#define LPASS_CLK_ID_TER_MI2S_EBIT 6 +#define LPASS_CLK_ID_QUAD_MI2S_IBIT 7 +#define LPASS_CLK_ID_QUAD_MI2S_EBIT 8 +#define LPASS_CLK_ID_SPEAKER_I2S_IBIT 9 +#define LPASS_CLK_ID_SPEAKER_I2S_EBIT 10 +#define LPASS_CLK_ID_SPEAKER_I2S_OSR 11 +#define LPASS_CLK_ID_QUI_MI2S_IBIT 12 +#define LPASS_CLK_ID_QUI_MI2S_EBIT 13 +#define LPASS_CLK_ID_SEN_MI2S_IBIT 14 +#define LPASS_CLK_ID_SEN_MI2S_EBIT 15 +#define LPASS_CLK_ID_INT0_MI2S_IBIT 16 +#define LPASS_CLK_ID_INT1_MI2S_IBIT 17 +#define LPASS_CLK_ID_INT2_MI2S_IBIT 18 +#define LPASS_CLK_ID_INT3_MI2S_IBIT 19 +#define LPASS_CLK_ID_INT4_MI2S_IBIT 20 +#define LPASS_CLK_ID_INT5_MI2S_IBIT 21 +#define LPASS_CLK_ID_INT6_MI2S_IBIT 22 +#define LPASS_CLK_ID_QUI_MI2S_OSR 23 +#define LPASS_CLK_ID_PRI_PCM_IBIT 24 +#define LPASS_CLK_ID_PRI_PCM_EBIT 25 +#define LPASS_CLK_ID_SEC_PCM_IBIT 26 +#define LPASS_CLK_ID_SEC_PCM_EBIT 27 +#define LPASS_CLK_ID_TER_PCM_IBIT 28 +#define LPASS_CLK_ID_TER_PCM_EBIT 29 +#define LPASS_CLK_ID_QUAD_PCM_IBIT 30 +#define LPASS_CLK_ID_QUAD_PCM_EBIT 31 +#define LPASS_CLK_ID_QUIN_PCM_IBIT 32 +#define LPASS_CLK_ID_QUIN_PCM_EBIT 33 +#define LPASS_CLK_ID_QUI_PCM_OSR 34 +#define LPASS_CLK_ID_PRI_TDM_IBIT 35 +#define LPASS_CLK_ID_PRI_TDM_EBIT 36 +#define LPASS_CLK_ID_SEC_TDM_IBIT 37 +#define LPASS_CLK_ID_SEC_TDM_EBIT 38 +#define LPASS_CLK_ID_TER_TDM_IBIT 39 +#define LPASS_CLK_ID_TER_TDM_EBIT 40 +#define LPASS_CLK_ID_QUAD_TDM_IBIT 41 +#define LPASS_CLK_ID_QUAD_TDM_EBIT 42 +#define LPASS_CLK_ID_QUIN_TDM_IBIT 43 +#define LPASS_CLK_ID_QUIN_TDM_EBIT 44 +#define LPASS_CLK_ID_QUIN_TDM_OSR 45 +#define LPASS_CLK_ID_MCLK_1 46 +#define 
LPASS_CLK_ID_MCLK_2 47 +#define LPASS_CLK_ID_MCLK_3 48 +#define LPASS_CLK_ID_MCLK_4 49 +#define LPASS_CLK_ID_INTERNAL_DIGITAL_CODEC_CORE 50 +#define LPASS_CLK_ID_INT_MCLK_0 51 +#define LPASS_CLK_ID_INT_MCLK_1 52 +#define LPASS_CLK_ID_MCLK_5 53 +#define LPASS_CLK_ID_WSA_CORE_MCLK 54 +#define LPASS_CLK_ID_WSA_CORE_NPL_MCLK 55 +#define LPASS_CLK_ID_VA_CORE_MCLK 56 +#define LPASS_CLK_ID_TX_CORE_MCLK 57 +#define LPASS_CLK_ID_TX_CORE_NPL_MCLK 58 +#define LPASS_CLK_ID_RX_CORE_MCLK 59 +#define LPASS_CLK_ID_RX_CORE_NPL_MCLK 60 +#define LPASS_CLK_ID_VA_CORE_2X_MCLK 61 + +#define LPASS_HW_AVTIMER_VOTE 101 +#define LPASS_HW_MACRO_VOTE 102 +#define LPASS_HW_DCODEC_VOTE 103 + +#define Q6AFE_MAX_CLK_ID 104 + +#define LPASS_CLK_ATTRIBUTE_INVALID 0x0 +#define LPASS_CLK_ATTRIBUTE_COUPLE_NO 0x1 +#define LPASS_CLK_ATTRIBUTE_COUPLE_DIVIDEND 0x2 +#define LPASS_CLK_ATTRIBUTE_COUPLE_DIVISOR 0x3 #endif /* __DT_BINDINGS_Q6_AFE_H__ */ diff --git a/include/dt-bindings/sound/rt5640.h b/include/dt-bindings/sound/rt5640.h index 655f694638..154c9b4414 100644 --- a/include/dt-bindings/sound/rt5640.h +++ b/include/dt-bindings/sound/rt5640.h @@ -16,7 +16,6 @@ #define RT5640_JD_SRC_GPIO2 4 #define RT5640_JD_SRC_GPIO3 5 #define RT5640_JD_SRC_GPIO4 6 -#define RT5640_JD_SRC_HDA_HEADER 7 #define RT5640_OVCD_SF_0P5 0 #define RT5640_OVCD_SF_0P75 1 diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h index 6c5d4963e1..c432fdb854 100644 --- a/include/keys/asymmetric-type.h +++ b/include/keys/asymmetric-type.h @@ -53,7 +53,7 @@ struct asymmetric_key_id { }; struct asymmetric_key_ids { - void *id[3]; + void *id[2]; }; extern bool asymmetric_key_id_same(const struct asymmetric_key_id *kid1, @@ -81,7 +81,6 @@ const struct public_key *asymmetric_key_public_key(const struct key *key) extern struct key *find_asymmetric_key(struct key *keyring, const struct asymmetric_key_id *id_0, const struct asymmetric_key_id *id_1, - const struct asymmetric_key_id *id_2, bool partial); /* diff --git a/include/kunit/test.h b/include/kunit/test.h index b26400731c..018e776a34 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -11,20 +11,11 @@ #include #include - -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include #include - -#include +#include struct kunit_resource; diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index f9ed4c171d..90f21898aa 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,6 +13,13 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) +DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + +static __always_inline bool kvm_arm_support_pmu_v3(void) +{ + return static_branch_likely(&kvm_arm_pmu_available); +} + #ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { @@ -29,13 +36,6 @@ struct kvm_pmu { struct irq_work overflow_work; }; -DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); - -static __always_inline bool kvm_arm_support_pmu_v3(void) -{ - return static_branch_likely(&kvm_arm_pmu_available); -} - #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); @@ -65,11 +65,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); struct kvm_pmu { }; -static inline bool kvm_arm_support_pmu_v3(void) -{ - return false; -} - #define kvm_arm_pmu_irq_initialized(v) (false) static inline u64 
kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index bb30a6803d..e602d848fc 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -5,11 +5,9 @@ #ifndef __KVM_ARM_VGIC_H #define __KVM_ARM_VGIC_H -#include +#include #include #include -#include -#include #include #include #include diff --git a/include/media/dmxdev.h b/include/media/dmxdev.h index 63219a6993..baafa3b8ac 100644 --- a/include/media/dmxdev.h +++ b/include/media/dmxdev.h @@ -21,6 +21,7 @@ #include #include +#include #include #include #include diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h index e7c44870f2..0d76fa4551 100644 --- a/include/media/dvb_frontend.h +++ b/include/media/dvb_frontend.h @@ -364,10 +364,6 @@ struct dvb_frontend_internal_info { * allocated by the driver. * @init: callback function used to initialize the tuner device. * @sleep: callback function used to put the tuner to sleep. - * @suspend: callback function used to inform that the Kernel will - * suspend. - * @resume: callback function used to inform that the Kernel is - * resuming from suspend. * @write: callback function used by some demod legacy drivers to * allow other drivers to write data into their registers. * Should not be used on new drivers. @@ -447,8 +443,6 @@ struct dvb_frontend_ops { int (*init)(struct dvb_frontend* fe); int (*sleep)(struct dvb_frontend* fe); - int (*suspend)(struct dvb_frontend *fe); - int (*resume)(struct dvb_frontend *fe); int (*write)(struct dvb_frontend* fe, const u8 buf[], int len); @@ -761,8 +755,7 @@ void dvb_frontend_detach(struct dvb_frontend *fe); * &dvb_frontend_ops.tuner_ops.suspend\(\) is available, it calls it. Otherwise, * it will call &dvb_frontend_ops.tuner_ops.sleep\(\), if available. * - * It will also call &dvb_frontend_ops.suspend\(\) to put the demod to suspend, - * if available. Otherwise it will call &dvb_frontend_ops.sleep\(\). + * It will also call &dvb_frontend_ops.sleep\(\) to put the demod to sleep. * * The drivers should also call dvb_frontend_suspend\(\) as part of their * handler for the &device_driver.suspend\(\). @@ -776,9 +769,7 @@ int dvb_frontend_suspend(struct dvb_frontend *fe); * * This function resumes the usual operation of the tuner after resume. * - * In order to resume the frontend, it calls the demod - * &dvb_frontend_ops.resume\(\) if available. Otherwise it calls demod - * &dvb_frontend_ops.init\(\). + * In order to resume the frontend, it calls the demod &dvb_frontend_ops.init\(\). * * If &dvb_frontend_ops.tuner_ops.resume\(\) is available, it calls it. * Otherwise, it will call &dvb_frontend_ops.tuner_ops.init\(\), if available.
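The kernel-doc above expects bridge drivers to route their power-management hooks through dvb_frontend_suspend() and dvb_frontend_resume(). A minimal sketch of that wiring, assuming a hypothetical bridge driver (the example_dev structure and callbacks are invented; only the two dvb-core calls come from the documentation above):

	#include <linux/device.h>
	#include <media/dvb_frontend.h>

	/* Hypothetical per-device state of a bridge driver */
	struct example_dev {
		struct dvb_frontend *fe;
	};

	static int example_suspend(struct device *dev)
	{
		struct example_dev *edev = dev_get_drvdata(dev);

		/* Sleeps the tuner (tuner_ops.suspend() or .sleep()), then the demod */
		return dvb_frontend_suspend(edev->fe);
	}

	static int example_resume(struct device *dev)
	{
		struct example_dev *edev = dev_get_drvdata(dev);

		/* Re-initializes the demod via ops.init(), then the tuner */
		return dvb_frontend_resume(edev->fe);
	}

With &dvb_frontend_ops.suspend\(\)/resume\(\) removed by this change, dvb-core falls back to ops.sleep\(\) and ops.init\(\), exactly as the updated comments describe.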
diff --git a/include/media/i2c/mt9p031.h b/include/media/i2c/mt9p031.h index f933cd0be8..7c29c53aa9 100644 --- a/include/media/i2c/mt9p031.h +++ b/include/media/i2c/mt9p031.h @@ -10,7 +10,6 @@ struct v4l2_subdev; * @target_freq: Pixel clock frequency */ struct mt9p031_platform_data { - unsigned int pixclk_pol:1; int ext_freq; int target_freq; }; diff --git a/include/media/media-entity.h b/include/media/media-entity.h index fea489f03d..09737b4788 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -13,11 +13,10 @@ #include #include -#include #include +#include #include #include -#include /* Enums used internally at the media controller to represent graphs */ diff --git a/include/media/rc-core.h b/include/media/rc-core.h index ab9d3b7cd7..8c5b7978e1 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -59,6 +59,7 @@ enum rc_filter_type { * @rc: rcdev for this lirc chardev * @carrier_low: when setting the carrier range, first the low end must be * set with an ioctl and then the high end with another ioctl + * @send_timeout_reports: report timeouts in lirc raw IR. * @rawir: queue for incoming raw IR * @scancodes: queue for incoming decoded scancodes * @wait_poll: poll struct for lirc device @@ -71,6 +72,7 @@ struct lirc_fh { struct list_head list; struct rc_dev *rc; int carrier_low; + bool send_timeout_reports; DECLARE_KFIFO_PTR(rawir, unsigned int); DECLARE_KFIFO_PTR(scancodes, struct lirc_scancode); wait_queue_head_t wait_poll; diff --git a/include/media/tuner.h b/include/media/tuner.h index a7796e0a36..ff85d7227f 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -132,7 +132,6 @@ #define TUNER_SONY_BTF_PG472Z 89 /* PAL+SECAM */ #define TUNER_SONY_BTF_PK467Z 90 /* NTSC_JP */ #define TUNER_SONY_BTF_PB463Z 91 /* NTSC */ -#define TUNER_SI2157 92 /* tv card specific */ #define TDA9887_PRESENT (1<<0) diff --git a/include/media/v4l2-async.h b/include/media/v4l2-async.h index 13ff3ad948..fa49011626 100644 --- a/include/media/v4l2-async.h +++ b/include/media/v4l2-async.h @@ -123,45 +123,45 @@ struct v4l2_async_notifier { void v4l2_async_debug_init(struct dentry *debugfs_dir); /** - * v4l2_async_nf_init - Initialize a notifier. + * v4l2_async_notifier_init - Initialize a notifier. * * @notifier: pointer to &struct v4l2_async_notifier * * This function initializes the notifier @asd_list. It must be called * before adding a subdevice to a notifier, using one of: - * v4l2_async_nf_add_fwnode_remote(), - * v4l2_async_nf_add_fwnode(), - * v4l2_async_nf_add_i2c(), - * __v4l2_async_nf_add_subdev() or - * v4l2_async_nf_parse_fwnode_endpoints(). + * v4l2_async_notifier_add_fwnode_remote_subdev(), + * v4l2_async_notifier_add_fwnode_subdev(), + * v4l2_async_notifier_add_i2c_subdev(), + * __v4l2_async_notifier_add_subdev() or + * v4l2_async_notifier_parse_fwnode_endpoints(). */ -void v4l2_async_nf_init(struct v4l2_async_notifier *notifier); +void v4l2_async_notifier_init(struct v4l2_async_notifier *notifier); /** - * __v4l2_async_nf_add_subdev - Add an async subdev to the + * __v4l2_async_notifier_add_subdev - Add an async subdev to the * notifier's master asd list. * * @notifier: pointer to &struct v4l2_async_notifier * @asd: pointer to &struct v4l2_async_subdev * * \warning: Drivers should avoid using this function and instead use one of: - * v4l2_async_nf_add_fwnode(), - * v4l2_async_nf_add_fwnode_remote() or - * v4l2_async_nf_add_i2c(). 
+ * v4l2_async_notifier_add_fwnode_subdev(), + * v4l2_async_notifier_add_fwnode_remote_subdev() or + * v4l2_async_notifier_add_i2c_subdev(). * * Call this function before registering a notifier to link the provided @asd to * the notifiers master @asd_list. The @asd must be allocated with k*alloc() as * it will be freed by the framework when the notifier is destroyed. */ -int __v4l2_async_nf_add_subdev(struct v4l2_async_notifier *notifier, - struct v4l2_async_subdev *asd); +int __v4l2_async_notifier_add_subdev(struct v4l2_async_notifier *notifier, + struct v4l2_async_subdev *asd); struct v4l2_async_subdev * -__v4l2_async_nf_add_fwnode(struct v4l2_async_notifier *notifier, - struct fwnode_handle *fwnode, - unsigned int asd_struct_size); +__v4l2_async_notifier_add_fwnode_subdev(struct v4l2_async_notifier *notifier, + struct fwnode_handle *fwnode, + unsigned int asd_struct_size); /** - * v4l2_async_nf_add_fwnode - Allocate and add a fwnode async + * v4l2_async_notifier_add_fwnode_subdev - Allocate and add a fwnode async * subdev to the notifier's master asd_list. * * @notifier: pointer to &struct v4l2_async_notifier @@ -175,15 +175,16 @@ __v4l2_async_nf_add_fwnode(struct v4l2_async_notifier *notifier, * notifiers @asd_list. The function also gets a reference of the fwnode which * is released later at notifier cleanup time. */ -#define v4l2_async_nf_add_fwnode(notifier, fwnode, type) \ - ((type *)__v4l2_async_nf_add_fwnode(notifier, fwnode, sizeof(type))) +#define v4l2_async_notifier_add_fwnode_subdev(notifier, fwnode, type) \ + ((type *)__v4l2_async_notifier_add_fwnode_subdev(notifier, fwnode, \ + sizeof(type))) struct v4l2_async_subdev * -__v4l2_async_nf_add_fwnode_remote(struct v4l2_async_notifier *notif, - struct fwnode_handle *endpoint, - unsigned int asd_struct_size); +__v4l2_async_notifier_add_fwnode_remote_subdev(struct v4l2_async_notifier *notif, + struct fwnode_handle *endpoint, + unsigned int asd_struct_size); /** - * v4l2_async_nf_add_fwnode_remote - Allocate and add a fwnode + * v4l2_async_notifier_add_fwnode_remote_subdev - Allocate and add a fwnode * remote async subdev to the * notifier's master asd_list. * @@ -199,18 +200,20 @@ __v4l2_async_nf_add_fwnode_remote(struct v4l2_async_notifier *notif, * function also gets a reference of the fwnode which is released later at * notifier cleanup time. * - * This is just like v4l2_async_nf_add_fwnode(), but with the + * This is just like v4l2_async_notifier_add_fwnode_subdev(), but with the * exception that the fwnode refers to a local endpoint, not the remote one. */ -#define v4l2_async_nf_add_fwnode_remote(notifier, ep, type) \ - ((type *)__v4l2_async_nf_add_fwnode_remote(notifier, ep, sizeof(type))) +#define v4l2_async_notifier_add_fwnode_remote_subdev(notifier, ep, type) \ + ((type *) \ + __v4l2_async_notifier_add_fwnode_remote_subdev(notifier, ep, \ + sizeof(type))) struct v4l2_async_subdev * -__v4l2_async_nf_add_i2c(struct v4l2_async_notifier *notifier, - int adapter_id, unsigned short address, - unsigned int asd_struct_size); +__v4l2_async_notifier_add_i2c_subdev(struct v4l2_async_notifier *notifier, + int adapter_id, unsigned short address, + unsigned int asd_struct_size); /** - * v4l2_async_nf_add_i2c - Allocate and add an i2c async + * v4l2_async_notifier_add_i2c_subdev - Allocate and add an i2c async * subdev to the notifier's master asd_list. 
* * @notifier: pointer to &struct v4l2_async_notifier @@ -220,59 +223,59 @@ __v4l2_async_nf_add_i2c(struct v4l2_async_notifier *notifier, * v4l2_async_subdev shall be the first member of the driver's async * sub-device struct, i.e. both begin at the same memory address. * - * Same as v4l2_async_nf_add_fwnode() but for I2C matched + * Same as v4l2_async_notifier_add_fwnode_subdev() but for I2C matched * sub-devices. */ -#define v4l2_async_nf_add_i2c(notifier, adapter, address, type) \ - ((type *)__v4l2_async_nf_add_i2c(notifier, adapter, address, \ - sizeof(type))) +#define v4l2_async_notifier_add_i2c_subdev(notifier, adapter, address, type) \ + ((type *)__v4l2_async_notifier_add_i2c_subdev(notifier, adapter, \ + address, sizeof(type))) /** - * v4l2_async_nf_register - registers a subdevice asynchronous notifier + * v4l2_async_notifier_register - registers a subdevice asynchronous notifier * * @v4l2_dev: pointer to &struct v4l2_device * @notifier: pointer to &struct v4l2_async_notifier */ -int v4l2_async_nf_register(struct v4l2_device *v4l2_dev, - struct v4l2_async_notifier *notifier); +int v4l2_async_notifier_register(struct v4l2_device *v4l2_dev, + struct v4l2_async_notifier *notifier); /** - * v4l2_async_subdev_nf_register - registers a subdevice asynchronous + * v4l2_async_subdev_notifier_register - registers a subdevice asynchronous * notifier for a sub-device * * @sd: pointer to &struct v4l2_subdev * @notifier: pointer to &struct v4l2_async_notifier */ -int v4l2_async_subdev_nf_register(struct v4l2_subdev *sd, - struct v4l2_async_notifier *notifier); +int v4l2_async_subdev_notifier_register(struct v4l2_subdev *sd, + struct v4l2_async_notifier *notifier); /** - * v4l2_async_nf_unregister - unregisters a subdevice + * v4l2_async_notifier_unregister - unregisters a subdevice * asynchronous notifier * * @notifier: pointer to &struct v4l2_async_notifier */ -void v4l2_async_nf_unregister(struct v4l2_async_notifier *notifier); +void v4l2_async_notifier_unregister(struct v4l2_async_notifier *notifier); /** - * v4l2_async_nf_cleanup - clean up notifier resources + * v4l2_async_notifier_cleanup - clean up notifier resources * @notifier: the notifier the resources of which are to be cleaned up * * Release memory resources related to a notifier, including the async * sub-devices allocated for the purposes of the notifier but not the notifier * itself. The user is responsible for calling this function to clean up the * notifier after calling - * v4l2_async_nf_add_fwnode_remote(), - * v4l2_async_nf_add_fwnode(), - * v4l2_async_nf_add_i2c(), - * __v4l2_async_nf_add_subdev() or - * v4l2_async_nf_parse_fwnode_endpoints(). + * v4l2_async_notifier_add_fwnode_remote_subdev(), + * v4l2_async_notifier_add_fwnode_subdev(), + * v4l2_async_notifier_add_i2c_subdev(), + * __v4l2_async_notifier_add_subdev() or + * v4l2_async_notifier_parse_fwnode_endpoints(). * - * There is no harm from calling v4l2_async_nf_cleanup() in other + * There is no harm from calling v4l2_async_notifier_cleanup() in other * cases as long as its memory has been zeroed after it has been * allocated. 
*/ -void v4l2_async_nf_cleanup(struct v4l2_async_notifier *notifier); +void v4l2_async_notifier_cleanup(struct v4l2_async_notifier *notifier); /** * v4l2_async_register_subdev - registers a sub-device to the asynchronous @@ -292,7 +295,7 @@ int v4l2_async_register_subdev(struct v4l2_subdev *sd); * * This function is just like v4l2_async_register_subdev() with the exception * that calling it will also parse firmware interfaces for remote references - * using v4l2_async_nf_parse_fwnode_sensor() and registers the + * using v4l2_async_notifier_parse_fwnode_sensor() and registers the * async sub-devices. The sub-device is similarly unregistered by calling * v4l2_async_unregister_subdev(). * diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index f4105de8a8..ebd9cef133 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -50,8 +50,6 @@ struct video_device; * @p_h264_decode_params: Pointer to a struct v4l2_ctrl_h264_decode_params. * @p_h264_pred_weights: Pointer to a struct v4l2_ctrl_h264_pred_weights. * @p_vp8_frame: Pointer to a VP8 frame params structure. - * @p_vp9_compressed_hdr_probs: Pointer to a VP9 frame compressed header probs structure. - * @p_vp9_frame: Pointer to a VP9 frame params structure. * @p_hevc_sps: Pointer to an HEVC sequence parameter set structure. * @p_hevc_pps: Pointer to an HEVC picture parameter set structure. * @p_hevc_slice_params: Pointer to an HEVC slice parameters structure. @@ -82,8 +80,6 @@ union v4l2_ctrl_ptr { struct v4l2_ctrl_hevc_sps *p_hevc_sps; struct v4l2_ctrl_hevc_pps *p_hevc_pps; struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params; - struct v4l2_ctrl_vp9_compressed_hdr *p_vp9_compressed_hdr_probs; - struct v4l2_ctrl_vp9_frame *p_vp9_frame; struct v4l2_ctrl_hdr10_cll_info *p_hdr10_cll; struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering; struct v4l2_area *p_area; diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index 9c97f1dbd1..7ab033b819 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -463,7 +463,7 @@ typedef int (*parse_endpoint_func)(struct device *dev, struct v4l2_async_subdev *asd); /** - * v4l2_async_nf_parse_fwnode_endpoints - Parse V4L2 fwnode endpoints in a + * v4l2_async_notifier_parse_fwnode_endpoints - Parse V4L2 fwnode endpoints in a * device node * @dev: the device the endpoints of which are to be parsed * @notifier: notifier for @dev @@ -496,7 +496,7 @@ typedef int (*parse_endpoint_func)(struct device *dev, * to retain that configuration, the user needs to allocate memory for it. * * Any notifier populated using this function must be released with a call to - * v4l2_async_nf_cleanup() after it has been unregistered and the async + * v4l2_async_notifier_cleanup() after it has been unregistered and the async * sub-devices are no longer in use, even if the function returned an error. * * Return: %0 on success, including when no async sub-devices are found @@ -505,10 +505,10 @@ typedef int (*parse_endpoint_func)(struct device *dev, * Other error codes as returned by @parse_endpoint */ int -v4l2_async_nf_parse_fwnode_endpoints(struct device *dev, - struct v4l2_async_notifier *notifier, - size_t asd_struct_size, - parse_endpoint_func parse_endpoint); +v4l2_async_notifier_parse_fwnode_endpoints(struct device *dev, + struct v4l2_async_notifier *notifier, + size_t asd_struct_size, + parse_endpoint_func parse_endpoint); /* Helper macros to access the connector links. 
*/ diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h index fdbd5257e0..5a91b548ec 100644 --- a/include/media/v4l2-mem2mem.h +++ b/include/media/v4l2-mem2mem.h @@ -495,11 +495,6 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, int v4l2_m2m_mmap(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct vm_area_struct *vma); -#ifndef CONFIG_MMU -unsigned long v4l2_m2m_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#endif /** * v4l2_m2m_init() - initialize per-driver m2m data * diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index f636bfb5ad..4b72d0e16e 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -267,10 +267,10 @@ struct vb2_buffer { * after the 'buf_finish' op is called. * copied_timestamp: the timestamp of this capture buffer was copied * from an output buffer. - * skip_cache_sync_on_prepare: when set buffer's ->prepare() function - * skips cache sync/invalidation. - * skip_cache_sync_on_finish: when set buffer's ->finish() function - * skips cache sync/invalidation. + * need_cache_sync_on_prepare: when set buffer's ->prepare() function + * performs cache sync/invalidation. + * need_cache_sync_on_finish: when set buffer's ->finish() function + * performs cache sync/invalidation. * queued_entry: entry on the queued buffers list, which holds * all buffers queued from userspace * done_entry: entry on the list that stores all buffers ready @@ -281,8 +281,8 @@ struct vb2_buffer { unsigned int synced:1; unsigned int prepared:1; unsigned int copied_timestamp:1; - unsigned int skip_cache_sync_on_prepare:1; - unsigned int skip_cache_sync_on_finish:1; + unsigned int need_cache_sync_on_prepare:1; + unsigned int need_cache_sync_on_finish:1; struct vb2_plane planes[VB2_MAX_PLANES]; struct list_head queued_entry; @@ -504,8 +504,6 @@ struct vb2_buf_ops { * @allow_cache_hints: when set user-space can pass cache management hints in * order to skip cache flush/invalidation on ->prepare() or/and * ->finish(). - * @non_coherent_mem: when set queue will attempt to allocate buffers using - * non-coherent memory. * @lock: pointer to a mutex that protects the &struct vb2_queue. The * driver can set this to a mutex to let the v4l2 core serialize * the queuing ioctls. If the driver wants to handle locking @@ -585,7 +583,6 @@ struct vb2_queue { unsigned int uses_qbuf:1; unsigned int uses_requests:1; unsigned int allow_cache_hints:1; - unsigned int non_coherent_mem:1; struct mutex *lock; void *owner; @@ -751,8 +748,6 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb); * vb2_core_reqbufs() - Initiate streaming. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory type, as defined by &enum vb2_memory. - * @flags: auxiliary queue/buffer management flags. Currently, the only - * used flag is %V4L2_MEMORY_FLAG_NON_COHERENT. * @count: requested buffer count. * * Videobuf2 core helper to implement VIDIOC_REQBUF() operation. It is called @@ -777,13 +772,12 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb); * Return: returns zero on success; an error code otherwise. */ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int flags, unsigned int *count); + unsigned int *count); /** * vb2_core_create_bufs() - Allocate buffers and any required auxiliary structs * @q: pointer to &struct vb2_queue with videobuf2 queue. 
* @memory: memory type, as defined by &enum vb2_memory. - * @flags: auxiliary queue/buffer management flags. * @count: requested buffer count. * @requested_planes: number of planes requested. * @requested_sizes: array with the size of the planes. @@ -801,7 +795,7 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, * Return: returns zero on success; an error code otherwise. */ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int flags, unsigned int *count, + unsigned int *count, unsigned int requested_planes, const unsigned int requested_sizes[]); @@ -1170,15 +1164,8 @@ static inline void *vb2_get_drv_priv(struct vb2_queue *q) static inline void vb2_set_plane_payload(struct vb2_buffer *vb, unsigned int plane_no, unsigned long size) { - /* - * size must never be larger than the buffer length, so - * warn and clamp to the buffer length if that's the case. - */ - if (plane_no < vb->num_planes) { - if (WARN_ON_ONCE(size > vb->planes[plane_no].length)) - size = vb->planes[plane_no].length; + if (plane_no < vb->num_planes) vb->planes[plane_no].bytesused = size; - } } /** diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h index 7c93f51775..77c694a191 100644 --- a/include/memory/renesas-rpc-if.h +++ b/include/memory/renesas-rpc-if.h @@ -57,11 +57,6 @@ struct rpcif_op { } data; }; -enum rpcif_type { - RPCIF_RCAR_GEN3, - RPCIF_RZ_G2L, -}; - struct rpcif { struct device *dev; void __iomem *base; @@ -69,7 +64,6 @@ struct rpcif { struct regmap *regmap; struct reset_control *rstc; size_t size; - enum rpcif_type type; enum rpcif_data_dir dir; u8 bus_size; void *buffer; @@ -84,7 +78,7 @@ struct rpcif { }; int rpcif_sw_init(struct rpcif *rpc, struct device *dev); -int rpcif_hw_init(struct rpcif *rpc, bool hyperflash); +void rpcif_hw_init(struct rpcif *rpc, bool hyperflash); void rpcif_prepare(struct rpcif *rpc, const struct rpcif_op *op, u64 *offs, size_t *len); int rpcif_manual_xfer(struct rpcif *rpc); diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index d0337a4114..0bdbc0d17d 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -358,6 +358,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ EM ( MF_MSG_SLAB, "kernel slab page" ) \ EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ + EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \ EM ( MF_MSG_HUGE, "huge page" ) \ EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ EM ( MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page" ) \ @@ -372,6 +373,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ EM ( MF_MSG_BUDDY, "free buddy page" ) \ + EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \ EM ( MF_MSG_DAX, "dax page" ) \ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ EMe ( MF_MSG_UNKNOWN, "unknown page" ) diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 8ae07c0ecd..7e54220586 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -232,7 +232,6 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) #define IB_BTH_SE_SHIFT 23 #define IB_BTH_TVER_MASK 0xf #define IB_BTH_TVER_SHIFT 16 -#define IB_BTH_OPCODE_CNP 0x81 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 2e3843b761..465b0d0bda 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -276,7 +276,6 @@ enum 
ib_port_capability_mask2_bits { IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3, IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4, IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5, - IB_PORT_LINK_SPEED_NDR_SUP = 1 << 10, }; #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index fc16b826b2..fdb8633cba 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -144,15 +144,5 @@ ib_get_smp_direction(struct ib_smp *smp) #define IB_NOTICE_TRAP_DR_NOTICE 0x80 #define IB_NOTICE_TRAP_DR_TRUNC 0x40 -/** - * ib_init_query_mad - Initialize query MAD. - * @mad: MAD to initialize. - */ -static inline void ib_init_query_mad(struct ib_smp *mad) -{ - mad->base_version = IB_MGMT_BASE_VERSION; - mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - mad->class_version = 1; - mad->method = IB_MGMT_METHOD_GET; -} + #endif /* IB_SMI_H */ diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 92a673cd9b..5ae9dff74d 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -38,7 +38,6 @@ struct ib_umem_dmabuf { unsigned long first_sg_offset; unsigned long last_sg_trim; void *private; - u8 pinned : 1; }; static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) @@ -140,10 +139,6 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, unsigned long offset, size_t size, int fd, int access, const struct dma_buf_attach_ops *ops); -struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, - unsigned long offset, - size_t size, int fd, - int access); int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); @@ -184,12 +179,6 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, { return ERR_PTR(-EOPNOTSUPP); } -static inline struct ib_umem_dmabuf * -ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, - size_t size, int fd, int access) -{ - return ERR_PTR(-EOPNOTSUPP); -} static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { return -EOPNOTSUPP; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 69d883f7fb..4ba642fc8a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -545,22 +545,6 @@ enum ib_port_speed { IB_SPEED_NDR = 128, }; -enum ib_stat_flag { - IB_STAT_FLAG_OPTIONAL = 1 << 0, -}; - -/** - * struct rdma_stat_desc - * @name - The name of the counter - * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL - * @priv - Driver private information; Core code should not use - */ -struct rdma_stat_desc { - const char *name; - unsigned int flags; - const void *priv; -}; - /** * struct rdma_hw_stats * @lock - Mutex to protect parallel write access to lifespan and values @@ -571,10 +555,8 @@ struct rdma_stat_desc { * should be before being updated again. Stored in jiffies, defaults * to 10 milliseconds, drivers can override the default be specifying * their own value during their allocation routine. - * @descs - Array of pointers to static descriptors used for the counters - * in directory. - * @is_disabled - A bitmap to indicate each counter is currently disabled - * or not. + * @name - Array of pointers to static names used for the counters in + * directory. * @num_counters - How many hardware counters there are. If name is * shorter than this number, a kernel oops will result. 
Driver authors * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) @@ -586,19 +568,36 @@ struct rdma_hw_stats { struct mutex lock; /* Protect lifespan and values[] */ unsigned long timestamp; unsigned long lifespan; - const struct rdma_stat_desc *descs; - unsigned long *is_disabled; + const char * const *names; int num_counters; u64 value[]; }; #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 +/** + * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct + * for drivers. + * @names - Array of static const char * + * @num_counters - How many elements in array + * @lifespan - How many milliseconds between updates + */ +static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const char * const *names, int num_counters, + unsigned long lifespan) +{ + struct rdma_hw_stats *stats; -struct rdma_hw_stats *rdma_alloc_hw_stats_struct( - const struct rdma_stat_desc *descs, int num_counters, - unsigned long lifespan); + stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), + GFP_KERNEL); + if (!stats) + return NULL; + stats->names = names; + stats->num_counters = num_counters; + stats->lifespan = msecs_to_jiffies(lifespan); + + return stats; +} -void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats); /* Define bits for the various functionality this port needs to be supported by * the core. @@ -2570,13 +2569,6 @@ struct ib_device_ops { int (*get_hw_stats)(struct ib_device *device, struct rdma_hw_stats *stats, u32 port, int index); - /** - * modify_hw_stat - Modify the counter configuration - * @enable: true/false when enable/disable a counter - * Return codes - 0 on success or error code otherwise. - */ - int (*modify_hw_stat)(struct ib_device *device, u32 port, - unsigned int counter_index, bool enable); /** * Allows rdma drivers to add their own restrack attributes. */ @@ -2914,15 +2906,6 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, size_t length, u32 min_pgoff, u32 max_pgoff); -static inline int -rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, - struct rdma_user_mmap_entry *entry, - size_t length, u32 pgoff) -{ - return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff, - pgoff); -} - struct rdma_user_mmap_entry * rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, unsigned long pgoff); @@ -4749,23 +4732,6 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) return (u32)(v & IB_GRH_FLOWLABEL_MASK); } -/** - * rdma_get_udp_sport - Calculate and set UDP source port based on the flow - * label. If flow label is not defined in GRH then - * calculate it based on lqpn/rqpn. 
- * - * @fl: flow label from GRH - * @lqpn: local qp number - * @rqpn: remote qp number - */ -static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) -{ - if (!fl) - fl = rdma_calc_flow_label(lqpn, rqpn); - - return rdma_flow_label_to_udp_sport(fl); -} - const struct ib_port_immutable* ib_port_immutable_read(struct ib_device *dev, unsigned int port); #endif /* IB_VERBS_H */ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index 45d5481a78..0295b22cd1 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -63,6 +63,4 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask); -int rdma_counter_modify(struct ib_device *dev, u32 port, - unsigned int index, bool enable); #endif /* _RDMA_COUNTER_H_ */ diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 698f203280..6fe125a71b 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -356,7 +356,6 @@ enum sas_ha_state { SAS_HA_DRAINING, SAS_HA_ATA_EH_ACTIVE, SAS_HA_FROZEN, - SAS_HA_RESUMING, }; struct sas_ha_struct { @@ -661,12 +660,10 @@ extern int sas_register_ha(struct sas_ha_struct *); extern int sas_unregister_ha(struct sas_ha_struct *); extern void sas_prep_resume_ha(struct sas_ha_struct *sas_ha); extern void sas_resume_ha(struct sas_ha_struct *sas_ha); -extern void sas_resume_ha_no_sync(struct sas_ha_struct *sas_ha); extern void sas_suspend_ha(struct sas_ha_struct *sas_ha); int sas_set_phy_speed(struct sas_phy *phy, struct sas_phy_linkrates *rates); int sas_phy_reset(struct sas_phy *phy, int hard_reset); -int sas_phy_enable(struct sas_phy *phy, int enable); extern int sas_queuecommand(struct Scsi_Host *, struct scsi_cmnd *); extern int sas_target_alloc(struct scsi_target *); extern int sas_slave_configure(struct scsi_device *); diff --git a/include/scsi/sas.h b/include/scsi/sas.h index 64154c1fed..4726c1bbec 100644 --- a/include/scsi/sas.h +++ b/include/scsi/sas.h @@ -323,10 +323,8 @@ struct ssp_response_iu { __be32 sense_data_len; __be32 response_data_len; - union { - DECLARE_FLEX_ARRAY(u8, resp_data); - DECLARE_FLEX_ARRAY(u8, sense_data); - }; + u8 resp_data[0]; + u8 sense_data[]; } __attribute__ ((packed)); struct ssp_command_iu { @@ -556,10 +554,8 @@ struct ssp_response_iu { __be32 sense_data_len; __be32 response_data_len; - union { - DECLARE_FLEX_ARRAY(u8, resp_data); - DECLARE_FLEX_ARRAY(u8, sense_data); - }; + u8 resp_data[0]; + u8 sense_data[]; } __attribute__ ((packed)); struct ssp_command_iu { diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 6794d7322c..59afe8787c 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -10,6 +10,7 @@ #include #include #include +#include #include struct Scsi_Host; @@ -64,12 +65,6 @@ struct scsi_pointer { #define SCMD_STATE_COMPLETE 0 #define SCMD_STATE_INFLIGHT 1 -enum scsi_cmnd_submitter { - SUBMITTED_BY_BLOCK_LAYER = 0, - SUBMITTED_BY_SCSI_ERROR_HANDLER = 1, - SUBMITTED_BY_SCSI_RESET_IOCTL = 2, -} __packed; - struct scsi_cmnd { struct scsi_request req; struct scsi_device *device; @@ -95,7 +90,6 @@ struct scsi_cmnd { unsigned char prot_op; unsigned char prot_type; unsigned char prot_flags; - enum scsi_cmnd_submitter submitter; unsigned short cmd_len; enum dma_data_direction sc_data_direction; @@ -123,6 +117,10 @@ struct scsi_cmnd { * command (auto-sense). Length must be * SCSI_SENSE_BUFFERSIZE bytes. */ + /* Low-level done function - can be used by low-level driver to point + * to completion function. 
Not used by mid/upper level code. */ + void (*scsi_done) (struct scsi_cmnd *); + /* * The following fields can be written to by the host specific code. * Everything else should be left alone. @@ -164,11 +162,9 @@ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); - return *(struct scsi_driver **)rq->q->disk->private_data; + return *(struct scsi_driver **)rq->rq_disk->private_data; } -void scsi_done(struct scsi_cmnd *cmd); - extern void scsi_finish_command(struct scsi_cmnd *cmd); extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, @@ -400,7 +396,4 @@ static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) extern void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq); -struct request *scsi_alloc_request(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags); - #endif /* _SCSI_SCSI_CMND_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 647c53b261..b97e142a7c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -207,7 +207,6 @@ struct scsi_device { * creation time */ unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ - unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ @@ -275,9 +274,9 @@ scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...); do { \ struct request *__rq = scsi_cmd_to_rq((scmd)); \ \ - if (__rq->q->disk) \ + if (__rq->rq_disk) \ sdev_dbg((scmd)->device, "[%s] " fmt, \ - __rq->q->disk->disk_name, ##a); \ + __rq->rq_disk->disk_name, ##a); \ else \ sdev_dbg((scmd)->device, fmt, ##a); \ } while (0) @@ -415,8 +414,9 @@ extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *); extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, - unsigned char *buffer, int len, int timeout, - int retries, struct scsi_mode_data *data, + int modepage, unsigned char *buffer, int len, + int timeout, int retries, + struct scsi_mode_data *data, struct scsi_sense_hdr *); extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 72e1a347ba..1a02e58eb4 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -474,9 +474,14 @@ struct scsi_host_template { #define SCSI_DEFAULT_HOST_BLOCKED 7 /* - * Pointer to the SCSI host sysfs attribute groups, NULL terminated. + * Pointer to the sysfs class properties for this host, NULL terminated. */ - const struct attribute_group **shost_groups; + struct device_attribute **shost_attrs; + + /* + * Pointer to the SCSI device properties for this host, NULL terminated. 
+ */ + struct device_attribute **sdev_attrs; /* * Pointer to the SCSI device attribute groups for this host, @@ -511,7 +516,7 @@ struct scsi_host_template { unsigned long irq_flags; \ int rc; \ spin_lock_irqsave(shost->host_lock, irq_flags); \ - rc = func_name##_lck(cmd); \ + rc = func_name##_lck (cmd, cmd->scsi_done); \ spin_unlock_irqrestore(shost->host_lock, irq_flags); \ return rc; \ } @@ -793,6 +798,16 @@ void scsi_host_busy_iter(struct Scsi_Host *, struct class_container; +/* + * These two functions are used to allocate and free a pseudo device + * which will connect to the host adapter itself rather than any + * physical device. You must deallocate when you are done with the + * thing. This physical pseudo-device isn't real and won't be available + * from any high-level drivers. + */ +extern void scsi_free_host_dev(struct scsi_device *); +extern struct scsi_device *scsi_get_host_dev(struct Scsi_Host *); + /* * DIF defines the exchange of protection information between * initiator and SBC block device. diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h index beac64e38b..d2cb9aeaf1 100644 --- a/include/scsi/scsi_ioctl.h +++ b/include/scsi/scsi_ioctl.h @@ -45,8 +45,8 @@ typedef struct scsi_fctargaddress { int scsi_ioctl_block_when_processing_errors(struct scsi_device *sdev, int cmd, bool ndelay); -int scsi_ioctl(struct scsi_device *sdev, fmode_t mode, int cmd, - void __user *arg); +int scsi_ioctl(struct scsi_device *sdev, struct gendisk *disk, fmode_t mode, + int cmd, void __user *arg); int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp); int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp); bool scsi_cmd_allowed(unsigned char *cmd, fmode_t mode); diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 0e75b9277c..05ec927a3c 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -41,7 +41,6 @@ enum sas_linkrate { SAS_LINK_RATE_G2 = SAS_LINK_RATE_3_0_GBPS, SAS_LINK_RATE_6_0_GBPS = 10, SAS_LINK_RATE_12_0_GBPS = 11, - SAS_LINK_RATE_22_5_GBPS = 12, /* These are virtual to the transport class and may never * be signalled normally since the standard defined field * is only 4 bits */ diff --git a/include/scsi/sg.h b/include/scsi/sg.h index 068e35d365..843cefb8ef 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -29,6 +29,10 @@ * For utility and test programs see: http://sg.danny.cz/sg/sg3_utils.html */ +#ifdef __KERNEL__ +extern int sg_big_buff; /* for sysctl */ +#endif + typedef struct sg_iovec /* same structure as used by readv() Linux system */ { /* call. It defines one scatter-gather element. */ diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index 38ea046e65..96666efddb 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -60,6 +60,7 @@ struct dma_chan *snd_dmaengine_pcm_get_chan(struct snd_pcm_substream *substream) * @maxburst: Maximum number of words (note: words, as in units of the * src_addr_width member, not bytes) that can be sent to or received from the * DAI in one burst. + * @slave_id: Slave requester id for the DMA channel. * @filter_data: Custom DMA channel filter data, this will usually be used when * requesting the DMA channel. * @chan_name: Custom channel name to use when requesting DMA channel.
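For context on the fields documented in this hunk (including the @slave_id member it restores): a DAI driver typically fills a struct snd_dmaengine_dai_dma_data at probe time and attaches it with snd_soc_dai_init_dma_data(). A rough sketch under invented values; the FIFO address, burst size and request-line number below are made up for illustration:

	#include <linux/dmaengine.h>
	#include <sound/dmaengine_pcm.h>
	#include <sound/soc.h>

	static struct snd_dmaengine_dai_dma_data example_tx_dma; /* hypothetical */

	static int example_dai_probe(struct snd_soc_dai *dai)
	{
		example_tx_dma.addr = 0x40021000;               /* TX FIFO address, made up */
		example_tx_dma.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
		example_tx_dma.maxburst = 4;                    /* in words, per the doc above */
		example_tx_dma.slave_id = 11;                   /* DMA request line, made up */

		/* Playback data only; no capture stream in this sketch */
		snd_soc_dai_init_dma_data(dai, &example_tx_dma, NULL);
		return 0;
	}

The generic dmaengine PCM code then picks these values up when it configures the DMA slave channel for the substream.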
@@ -73,6 +74,7 @@ struct snd_dmaengine_dai_dma_data { dma_addr_t addr; enum dma_slave_buswidth addr_width; u32 maxburst; + unsigned int slave_id; void *filter_data; const char *chan_name; unsigned int fifo_size; diff --git a/include/sound/graph_card.h b/include/sound/graph_card.h index 4c8b94c77b..6f10bfb0d5 100644 --- a/include/sound/graph_card.h +++ b/include/sound/graph_card.h @@ -9,27 +9,6 @@ #include -typedef int (*GRAPH2_CUSTOM)(struct asoc_simple_priv *priv, - struct device_node *lnk, - struct link_info *li); - -struct graph2_custom_hooks { - int (*hook_pre)(struct asoc_simple_priv *priv); - int (*hook_post)(struct asoc_simple_priv *priv); - GRAPH2_CUSTOM custom_normal; - GRAPH2_CUSTOM custom_dpcm; - GRAPH2_CUSTOM custom_c2c; -}; - int audio_graph_parse_of(struct asoc_simple_priv *priv, struct device *dev); -int audio_graph2_parse_of(struct asoc_simple_priv *priv, struct device *dev, - struct graph2_custom_hooks *hooks); - -int audio_graph2_link_normal(struct asoc_simple_priv *priv, - struct device_node *lnk, struct link_info *li); -int audio_graph2_link_dpcm(struct asoc_simple_priv *priv, - struct device_node *lnk, struct link_info *li); -int audio_graph2_link_c2c(struct asoc_simple_priv *priv, - struct device_node *lnk, struct link_info *li); #endif /* __GRAPH_CARD_H */ diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index 6a90ce405e..22af68b014 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -558,7 +558,6 @@ int snd_hdac_stream_set_params(struct hdac_stream *azx_dev, void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start); void snd_hdac_stream_clear(struct hdac_stream *azx_dev); void snd_hdac_stream_stop(struct hdac_stream *azx_dev); -void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus); void snd_hdac_stream_reset(struct hdac_stream *azx_dev); void snd_hdac_stream_sync_trigger(struct hdac_stream *azx_dev, bool set, unsigned int streams, unsigned int reg); diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 77123c3e40..d4e31ea16a 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -78,35 +78,36 @@ struct hdac_ext_stream { container_of(s, struct hdac_ext_stream, hstream) void snd_hdac_ext_stream_init(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, int idx, - int direction, int tag); + struct hdac_ext_stream *stream, int idx, + int direction, int tag); int snd_hdac_ext_stream_init_all(struct hdac_bus *bus, int start_idx, - int num_stream, int dir); + int num_stream, int dir); void snd_hdac_stream_free_all(struct hdac_bus *bus); void snd_hdac_link_free_all(struct hdac_bus *bus); struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, struct snd_pcm_substream *substream, int type); -void snd_hdac_ext_stream_release(struct hdac_ext_stream *hext_stream, int type); +void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type); void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, bool decouple); + struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, struct hdac_ext_stream *azx_dev, bool decouple); +void snd_hdac_ext_stop_streams(struct hdac_bus *bus); int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, u32 value); + struct hdac_ext_stream *stream, u32 value); int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream); + struct hdac_ext_stream *stream); 
void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus, bool enable, int index); int snd_hdac_ext_stream_set_dpibr(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, u32 value); -int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *hext_stream, u32 value); + struct hdac_ext_stream *stream, u32 value); +int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *stream, u32 value); -void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *hext_stream); -void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *hext_stream); -void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *hext_stream); -int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *hext_stream, int fmt); +void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *hstream); +void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *hstream); +void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *hstream); +int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *stream, int fmt); struct hdac_ext_link { struct hdac_bus *bus; diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 089a760d36..d057480586 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -10,14 +10,6 @@ #include -enum nhlt_link_type { - NHLT_LINK_HDA = 0, - NHLT_LINK_DSP = 1, - NHLT_LINK_DMIC = 2, - NHLT_LINK_SSP = 3, - NHLT_LINK_INVALID -}; - #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_INTEL_NHLT) struct wav_fmt { @@ -41,6 +33,14 @@ struct wav_fmt_ext { u8 sub_fmt[16]; } __packed; +enum nhlt_link_type { + NHLT_LINK_HDA = 0, + NHLT_LINK_DSP = 1, + NHLT_LINK_DMIC = 2, + NHLT_LINK_SSP = 3, + NHLT_LINK_INVALID +}; + enum nhlt_device_type { NHLT_DEVICE_BT = 0, NHLT_DEVICE_DMIC = 1, @@ -132,12 +132,6 @@ void intel_nhlt_free(struct nhlt_acpi_table *addr); int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt); -bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type); -struct nhlt_specific_cfg * -intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, - u32 bus_id, u8 link_type, u8 vbps, u8 bps, - u8 num_ch, u32 rate, u8 dir, u8 dev_type); - #else struct nhlt_acpi_table; @@ -156,21 +150,6 @@ static inline int intel_nhlt_get_dmic_geo(struct device *dev, { return 0; } - -static inline bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, - u8 link_type) -{ - return false; -} - -static inline struct nhlt_specific_cfg * -intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, - u32 bus_id, u8 link_type, u8 vbps, u8 bps, - u8 num_ch, u32 rate, u8 dir, u8 dev_type) -{ - return NULL; -} - #endif #endif diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 653dfffb3a..b197e3f431 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -9,20 +9,16 @@ #ifndef __SOUND_MEMALLOC_H #define __SOUND_MEMALLOC_H -#include #include struct device; struct vm_area_struct; -struct sg_table; /* * buffer device info */ struct snd_dma_device { int type; /* SNDRV_DMA_TYPE_XXX */ - enum dma_data_direction dir; /* DMA direction */ - bool need_sync; /* explicit sync needed? 
*/ struct device *dev; /* generic device */ }; @@ -36,21 +32,19 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_CONTINUOUS 1 /* continuous no-DMA memory */ #define SNDRV_DMA_TYPE_DEV 2 /* generic device continuous */ #define SNDRV_DMA_TYPE_DEV_WC 5 /* continuous write-combined */ +#ifdef CONFIG_SND_DMA_SGBUF +#define SNDRV_DMA_TYPE_DEV_SG 3 /* generic device SG-buffer */ +#define SNDRV_DMA_TYPE_DEV_WC_SG 6 /* SG write-combined */ +#else +#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ +#define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC +#endif #ifdef CONFIG_GENERIC_ALLOCATOR #define SNDRV_DMA_TYPE_DEV_IRAM 4 /* generic device iram-buffer */ #else #define SNDRV_DMA_TYPE_DEV_IRAM SNDRV_DMA_TYPE_DEV #endif #define SNDRV_DMA_TYPE_VMALLOC 7 /* vmalloc'ed buffer */ -#define SNDRV_DMA_TYPE_NONCONTIG 8 /* non-coherent SG buffer */ -#define SNDRV_DMA_TYPE_NONCOHERENT 9 /* non-coherent buffer */ -#ifdef CONFIG_SND_DMA_SGBUF -#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_NONCONTIG -#define SNDRV_DMA_TYPE_DEV_WC_SG 6 /* SG write-combined */ -#else -#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ -#define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC -#endif /* * info for buffer allocation @@ -72,52 +66,22 @@ static inline unsigned int snd_sgbuf_aligned_pages(size_t size) } /* allocate/release a buffer */ -int snd_dma_alloc_dir_pages(int type, struct device *dev, - enum dma_data_direction dir, size_t size, - struct snd_dma_buffer *dmab); - -static inline int snd_dma_alloc_pages(int type, struct device *dev, - size_t size, struct snd_dma_buffer *dmab) -{ - return snd_dma_alloc_dir_pages(type, dev, DMA_BIDIRECTIONAL, size, dmab); -} - +int snd_dma_alloc_pages(int type, struct device *dev, size_t size, + struct snd_dma_buffer *dmab); int snd_dma_alloc_pages_fallback(int type, struct device *dev, size_t size, struct snd_dma_buffer *dmab); void snd_dma_free_pages(struct snd_dma_buffer *dmab); int snd_dma_buffer_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area); -enum snd_dma_sync_mode { SNDRV_DMA_SYNC_CPU, SNDRV_DMA_SYNC_DEVICE }; -#ifdef CONFIG_HAS_DMA -void snd_dma_buffer_sync(struct snd_dma_buffer *dmab, - enum snd_dma_sync_mode mode); -#else -static inline void snd_dma_buffer_sync(struct snd_dma_buffer *dmab, - enum snd_dma_sync_mode mode) {} -#endif - dma_addr_t snd_sgbuf_get_addr(struct snd_dma_buffer *dmab, size_t offset); struct page *snd_sgbuf_get_page(struct snd_dma_buffer *dmab, size_t offset); unsigned int snd_sgbuf_get_chunk_size(struct snd_dma_buffer *dmab, unsigned int ofs, unsigned int size); /* device-managed memory allocator */ -struct snd_dma_buffer *snd_devm_alloc_dir_pages(struct device *dev, int type, - enum dma_data_direction dir, - size_t size); - -static inline struct snd_dma_buffer * -snd_devm_alloc_pages(struct device *dev, int type, size_t size) -{ - return snd_devm_alloc_dir_pages(dev, type, DMA_BIDIRECTIONAL, size); -} - -static inline struct sg_table * -snd_dma_noncontig_sg_table(struct snd_dma_buffer *dmab) -{ - return dmab->private_data; -} +struct snd_dma_buffer *snd_devm_alloc_pages(struct device *dev, int type, + size_t size); #endif /* __SOUND_MEMALLOC_H */ diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 36da42cd07..33451f8ff7 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -147,9 +147,6 @@ struct snd_pcm_ops { #define SNDRV_PCM_FMTBIT_S24_BE _SNDRV_PCM_FMTBIT(S24_BE) #define SNDRV_PCM_FMTBIT_U24_LE _SNDRV_PCM_FMTBIT(U24_LE) #define SNDRV_PCM_FMTBIT_U24_BE 
_SNDRV_PCM_FMTBIT(U24_BE) -// For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the -// available bit count in most significant bit. It's for the case of so-called 'left-justified' or -// `right-padding` sample which has less width than 32 bit. #define SNDRV_PCM_FMTBIT_S32_LE _SNDRV_PCM_FMTBIT(S32_LE) #define SNDRV_PCM_FMTBIT_S32_BE _SNDRV_PCM_FMTBIT(S32_BE) #define SNDRV_PCM_FMTBIT_U32_LE _SNDRV_PCM_FMTBIT(U32_LE) @@ -617,7 +614,6 @@ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream); void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); -unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream); /** * snd_pcm_stream_lock_irqsave - Lock the PCM stream @@ -636,20 +632,6 @@ unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *subs void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags); -/** - * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking - * @substream: PCM substream - * @flags: irq flags - * - * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with - * the single-depth lockdep subclass. - */ -#define snd_pcm_stream_lock_irqsave_nested(substream, flags) \ - do { \ - typecheck(unsigned long, flags); \ - flags = _snd_pcm_stream_lock_irqsave_nested(substream); \ - } while (0) - /** * snd_pcm_group_for_each_entry - iterate over the linked substreams * @s: the iterator diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index df430f1c2a..51b3b485a9 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -42,7 +42,6 @@ struct prop_nums { int cpus; int codecs; int platforms; - int c2c; }; struct asoc_simple_priv { @@ -55,7 +54,6 @@ struct asoc_simple_priv { struct snd_soc_dai_link_component *platforms; struct asoc_simple_data adata; struct snd_soc_codec_conf *codec_conf; - struct snd_soc_pcm_stream *c2c_conf; struct prop_nums num; unsigned int mclk_fs; } *dai_props; @@ -66,7 +64,6 @@ struct asoc_simple_priv { struct snd_soc_dai_link_component *dlcs; struct snd_soc_dai_link_component dummy; struct snd_soc_codec_conf *codec_conf; - struct snd_soc_pcm_stream *c2c_conf; struct gpio_desc *pa_gpio; const struct snd_soc_ops *ops; unsigned int dpcm_selectable:1; @@ -118,7 +115,7 @@ struct asoc_simple_priv { ((codec) = simple_props_to_dai_codec(props, i)); \ (i)++) -#define SNDRV_MAX_LINKS 512 +#define SNDRV_MAX_LINKS 128 struct link_info { int link; /* number of link */ @@ -183,7 +180,6 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv, int asoc_simple_remove(struct platform_device *pdev); int asoc_graph_card_probe(struct snd_soc_card *card); -int asoc_graph_is_ports0(struct device_node *port); #ifdef DEBUG static inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv, diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index ac0893df9c..2f3fa385c0 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -129,8 +129,6 @@ struct snd_soc_acpi_link_adr { * all firmware/topology related fields. * * @id: ACPI ID (usually the codec's) used to find a matching machine driver. - * @comp_ids: list of compatible audio codecs using the same machine driver, - * firmware and topology * @link_mask: describes required board layout, e.g. for SoundWire. 
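 * (Illustrative aside by the editor, not part of the original kernel-doc: a
 * minimal, hypothetical match-table entry using only the fields documented
 * here; the ACPI ID "ABCD1234" and the driver name "foo-machine" are made up.
 *
 *	static struct snd_soc_acpi_mach foo_machines[] = {
 *		{
 *			.id = "ABCD1234",
 *			.drv_name = "foo-machine",
 *		},
 *		{}
 *	};
 *
 * The match code is expected to walk such a null-terminated table and pick
 * the first entry whose @id corresponds to an ACPI device present on the
 * platform.)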
* @links: array of link _ADR descriptors, null terminated. * @drv_name: machine driver name @@ -147,8 +145,7 @@ struct snd_soc_acpi_link_adr { */ /* Descriptor for SST ASoC machine driver */ struct snd_soc_acpi_mach { - u8 id[ACPI_ID_LEN]; - const struct snd_soc_acpi_codecs *comp_ids; + const u8 id[ACPI_ID_LEN]; const u32 link_mask; const struct snd_soc_acpi_link_adr *links; const char *drv_name; diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index a52080407b..8c4d683059 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -148,8 +148,6 @@ struct snd_soc_component_driver { struct vm_area_struct *vma); int (*ack)(struct snd_soc_component *component, struct snd_pcm_substream *substream); - snd_pcm_sframes_t (*delay)(struct snd_soc_component *component, - struct snd_pcm_substream *substream); const struct snd_compress_ops *compress_ops; @@ -222,15 +220,17 @@ struct snd_soc_component { int (*init)(struct snd_soc_component *component); /* function mark */ - void *mark_module; + struct snd_pcm_substream *mark_module; struct snd_pcm_substream *mark_open; struct snd_pcm_substream *mark_hw_params; struct snd_pcm_substream *mark_trigger; struct snd_compr_stream *mark_compr_open; void *mark_pm; +#ifdef CONFIG_DEBUG_FS struct dentry *debugfs_root; const char *debugfs_prefix; +#endif }; #define for_each_component_dais(component, dai)\ @@ -335,11 +335,6 @@ static inline int snd_soc_component_cache_sync( return regcache_sync(component->regmap); } -static inline int snd_soc_component_is_codec(struct snd_soc_component *component) -{ - return component->driver->non_legacy_dai_naming; -} - void snd_soc_component_set_aux(struct snd_soc_component *component, struct snd_soc_aux_dev *aux); int snd_soc_component_init(struct snd_soc_component *component); @@ -396,13 +391,15 @@ void snd_soc_component_exit_regmap(struct snd_soc_component *component); #define snd_soc_component_module_get_when_open(component, substream) \ snd_soc_component_module_get(component, substream, 1) int snd_soc_component_module_get(struct snd_soc_component *component, - void *mark, int upon_open); + struct snd_pcm_substream *substream, + int upon_open); #define snd_soc_component_module_put_when_remove(component) \ snd_soc_component_module_put(component, NULL, 0, 0) #define snd_soc_component_module_put_when_close(component, substream, rollback) \ snd_soc_component_module_put(component, substream, 1, rollback) void snd_soc_component_module_put(struct snd_soc_component *component, - void *mark, int upon_open, int rollback); + struct snd_pcm_substream *substream, + int upon_open, int rollback); static inline void snd_soc_component_set_drvdata(struct snd_soc_component *c, void *data) @@ -458,10 +455,8 @@ int snd_soc_component_of_xlate_dai_id(struct snd_soc_component *component, int snd_soc_component_of_xlate_dai_name(struct snd_soc_component *component, const struct of_phandle_args *args, const char **dai_name); -int snd_soc_component_compr_open(struct snd_soc_component *component, - struct snd_compr_stream *cstream); -void snd_soc_component_compr_free(struct snd_soc_component *component, - struct snd_compr_stream *cstream, +int snd_soc_component_compr_open(struct snd_compr_stream *cstream); +void snd_soc_component_compr_free(struct snd_compr_stream *cstream, int rollback); int snd_soc_component_compr_trigger(struct snd_compr_stream *cstream, int cmd); int snd_soc_component_compr_set_params(struct snd_compr_stream *cstream, @@ -507,7 +502,5 @@ int snd_soc_pcm_component_pm_runtime_get(struct 
snd_soc_pcm_runtime *rtd, void snd_soc_pcm_component_pm_runtime_put(struct snd_soc_pcm_runtime *rtd, void *stream, int rollback); int snd_soc_pcm_component_ack(struct snd_pcm_substream *substream); -void snd_soc_pcm_component_delay(struct snd_pcm_substream *substream, - snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay); #endif /* __SOC_COMPONENT_H */ diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index bbd821d2df..0dcb361a98 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -208,6 +208,8 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai, struct snd_pcm_substream *substream); void snd_soc_dai_shutdown(struct snd_soc_dai *dai, struct snd_pcm_substream *substream, int rollback); +snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai, + struct snd_pcm_substream *substream); void snd_soc_dai_suspend(struct snd_soc_dai *dai); void snd_soc_dai_resume(struct snd_soc_dai *dai); int snd_soc_dai_compress_new(struct snd_soc_dai *dai, @@ -236,8 +238,6 @@ int snd_soc_pcm_dai_trigger(struct snd_pcm_substream *substream, int cmd, int rollback); int snd_soc_pcm_dai_bespoke_trigger(struct snd_pcm_substream *substream, int cmd); -void snd_soc_pcm_dai_delay(struct snd_pcm_substream *substream, - snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay); int snd_soc_dai_compr_startup(struct snd_soc_dai *dai, struct snd_compr_stream *cstream); @@ -295,9 +295,9 @@ struct snd_soc_dai_ops { unsigned int *rx_num, unsigned int *rx_slot); int (*set_tristate)(struct snd_soc_dai *dai, int tristate); - int (*set_stream)(struct snd_soc_dai *dai, - void *stream, int direction); - void *(*get_stream)(struct snd_soc_dai *dai, int direction); + int (*set_sdw_stream)(struct snd_soc_dai *dai, + void *stream, int direction); + void *(*get_sdw_stream)(struct snd_soc_dai *dai, int direction); /* * DAI digital mute - optional. @@ -515,42 +515,42 @@ static inline void *snd_soc_dai_get_drvdata(struct snd_soc_dai *dai) } /** - * snd_soc_dai_set_stream() - Configures a DAI for stream operation + * snd_soc_dai_set_sdw_stream() - Configures a DAI for SDW stream operation * @dai: DAI - * @stream: STREAM (opaque structure depending on DAI type) + * @stream: STREAM * @direction: Stream direction (Playback/Capture) - * Some subsystems, such as SoundWire, don't have a notion of direction and we reuse + * SoundWire subsystem doesn't have a notion of direction and we reuse * the ASoC stream direction to configure sink/source ports. * Playback maps to source ports and Capture for sink ports. * * This should be invoked with NULL to clear the stream set previously. * Returns 0 on success, a negative error code otherwise. */ -static inline int snd_soc_dai_set_stream(struct snd_soc_dai *dai, - void *stream, int direction) +static inline int snd_soc_dai_set_sdw_stream(struct snd_soc_dai *dai, + void *stream, int direction) { - if (dai->driver->ops->set_stream) - return dai->driver->ops->set_stream(dai, stream, direction); + if (dai->driver->ops->set_sdw_stream) + return dai->driver->ops->set_sdw_stream(dai, stream, direction); else return -ENOTSUPP; } /** - * snd_soc_dai_get_stream() - Retrieves stream from DAI + * snd_soc_dai_get_sdw_stream() - Retrieves SDW stream from DAI * @dai: DAI * @direction: Stream direction (Playback/Capture) * * This routine only retrieves a stream that was previously configured - * with snd_soc_dai_get_stream() + * with snd_soc_dai_set_sdw_stream() * * Returns pointer to stream or an ERR_PTR value, e.g. * ERR_PTR(-ENOTSUPP) if the callback is not supported. */
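/*
 * Illustrative aside by the editor, not part of the original header: a
 * minimal sketch of how a SoundWire codec driver might back these two
 * callbacks, assuming a hypothetical foo_priv that stores one stream
 * pointer per direction.
 *
 *	static int foo_set_sdw_stream(struct snd_soc_dai *dai,
 *				      void *sdw_stream, int direction)
 *	{
 *		struct foo_priv *foo = snd_soc_dai_get_drvdata(dai);
 *
 *		foo->sdw_stream[direction] = sdw_stream;
 *		return 0;
 *	}
 *
 *	static void *foo_get_sdw_stream(struct snd_soc_dai *dai, int direction)
 *	{
 *		struct foo_priv *foo = snd_soc_dai_get_drvdata(dai);
 *
 *		return foo->sdw_stream[direction];
 *	}
 *
 * wired up as .set_sdw_stream = foo_set_sdw_stream and
 * .get_sdw_stream = foo_get_sdw_stream in the DAI ops; calling the setter
 * with a NULL stream clears the slot, as the kernel-doc above requires.
 */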
-static inline void *snd_soc_dai_get_stream(struct snd_soc_dai *dai, - int direction) +static inline void *snd_soc_dai_get_sdw_stream(struct snd_soc_dai *dai, + int direction) { - if (dai->driver->ops->get_stream) - return dai->driver->ops->get_stream(dai, direction); + if (dai->driver->ops->get_sdw_stream) + return dai->driver->ops->get_sdw_stream(dai, direction); else return ERR_PTR(-ENOTSUPP); } diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 75b92d8839..e296a3949b 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -101,8 +101,6 @@ struct snd_soc_dpcm_runtime { enum snd_soc_dpcm_state state; int trigger_pending; /* trigger cmd + 1 if pending, 0 if not */ - - int be_start; /* refcount protected by BE stream pcm lock */ }; #define for_each_dpcm_fe(be, stream, _dpcm) \ @@ -161,7 +159,6 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd); int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream); int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, int event); -bool dpcm_end_walk_at_be(struct snd_soc_dapm_widget *widget, enum snd_soc_dapm_direction dir); #define dpcm_be_dai_startup_rollback(fe, stream, last) \ dpcm_be_dai_stop(fe, stream, 0, last) diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h index b4b896f83b..3e8a85e1e8 100644 --- a/include/sound/soc-topology.h +++ b/include/sound/soc-topology.h @@ -151,7 +151,7 @@ struct snd_soc_tplg_ops { struct snd_soc_tplg_hdr *); /* completion - called at completion of firmware loading */ - int (*complete)(struct snd_soc_component *comp); + void (*complete)(struct snd_soc_component *); /* manifest - optional to inform component of manifest */ int (*manifest)(struct snd_soc_component *, int index, diff --git a/include/sound/soc.h b/include/sound/soc.h index 7a1650b303..8e6dd8a257 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -893,6 +893,8 @@ struct snd_soc_card { struct mutex pcm_mutex; enum snd_soc_pcm_subclass pcm_subclass; + spinlock_t dpcm_lock; + int (*probe)(struct snd_soc_card *card); int (*late_probe)(struct snd_soc_card *card); int (*remove)(struct snd_soc_card *card); @@ -1211,7 +1213,6 @@ int snd_soc_of_parse_card_name(struct snd_soc_card *card, const char *propname); int snd_soc_of_parse_audio_simple_widgets(struct snd_soc_card *card, const char *propname); -int snd_soc_of_parse_pin_switches(struct snd_soc_card *card, const char *prop); int snd_soc_of_get_slot_mask(struct device_node *np, const char *prop_name, unsigned int *mask); diff --git a/include/sound/sof.h b/include/sound/sof.h index 813680ab9a..6a1cd8e783 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -17,28 +17,6 @@ struct snd_sof_dsp_ops; -/** - * enum sof_fw_state - DSP firmware state definitions - * @SOF_FW_BOOT_NOT_STARTED: firmware boot is not yet started - * @SOF_FW_BOOT_PREPARE: preparing for boot (firmware loading for example) - * @SOF_FW_BOOT_IN_PROGRESS: firmware boot is in progress - * @SOF_FW_BOOT_FAILED: firmware boot failed - * @SOF_FW_BOOT_READY_FAILED: firmware booted but fw_ready op failed - * @SOF_FW_BOOT_READY_OK: firmware booted and fw_ready op passed - * @SOF_FW_BOOT_COMPLETE: firmware is booted up and functional - * @SOF_FW_CRASHED: firmware crashed after successful boot - */ -enum sof_fw_state { - SOF_FW_BOOT_NOT_STARTED = 0, - SOF_FW_BOOT_PREPARE, - SOF_FW_BOOT_IN_PROGRESS, - SOF_FW_BOOT_FAILED, - SOF_FW_BOOT_READY_FAILED, - SOF_FW_BOOT_READY_OK, - 
SOF_FW_BOOT_COMPLETE, - SOF_FW_CRASHED, -}; - /* * SOF Platform data. */ @@ -96,6 +74,11 @@ struct sof_dev_desc { int resindex_pcicfg_base; int resindex_imr_base; int irqindex_host_ipc; + int resindex_dma_base; + + /* DMA only valid when resindex_dma_base != -1*/ + int dma_engine; + int dma_size; /* IPC timeouts in ms */ int ipc_timeout; diff --git a/include/sound/sof/dai-intel.h b/include/sound/sof/dai-intel.h index 7a266f4198..136adf6686 100644 --- a/include/sound/sof/dai-intel.h +++ b/include/sound/sof/dai-intel.h @@ -48,10 +48,6 @@ #define SOF_DAI_INTEL_SSP_CLKCTRL_FS_KA BIT(4) /* bclk idle */ #define SOF_DAI_INTEL_SSP_CLKCTRL_BCLK_IDLE_HIGH BIT(5) -/* mclk early start */ -#define SOF_DAI_INTEL_SSP_CLKCTRL_MCLK_ES BIT(6) -/* bclk early start */ -#define SOF_DAI_INTEL_SSP_CLKCTRL_BCLK_ES BIT(7) /* DMIC max. four controllers for eight microphone channels */ #define SOF_DAI_INTEL_DMIC_NUM_CTRL 4 diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 59ee50ac77..6bb403e8c5 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -12,8 +12,6 @@ #include #include #include -#include -#include /* * DAI Configuration. @@ -52,26 +50,6 @@ #define SOF_DAI_FMT_INV_MASK 0x0f00 #define SOF_DAI_FMT_CLOCK_PROVIDER_MASK 0xf000 -/* - * DAI_CONFIG flags. The 4 LSB bits are used for the commands, HW_PARAMS, HW_FREE and PAUSE - * representing when the IPC is sent. The 4 MSB bits are used to add quirks along with the above - * commands. - */ -#define SOF_DAI_CONFIG_FLAGS_CMD_MASK 0xF -#define SOF_DAI_CONFIG_FLAGS_NONE 0 /**< DAI_CONFIG sent without stage information */ -#define SOF_DAI_CONFIG_FLAGS_HW_PARAMS BIT(0) /**< DAI_CONFIG sent during hw_params stage */ -#define SOF_DAI_CONFIG_FLAGS_HW_FREE BIT(1) /**< DAI_CONFIG sent during hw_free stage */ -/**< DAI_CONFIG sent during pause trigger. Only available ABI 3.20 onwards */ -#define SOF_DAI_CONFIG_FLAGS_PAUSE BIT(2) -#define SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT 4 -#define SOF_DAI_CONFIG_FLAGS_QUIRK_MASK (0xF << SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT) -/* - * This should be used along with the SOF_DAI_CONFIG_FLAGS_HW_PARAMS to indicate that pipeline - * stop/pause and DAI DMA stop/pause should happen in two steps. This change is only available - * ABI 3.20 onwards. 
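 * (Editor's illustrative note, not upstream text: under this encoding, a
 * two-step stop requested at hw_params time would presumably be composed as
 *
 *	flags = SOF_DAI_CONFIG_FLAGS_HW_PARAMS |
 *		(SOF_DAI_CONFIG_FLAGS_2_STEP_STOP <<
 *		 SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT);
 *
 * that is, the command in the low nibble and the quirk shifted into the
 * high nibble.)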
- */ -#define SOF_DAI_CONFIG_FLAGS_2_STEP_STOP BIT(0) - /** \brief Types of DAI */ enum sof_ipc_dai_type { SOF_DAI_INTEL_NONE = 0, /**< None */ @@ -81,10 +59,6 @@ enum sof_ipc_dai_type { SOF_DAI_INTEL_ALH, /**< Intel ALH */ SOF_DAI_IMX_SAI, /**< i.MX SAI */ SOF_DAI_IMX_ESAI, /**< i.MX ESAI */ - SOF_DAI_AMD_BT, /**< AMD ACP BT*/ - SOF_DAI_AMD_SP, /**< AMD ACP SP */ - SOF_DAI_AMD_DMIC, /**< AMD ACP DMIC */ - SOF_DAI_MEDIATEK_AFE, /**< Mediatek AFE */ }; /* general purpose DAI configuration */ @@ -95,8 +69,7 @@ struct sof_ipc_dai_config { /* physical protocol and clocking */ uint16_t format; /**< SOF_DAI_FMT_ */ - uint8_t group_id; /**< group ID, 0 means no group (ABI 3.17) */ - uint8_t flags; /**< SOF_DAI_CONFIG_FLAGS_ (ABI 3.19) */ + uint16_t reserved16; /**< alignment */ /* reserved for future use */ uint32_t reserved[8]; @@ -109,10 +82,6 @@ struct sof_ipc_dai_config { struct sof_ipc_dai_alh_params alh; struct sof_ipc_dai_esai_params esai; struct sof_ipc_dai_sai_params sai; - struct sof_ipc_dai_acp_params acpbt; - struct sof_ipc_dai_acp_params acpsp; - struct sof_ipc_dai_acp_params acpdmic; - struct sof_ipc_dai_mtk_afe_params afe; }; } __packed; diff --git a/include/sound/sof/debug.h b/include/sound/sof/debug.h index 38693e3fb5..3ecb579378 100644 --- a/include/sound/sof/debug.h +++ b/include/sound/sof/debug.h @@ -19,8 +19,6 @@ enum sof_ipc_dbg_mem_zone { SOF_IPC_MEM_ZONE_SYS_RUNTIME = 1, /**< System-runtime zone */ SOF_IPC_MEM_ZONE_RUNTIME = 2, /**< Runtime zone */ SOF_IPC_MEM_ZONE_BUFFER = 3, /**< Buffer zone */ - SOF_IPC_MEM_ZONE_RUNTIME_SHARED = 4, /**< System runtime zone */ - SOF_IPC_MEM_ZONE_SYS_SHARED = 5, /**< System shared zone */ }; /** ABI3.18 */ diff --git a/include/sound/sof/header.h b/include/sound/sof/header.h index b97a76bcb6..4c747c52e0 100644 --- a/include/sound/sof/header.h +++ b/include/sound/sof/header.h @@ -119,7 +119,6 @@ #define SOF_IPC_TRACE_DMA_POSITION SOF_CMD_TYPE(0x002) #define SOF_IPC_TRACE_DMA_PARAMS_EXT SOF_CMD_TYPE(0x003) #define SOF_IPC_TRACE_FILTER_UPDATE SOF_CMD_TYPE(0x004) /**< ABI3.17 */ -#define SOF_IPC_TRACE_DMA_FREE SOF_CMD_TYPE(0x005) /**< ABI3.20 */ /* debug */ #define SOF_IPC_DEBUG_MEM_USAGE SOF_CMD_TYPE(0x001) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 7660a78465..a23be89119 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -21,22 +21,6 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) -#undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ - (__entry->__rel_loc_##field & 0xffff)) - -#undef __get_rel_dynamic_array_len -#define __get_rel_dynamic_array_len(field) \ - ((__entry->__rel_loc_##field >> 16) & 0xffff) - -#undef __get_rel_str -#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) - #undef __perf_count #define __perf_count(c) (c) @@ -109,7 +93,8 @@ __section("__bpf_raw_tp_map") = { \ #define FIRST(x, ...) 
x -#define __CHECK_WRITABLE_BUF_SIZE(call, proto, args, size) \ +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ static inline void bpf_test_buffer_##call(void) \ { \ /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ @@ -118,12 +103,8 @@ static inline void bpf_test_buffer_##call(void) \ */ \ FIRST(proto); \ (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ -} - -#undef DEFINE_EVENT_WRITABLE -#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ - __CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \ - __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) +} \ +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -138,17 +119,9 @@ static inline void bpf_test_buffer_##call(void) \ __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0) -#undef DECLARE_TRACE_WRITABLE -#define DECLARE_TRACE_WRITABLE(call, proto, args, size) \ - __CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \ - __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ - __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), size) - #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef DECLARE_TRACE_WRITABLE #undef DEFINE_EVENT_WRITABLE -#undef __CHECK_WRITABLE_BUF_SIZE #undef __DEFINE_EVENT #undef FIRST diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 499f5fabd2..bca73e8c8c 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -1016,32 +1016,31 @@ TRACE_EVENT(afs_dir_check_failed, __entry->vnode, __entry->off, __entry->i_size) ); -TRACE_EVENT(afs_folio_dirty, - TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio), +TRACE_EVENT(afs_page_dirty, + TP_PROTO(struct afs_vnode *vnode, const char *where, struct page *page), - TP_ARGS(vnode, where, folio), + TP_ARGS(vnode, where, page), TP_STRUCT__entry( __field(struct afs_vnode *, vnode ) __field(const char *, where ) - __field(pgoff_t, index ) + __field(pgoff_t, page ) __field(unsigned long, from ) __field(unsigned long, to ) ), TP_fast_assign( - unsigned long priv = (unsigned long)folio_get_private(folio); __entry->vnode = vnode; __entry->where = where; - __entry->index = folio_index(folio); - __entry->from = afs_folio_dirty_from(folio, priv); - __entry->to = afs_folio_dirty_to(folio, priv); - __entry->to |= (afs_is_folio_dirty_mmapped(priv) ? - (1UL << (BITS_PER_LONG - 1)) : 0); + __entry->page = page->index; + __entry->from = afs_page_dirty_from(page, page->private); + __entry->to = afs_page_dirty_to(page, page->private); + __entry->to |= (afs_is_page_dirty_mmapped(page->private) ? + (1UL << (BITS_PER_LONG - 1)) : 0); ), TP_printk("vn=%p %lx %s %lx-%lx%s", - __entry->vnode, __entry->index, __entry->where, + __entry->vnode, __entry->page, __entry->where, __entry->from, __entry->to & ~(1UL << (BITS_PER_LONG - 1)), __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "") diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 27170e40e8..cc5ab96a74 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -85,7 +85,7 @@ TRACE_EVENT(block_rq_requeue, ), TP_fast_assign( - __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; + __entry->dev = rq->rq_disk ? 
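/*
 * Editor's illustrative note on the DEFINE_EVENT_WRITABLE hunk above, not
 * part of the original source: FIRST() peels off the first prototype
 * argument, so a hypothetical writable event declared as
 *
 *	DEFINE_EVENT_WRITABLE(tmpl, foo_evt,
 *			      TP_PROTO(struct foo_buf *buf, int n),
 *			      TP_ARGS(buf, n), sizeof(struct foo_buf))
 *
 * ends up checking BUILD_BUG_ON_ZERO(size != sizeof(*buf)), which ties the
 * declared writable size to the first argument's pointee type at compile
 * time.
 */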
disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); @@ -114,7 +114,7 @@ TRACE_EVENT(block_rq_requeue, */ TRACE_EVENT(block_rq_complete, - TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), + TP_PROTO(struct request *rq, int error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes), @@ -122,16 +122,16 @@ TRACE_EVENT(block_rq_complete, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __field( int , error ) + __field( int, error ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( - __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; - __entry->error = blk_status_to_errno(error); + __entry->error = error; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; @@ -161,7 +161,7 @@ DECLARE_EVENT_CLASS(block_rq, ), TP_fast_assign( - __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); @@ -512,7 +512,7 @@ TRACE_EVENT(block_rq_remap, ), TP_fast_assign( - __entry->dev = disk_devt(rq->q->disk); + __entry->dev = disk_devt(rq->rq_disk); __entry->sector = blk_rq_pos(rq); __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0d729664b4..8f58fd95ef 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -182,18 +182,18 @@ FLUSH_STATES TRACE_EVENT(btrfs_transaction_commit, - TP_PROTO(const struct btrfs_fs_info *fs_info), + TP_PROTO(const struct btrfs_root *root), - TP_ARGS(fs_info), + TP_ARGS(root), TP_STRUCT__entry_btrfs( __field( u64, generation ) __field( u64, root_objectid ) ), - TP_fast_assign_btrfs(fs_info, - __entry->generation = fs_info->generation; - __entry->root_objectid = BTRFS_ROOT_TREE_OBJECTID; + TP_fast_assign_btrfs(root->fs_info, + __entry->generation = root->fs_info->generation; + __entry->root_objectid = root->root_key.objectid; ), TP_printk_btrfs("root=%llu(%s) gen=%llu", diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index c6f5aa74db..920b6a303d 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* CacheFiles tracepoints * - * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ #undef TRACE_SYSTEM @@ -19,83 +19,9 @@ #define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY enum cachefiles_obj_ref_trace { - cachefiles_obj_get_ioreq, - cachefiles_obj_new, - cachefiles_obj_put_alloc_fail, - cachefiles_obj_put_detach, - cachefiles_obj_put_ioreq, - cachefiles_obj_see_clean_commit, - cachefiles_obj_see_clean_delete, - cachefiles_obj_see_clean_drop_tmp, - cachefiles_obj_see_lookup_cookie, - cachefiles_obj_see_lookup_failed, - cachefiles_obj_see_withdraw_cookie, - cachefiles_obj_see_withdrawal, -}; - -enum fscache_why_object_killed { - FSCACHE_OBJECT_IS_STALE, - FSCACHE_OBJECT_IS_WEIRD, - FSCACHE_OBJECT_INVALIDATED, - FSCACHE_OBJECT_NO_SPACE, - FSCACHE_OBJECT_WAS_RETIRED, - FSCACHE_OBJECT_WAS_CULLED, - FSCACHE_VOLUME_IS_WEIRD, -}; - -enum cachefiles_coherency_trace { - cachefiles_coherency_check_aux, - cachefiles_coherency_check_content, - cachefiles_coherency_check_dirty, - cachefiles_coherency_check_len, - cachefiles_coherency_check_objsize, - cachefiles_coherency_check_ok, - cachefiles_coherency_check_type, - cachefiles_coherency_check_xattr, - cachefiles_coherency_set_fail, - cachefiles_coherency_set_ok, - cachefiles_coherency_vol_check_cmp, - cachefiles_coherency_vol_check_ok, - cachefiles_coherency_vol_check_xattr, - cachefiles_coherency_vol_set_fail, - cachefiles_coherency_vol_set_ok, -}; - -enum cachefiles_trunc_trace { - cachefiles_trunc_dio_adjust, - cachefiles_trunc_expand_tmpfile, - cachefiles_trunc_shrink, -}; - -enum cachefiles_prepare_read_trace { - cachefiles_trace_read_after_eof, - cachefiles_trace_read_found_hole, - cachefiles_trace_read_found_part, - cachefiles_trace_read_have_data, - cachefiles_trace_read_no_data, - cachefiles_trace_read_no_file, - cachefiles_trace_read_seek_error, - cachefiles_trace_read_seek_nxio, -}; - -enum cachefiles_error_trace { - cachefiles_trace_fallocate_error, - cachefiles_trace_getxattr_error, - cachefiles_trace_link_error, - cachefiles_trace_lookup_error, - cachefiles_trace_mkdir_error, - cachefiles_trace_notify_change_error, - cachefiles_trace_open_error, - cachefiles_trace_read_error, - cachefiles_trace_remxattr_error, - cachefiles_trace_rename_error, - cachefiles_trace_seek_error, - cachefiles_trace_setxattr_error, - cachefiles_trace_statfs_error, - cachefiles_trace_tmpfile_error, - cachefiles_trace_trunc_error, - cachefiles_trace_unlink_error, - cachefiles_trace_write_error, + cachefiles_obj_put_wait_retry = fscache_obj_ref__nr_traces, + cachefiles_obj_put_wait_timeo, + cachefiles_obj_ref__nr_traces }; #endif @@ -105,78 +31,21 @@ enum cachefiles_error_trace { */ #define cachefiles_obj_kill_traces \ EM(FSCACHE_OBJECT_IS_STALE, "stale") \ - EM(FSCACHE_OBJECT_IS_WEIRD, "weird") \ - EM(FSCACHE_OBJECT_INVALIDATED, "inval") \ EM(FSCACHE_OBJECT_NO_SPACE, "no_space") \ EM(FSCACHE_OBJECT_WAS_RETIRED, "was_retired") \ - EM(FSCACHE_OBJECT_WAS_CULLED, "was_culled") \ - E_(FSCACHE_VOLUME_IS_WEIRD, "volume_weird") + E_(FSCACHE_OBJECT_WAS_CULLED, "was_culled") #define cachefiles_obj_ref_traces \ - EM(cachefiles_obj_get_ioreq, "GET ioreq") \ - EM(cachefiles_obj_new, "NEW obj") \ - EM(cachefiles_obj_put_alloc_fail, "PUT alloc_fail") \ - EM(cachefiles_obj_put_detach, "PUT detach") \ - EM(cachefiles_obj_put_ioreq, "PUT ioreq") \ - EM(cachefiles_obj_see_clean_commit, "SEE clean_commit") \ - EM(cachefiles_obj_see_clean_delete, "SEE clean_delete") \ - EM(cachefiles_obj_see_clean_drop_tmp, "SEE clean_drop_tmp") \ - EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ - 
EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ - EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ - E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") - -#define cachefiles_coherency_traces \ - EM(cachefiles_coherency_check_aux, "BAD aux ") \ - EM(cachefiles_coherency_check_content, "BAD cont") \ - EM(cachefiles_coherency_check_dirty, "BAD dirt") \ - EM(cachefiles_coherency_check_len, "BAD len ") \ - EM(cachefiles_coherency_check_objsize, "BAD osiz") \ - EM(cachefiles_coherency_check_ok, "OK ") \ - EM(cachefiles_coherency_check_type, "BAD type") \ - EM(cachefiles_coherency_check_xattr, "BAD xatt") \ - EM(cachefiles_coherency_set_fail, "SET fail") \ - EM(cachefiles_coherency_set_ok, "SET ok ") \ - EM(cachefiles_coherency_vol_check_cmp, "VOL BAD cmp ") \ - EM(cachefiles_coherency_vol_check_ok, "VOL OK ") \ - EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt") \ - EM(cachefiles_coherency_vol_set_fail, "VOL SET fail") \ - E_(cachefiles_coherency_vol_set_ok, "VOL SET ok ") - -#define cachefiles_trunc_traces \ - EM(cachefiles_trunc_dio_adjust, "DIOADJ") \ - EM(cachefiles_trunc_expand_tmpfile, "EXPTMP") \ - E_(cachefiles_trunc_shrink, "SHRINK") - -#define cachefiles_prepare_read_traces \ - EM(cachefiles_trace_read_after_eof, "after-eof ") \ - EM(cachefiles_trace_read_found_hole, "found-hole") \ - EM(cachefiles_trace_read_found_part, "found-part") \ - EM(cachefiles_trace_read_have_data, "have-data ") \ - EM(cachefiles_trace_read_no_data, "no-data ") \ - EM(cachefiles_trace_read_no_file, "no-file ") \ - EM(cachefiles_trace_read_seek_error, "seek-error") \ - E_(cachefiles_trace_read_seek_nxio, "seek-enxio") - -#define cachefiles_error_traces \ - EM(cachefiles_trace_fallocate_error, "fallocate") \ - EM(cachefiles_trace_getxattr_error, "getxattr") \ - EM(cachefiles_trace_link_error, "link") \ - EM(cachefiles_trace_lookup_error, "lookup") \ - EM(cachefiles_trace_mkdir_error, "mkdir") \ - EM(cachefiles_trace_notify_change_error, "notify_change") \ - EM(cachefiles_trace_open_error, "open") \ - EM(cachefiles_trace_read_error, "read") \ - EM(cachefiles_trace_remxattr_error, "remxattr") \ - EM(cachefiles_trace_rename_error, "rename") \ - EM(cachefiles_trace_seek_error, "seek") \ - EM(cachefiles_trace_setxattr_error, "setxattr") \ - EM(cachefiles_trace_statfs_error, "statfs") \ - EM(cachefiles_trace_tmpfile_error, "tmpfile") \ - EM(cachefiles_trace_trunc_error, "trunc") \ - EM(cachefiles_trace_unlink_error, "unlink") \ - E_(cachefiles_trace_write_error, "write") - + EM(fscache_obj_get_add_to_deps, "GET add_to_deps") \ + EM(fscache_obj_get_queue, "GET queue") \ + EM(fscache_obj_put_alloc_fail, "PUT alloc_fail") \ + EM(fscache_obj_put_attach_fail, "PUT attach_fail") \ + EM(fscache_obj_put_drop_obj, "PUT drop_obj") \ + EM(fscache_obj_put_enq_dep, "PUT enq_dep") \ + EM(fscache_obj_put_queue, "PUT queue") \ + EM(fscache_obj_put_work, "PUT work") \ + EM(cachefiles_obj_put_wait_retry, "PUT wait_retry") \ + E_(cachefiles_obj_put_wait_timeo, "PUT wait_timeo") /* * Export enum symbols via userspace. 
@@ -188,10 +57,6 @@ enum cachefiles_error_trace { cachefiles_obj_kill_traces; cachefiles_obj_ref_traces; -cachefiles_coherency_traces; -cachefiles_trunc_traces; -cachefiles_prepare_read_traces; -cachefiles_error_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -204,12 +69,12 @@ cachefiles_error_traces; TRACE_EVENT(cachefiles_ref, - TP_PROTO(unsigned int object_debug_id, - unsigned int cookie_debug_id, - int usage, - enum cachefiles_obj_ref_trace why), + TP_PROTO(struct cachefiles_object *obj, + struct fscache_cookie *cookie, + enum cachefiles_obj_ref_trace why, + int usage), - TP_ARGS(object_debug_id, cookie_debug_id, usage, why), + TP_ARGS(obj, cookie, why, usage), /* Note that obj may be NULL */ TP_STRUCT__entry( @@ -220,8 +85,8 @@ TRACE_EVENT(cachefiles_ref, ), TP_fast_assign( - __entry->obj = object_debug_id; - __entry->cookie = cookie_debug_id; + __entry->obj = obj->fscache.debug_id; + __entry->cookie = cookie->debug_id; __entry->usage = usage; __entry->why = why; ), @@ -233,440 +98,221 @@ TRACE_EVENT(cachefiles_ref, TRACE_EVENT(cachefiles_lookup, TP_PROTO(struct cachefiles_object *obj, - struct dentry *dir, - struct dentry *de), + struct dentry *de, + struct inode *inode), - TP_ARGS(obj, dir, de), + TP_ARGS(obj, de, inode), TP_STRUCT__entry( __field(unsigned int, obj ) - __field(short, error ) - __field(unsigned long, dino ) - __field(unsigned long, ino ) + __field(struct dentry *, de ) + __field(struct inode *, inode ) ), TP_fast_assign( - __entry->obj = obj ? obj->debug_id : 0; - __entry->dino = d_backing_inode(dir)->i_ino; - __entry->ino = (!IS_ERR(de) && d_backing_inode(de) ? - d_backing_inode(de)->i_ino : 0); - __entry->error = IS_ERR(de) ? PTR_ERR(de) : 0; + __entry->obj = obj->fscache.debug_id; + __entry->de = de; + __entry->inode = inode; ), - TP_printk("o=%08x dB=%lx B=%lx e=%d", - __entry->obj, __entry->dino, __entry->ino, __entry->error) + TP_printk("o=%08x d=%p i=%p", + __entry->obj, __entry->de, __entry->inode) ); TRACE_EVENT(cachefiles_mkdir, - TP_PROTO(struct dentry *dir, struct dentry *subdir), + TP_PROTO(struct cachefiles_object *obj, + struct dentry *de, int ret), - TP_ARGS(dir, subdir), + TP_ARGS(obj, de, ret), TP_STRUCT__entry( - __field(unsigned int, dir ) - __field(unsigned int, subdir ) + __field(unsigned int, obj ) + __field(struct dentry *, de ) + __field(int, ret ) ), TP_fast_assign( - __entry->dir = d_backing_inode(dir)->i_ino; - __entry->subdir = d_backing_inode(subdir)->i_ino; + __entry->obj = obj->fscache.debug_id; + __entry->de = de; + __entry->ret = ret; ), - TP_printk("dB=%x sB=%x", - __entry->dir, - __entry->subdir) + TP_printk("o=%08x d=%p r=%u", + __entry->obj, __entry->de, __entry->ret) ); -TRACE_EVENT(cachefiles_tmpfile, - TP_PROTO(struct cachefiles_object *obj, struct inode *backer), +TRACE_EVENT(cachefiles_create, + TP_PROTO(struct cachefiles_object *obj, + struct dentry *de, int ret), - TP_ARGS(obj, backer), + TP_ARGS(obj, de, ret), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) + __field(unsigned int, obj ) + __field(struct dentry *, de ) + __field(int, ret ) ), TP_fast_assign( - __entry->obj = obj->debug_id; - __entry->backer = backer->i_ino; + __entry->obj = obj->fscache.debug_id; + __entry->de = de; + __entry->ret = ret; ), - TP_printk("o=%08x B=%x", - __entry->obj, - __entry->backer) - ); - -TRACE_EVENT(cachefiles_link, - TP_PROTO(struct cachefiles_object *obj, struct inode *backer), - - TP_ARGS(obj, backer), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - 
__field(unsigned int, backer ) - ), - - TP_fast_assign( - __entry->obj = obj->debug_id; - __entry->backer = backer->i_ino; - ), - - TP_printk("o=%08x B=%x", - __entry->obj, - __entry->backer) + TP_printk("o=%08x d=%p r=%u", + __entry->obj, __entry->de, __entry->ret) ); TRACE_EVENT(cachefiles_unlink, TP_PROTO(struct cachefiles_object *obj, - ino_t ino, + struct dentry *de, enum fscache_why_object_killed why), - TP_ARGS(obj, ino, why), + TP_ARGS(obj, de, why), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(unsigned int, ino ) + __field(struct dentry *, de ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( - __entry->obj = obj ? obj->debug_id : UINT_MAX; - __entry->ino = ino; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; + __entry->de = de; __entry->why = why; ), - TP_printk("o=%08x B=%x w=%s", - __entry->obj, __entry->ino, + TP_printk("o=%08x d=%p w=%s", + __entry->obj, __entry->de, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); TRACE_EVENT(cachefiles_rename, TP_PROTO(struct cachefiles_object *obj, - ino_t ino, + struct dentry *de, + struct dentry *to, enum fscache_why_object_killed why), - TP_ARGS(obj, ino, why), + TP_ARGS(obj, de, to, why), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(unsigned int, ino ) + __field(struct dentry *, de ) + __field(struct dentry *, to ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( - __entry->obj = obj ? obj->debug_id : UINT_MAX; - __entry->ino = ino; - __entry->why = why; - ), - - TP_printk("o=%08x B=%x w=%s", - __entry->obj, __entry->ino, - __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) - ); - -TRACE_EVENT(cachefiles_coherency, - TP_PROTO(struct cachefiles_object *obj, - ino_t ino, - enum cachefiles_content content, - enum cachefiles_coherency_trace why), - - TP_ARGS(obj, ino, content, why), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(enum cachefiles_coherency_trace, why ) - __field(enum cachefiles_content, content ) - __field(u64, ino ) - ), - - TP_fast_assign( - __entry->obj = obj->debug_id; - __entry->why = why; - __entry->content = content; - __entry->ino = ino; - ), - - TP_printk("o=%08x %s B=%llx c=%u", - __entry->obj, - __print_symbolic(__entry->why, cachefiles_coherency_traces), - __entry->ino, - __entry->content) - ); - -TRACE_EVENT(cachefiles_vol_coherency, - TP_PROTO(struct cachefiles_volume *volume, - ino_t ino, - enum cachefiles_coherency_trace why), - - TP_ARGS(volume, ino, why), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, vol ) - __field(enum cachefiles_coherency_trace, why ) - __field(u64, ino ) - ), - - TP_fast_assign( - __entry->vol = volume->vcookie->debug_id; - __entry->why = why; - __entry->ino = ino; - ), - - TP_printk("V=%08x %s B=%llx", - __entry->vol, - __print_symbolic(__entry->why, cachefiles_coherency_traces), - __entry->ino) - ); - -TRACE_EVENT(cachefiles_prep_read, - TP_PROTO(struct netfs_read_subrequest *sreq, - enum netfs_read_source source, - enum cachefiles_prepare_read_trace why, - ino_t cache_inode), - - TP_ARGS(sreq, source, why, cache_inode), - - TP_STRUCT__entry( - __field(unsigned int, rreq ) - __field(unsigned short, index ) - __field(unsigned short, flags ) - __field(enum netfs_read_source, source ) - __field(enum cachefiles_prepare_read_trace, why ) - __field(size_t, len ) - __field(loff_t, start ) - __field(unsigned int, netfs_inode ) - __field(unsigned int, cache_inode ) 
- ), - - TP_fast_assign( - __entry->rreq = sreq->rreq->debug_id; - __entry->index = sreq->debug_index; - __entry->flags = sreq->flags; - __entry->source = source; - __entry->why = why; - __entry->len = sreq->len; - __entry->start = sreq->start; - __entry->netfs_inode = sreq->rreq->inode->i_ino; - __entry->cache_inode = cache_inode; - ), - - TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x", - __entry->rreq, __entry->index, - __print_symbolic(__entry->source, netfs_sreq_sources), - __print_symbolic(__entry->why, cachefiles_prepare_read_traces), - __entry->flags, - __entry->start, __entry->len, - __entry->netfs_inode, __entry->cache_inode) - ); - -TRACE_EVENT(cachefiles_read, - TP_PROTO(struct cachefiles_object *obj, - struct inode *backer, - loff_t start, - size_t len), - - TP_ARGS(obj, backer, start, len), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(size_t, len ) - __field(loff_t, start ) - ), - - TP_fast_assign( - __entry->obj = obj->debug_id; - __entry->backer = backer->i_ino; - __entry->start = start; - __entry->len = len; - ), - - TP_printk("o=%08x B=%x s=%llx l=%zx", - __entry->obj, - __entry->backer, - __entry->start, - __entry->len) - ); - -TRACE_EVENT(cachefiles_write, - TP_PROTO(struct cachefiles_object *obj, - struct inode *backer, - loff_t start, - size_t len), - - TP_ARGS(obj, backer, start, len), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(size_t, len ) - __field(loff_t, start ) - ), - - TP_fast_assign( - __entry->obj = obj->debug_id; - __entry->backer = backer->i_ino; - __entry->start = start; - __entry->len = len; - ), - - TP_printk("o=%08x B=%x s=%llx l=%zx", - __entry->obj, - __entry->backer, - __entry->start, - __entry->len) - ); - -TRACE_EVENT(cachefiles_trunc, - TP_PROTO(struct cachefiles_object *obj, struct inode *backer, - loff_t from, loff_t to, enum cachefiles_trunc_trace why), - - TP_ARGS(obj, backer, from, to, why), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(enum cachefiles_trunc_trace, why ) - __field(loff_t, from ) - __field(loff_t, to ) - ), - - TP_fast_assign( - __entry->obj = obj->debug_id; - __entry->backer = backer->i_ino; - __entry->from = from; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; + __entry->de = de; __entry->to = to; __entry->why = why; ), - TP_printk("o=%08x B=%x %s l=%llx->%llx", - __entry->obj, - __entry->backer, - __print_symbolic(__entry->why, cachefiles_trunc_traces), - __entry->from, - __entry->to) + TP_printk("o=%08x d=%p t=%p w=%s", + __entry->obj, __entry->de, __entry->to, + __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); TRACE_EVENT(cachefiles_mark_active, TP_PROTO(struct cachefiles_object *obj, - struct inode *inode), + struct dentry *de), - TP_ARGS(obj, inode), + TP_ARGS(obj, de), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(ino_t, inode ) + __field(struct dentry *, de ) ), TP_fast_assign( - __entry->obj = obj ? 
obj->debug_id : 0; - __entry->inode = inode->i_ino; + __entry->obj = obj->fscache.debug_id; + __entry->de = de; ), - TP_printk("o=%08x B=%lx", - __entry->obj, __entry->inode) + TP_printk("o=%08x d=%p", + __entry->obj, __entry->de) ); -TRACE_EVENT(cachefiles_mark_failed, +TRACE_EVENT(cachefiles_wait_active, TP_PROTO(struct cachefiles_object *obj, - struct inode *inode), + struct dentry *de, + struct cachefiles_object *xobj), - TP_ARGS(obj, inode), + TP_ARGS(obj, de, xobj), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(ino_t, inode ) + __field(unsigned int, xobj ) + __field(struct dentry *, de ) + __field(u16, flags ) + __field(u16, fsc_flags ) ), TP_fast_assign( - __entry->obj = obj ? obj->debug_id : 0; - __entry->inode = inode->i_ino; + __entry->obj = obj->fscache.debug_id; + __entry->de = de; + __entry->xobj = xobj->fscache.debug_id; + __entry->flags = xobj->flags; + __entry->fsc_flags = xobj->fscache.flags; ), - TP_printk("o=%08x B=%lx", - __entry->obj, __entry->inode) + TP_printk("o=%08x d=%p wo=%08x wf=%x wff=%x", + __entry->obj, __entry->de, __entry->xobj, + __entry->flags, __entry->fsc_flags) ); TRACE_EVENT(cachefiles_mark_inactive, TP_PROTO(struct cachefiles_object *obj, + struct dentry *de, struct inode *inode), - TP_ARGS(obj, inode), + TP_ARGS(obj, de, inode), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(ino_t, inode ) + __field(struct dentry *, de ) + __field(struct inode *, inode ) ), TP_fast_assign( - __entry->obj = obj ? obj->debug_id : 0; - __entry->inode = inode->i_ino; + __entry->obj = obj->fscache.debug_id; + __entry->de = de; + __entry->inode = inode; ), - TP_printk("o=%08x B=%lx", - __entry->obj, __entry->inode) + TP_printk("o=%08x d=%p i=%p", + __entry->obj, __entry->de, __entry->inode) ); -TRACE_EVENT(cachefiles_vfs_error, - TP_PROTO(struct cachefiles_object *obj, struct inode *backer, - int error, enum cachefiles_error_trace where), +TRACE_EVENT(cachefiles_mark_buried, + TP_PROTO(struct cachefiles_object *obj, + struct dentry *de, + enum fscache_why_object_killed why), - TP_ARGS(obj, backer, error, where), + TP_ARGS(obj, de, why), + /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(enum cachefiles_error_trace, where ) - __field(short, error ) + __field(unsigned int, obj ) + __field(struct dentry *, de ) + __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( - __entry->obj = obj ? obj->debug_id : 0; - __entry->backer = backer->i_ino; - __entry->error = error; - __entry->where = where; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; + __entry->de = de; + __entry->why = why; ), - TP_printk("o=%08x B=%x %s e=%d", - __entry->obj, - __entry->backer, - __print_symbolic(__entry->where, cachefiles_error_traces), - __entry->error) - ); - -TRACE_EVENT(cachefiles_io_error, - TP_PROTO(struct cachefiles_object *obj, struct inode *backer, - int error, enum cachefiles_error_trace where), - - TP_ARGS(obj, backer, error, where), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(enum cachefiles_error_trace, where ) - __field(short, error ) - ), - - TP_fast_assign( - __entry->obj = obj ? 
obj->debug_id : 0; - __entry->backer = backer->i_ino; - __entry->error = error; - __entry->where = where; - ), - - TP_printk("o=%08x B=%x %s e=%d", - __entry->obj, - __entry->backer, - __print_symbolic(__entry->where, cachefiles_error_traces), - __entry->error) + TP_printk("o=%08x d=%p w=%s", + __entry->obj, __entry->de, + __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); #endif /* _TRACE_CACHEFILES_H */ diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 7d48e7079e..54e5bf0811 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -68,9 +68,10 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages, TRACE_EVENT(mm_compaction_migratepages, TP_PROTO(unsigned long nr_all, - unsigned int nr_succeeded), + int migrate_rc, + struct list_head *migratepages), - TP_ARGS(nr_all, nr_succeeded), + TP_ARGS(nr_all, migrate_rc, migratepages), TP_STRUCT__entry( __field(unsigned long, nr_migrated) @@ -78,8 +79,23 @@ TRACE_EVENT(mm_compaction_migratepages, ), TP_fast_assign( - __entry->nr_migrated = nr_succeeded; - __entry->nr_failed = nr_all - nr_succeeded; + unsigned long nr_failed = 0; + struct list_head *page_lru; + + /* + * migrate_pages() returns either a non-negative number + * with the number of pages that failed migration, or an + * error code, in which case we need to count the remaining + * pages manually + */ + if (migrate_rc >= 0) + nr_failed = migrate_rc; + else + list_for_each(page_lru, migratepages) + nr_failed++; + + __entry->nr_migrated = nr_all - nr_failed; + __entry->nr_failed = nr_failed; ), TP_printk("nr_migrated=%lu nr_failed=%lu", diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index c79f1d4c39..2f422f4f1f 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -11,10 +11,10 @@ TRACE_EVENT(damon_aggregated, - TP_PROTO(struct damon_target *t, unsigned int target_id, - struct damon_region *r, unsigned int nr_regions), + TP_PROTO(struct damon_target *t, struct damon_region *r, + unsigned int nr_regions), - TP_ARGS(t, target_id, r, nr_regions), + TP_ARGS(t, r, nr_regions), TP_STRUCT__entry( __field(unsigned long, target_id) @@ -22,22 +22,19 @@ TRACE_EVENT(damon_aggregated, __field(unsigned long, start) __field(unsigned long, end) __field(unsigned int, nr_accesses) - __field(unsigned int, age) ), TP_fast_assign( - __entry->target_id = target_id; + __entry->target_id = t->id; __entry->nr_regions = nr_regions; __entry->start = r->ar.start; __entry->end = r->ar.end; __entry->nr_accesses = r->nr_accesses; - __entry->age = r->age; ), - TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u %u", + TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u", __entry->target_id, __entry->nr_regions, - __entry->start, __entry->end, - __entry->nr_accesses, __entry->age) + __entry->start, __entry->end, __entry->nr_accesses) ); #endif /* _TRACE_DAMON_H */ diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 2814f188d9..44d8e29810 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -21,9 +21,9 @@ TRACE_EVENT(devlink_hwmsg, TP_ARGS(devlink, incoming, type, buf, len), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) 
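/*
 * Editor's illustrative note on the mm_compaction_migratepages hunk above,
 * not part of the original source: this version derives the failure count
 * from migrate_pages()'s return value. For example, a call covering
 * nr_all = 5 pages that returns 2 records nr_migrated = 3 and nr_failed = 2,
 * while a negative (error) return makes the tracepoint walk the pages still
 * left on the migratepages list and count each of them as failed.
 */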
__field(bool, incoming) __field(unsigned long, type) __dynamic_array(u8, buf, len) @@ -31,9 +31,9 @@ TRACE_EVENT(devlink_hwmsg, ), TP_fast_assign( - __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); - __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); - __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); __entry->incoming = incoming; __entry->type = type; memcpy(__get_dynamic_array(buf), buf, len); @@ -55,17 +55,17 @@ TRACE_EVENT(devlink_hwerr, TP_ARGS(devlink, err, msg), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) __field(int, err) __string(msg, msg) ), TP_fast_assign( - __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); - __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); - __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); __entry->err = err; __assign_str(msg, msg); ), @@ -85,17 +85,17 @@ TRACE_EVENT(devlink_health_report, TP_ARGS(devlink, reporter_name, msg), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) __string(reporter_name, msg) __string(msg, msg) ), TP_fast_assign( - __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); - __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); - __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); __assign_str(reporter_name, reporter_name); __assign_str(msg, msg); ), @@ -116,18 +116,18 @@ TRACE_EVENT(devlink_health_recover_aborted, TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) __string(reporter_name, reporter_name) __field(bool, health_state) __field(u64, time_since_last_recover) ), TP_fast_assign( - __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); - __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); - __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); __assign_str(reporter_name, reporter_name); __entry->health_state = health_state; __entry->time_since_last_recover = time_since_last_recover; @@ -150,17 +150,17 @@ TRACE_EVENT(devlink_health_reporter_state_update, TP_ARGS(devlink, 
reporter_name, new_state), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) __string(reporter_name, reporter_name) __field(u8, new_state) ), TP_fast_assign( - __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); - __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); - __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); __assign_str(reporter_name, reporter_name); __entry->new_state = new_state; ), @@ -181,9 +181,9 @@ TRACE_EVENT(devlink_trap_report, TP_ARGS(devlink, skb, metadata), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) __dynamic_array(char, input_dev_name, IFNAMSIZ) @@ -192,9 +192,9 @@ TRACE_EVENT(devlink_trap_report, TP_fast_assign( struct net_device *input_dev = metadata->input_dev; - __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); - __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); - __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); __assign_str(trap_name, metadata->trap_name); __assign_str(trap_group_name, metadata->trap_group_name); __assign_str(input_dev_name, diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index 57de057bd5..db4f2cec83 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -24,7 +24,7 @@ struct erofs_map_blocks; #define show_mflags(flags) __print_flags(flags, "", \ { EROFS_MAP_MAPPED, "M" }, \ { EROFS_MAP_META, "I" }, \ - { EROFS_MAP_ENCODED, "E" }) + { EROFS_MAP_ZIPPED, "Z" }) TRACE_EVENT(erofs_lookup, @@ -169,7 +169,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter, __entry->flags ? 
show_map_flags(__entry->flags) : "NULL") ); -DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter, +DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned flags), @@ -221,7 +221,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit, +DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned flags, int ret), diff --git a/include/trace/events/error_report.h b/include/trace/events/error_report.h index a1922a800e..96f64bf218 100644 --- a/include/trace/events/error_report.h +++ b/include/trace/events/error_report.h @@ -17,16 +17,14 @@ enum error_detector { ERROR_DETECTOR_KFENCE, - ERROR_DETECTOR_KASAN, - ERROR_DETECTOR_WARN, + ERROR_DETECTOR_KASAN }; #endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */ -#define error_detector_list \ +#define error_detector_list \ EM(ERROR_DETECTOR_KFENCE, "kfence") \ - EM(ERROR_DETECTOR_KASAN, "kasan") \ - EMe(ERROR_DETECTOR_WARN, "warning") + EMe(ERROR_DETECTOR_KASAN, "kasan") /* Always end the list with an EMe. */ #undef EM diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 19e957b7f9..0ea36b2b06 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2837,29 +2837,6 @@ TRACE_EVENT(ext4_fc_track_range, __entry->end) ); -TRACE_EVENT(ext4_update_sb, - TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, - unsigned int flags), - - TP_ARGS(sb, fsblk, flags), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ext4_fsblk_t, fsblk) - __field(unsigned int, flags) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->fsblk = fsblk; - __entry->flags = flags; - ), - - TP_printk("dev %d,%d fsblk %llu flags %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->fsblk, __entry->flags) -); - #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index f701bb23f8..4cb055af1e 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TRACE_EVENT(f2fs_file_write_iter, - TP_PROTO(struct inode *inode, loff_t offset, size_t length, - ssize_t ret), + TP_PROTO(struct inode *inode, unsigned long offset, + unsigned long length, int ret), TP_ARGS(inode, offset, length, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(loff_t, offset) - __field(size_t, length) - __field(ssize_t, ret) + __field(unsigned long, offset) + __field(unsigned long, length) + __field(int, ret) ), TP_fast_assign( @@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter, ), TP_printk("dev = (%d,%d), ino = %lu, " - "offset = %lld, length = %zu, written(err) = %zd", + "offset = %lu, length = %lu, written(err) = %d", show_dev_ino(__entry), __entry->offset, __entry->length, @@ -570,10 +570,9 @@ TRACE_EVENT(f2fs_file_write_iter, ); TRACE_EVENT(f2fs_map_blocks, - TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, - int create, int flag, int ret), + TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret), - TP_ARGS(inode, map, create, flag, ret), + TP_ARGS(inode, map, ret), TP_STRUCT__entry( __field(dev_t, dev) @@ -584,14 +583,11 @@ TRACE_EVENT(f2fs_map_blocks, __field(unsigned int, m_flags) __field(int, m_seg_type) __field(bool, m_may_create) - __field(bool, m_multidev_dio) - 
__field(int, create) - __field(int, flag) __field(int, ret) ), TP_fast_assign( - __entry->dev = map->m_bdev->bd_dev; + __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_pblk = map->m_pblk; @@ -599,16 +595,12 @@ TRACE_EVENT(f2fs_map_blocks, __entry->m_flags = map->m_flags; __entry->m_seg_type = map->m_seg_type; __entry->m_may_create = map->m_may_create; - __entry->m_multidev_dio = map->m_multidev_dio; - __entry->create = create; - __entry->flag = flag; __entry->ret = ret; ), TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, " - "seg_type = %d, may_create = %d, multidevice = %d, " - "create = %d, flag = %d, err = %d", + "start blkaddr = 0x%llx, len = 0x%llx, flags = %u," + "seg_type = %d, may_create = %d, err = %d", show_dev_ino(__entry), (unsigned long long)__entry->m_lblk, (unsigned long long)__entry->m_pblk, @@ -616,9 +608,6 @@ TRACE_EVENT(f2fs_map_blocks, __entry->m_flags, __entry->m_seg_type, __entry->m_may_create, - __entry->m_multidev_dio, - __entry->create, - __entry->flag, __entry->ret) ); @@ -936,14 +925,14 @@ TRACE_EVENT(f2fs_fallocate, TRACE_EVENT(f2fs_direct_IO_enter, - TP_PROTO(struct inode *inode, struct kiocb *iocb, long len, int rw), + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), - TP_ARGS(inode, iocb, len, rw), + TP_ARGS(inode, offset, len, rw), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(struct kiocb *, iocb) + __field(loff_t, pos) __field(unsigned long, len) __field(int, rw) ), @@ -951,18 +940,15 @@ TRACE_EVENT(f2fs_direct_IO_enter, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->iocb = iocb; + __entry->pos = offset; __entry->len = len; __entry->rw = rw; ), - TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d", + TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d", show_dev_ino(__entry), - __entry->iocb->ki_pos, + __entry->pos, __entry->len, - __entry->iocb->ki_flags, - __entry->iocb->ki_hint, - __entry->iocb->ki_ioprio, __entry->rw) ); diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index 46c89c1e46..c47b63db12 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -15,45 +15,43 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, - TP_PROTO(struct folio *folio), + TP_PROTO(struct page *page), - TP_ARGS(folio), + TP_ARGS(page), TP_STRUCT__entry( __field(unsigned long, pfn) __field(unsigned long, i_ino) __field(unsigned long, index) __field(dev_t, s_dev) - __field(unsigned char, order) ), TP_fast_assign( - __entry->pfn = folio_pfn(folio); - __entry->i_ino = folio->mapping->host->i_ino; - __entry->index = folio->index; - if (folio->mapping->host->i_sb) - __entry->s_dev = folio->mapping->host->i_sb->s_dev; + __entry->pfn = page_to_pfn(page); + __entry->i_ino = page->mapping->host->i_ino; + __entry->index = page->index; + if (page->mapping->host->i_sb) + __entry->s_dev = page->mapping->host->i_sb->s_dev; else - __entry->s_dev = folio->mapping->host->i_rdev; - __entry->order = folio_order(folio); + __entry->s_dev = page->mapping->host->i_rdev; ), - TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u", + TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, + pfn_to_page(__entry->pfn), __entry->pfn, - __entry->index << PAGE_SHIFT, - __entry->order) + __entry->index << 
PAGE_SHIFT) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, - TP_PROTO(struct folio *folio), - TP_ARGS(folio) + TP_PROTO(struct page *page), + TP_ARGS(page) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, - TP_PROTO(struct folio *folio), - TP_ARGS(folio) + TP_PROTO(struct page *page), + TP_ARGS(page) ); TRACE_EVENT(filemap_set_wb_err, diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index cb3fb337e8..446392f5ba 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* FS-Cache tracepoints * - * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #undef TRACE_SYSTEM @@ -19,83 +19,65 @@ #ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY -enum fscache_cache_trace { - fscache_cache_collision, - fscache_cache_get_acquire, - fscache_cache_new_acquire, - fscache_cache_put_alloc_volume, - fscache_cache_put_cache, - fscache_cache_put_prep_failed, - fscache_cache_put_relinquish, - fscache_cache_put_volume, -}; - -enum fscache_volume_trace { - fscache_volume_collision, - fscache_volume_get_cookie, - fscache_volume_get_create_work, - fscache_volume_get_hash_collision, - fscache_volume_free, - fscache_volume_new_acquire, - fscache_volume_put_cookie, - fscache_volume_put_create_work, - fscache_volume_put_hash_collision, - fscache_volume_put_relinquish, - fscache_volume_see_create_work, - fscache_volume_see_hash_wake, - fscache_volume_wait_create_work, -}; - enum fscache_cookie_trace { fscache_cookie_collision, fscache_cookie_discard, + fscache_cookie_get_acquire_parent, fscache_cookie_get_attach_object, - fscache_cookie_get_end_access, - fscache_cookie_get_hash_collision, - fscache_cookie_get_inval_work, - fscache_cookie_get_lru, - fscache_cookie_get_use_work, - fscache_cookie_new_acquire, - fscache_cookie_put_hash_collision, - fscache_cookie_put_lru, - fscache_cookie_put_object, - fscache_cookie_put_over_queued, + fscache_cookie_get_reacquire, + fscache_cookie_get_register_netfs, + fscache_cookie_put_acquire_nobufs, + fscache_cookie_put_dup_netfs, fscache_cookie_put_relinquish, - fscache_cookie_put_withdrawn, - fscache_cookie_put_work, - fscache_cookie_see_active, - fscache_cookie_see_lru_discard, - fscache_cookie_see_lru_do_one, - fscache_cookie_see_relinquish, - fscache_cookie_see_withdraw, - fscache_cookie_see_work, + fscache_cookie_put_object, + fscache_cookie_put_parent, }; -enum fscache_active_trace { - fscache_active_use, - fscache_active_use_modify, - fscache_active_unuse, +enum fscache_page_trace { + fscache_page_cached, + fscache_page_inval, + fscache_page_maybe_release, + fscache_page_radix_clear_store, + fscache_page_radix_delete, + fscache_page_radix_insert, + fscache_page_radix_pend2store, + fscache_page_radix_set_pend, + fscache_page_uncache, + fscache_page_write, + fscache_page_write_end, + fscache_page_write_end_pend, + fscache_page_write_end_noc, + fscache_page_write_wait, + fscache_page_trace__nr }; -enum fscache_access_trace { - fscache_access_acquire_volume, - fscache_access_acquire_volume_end, - fscache_access_cache_pin, - fscache_access_cache_unpin, - fscache_access_invalidate_cookie, - fscache_access_invalidate_cookie_end, - fscache_access_io_end, - fscache_access_io_not_live, - fscache_access_io_read, - fscache_access_io_resize, - fscache_access_io_wait, - 
fscache_access_io_write, - fscache_access_lookup_cookie, - fscache_access_lookup_cookie_end, - fscache_access_lookup_cookie_end_failed, - fscache_access_relinquish_volume, - fscache_access_relinquish_volume_end, - fscache_access_unlive, +enum fscache_op_trace { + fscache_op_cancel, + fscache_op_cancel_all, + fscache_op_cancelled, + fscache_op_completed, + fscache_op_enqueue_async, + fscache_op_enqueue_mythread, + fscache_op_gc, + fscache_op_init, + fscache_op_put, + fscache_op_run, + fscache_op_signal, + fscache_op_submit, + fscache_op_submit_ex, + fscache_op_work, + fscache_op_trace__nr +}; + +enum fscache_page_op_trace { + fscache_page_op_alloc_one, + fscache_page_op_attr_changed, + fscache_page_op_check_consistency, + fscache_page_op_invalidate, + fscache_page_op_retr_multi, + fscache_page_op_retr_one, + fscache_page_op_write_one, + fscache_page_op_trace__nr }; #endif @@ -103,79 +85,59 @@ enum fscache_access_trace { /* * Declare tracing information enums and their string mappings for display. */ -#define fscache_cache_traces \ - EM(fscache_cache_collision, "*COLLIDE*") \ - EM(fscache_cache_get_acquire, "GET acq ") \ - EM(fscache_cache_new_acquire, "NEW acq ") \ - EM(fscache_cache_put_alloc_volume, "PUT alvol") \ - EM(fscache_cache_put_cache, "PUT cache") \ - EM(fscache_cache_put_prep_failed, "PUT pfail") \ - EM(fscache_cache_put_relinquish, "PUT relnq") \ - E_(fscache_cache_put_volume, "PUT vol ") - -#define fscache_volume_traces \ - EM(fscache_volume_collision, "*COLLIDE*") \ - EM(fscache_volume_get_cookie, "GET cook ") \ - EM(fscache_volume_get_create_work, "GET creat") \ - EM(fscache_volume_get_hash_collision, "GET hcoll") \ - EM(fscache_volume_free, "FREE ") \ - EM(fscache_volume_new_acquire, "NEW acq ") \ - EM(fscache_volume_put_cookie, "PUT cook ") \ - EM(fscache_volume_put_create_work, "PUT creat") \ - EM(fscache_volume_put_hash_collision, "PUT hcoll") \ - EM(fscache_volume_put_relinquish, "PUT relnq") \ - EM(fscache_volume_see_create_work, "SEE creat") \ - EM(fscache_volume_see_hash_wake, "SEE hwake") \ - E_(fscache_volume_wait_create_work, "WAIT crea") - #define fscache_cookie_traces \ - EM(fscache_cookie_collision, "*COLLIDE*") \ - EM(fscache_cookie_discard, "DISCARD ") \ - EM(fscache_cookie_get_attach_object, "GET attch") \ - EM(fscache_cookie_get_hash_collision, "GET hcoll") \ - EM(fscache_cookie_get_end_access, "GQ endac") \ - EM(fscache_cookie_get_inval_work, "GQ inval") \ - EM(fscache_cookie_get_lru, "GET lru ") \ - EM(fscache_cookie_get_use_work, "GQ use ") \ - EM(fscache_cookie_new_acquire, "NEW acq ") \ - EM(fscache_cookie_put_hash_collision, "PUT hcoll") \ - EM(fscache_cookie_put_lru, "PUT lru ") \ - EM(fscache_cookie_put_object, "PUT obj ") \ - EM(fscache_cookie_put_over_queued, "PQ overq") \ - EM(fscache_cookie_put_relinquish, "PUT relnq") \ - EM(fscache_cookie_put_withdrawn, "PUT wthdn") \ - EM(fscache_cookie_put_work, "PQ work ") \ - EM(fscache_cookie_see_active, "- activ") \ - EM(fscache_cookie_see_lru_discard, "- x-lru") \ - EM(fscache_cookie_see_lru_do_one, "- lrudo") \ - EM(fscache_cookie_see_relinquish, "- x-rlq") \ - EM(fscache_cookie_see_withdraw, "- x-wth") \ - E_(fscache_cookie_see_work, "- work ") + EM(fscache_cookie_collision, "*COLLISION*") \ + EM(fscache_cookie_discard, "DISCARD") \ + EM(fscache_cookie_get_acquire_parent, "GET prn") \ + EM(fscache_cookie_get_attach_object, "GET obj") \ + EM(fscache_cookie_get_reacquire, "GET raq") \ + EM(fscache_cookie_get_register_netfs, "GET net") \ + EM(fscache_cookie_put_acquire_nobufs, "PUT nbf") \ + 
EM(fscache_cookie_put_dup_netfs, "PUT dnt") \ + EM(fscache_cookie_put_relinquish, "PUT rlq") \ + EM(fscache_cookie_put_object, "PUT obj") \ + E_(fscache_cookie_put_parent, "PUT prn") -#define fscache_active_traces \ - EM(fscache_active_use, "USE ") \ - EM(fscache_active_use_modify, "USE-m ") \ - E_(fscache_active_unuse, "UNUSE ") +#define fscache_page_traces \ + EM(fscache_page_cached, "Cached ") \ + EM(fscache_page_inval, "InvalPg") \ + EM(fscache_page_maybe_release, "MayRels") \ + EM(fscache_page_uncache, "Uncache") \ + EM(fscache_page_radix_clear_store, "RxCStr ") \ + EM(fscache_page_radix_delete, "RxDel ") \ + EM(fscache_page_radix_insert, "RxIns ") \ + EM(fscache_page_radix_pend2store, "RxP2S ") \ + EM(fscache_page_radix_set_pend, "RxSPend ") \ + EM(fscache_page_write, "WritePg") \ + EM(fscache_page_write_end, "EndPgWr") \ + EM(fscache_page_write_end_pend, "EndPgWP") \ + EM(fscache_page_write_end_noc, "EndPgNC") \ + E_(fscache_page_write_wait, "WtOnWrt") -#define fscache_access_traces \ - EM(fscache_access_acquire_volume, "BEGIN acq_vol") \ - EM(fscache_access_acquire_volume_end, "END acq_vol") \ - EM(fscache_access_cache_pin, "PIN cache ") \ - EM(fscache_access_cache_unpin, "UNPIN cache ") \ - EM(fscache_access_invalidate_cookie, "BEGIN inval ") \ - EM(fscache_access_invalidate_cookie_end,"END inval ") \ - EM(fscache_access_io_end, "END io ") \ - EM(fscache_access_io_not_live, "END io_notl") \ - EM(fscache_access_io_read, "BEGIN io_read") \ - EM(fscache_access_io_resize, "BEGIN io_resz") \ - EM(fscache_access_io_wait, "WAIT io ") \ - EM(fscache_access_io_write, "BEGIN io_writ") \ - EM(fscache_access_lookup_cookie, "BEGIN lookup ") \ - EM(fscache_access_lookup_cookie_end, "END lookup ") \ - EM(fscache_access_lookup_cookie_end_failed,"END lookupf") \ - EM(fscache_access_relinquish_volume, "BEGIN rlq_vol") \ - EM(fscache_access_relinquish_volume_end,"END rlq_vol") \ - E_(fscache_access_unlive, "END unlive ") +#define fscache_op_traces \ + EM(fscache_op_cancel, "Cancel1") \ + EM(fscache_op_cancel_all, "CancelA") \ + EM(fscache_op_cancelled, "Canclld") \ + EM(fscache_op_completed, "Complet") \ + EM(fscache_op_enqueue_async, "EnqAsyn") \ + EM(fscache_op_enqueue_mythread, "EnqMyTh") \ + EM(fscache_op_gc, "GC ") \ + EM(fscache_op_init, "Init ") \ + EM(fscache_op_put, "Put ") \ + EM(fscache_op_run, "Run ") \ + EM(fscache_op_signal, "Signal ") \ + EM(fscache_op_submit, "Submit ") \ + EM(fscache_op_submit_ex, "SubmitX") \ + E_(fscache_op_work, "Work ") + +#define fscache_page_op_traces \ + EM(fscache_page_op_alloc_one, "Alloc1 ") \ + EM(fscache_page_op_attr_changed, "AttrChg") \ + EM(fscache_page_op_check_consistency, "CheckCn") \ + EM(fscache_page_op_invalidate, "Inval ") \ + EM(fscache_page_op_retr_multi, "RetrMul") \ + EM(fscache_page_op_retr_one, "Retr1 ") \ + E_(fscache_page_op_write_one, "Write1 ") /* * Export enum symbols via userspace. 
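/*
 * Aside: the EM()/E_() lists above are expanded twice, which is how one
 * list both exports the enum values and prints them as strings. A
 * condensed sketch of the idiom, with a hypothetical enum and strings:
 */
enum { sample_first, sample_last };

#define sample_traces \
	EM(sample_first, "FIRST") \
	E_(sample_last,  "LAST ")

/* Pass 1: make each enum value visible to userspace tooling. */
#undef EM
#undef E_
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
sample_traces;

/*
 * Pass 2: remap the same list to { value, "string" } pairs that
 * __print_symbolic() can consume inside a TP_printk(), e.g.
 *	__print_symbolic(__entry->why, sample_traces)
 */
#undef EM
#undef E_
#define EM(a, b) { a, b },
#define E_(a, b) { a, b }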
@@ -185,10 +147,7 @@ enum fscache_access_trace { #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); -fscache_cache_traces; -fscache_volume_traces; fscache_cookie_traces; -fscache_access_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -200,56 +159,6 @@ fscache_access_traces; #define E_(a, b) { a, b } -TRACE_EVENT(fscache_cache, - TP_PROTO(unsigned int cache_debug_id, - int usage, - enum fscache_cache_trace where), - - TP_ARGS(cache_debug_id, usage, where), - - TP_STRUCT__entry( - __field(unsigned int, cache ) - __field(int, usage ) - __field(enum fscache_cache_trace, where ) - ), - - TP_fast_assign( - __entry->cache = cache_debug_id; - __entry->usage = usage; - __entry->where = where; - ), - - TP_printk("C=%08x %s r=%d", - __entry->cache, - __print_symbolic(__entry->where, fscache_cache_traces), - __entry->usage) - ); - -TRACE_EVENT(fscache_volume, - TP_PROTO(unsigned int volume_debug_id, - int usage, - enum fscache_volume_trace where), - - TP_ARGS(volume_debug_id, usage, where), - - TP_STRUCT__entry( - __field(unsigned int, volume ) - __field(int, usage ) - __field(enum fscache_volume_trace, where ) - ), - - TP_fast_assign( - __entry->volume = volume_debug_id; - __entry->usage = usage; - __entry->where = where; - ), - - TP_printk("V=%08x %s u=%d", - __entry->volume, - __print_symbolic(__entry->where, fscache_volume_traces), - __entry->usage) - ); - TRACE_EVENT(fscache_cookie, TP_PROTO(unsigned int cookie_debug_id, int ref, @@ -259,144 +168,39 @@ TRACE_EVENT(fscache_cookie, TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(int, ref ) __field(enum fscache_cookie_trace, where ) + __field(int, ref ) ), TP_fast_assign( __entry->cookie = cookie_debug_id; - __entry->ref = ref; __entry->where = where; + __entry->ref = ref; ), - TP_printk("c=%08x %s r=%d", - __entry->cookie, + TP_printk("%s c=%08x r=%d", __print_symbolic(__entry->where, fscache_cookie_traces), - __entry->ref) + __entry->cookie, __entry->ref) ); -TRACE_EVENT(fscache_active, - TP_PROTO(unsigned int cookie_debug_id, - int ref, - int n_active, - int n_accesses, - enum fscache_active_trace why), +TRACE_EVENT(fscache_netfs, + TP_PROTO(struct fscache_netfs *netfs), - TP_ARGS(cookie_debug_id, ref, n_active, n_accesses, why), + TP_ARGS(netfs), TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(int, ref ) - __field(int, n_active ) - __field(int, n_accesses ) - __field(enum fscache_active_trace, why ) + __array(char, name, 8 ) ), TP_fast_assign( - __entry->cookie = cookie_debug_id; - __entry->ref = ref; - __entry->n_active = n_active; - __entry->n_accesses = n_accesses; - __entry->why = why; + __entry->cookie = netfs->primary_index->debug_id; + strncpy(__entry->name, netfs->name, 8); + __entry->name[7] = 0; ), - TP_printk("c=%08x %s r=%d a=%d c=%d", - __entry->cookie, - __print_symbolic(__entry->why, fscache_active_traces), - __entry->ref, - __entry->n_accesses, - __entry->n_active) - ); - -TRACE_EVENT(fscache_access_cache, - TP_PROTO(unsigned int cache_debug_id, - int ref, - int n_accesses, - enum fscache_access_trace why), - - TP_ARGS(cache_debug_id, ref, n_accesses, why), - - TP_STRUCT__entry( - __field(unsigned int, cache ) - __field(int, ref ) - __field(int, n_accesses ) - __field(enum fscache_access_trace, why ) - ), - - TP_fast_assign( - __entry->cache = cache_debug_id; - __entry->ref = ref; - __entry->n_accesses = n_accesses; - __entry->why = why; - ), - - TP_printk("C=%08x %s r=%d a=%d", - __entry->cache, - __print_symbolic(__entry->why, 
fscache_access_traces), - __entry->ref, - __entry->n_accesses) - ); - -TRACE_EVENT(fscache_access_volume, - TP_PROTO(unsigned int volume_debug_id, - unsigned int cookie_debug_id, - int ref, - int n_accesses, - enum fscache_access_trace why), - - TP_ARGS(volume_debug_id, cookie_debug_id, ref, n_accesses, why), - - TP_STRUCT__entry( - __field(unsigned int, volume ) - __field(unsigned int, cookie ) - __field(int, ref ) - __field(int, n_accesses ) - __field(enum fscache_access_trace, why ) - ), - - TP_fast_assign( - __entry->volume = volume_debug_id; - __entry->cookie = cookie_debug_id; - __entry->ref = ref; - __entry->n_accesses = n_accesses; - __entry->why = why; - ), - - TP_printk("V=%08x c=%08x %s r=%d a=%d", - __entry->volume, - __entry->cookie, - __print_symbolic(__entry->why, fscache_access_traces), - __entry->ref, - __entry->n_accesses) - ); - -TRACE_EVENT(fscache_access, - TP_PROTO(unsigned int cookie_debug_id, - int ref, - int n_accesses, - enum fscache_access_trace why), - - TP_ARGS(cookie_debug_id, ref, n_accesses, why), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(int, ref ) - __field(int, n_accesses ) - __field(enum fscache_access_trace, why ) - ), - - TP_fast_assign( - __entry->cookie = cookie_debug_id; - __entry->ref = ref; - __entry->n_accesses = n_accesses; - __entry->why = why; - ), - - TP_printk("c=%08x %s r=%d a=%d", - __entry->cookie, - __print_symbolic(__entry->why, fscache_access_traces), - __entry->ref, - __entry->n_accesses) + TP_printk("c=%08x n=%s", + __entry->cookie, __entry->name) ); TRACE_EVENT(fscache_acquire, @@ -406,21 +210,26 @@ TRACE_EVENT(fscache_acquire, TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(unsigned int, volume ) - __field(int, v_ref ) - __field(int, v_n_cookies ) + __field(unsigned int, parent ) + __array(char, name, 8 ) + __field(int, p_ref ) + __field(int, p_n_children ) + __field(u8, p_flags ) ), TP_fast_assign( __entry->cookie = cookie->debug_id; - __entry->volume = cookie->volume->debug_id; - __entry->v_ref = refcount_read(&cookie->volume->ref); - __entry->v_n_cookies = atomic_read(&cookie->volume->n_cookies); + __entry->parent = cookie->parent->debug_id; + __entry->p_ref = refcount_read(&cookie->parent->ref); + __entry->p_n_children = atomic_read(&cookie->parent->n_children); + __entry->p_flags = cookie->parent->flags; + memcpy(__entry->name, cookie->def->name, 8); + __entry->name[7] = 0; ), - TP_printk("c=%08x V=%08x vr=%d vc=%d", - __entry->cookie, - __entry->volume, __entry->v_ref, __entry->v_n_cookies) + TP_printk("c=%08x p=%08x pr=%d pc=%d pf=%02x n=%s", + __entry->cookie, __entry->parent, __entry->p_ref, + __entry->p_n_children, __entry->p_flags, __entry->name) ); TRACE_EVENT(fscache_relinquish, @@ -430,8 +239,9 @@ TRACE_EVENT(fscache_relinquish, TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(unsigned int, volume ) + __field(unsigned int, parent ) __field(int, ref ) + __field(int, n_children ) __field(int, n_active ) __field(u8, flags ) __field(bool, retire ) @@ -439,58 +249,272 @@ TRACE_EVENT(fscache_relinquish, TP_fast_assign( __entry->cookie = cookie->debug_id; - __entry->volume = cookie->volume->debug_id; + __entry->parent = cookie->parent->debug_id; __entry->ref = refcount_read(&cookie->ref); + __entry->n_children = atomic_read(&cookie->n_children); __entry->n_active = atomic_read(&cookie->n_active); __entry->flags = cookie->flags; __entry->retire = retire; ), - TP_printk("c=%08x V=%08x r=%d U=%d f=%02x rt=%u", - __entry->cookie, __entry->volume, __entry->ref, - __entry->n_active, 
__entry->flags, __entry->retire) + TP_printk("c=%08x r=%d p=%08x Nc=%d Na=%d f=%02x r=%u", + __entry->cookie, __entry->ref, + __entry->parent, __entry->n_children, __entry->n_active, + __entry->flags, __entry->retire) ); -TRACE_EVENT(fscache_invalidate, - TP_PROTO(struct fscache_cookie *cookie, loff_t new_size), +TRACE_EVENT(fscache_enable, + TP_PROTO(struct fscache_cookie *cookie), - TP_ARGS(cookie, new_size), + TP_ARGS(cookie), TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(loff_t, new_size ) + __field(int, ref ) + __field(int, n_children ) + __field(int, n_active ) + __field(u8, flags ) ), TP_fast_assign( __entry->cookie = cookie->debug_id; - __entry->new_size = new_size; + __entry->ref = refcount_read(&cookie->ref); + __entry->n_children = atomic_read(&cookie->n_children); + __entry->n_active = atomic_read(&cookie->n_active); + __entry->flags = cookie->flags; ), - TP_printk("c=%08x sz=%llx", - __entry->cookie, __entry->new_size) + TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x", + __entry->cookie, __entry->ref, + __entry->n_children, __entry->n_active, __entry->flags) ); -TRACE_EVENT(fscache_resize, - TP_PROTO(struct fscache_cookie *cookie, loff_t new_size), +TRACE_EVENT(fscache_disable, + TP_PROTO(struct fscache_cookie *cookie), - TP_ARGS(cookie, new_size), + TP_ARGS(cookie), TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(loff_t, old_size ) - __field(loff_t, new_size ) + __field(int, ref ) + __field(int, n_children ) + __field(int, n_active ) + __field(u8, flags ) ), TP_fast_assign( __entry->cookie = cookie->debug_id; - __entry->old_size = cookie->object_size; - __entry->new_size = new_size; + __entry->ref = refcount_read(&cookie->ref); + __entry->n_children = atomic_read(&cookie->n_children); + __entry->n_active = atomic_read(&cookie->n_active); + __entry->flags = cookie->flags; ), - TP_printk("c=%08x os=%08llx sz=%08llx", + TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x", + __entry->cookie, __entry->ref, + __entry->n_children, __entry->n_active, __entry->flags) + ); + +TRACE_EVENT(fscache_osm, + TP_PROTO(struct fscache_object *object, + const struct fscache_state *state, + bool wait, bool oob, s8 event_num), + + TP_ARGS(object, state, wait, oob, event_num), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(unsigned int, object ) + __array(char, state, 8 ) + __field(bool, wait ) + __field(bool, oob ) + __field(s8, event_num ) + ), + + TP_fast_assign( + __entry->cookie = object->cookie->debug_id; + __entry->object = object->debug_id; + __entry->wait = wait; + __entry->oob = oob; + __entry->event_num = event_num; + memcpy(__entry->state, state->short_name, 8); + ), + + TP_printk("c=%08x o=%08d %s %s%sev=%d", __entry->cookie, - __entry->old_size, - __entry->new_size) + __entry->object, + __entry->state, + __print_symbolic(__entry->wait, + { true, "WAIT" }, + { false, "WORK" }), + __print_symbolic(__entry->oob, + { true, " OOB " }, + { false, " " }), + __entry->event_num) + ); + +TRACE_EVENT(fscache_page, + TP_PROTO(struct fscache_cookie *cookie, struct page *page, + enum fscache_page_trace why), + + TP_ARGS(cookie, page, why), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(pgoff_t, page ) + __field(enum fscache_page_trace, why ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->page = page->index; + __entry->why = why; + ), + + TP_printk("c=%08x %s pg=%lx", + __entry->cookie, + __print_symbolic(__entry->why, fscache_page_traces), + __entry->page) + ); + +TRACE_EVENT(fscache_check_page, + TP_PROTO(struct 
fscache_cookie *cookie, struct page *page, + void *val, int n), + + TP_ARGS(cookie, page, val, n), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(void *, page ) + __field(void *, val ) + __field(int, n ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->page = page; + __entry->val = val; + __entry->n = n; + ), + + TP_printk("c=%08x pg=%p val=%p n=%d", + __entry->cookie, __entry->page, __entry->val, __entry->n) + ); + +TRACE_EVENT(fscache_wake_cookie, + TP_PROTO(struct fscache_cookie *cookie), + + TP_ARGS(cookie), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + ), + + TP_printk("c=%08x", __entry->cookie) + ); + +TRACE_EVENT(fscache_op, + TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op, + enum fscache_op_trace why), + + TP_ARGS(cookie, op, why), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(unsigned int, op ) + __field(enum fscache_op_trace, why ) + ), + + TP_fast_assign( + __entry->cookie = cookie ? cookie->debug_id : 0; + __entry->op = op->debug_id; + __entry->why = why; + ), + + TP_printk("c=%08x op=%08x %s", + __entry->cookie, __entry->op, + __print_symbolic(__entry->why, fscache_op_traces)) + ); + +TRACE_EVENT(fscache_page_op, + TP_PROTO(struct fscache_cookie *cookie, struct page *page, + struct fscache_operation *op, enum fscache_page_op_trace what), + + TP_ARGS(cookie, page, op, what), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(unsigned int, op ) + __field(pgoff_t, page ) + __field(enum fscache_page_op_trace, what ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->page = page ? page->index : 0; + __entry->op = op->debug_id; + __entry->what = what; + ), + + TP_printk("c=%08x %s pg=%lx op=%08x", + __entry->cookie, + __print_symbolic(__entry->what, fscache_page_op_traces), + __entry->page, __entry->op) + ); + +TRACE_EVENT(fscache_wrote_page, + TP_PROTO(struct fscache_cookie *cookie, struct page *page, + struct fscache_operation *op, int ret), + + TP_ARGS(cookie, page, op, ret), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(unsigned int, op ) + __field(pgoff_t, page ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->page = page->index; + __entry->op = op->debug_id; + __entry->ret = ret; + ), + + TP_printk("c=%08x pg=%lx op=%08x ret=%d", + __entry->cookie, __entry->page, __entry->op, __entry->ret) + ); + +TRACE_EVENT(fscache_gang_lookup, + TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op, + void **results, int n, pgoff_t store_limit), + + TP_ARGS(cookie, op, results, n, store_limit), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(unsigned int, op ) + __field(pgoff_t, results0 ) + __field(int, n ) + __field(pgoff_t, store_limit ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->op = op->debug_id; + __entry->results0 = results[0] ? 
((struct page *)results[0])->index : (pgoff_t)-1; + __entry->n = n; + __entry->store_limit = store_limit; + ), + + TP_printk("c=%08x op=%08x r0=%lx n=%d sl=%lx", + __entry->cookie, __entry->op, __entry->results0, __entry->n, + __entry->store_limit) ); #endif /* _TRACE_FSCACHE_H */ diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 7346f0164c..0dd30de00e 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -6,7 +6,6 @@ #define _TRACE_IO_URING_H #include -#include struct io_wq_work; @@ -498,66 +497,6 @@ TRACE_EVENT(io_uring_task_run, (unsigned long long) __entry->user_data) ); -/* - * io_uring_req_failed - called when an sqe is errored dring submission - * - * @sqe: pointer to the io_uring_sqe that failed - * @error: error it failed with - * - * Allows easier diagnosing of malformed requests in production systems. - */ -TRACE_EVENT(io_uring_req_failed, - - TP_PROTO(const struct io_uring_sqe *sqe, int error), - - TP_ARGS(sqe, error), - - TP_STRUCT__entry ( - __field( u8, opcode ) - __field( u8, flags ) - __field( u8, ioprio ) - __field( u64, off ) - __field( u64, addr ) - __field( u32, len ) - __field( u32, op_flags ) - __field( u64, user_data ) - __field( u16, buf_index ) - __field( u16, personality ) - __field( u32, file_index ) - __field( u64, pad1 ) - __field( u64, pad2 ) - __field( int, error ) - ), - - TP_fast_assign( - __entry->opcode = sqe->opcode; - __entry->flags = sqe->flags; - __entry->ioprio = sqe->ioprio; - __entry->off = sqe->off; - __entry->addr = sqe->addr; - __entry->len = sqe->len; - __entry->op_flags = sqe->rw_flags; - __entry->user_data = sqe->user_data; - __entry->buf_index = sqe->buf_index; - __entry->personality = sqe->personality; - __entry->file_index = sqe->file_index; - __entry->pad1 = sqe->__pad2[0]; - __entry->pad2 = sqe->__pad2[1]; - __entry->error = error; - ), - - TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, " - "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, " - "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", - __entry->opcode, __entry->flags, __entry->ioprio, - (unsigned long long)__entry->off, - (unsigned long long) __entry->addr, __entry->len, - __entry->op_flags, (unsigned long long) __entry->user_data, - __entry->buf_index, __entry->personality, __entry->file_index, - (unsigned long long) __entry->pad1, - (unsigned long long) __entry->pad2, __entry->error) -); - #endif /* _TRACE_IO_URING_H */ /* This part must be outside protection */ diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 29096fe126..72b4582322 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -101,9 +101,8 @@ TRACE_EVENT(map, __entry->size = size; ), - TP_printk("IOMMU: iova=0x%016llx - 0x%016llx paddr=0x%016llx size=%zu", - __entry->iova, __entry->iova + __entry->size, __entry->paddr, - __entry->size + TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu", + __entry->iova, __entry->paddr, __entry->size ) ); @@ -125,9 +124,8 @@ TRACE_EVENT(unmap, __entry->unmapped_size = unmapped_size; ), - TP_printk("IOMMU: iova=0x%016llx - 0x%016llx size=%zu unmapped_size=%zu", - __entry->iova, __entry->iova + __entry->size, - __entry->size, __entry->unmapped_size + TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu", + __entry->iova, __entry->size, __entry->unmapped_size ) ); diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index d4e631aa97..ab69434e23 100644 --- a/include/trace/events/libata.h +++ 
b/include/trace/events/libata.h @@ -132,37 +132,9 @@ ata_protocol_name(ATAPI_PROT_PIO), \ ata_protocol_name(ATAPI_PROT_DMA)) -#define ata_class_name(class) { class, #class } -#define show_class_name(val) \ - __print_symbolic(val, \ - ata_class_name(ATA_DEV_UNKNOWN), \ - ata_class_name(ATA_DEV_ATA), \ - ata_class_name(ATA_DEV_ATA_UNSUP), \ - ata_class_name(ATA_DEV_ATAPI), \ - ata_class_name(ATA_DEV_ATAPI_UNSUP), \ - ata_class_name(ATA_DEV_PMP), \ - ata_class_name(ATA_DEV_PMP_UNSUP), \ - ata_class_name(ATA_DEV_SEMB), \ - ata_class_name(ATA_DEV_SEMB_UNSUP), \ - ata_class_name(ATA_DEV_ZAC), \ - ata_class_name(ATA_DEV_ZAC_UNSUP), \ - ata_class_name(ATA_DEV_NONE)) - -#define ata_sff_hsm_state_name(state) { state, #state } -#define show_sff_hsm_state_name(val) \ - __print_symbolic(val, \ - ata_sff_hsm_state_name(HSM_ST_IDLE), \ - ata_sff_hsm_state_name(HSM_ST_FIRST), \ - ata_sff_hsm_state_name(HSM_ST), \ - ata_sff_hsm_state_name(HSM_ST_LAST), \ - ata_sff_hsm_state_name(HSM_ST_ERR)) - const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) -const char *libata_trace_parse_host_stat(struct trace_seq *, unsigned char); -#define __parse_host_stat(s) libata_trace_parse_host_stat(p, s) - const char *libata_trace_parse_eh_action(struct trace_seq *, unsigned int); #define __parse_eh_action(a) libata_trace_parse_eh_action(p, a) @@ -172,14 +144,11 @@ const char *libata_trace_parse_eh_err_mask(struct trace_seq *, unsigned int); const char *libata_trace_parse_qc_flags(struct trace_seq *, unsigned int); #define __parse_qc_flags(f) libata_trace_parse_qc_flags(p, f) -const char *libata_trace_parse_tf_flags(struct trace_seq *, unsigned int); -#define __parse_tf_flags(f) libata_trace_parse_tf_flags(p, f) - const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char, unsigned char, unsigned char); #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h) -DECLARE_EVENT_CLASS(ata_qc_issue_template, +TRACE_EVENT(ata_qc_issue, TP_PROTO(struct ata_queued_cmd *qc), @@ -238,14 +207,6 @@ DECLARE_EVENT_CLASS(ata_qc_issue_template, __entry->dev) ); -DEFINE_EVENT(ata_qc_issue_template, ata_qc_prep, - TP_PROTO(struct ata_queued_cmd *qc), - TP_ARGS(qc)); - -DEFINE_EVENT(ata_qc_issue_template, ata_qc_issue, - TP_PROTO(struct ata_queued_cmd *qc), - TP_ARGS(qc)); - DECLARE_EVENT_CLASS(ata_qc_complete_template, TP_PROTO(struct ata_queued_cmd *qc), @@ -314,128 +275,6 @@ DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_done, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc)); -TRACE_EVENT(ata_tf_load, - - TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf), - - TP_ARGS(ap, tf), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __field( unsigned char, cmd ) - __field( unsigned char, dev ) - __field( unsigned char, lbal ) - __field( unsigned char, lbam ) - __field( unsigned char, lbah ) - __field( unsigned char, nsect ) - __field( unsigned char, feature ) - __field( unsigned char, hob_lbal ) - __field( unsigned char, hob_lbam ) - __field( unsigned char, hob_lbah ) - __field( unsigned char, hob_nsect ) - __field( unsigned char, hob_feature ) - __field( unsigned char, proto ) - ), - - TP_fast_assign( - __entry->ata_port = ap->print_id; - __entry->proto = tf->protocol; - __entry->cmd = tf->command; - __entry->dev = tf->device; - __entry->lbal = tf->lbal; - __entry->lbam = tf->lbam; - __entry->lbah = tf->lbah; - __entry->hob_lbal = tf->hob_lbal; - __entry->hob_lbam = tf->hob_lbam; - __entry->hob_lbah = tf->hob_lbah; - 
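/*
 * Aside: the conversion above collapses a DECLARE_EVENT_CLASS() plus its
 * DEFINE_EVENT()s back into a single TRACE_EVENT(). The class/instance
 * split being removed looks like this sketch (names hypothetical): one
 * class carries the record layout, and each DEFINE_EVENT() stamps out a
 * named tracepoint that shares it.
 */
DECLARE_EVENT_CLASS(sample_port_template,
	TP_PROTO(unsigned int port_id),
	TP_ARGS(port_id),
	TP_STRUCT__entry(
		__field(unsigned int, port_id)
	),
	TP_fast_assign(
		__entry->port_id = port_id;
	),
	TP_printk("port=%u", __entry->port_id)
);

DEFINE_EVENT(sample_port_template, sample_port_freeze,
	TP_PROTO(unsigned int port_id),
	TP_ARGS(port_id));

DEFINE_EVENT(sample_port_template, sample_port_thaw,
	TP_PROTO(unsigned int port_id),
	TP_ARGS(port_id));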
__entry->feature = tf->feature; - __entry->hob_feature = tf->hob_feature; - __entry->nsect = tf->nsect; - __entry->hob_nsect = tf->hob_nsect; - ), - - TP_printk("ata_port=%u proto=%s cmd=%s%s " \ - " tf=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)", - __entry->ata_port, - show_protocol_name(__entry->proto), - show_opcode_name(__entry->cmd), - __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect), - __entry->cmd, __entry->feature, __entry->nsect, - __entry->lbal, __entry->lbam, __entry->lbah, - __entry->hob_feature, __entry->hob_nsect, - __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah, - __entry->dev) -); - -DECLARE_EVENT_CLASS(ata_exec_command_template, - - TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), - - TP_ARGS(ap, tf, tag), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __field( unsigned int, tag ) - __field( unsigned char, cmd ) - __field( unsigned char, feature ) - __field( unsigned char, hob_nsect ) - __field( unsigned char, proto ) - ), - - TP_fast_assign( - __entry->ata_port = ap->print_id; - __entry->tag = tag; - __entry->proto = tf->protocol; - __entry->cmd = tf->command; - __entry->feature = tf->feature; - __entry->hob_nsect = tf->hob_nsect; - ), - - TP_printk("ata_port=%u tag=%d proto=%s cmd=%s%s", - __entry->ata_port, __entry->tag, - show_protocol_name(__entry->proto), - show_opcode_name(__entry->cmd), - __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect)) -); - -DEFINE_EVENT(ata_exec_command_template, ata_exec_command, - TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), - TP_ARGS(ap, tf, tag)); - -DEFINE_EVENT(ata_exec_command_template, ata_bmdma_setup, - TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), - TP_ARGS(ap, tf, tag)); - -DEFINE_EVENT(ata_exec_command_template, ata_bmdma_start, - TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), - TP_ARGS(ap, tf, tag)); - -DEFINE_EVENT(ata_exec_command_template, ata_bmdma_stop, - TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), - TP_ARGS(ap, tf, tag)); - -TRACE_EVENT(ata_bmdma_status, - - TP_PROTO(struct ata_port *ap, unsigned int host_stat), - - TP_ARGS(ap, host_stat), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __field( unsigned int, tag ) - __field( unsigned char, host_stat ) - ), - - TP_fast_assign( - __entry->ata_port = ap->print_id; - __entry->host_stat = host_stat; - ), - - TP_printk("ata_port=%u host_stat=%s", - __entry->ata_port, - __parse_host_stat(__entry->host_stat)) -); - TRACE_EVENT(ata_eh_link_autopsy, TP_PROTO(struct ata_device *dev, unsigned int eh_action, unsigned int eh_err_mask), @@ -490,259 +329,6 @@ TRACE_EVENT(ata_eh_link_autopsy_qc, __parse_eh_err_mask(__entry->eh_err_mask)) ); -DECLARE_EVENT_CLASS(ata_eh_action_template, - - TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), - - TP_ARGS(link, devno, eh_action), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __field( unsigned int, ata_dev ) - __field( unsigned int, eh_action ) - ), - - TP_fast_assign( - __entry->ata_port = link->ap->print_id; - __entry->ata_dev = link->pmp + devno; - __entry->eh_action = eh_action; - ), - - TP_printk("ata_port=%u ata_dev=%u eh_action=%s", - __entry->ata_port, __entry->ata_dev, - __parse_eh_action(__entry->eh_action)) -); - -DEFINE_EVENT(ata_eh_action_template, ata_eh_about_to_do, - TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int 
eh_action), - TP_ARGS(link, devno, eh_action)); - -DEFINE_EVENT(ata_eh_action_template, ata_eh_done, - TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), - TP_ARGS(link, devno, eh_action)); - -DECLARE_EVENT_CLASS(ata_link_reset_begin_template, - - TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), - - TP_ARGS(link, class, deadline), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __array( unsigned int, class, 2 ) - __field( unsigned long, deadline ) - ), - - TP_fast_assign( - __entry->ata_port = link->ap->print_id; - memcpy(__entry->class, class, 2); - __entry->deadline = deadline; - ), - - TP_printk("ata_port=%u deadline=%lu classes=[%s,%s]", - __entry->ata_port, __entry->deadline, - show_class_name(__entry->class[0]), - show_class_name(__entry->class[1])) -); - -DEFINE_EVENT(ata_link_reset_begin_template, ata_link_hardreset_begin, - TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), - TP_ARGS(link, class, deadline)); - -DEFINE_EVENT(ata_link_reset_begin_template, ata_slave_hardreset_begin, - TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), - TP_ARGS(link, class, deadline)); - -DEFINE_EVENT(ata_link_reset_begin_template, ata_link_softreset_begin, - TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), - TP_ARGS(link, class, deadline)); - -DECLARE_EVENT_CLASS(ata_link_reset_end_template, - - TP_PROTO(struct ata_link *link, unsigned int *class, int rc), - - TP_ARGS(link, class, rc), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __array( unsigned int, class, 2 ) - __field( int, rc ) - ), - - TP_fast_assign( - __entry->ata_port = link->ap->print_id; - memcpy(__entry->class, class, 2); - __entry->rc = rc; - ), - - TP_printk("ata_port=%u rc=%d class=[%s,%s]", - __entry->ata_port, __entry->rc, - show_class_name(__entry->class[0]), - show_class_name(__entry->class[1])) -); - -DEFINE_EVENT(ata_link_reset_end_template, ata_link_hardreset_end, - TP_PROTO(struct ata_link *link, unsigned int *class, int rc), - TP_ARGS(link, class, rc)); - -DEFINE_EVENT(ata_link_reset_end_template, ata_slave_hardreset_end, - TP_PROTO(struct ata_link *link, unsigned int *class, int rc), - TP_ARGS(link, class, rc)); - -DEFINE_EVENT(ata_link_reset_end_template, ata_link_softreset_end, - TP_PROTO(struct ata_link *link, unsigned int *class, int rc), - TP_ARGS(link, class, rc)); - -DEFINE_EVENT(ata_link_reset_end_template, ata_link_postreset, - TP_PROTO(struct ata_link *link, unsigned int *class, int rc), - TP_ARGS(link, class, rc)); - -DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset, - TP_PROTO(struct ata_link *link, unsigned int *class, int rc), - TP_ARGS(link, class, rc)); - -DECLARE_EVENT_CLASS(ata_port_eh_begin_template, - - TP_PROTO(struct ata_port *ap), - - TP_ARGS(ap), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - ), - - TP_fast_assign( - __entry->ata_port = ap->print_id; - ), - - TP_printk("ata_port=%u", __entry->ata_port) -); - -DEFINE_EVENT(ata_port_eh_begin_template, ata_std_sched_eh, - TP_PROTO(struct ata_port *ap), - TP_ARGS(ap)); - -DEFINE_EVENT(ata_port_eh_begin_template, ata_port_freeze, - TP_PROTO(struct ata_port *ap), - TP_ARGS(ap)); - -DEFINE_EVENT(ata_port_eh_begin_template, ata_port_thaw, - TP_PROTO(struct ata_port *ap), - TP_ARGS(ap)); - -DECLARE_EVENT_CLASS(ata_sff_hsm_template, - - TP_PROTO(struct ata_queued_cmd *qc, unsigned char status), - - TP_ARGS(qc, status), - - TP_STRUCT__entry( - __field( unsigned int, 
ata_port ) - __field( unsigned int, ata_dev ) - __field( unsigned int, tag ) - __field( unsigned int, qc_flags ) - __field( unsigned int, protocol ) - __field( unsigned int, hsm_state ) - __field( unsigned char, dev_state ) - ), - - TP_fast_assign( - __entry->ata_port = qc->ap->print_id; - __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; - __entry->tag = qc->tag; - __entry->qc_flags = qc->flags; - __entry->protocol = qc->tf.protocol; - __entry->hsm_state = qc->ap->hsm_task_state; - __entry->dev_state = status; - ), - - TP_printk("ata_port=%u ata_dev=%u tag=%d proto=%s flags=%s task_state=%s dev_stat=0x%X", - __entry->ata_port, __entry->ata_dev, __entry->tag, - show_protocol_name(__entry->protocol), - __parse_qc_flags(__entry->qc_flags), - show_sff_hsm_state_name(__entry->hsm_state), - __entry->dev_state) -); - -DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_state, - TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), - TP_ARGS(qc, state)); - -DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_command_complete, - TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), - TP_ARGS(qc, state)); - -DEFINE_EVENT(ata_sff_hsm_template, ata_sff_port_intr, - TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), - TP_ARGS(qc, state)); - -DECLARE_EVENT_CLASS(ata_transfer_data_template, - - TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), - - TP_ARGS(qc, offset, count), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __field( unsigned int, ata_dev ) - __field( unsigned int, tag ) - __field( unsigned int, flags ) - __field( unsigned int, offset ) - __field( unsigned int, bytes ) - ), - - TP_fast_assign( - __entry->ata_port = qc->ap->print_id; - __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; - __entry->tag = qc->tag; - __entry->flags = qc->tf.flags; - __entry->offset = offset; - __entry->bytes = count; - ), - - TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s offset=%u bytes=%u", - __entry->ata_port, __entry->ata_dev, __entry->tag, - __parse_tf_flags(__entry->flags), - __entry->offset, __entry->bytes) -); - -DEFINE_EVENT(ata_transfer_data_template, ata_sff_pio_transfer_data, - TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), - TP_ARGS(qc, offset, count)); - -DEFINE_EVENT(ata_transfer_data_template, atapi_pio_transfer_data, - TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), - TP_ARGS(qc, offset, count)); - -DEFINE_EVENT(ata_transfer_data_template, atapi_send_cdb, - TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), - TP_ARGS(qc, offset, count)); - -DECLARE_EVENT_CLASS(ata_sff_template, - - TP_PROTO(struct ata_port *ap), - - TP_ARGS(ap), - - TP_STRUCT__entry( - __field( unsigned int, ata_port ) - __field( unsigned char, hsm_state ) - ), - - TP_fast_assign( - __entry->ata_port = ap->print_id; - __entry->hsm_state = ap->hsm_task_state; - ), - - TP_printk("ata_port=%u task_state=%s", - __entry->ata_port, - show_sff_hsm_state_name(__entry->hsm_state)) -); - -DEFINE_EVENT(ata_sff_template, ata_sff_flush_pio_task, - TP_PROTO(struct ata_port *ap), - TP_ARGS(ap)); - #endif /* _TRACE_LIBATA_H */ /* This part must be outside protection */ diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h index 14db8044c1..0abff67b96 100644 --- a/include/trace/events/mmap_lock.h +++ b/include/trace/events/mmap_lock.h @@ -13,7 +13,7 @@ struct mm_struct; extern int trace_mmap_lock_reg(void); extern void trace_mmap_lock_unreg(void); 
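/*
 * Aside: the mmap_lock events below use TRACE_EVENT_FN(), which is
 * TRACE_EVENT() plus two callbacks (here trace_mmap_lock_reg/_unreg)
 * invoked when the event is switched on and off. A minimal sketch with
 * hypothetical names:
 */
static int sample_trace_reg(void)
{
	/* set up state needed only while the event is live */
	return 0;
}

static void sample_trace_unreg(void)
{
	/* tear it down again */
}

TRACE_EVENT_FN(sample_lock_released,
	TP_PROTO(void *obj, bool write),
	TP_ARGS(obj, write),
	TP_STRUCT__entry(
		__field(void *, obj)
		__field(bool, write)
	),
	TP_fast_assign(
		__entry->obj = obj;
		__entry->write = write;
	),
	TP_printk("obj=%p write=%s",
		  __entry->obj, __entry->write ? "true" : "false"),
	sample_trace_reg, sample_trace_unreg
);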
-DECLARE_EVENT_CLASS(mmap_lock, +TRACE_EVENT_FN(mmap_lock_start_locking, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), @@ -32,23 +32,15 @@ DECLARE_EVENT_CLASS(mmap_lock, ), TP_printk( - "mm=%p memcg_path=%s write=%s", + "mm=%p memcg_path=%s write=%s\n", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false" - ) + ), + + trace_mmap_lock_reg, trace_mmap_lock_unreg ); -#define DEFINE_MMAP_LOCK_EVENT(name) \ - DEFINE_EVENT_FN(mmap_lock, name, \ - TP_PROTO(struct mm_struct *mm, const char *memcg_path, \ - bool write), \ - TP_ARGS(mm, memcg_path, write), \ - trace_mmap_lock_reg, trace_mmap_lock_unreg) - -DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking); -DEFINE_MMAP_LOCK_EVENT(mmap_lock_released); - TRACE_EVENT_FN(mmap_lock_acquire_returned, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, @@ -71,7 +63,7 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned, ), TP_printk( - "mm=%p memcg_path=%s write=%s success=%s", + "mm=%p memcg_path=%s write=%s success=%s\n", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false", @@ -81,6 +73,34 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned, trace_mmap_lock_reg, trace_mmap_lock_unreg ); +TRACE_EVENT_FN(mmap_lock_released, + + TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), + + TP_ARGS(mm, memcg_path, write), + + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __string(memcg_path, memcg_path) + __field(bool, write) + ), + + TP_fast_assign( + __entry->mm = mm; + __assign_str(memcg_path, memcg_path); + __entry->write = write; + ), + + TP_printk( + "mm=%p memcg_path=%s write=%s\n", + __entry->mm, + __get_str(memcg_path), + __entry->write ? "true" : "false" + ), + + trace_mmap_lock_reg, trace_mmap_lock_unreg +); + #endif /* _TRACE_MMAP_LOCK_H */ /* This part must be outside protection */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index e6f4ebbb4c..4d470bffd9 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -135,7 +135,6 @@ TRACE_EVENT(netfs_read, __field(loff_t, start ) __field(size_t, len ) __field(enum netfs_read_trace, what ) - __field(unsigned int, netfs_inode ) ), TP_fast_assign( @@ -144,14 +143,12 @@ TRACE_EVENT(netfs_read, __entry->start = start; __entry->len = len; __entry->what = what; - __entry->netfs_inode = rreq->inode->i_ino; ), - TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx", + TP_printk("R=%08x %s c=%08x s=%llx %zx", __entry->rreq, __print_symbolic(__entry->what, netfs_read_traces), __entry->cookie, - __entry->netfs_inode, __entry->start, __entry->len) ); diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 171524d352..1d28431e85 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -16,38 +16,38 @@ #define PAGEMAP_MAPPEDDISK 0x0020u #define PAGEMAP_BUFFERS 0x0040u -#define trace_pagemap_flags(folio) ( \ - (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ - (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \ - (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \ - (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \ - (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \ - (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \ +#define trace_pagemap_flags(page) ( \ + (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ + (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \ + (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \ + (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \ + (PageMappedToDisk(page) ? 
PAGEMAP_MAPPEDDISK : 0) | \ + (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \ ) TRACE_EVENT(mm_lru_insertion, - TP_PROTO(struct folio *folio), + TP_PROTO(struct page *page), - TP_ARGS(folio), + TP_ARGS(page), TP_STRUCT__entry( - __field(struct folio *, folio ) + __field(struct page *, page ) __field(unsigned long, pfn ) __field(enum lru_list, lru ) __field(unsigned long, flags ) ), TP_fast_assign( - __entry->folio = folio; - __entry->pfn = folio_pfn(folio); - __entry->lru = folio_lru_list(folio); - __entry->flags = trace_pagemap_flags(folio); + __entry->page = page; + __entry->pfn = page_to_pfn(page); + __entry->lru = page_lru(page); + __entry->flags = trace_pagemap_flags(page); ), /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", - __entry->folio, + TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", + __entry->page, __entry->pfn, __entry->lru, __entry->flags & PAGEMAP_MAPPED ? "M" : " ", @@ -60,21 +60,23 @@ TRACE_EVENT(mm_lru_insertion, TRACE_EVENT(mm_lru_activate, - TP_PROTO(struct folio *folio), + TP_PROTO(struct page *page), - TP_ARGS(folio), + TP_ARGS(page), TP_STRUCT__entry( - __field(struct folio *, folio ) + __field(struct page *, page ) __field(unsigned long, pfn ) ), TP_fast_assign( - __entry->folio = folio; - __entry->pfn = folio_pfn(folio); + __entry->page = page; + __entry->pfn = page_to_pfn(page); ), - TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn) + /* Flag format is based on page-types.c formatting for pagemap */ + TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn) + ); #endif /* _TRACE_PAGEMAP_H */ diff --git a/include/trace/events/random.h b/include/trace/events/random.h index a2d9aa16a5..3d7b432ca5 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -28,71 +28,80 @@ TRACE_EVENT(add_device_randomness, ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - TP_ARGS(bytes, IP), + TP_ARGS(pool_name, bytes, IP), TP_STRUCT__entry( + __field( const char *, pool_name ) __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( + __entry->pool_name = pool_name; __entry->bytes = bytes; __entry->IP = IP; ), - TP_printk("input pool: bytes %d caller %pS", - __entry->bytes, (void *)__entry->IP) + TP_printk("%s pool: bytes %d caller %pS", + __entry->pool_name, __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - TP_ARGS(bytes, IP) + TP_ARGS(pool_name, bytes, IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - TP_ARGS(bytes, IP) + TP_ARGS(pool_name, bytes, IP) ); TRACE_EVENT(credit_entropy_bits, - TP_PROTO(int bits, int entropy_count, unsigned long IP), + TP_PROTO(const char *pool_name, int bits, int entropy_count, + unsigned long IP), - TP_ARGS(bits, entropy_count, IP), + TP_ARGS(pool_name, bits, entropy_count, IP), TP_STRUCT__entry( + __field( const char *, pool_name ) __field( int, bits ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( + __entry->pool_name = pool_name; __entry->bits = bits; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("input pool: bits %d entropy_count %d caller %pS", - __entry->bits, 
__entry->entropy_count, (void *)__entry->IP) + TP_printk("%s pool: bits %d entropy_count %d caller %pS", + __entry->pool_name, __entry->bits, + __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(debit_entropy, - TP_PROTO(int debit_bits), + TP_PROTO(const char *pool_name, int debit_bits), - TP_ARGS( debit_bits), + TP_ARGS(pool_name, debit_bits), TP_STRUCT__entry( + __field( const char *, pool_name ) __field( int, debit_bits ) ), TP_fast_assign( + __entry->pool_name = pool_name; __entry->debit_bits = debit_bits; ), - TP_printk("input pool: debit_bits %d", __entry->debit_bits) + TP_printk("%s: debit_bits %d", __entry->pool_name, + __entry->debit_bits) ); TRACE_EVENT(add_input_randomness, @@ -161,31 +170,36 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(const char *pool_name, int nbytes, int entropy_count, + unsigned long IP), - TP_ARGS(nbytes, entropy_count, IP), + TP_ARGS(pool_name, nbytes, entropy_count, IP), TP_STRUCT__entry( + __field( const char *, pool_name ) __field( int, nbytes ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( + __entry->pool_name = pool_name; __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("input pool: nbytes %d entropy_count %d caller %pS", - __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) + TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", + __entry->pool_name, __entry->nbytes, __entry->entropy_count, + (void *)__entry->IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(const char *pool_name, int nbytes, int entropy_count, + unsigned long IP), - TP_ARGS(nbytes, entropy_count, IP) + TP_ARGS(pool_name, nbytes, entropy_count, IP) ); TRACE_EVENT(urandom_read, diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index c9048f3e47..b2a2672e66 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -8,13 +8,11 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM rpcgss -#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RPCGSS_H #include -#include - /** ** GSS-API related trace events **/ @@ -101,7 +99,7 @@ DECLARE_EVENT_CLASS(rpcgss_gssapi_event, __entry->maj_stat = maj_stat; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " maj_stat=%s", + TP_printk("task:%u@%u maj_stat=%s", __entry->task_id, __entry->client_id, __entry->maj_stat == 0 ? 
"GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat)) @@ -334,8 +332,7 @@ TRACE_EVENT(rpcgss_unwrap_failed, __entry->client_id = task->tk_client->cl_clid; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, - __entry->task_id, __entry->client_id) + TP_printk("task:%u@%u", __entry->task_id, __entry->client_id) ); TRACE_EVENT(rpcgss_bad_seqno, @@ -361,8 +358,7 @@ TRACE_EVENT(rpcgss_bad_seqno, __entry->received = received; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " expected seqno %u, received seqno %u", + TP_printk("task:%u@%u expected seqno %u, received seqno %u", __entry->task_id, __entry->client_id, __entry->expected, __entry->received) ); @@ -390,7 +386,7 @@ TRACE_EVENT(rpcgss_seqno, __entry->seqno = rqst->rq_seqno; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x seqno=%u", + TP_printk("task:%u@%u xid=0x%08x seqno=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno) ); @@ -422,8 +418,7 @@ TRACE_EVENT(rpcgss_need_reencode, __entry->ret = ret; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", + TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno, __entry->seq_xmit, __entry->ret ? "" : "un") @@ -457,8 +452,7 @@ TRACE_EVENT(rpcgss_update_slack, __entry->verfsize = auth->au_verfsize; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", + TP_printk("task:%u@%u xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", __entry->task_id, __entry->client_id, __entry->xid, __entry->auth, __entry->rslack, __entry->ralign, __entry->verfsize) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index fcd3b3f102..de41954995 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -14,9 +14,7 @@ #include #include #include - #include -#include /** ** Event classes @@ -62,74 +60,6 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class, ), \ TP_ARGS(wc, cid)) -DECLARE_EVENT_CLASS(rpcrdma_send_completion_class, - TP_PROTO( - const struct ib_wc *wc, - const struct rpc_rdma_cid *cid - ), - - TP_ARGS(wc, cid), - - TP_STRUCT__entry( - __field(u32, cq_id) - __field(int, completion_id) - ), - - TP_fast_assign( - __entry->cq_id = cid->ci_queue_id; - __entry->completion_id = cid->ci_completion_id; - ), - - TP_printk("cq.id=%u cid=%d", - __entry->cq_id, __entry->completion_id - ) -); - -#define DEFINE_SEND_COMPLETION_EVENT(name) \ - DEFINE_EVENT(rpcrdma_send_completion_class, name, \ - TP_PROTO( \ - const struct ib_wc *wc, \ - const struct rpc_rdma_cid *cid \ - ), \ - TP_ARGS(wc, cid)) - -DECLARE_EVENT_CLASS(rpcrdma_send_flush_class, - TP_PROTO( - const struct ib_wc *wc, - const struct rpc_rdma_cid *cid - ), - - TP_ARGS(wc, cid), - - TP_STRUCT__entry( - __field(u32, cq_id) - __field(int, completion_id) - __field(unsigned long, status) - __field(unsigned int, vendor_err) - ), - - TP_fast_assign( - __entry->cq_id = cid->ci_queue_id; - __entry->completion_id = cid->ci_completion_id; - __entry->status = wc->status; - __entry->vendor_err = wc->vendor_err; - ), - - TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", - __entry->cq_id, __entry->completion_id, - rdma_show_wc_status(__entry->status), - __entry->status, __entry->vendor_err - ) -); - -#define DEFINE_SEND_FLUSH_EVENT(name) \ - DEFINE_EVENT(rpcrdma_send_flush_class, name, \ - TP_PROTO( \ - const struct ib_wc *wc, \ - const struct rpc_rdma_cid *cid \ - ), \ - TP_ARGS(wc, cid)) - 
DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class, TP_PROTO( const struct ib_wc *wc, @@ -215,77 +145,6 @@ DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class, ), \ TP_ARGS(wc, cid)) -DECLARE_EVENT_CLASS(rpcrdma_receive_success_class, - TP_PROTO( - const struct ib_wc *wc, - const struct rpc_rdma_cid *cid - ), - - TP_ARGS(wc, cid), - - TP_STRUCT__entry( - __field(u32, cq_id) - __field(int, completion_id) - __field(u32, received) - ), - - TP_fast_assign( - __entry->cq_id = cid->ci_queue_id; - __entry->completion_id = cid->ci_completion_id; - __entry->received = wc->byte_len; - ), - - TP_printk("cq.id=%u cid=%d received=%u", - __entry->cq_id, __entry->completion_id, - __entry->received - ) -); - -#define DEFINE_RECEIVE_SUCCESS_EVENT(name) \ - DEFINE_EVENT(rpcrdma_receive_success_class, name, \ - TP_PROTO( \ - const struct ib_wc *wc, \ - const struct rpc_rdma_cid *cid \ - ), \ - TP_ARGS(wc, cid)) - -DECLARE_EVENT_CLASS(rpcrdma_receive_flush_class, - TP_PROTO( - const struct ib_wc *wc, - const struct rpc_rdma_cid *cid - ), - - TP_ARGS(wc, cid), - - TP_STRUCT__entry( - __field(u32, cq_id) - __field(int, completion_id) - __field(unsigned long, status) - __field(unsigned int, vendor_err) - ), - - TP_fast_assign( - __entry->cq_id = cid->ci_queue_id; - __entry->completion_id = cid->ci_completion_id; - __entry->status = wc->status; - __entry->vendor_err = wc->vendor_err; - ), - - TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", - __entry->cq_id, __entry->completion_id, - rdma_show_wc_status(__entry->status), - __entry->status, __entry->vendor_err - ) -); - -#define DEFINE_RECEIVE_FLUSH_EVENT(name) \ - DEFINE_EVENT(rpcrdma_receive_flush_class, name, \ - TP_PROTO( \ - const struct ib_wc *wc, \ - const struct rpc_rdma_cid *cid \ - ), \ - TP_ARGS(wc, cid)) - DECLARE_EVENT_CLASS(xprtrdma_reply_class, TP_PROTO( const struct rpcrdma_rep *rep @@ -420,8 +279,7 @@ DECLARE_EVENT_CLASS(xprtrdma_rdch_event, __entry->nsegs = nsegs; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " pos=%u %u@0x%016llx:0x%08x (%s)", + TP_printk("task:%u@%u pos=%u %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->pos, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -468,8 +326,7 @@ DECLARE_EVENT_CLASS(xprtrdma_wrch_event, __entry->nsegs = nsegs; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " %u@0x%016llx:0x%08x (%s)", + TP_printk("task:%u@%u %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -518,16 +375,10 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, TP_fast_assign( const struct rpcrdma_req *req = mr->mr_req; + const struct rpc_task *task = req->rl_slot.rq_task; - if (req) { - const struct rpc_task *task = req->rl_slot.rq_task; - - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; - } else { - __entry->task_id = 0; - __entry->client_id = -1; - } + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; @@ -536,8 +387,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, __entry->dir = mr->mr_dir; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)", + TP_printk("task:%u@%u mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->mr_id, __entry->nents, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -780,16 +630,15 @@ TRACE_EVENT(xprtrdma_nomrs_err, 
__assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " peer=[%s]:%s", - __entry->task_id, __entry->client_id, - __get_str(addr), __get_str(port) + TP_printk("peer=[%s]:%s task:%u@%u", + __get_str(addr), __get_str(port), + __entry->task_id, __entry->client_id ) ); DEFINE_RDCH_EVENT(read); DEFINE_WRCH_EVENT(write); DEFINE_WRCH_EVENT(reply); -DEFINE_WRCH_EVENT(wp); TRACE_DEFINE_ENUM(rpcrdma_noch); TRACE_DEFINE_ENUM(rpcrdma_noch_pullup); @@ -844,8 +693,7 @@ TRACE_EVENT(xprtrdma_marshal, __entry->wtype = wtype; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " xid=0x%08x hdr=%u xdr=%u/%u/%u %s/%s", + TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s", __entry->task_id, __entry->client_id, __entry->xid, __entry->hdrlen, __entry->headlen, __entry->pagelen, __entry->taillen, @@ -875,7 +723,7 @@ TRACE_EVENT(xprtrdma_marshal_failed, __entry->ret = ret; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d", + TP_printk("task:%u@%u xid=0x%08x: ret=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->ret ) @@ -902,7 +750,7 @@ TRACE_EVENT(xprtrdma_prepsend_failed, __entry->ret = ret; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d", + TP_printk("task:%u@%u xid=0x%08x: ret=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->ret ) @@ -937,7 +785,7 @@ TRACE_EVENT(xprtrdma_post_send, __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u cid=%d (%d SGE%s) %s", + TP_printk("task:%u@%u cq.id=%u cid=%d (%d SGE%s) %s", __entry->task_id, __entry->client_id, __entry->cq_id, __entry->completion_id, __entry->num_sge, (__entry->num_sge == 1 ? "" : "s"), @@ -972,7 +820,7 @@ TRACE_EVENT(xprtrdma_post_send_err, __entry->rc = rc; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u rc=%d", + TP_printk("task:%u@%u cq.id=%u rc=%d", __entry->task_id, __entry->client_id, __entry->cq_id, __entry->rc ) @@ -1084,7 +932,7 @@ TRACE_EVENT(xprtrdma_post_linv_err, __entry->status = status; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d", + TP_printk("task:%u@%u status=%d", __entry->task_id, __entry->client_id, __entry->status ) ); @@ -1272,7 +1120,7 @@ TRACE_EVENT(xprtrdma_reply, __entry->credits = credits; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x credits=%u", + TP_printk("task:%u@%u xid=0x%08x credits=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->credits ) @@ -1308,7 +1156,7 @@ TRACE_EVENT(xprtrdma_err_vers, __entry->max = be32_to_cpup(max); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x versions=[%u, %u]", + TP_printk("task:%u@%u xid=0x%08x versions=[%u, %u]", __entry->task_id, __entry->client_id, __entry->xid, __entry->min, __entry->max ) @@ -1333,7 +1181,7 @@ TRACE_EVENT(xprtrdma_err_chunk, __entry->xid = be32_to_cpu(rqst->rq_xid); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x", + TP_printk("task:%u@%u xid=0x%08x", __entry->task_id, __entry->client_id, __entry->xid ) ); @@ -1359,7 +1207,7 @@ TRACE_EVENT(xprtrdma_err_unrecognized, __entry->procedure = be32_to_cpup(procedure); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x procedure=%u", + TP_printk("task:%u@%u xid=0x%08x procedure=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->procedure ) @@ -1391,7 +1239,7 @@ TRACE_EVENT(xprtrdma_fixup, __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " fixup=%lu xdr=%zu/%u/%zu", + TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", __entry->task_id, 
__entry->client_id, __entry->fixup, __entry->headlen, __entry->pagelen, __entry->taillen ) @@ -1441,7 +1289,7 @@ TRACE_EVENT(xprtrdma_mrs_zap, __entry->client_id = task->tk_client->cl_clid; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, + TP_printk("task:%u@%u", __entry->task_id, __entry->client_id ) ); @@ -2020,9 +1868,7 @@ TRACE_EVENT(svcrdma_post_send, ) ); -DEFINE_SEND_COMPLETION_EVENT(svcrdma_wc_send); -DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_send_flush); -DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_send_err); +DEFINE_COMPLETION_EVENT(svcrdma_wc_send); TRACE_EVENT(svcrdma_post_recv, TP_PROTO( @@ -2046,9 +1892,7 @@ TRACE_EVENT(svcrdma_post_recv, ) ); -DEFINE_RECEIVE_SUCCESS_EVENT(svcrdma_wc_recv); -DEFINE_RECEIVE_FLUSH_EVENT(svcrdma_wc_recv_flush); -DEFINE_RECEIVE_FLUSH_EVENT(svcrdma_wc_recv_err); +DEFINE_RECEIVE_COMPLETION_EVENT(svcrdma_wc_receive); TRACE_EVENT(svcrdma_rq_post_err, TP_PROTO( @@ -2112,42 +1956,8 @@ DEFINE_POST_CHUNK_EVENT(read); DEFINE_POST_CHUNK_EVENT(write); DEFINE_POST_CHUNK_EVENT(reply); -TRACE_EVENT(svcrdma_wc_read, - TP_PROTO( - const struct ib_wc *wc, - const struct rpc_rdma_cid *cid, - unsigned int totalbytes, - const ktime_t posttime - ), - - TP_ARGS(wc, cid, totalbytes, posttime), - - TP_STRUCT__entry( - __field(u32, cq_id) - __field(int, completion_id) - __field(s64, read_latency) - __field(unsigned int, totalbytes) - ), - - TP_fast_assign( - __entry->cq_id = cid->ci_queue_id; - __entry->completion_id = cid->ci_completion_id; - __entry->totalbytes = totalbytes; - __entry->read_latency = ktime_us_delta(ktime_get(), posttime); - ), - - TP_printk("cq.id=%u cid=%d totalbytes=%u latency-us=%lld", - __entry->cq_id, __entry->completion_id, - __entry->totalbytes, __entry->read_latency - ) -); - -DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_read_flush); -DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_read_err); - -DEFINE_SEND_COMPLETION_EVENT(svcrdma_wc_write); -DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush); -DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err); +DEFINE_COMPLETION_EVENT(svcrdma_wc_read); +DEFINE_COMPLETION_EVENT(svcrdma_wc_write); TRACE_EVENT(svcrdma_qp_error, TP_PROTO( diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a8a64b9750..9e92f22eb0 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,56 +9,29 @@ #include #include -#define TRACE_SKB_DROP_REASON \ - EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ - EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ - EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ - EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ - EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ - EMe(SKB_DROP_REASON_MAX, MAX) - -#undef EM -#undef EMe - -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -TRACE_SKB_DROP_REASON - -#undef EM -#undef EMe -#define EM(a, b) { a, #b }, -#define EMe(a, b) { a, #b } - /* * Tracepoint for free an sk_buff: */ TRACE_EVENT(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location, - enum skb_drop_reason reason), + TP_PROTO(struct sk_buff *skb, void *location), - TP_ARGS(skb, location, reason), + TP_ARGS(skb, location), TP_STRUCT__entry( - __field(void *, skbaddr) - __field(void *, location) - __field(unsigned short, protocol) - __field(enum skb_drop_reason, reason) + __field( void *, skbaddr ) + __field( void *, location ) + __field( unsigned short, protocol ) ), TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; __entry->protocol = ntohs(skb->protocol); - __entry->reason = reason; ), - TP_printk("skbaddr=%p 
protocol=%u location=%p reason: %s", - __entry->skbaddr, __entry->protocol, __entry->location, - __print_symbolic(__entry->reason, - TRACE_SKB_DROP_REASON)) + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) ); TRACE_EVENT(consume_skb, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 29982d60b6..7c48613c18 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,8 +14,6 @@ #include #include -#include - TRACE_DEFINE_ENUM(SOCK_STREAM); TRACE_DEFINE_ENUM(SOCK_DGRAM); TRACE_DEFINE_ENUM(SOCK_RAW); @@ -64,7 +62,6 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, __field(size_t, head_len) __field(const void *, tail_base) __field(size_t, tail_len) - __field(unsigned int, page_base) __field(unsigned int, page_len) __field(unsigned int, msg_len) ), @@ -77,18 +74,14 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; __entry->tail_len = xdr->tail[0].iov_len; - __entry->page_base = xdr->page_base; __entry->page_len = xdr->page_len; __entry->msg_len = xdr->len; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " head=[%p,%zu] page=%u(%u) tail=[%p,%zu] len=%u", + TP_printk("task:%u@%u head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", __entry->task_id, __entry->client_id, - __entry->head_base, __entry->head_len, - __entry->page_len, __entry->page_base, - __entry->tail_base, __entry->tail_len, - __entry->msg_len + __entry->head_base, __entry->head_len, __entry->page_len, + __entry->tail_base, __entry->tail_len, __entry->msg_len ) ); @@ -121,7 +114,7 @@ DECLARE_EVENT_CLASS(rpc_clnt_class, __entry->client_id = clnt->cl_clid; ), - TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER, __entry->client_id) + TP_printk("clid=%u", __entry->client_id) ); #define DEFINE_RPC_CLNT_EVENT(name) \ @@ -165,8 +158,7 @@ TRACE_EVENT(rpc_clnt_new, __assign_str(server, server); ), - TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER - " peer=[%s]:%s program=%s server=%s", + TP_printk("client=%u peer=[%s]:%s program=%s server=%s", __entry->client_id, __get_str(addr), __get_str(port), __get_str(program), __get_str(server)) ); @@ -214,8 +206,7 @@ TRACE_EVENT(rpc_clnt_clone_err, __entry->error = error; ), - TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER " error=%d", - __entry->client_id, __entry->error) + TP_printk("client=%u error=%d", __entry->client_id, __entry->error) ); @@ -257,7 +248,7 @@ DECLARE_EVENT_CLASS(rpc_task_status, __entry->status = task->tk_status; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d", + TP_printk("task:%u@%u status=%d", __entry->task_id, __entry->client_id, __entry->status) ); @@ -297,7 +288,7 @@ TRACE_EVENT(rpc_request, __assign_str(procname, rpc_proc_name(task)); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " %sv%d %s (%ssync)", + TP_printk("task:%u@%u %sv%d %s (%ssync)", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procname), __entry->async ? 
"a": "" @@ -357,8 +348,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " flags=%s runstate=%s status=%d action=%ps", + TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%ps", __entry->task_id, __entry->client_id, rpc_show_task_flags(__entry->flags), rpc_show_runstate(__entry->runstate), @@ -382,7 +372,6 @@ DEFINE_RPC_RUNNING_EVENT(complete); DEFINE_RPC_RUNNING_EVENT(timeout); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); -DEFINE_RPC_RUNNING_EVENT(call_done); DECLARE_EVENT_CLASS(rpc_task_queued, @@ -411,8 +400,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued, __assign_str(q_name, rpc_qname(q)); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " flags=%s runstate=%s status=%d timeout=%lu queue=%s", + TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s", __entry->task_id, __entry->client_id, rpc_show_task_flags(__entry->flags), rpc_show_runstate(__entry->runstate), @@ -448,7 +436,7 @@ DECLARE_EVENT_CLASS(rpc_failure, __entry->client_id = task->tk_client->cl_clid; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, + TP_printk("task:%u@%u", __entry->task_id, __entry->client_id) ); @@ -490,8 +478,7 @@ DECLARE_EVENT_CLASS(rpc_reply_event, __assign_str(servername, task->tk_xprt->servername); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " server=%s xid=0x%08x %sv%d %s", + TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s", __entry->task_id, __entry->client_id, __get_str(servername), __entry->xid, __get_str(progname), __entry->version, __get_str(procname)) @@ -551,8 +538,7 @@ TRACE_EVENT(rpc_buf_alloc, __entry->status = status; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " callsize=%zu recvsize=%zu status=%d", + TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d", __entry->task_id, __entry->client_id, __entry->callsize, __entry->recvsize, __entry->status ) @@ -581,8 +567,7 @@ TRACE_EVENT(rpc_call_rpcerror, __entry->rpc_status = rpc_status; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " tk_status=%d rpc_status=%d", + TP_printk("task:%u@%u tk_status=%d rpc_status=%d", __entry->task_id, __entry->client_id, __entry->tk_status, __entry->rpc_status) ); @@ -622,8 +607,7 @@ TRACE_EVENT(rpc_stats_latency, __entry->execute = ktime_to_us(execute); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", + TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), __entry->backlog, __entry->rtt, __entry->execute) @@ -667,8 +651,8 @@ TRACE_EVENT(rpc_xdr_overflow, __entry->version = task->tk_client->cl_vers; __assign_str(procedure, task->tk_msg.rpc_proc->p_name); } else { - __entry->task_id = -1; - __entry->client_id = -1; + __entry->task_id = 0; + __entry->client_id = 0; __assign_str(progname, "unknown"); __entry->version = 0; __assign_str(procedure, "unknown"); @@ -684,8 +668,8 @@ TRACE_EVENT(rpc_xdr_overflow, __entry->len = xdr->buf->len; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + TP_printk( + "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->requested, __entry->p, __entry->end, @@ -743,8 +727,8 @@ TRACE_EVENT(rpc_xdr_alignment, __entry->len = xdr->buf->len; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " %sv%d %s 
offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + TP_printk( + "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->offset, __entry->copied, @@ -794,9 +778,6 @@ RPC_SHOW_SOCKET RPC_SHOW_SOCK - -#include - /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. @@ -819,32 +800,27 @@ DECLARE_EVENT_CLASS(xs_socket_event, __field(unsigned int, socket_state) __field(unsigned int, sock_state) __field(unsigned long long, ino) - __array(__u8, saddr, sizeof(struct sockaddr_in6)) - __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __string(dstaddr, + xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]) ), TP_fast_assign( struct inode *inode = SOCK_INODE(socket); - const struct sock *sk = socket->sk; - const struct inet_sock *inet = inet_sk(sk); - - memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); - memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); - - TP_STORE_ADDR_PORTS(__entry, inet, sk); - __entry->socket_state = socket->state; __entry->sock_state = socket->sk->sk_state; __entry->ino = (unsigned long long)inode->i_ino; - + __assign_str(dstaddr, + xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]); ), TP_printk( - "socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc " + "socket:[%llu] dstaddr=%s/%s " "state=%u (%s) sk_state=%u (%s)", - __entry->ino, - __entry->saddr, - __entry->daddr, + __entry->ino, __get_str(dstaddr), __get_str(dstport), __entry->socket_state, rpc_show_socket_state(__entry->socket_state), __entry->sock_state, @@ -874,33 +850,29 @@ DECLARE_EVENT_CLASS(xs_socket_event_done, __field(unsigned int, socket_state) __field(unsigned int, sock_state) __field(unsigned long long, ino) - __array(__u8, saddr, sizeof(struct sockaddr_in6)) - __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __string(dstaddr, + xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]) ), TP_fast_assign( struct inode *inode = SOCK_INODE(socket); - const struct sock *sk = socket->sk; - const struct inet_sock *inet = inet_sk(sk); - - memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); - memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); - - TP_STORE_ADDR_PORTS(__entry, inet, sk); - __entry->socket_state = socket->state; __entry->sock_state = socket->sk->sk_state; __entry->ino = (unsigned long long)inode->i_ino; __entry->error = error; + __assign_str(dstaddr, + xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]); ), TP_printk( - "error=%d socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc " + "error=%d socket:[%llu] dstaddr=%s/%s " "state=%u (%s) sk_state=%u (%s)", __entry->error, - __entry->ino, - __entry->saddr, - __entry->daddr, + __entry->ino, __get_str(dstaddr), __get_str(dstport), __entry->socket_state, rpc_show_socket_state(__entry->socket_state), __entry->sock_state, @@ -945,8 +917,7 @@ TRACE_EVENT(rpc_socket_nospace, __entry->remaining = rqst->rq_slen - transport->xmit.offset; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " total=%u remaining=%u", + TP_printk("task:%u@%u total=%u remaining=%u", __entry->task_id, __entry->client_id, __entry->total, __entry->remaining ) @@ -1072,8 +1043,8 @@ TRACE_EVENT(xprt_transmit, __entry->status = status; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " xid=0x%08x 
seqno=%u status=%d", + TP_printk( + "task:%u@%u xid=0x%08x seqno=%u status=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno, __entry->status) ); @@ -1112,8 +1083,8 @@ TRACE_EVENT(xprt_retransmit, __assign_str(procname, rpc_proc_name(task)); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " xid=0x%08x %sv%d %s ntrans=%d timeout=%lu", + TP_printk( + "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d timeout=%lu", __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), __entry->ntrans, __entry->timeout @@ -1170,8 +1141,7 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, __entry->snd_task_id = -1; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " snd_task:" SUNRPC_TRACE_PID_SPECIFIER, + TP_printk("task:%u@%u snd_task:%u", __entry->task_id, __entry->client_id, __entry->snd_task_id) ); @@ -1223,9 +1193,7 @@ DECLARE_EVENT_CLASS(xprt_cong_event, __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " snd_task:" SUNRPC_TRACE_PID_SPECIFIER - " cong=%lu cwnd=%lu%s", + TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", __entry->task_id, __entry->client_id, __entry->snd_task_id, __entry->cong, __entry->cwnd, __entry->wait ? " (wait)" : "") @@ -1263,7 +1231,7 @@ TRACE_EVENT(xprt_reserve, __entry->xid = be32_to_cpu(rqst->rq_xid); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x", + TP_printk("task:%u@%u xid=0x%08x", __entry->task_id, __entry->client_id, __entry->xid ) ); @@ -1352,8 +1320,7 @@ TRACE_EVENT(rpcb_getport, __assign_str(servername, task->tk_xprt->servername); ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " server=%s program=%u version=%u protocol=%d bind_version=%u", + TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u", __entry->task_id, __entry->client_id, __get_str(servername), __entry->program, __entry->version, __entry->protocol, __entry->bind_version @@ -1383,7 +1350,7 @@ TRACE_EVENT(rpcb_setport, __entry->port = port; ), - TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d port=%u", + TP_printk("task:%u@%u status=%d port=%u", __entry->task_id, __entry->client_id, __entry->status, __entry->port ) @@ -1537,7 +1504,6 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class, __field(size_t, head_len) __field(const void *, tail_base) __field(size_t, tail_len) - __field(unsigned int, page_base) __field(unsigned int, page_len) __field(unsigned int, msg_len) ), @@ -1548,17 +1514,14 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class, __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; __entry->tail_len = xdr->tail[0].iov_len; - __entry->page_base = xdr->page_base; __entry->page_len = xdr->page_len; __entry->msg_len = xdr->len; ), - TP_printk("xid=0x%08x head=[%p,%zu] page=%u(%u) tail=[%p,%zu] len=%u", + TP_printk("xid=0x%08x head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", __entry->xid, - __entry->head_base, __entry->head_len, - __entry->page_len, __entry->page_base, - __entry->tail_base, __entry->tail_len, - __entry->msg_len + __entry->head_base, __entry->head_len, __entry->page_len, + __entry->tail_base, __entry->tail_len, __entry->msg_len ) ); @@ -1789,7 +1752,7 @@ TRACE_EVENT(svc_xprt_create_err, __entry->error) ); -TRACE_EVENT(svc_xprt_enqueue, +TRACE_EVENT(svc_xprt_do_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), TP_ARGS(xprt, rqst), @@ -1836,6 +1799,7 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ), \ TP_ARGS(xprt)) +DEFINE_SVC_XPRT_EVENT(received); DEFINE_SVC_XPRT_EVENT(no_write_space); DEFINE_SVC_XPRT_EVENT(close); 
DEFINE_SVC_XPRT_EVENT(detach); @@ -1904,22 +1868,25 @@ TRACE_EVENT(svc_wake_up, TP_printk("pid=%d", __entry->pid) ); -TRACE_EVENT(svc_alloc_arg_err, - TP_PROTO( - unsigned int pages - ), +TRACE_EVENT(svc_handle_xprt, + TP_PROTO(struct svc_xprt *xprt, int len), - TP_ARGS(pages), + TP_ARGS(xprt, len), TP_STRUCT__entry( - __field(unsigned int, pages) + __field(int, len) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->pages = pages; + __entry->len = len; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("pages=%u", __entry->pages) + TP_printk("addr=%s len=%d flags=%s", __get_str(addr), + __entry->len, show_svc_xprt_flags(__entry->flags)) ); TRACE_EVENT(svc_stats_latency, diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h index ca3f276782..d7fbbe5518 100644 --- a/include/trace/events/thp.h +++ b/include/trace/events/thp.h @@ -8,6 +8,24 @@ #include #include +TRACE_EVENT(hugepage_invalidate, + + TP_PROTO(unsigned long addr, unsigned long pte), + TP_ARGS(addr, pte), + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, pte) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->pte = pte; + ), + + TP_printk("hugepage invalidate at addr 0x%lx and pte = 0x%lx", + __entry->addr, __entry->pte) +); + TRACE_EVENT(hugepage_set_pmd, TP_PROTO(unsigned long addr, unsigned long pmd), @@ -47,6 +65,23 @@ TRACE_EVENT(hugepage_update, TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set) ); +TRACE_EVENT(hugepage_splitting, + + TP_PROTO(unsigned long addr, unsigned long pte), + TP_ARGS(addr, pte), + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, pte) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->pte = pte; + ), + + TP_printk("hugepage splitting at addr 0x%lx and pte = 0x%lx", + __entry->addr, __entry->pte) +); #endif /* _TRACE_THP_H */ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index ca2e9009a6..88faf2400e 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -27,20 +27,6 @@ {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ ) : "RECLAIM_WB_NONE" -#define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) -#define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED) -#define _VMSCAN_THROTTLE_NOPROGRESS (1 << VMSCAN_THROTTLE_NOPROGRESS) -#define _VMSCAN_THROTTLE_CONGESTED (1 << VMSCAN_THROTTLE_CONGESTED) - -#define show_throttle_flags(flags) \ - (flags) ? __print_flags(flags, "|", \ - {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \ - {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"}, \ - {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"}, \ - {_VMSCAN_THROTTLE_CONGESTED, "VMSCAN_THROTTLE_CONGESTED"} \ - ) : "VMSCAN_THROTTLE_NONE" - - #define trace_reclaim_flags(file) ( \ (file ? 
RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ (RECLAIM_WB_ASYNC) \ @@ -468,32 +454,6 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end, TP_ARGS(nr_reclaimed) ); -TRACE_EVENT(mm_vmscan_throttled, - - TP_PROTO(int nid, int usec_timeout, int usec_delayed, int reason), - - TP_ARGS(nid, usec_timeout, usec_delayed, reason), - - TP_STRUCT__entry( - __field(int, nid) - __field(int, usec_timeout) - __field(int, usec_delayed) - __field(int, reason) - ), - - TP_fast_assign( - __entry->nid = nid; - __entry->usec_timeout = usec_timeout; - __entry->usec_delayed = usec_delayed; - __entry->reason = 1U << reason; - ), - - TP_printk("nid=%d usec_timeout=%d usect_delayed=%d reason=%s", - __entry->nid, - __entry->usec_timeout, - __entry->usec_delayed, - show_throttle_flags(__entry->reason)) -); #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index a345b1e12d..840d1ba84c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -52,11 +52,11 @@ WB_WORK_REASON struct wb_writeback_work; -DECLARE_EVENT_CLASS(writeback_folio_template, +DECLARE_EVENT_CLASS(writeback_page_template, - TP_PROTO(struct folio *folio, struct address_space *mapping), + TP_PROTO(struct page *page, struct address_space *mapping), - TP_ARGS(folio, mapping), + TP_ARGS(page, mapping), TP_STRUCT__entry ( __array(char, name, 32) @@ -69,7 +69,7 @@ DECLARE_EVENT_CLASS(writeback_folio_template, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; - __entry->index = folio->index; + __entry->index = page->index; ), TP_printk("bdi %s: ino=%lu index=%lu", @@ -79,18 +79,18 @@ DECLARE_EVENT_CLASS(writeback_folio_template, ) ); -DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio, +DEFINE_EVENT(writeback_page_template, writeback_dirty_page, - TP_PROTO(struct folio *folio, struct address_space *mapping), + TP_PROTO(struct page *page, struct address_space *mapping), - TP_ARGS(folio, mapping) + TP_ARGS(page, mapping) ); -DEFINE_EVENT(writeback_folio_template, folio_wait_writeback, +DEFINE_EVENT(writeback_page_template, wait_on_page_writeback, - TP_PROTO(struct folio *folio, struct address_space *mapping), + TP_PROTO(struct page *page, struct address_space *mapping), - TP_ARGS(folio, mapping) + TP_ARGS(page, mapping) ); DECLARE_EVENT_CLASS(writeback_dirty_inode_template, @@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs, TRACE_EVENT(track_foreign_dirty, - TP_PROTO(struct folio *folio, struct bdi_writeback *wb), + TP_PROTO(struct page *page, struct bdi_writeback *wb), - TP_ARGS(folio, wb), + TP_ARGS(page, wb), TP_STRUCT__entry( __array(char, name, 32) @@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty, ), TP_fast_assign( - struct address_space *mapping = folio_mapping(folio); + struct address_space *mapping = page_mapping(page); struct inode *inode = mapping ? mapping->host : NULL; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); @@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty, __entry->ino = inode ? 
inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); - __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); + __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", @@ -763,6 +763,13 @@ DEFINE_EVENT(writeback_congest_waited_template, writeback_congestion_wait, TP_ARGS(usec_timeout, usec_delayed) ); +DEFINE_EVENT(writeback_congest_waited_template, writeback_wait_iff_congested, + + TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), + + TP_ARGS(usec_timeout, usec_delayed) +); + DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_PROTO(struct inode *inode, diff --git a/include/trace/perf.h b/include/trace/perf.h index 5d48c46a30..dbc6c74def 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -21,23 +21,6 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) -#undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)__entry + \ - offsetof(typeof(*__entry), __rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ - (__entry->__rel_loc_##field & 0xffff)) - -#undef __get_rel_dynamic_array_len -#define __get_rel_dynamic_array_len(field) \ - ((__entry->__rel_loc_##field >> 16) & 0xffff) - -#undef __get_rel_str -#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) - #undef __perf_count #define __perf_count(c) (__count = (c)) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 3d29919045..08810a4638 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -108,18 +108,6 @@ TRACE_MAKE_SYSTEM_STR(); #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) - #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) 
args @@ -128,7 +116,7 @@ TRACE_MAKE_SYSTEM_STR(); struct trace_event_raw_##name { \ struct trace_entry ent; \ tstruct \ - char __data[]; \ + char __data[0]; \ }; \ \ static struct trace_event_class event_class_##name; @@ -212,23 +200,11 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) - #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) u32 item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -317,20 +293,6 @@ TRACE_MAKE_SYSTEM_STR(); #undef __get_str #define __get_str(field) ((char *)__get_dynamic_array(field)) -#undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)__entry + \ - offsetof(typeof(*__entry), __rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ - (__entry->__rel_loc_##field & 0xffff)) - -#undef __get_rel_dynamic_array_len -#define __get_rel_dynamic_array_len(field) \ - ((__entry->__rel_loc_##field >> 16) & 0xffff) - -#undef __get_rel_str -#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) - #undef __get_bitmask #define __get_bitmask(field) \ ({ \ @@ -340,15 +302,6 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) \ - ({ \ - void *__bitmask = __get_rel_dynamic_array(field); \ - unsigned int __bitmask_size; \ - __bitmask_size = __get_rel_dynamic_array_len(field); \ - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ - }) - #undef __print_flags #define __print_flags(flag, delim, flag_array...) 
\ ({ \ @@ -518,21 +471,6 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) -#undef __rel_dynamic_array -#define __rel_dynamic_array(_type, _item, _len) { \ - .type = "__rel_loc " #_type "[]", .name = #_item, \ - .size = 4, .align = 4, \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) - #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static struct trace_event_fields trace_event_fields_##call[] = { \ @@ -581,22 +519,6 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) \ - __item_length = (len) * sizeof(type); \ - __data_offsets->item = __data_size + \ - offsetof(typeof(*entry), __data) - \ - offsetof(typeof(*entry), __rel_loc_##item) - \ - sizeof(u32); \ - __data_offsets->item |= __item_length << 16; \ - __data_size += __item_length; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) /* * __bitmask_size_in_bytes_raw is the number of bytes needed to hold * num_possible_cpus(). @@ -620,10 +542,6 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ - __bitmask_size_in_longs(nr_bits)) - #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int trace_event_get_offsets_##call( \ @@ -788,37 +706,6 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_bitmask(dst, src, nr_bits) \ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) \ - __entry->__rel_loc_##item = __data_offsets.item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __assign_rel_str -#define __assign_rel_str(dst, src) \ - strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); - -#undef __assign_rel_str_len -#define __assign_rel_str_len(dst, src, len) \ - do { \ - memcpy(__get_rel_str(dst), (src), (len)); \ - __get_rel_str(dst)[len] = '\0'; \ - } while (0) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) - -#undef __assign_rel_bitmask -#define __assign_rel_bitmask(dst, src, nr_bits) \ - memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) - #undef TP_fast_assign #define TP_fast_assign(args...) 
args @@ -883,10 +770,6 @@ static inline void ftrace_test_probe_##call(void) \ #undef __get_dynamic_array_len #undef __get_str #undef __get_bitmask -#undef __get_rel_dynamic_array -#undef __get_rel_dynamic_array_len -#undef __get_rel_str -#undef __get_rel_bitmask #undef __print_array #undef __print_hex_dump diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index ecd0f5bdfc..9dc0bf0c5a 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -181,10 +181,6 @@ struct f_owner_ex { blocking */ #define LOCK_UN 8 /* remove lock */ -/* - * LOCK_MAND support has been removed from the kernel. We leave the symbols - * here to not break legacy builds, but these should not be used in new code. - */ #define LOCK_MAND 32 /* This is a mandatory flock ... */ #define LOCK_READ 64 /* which allows concurrent read operations */ #define LOCK_WRITE 128 /* which allows concurrent write operations */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c77a1313b3..1f0a2b4864 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -126,8 +126,6 @@ #define SO_BUF_LOCK 72 -#define SO_RESERVE_MEM 73 - #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1c48b0ae3b..1c5fb86d45 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -880,14 +880,8 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #define __NR_process_mrelease 448 __SYSCALL(__NR_process_mrelease, sys_process_mrelease) -#define __NR_futex_waitv 449 -__SYSCALL(__NR_futex_waitv, sys_futex_waitv) - -#define __NR_set_mempolicy_home_node 450 -__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) - #undef __NR_syscalls -#define __NR_syscalls 451 +#define __NR_syscalls 449 /* * 32 bit systems traditionally used different diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0b94ec7b73..0cbd1540ae 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -80,7 +80,7 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the * GPU's virtual address space via gart. Gart memory linearizes non-contiguous - * pages of system memory, allows GPU access system memory in a linearized + * pages of system memory, allows GPU access system memory in a linearized * fashion. * * %AMDGPU_GEM_DOMAIN_VRAM Local video memory.
For APUs, it is memory @@ -786,6 +786,13 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F /* query ras mask of enabled features*/ #define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS 0x21 + /* Subquery id: Decode */ + #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 + /* Subquery id: Encode */ + #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 + /* RAS MASK: UMC (VRAM) */ #define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) /* RAS MASK: SDMA */ @@ -814,12 +821,6 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) /* RAS MASK: FUSE */ #define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) -/* query video encode/decode caps */ -#define AMDGPU_INFO_VIDEO_CAPS 0x21 - /* Subquery id: Decode */ - #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 - /* Subquery id: Encode */ - #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 642808520d..3b810b53ba 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1096,24 +1096,6 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) -/** - * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. - * - * This queries metadata about a framebuffer. User-space fills - * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the - * struct as the output. - * - * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles - * will be filled with GEM buffer handles. Planes are valid until one has a - * zero handle -- this can be used to compute the number of planes. - * - * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid - * until one has a zero &drm_mode_fb_cmd2.pitches. - * - * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set - * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the - * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. - */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) /* diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index fc0c1454d2..dcf0c65f13 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -104,12 +104,6 @@ extern "C" { /* 8 bpp Red */ #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ -/* 10 bpp Red */ -#define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ - -/* 12 bpp Red */ -#define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ - /* 16 bpp Red */ #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ @@ -314,13 +308,6 @@ extern "C" { */ #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ -/* 2 plane YCbCr420. - * 3 10 bit components and 2 padding bits packed into 4 bytes. 
- * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian - * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian - */ -#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ - /* 3 plane non-subsampled (444) YCbCr * 16 bits per component, but only 10 bits are used and 6 bits are padded * index 0: Y plane, [15:0] Y:x [10:6] little endian @@ -337,6 +324,13 @@ extern "C" { */ #define DRM_FORMAT_Q401 fourcc_code('Q', '4', '0', '1') +/* + * 2 plane YCbCr MSB aligned, 3 pixels packed into 4 bytes. + * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian + * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian + */ +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ + /* * 3 plane YCbCr * index 0: Y plane, [7:0] Y @@ -386,12 +380,6 @@ extern "C" { #define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) -#define fourcc_mod_get_vendor(modifier) \ - (((modifier) >> 56) & 0xff) - -#define fourcc_mod_is_vendor(modifier, vendor) \ - (fourcc_mod_get_vendor(modifier) == DRM_FORMAT_MOD_VENDOR_## vendor) - #define fourcc_mod_code(vendor, val) \ ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL)) diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index e1e3516828..90c55383f1 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -1110,10 +1110,6 @@ struct drm_mode_destroy_blob { * struct drm_mode_create_lease - Create lease * * Lease mode resources, creating another drm_master. - * - * The @object_ids array must reference at least one CRTC, one connector and - * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. Alternatively, - * the lease can be completely empty. */ struct drm_mode_create_lease { /** @object_ids: Pointer to array of object ids (__u32) */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 914ebd9290..bde5860b36 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1522,12 +1522,6 @@ struct drm_i915_gem_caching { #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 -/* - * Do not add new tiling types here. The I915_TILING_* values are for - * de-tiling fence registers that no longer exist on modern platforms. Although - * the hardware may support new types of tiling in general (e.g., Tile4), we - * do not need to add them to the uapi that is specific to now-defunct ioctls. - */ #define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 @@ -1830,7 +1824,6 @@ struct drm_i915_gem_context_param { * Extensions: * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) - * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) */ #define I915_CONTEXT_PARAM_ENGINES 0xa @@ -1853,55 +1846,6 @@ struct drm_i915_gem_context_param { * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc - -/* - * I915_CONTEXT_PARAM_PROTECTED_CONTENT: - * - * Mark that the context makes use of protected content, which will result - * in the context being invalidated when the protected content session is. 
- * Given that the protected content session is killed on suspend, the device - * is kept awake for the lifetime of a protected context, so the user should - * make sure to dispose of them once done. - * This flag can only be set at context creation time and, when set to true, - * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE - * to false. This flag can't be set to true in conjunction with setting the - * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: - * - * .. code-block:: C - * - * struct drm_i915_gem_context_create_ext_setparam p_protected = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, - * .value = 1, - * } - * }; - * struct drm_i915_gem_context_create_ext_setparam p_norecover = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * .next_extension = to_user_pointer(&p_protected), - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_RECOVERABLE, - * .value = 0, - * } - * }; - * struct drm_i915_gem_context_create_ext create = { - * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, - * .extensions = to_user_pointer(&p_norecover); - * }; - * - * ctx_id = gem_context_create_ext(drm_fd, &create); - * - * In addition to the normal failure cases, setting this flag during context - * creation can result in the following errors: - * - * -ENODEV: feature not available - * -EPERM: trying to mark a recoverable or not bannable context as protected - */ -#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd /* Must be kept compact -- no holes and well documented */ __u64 value; @@ -2105,135 +2049,6 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ -/** - * struct i915_context_engines_parallel_submit - Configure engine for - * parallel submission. - * - * Setup a slot in the context engine map to allow multiple BBs to be submitted - * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU - * in parallel. Multiple hardware contexts are created internally in the i915 to - * run these BBs. Once a slot is configured for N BBs only N BBs can be - * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user - * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how - * many BBs there are based on the slot's configuration. The N BBs are the last - * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. - * - * The default placement behavior is to create implicit bonds between each - * context if each context maps to more than 1 physical engine (e.g. context is - * a virtual engine). Also we only allow contexts of same engine class and these - * contexts must be in logically contiguous order. Examples of the placement - * behavior are described below. Lastly, the default is to not allow BBs to be - * preempted mid-batch. Rather insert coordinated preemption points on all - * hardware contexts between each set of BBs. Flags could be added in the future - * to change both of these default behaviors. - * - * Returns -EINVAL if hardware context placement configuration is invalid or if - * the placement configuration isn't supported on the platform / submission - * interface. - * Returns -ENODEV if extension isn't supported on the platform / submission - * interface. - * - * .. 
code-block:: none - * - * Examples syntax: - * CS[X] = generic engine of same class, logical instance X - * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE - * - * Example 1 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=1, - * engines=CS[0],CS[1]) - * - * Results in the following valid placement: - * CS[0], CS[1] - * - * Example 2 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=2, - * engines=CS[0],CS[2],CS[1],CS[3]) - * - * Results in the following valid placements: - * CS[0], CS[1] - * CS[2], CS[3] - * - * This can be thought of as two virtual engines, each containing two - * engines thereby making a 2D array. However, there are bonds tying the - * entries together and placing restrictions on how they can be scheduled. - * Specifically, the scheduler can choose only vertical columns from the 2D - * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the - * scheduler wants to submit to CS[0], it must also choose CS[1] and vice - * versa. Same for CS[2] requires also using CS[3]. - * VE[0] = CS[0], CS[2] - * VE[1] = CS[1], CS[3] - * - * Example 3 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=2, - * engines=CS[0],CS[1],CS[1],CS[3]) - * - * Results in the following valid and invalid placements: - * CS[0], CS[1] - * CS[1], CS[3] - Not logically contiguous, return -EINVAL - */ -struct i915_context_engines_parallel_submit { - /** - * @base: base user extension. - */ - struct i915_user_extension base; - - /** - * @engine_index: slot for parallel engine - */ - __u16 engine_index; - - /** - * @width: number of contexts per parallel engine or in other words the - * number of batches in each submission - */ - __u16 width; - - /** - * @num_siblings: number of siblings per context or in other words the - * number of possible placements for each submission - */ - __u16 num_siblings; - - /** - * @mbz16: reserved for future use; must be zero - */ - __u16 mbz16; - - /** - * @flags: all undefined flags must be zero, currently not defined flags - */ - __u64 flags; - - /** - * @mbz64: reserved for future use; must be zero - */ - __u64 mbz64[3]; - - /** - * @engines: 2-d array of engine instances to configure parallel engine - * - * length = width (i) * num_siblings (j) - * index = j + i * num_siblings - */ - struct i915_engine_class_instance engines[0]; - -} __packed; - -#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ - struct i915_user_extension base; \ - __u16 engine_index; \ - __u16 width; \ - __u16 num_siblings; \ - __u16 mbz16; \ - __u64 flags; \ - __u64 mbz64[3]; \ - struct i915_engine_class_instance engines[N__]; \ -} __attribute__((packed)) name__ - /** * DOC: Context Engine Map uAPI * @@ -2293,7 +2108,6 @@ struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ -#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ struct i915_engine_class_instance engines[0]; } __attribute__((packed)); @@ -2912,20 +2726,14 @@ struct drm_i915_engine_info { /** @flags: Engine flags. */ __u64 flags; -#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0) /** @capabilities: Capabilities of this engine. 
*/ __u64 capabilities; #define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) - /** @logical_instance: Logical instance of engine */ - __u16 logical_instance; - /** @rsvd1: Reserved fields. */ - __u16 rsvd1[3]; - /** @rsvd2: Reserved fields. */ - __u64 rsvd2[3]; + __u64 rsvd1[4]; }; /** @@ -3171,12 +2979,8 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see * struct drm_i915_gem_create_ext_memory_regions. - * - * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see - * struct drm_i915_gem_create_ext_protected_content. */ #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 -#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 __u64 extensions; }; @@ -3234,50 +3038,6 @@ struct drm_i915_gem_create_ext_memory_regions { __u64 regions; }; -/** - * struct drm_i915_gem_create_ext_protected_content - The - * I915_OBJECT_PARAM_PROTECTED_CONTENT extension. - * - * If this extension is provided, buffer contents are expected to be protected - * by PXP encryption and require decryption for scan out and processing. This - * is only possible on platforms that have PXP enabled, on all other scenarios - * using this extension will cause the ioctl to fail and return -ENODEV. The - * flags parameter is reserved for future expansion and must currently be set - * to zero. - * - * The buffer contents are considered invalid after a PXP session teardown. - * - * The encryption is guaranteed to be processed correctly only if the object - * is submitted with a context created using the - * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks - * at submission time on the validity of the objects involved. - * - * Below is an example on how to create a protected object: - * - * .. code-block:: C - * - * struct drm_i915_gem_create_ext_protected_content protected_ext = { - * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT }, - * .flags = 0, - * }; - * struct drm_i915_gem_create_ext create_ext = { - * .size = PAGE_SIZE, - * .extensions = (uintptr_t)&protected_ext, - * }; - * - * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); - * if (err) ... - */ -struct drm_i915_gem_create_ext_protected_content { - /** @base: Extension link. See struct i915_user_extension. 
*/ - struct i915_user_extension base; - /** @flags: reserved for future usage, currently MBZ */ - __u32 flags; -}; - -/* ID of the protected content session managed by i915 when PXP is active */ -#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf - #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/mga_drm.h b/include/uapi/drm/mga_drm.h index bb31567e66..8c4337548a 100644 --- a/include/uapi/drm/mga_drm.h +++ b/include/uapi/drm/mga_drm.h @@ -279,22 +279,20 @@ typedef struct drm_mga_init { unsigned long sarea_priv_offset; - __struct_group(/* no tag */, always32bit, /* no attrs */, - int chipset; - int sgram; + int chipset; + int sgram; - unsigned int maccess; + unsigned int maccess; - unsigned int fb_cpp; - unsigned int front_offset, front_pitch; - unsigned int back_offset, back_pitch; + unsigned int fb_cpp; + unsigned int front_offset, front_pitch; + unsigned int back_offset, back_pitch; - unsigned int depth_cpp; - unsigned int depth_offset, depth_pitch; + unsigned int depth_cpp; + unsigned int depth_offset, depth_pitch; - unsigned int texture_offset[MGA_NR_TEX_HEAPS]; - unsigned int texture_size[MGA_NR_TEX_HEAPS]; - ); + unsigned int texture_offset[MGA_NR_TEX_HEAPS]; + unsigned int texture_size[MGA_NR_TEX_HEAPS]; unsigned long fb_offset; unsigned long mmio_offset; diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 3dfc0af875..4104f22fb3 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -58,67 +58,6 @@ extern "C" { struct drm_v3d_perfmon_get_values) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 -#define DRM_V3D_SUBMIT_EXTENSION 0x02 - -/* struct drm_v3d_extension - ioctl extensions - * - * Linked-list of generic extensions where the id identify which struct is - * pointed by ext_data. Therefore, DRM_V3D_EXT_ID_* is used on id to identify - * the extension type. - */ -struct drm_v3d_extension { - __u64 next; - __u32 id; -#define DRM_V3D_EXT_ID_MULTI_SYNC 0x01 - __u32 flags; /* mbz */ -}; - -/* struct drm_v3d_sem - wait/signal semaphore - * - * If binary semaphore, it only takes syncobj handle and ignores flags and - * point fields. Point is defined for timeline syncobj feature. - */ -struct drm_v3d_sem { - __u32 handle; /* syncobj */ - /* rsv below, for future uses */ - __u32 flags; - __u64 point; /* for timeline sem support */ - __u64 mbz[2]; /* must be zero, rsv */ -}; - -/* Enum for each of the V3D queues. */ -enum v3d_queue { - V3D_BIN, - V3D_RENDER, - V3D_TFU, - V3D_CSD, - V3D_CACHE_CLEAN, -}; - -/** - * struct drm_v3d_multi_sync - ioctl extension to add support multiples - * syncobjs for commands submission. - * - * When an extension of DRM_V3D_EXT_ID_MULTI_SYNC id is defined, it points to - * this extension to define wait and signal dependencies, instead of single - * in/out sync entries on submitting commands. The field flags is used to - * determine the stage to set wait dependencies. - */ -struct drm_v3d_multi_sync { - struct drm_v3d_extension base; - /* Array of wait and signal semaphores */ - __u64 in_syncs; - __u64 out_syncs; - - /* Number of entries */ - __u32 in_sync_count; - __u32 out_sync_count; - - /* set the stage (v3d_queue) to sync */ - __u32 wait_stage; - - __u32 pad; /* mbz */ -}; /** * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D @@ -196,16 +135,12 @@ struct drm_v3d_submit_cl { /* Number of BO handles passed in (size is that times 4). */ __u32 bo_handle_count; - /* DRM_V3D_SUBMIT_* properties */ __u32 flags; /* ID of the perfmon to attach to this job. 0 means no perfmon. 
*/ __u32 perfmon_id; __u32 pad; - - /* Pointer to an array of ioctl extensions*/ - __u64 extensions; }; /** @@ -275,7 +210,6 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_CSD, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, DRM_V3D_PARAM_SUPPORTS_PERFMON, - DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, }; struct drm_v3d_get_param { @@ -314,11 +248,6 @@ struct drm_v3d_submit_tfu { __u32 in_sync; /* Sync object to signal when the TFU job is done. */ __u32 out_sync; - - __u32 flags; - - /* Pointer to an array of ioctl extensions*/ - __u64 extensions; }; /* Submits a compute shader for dispatch. This job will block on any @@ -347,13 +276,6 @@ struct drm_v3d_submit_csd { /* ID of the perfmon to attach to this job. 0 means no perfmon. */ __u32 perfmon_id; - - /* Pointer to an array of ioctl extensions*/ - __u64 extensions; - - __u32 flags; - - __u32 pad; }; enum { diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 0512fde5e6..b9ec26e9c6 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -47,15 +47,12 @@ extern "C" { #define DRM_VIRTGPU_WAIT 0x08 #define DRM_VIRTGPU_GET_CAPS 0x09 #define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a -#define DRM_VIRTGPU_CONTEXT_INIT 0x0b #define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 #define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 -#define VIRTGPU_EXECBUF_RING_IDX 0x04 #define VIRTGPU_EXECBUF_FLAGS (\ VIRTGPU_EXECBUF_FENCE_FD_IN |\ VIRTGPU_EXECBUF_FENCE_FD_OUT |\ - VIRTGPU_EXECBUF_RING_IDX |\ 0) struct drm_virtgpu_map { @@ -71,8 +68,6 @@ struct drm_virtgpu_execbuffer { __u64 bo_handles; __u32 num_bo_handles; __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */ - __u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */ - __u32 pad; }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ @@ -80,8 +75,6 @@ struct drm_virtgpu_execbuffer { #define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ #define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */ #define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing */ -#define VIRTGPU_PARAM_CONTEXT_INIT 6 /* DRM_VIRTGPU_CONTEXT_INIT */ -#define VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs 7 /* Bitmask of supported capability set ids */ struct drm_virtgpu_getparam { __u64 param; @@ -180,29 +173,6 @@ struct drm_virtgpu_resource_create_blob { __u64 blob_id; }; -#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 -#define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002 -#define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003 -struct drm_virtgpu_context_set_param { - __u64 param; - __u64 value; -}; - -struct drm_virtgpu_context_init { - __u32 num_params; - __u32 pad; - - /* pointer to drm_virtgpu_context_set_param array */ - __u64 ctx_set_params; -}; - -/* - * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in - * effect. The event size is sizeof(drm_event), since there is no additional - * payload. 
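The context-init ioctl being dropped in virtgpu_drm.h took an array of parameter/value pairs. A sketch of how a userspace driver initialized a context before this revert; the capset id and ring count are placeholders for whatever the host advertises:

.. code-block:: C

    struct drm_virtgpu_context_set_param params[] = {
        { .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID, .value = 1 /* e.g. VIRGL */ },
        { .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS, .value = 2 },
    };
    struct drm_virtgpu_context_init init = {
        .num_params = 2,
        .ctx_set_params = (uintptr_t)params,
    };

    if (ioctl(fd, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &init))
        /* kernel predates (or, after this revert, lacks) context init */;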
- */ -#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000 - #define DRM_IOCTL_VIRTGPU_MAP \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) @@ -242,10 +212,6 @@ struct drm_virtgpu_context_init { DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB, \ struct drm_virtgpu_resource_create_blob) -#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT, \ - struct drm_virtgpu_context_init) - #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index 8277644c11..9078775feb 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -110,7 +110,6 @@ extern "C" { #define DRM_VMW_PARAM_HW_CAPS2 13 #define DRM_VMW_PARAM_SM4_1 14 #define DRM_VMW_PARAM_SM5 15 -#define DRM_VMW_PARAM_GL43 16 /** * enum drm_vmw_handle_type - handle type for ref ioctls diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_HL.h b/include/uapi/linux/netfilter_ipv6/ip6t_HL.h index 6b62f9418e..eaed56a287 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6t_HL.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_HL.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* ip6tables module for matching the Hop Limit value +/* Hop Limit modification module for ip6tables * Maciej Soltysiak - * Based on HW's ttl module */ + * Based on HW's TTL module */ #ifndef _IP6T_HL_H #define _IP6T_HL_H @@ -9,14 +9,14 @@ #include <linux/types.h> enum { - IP6T_HL_EQ = 0, /* equals */ - IP6T_HL_NE, /* not equals */ - IP6T_HL_LT, /* less than */ - IP6T_HL_GT, /* greater than */ + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC }; +#define IP6T_HL_MAXMODE IP6T_HL_DEC -struct ip6t_hl_info { +struct ip6t_HL_info { __u8 mode; __u8 hop_limit; }; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 371dfc4243..d13bb8c1b4 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -272,16 +272,6 @@ enum hl_gaudi_pll_index { HL_GAUDI_PLL_MAX }; -/** - * enum hl_device_status - Device status information. - * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational. - * @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset. - * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable. - * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled. - * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in - * progress. - * @HL_DEVICE_STATUS_LAST: Last status. - */ enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, @@ -333,18 +323,7 @@ enum hl_server_type { * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency - * HL_INFO_POWER - Retrieve power information * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls - * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info - * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num - * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened - * and CS timeout or razwi error occurred. - * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number. - * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: - * Timestamp of razwi. - * The address which accessing it caused the razwi. - * Razwi initiator. - * Razwi cause, was it a page fault or MMU access error.
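Returning to ip6t_HL.h above: after this revert the header once again describes the HL target, which rewrites the Hop Limit, rather than the hl match. A minimal sketch of filling the target's info structure; the surrounding ip6tables rule plumbing is omitted:

.. code-block:: C

    #include <linux/netfilter_ipv6/ip6t_HL.h>

    /* Force matching packets to a fixed Hop Limit of 64. */
    struct ip6t_HL_info info = {
        .mode      = IP6T_HL_SET,   /* or IP6T_HL_INC / IP6T_HL_DEC */
        .hop_limit = 64,
    };

    /* Userspace should reject anything beyond the last defined mode. */
    if (info.mode > IP6T_HL_MAXMODE)
        /* invalid mode */;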
*/ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -364,13 +343,8 @@ enum hl_server_type { #define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_POWER 17 #define HL_INFO_OPEN_STATS 18 -#define HL_INFO_DRAM_REPLACED_ROWS 21 -#define HL_INFO_DRAM_PENDING_ROWS 22 -#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 -#define HL_INFO_CS_TIMEOUT_EVENT 24 -#define HL_INFO_RAZWI_EVENT 25 -#define HL_INFO_VERSION_MAX_LEN 128 +#define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 /** @@ -489,27 +463,15 @@ struct hl_info_pci_counters { __u64 replay_cnt; }; -enum hl_clk_throttling_type { - HL_CLK_THROTTLE_TYPE_POWER, - HL_CLK_THROTTLE_TYPE_THERMAL, - HL_CLK_THROTTLE_TYPE_MAX -}; - -/* clk_throttling_reason masks */ -#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER) -#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL) +#define HL_CLK_THROTTLE_POWER 0x1 +#define HL_CLK_THROTTLE_THERMAL 0x2 /** * struct hl_info_clk_throttle - clock throttling reason * @clk_throttling_reason: each bit represents a clk throttling reason - * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event - * @clk_throttling_duration_ns: the clock throttle time in nanosec */ struct hl_info_clk_throttle { __u32 clk_throttling_reason; - __u32 pad; - __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX]; - __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX]; }; /** @@ -587,51 +549,6 @@ struct hl_info_cs_counters { __u64 ctx_validation_drop_cnt; }; -/** - * struct hl_info_last_err_open_dev_time - last error boot information. - * @timestamp: timestamp of last time the device was opened and error occurred. - */ -struct hl_info_last_err_open_dev_time { - __s64 timestamp; -}; - -/** - * struct hl_info_cs_timeout_event - last CS timeout information. - * @timestamp: timestamp when last CS timeout event occurred. - * @seq: sequence number of last CS timeout event. - */ -struct hl_info_cs_timeout_event { - __s64 timestamp; - __u64 seq; -}; - -#define HL_RAZWI_PAGE_FAULT 0 -#define HL_RAZWI_MMU_ACCESS_ERROR 1 - -/** - * struct hl_info_razwi_event - razwi information. - * @timestamp: timestamp of razwi. - * @addr: address which accessing it caused razwi. - * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does not - * have engine id it will be set to U16_MAX. - * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible - * engines which one them caused the razwi. In that case, it will contain the - * second possible engine id, otherwise it will be set to U16_MAX. - * @no_engine_id: if razwi initiator does not have engine id, this field will be set to 1, - * otherwise 0. - * @error_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. - * @pad: padding to 64 bit. - */ -struct hl_info_razwi_event { - __s64 timestamp; - __u64 addr; - __u16 engine_id_1; - __u16 engine_id_2; - __u8 no_engine_id; - __u8 error_type; - __u8 pad[2]; -}; - enum gaudi_dcores { HL_GAUDI_WS_DCORE, HL_GAUDI_WN_DCORE, @@ -639,30 +556,33 @@ enum gaudi_dcores { HL_GAUDI_ES_DCORE }; -/** - * struct hl_info_args - Main structure to retrieve device related information. - * @return_pointer: User space address of the relevant structure related to HL_INFO_* operation - * mentioned in @op. - * @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it - * limits how many bytes the kernel can write. 
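On the clock-throttle hunk above: with the enum-backed masks reverted to the plain 0x1/0x2 defines, decoding a report stays a simple bit test. A sketch, assuming the structure was filled in by the driver's clock-throttle info op:

.. code-block:: C

    #include <stdio.h>

    static void report_throttling(const struct hl_info_clk_throttle *t)
    {
        if (t->clk_throttling_reason & HL_CLK_THROTTLE_POWER)
            printf("throttled by power envelope\n");
        if (t->clk_throttling_reason & HL_CLK_THROTTLE_THERMAL)
            printf("throttled by thermal limit\n");
        if (!t->clk_throttling_reason)
            printf("not throttled\n");
    }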
For hw_events array, the size should be - * hl_info_hw_ip_info.num_of_events * sizeof(__u32). - * @op: Defines which type of information to be retrieved. Refer HL_INFO_* for details. - * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores). - * @ctx_id: Context ID of the user. Currently not in use. - * @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms - * resolution. Currently not in use. - * @pll_index: Index as defined in hl__pll_index enumeration. - * @pad: Padding to 64 bit. - */ struct hl_info_args { + /* Location of relevant struct in userspace */ __u64 return_pointer; + /* + * The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write + * + * For hw_events array, the size should be + * hl_info_hw_ip_info.num_of_events * sizeof(__u32) + */ __u32 return_size; + + /* HL_INFO_* */ __u32 op; union { + /* Dcore id for which the information is relevant. + * For Gaudi refer to 'enum gaudi_dcores' + */ __u32 dcore_id; + /* Context ID - Currently not in use */ __u32 ctx_id; + /* Period value for utilization rate (100ms - 1000ms, in 100ms + * resolution. + */ __u32 period_ms; + /* PLL frequency retrieval */ __u32 pll_index; }; @@ -680,10 +600,7 @@ struct hl_info_args { #define HL_MAX_CB_SIZE (0x200000 - 32) /* Indicates whether the command buffer should be mapped to the device's MMU */ -#define HL_CB_FLAGS_MAP 0x1 - -/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */ -#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 +#define HL_CB_FLAGS_MAP 0x1 struct hl_cb_in { /* Handle of CB or 0 if we want to create one */ @@ -705,16 +622,11 @@ struct hl_cb_out { /* Handle of CB */ __u64 cb_handle; - union { - /* Information about CB */ - struct { - /* Usage count of CB */ - __u32 usage_cnt; - __u32 pad; - }; - - /* CB mapped address to device MMU */ - __u64 device_va; + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; }; }; }; @@ -937,17 +849,9 @@ struct hl_cs_out { /* * SOB base address offset - * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ __u32 sob_base_addr_offset; - - /* - * Count of completed signals in SOB before current signal submission. - * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION) - * or HL_CS_FLAGS_SIGNAL is set - */ - __u16 sob_count_before_submission; - __u16 pad[3]; }; union hl_cs_args { @@ -955,10 +859,9 @@ union hl_cs_args { struct hl_cs_out out; }; -#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 -#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 -#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 -#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 +#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 +#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 @@ -978,25 +881,20 @@ struct hl_wait_cs_in { }; struct { - union { - /* User address for completion comparison. - * upon interrupt, driver will compare the value pointed - * by this address with the supplied target value. - * in order not to perform any comparison, set address - * to all 1s. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u64 addr; - - /* cq_counters_handle to a kernel mapped cb which contains - * cq counters. 
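The reverted hl_info_args keeps the same calling convention: point return_pointer at a caller-owned buffer, bound it with return_size, and pick an HL_INFO_* op. A sketch of one query; HL_IOCTL_INFO is assumed to be the driver's info ioctl as defined elsewhere in this header, and device_fd an open habanalabs device node:

.. code-block:: C

    #include <stdio.h>
    #include <sys/ioctl.h>

    struct hl_info_hw_ip_info hw_ip = { 0 };
    struct hl_info_args args = {
        .return_pointer = (__u64)(uintptr_t)&hw_ip,
        .return_size    = sizeof(hw_ip),
        .op             = HL_INFO_HW_IP_INFO,
    };

    if (ioctl(device_fd, HL_IOCTL_INFO, &args))
        perror("HL_INFO_HW_IP_INFO");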
- * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set - */ - __u64 cq_counters_handle; - }; - + /* User address for completion comparison. + * upon interrupt, driver will compare the value pointed + * by this address with the supplied target value. + * in order not to perform any comparison, set address + * to all 1s. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 addr; /* Target value for completion comparison */ - __u64 target; + __u32 target; + /* Absolute timeout to wait for interrupt + * in microseconds + */ + __u32 interrupt_timeout_us; }; }; @@ -1010,27 +908,9 @@ struct hl_wait_cs_in { */ __u32 flags; - union { - struct { - /* Multi CS API info- valid entries in multi-CS array */ - __u8 seq_arr_len; - __u8 pad[7]; - }; - - /* Absolute timeout to wait for an interrupt in microseconds. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u64 interrupt_timeout_us; - }; - - /* - * cq counter offset inside the counters cb pointed by cq_counters_handle above. - * upon interrupt, driver will compare the value pointed - * by this address (cq_counters_handle + cq_counters_offset) - * with the supplied target value. - * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set - */ - __u64 cq_counters_offset; + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 @@ -1072,10 +952,6 @@ union hl_wait_cs_args { #define HL_MEM_OP_UNMAP 3 /* Opcode to map a hw block */ #define HL_MEM_OP_MAP_BLOCK 4 -/* Opcode to create DMA-BUF object for an existing device memory allocation - * and to export an FD of that DMA-BUF back to the caller - */ -#define HL_MEM_OP_EXPORT_DMABUF_FD 5 /* Memory flags */ #define HL_MEM_CONTIGUOUS 0x1 @@ -1147,26 +1023,11 @@ struct hl_mem_in { /* Virtual address returned from HL_MEM_OP_MAP */ __u64 device_virt_addr; } unmap; - - /* HL_MEM_OP_EXPORT_DMABUF_FD */ - struct { - /* Handle returned from HL_MEM_OP_ALLOC. In Gaudi, - * where we don't have MMU for the device memory, the - * driver expects a physical address (instead of - * a handle) in the device memory space. - */ - __u64 handle; - /* Size of memory allocation. Relevant only for GAUDI */ - __u64 mem_size; - } export_dmabuf_fd; }; /* HL_MEM_OP_* */ __u32 op; - /* HL_MEM_* flags. - * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the - * DMA-BUF file/FD flags. - */ + /* HL_MEM_* flags */ __u32 flags; /* Context ID - Currently not in use */ __u32 ctx_id; @@ -1203,13 +1064,6 @@ struct hl_mem_out { __u32 pad; }; - - /* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the - * DMA-BUF object that was created to describe a memory - * allocation on the device's memory space. The FD should be - * passed to the importer driver - */ - __s32 fd; }; }; diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 08035ccf1f..f89fbb5b1e 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 
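Back in habanalabs.h: in the reverted wait path, an interrupt wait compares a user-supplied 32-bit target against the value behind addr, with the timeout folded back into the same struct. A sketch under those definitions; the completion word and target value are placeholders:

.. code-block:: C

    __u32 completion_word = 0;   /* updated by the device/driver */
    __u32 expected_value  = 1;

    struct hl_wait_cs_in in = { 0 };

    in.addr                 = (__u64)(uintptr_t)&completion_word;
    in.target               = expected_value;
    in.interrupt_timeout_us = 1000 * 1000;   /* 1 second */
    /* HL_WAIT_CS_FLAGS_INTERRUPT_MASK in the upper flag bits selects the interrupt. */
    in.flags                = HL_WAIT_CS_FLAGS_INTERRUPT;

    /* Per the comment above, setting addr to all 1s skips the comparison entirely. */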
*/ #ifndef EFA_ABI_USER_H @@ -52,20 +52,11 @@ struct efa_ibv_alloc_pd_resp { __u8 reserved_30[2]; }; -enum { - EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, -}; - struct efa_ibv_create_cq { __u32 comp_mask; __u32 cq_entry_size; __u16 num_sub_cqs; - __u8 flags; - __u8 reserved_58[5]; -}; - -enum { - EFA_CREATE_CQ_RESP_DB_OFF = 1 << 0, + __u8 reserved_50[6]; }; struct efa_ibv_create_cq_resp { @@ -74,9 +65,7 @@ struct efa_ibv_create_cq_resp { __aligned_u64 q_mmap_key; __aligned_u64 q_mmap_size; __u16 cq_idx; - __u8 reserved_d0[2]; - __u32 db_off; - __aligned_u64 db_mmap_key; + __u8 reserved_d0[6]; }; enum { @@ -117,7 +106,6 @@ struct efa_ibv_create_ah_resp { enum { EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, - EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, }; struct efa_ibv_ex_query_device_resp { diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index f6fde06db4..42b1776555 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -77,12 +77,10 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, - HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, }; struct hns_roce_ib_create_qp_resp { __aligned_u64 cap_flags; - __aligned_u64 dwqe_mmap_key; }; struct hns_roce_ib_alloc_ucontext_resp { diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index e50c357367..75a1ae2311 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -297,8 +297,6 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */ - RDMA_NLDEV_CMD_STAT_GET_STATUS, - RDMA_NLDEV_NUM_OPS }; @@ -551,9 +549,6 @@ enum rdma_nldev_attr { RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, /* u8 */ - RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, /* u32 */ - RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, /* u8 */ - /* * Always the end */ diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index f09c5c9e3d..e283c2220a 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -98,10 +98,6 @@ struct rxe_send_wr { __u32 remote_qpn; __u32 remote_qkey; __u16 pkey_index; - __u16 reserved; - __u32 ah_num; - __u32 pad[4]; - struct rxe_av av; } ud; struct { __aligned_u64 addr; @@ -145,13 +141,14 @@ struct rxe_dma_info { __u32 sge_offset; __u32 reserved; union { - __DECLARE_FLEX_ARRAY(__u8, inline_data); - __DECLARE_FLEX_ARRAY(struct rxe_sge, sge); + __u8 inline_data[0]; + struct rxe_sge sge[0]; }; }; struct rxe_send_wqe { struct rxe_send_wr wr; + struct rxe_av av; __u32 status; __u32 state; __aligned_u64 iova; @@ -171,11 +168,6 @@ struct rxe_recv_wqe { struct rxe_dma_info dma; }; -struct rxe_create_ah_resp { - __u32 ah_num; - __u32 reserved; -}; - struct rxe_create_cq_resp { struct mminfo mi; }; diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index 053949287c..da61398b1f 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -240,8 +240,8 @@ struct snd_soc_tplg_vendor_array { struct snd_soc_tplg_private { __le32 size; /* in bytes of private data */ union { - __DECLARE_FLEX_ARRAY(char, data); - __DECLARE_FLEX_ARRAY(struct snd_soc_tplg_vendor_array, array); + char data[0]; + struct snd_soc_tplg_vendor_array array[0]; }; } __attribute__((packed)); diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 2d3e5df39a..93e40f91bd 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -204,11 +204,6 @@ typedef int 
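On the flexible-array hunks in rdma_user_rxe.h and asoc.h: the macro being reverted away exists because ISO C does not allow a flexible array member directly inside a union. The kernel's helper (from include/uapi/linux/stddef.h, simplified here) wraps the member in an anonymous struct with an empty leading field so it stays legal and lands at offset zero, just like the old NAME[0] arrays:

.. code-block:: C

    #define __DECLARE_FLEX_ARRAY(TYPE, NAME)     \
        struct {                                 \
            struct { } __empty_ ## NAME;         \
            TYPE NAME[];                         \
        }

    /* Both members overlay at offset 0, matching the zero-length-array idiom. */
    union payload {
        __DECLARE_FLEX_ARRAY(char, data);
        __DECLARE_FLEX_ARRAY(struct rxe_sge, sge);
    };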
__bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ -/* - * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the - * available bit count in most significant bit. It's for the case of so-called 'left-justified' or - * `right-padding` sample which has less width than 32 bit. - */ #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) @@ -307,7 +302,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ #define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ -#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */ + #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ @@ -1009,7 +1004,7 @@ typedef int __bitwise snd_ctl_elem_iface_t; #define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1) #define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE) #define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */ -/* (1 << 3) is unused. */ +// (1 << 3) is unused. #define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) diff --git a/include/uapi/sound/firewire.h b/include/uapi/sound/firewire.h index 39cf6eb759..ae12826ed6 100644 --- a/include/uapi/sound/firewire.h +++ b/include/uapi/sound/firewire.h @@ -13,7 +13,6 @@ #define SNDRV_FIREWIRE_EVENT_DIGI00X_MESSAGE 0x746e736c #define SNDRV_FIREWIRE_EVENT_MOTU_NOTIFICATION 0x64776479 #define SNDRV_FIREWIRE_EVENT_TASCAM_CONTROL 0x7473636d -#define SNDRV_FIREWIRE_EVENT_MOTU_REGISTER_DSP_CHANGE 0x4d545244 struct snd_firewire_event_common { unsigned int type; /* SNDRV_FIREWIRE_EVENT_xxx */ @@ -66,12 +65,6 @@ struct snd_firewire_event_tascam_control { struct snd_firewire_tascam_change changes[0]; }; -struct snd_firewire_event_motu_register_dsp_change { - unsigned int type; - __u32 count; /* The number of changes. */ - __u32 changes[]; /* Encoded event for change of register DSP. 
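A note on the casts visible in this hunk: snd_pcm_format_t is a __bitwise typedef, so the sparse checker treats it as a distinct type, and plain integer constants need a __force cast, exactly as the SNDRV_PCM_FORMAT_* defines do. A small self-contained sketch of the rule; the fallback defines below are assumptions for builds without sparse:

.. code-block:: C

    /* Under sparse (__CHECKER__) these carry attributes; otherwise they vanish. */
    #ifndef __CHECKER__
    #define __bitwise
    #define __force
    #endif

    typedef int __bitwise example_fmt_t;

    example_fmt_t a = 10;                          /* sparse would warn here */
    example_fmt_t b = (__force example_fmt_t) 10;  /* the header's silencing pattern */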
*/ -}; - union snd_firewire_event { struct snd_firewire_event_common common; struct snd_firewire_event_lock_status lock_status; @@ -80,7 +73,6 @@ union snd_firewire_event { struct snd_firewire_event_digi00x_message digi00x_message; struct snd_firewire_event_tascam_control tascam_control; struct snd_firewire_event_motu_notification motu_notification; - struct snd_firewire_event_motu_register_dsp_change motu_register_dsp_change; }; @@ -88,9 +80,6 @@ union snd_firewire_event { #define SNDRV_FIREWIRE_IOCTL_LOCK _IO('H', 0xf9) #define SNDRV_FIREWIRE_IOCTL_UNLOCK _IO('H', 0xfa) #define SNDRV_FIREWIRE_IOCTL_TASCAM_STATE _IOR('H', 0xfb, struct snd_firewire_tascam_state) -#define SNDRV_FIREWIRE_IOCTL_MOTU_REGISTER_DSP_METER _IOR('H', 0xfc, struct snd_firewire_motu_register_dsp_meter) -#define SNDRV_FIREWIRE_IOCTL_MOTU_COMMAND_DSP_METER _IOR('H', 0xfd, struct snd_firewire_motu_command_dsp_meter) -#define SNDRV_FIREWIRE_IOCTL_MOTU_REGISTER_DSP_PARAMETER _IOR('H', 0xfe, struct snd_firewire_motu_register_dsp_parameter) #define SNDRV_FIREWIRE_TYPE_DICE 1 #define SNDRV_FIREWIRE_TYPE_FIREWORKS 2 @@ -119,143 +108,4 @@ struct snd_firewire_tascam_state { __be32 data[SNDRV_FIREWIRE_TASCAM_STATE_COUNT]; }; -/* - * In below MOTU models, software is allowed to control their DSP by accessing to registers. - * - 828mk2 - * - 896hd - * - Traveler - * - 8 pre - * - Ultralite - * - 4 pre - * - Audio Express - * - * On the other hand, the status of DSP is split into specific messages included in the sequence of - * isochronous packet. ALSA firewire-motu driver gathers the messages and allow userspace applications - * to read it via ioctl. In 828mk2, 896hd, and Traveler, hardware meter for all of physical inputs - * are put into the message, while one pair of physical outputs is selected. The selection is done by - * LSB one byte in asynchronous write quadlet transaction to 0x'ffff'f000'0b2c. - * - * I note that V3HD/V4HD uses asynchronous transaction for the purpose. The destination address is - * registered to 0x'ffff'f000'0b38 and '0b3c by asynchronous write quadlet request. The size of - * message differs between 23 and 51 quadlets. For the case, the number of mixer bus can be extended - * up to 12. - */ - -#define SNDRV_FIREWIRE_MOTU_REGISTER_DSP_METER_INPUT_COUNT 24 -#define SNDRV_FIREWIRE_MOTU_REGISTER_DSP_METER_OUTPUT_COUNT 24 -#define SNDRV_FIREWIRE_MOTU_REGISTER_DSP_METER_COUNT \ - (SNDRV_FIREWIRE_MOTU_REGISTER_DSP_METER_INPUT_COUNT + SNDRV_FIREWIRE_MOTU_REGISTER_DSP_METER_OUTPUT_COUNT) - -/** - * struct snd_firewire_motu_register_dsp_meter - the container for meter information in DSP - * controlled by register access - * @data: Signal level meters. The mapping between position and input/output channel is - * model-dependent. - * - * The structure expresses the part of DSP status for hardware meter. The u8 storage includes linear - * value for audio signal level between 0x00 and 0x7f. - */ -struct snd_firewire_motu_register_dsp_meter { - __u8 data[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_METER_COUNT]; -}; - -#define SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_COUNT 4 -#define SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_SRC_COUNT 20 -#define SNDRV_FIREWIRE_MOTU_REGISTER_DSP_INPUT_COUNT 10 -#define SNDRV_FIREWIRE_MOTU_REGISTER_DSP_ALIGNED_INPUT_COUNT (SNDRV_FIREWIRE_MOTU_REGISTER_DSP_INPUT_COUNT + 2) - -/** - * snd_firewire_motu_register_dsp_parameter - the container for parameters of DSP controlled - * by register access. - * @mixer.source.gain: The gain of source to mixer. 
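For readers tracking what the deleted MOTU interface offered: the register-DSP meter was read with a single hwdep ioctl into a fixed array of 8-bit levels (the command-DSP variant below is analogous but returns binary32 floats). A sketch against the removed definitions; the hwdep device path is illustrative:

.. code-block:: C

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>

    struct snd_firewire_motu_register_dsp_meter meter;
    int fd = open("/dev/snd/hwC0D0", O_RDONLY);   /* hwdep node of the unit */

    if (fd >= 0 &&
        ioctl(fd, SNDRV_FIREWIRE_IOCTL_MOTU_REGISTER_DSP_METER, &meter) == 0) {
        /* Linear levels, 0x00..0x7f; channel mapping is model-dependent. */
        for (int i = 0; i < SNDRV_FIREWIRE_MOTU_REGISTER_DSP_METER_COUNT; i++)
            printf("meter[%2d] = %u\n", i, meter.data[i]);
    }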
- * @mixer.source.pan: The L/R balance of source to mixer. - * @mixer.source.flag: The flag of source to mixer, including mute, solo. - * @mixer.source.paired_balance: The L/R balance of paired source to mixer, only for 4 pre and - * Audio Express. - * @mixer.source.paired_width: The width of paired source to mixer, only for 4 pre and - * Audio Express. - * @mixer.output.paired_volume: The volume of paired output from mixer. - * @mixer.output.paired_flag: The flag of paired output from mixer. - * @output.main_paired_volume: The volume of paired main output. - * @output.hp_paired_volume: The volume of paired hp output. - * @output.hp_paired_assignment: The source assigned to paired hp output. - * @output.reserved: Padding for 32 bit alignment for future extension. - * @line_input.boost_flag: The flags of boost for line inputs, only for 828mk2 and Traveler. - * @line_input.nominal_level_flag: The flags of nominal level for line inputs, only for 828mk2 and - * Traveler. - * @line_input.reserved: Padding for 32 bit alignment for future extension. - * @input.gain_and_invert: The value including gain and invert for input, only for Ultralite, 4 pre - * and Audio Express. - * @input.flag: The flag of input; e.g. jack detection, phantom power, and pad, only for Ultralite, - * 4 pre and Audio express. - * @reserved: Padding so that the size of structure is kept to 512 byte, but for future extension. - * - * The structure expresses the set of parameters for DSP controlled by register access. - */ -struct snd_firewire_motu_register_dsp_parameter { - struct { - struct { - __u8 gain[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_SRC_COUNT]; - __u8 pan[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_SRC_COUNT]; - __u8 flag[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_SRC_COUNT]; - __u8 paired_balance[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_SRC_COUNT]; - __u8 paired_width[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_SRC_COUNT]; - } source[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_COUNT]; - struct { - __u8 paired_volume[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_COUNT]; - __u8 paired_flag[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_MIXER_COUNT]; - } output; - } mixer; - struct { - __u8 main_paired_volume; - __u8 hp_paired_volume; - __u8 hp_paired_assignment; - __u8 reserved[5]; - } output; - struct { - __u8 boost_flag; - __u8 nominal_level_flag; - __u8 reserved[6]; - } line_input; - struct { - __u8 gain_and_invert[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_ALIGNED_INPUT_COUNT]; - __u8 flag[SNDRV_FIREWIRE_MOTU_REGISTER_DSP_ALIGNED_INPUT_COUNT]; - } input; - __u8 reserved[64]; -}; - -/* - * In below MOTU models, software is allowed to control their DSP by command in frame of - * asynchronous transaction to 0x'ffff'0001'0000: - * - * - 828 mk3 (FireWire only and Hybrid) - * - 896 mk3 (FireWire only and Hybrid) - * - Ultralite mk3 (FireWire only and Hybrid) - * - Traveler mk3 - * - Track 16 - * - * On the other hand, the states of hardware meter is split into specific messages included in the - * sequence of isochronous packet. ALSA firewire-motu driver gathers the message and allow userspace - * application to read it via ioctl. - */ - -#define SNDRV_FIREWIRE_MOTU_COMMAND_DSP_METER_COUNT 400 - -/** - * struct snd_firewire_motu_command_dsp_meter - the container for meter information in DSP - * controlled by command - * @data: Signal level meters. The mapping between position and signal channel is model-dependent. - * - * The structure expresses the part of DSP status for hardware meter. 
The 32 bit storage is - * estimated to include IEEE 764 32 bit single precision floating point (binary32) value. It is - * expected to be linear value (not logarithm) for audio signal level between 0.0 and +1.0. - */ -struct snd_firewire_motu_command_dsp_meter { -#ifdef __KERNEL__ - __u32 data[SNDRV_FIREWIRE_MOTU_COMMAND_DSP_METER_COUNT]; -#else - float data[SNDRV_FIREWIRE_MOTU_COMMAND_DSP_METER_COUNT]; -#endif -}; - #endif /* _UAPI_SOUND_FIREWIRE_H_INCLUDED */ diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index b72fa385be..a642bf30c0 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -51,7 +51,6 @@ #define SOF_TKN_SCHED_CORE 203 #define SOF_TKN_SCHED_FRAMES 204 #define SOF_TKN_SCHED_TIME_DOMAIN 205 -#define SOF_TKN_SCHED_DYNAMIC_PIPELINE 206 /* volume */ #define SOF_TKN_VOLUME_RAMP_STEP_TYPE 250 @@ -140,9 +139,4 @@ #define SOF_TKN_INTEL_HDA_RATE 1500 #define SOF_TKN_INTEL_HDA_CH 1501 -/* AFE */ -#define SOF_TKN_MEDIATEK_AFE_RATE 1600 -#define SOF_TKN_MEDIATEK_AFE_CH 1601 -#define SOF_TKN_MEDIATEK_AFE_FORMAT 1602 - #endif diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h index 7a7145395c..9ac5515b9b 100644 --- a/include/uapi/xen/gntdev.h +++ b/include/uapi/xen/gntdev.h @@ -47,13 +47,7 @@ struct ioctl_gntdev_grant_ref { /* * Inserts the grant references into the mapping table of an instance * of gntdev. N.B. This does not perform the mapping, which is deferred - * until mmap() is called with @index as the offset. @index should be - * considered opaque to userspace, with one exception: if no grant - * references have ever been inserted into the mapping table of this - * instance, @index will be set to 0. This is necessary to use gntdev - * with userspace APIs that expect a file descriptor that can be - * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this - * ioctl will fail. + * until mmap() is called with @index as the offset. 
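Even in its shortened form, the gntdev comment implies the calling sequence: insert the grant refs, then mmap() the device at the returned index. A userspace sketch; the backend domid and grant reference are placeholders and error handling is elided:

.. code-block:: C

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>

    __u32 backend_domid = 0, gref = 0;   /* placeholders from the backend */

    struct ioctl_gntdev_map_grant_ref map = {
        .count = 1,
        .refs  = { { .domid = backend_domid, .ref = gref } },
    };

    int fd = open("/dev/xen/gntdev", O_RDWR);
    if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) == 0) {
        /* map.index is the offset to hand to mmap(). */
        void *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, map.index);
    }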
*/ #define IOCTL_GNTDEV_MAP_GRANT_REF \ _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) diff --git a/include/xen/arm/hypercall.h b/include/xen/arm/hypercall.h index 9d7dd1c65a..b40485e54d 100644 --- a/include/xen/arm/hypercall.h +++ b/include/xen/arm/hypercall.h @@ -53,6 +53,7 @@ unsigned long HYPERVISOR_hvm_op(int op, void *arg); int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); +int HYPERVISOR_tmem_op(void *arg); int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, struct xen_dm_op_buf *bufs); @@ -73,4 +74,18 @@ HYPERVISOR_suspend(unsigned long start_info_mfn) return HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); } +static inline void +MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, + unsigned int new_val, unsigned long flags) +{ + BUG(); +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + BUG(); +} + #endif /* _ASM_ARM_XEN_HYPERCALL_H */ diff --git a/include/xen/balloon.h b/include/xen/balloon.h index f78a6cc94f..6dbdb0b3fd 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -26,8 +26,8 @@ extern struct balloon_stats balloon_stats; void balloon_set_new_target(unsigned long target); -int xen_alloc_ballooned_pages(unsigned int nr_pages, struct page **pages); -void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages); +int alloc_xenballooned_pages(int nr_pages, struct page **pages); +void free_xenballooned_pages(int nr_pages, struct page **pages); #ifdef CONFIG_XEN_BALLOON void xen_balloon_init(void); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index cb854df031..c9fea9389e 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -104,17 +104,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. + * Note that the granted page might still be accessed (read or write) by the + * other side after gnttab_end_foreign_access() returns, so even if page was + * specified as 0 it is not allowed to just reuse the page for other + * purposes immediately. gnttab_end_foreign_access() will take an additional + * reference to the granted page in this case, which is dropped only after + * the grant is no longer in use. + * This requires that multi page allocations for areas subject to + * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing + * via free_pages_exact()) in order to avoid high order pages. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); +/* + * End access through the given grant reference, iff the grant entry is + * no longer in use. In case of success ending foreign access, the + * grant reference is deallocated. + * Return 1 if the grant entry was freed, 0 if it is still in use. 
+ */ +int gnttab_try_end_foreign_access(grant_ref_t ref); + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); -int gnttab_query_foreign_access(grant_ref_t ref); - /* * operations on reserved batches of grant references */ diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h index c67822a25e..dc3193f4b5 100644 --- a/include/xen/interface/callback.h +++ b/include/xen/interface/callback.h @@ -1,9 +1,26 @@ -/* SPDX-License-Identifier: MIT */ /****************************************************************************** * callback.h * * Register guest OS callbacks with Xen. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2006, Ian Campbell */ diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index 38deb12146..449bd383cb 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -1,9 +1,26 @@ -/* SPDX-License-Identifier: MIT */ /****************************************************************************** * elfnote.h * * Definitions used for the Xen ELF notes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2006, Ian Campbell, XenSource Ltd. 
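Returning to grant_table.h: taken together, the two comments added above define the safe teardown pattern for granted pages. A kernel-side sketch under those rules; otherend_id stands in for the peer domain:

.. code-block:: C

    /* Allocate with alloc_pages_exact() so deferred freeing never sees high-order pages. */
    void *shared = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL);
    int ref = gnttab_grant_foreign_access(otherend_id,
                                          virt_to_gfn(shared), 0);
    /* negative ref means no grant entry was available */

    /* ... the backend maps ref and performs I/O ... */

    /*
     * Ends access and frees the page, possibly later: if the backend still
     * holds the grant, the page must not be reused, so the core keeps a
     * reference and releases it only once the grant really is unused.
     */
    gnttab_end_foreign_access(ref, 0 /* read-write */, (unsigned long)shared);

Callers that must not free the page this way can instead poll gnttab_try_end_foreign_access(), which returns 1 only once the grant entry could actually be freed.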
*/ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h index 5f8da466e8..cf80e338fb 100644 --- a/include/xen/interface/event_channel.h +++ b/include/xen/interface/event_channel.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: MIT */ +/* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * event_channel.h * diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index 53f760378e..5a7bdefa06 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: MIT */ +/* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * features.h * diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index 3eeabbc7ee..7fb7112d66 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -1,10 +1,27 @@ -/* SPDX-License-Identifier: MIT */ /****************************************************************************** * grant_table.h * * Interface for granting foreign access to page frames, and receiving * page-ownership transfers. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2004, K A Fraser */ diff --git a/include/xen/interface/hvm/dm_op.h b/include/xen/interface/hvm/dm_op.h index 08d972f87c..ee9e480bc5 100644 --- a/include/xen/interface/hvm/dm_op.h +++ b/include/xen/interface/hvm/dm_op.h @@ -1,6 +1,23 @@ -/* SPDX-License-Identifier: MIT */ /* * Copyright (c) 2016, Citrix Systems Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_HVM_DM_OP_H__ diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h index f3097e79bb..25d945ef17 100644 --- a/include/xen/interface/hvm/hvm_op.h +++ b/include/xen/interface/hvm/hvm_op.h @@ -1,4 +1,22 @@ -/* SPDX-License-Identifier: MIT */ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ diff --git a/include/xen/interface/hvm/hvm_vcpu.h b/include/xen/interface/hvm/hvm_vcpu.h index cbf9349327..bfc2138e0b 100644 --- a/include/xen/interface/hvm/hvm_vcpu.h +++ b/include/xen/interface/hvm/hvm_vcpu.h @@ -1,5 +1,22 @@ -/* SPDX-License-Identifier: MIT */ /* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * * Copyright (c) 2015, Roger Pau Monne */ diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h index 4e2c94b3c4..4d61fc58d9 100644 --- a/include/xen/interface/hvm/params.h +++ b/include/xen/interface/hvm/params.h @@ -1,4 +1,22 @@ -/* SPDX-License-Identifier: MIT */ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ #ifndef __XEN_PUBLIC_HVM_PARAMS_H__ #define __XEN_PUBLIC_HVM_PARAMS_H__ diff --git a/include/xen/interface/hvm/start_info.h b/include/xen/interface/hvm/start_info.h index e33557c0b4..50af9ea2ff 100644 --- a/include/xen/interface/hvm/start_info.h +++ b/include/xen/interface/hvm/start_info.h @@ -1,5 +1,22 @@ -/* SPDX-License-Identifier: MIT */ /* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2016, Citrix Systems, Inc. 
*/ diff --git a/include/xen/interface/io/9pfs.h b/include/xen/interface/io/9pfs.h index f1a4c5ad2f..5b6c19dae5 100644 --- a/include/xen/interface/io/9pfs.h +++ b/include/xen/interface/io/9pfs.h @@ -1,7 +1,24 @@ -/* SPDX-License-Identifier: MIT */ /* * 9pfs.h -- Xen 9PFS transport * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2017 Stefano Stabellini */ diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index ba1e9f5b63..5e40041c7e 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: MIT */ +/* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * blkif.h * diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h index cf17e89ed8..85ca8b0269 100644 --- a/include/xen/interface/io/console.h +++ b/include/xen/interface/io/console.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: MIT */ +/* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * console.h * diff --git a/include/xen/interface/io/displif.h b/include/xen/interface/io/displif.h index 18417b0178..d43ca0361f 100644 --- a/include/xen/interface/io/displif.h +++ b/include/xen/interface/io/displif.h @@ -1,9 +1,26 @@ -/* SPDX-License-Identifier: MIT */ /****************************************************************************** * displif.h * * Unified display device I/O interface for Xen guest OSes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2016-2017 EPAM Systems Inc. * * Authors: Oleksandr Andrushchenko diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h index 60ca808cef..974a51ed91 100644 --- a/include/xen/interface/io/fbif.h +++ b/include/xen/interface/io/fbif.h @@ -1,7 +1,24 @@ -/* SPDX-License-Identifier: MIT */ /* * fbif.h -- Xen virtual frame buffer device * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2005 Anthony Liguori * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster */ diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index b8b08aa533..5c7630d737 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -1,7 +1,24 @@ -/* SPDX-License-Identifier: MIT */ /* * kbdif.h -- Xen virtual keyboard/mouse * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * * Copyright (C) 2005 Anthony Liguori * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster */ diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index cb0c1a25d5..2194322c3c 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -1,9 +1,26 @@ -/* SPDX-License-Identifier: MIT */ /****************************************************************************** * xen_netif.h * * Unified network-device I/O interface for Xen guest OSes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2003-2004, Keir Fraser */ diff --git a/include/xen/interface/io/pciif.h b/include/xen/interface/io/pciif.h index d1a87b62da..d9922ae36e 100644 --- a/include/xen/interface/io/pciif.h +++ b/include/xen/interface/io/pciif.h @@ -1,7 +1,24 @@ -/* SPDX-License-Identifier: MIT */ /* * PCI Backend/Frontend Common Data Structures & Macros * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
  * Author: Ryan Wilson
  */
 #ifndef __XEN_PCI_COMMON_H__
diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
index 22099bb407..6a89dc1bf2 100644
--- a/include/xen/interface/io/protocols.h
+++ b/include/xen/interface/io/protocols.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: MIT */
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __XEN_PROTOCOLS_H__
 #define __XEN_PROTOCOLS_H__
diff --git a/include/xen/interface/io/pvcalls.h b/include/xen/interface/io/pvcalls.h
index b6680fdbe2..ccf97b817e 100644
--- a/include/xen/interface/io/pvcalls.h
+++ b/include/xen/interface/io/pvcalls.h
@@ -1,5 +1,3 @@
-/* SPDX-License-Identifier: MIT */
-
 #ifndef __XEN_PUBLIC_IO_XEN_PVCALLS_H__
 #define __XEN_PUBLIC_IO_XEN_PVCALLS_H__
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 2470ec45eb..b39cdbc522 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -1,9 +1,26 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * ring.h
  *
  * Shared producer-consumer ring macros.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Tim Deegan and Andrew Warfield November 2004.
  */
diff --git a/include/xen/interface/io/sndif.h b/include/xen/interface/io/sndif.h
index 445657cdb1..2aac8f7361 100644
--- a/include/xen/interface/io/sndif.h
+++ b/include/xen/interface/io/sndif.h
@@ -1,9 +1,26 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * sndif.h
  *
  * Unified sound-device I/O interface for Xen guest OSes.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (C) 2013-2015 GlobalLogic Inc.
  * Copyright (C) 2016-2017 EPAM Systems Inc.
  *
diff --git a/include/xen/interface/io/vscsiif.h b/include/xen/interface/io/vscsiif.h
index 1f6047d3de..d07d7aca8d 100644
--- a/include/xen/interface/io/vscsiif.h
+++ b/include/xen/interface/io/vscsiif.h
@@ -1,9 +1,26 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * vscsiif.h
  *
  * Based on the blkif.h code.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright(c) FUJITSU Limited 2008.
  */
diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h
index 44456e2853..fb87161122 100644
--- a/include/xen/interface/io/xenbus.h
+++ b/include/xen/interface/io/xenbus.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: MIT */
+/* SPDX-License-Identifier: GPL-2.0 */
 /*****************************************************************************
  * xenbus.h
  *
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
index d40a44f09b..1517c7e93a 100644
--- a/include/xen/interface/io/xs_wire.h
+++ b/include/xen/interface/io/xs_wire.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: MIT */
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Details of the "wire" protocol between Xen Store Daemon and client
  * library or guest kernel.
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 1a371a825c..447004861f 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: MIT */
+/* SPDX-License-Identifier: GPL-2.0 */
 /******************************************************************************
  * memory.h
  *
diff --git a/include/xen/interface/nmi.h b/include/xen/interface/nmi.h
index b665fdbef1..73d9b0a297 100644
--- a/include/xen/interface/nmi.h
+++ b/include/xen/interface/nmi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: MIT */
+/* SPDX-License-Identifier: GPL-2.0 */
 /******************************************************************************
  * nmi.h
  *
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index a237af8678..610dba9b62 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -1,4 +1,22 @@
-/* SPDX-License-Identifier: MIT */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
 #ifndef __XEN_PUBLIC_PHYSDEV_H__
 #define __XEN_PUBLIC_PHYSDEV_H__
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 655d92e803..732efb08c3 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -1,9 +1,26 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * platform.h
  *
  * Hardware platform operations. Intended for use by domain-0 kernel.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2002-2006, K Fraser
  */
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
index 4dac0634ff..a4c4d735d7 100644
--- a/include/xen/interface/sched.h
+++ b/include/xen/interface/sched.h
@@ -1,9 +1,26 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * sched.h
  *
  * Scheduler state interactions
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2005, Keir Fraser
  */
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index c7cc28ad8d..504c716015 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -1,9 +1,26 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * vcpu.h
  *
  * VCPU initialisation, query, and hotplug.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2005, Keir Fraser
  */
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index 37d6588873..8772b552c0 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: MIT */
+/* SPDX-License-Identifier: GPL-2.0 */
 /******************************************************************************
  * version.h
  *
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
index 464aa6b3a5..7483a78d24 100644
--- a/include/xen/interface/xen-mca.h
+++ b/include/xen/interface/xen-mca.h
@@ -1,4 +1,3 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * arch-x86/mca.h
  * Guest OS machine check interface to x86 Xen.
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 0ca23eca2a..5ee37a2964 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -1,9 +1,26 @@
-/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * xen.h
  *
  * Guest OS interface to Xen.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2004, K A Fraser
  */
@@ -722,9 +739,6 @@ struct dom0_vga_console_info {
 			uint32_t gbl_caps;
 			/* Mode attributes (offset 0x0, VESA command 0x4f01). */
 			uint16_t mode_attrs;
-			uint16_t pad;
-			/* high 32 bits of lfb_base */
-			uint32_t ext_lfb_base;
 		} vesa_lfb;
 	} u;
 };
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index e2ee73d91b..ad603eab24 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: MIT */
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __XEN_PUBLIC_XENPMU_H__
 #define __XEN_PUBLIC_XENPMU_H__
diff --git a/include/xen/xen.h b/include/xen/xen.h
index a99bab8175..43efba045a 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -55,20 +55,10 @@ extern u64 xen_saved_max_mem_size;
 #ifdef CONFIG_XEN_UNPOPULATED_ALLOC
 int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages);
 void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);
-#include
-int arch_xen_unpopulated_init(struct resource **res);
 #else
+#define xen_alloc_unpopulated_pages alloc_xenballooned_pages
+#define xen_free_unpopulated_pages free_xenballooned_pages
 #include
-static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages,
-		struct page **pages)
-{
-	return xen_alloc_ballooned_pages(nr_pages, pages);
-}
-static inline void xen_free_unpopulated_pages(unsigned int nr_pages,
-		struct page **pages)
-{
-	xen_free_ballooned_pages(nr_pages, pages);
-}
 #endif
 
 #endif	/* _XEN_XEN_H */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index b13eb86395..b94074c827 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -112,7 +112,6 @@ struct xenbus_driver {
	const char *name;	/* defaults to ids[0].devicetype */
 	const struct xenbus_device_id *ids;
 	bool allow_rebind; /* avoid setting xenstore closed during remove */
-	bool not_essential;	/* is not mandatory for boot progress */
 	int (*probe)(struct xenbus_device *dev,
 		     const struct xenbus_device_id *id);
 	void (*otherend_changed)(struct xenbus_device *dev,
diff --git a/include/xen/xenbus_dev.h b/include/xen/xenbus_dev.h
index 4dc45a51c0..bbee8c6a34 100644
--- a/include/xen/xenbus_dev.h
+++ b/include/xen/xenbus_dev.h
@@ -1,4 +1,6 @@
 /******************************************************************************
+ * evtchn.h
+ *
  * Interface to /dev/xen/xenbus_backend.
  *
  * Copyright (c) 2011 Bastian Blank
diff --git a/init/Kconfig b/init/Kconfig
index e9119bf54b..11f8a845f2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -550,7 +550,7 @@ config SCHED_THERMAL_PRESSURE
	  i.e. put less load on throttled CPUs than on non/less throttled ones.
 
 	  This requires the architecture to implement
-	  arch_update_thermal_pressure() and arch_scale_thermal_pressure().
+	  arch_set_thermal_pressure() and arch_scale_thermal_pressure().
 
 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"
@@ -885,11 +885,6 @@ config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 config CC_HAS_INT128
 	def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
 
-config CC_IMPLICIT_FALLTHROUGH
-	string
-	default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
-	default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
-
 #
 # For architectures that know their GCC __int128 support is sound
 #
@@ -906,7 +901,7 @@ config NUMA_BALANCING
 	bool "Memory placement aware NUMA scheduler"
 	depends on ARCH_SUPPORTS_NUMA_BALANCING
 	depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
-	depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
+	depends on SMP && NUMA && MIGRATION
 	help
 	  This option adds support for automatic NUMA aware memory/task placement.
	  The mechanism is quite primitive and is based on migrating memory when
@@ -1414,6 +1409,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION
 config LD_ORPHAN_WARN
 	def_bool y
 	depends on ARCH_WANT_LD_ORPHAN_WARN
+	depends on !LD_IS_LLD || LLD_VERSION >= 110000
 	depends on $(ld-option,--orphan-handling=warn)
 
 config SYSCTL
@@ -1578,7 +1574,6 @@ config BASE_FULL
 
 config FUTEX
 	bool "Enable futex support" if EXPERT
-	depends on !(SPARC32 && SMP)
 	default y
 	imply RT_MUTEXES
 	help
@@ -1591,6 +1586,14 @@ config FUTEX_PI
 	depends on FUTEX && RT_MUTEXES
 	default y
 
+config HAVE_FUTEX_CMPXCHG
+	bool
+	depends on FUTEX
+	help
+	  Architectures should select this if futex_atomic_cmpxchg_inatomic()
+	  is implemented and always working. This removes a couple of runtime
+	  checks.
+
 config EPOLL
 	bool "Enable eventpoll support" if EXPERT
 	default y
@@ -1796,10 +1799,6 @@ config HAVE_PERF_EVENTS
 	help
 	  See tools/perf/design.txt for details.
 
-config GUEST_PERF_EVENTS
-	bool
-	depends on HAVE_PERF_EVENTS
-
 config PERF_USE_VMALLOC
 	bool
 	help
@@ -1897,7 +1896,6 @@ choice
 
 config SLAB
 	bool "SLAB"
-	depends on !PREEMPT_RT
 	select HAVE_HARDENED_USERCOPY_ALLOCATOR
 	help
 	  The regular slab allocator that is established and known to work
@@ -1918,7 +1916,6 @@ config SLUB
 config SLOB
 	depends on EXPERT
 	bool "SLOB (Simple Allocator)"
-	depends on !PREEMPT_RT
 	help
 	  SLOB replaces the stock allocator with a drastically simpler
 	  allocator. SLOB is generally more space efficient but
@@ -1929,7 +1926,6 @@ endchoice
 config SLAB_MERGE_DEFAULT
 	bool "Allow slab caches to be merged"
 	default y
-	depends on SLAB || SLUB
 	help
 	  For reduced kernel memory fragmentation, slab caches can be
 	  merged when they share the same size and other characteristics.
@@ -2277,19 +2273,6 @@ config MODULE_COMPRESS_ZSTD
 
 endchoice
 
-config MODULE_DECOMPRESS
-	bool "Support in-kernel module decompression"
-	depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
-	select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
-	select XZ_DEC if MODULE_COMPRESS_XZ
-	help
-
-	  Support for decompressing kernel modules by the kernel itself
-	  instead of relying on userspace to perform this task. Useful when
-	  load pinning security policy is enabled.
-
-	  If unsure, say N.
-
 config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
 	bool "Allow loading of modules with missing namespace imports"
 	help
diff --git a/init/Makefile b/init/Makefile
index 06326e3043..2846113677 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -30,8 +30,8 @@ $(obj)/version.o: include/generated/compile.h
 quiet_cmd_compile.h = CHK     $@
       cmd_compile.h = \
 	$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@	\
-	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)"	\
-	"$(CONFIG_PREEMPT_RT)" "$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
+	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)"	\
+	"$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)"
 
 include/generated/compile.h: FORCE
 	$(call cmd,compile.h)
diff --git a/init/init_task.c b/init/init_task.c
index 73cc8f0351..2d024066e2 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -182,6 +182,11 @@ struct task_struct init_task
 #endif
 #ifdef CONFIG_KCSAN
 	.kcsan_ctx = {
+		.disable_count		= 0,
+		.atomic_next		= 0,
+		.atomic_nest_count	= 0,
+		.in_flat_atomic		= false,
+		.access_mask		= 0,
 		.scoped_accesses	= {LIST_POISON1, NULL},
 	},
 #endif
diff --git a/init/initramfs.c b/init/initramfs.c
index 2f3d96dc3d..a842c05447 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -607,7 +607,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
 	unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
 	unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
 
-	memblock_free((void *)aligned_start, aligned_end - aligned_start);
+	memblock_free(__pa(aligned_start), aligned_end - aligned_start);
 #endif
 
 	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
diff --git a/init/main.c b/init/main.c
index 65fa2e41a9..bcd132d4e7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -83,6 +83,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -381,7 +382,7 @@ static char * __init xbc_make_cmdline(const char *key)
 	ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
 	if (ret < 0 || ret > len) {
 		pr_err("Failed to print extra kernel cmdline.\n");
-		memblock_free(new_cmdline, len + 1);
+		memblock_free_ptr(new_cmdline, len + 1);
 		return NULL;
 	}
@@ -409,7 +410,7 @@ static void __init setup_boot_config(void)
 	const char *msg;
 	int pos;
 	u32 size, csum;
-	char *data, *err;
+	char *data, *copy, *err;
 	int ret;
 
 	/* Cut out the bootconfig data even if we have no bootconfig option */
@@ -442,7 +443,16 @@ static void __init setup_boot_config(void)
 		return;
 	}
 
-	ret = xbc_init(data, size, &msg, &pos);
+	copy = memblock_alloc(size + 1, SMP_CACHE_BYTES);
+	if (!copy) {
+		pr_err("Failed to allocate memory for bootconfig\n");
+		return;
+	}
+
+	memcpy(copy, data, size);
+	copy[size] = '\0';
+
+	ret = xbc_init(copy, &msg, &pos);
 	if (ret < 0) {
 		if (pos < 0)
 			pr_err("Failed to init bootconfig: %s.\n", msg);
@@ -450,7 +460,6 @@
 			pr_err("Failed to parse bootconfig: %s at %d.\n",
 				msg, pos);
 	} else {
-		xbc_get_info(&ret, NULL);
 		pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret);
 		/* keys starting with "kernel." are passed via cmdline */
 		extra_command_line = xbc_make_cmdline("kernel");
@@ -462,7 +471,7 @@ static void __init setup_boot_config(void)
 
 static void __init exit_boot_config(void)
 {
-	xbc_exit();
+	xbc_destroy_all();
 }
 
 #else	/* !CONFIG_BOOT_CONFIG */
@@ -834,15 +843,12 @@ static void __init mm_init(void)
 	init_mem_debugging_and_hardening();
 	kfence_alloc_pool();
 	report_meminit();
-	stack_depot_early_init();
+	stack_depot_init();
 	mem_init();
 	mem_init_print_info();
-	kmem_cache_init();
-	/*
-	 * page_owner must be initialized after buddy is ready, and also after
-	 * slab is ready so that stack_depot_init() works properly
-	 */
+	/* page_owner must be initialized after buddy is ready */
 	page_ext_init_flatmem_late();
+	kmem_cache_init();
 	kmemleak_init();
 	pgtable_init();
 	debug_objects_mem_init();
@@ -921,7 +927,7 @@ static void __init print_unknown_bootoptions(void)
 	/* Start at unknown_options[1] to skip the initial space */
 	pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n",
 		  &unknown_options[1]);
-	memblock_free(unknown_options, len);
+	memblock_free_ptr(unknown_options, len);
 }
 
 asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
@@ -1502,8 +1508,6 @@ static int __ref kernel_init(void *unused)
 	kernel_init_freeable();
 	/* need to finish all async __init code before freeing the memory */
 	async_synchronize_full();
-
-	system_state = SYSTEM_FREEING_INITMEM;
 	kprobe_free_init_mem();
 	ftrace_free_init_mem();
 	kgdb_free_init_mem();
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index f101c17175..3f312bf2b1 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -10,7 +10,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include "util.h"
@@ -23,6 +22,7 @@ static void *get_ipc(struct ctl_table *table)
 	return which;
 }
 
+#ifdef CONFIG_PROC_SYSCTL
 static int proc_ipc_dointvec(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -104,17 +104,13 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
 	return ret;
 }
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
-static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
-		int write, void *buffer, size_t *lenp, loff_t *ppos)
-{
-	struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns;
-
-	if (write && !checkpoint_restore_ns_capable(user_ns))
-		return -EPERM;
-
-	return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
+#else
+#define proc_ipc_doulongvec_minmax NULL
+#define proc_ipc_dointvec NULL
+#define proc_ipc_dointvec_minmax NULL
+#define proc_ipc_dointvec_minmax_orphans NULL
+#define proc_ipc_auto_msgmni NULL
+#define proc_ipc_sem_dointvec NULL
 #endif
 
 int ipc_mni = IPCMNI;
@@ -202,8 +198,8 @@ static struct ctl_table ipc_kern_table[] = {
 		.procname	= "sem_next_id",
 		.data		= &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
 		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
-		.mode		= 0666,
-		.proc_handler	= proc_ipc_dointvec_minmax_checkpoint_restore,
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
 	},
@@ -211,8 +207,8 @@ static struct ctl_table ipc_kern_table[] = {
 		.procname	= "msg_next_id",
 		.data		= &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
 		.maxlen		= sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
-		.mode		= 0666,
-		.proc_handler	= proc_ipc_dointvec_minmax_checkpoint_restore,
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
 	},
@@ -220,8 +216,8 @@ static struct ctl_table ipc_kern_table[] = {
 		.procname	= "shm_next_id",
 		.data		= &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
 		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
-		.mode		= 0666,
-		.proc_handler	= proc_ipc_dointvec_minmax_checkpoint_restore,
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
 	},
diff --git a/ipc/shm.c b/ipc/shm.c
index b3048ebd5c..048eb183b2 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -330,6 +330,9 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	shm_unlock(shp);
 	if (!is_file_hugepages(shm_file))
 		shmem_lock(shm_file, 0, shp->mlock_ucounts);
+	else if (shp->mlock_ucounts)
+		user_shm_unlock(i_size_read(file_inode(shm_file)),
+				shp->mlock_ucounts);
 	fput(shm_file);
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
@@ -739,7 +742,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		if (shmflg & SHM_NORESERVE)
 			acctflag = VM_NORESERVE;
 		file = hugetlb_file_setup(name, hugesize, acctflag,
-				HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
+				&shp->mlock_ucounts, HUGETLB_SHMFS_INODE,
+				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
 	} else {
 		/*
 		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
@@ -790,6 +794,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 no_id:
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
+	if (is_file_hugepages(file) && shp->mlock_ucounts)
+		user_shm_unlock(size, shp->mlock_ucounts);
 	fput(file);
 	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 	return error;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index ce77f02656..5876e30c57 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,23 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
-config PREEMPT_NONE_BUILD
-	bool
-
-config PREEMPT_VOLUNTARY_BUILD
-	bool
-
-config PREEMPT_BUILD
-	bool
-	select PREEMPTION
-	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
-
 choice
 	prompt "Preemption Model"
 	default PREEMPT_NONE
 
 config PREEMPT_NONE
 	bool "No Forced Preemption (Server)"
-	select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This is the traditional Linux preemption model, geared towards
 	  throughput. It will still provide good latencies most of the
@@ -32,7 +20,6 @@ config PREEMPT_NONE
 config PREEMPT_VOLUNTARY
 	bool "Voluntary Kernel Preemption (Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code.  These new
These new @@ -51,7 +38,9 @@ config PREEMPT_VOLUNTARY config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" depends on !ARCH_NO_PREEMPT - select PREEMPT_BUILD + select PREEMPTION + select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK + select PREEMPT_DYNAMIC if HAVE_PREEMPT_DYNAMIC help This option reduces the latency of the kernel by making all kernel code (that is not executing in a critical section) @@ -94,10 +83,7 @@ config PREEMPTION select PREEMPT_COUNT config PREEMPT_DYNAMIC - bool "Preemption behaviour defined on boot" - depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT - select PREEMPT_BUILD - default y + bool help This option allows to define the preemption model on the kernel command line parameter and thus override the default preemption diff --git a/kernel/Makefile b/kernel/Makefile index 56f4ee97f3..4df609be42 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ -obj-$(CONFIG_FUTEX) += futex/ +obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) @@ -67,7 +67,6 @@ obj-y += up.o endif obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_MODULE_DECOMPRESS) += module_decompress.o obj-$(CONFIG_MODULE_SIG) += module_signing.o obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o obj-$(CONFIG_KALLSYMS) += kallsyms.o @@ -86,6 +85,7 @@ obj-$(CONFIG_PID_NS) += pid_namespace.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_IKHEADERS) += kheaders.o obj-$(CONFIG_SMP) += stop_machine.o +obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o audit_watch.o audit_fsnotify.o audit_tree.o obj-$(CONFIG_GCOV_KERNEL) += gcov/ diff --git a/kernel/acct.c b/kernel/acct.c index 3df53cf1dc..23a7ab8e6c 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -60,6 +60,7 @@ #include #include +#include /* sector_div */ #include #include diff --git a/kernel/audit.c b/kernel/audit.c index 7690c29d4e..94ded5de91 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1468,7 +1468,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err) return err; } - sig_data = kmalloc(struct_size(sig_data, ctx, len), GFP_KERNEL); + sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); if (!sig_data) { if (audit_sig_sid) security_release_secctx(ctx, len); @@ -1481,7 +1481,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) security_release_secctx(ctx, len); } audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, - sig_data, struct_size(sig_data, ctx, len)); + sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; case AUDIT_TTY_GET: { @@ -2171,7 +2171,7 @@ int audit_log_task_context(struct audit_buffer *ab) int error; u32 sid; - security_current_getsecid_subj(&sid); + security_task_getsecid_subj(current, &sid); if (!sid) return 0; @@ -2392,7 +2392,7 @@ int audit_signal_info(int sig, struct task_struct *t) audit_sig_uid = auid; else audit_sig_uid = uid; - security_current_getsecid_subj(&audit_sig_sid); + security_task_getsecid_subj(current, &audit_sig_sid); } return audit_signal_info_syscall(t); diff --git a/kernel/audit.h b/kernel/audit.h index c4498090a5..d6a2c899a8 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -14,7 +14,6 @@ #include #include #include -#include // struct open_how /* AUDIT_NAMES is the number of slots we reserve in the 
audit_context * for saving names from getname(). If we get more names we will allocate @@ -101,15 +100,10 @@ struct audit_proctitle { /* The per-task audit context. */ struct audit_context { int dummy; /* must be the first element */ - enum { - AUDIT_CTX_UNUSED, /* audit_context is currently unused */ - AUDIT_CTX_SYSCALL, /* in use by syscall */ - AUDIT_CTX_URING, /* in use by io_uring */ - } context; + int in_syscall; /* 1 if task is in a syscall */ enum audit_state state, current_state; unsigned int serial; /* serial number for record */ int major; /* syscall number */ - int uring_op; /* uring operation */ struct timespec64 ctime; /* time of syscall entry */ unsigned long argv[4]; /* syscall arguments */ long return_code;/* syscall return code */ @@ -194,7 +188,6 @@ struct audit_context { int fd; int flags; } mmap; - struct open_how openat2; struct { int argc; } execve; diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index 02348b4844..60739d5e33 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -160,7 +160,8 @@ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask, audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark); - if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group)) + if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group) || + WARN_ON_ONCE(!inode)) return 0; if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index e7315d4871..2cd7b56944 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -30,7 +30,7 @@ struct audit_chunk { int count; atomic_long_t refs; struct rcu_head head; - struct audit_node { + struct node { struct list_head list; struct audit_tree *owner; unsigned index; /* index; upper bit indicates 'will prune' */ @@ -94,7 +94,7 @@ static struct audit_tree *alloc_tree(const char *s) { struct audit_tree *tree; - tree = kmalloc(struct_size(tree, pathname, strlen(s) + 1), GFP_KERNEL); + tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL); if (tree) { refcount_set(&tree->count, 1); tree->goner = 0; @@ -269,7 +269,7 @@ bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree) /* tagging and untagging inodes with trees */ -static struct audit_chunk *find_chunk(struct audit_node *p) +static struct audit_chunk *find_chunk(struct node *p) { int index = p->index & ~(1U<<31); p -= index; @@ -322,7 +322,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old) list_replace_rcu(&old->hash, &new->hash); } -static void remove_chunk_node(struct audit_chunk *chunk, struct audit_node *p) +static void remove_chunk_node(struct audit_chunk *chunk, struct node *p) { struct audit_tree *owner = p->owner; @@ -459,7 +459,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) { struct fsnotify_mark *mark; struct audit_chunk *chunk, *old; - struct audit_node *p; + struct node *p; int n; mutex_lock(&audit_tree_group->mark_mutex); @@ -570,11 +570,11 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged) { spin_lock(&hash_lock); while (!list_empty(&victim->chunks)) { - struct audit_node *p; + struct node *p; struct audit_chunk *chunk; struct fsnotify_mark *mark; - p = list_first_entry(&victim->chunks, struct audit_node, list); + p = list_first_entry(&victim->chunks, struct node, list); /* have we run out of marked? 
*/ if (tagged && !(p->index & (1U<<31))) break; @@ -616,7 +616,7 @@ static void trim_marked(struct audit_tree *tree) } /* reorder */ for (p = tree->chunks.next; p != &tree->chunks; p = q) { - struct audit_node *node = list_entry(p, struct audit_node, list); + struct node *node = list_entry(p, struct node, list); q = p->next; if (node->index & (1U<<31)) { list_del_init(p); @@ -684,7 +684,7 @@ void audit_trim_trees(void) struct audit_tree *tree; struct path path; struct vfsmount *root_mnt; - struct audit_node *node; + struct node *node; int err; tree = container_of(cursor.next, struct audit_tree, list); @@ -726,8 +726,7 @@ int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) { if (pathname[0] != '/' || - (rule->listnr != AUDIT_FILTER_EXIT && - rule->listnr != AUDIT_FILTER_URING_EXIT) || + rule->listnr != AUDIT_FILTER_EXIT || op != Audit_equal || rule->inode_f || rule->watch || rule->tree) return -EINVAL; @@ -840,7 +839,7 @@ int audit_add_tree_rule(struct audit_krule *rule) drop_collected_mounts(mnt); if (!err) { - struct audit_node *node; + struct node *node; spin_lock(&hash_lock); list_for_each_entry(node, &tree->chunks, list) node->index &= ~(1U<<31); @@ -939,7 +938,7 @@ int audit_tag_tree(char *old, char *new) mutex_unlock(&audit_filter_mutex); if (!failed) { - struct audit_node *node; + struct node *node; spin_lock(&hash_lock); list_for_each_entry(node, &tree->chunks, list) node->index &= ~(1U<<31); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 713b256be9..2acf7ca491 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -183,8 +183,7 @@ int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op) return -EOPNOTSUPP; if (path[0] != '/' || path[len-1] == '/' || - (krule->listnr != AUDIT_FILTER_EXIT && - krule->listnr != AUDIT_FILTER_URING_EXIT) || + krule->listnr != AUDIT_FILTER_EXIT || op != Audit_equal || krule->inode_f || krule->watch || krule->tree) return -EINVAL; @@ -473,7 +472,8 @@ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask, parent = container_of(inode_mark, struct audit_parent, mark); - if (WARN_ON_ONCE(inode_mark->group != audit_watch_group)) + if (WARN_ON_ONCE(inode_mark->group != audit_watch_group) || + WARN_ON_ONCE(!inode)) return 0; if (mask & (FS_CREATE|FS_MOVED_TO) && inode) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 42d99896e7..db2c6b59df 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -44,8 +44,7 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { LIST_HEAD_INIT(audit_filter_list[4]), LIST_HEAD_INIT(audit_filter_list[5]), LIST_HEAD_INIT(audit_filter_list[6]), - LIST_HEAD_INIT(audit_filter_list[7]), -#if AUDIT_NR_FILTERS != 8 +#if AUDIT_NR_FILTERS != 7 #error Fix audit_filter_list initialiser #endif }; @@ -57,7 +56,6 @@ static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = { LIST_HEAD_INIT(audit_rules_list[4]), LIST_HEAD_INIT(audit_rules_list[5]), LIST_HEAD_INIT(audit_rules_list[6]), - LIST_HEAD_INIT(audit_rules_list[7]), }; DEFINE_MUTEX(audit_filter_mutex); @@ -153,8 +151,7 @@ char *audit_unpack_string(void **bufp, size_t *remain, size_t len) static inline int audit_to_inode(struct audit_krule *krule, struct audit_field *f) { - if ((krule->listnr != AUDIT_FILTER_EXIT && - krule->listnr != AUDIT_FILTER_URING_EXIT) || + if (krule->listnr != AUDIT_FILTER_EXIT || krule->inode_f || krule->watch || krule->tree || (f->op != Audit_equal && f->op != Audit_not_equal)) return -EINVAL; @@ -251,7 +248,6 @@ static inline struct audit_entry 
*audit_to_entry_common(struct audit_rule_data * pr_err("AUDIT_FILTER_ENTRY is deprecated\n"); goto exit_err; case AUDIT_FILTER_EXIT: - case AUDIT_FILTER_URING_EXIT: case AUDIT_FILTER_TASK: #endif case AUDIT_FILTER_USER: @@ -336,10 +332,6 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) if (entry->rule.listnr != AUDIT_FILTER_FS) return -EINVAL; break; - case AUDIT_PERM: - if (entry->rule.listnr == AUDIT_FILTER_URING_EXIT) - return -EINVAL; - break; } switch (entry->rule.listnr) { @@ -637,7 +629,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) void *bufp; int i; - data = kmalloc(struct_size(data, buf, krule->buflen), GFP_KERNEL); + data = kmalloc(sizeof(*data) + krule->buflen, GFP_KERNEL); if (unlikely(!data)) return NULL; memset(data, 0, sizeof(*data)); @@ -988,8 +980,7 @@ static inline int audit_add_rule(struct audit_entry *entry) } entry->rule.prio = ~0ULL; - if (entry->rule.listnr == AUDIT_FILTER_EXIT || - entry->rule.listnr == AUDIT_FILTER_URING_EXIT) { + if (entry->rule.listnr == AUDIT_FILTER_EXIT) { if (entry->rule.flags & AUDIT_FILTER_PREPEND) entry->rule.prio = ++prio_high; else @@ -1092,7 +1083,7 @@ static void audit_list_rules(int seq, struct sk_buff_head *q) break; skb = audit_make_reply(seq, AUDIT_LIST_RULES, 0, 1, data, - struct_size(data, buf, data->buflen)); + sizeof(*data) + data->buflen); if (skb) skb_queue_tail(q, skb); kfree(data); @@ -1368,7 +1359,8 @@ int audit_filter(int msgtype, unsigned int listtype) case AUDIT_SUBJ_SEN: case AUDIT_SUBJ_CLR: if (f->lsm_rule) { - security_current_getsecid_subj(&sid); + security_task_getsecid_subj(current, + &sid); result = security_audit_rule_match(sid, f->type, f->op, f->lsm_rule); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a83928cbdc..b1cb1dbf74 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1,4 +1,3 @@ -// SPDX-License-Identifier: GPL-2.0-or-later /* auditsc.c -- System-call auditing support * Handles all system-call specific auditing features. * @@ -7,6 +6,20 @@ * Copyright (C) 2005, 2006 IBM Corporation * All Rights Reserved. * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * * Written by Rickard E. (Rik) Faith * * Many of the ideas implemented here are from Stephen C. 
Tweedie, @@ -63,7 +76,6 @@ #include #include #include -#include // struct open_how #include "audit.h" @@ -154,7 +166,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask) n = ctx->major; switch (audit_classify_syscall(ctx->arch, n)) { - case AUDITSC_NATIVE: + case 0: /* native */ if ((mask & AUDIT_PERM_WRITE) && audit_match_class(AUDIT_CLASS_WRITE, n)) return 1; @@ -165,7 +177,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask) audit_match_class(AUDIT_CLASS_CHATTR, n)) return 1; return 0; - case AUDITSC_COMPAT: /* 32bit on biarch */ + case 1: /* 32bit on biarch */ if ((mask & AUDIT_PERM_WRITE) && audit_match_class(AUDIT_CLASS_WRITE_32, n)) return 1; @@ -176,16 +188,14 @@ static int audit_match_perm(struct audit_context *ctx, int mask) audit_match_class(AUDIT_CLASS_CHATTR_32, n)) return 1; return 0; - case AUDITSC_OPEN: + case 2: /* open */ return mask & ACC_MODE(ctx->argv[1]); - case AUDITSC_OPENAT: + case 3: /* openat */ return mask & ACC_MODE(ctx->argv[2]); - case AUDITSC_SOCKETCALL: + case 4: /* socketcall */ return ((mask & AUDIT_PERM_WRITE) && ctx->argv[0] == SYS_BIND); - case AUDITSC_EXECVE: + case 5: /* execve */ return mask & AUDIT_PERM_EXEC; - case AUDITSC_OPENAT2: - return mask & ACC_MODE((u32)ctx->openat2.flags); default: return 0; } @@ -470,9 +480,6 @@ static int audit_filter_rules(struct task_struct *tsk, u32 sid; unsigned int sessionid; - if (ctx && rule->prio <= ctx->prio) - return 0; - cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); for (i = 0; i < rule->field_count; i++) { @@ -666,16 +673,7 @@ static int audit_filter_rules(struct task_struct *tsk, logged upon error */ if (f->lsm_rule) { if (need_sid) { - /* @tsk should always be equal to - * @current with the exception of - * fork()/copy_process() in which case - * the new @tsk creds are still a dup - * of @current's creds so we can still - * use security_current_getsecid_subj() - * here even though it always refs - * @current's creds - */ - security_current_getsecid_subj(&sid); + security_task_getsecid_subj(tsk, &sid); need_sid = 0; } result = security_audit_rule_match(sid, f->type, @@ -749,6 +747,8 @@ static int audit_filter_rules(struct task_struct *tsk, } if (ctx) { + if (rule->prio <= ctx->prio) + return 0; if (rule->filterkey) { kfree(ctx->filterkey); ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); @@ -805,34 +805,6 @@ static int audit_in_mask(const struct audit_krule *rule, unsigned long val) return rule->mask[word] & bit; } -/** - * audit_filter_uring - apply filters to an io_uring operation - * @tsk: associated task - * @ctx: audit context - */ -static void audit_filter_uring(struct task_struct *tsk, - struct audit_context *ctx) -{ - struct audit_entry *e; - enum audit_state state; - - if (auditd_test_task(tsk)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_URING_EXIT], - list) { - if (audit_in_mask(&e->rule, ctx->uring_op) && - audit_filter_rules(tsk, &e->rule, ctx, NULL, &state, - false)) { - rcu_read_unlock(); - ctx->current_state = state; - return; - } - } - rcu_read_unlock(); -} - /* At syscall exit time, this filter is called if the audit_state is * not low enough that auditing cannot take place, but is also not * high enough that we already know we have to write an audit record @@ -943,81 +915,10 @@ static inline void audit_free_aux(struct audit_context *context) context->aux = aux->next; kfree(aux); } - context->aux = NULL; while ((aux = context->aux_pids)) { context->aux_pids = aux->next; 
		kfree(aux);
 	}
-	context->aux_pids = NULL;
-}
-
-/**
- * audit_reset_context - reset a audit_context structure
- * @ctx: the audit_context to reset
- *
- * All fields in the audit_context will be reset to an initial state, all
- * references held by fields will be dropped, and private memory will be
- * released.  When this function returns the audit_context will be suitable
- * for reuse, so long as the passed context is not NULL or a dummy context.
- */
-static void audit_reset_context(struct audit_context *ctx)
-{
-	if (!ctx)
-		return;
-
-	/* if ctx is non-null, reset the "ctx->state" regardless */
-	ctx->context = AUDIT_CTX_UNUSED;
-	if (ctx->dummy)
-		return;
-
-	/*
-	 * NOTE: It shouldn't matter in what order we release the fields, so
-	 *       release them in the order in which they appear in the struct;
-	 *       this gives us some hope of quickly making sure we are
-	 *       resetting the audit_context properly.
-	 *
-	 *       Other things worth mentioning:
-	 *       - we don't reset "dummy"
-	 *       - we don't reset "state", we do reset "current_state"
-	 *       - we preserve "filterkey" if "state" is AUDIT_STATE_RECORD
-	 *       - much of this is likely overkill, but play it safe for now
-	 *       - we really need to work on improving the audit_context struct
-	 */
-
-	ctx->current_state = ctx->state;
-	ctx->serial = 0;
-	ctx->major = 0;
-	ctx->uring_op = 0;
-	ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 };
-	memset(ctx->argv, 0, sizeof(ctx->argv));
-	ctx->return_code = 0;
-	ctx->prio = (ctx->state == AUDIT_STATE_RECORD ? ~0ULL : 0);
-	ctx->return_valid = AUDITSC_INVALID;
-	audit_free_names(ctx);
-	if (ctx->state != AUDIT_STATE_RECORD) {
-		kfree(ctx->filterkey);
-		ctx->filterkey = NULL;
-	}
-	audit_free_aux(ctx);
-	kfree(ctx->sockaddr);
-	ctx->sockaddr = NULL;
-	ctx->sockaddr_len = 0;
-	ctx->pid = ctx->ppid = 0;
-	ctx->uid = ctx->euid = ctx->suid = ctx->fsuid = KUIDT_INIT(0);
-	ctx->gid = ctx->egid = ctx->sgid = ctx->fsgid = KGIDT_INIT(0);
-	ctx->personality = 0;
-	ctx->arch = 0;
-	ctx->target_pid = 0;
-	ctx->target_auid = ctx->target_uid = KUIDT_INIT(0);
-	ctx->target_sessionid = 0;
-	ctx->target_sid = 0;
-	ctx->target_comm[0] = '\0';
-	unroll_tree_refs(ctx, NULL, 0);
-	WARN_ON(!list_empty(&ctx->killed_trees));
-	ctx->type = 0;
-	audit_free_module(ctx);
-	ctx->fds[0] = -1;
-	audit_proctitle_free(ctx);
-}
 
 static inline struct audit_context *audit_alloc_context(enum audit_state state)
@@ -1027,7 +928,6 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
 	context = kzalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
 		return NULL;
-	context->context = AUDIT_CTX_UNUSED;
 	context->state = state;
 	context->prio = state == AUDIT_STATE_RECORD ? ~0ULL : 0;
 	INIT_LIST_HEAD(&context->killed_trees);
@@ -1053,7 +953,7 @@ int audit_alloc(struct task_struct *tsk)
 	char *key = NULL;
 
 	if (likely(!audit_ever_enabled))
-		return 0;
+		return 0; /* Return if not auditing. */
 
 	state = audit_filter_task(tsk, &key);
 	if (state == AUDIT_STATE_DISABLED) {
@@ -1073,37 +973,16 @@ int audit_alloc(struct task_struct *tsk)
 	return 0;
 }
 
-/**
- * audit_alloc_kernel - allocate an audit_context for a kernel task
- * @tsk: the kernel task
- *
- * Similar to the audit_alloc() function, but intended for kernel private
- * threads.  Returns zero on success, negative values on failure.
- */
-int audit_alloc_kernel(struct task_struct *tsk)
-{
-	/*
-	 * At the moment we are just going to call into audit_alloc() to
-	 * simplify the code, but there two things to keep in mind with this
-	 * approach:
-	 *
	 * 1. Filtering internal kernel tasks is a bit laughable in almost all
-	 *    cases, but there is at least one case where there is a benefit:
-	 *    the '-a task,never' case allows the admin to effectively disable
-	 *    task auditing at runtime.
-	 *
-	 * 2. The {set,clear}_task_syscall_work() ops likely have zero effect
-	 *    on these internal kernel tasks, but they probably don't hurt either.
-	 */
-	return audit_alloc(tsk);
-}
-
 static inline void audit_free_context(struct audit_context *context)
 {
-	/* resetting is extra work, but it is likely just noise */
-	audit_reset_context(context);
+	audit_free_module(context);
+	audit_free_names(context);
+	unroll_tree_refs(context, NULL, 0);
 	free_tree_refs(context);
+	audit_free_aux(context);
 	kfree(context->filterkey);
+	kfree(context->sockaddr);
+	audit_proctitle_free(context);
 	kfree(context);
 }
@@ -1437,12 +1316,6 @@ static void show_special(struct audit_context *context, int *call_panic)
 		audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
 				 context->mmap.flags);
 		break;
-	case AUDIT_OPENAT2:
-		audit_log_format(ab, "oflag=0%llo mode=0%llo resolve=0x%llx",
-				 context->openat2.flags,
-				 context->openat2.mode,
-				 context->openat2.resolve);
-		break;
 	case AUDIT_EXECVE:
 		audit_log_execve_info(context, &ab);
 		break;
@@ -1606,44 +1479,6 @@ static void audit_log_proctitle(void)
 	audit_log_end(ab);
 }
 
-/**
- * audit_log_uring - generate a AUDIT_URINGOP record
- * @ctx: the audit context
- */
-static void audit_log_uring(struct audit_context *ctx)
-{
-	struct audit_buffer *ab;
-	const struct cred *cred;
-
-	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_URINGOP);
-	if (!ab)
-		return;
-	cred = current_cred();
-	audit_log_format(ab, "uring_op=%d", ctx->uring_op);
-	if (ctx->return_valid != AUDITSC_INVALID)
-		audit_log_format(ab, " success=%s exit=%ld",
-				 (ctx->return_valid == AUDITSC_SUCCESS ?
-				  "yes" : "no"),
-				 ctx->return_code);
-	audit_log_format(ab,
-			 " items=%d"
-			 " ppid=%d pid=%d uid=%u gid=%u euid=%u suid=%u"
-			 " fsuid=%u egid=%u sgid=%u fsgid=%u",
-			 ctx->name_count,
-			 task_ppid_nr(current), task_tgid_nr(current),
-			 from_kuid(&init_user_ns, cred->uid),
-			 from_kgid(&init_user_ns, cred->gid),
-			 from_kuid(&init_user_ns, cred->euid),
-			 from_kuid(&init_user_ns, cred->suid),
-			 from_kuid(&init_user_ns, cred->fsuid),
-			 from_kgid(&init_user_ns, cred->egid),
-			 from_kgid(&init_user_ns, cred->sgid),
-			 from_kgid(&init_user_ns, cred->fsgid));
-	audit_log_task_context(ab);
-	audit_log_key(ab, ctx->filterkey);
-	audit_log_end(ab);
-}
-
 static void audit_log_exit(void)
 {
 	int i, call_panic = 0;
@@ -1654,38 +1489,29 @@ static void audit_log_exit(void)
 
 	context->personality = current->personality;
 
-	switch (context->context) {
-	case AUDIT_CTX_SYSCALL:
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
-		if (!ab)
-			return;
-		audit_log_format(ab, "arch=%x syscall=%d",
-				 context->arch, context->major);
-		if (context->personality != PER_LINUX)
-			audit_log_format(ab, " per=%lx", context->personality);
-		if (context->return_valid != AUDITSC_INVALID)
-			audit_log_format(ab, " success=%s exit=%ld",
-				 (context->return_valid == AUDITSC_SUCCESS ?
- "yes" : "no"), - context->return_code); - audit_log_format(ab, - " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", - context->argv[0], - context->argv[1], - context->argv[2], - context->argv[3], - context->name_count); - audit_log_task_info(ab); - audit_log_key(ab, context->filterkey); - audit_log_end(ab); - break; - case AUDIT_CTX_URING: - audit_log_uring(context); - break; - default: - BUG(); - break; - } + ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); + if (!ab) + return; /* audit_panic has been called */ + audit_log_format(ab, "arch=%x syscall=%d", + context->arch, context->major); + if (context->personality != PER_LINUX) + audit_log_format(ab, " per=%lx", context->personality); + if (context->return_valid != AUDITSC_INVALID) + audit_log_format(ab, " success=%s exit=%ld", + (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", + context->return_code); + + audit_log_format(ab, + " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", + context->argv[0], + context->argv[1], + context->argv[2], + context->argv[3], + context->name_count); + + audit_log_task_info(ab); + audit_log_key(ab, context->filterkey); + audit_log_end(ab); for (aux = context->aux; aux; aux = aux->next) { @@ -1776,22 +1602,21 @@ static void audit_log_exit(void) audit_log_name(context, n, NULL, i++, &call_panic); } - if (context->context == AUDIT_CTX_SYSCALL) - audit_log_proctitle(); + audit_log_proctitle(); /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); if (ab) audit_log_end(ab); if (call_panic) - audit_panic("error in audit_log_exit()"); + audit_panic("error converting sid to string"); } /** * __audit_free - free a per-task audit context * @tsk: task whose audit context block to free * - * Called from copy_process, do_exit, and the io_uring code + * Called from copy_process and do_exit */ void __audit_free(struct task_struct *tsk) { @@ -1800,7 +1625,6 @@ void __audit_free(struct task_struct *tsk) if (!context) return; - /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); @@ -1809,152 +1633,20 @@ void __audit_free(struct task_struct *tsk) * random task_struct that doesn't doesn't have any meaningful data we * need to log via audit_log_exit(). */ - if (tsk == current && !context->dummy) { + if (tsk == current && !context->dummy && context->in_syscall) { context->return_valid = AUDITSC_INVALID; context->return_code = 0; - if (context->context == AUDIT_CTX_SYSCALL) { - audit_filter_syscall(tsk, context); - audit_filter_inodes(tsk, context); - if (context->current_state == AUDIT_STATE_RECORD) - audit_log_exit(); - } else if (context->context == AUDIT_CTX_URING) { - /* TODO: verify this case is real and valid */ - audit_filter_uring(tsk, context); - audit_filter_inodes(tsk, context); - if (context->current_state == AUDIT_STATE_RECORD) - audit_log_uring(context); - } + + audit_filter_syscall(tsk, context); + audit_filter_inodes(tsk, context); + if (context->current_state == AUDIT_STATE_RECORD) + audit_log_exit(); } audit_set_context(tsk, NULL); audit_free_context(context); } -/** - * audit_return_fixup - fixup the return codes in the audit_context - * @ctx: the audit_context - * @success: true/false value to indicate if the operation succeeded or not - * @code: operation return code - * - * We need to fixup the return code in the audit logs if the actual return - * codes are later going to be fixed by the arch specific signal handlers. 
- */ -static void audit_return_fixup(struct audit_context *ctx, - int success, long code) -{ - /* - * This is actually a test for: - * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || - * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) - * - * but is faster than a bunch of || - */ - if (unlikely(code <= -ERESTARTSYS) && - (code >= -ERESTART_RESTARTBLOCK) && - (code != -ENOIOCTLCMD)) - ctx->return_code = -EINTR; - else - ctx->return_code = code; - ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE); -} - -/** - * __audit_uring_entry - prepare the kernel task's audit context for io_uring - * @op: the io_uring opcode - * - * This is similar to audit_syscall_entry() but is intended for use by io_uring - * operations. This function should only ever be called from - * audit_uring_entry() as we rely on the audit context checking present in that - * function. - */ -void __audit_uring_entry(u8 op) -{ - struct audit_context *ctx = audit_context(); - - if (ctx->state == AUDIT_STATE_DISABLED) - return; - - /* - * NOTE: It's possible that we can be called from the process' context - * before it returns to userspace, and before audit_syscall_exit() - * is called. In this case there is not much to do, just record - * the io_uring details and return. - */ - ctx->uring_op = op; - if (ctx->context == AUDIT_CTX_SYSCALL) - return; - - ctx->dummy = !audit_n_rules; - if (!ctx->dummy && ctx->state == AUDIT_STATE_BUILD) - ctx->prio = 0; - - ctx->context = AUDIT_CTX_URING; - ctx->current_state = ctx->state; - ktime_get_coarse_real_ts64(&ctx->ctime); -} - -/** - * __audit_uring_exit - wrap up the kernel task's audit context after io_uring - * @success: true/false value to indicate if the operation succeeded or not - * @code: operation return code - * - * This is similar to audit_syscall_exit() but is intended for use by io_uring - * operations. This function should only ever be called from - * audit_uring_exit() as we rely on the audit context checking present in that - * function. - */ -void __audit_uring_exit(int success, long code) -{ - struct audit_context *ctx = audit_context(); - - if (ctx->context == AUDIT_CTX_SYSCALL) { - /* - * NOTE: See the note in __audit_uring_entry() about the case - * where we may be called from process context before we - * return to userspace via audit_syscall_exit(). In this - * case we simply emit a URINGOP record and bail, the - * normal syscall exit handling will take care of - * everything else. - * It is also worth mentioning that when we are called, - * the current process creds may differ from the creds - * used during the normal syscall processing; keep that - * in mind if/when we move the record generation code. - */ - - /* - * We need to filter on the syscall info here to decide if we - * should emit a URINGOP record. I know it seems odd but this - * solves the problem where users have a filter to block *all* - * syscall records in the "exit" filter; we want to preserve - * the behavior here. 
- */ - audit_filter_syscall(current, ctx); - if (ctx->current_state != AUDIT_STATE_RECORD) - audit_filter_uring(current, ctx); - audit_filter_inodes(current, ctx); - if (ctx->current_state != AUDIT_STATE_RECORD) - return; - - audit_log_uring(ctx); - return; - } - - /* this may generate CONFIG_CHANGE records */ - if (!list_empty(&ctx->killed_trees)) - audit_kill_trees(ctx); - - /* run through both filters to ensure we set the filterkey properly */ - audit_filter_uring(current, ctx); - audit_filter_inodes(current, ctx); - if (ctx->current_state != AUDIT_STATE_RECORD) - goto out; - audit_return_fixup(ctx, success, code); - audit_log_exit(); - -out: - audit_reset_context(ctx); -} - /** * __audit_syscall_entry - fill in an audit record at syscall entry * @major: major syscall type (function) @@ -1980,12 +1672,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, if (!audit_enabled || !context) return; - WARN_ON(context->context != AUDIT_CTX_UNUSED); - WARN_ON(context->name_count); - if (context->context != AUDIT_CTX_UNUSED || context->name_count) { - audit_panic("unrecoverable error in audit_syscall_entry()"); - return; - } + BUG_ON(context->in_syscall || context->name_count); state = context->state; if (state == AUDIT_STATE_DISABLED) @@ -2004,8 +1691,10 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, context->argv[1] = a2; context->argv[2] = a3; context->argv[3] = a4; - context->context = AUDIT_CTX_SYSCALL; + context->serial = 0; + context->in_syscall = 1; context->current_state = state; + context->ppid = 0; ktime_get_coarse_real_ts64(&context->ctime); } @@ -2022,27 +1711,63 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, */ void __audit_syscall_exit(int success, long return_code) { - struct audit_context *context = audit_context(); + struct audit_context *context; - if (!context || context->dummy || - context->context != AUDIT_CTX_SYSCALL) - goto out; + context = audit_context(); + if (!context) + return; - /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); - /* run through both filters to ensure we set the filterkey properly */ - audit_filter_syscall(current, context); - audit_filter_inodes(current, context); - if (context->current_state < AUDIT_STATE_RECORD) - goto out; + if (!context->dummy && context->in_syscall) { + if (success) + context->return_valid = AUDITSC_SUCCESS; + else + context->return_valid = AUDITSC_FAILURE; - audit_return_fixup(context, success, return_code); - audit_log_exit(); + /* + * we need to fix up the return code in the audit logs if the + * actual return codes are later going to be fixed up by the + * arch specific signal handlers + * + * This is actually a test for: + * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || + * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) + * + * but is faster than a bunch of || + */ + if (unlikely(return_code <= -ERESTARTSYS) && + (return_code >= -ERESTART_RESTARTBLOCK) && + (return_code != -ENOIOCTLCMD)) + context->return_code = -EINTR; + else + context->return_code = return_code; -out: - audit_reset_context(context); + audit_filter_syscall(current, context); + audit_filter_inodes(current, context); + if (context->current_state == AUDIT_STATE_RECORD) + audit_log_exit(); + } + + context->in_syscall = 0; + context->prio = context->state == AUDIT_STATE_RECORD ? 
~0ULL : 0; + + audit_free_module(context); + audit_free_names(context); + unroll_tree_refs(context, NULL, 0); + audit_free_aux(context); + context->aux = NULL; + context->aux_pids = NULL; + context->target_pid = 0; + context->target_sid = 0; + context->sockaddr_len = 0; + context->type = 0; + context->fds[0] = -1; + if (context->state != AUDIT_STATE_RECORD) { + kfree(context->filterkey); + context->filterkey = NULL; + } } static inline void handle_one(const struct inode *inode) @@ -2194,7 +1919,7 @@ void __audit_getname(struct filename *name) struct audit_context *context = audit_context(); struct audit_names *n; - if (context->context == AUDIT_CTX_UNUSED) + if (!context->in_syscall) return; n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); @@ -2266,7 +1991,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; - if (context->context == AUDIT_CTX_UNUSED) + if (!context->in_syscall) return; rcu_read_lock(); @@ -2384,7 +2109,7 @@ void __audit_inode_child(struct inode *parent, struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; - if (context->context == AUDIT_CTX_UNUSED) + if (!context->in_syscall) return; rcu_read_lock(); @@ -2483,7 +2208,7 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); int auditsc_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial) { - if (ctx->context == AUDIT_CTX_UNUSED) + if (!ctx->in_syscall) return 0; if (!ctx->serial) ctx->serial = audit_serial(); @@ -2821,16 +2546,6 @@ void __audit_mmap_fd(int fd, int flags) context->type = AUDIT_MMAP; } -void __audit_openat2_how(struct open_how *how) -{ - struct audit_context *context = audit_context(); - - context->openat2.flags = how->flags; - context->openat2.mode = how->mode; - context->openat2.resolve = how->resolve; - context->type = AUDIT_OPENAT2; -} - void __audit_log_kern_module(char *name) { struct audit_context *context = audit_context(); @@ -2991,7 +2706,8 @@ void audit_seccomp_actions_logged(const char *names, const char *old_names, struct list_head *audit_killed_trees(void) { struct audit_context *ctx = audit_context(); - if (likely(!ctx || ctx->context == AUDIT_CTX_UNUSED)) + + if (likely(!ctx || !ctx->in_syscall)) return NULL; return &ctx->killed_trees; } diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index d24d518ddd..a82d6de865 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -64,7 +64,6 @@ config BPF_JIT_DEFAULT_ON config BPF_UNPRIV_DEFAULT_OFF bool "Disable unprivileged BPF by default" - default y depends on BPF_SYSCALL help Disables unprivileged BPF by default by setting the corresponding @@ -73,12 +72,6 @@ config BPF_UNPRIV_DEFAULT_OFF disable it by setting it to 1 (from which no other transition to 0 is possible anymore). - Unprivileged BPF could be used to exploit certain potential - speculative execution side-channel vulnerabilities on unmitigated - affected hardware. - - If you are unsure how to answer this question, answer Y. 
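[Editor's note: the knob this Kconfig option seeds is the kernel.unprivileged_bpf_disabled sysctl. A minimal sketch for inspecting it from userspace; per the help text above, 0 allows unprivileged BPF, 1 disables it with no way back until reboot, and 2 (accepted by recent kernels) disables it while still letting a privileged user change the setting:]

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/kernel/unprivileged_bpf_disabled", "r");
        int val;

        if (!f)
            return 1;
        if (fscanf(f, "%d", &val) == 1)
            printf("unprivileged_bpf_disabled = %d\n", val);
        fclose(f);
        return 0;
    }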
- source "kernel/bpf/preload/Kconfig" config BPF_LSM diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index c1a9be6a4b..7f33098ca6 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -7,7 +7,7 @@ endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o @@ -36,7 +36,3 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o obj-${CONFIG_BPF_LSM} += bpf_lsm.o endif obj-$(CONFIG_BPF_PRELOAD) += preload/ - -obj-$(CONFIG_BPF_SYSCALL) += relo_core.o -$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE - $(call if_changed_rule,cc_o_c) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c7a5be3bf8..447def5405 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -645,7 +645,7 @@ static const struct bpf_iter_seq_info iter_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), }; -static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, +static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) { u32 i, key, num_elems = 0; @@ -668,8 +668,9 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_ val = array->value + array->elem_size * i; num_elems++; key = i; - ret = callback_fn((u64)(long)map, (u64)(long)&key, - (u64)(long)val, (u64)(long)callback_ctx, 0); + ret = BPF_CAST_CALL(callback_fn)((u64)(long)map, + (u64)(long)&key, (u64)(long)val, + (u64)(long)callback_ctx, 0); /* return value: 0 - continue, 1 - stop and return */ if (ret) break; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index e29d9e3d85..96ceed0e0f 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -17,7 +17,6 @@ #include #include #include -#include DEFINE_BPF_STORAGE_CACHE(inode_cache); @@ -45,8 +44,7 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, if (!bsb) return NULL; - inode_storage = - rcu_dereference_check(bsb->storage, bpf_rcu_lock_held()); + inode_storage = rcu_dereference(bsb->storage); if (!inode_storage) return NULL; @@ -174,7 +172,6 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, { struct bpf_local_storage_data *sdata; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; @@ -207,7 +204,6 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!inode) return -EINVAL; diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b7aef5b341..b2ee45064e 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -714,38 +714,3 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; - -/* maximum number of loops */ -#define MAX_LOOPS BIT(23) - -BPF_CALL_4(bpf_loop, u32, nr_loops, 
void *, callback_fn, void *, callback_ctx, - u64, flags) -{ - bpf_callback_t callback = (bpf_callback_t)callback_fn; - u64 ret; - u32 i; - - if (flags) - return -EINVAL; - if (nr_loops > MAX_LOOPS) - return -E2BIG; - - for (i = 0; i < nr_loops; i++) { - ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); - /* return value: 0 - continue, 1 - stop and return */ - if (ret) - return i + 1; - } - - return i; -} - -const struct bpf_func_proto bpf_loop_proto = { - .func = bpf_loop, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_PTR_TO_FUNC, - .arg3_type = ARG_PTR_TO_STACK_OR_NULL, - .arg4_type = ARG_ANYTHING, -}; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 71de2a8986..b305270b7a 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -11,9 +11,6 @@ #include #include #include -#include -#include -#include #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE) @@ -84,22 +81,6 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } -void bpf_local_storage_free_rcu(struct rcu_head *rcu) -{ - struct bpf_local_storage *local_storage; - - local_storage = container_of(rcu, struct bpf_local_storage, rcu); - kfree_rcu(local_storage, rcu); -} - -static void bpf_selem_free_rcu(struct rcu_head *rcu) -{ - struct bpf_local_storage_elem *selem; - - selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - kfree_rcu(selem, rcu); -} - /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. @@ -112,7 +93,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, bool free_local_storage; void *owner; - smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + smap = rcu_dereference(SDATA(selem)->smap); owner = local_storage->owner; /* All uncharging on the owner must be done first. @@ -137,12 +118,12 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, * * Although the unlock will be done under * rcu_read_lock(), it is more intuitive to - * read if the freeing of the storage is done + * read if kfree_rcu(local_storage, rcu) is done * after the raw_spin_unlock_bh(&local_storage->lock). * * Hence, a "bool free_local_storage" is returned - * to the caller which then frees the storage after - * all the RCU grace periods have expired. + * to the caller which then calls the kfree_rcu() + * after unlock. 
*/ } hlist_del_init_rcu(&selem->snode); @@ -150,7 +131,8 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); + kfree_rcu(selem, rcu); + return free_local_storage; } @@ -164,8 +146,7 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) /* selem has already been unlinked from sk */ return; - local_storage = rcu_dereference_check(selem->local_storage, - bpf_rcu_lock_held()); + local_storage = rcu_dereference(selem->local_storage); raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( @@ -173,8 +154,7 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_local_storage) - call_rcu_tasks_trace(&local_storage->rcu, - bpf_local_storage_free_rcu); + kfree_rcu(local_storage, rcu); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -194,7 +174,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) /* selem has already been unlinked from smap */ return; - smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + smap = rcu_dereference(SDATA(selem)->smap); b = select_bucket(smap, selem); raw_spin_lock_irqsave(&b->lock, flags); if (likely(selem_linked_to_map(selem))) @@ -233,14 +213,12 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem; /* Fast path (cache hit) */ - sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], - bpf_rcu_lock_held()); + sdata = rcu_dereference(local_storage->cache[smap->cache_idx]); if (sdata && rcu_access_pointer(sdata->smap) == smap) return sdata; /* Slow path (cache miss) */ - hlist_for_each_entry_rcu(selem, &local_storage->list, snode, - rcu_read_lock_trace_held()) + hlist_for_each_entry_rcu(selem, &local_storage->list, snode) if (rcu_access_pointer(SDATA(selem)->smap) == smap) break; @@ -328,8 +306,7 @@ int bpf_local_storage_alloc(void *owner, * bucket->list, first_selem can be freed immediately * (instead of kfree_rcu) because * bpf_local_storage_map_free() does a - * synchronize_rcu_mult (waiting for both sleepable and - * normal programs) before walking the bucket->list. + * synchronize_rcu() before walking the bucket->list. * Hence, no one is accessing selem from the * bucket->list under rcu_read_lock(). 
*/ @@ -365,8 +342,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, !map_value_has_spin_lock(&smap->map))) return ERR_PTR(-EINVAL); - local_storage = rcu_dereference_check(*owner_storage(smap, owner), - bpf_rcu_lock_held()); + local_storage = rcu_dereference(*owner_storage(smap, owner)); if (!local_storage || hlist_empty(&local_storage->list)) { /* Very first elem for the owner */ err = check_flags(NULL, map_flags); diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 9e4ecc9906..06062370c3 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -207,7 +207,7 @@ BTF_ID(func, bpf_lsm_socket_socketpair) BTF_ID(func, bpf_lsm_syslog) BTF_ID(func, bpf_lsm_task_alloc) -BTF_ID(func, bpf_lsm_current_getsecid_subj) +BTF_ID(func, bpf_lsm_task_getsecid_subj) BTF_ID(func, bpf_lsm_task_getsecid_obj) BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_setscheduler) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 21069dbe91..9abcc33f02 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -93,9 +93,6 @@ const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { }; const struct bpf_prog_ops bpf_struct_ops_prog_ops = { -#ifdef CONFIG_NET - .test_run = bpf_struct_ops_test_run, -#endif }; static const struct btf_type *module_type; @@ -165,7 +162,7 @@ void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) break; } - if (__btf_member_bitfield_size(t, member)) { + if (btf_member_bitfield_size(t, member)) { pr_warn("bit field member %s in struct %s is not supported\n", mname, st_ops->name); break; @@ -296,7 +293,7 @@ static int check_zero_holes(const struct btf_type *t, void *data) const struct btf_type *mtype; for_each_member(i, t, member) { - moff = __btf_member_bit_offset(t, member) / 8; + moff = btf_member_bit_offset(t, member) / 8; if (moff > prev_mend && memchr_inv(data + prev_mend, 0, moff - prev_mend)) return -EINVAL; @@ -315,20 +312,6 @@ static int check_zero_holes(const struct btf_type *t, void *data) return 0; } -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, - const struct btf_func_model *model, - void *image, void *image_end) -{ - u32 flags; - - tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; - tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; - flags = model->ret_size > 0 ? 
BPF_TRAMP_F_RET_FENTRY_RET : 0; - return arch_prepare_bpf_trampoline(NULL, image, image_end, - model, flags, tprogs, NULL); -} - static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { @@ -340,7 +323,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_tramp_progs *tprogs = NULL; void *udata, *kdata; int prog_fd, err = 0; - void *image, *image_end; + void *image; u32 i; if (flags) @@ -380,14 +363,14 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, udata = &uvalue->data; kdata = &kvalue->data; image = st_map->image; - image_end = st_map->image + PAGE_SIZE; for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; + u32 flags; - moff = __btf_member_bit_offset(t, member) / 8; + moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); if (ptype == module_type) { if (*(void **)(udata + moff)) @@ -447,9 +430,14 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto reset_unlock; } - err = bpf_struct_ops_prepare_trampoline(tprogs, prog, - &st_ops->func_models[i], - image, image_end); + tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; + tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = st_ops->func_models[i].ret_size > 0 ? + BPF_TRAMP_F_RET_FENTRY_RET : 0; + err = arch_prepare_bpf_trampoline(NULL, image, + st_map->image + PAGE_SIZE, + &st_ops->func_models[i], + flags, tprogs, NULL); if (err < 0) goto reset_unlock; diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h index 5678a9ddf8..066d83ea1c 100644 --- a/kernel/bpf/bpf_struct_ops_types.h +++ b/kernel/bpf/bpf_struct_ops_types.h @@ -2,9 +2,6 @@ /* internal file - do not include directly */ #ifdef CONFIG_BPF_JIT -#ifdef CONFIG_NET -BPF_STRUCT_OPS_TYPE(bpf_dummy_ops) -#endif #ifdef CONFIG_INET #include BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 5da7bed0f5..ebfa8bc908 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -17,7 +17,6 @@ #include #include #include -#include DEFINE_BPF_STORAGE_CACHE(task_cache); @@ -60,8 +59,7 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map, struct bpf_local_storage *task_storage; struct bpf_local_storage_map *smap; - task_storage = - rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); + task_storage = rcu_dereference(task->bpf_storage); if (!task_storage) return NULL; @@ -231,7 +229,6 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, { struct bpf_local_storage_data *sdata; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; @@ -263,7 +260,6 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, { int ret; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; @@ -327,7 +323,7 @@ const struct bpf_func_proto bpf_task_storage_get_proto = { .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID, - .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg2_btf_id = &btf_task_struct_ids[0], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; @@ -338,5 +334,5 @@ const struct bpf_func_proto bpf_task_storage_delete_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID, - .arg2_btf_id = 
&btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg2_btf_id = &btf_task_struct_ids[0], }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3e23b3fa79..b8ed4da63b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -25,7 +25,6 @@ #include #include #include -#include "../tools/lib/bpf/relo_core.h" /* BTF (BPF Type Format) is the metadata format which describes * the data types of BPF program/map. Hence, it basically focuses @@ -282,8 +281,6 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_DECL_TAG] = "DECL_TAG", - [BTF_KIND_TYPE_TAG] = "TYPE_TAG", }; const char *btf_type_str(const struct btf_type *t) @@ -420,7 +417,6 @@ static bool btf_type_is_modifier(const struct btf_type *t) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - case BTF_KIND_TYPE_TAG: return true; } @@ -463,17 +459,6 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } -static bool btf_type_is_decl_tag(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; -} - -static bool btf_type_is_decl_tag_target(const struct btf_type *t) -{ - return btf_type_is_func(t) || btf_type_is_struct(t) || - btf_type_is_var(t) || btf_type_is_typedef(t); -} - u32 btf_nr_types(const struct btf *btf) { u32 total = 0; @@ -552,7 +537,6 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, static bool btf_type_is_resolve_source_only(const struct btf_type *t) { return btf_type_is_var(t) || - btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -579,7 +563,6 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || - btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -633,11 +616,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t) return (const struct btf_var *)(t + 1); } -static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) -{ - return (const struct btf_decl_tag *)(t + 1); -} - static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) { return kind_ops[BTF_INFO_KIND(t->info)]; } @@ -837,7 +815,7 @@ static const char *btf_show_name(struct btf_show *show) const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)]; const char *name = NULL, *prefix = "", *parens = ""; const struct btf_member *m = show->state.member; - const struct btf_type *t; + const struct btf_type *t = show->state.type; const struct btf_array *array; u32 id = show->state.type_id; const char *member = NULL; @@ -1740,7 +1718,6 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type, case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - case BTF_KIND_TYPE_TAG: id = type->type; type = btf_type_by_id(btf, type->type); break; @@ -2349,8 +2326,6 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { - const char *value; - if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } @@ -2366,7 +2341,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, return -EINVAL; } - /* typedef/type_tag type must have a valid name, and other ref types, + /* typedef type must have a valid name, and other ref types, * volatile, const, restrict, should have a null name. 
*/ if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { @@ -2375,12 +2350,6 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } - } else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) { - value = btf_name_by_offset(env->btf, t->name_off); - if (!value || !value[0]) { - btf_verifier_log_type(env, t, "Invalid name"); - return -EINVAL; - } } else { if (t->name_off) { btf_verifier_log_type(env, t, "Invalid name"); @@ -2970,7 +2939,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, return -EINVAL; } - offset = __btf_member_bit_offset(t, member); + offset = btf_member_bit_offset(t, member); if (is_union && offset) { btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); @@ -3095,7 +3064,7 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t if (off != -ENOENT) /* only one such field is allowed */ return -E2BIG; - off = __btf_member_bit_offset(t, member); + off = btf_member_bit_offset(t, member); if (off % 8) /* valid C code cannot generate such BTF */ return -EINVAL; @@ -3185,8 +3154,8 @@ static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, btf_show_start_member(show, member); - member_offset = __btf_member_bit_offset(t, member); - bitfield_size = __btf_member_bitfield_size(t, member); + member_offset = btf_member_bit_offset(t, member); + bitfield_size = btf_member_bitfield_size(t, member); bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); bits8_offset = BITS_PER_BYTE_MASKED(member_offset); if (bitfield_size) { @@ -3832,110 +3801,6 @@ static const struct btf_kind_operations float_ops = { .show = btf_df_show, }; -static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env, - const struct btf_type *t, - u32 meta_left) -{ - const struct btf_decl_tag *tag; - u32 meta_needed = sizeof(*tag); - s32 component_idx; - const char *value; - - if (meta_left < meta_needed) { - btf_verifier_log_basic(env, t, - "meta_left:%u meta_needed:%u", - meta_left, meta_needed); - return -EINVAL; - } - - value = btf_name_by_offset(env->btf, t->name_off); - if (!value || !value[0]) { - btf_verifier_log_type(env, t, "Invalid value"); - return -EINVAL; - } - - if (btf_type_vlen(t)) { - btf_verifier_log_type(env, t, "vlen != 0"); - return -EINVAL; - } - - if (btf_type_kflag(t)) { - btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); - return -EINVAL; - } - - component_idx = btf_type_decl_tag(t)->component_idx; - if (component_idx < -1) { - btf_verifier_log_type(env, t, "Invalid component_idx"); - return -EINVAL; - } - - btf_verifier_log_type(env, t, NULL); - - return meta_needed; -} - -static int btf_decl_tag_resolve(struct btf_verifier_env *env, - const struct resolve_vertex *v) -{ - const struct btf_type *next_type; - const struct btf_type *t = v->t; - u32 next_type_id = t->type; - struct btf *btf = env->btf; - s32 component_idx; - u32 vlen; - - next_type = btf_type_by_id(btf, next_type_id); - if (!next_type || !btf_type_is_decl_tag_target(next_type)) { - btf_verifier_log_type(env, v->t, "Invalid type_id"); - return -EINVAL; - } - - if (!env_type_is_resolve_sink(env, next_type) && - !env_type_is_resolved(env, next_type_id)) - return env_stack_push(env, next_type, next_type_id); - - component_idx = btf_type_decl_tag(t)->component_idx; - if (component_idx != -1) { - if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) { - btf_verifier_log_type(env, v->t, "Invalid component_idx"); - return -EINVAL; - } - - if 
(btf_type_is_struct(next_type)) { - vlen = btf_type_vlen(next_type); - } else { - /* next_type should be a function */ - next_type = btf_type_by_id(btf, next_type->type); - vlen = btf_type_vlen(next_type); - } - - if ((u32)component_idx >= vlen) { - btf_verifier_log_type(env, v->t, "Invalid component_idx"); - return -EINVAL; - } - } - - env_stack_pop_resolved(env, next_type_id, 0); - - return 0; -} - -static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t) -{ - btf_verifier_log(env, "type=%u component_idx=%d", t->type, - btf_type_decl_tag(t)->component_idx); -} - -static const struct btf_kind_operations decl_tag_ops = { - .check_meta = btf_decl_tag_check_meta, - .resolve = btf_decl_tag_resolve, - .check_member = btf_df_check_member, - .check_kflag_member = btf_df_check_kflag_member, - .log_details = btf_decl_tag_log, - .show = btf_df_show, -}; - static int btf_func_proto_check(struct btf_verifier_env *env, const struct btf_type *t) { @@ -4070,8 +3935,6 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_VAR] = &var_ops, [BTF_KIND_DATASEC] = &datasec_ops, [BTF_KIND_FLOAT] = &float_ops, - [BTF_KIND_DECL_TAG] = &decl_tag_ops, - [BTF_KIND_TYPE_TAG] = &modifier_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -4156,10 +4019,6 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_decl_tag(t)) - return btf_resolved_type_id(btf, type_id) && - !btf_resolved_type_size(btf, type_id); - if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || btf_type_is_var(t)) { t = btf_type_id_resolve(btf, &type_id); @@ -4826,7 +4685,7 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) return prog->aux->attach_btf; } -static bool is_int_ptr(struct btf *btf, const struct btf_type *t) +static bool is_string_ptr(struct btf *btf, const struct btf_type *t) { /* t comes in already as a pointer */ t = btf_type_by_id(btf, t->type); @@ -4835,7 +4694,8 @@ static bool is_int_ptr(struct btf *btf, const struct btf_type *t) if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST) t = btf_type_by_id(btf, t->type); - return btf_type_is_int(t); + /* char, signed char, unsigned char */ + return btf_type_is_int(t) && t->size == 1; } bool btf_ctx_access(int off, int size, enum bpf_access_type type, @@ -4940,12 +4800,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; - u32 type, flag; - type = base_type(ctx_arg_info->reg_type); - flag = type_flag(ctx_arg_info->reg_type); - if (ctx_arg_info->offset == off && type == PTR_TO_BUF && - (flag & PTR_MAYBE_NULL)) { + if (ctx_arg_info->offset == off && + (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL || + ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) { info->reg_type = ctx_arg_info->reg_type; return true; } @@ -4958,7 +4816,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, */ return true; - if (is_int_ptr(btf, t)) + if (is_string_ptr(btf, t)) return true; /* this is a pointer to another type */ @@ -5061,7 +4919,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, if (array_elem->nelems != 0) goto error; - moff = __btf_member_bit_offset(t, member) / 8; + moff = btf_member_bit_offset(t, member) / 8; if (off < moff) goto error; 
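[Editor's note: the __btf_member_bit_offset()/__btf_member_bitfield_size() helpers being renamed throughout these hunks decode the member layout defined in uapi/linux/btf.h: when a struct's kind_flag is set, btf_member.offset packs the bitfield size into the top 8 bits and the bit offset into the low 24. A standalone decoding sketch; the example offset value is made up:]

    #include <stdint.h>
    #include <stdio.h>

    /* Same packing as the BTF_MEMBER_* macros in uapi/linux/btf.h. */
    #define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
    #define BTF_MEMBER_BIT_OFFSET(val)    ((val) & 0xffffff)

    int main(void)
    {
        uint32_t off = (4u << 24) | 68; /* hypothetical: 4-bit field at bit 68 */

        printf("bitfield size: %u bits\n", BTF_MEMBER_BITFIELD_SIZE(off));
        printf("byte offset:   %u\n", BTF_MEMBER_BIT_OFFSET(off) / 8);
        return 0;
    }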
@@ -5084,14 +4942,14 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, for_each_member(i, t, member) { /* offset of the field in bytes */ - moff = __btf_member_bit_offset(t, member) / 8; + moff = btf_member_bit_offset(t, member) / 8; if (off + size <= moff) /* won't find anything, field is already too far */ break; - if (__btf_member_bitfield_size(t, member)) { - u32 end_bit = __btf_member_bit_offset(t, member) + - __btf_member_bitfield_size(t, member); + if (btf_member_bitfield_size(t, member)) { + u32 end_bit = btf_member_bit_offset(t, member) + + btf_member_bitfield_size(t, member); /* off <= moff instead of off == moff because clang * does not generate a BTF member for anonymous @@ -5576,53 +5434,12 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { #endif }; -/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ -static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int rec) -{ - const struct btf_type *member_type; - const struct btf_member *member; - u32 i; - - if (!btf_type_is_struct(t)) - return false; - - for_each_member(i, t, member) { - const struct btf_array *array; - - member_type = btf_type_skip_modifiers(btf, member->type, NULL); - if (btf_type_is_struct(member_type)) { - if (rec >= 3) { - bpf_log(log, "max struct nesting depth exceeded\n"); - return false; - } - if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1)) - return false; - continue; - } - if (btf_type_is_array(member_type)) { - array = btf_type_array(member_type); - if (!array->nelems) - return false; - member_type = btf_type_skip_modifiers(btf, array->type, NULL); - if (!btf_type_is_scalar(member_type)) - return false; - continue; - } - if (!btf_type_is_scalar(member_type)) - return false; - } - return true; -} - static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, bool ptr_to_mem_ok) { struct bpf_verifier_log *log = &env->log; - bool is_kfunc = btf_is_kernel(btf); const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; @@ -5675,21 +5492,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - if (btf_get_prog_ctx_type(log, btf, t, - env->prog->type, i)) { - /* If function expects ctx type in BTF check that caller - * is passing PTR_TO_CTX. 
- */ - if (reg->type != PTR_TO_CTX) { - bpf_log(log, - "arg#%d expected pointer to ctx, but got %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - if (check_ptr_off_reg(env, reg, regno)) - return -EINVAL; - } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || - (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) { + if (btf_is_kernel(btf)) { const struct btf_type *reg_ref_t; const struct btf *reg_btf; const char *reg_ref_tname; @@ -5705,9 +5508,14 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (reg->type == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; - } else { + } else if (reg2btf_ids[reg->type]) { reg_btf = btf_vmlinux; - reg_ref_id = *reg2btf_ids[base_type(reg->type)]; + reg_ref_id = *reg2btf_ids[reg->type]; + } else { + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n", + func_name, i, + btf_type_str(ref_t), ref_tname, regno); + return -EINVAL; } reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, @@ -5723,24 +5531,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_tname); return -EINVAL; } + } else if (btf_get_prog_ctx_type(log, btf, t, + env->prog->type, i)) { + /* If function expects ctx type in BTF check that caller + * is passing PTR_TO_CTX. + */ + if (reg->type != PTR_TO_CTX) { + bpf_log(log, + "arg#%d expected pointer to ctx, but got %s\n", + i, btf_type_str(t)); + return -EINVAL; + } + if (check_ctx_reg(env, reg, regno)) + return -EINVAL; } else if (ptr_to_mem_ok) { const struct btf_type *resolve_ret; u32 type_size; - if (is_kfunc) { - /* Permit pointer to mem, but only when argument - * type is pointer to scalar, or struct composed - * (recursively) of scalars. - */ - if (!btf_type_is_scalar(ref_t) && - !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) { - bpf_log(log, - "arg#%d pointer type %s %s must point to scalar or struct with scalar\n", - i, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } - } - resolve_ret = btf_resolve_size(btf, ref_t, &type_size); if (IS_ERR(resolve_ret)) { bpf_log(log, @@ -5753,8 +5560,6 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (check_mem_reg(env, reg, regno, type_size)) return -EINVAL; } else { - bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i, - is_kfunc ? 
"kernel " : "", func_name, func_id); return -EINVAL; } } @@ -5804,7 +5609,7 @@ int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs) { - return btf_check_func_arg_match(env, btf, func_id, regs, true); + return btf_check_func_arg_match(env, btf, func_id, regs, false); } /* Convert BTF of a function into bpf_reg_state if possible @@ -5912,7 +5717,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, return -EINVAL; } - reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; + reg->type = PTR_TO_MEM_OR_NULL; reg->id = ++env->id_gen; continue; @@ -6223,8 +6028,6 @@ btf_module_read(struct file *file, struct kobject *kobj, return len; } -static void purge_cand_cache(struct btf *btf); - static int btf_module_notify(struct notifier_block *nb, unsigned long op, void *module) { @@ -6259,7 +6062,6 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, goto out; } - purge_cand_cache(NULL); mutex_lock(&btf_module_mutex); btf_mod->module = module; btf_mod->btf = btf; @@ -6302,7 +6104,6 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, list_del(&btf_mod->list); if (btf_mod->sysfs_attr) sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr); - purge_cand_cache(btf_mod->btf); btf_put(btf_mod->btf); kfree(btf_mod->sysfs_attr); kfree(btf_mod); @@ -6406,442 +6207,10 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { .func = bpf_btf_find_by_name_kind, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, }; -BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) -#define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type) -BTF_TRACING_TYPE_xxx -#undef BTF_TRACING_TYPE - -/* BTF ID set registration API for modules */ - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES - -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ - mutex_lock(&l->mutex); - list_add(&s->list, &l->list); - mutex_unlock(&l->mutex); -} -EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); - -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ - mutex_lock(&l->mutex); - list_del_init(&s->list); - mutex_unlock(&l->mutex); -} -EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); - -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner) -{ - struct kfunc_btf_id_set *s; - - mutex_lock(&klist->mutex); - list_for_each_entry(s, &klist->list, list) { - if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { - mutex_unlock(&klist->mutex); - return true; - } - } - mutex_unlock(&klist->mutex); - return false; -} - -#define DEFINE_KFUNC_BTF_ID_LIST(name) \ - struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ - __MUTEX_INITIALIZER(name.mutex) }; \ - EXPORT_SYMBOL_GPL(name) - -DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); -DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); - -#endif - -int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, - const struct btf *targ_btf, __u32 targ_id) -{ - return -EOPNOTSUPP; -} - -static bool bpf_core_is_flavor_sep(const char *s) -{ - /* check X___Y name pattern, where X and Y are not underscores */ - return s[0] != '_' && /* X */ - s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ - s[4] != '_'; /* Y */ -} - -size_t bpf_core_essential_name_len(const char *name) -{ - size_t n = 
strlen(name); - int i; - - for (i = n - 5; i >= 0; i--) { - if (bpf_core_is_flavor_sep(name + i)) - return i + 1; - } - return n; -} - -struct bpf_cand_cache { - const char *name; - u32 name_len; - u16 kind; - u16 cnt; - struct { - const struct btf *btf; - u32 id; - } cands[]; -}; - -static void bpf_free_cands(struct bpf_cand_cache *cands) -{ - if (!cands->cnt) - /* empty candidate array was allocated on stack */ - return; - kfree(cands); -} - -static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands) -{ - kfree(cands->name); - kfree(cands); -} - -#define VMLINUX_CAND_CACHE_SIZE 31 -static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE]; - -#define MODULE_CAND_CACHE_SIZE 31 -static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE]; - -static DEFINE_MUTEX(cand_cache_mutex); - -static void __print_cand_cache(struct bpf_verifier_log *log, - struct bpf_cand_cache **cache, - int cache_size) -{ - struct bpf_cand_cache *cc; - int i, j; - - for (i = 0; i < cache_size; i++) { - cc = cache[i]; - if (!cc) - continue; - bpf_log(log, "[%d]%s(", i, cc->name); - for (j = 0; j < cc->cnt; j++) { - bpf_log(log, "%d", cc->cands[j].id); - if (j < cc->cnt - 1) - bpf_log(log, " "); - } - bpf_log(log, "), "); - } -} - -static void print_cand_cache(struct bpf_verifier_log *log) -{ - mutex_lock(&cand_cache_mutex); - bpf_log(log, "vmlinux_cand_cache:"); - __print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); - bpf_log(log, "\nmodule_cand_cache:"); - __print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE); - bpf_log(log, "\n"); - mutex_unlock(&cand_cache_mutex); -} - -static u32 hash_cands(struct bpf_cand_cache *cands) -{ - return jhash(cands->name, cands->name_len, 0); -} - -static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands, - struct bpf_cand_cache **cache, - int cache_size) -{ - struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size]; - - if (cc && cc->name_len == cands->name_len && - !strncmp(cc->name, cands->name, cands->name_len)) - return cc; - return NULL; -} - -static size_t sizeof_cands(int cnt) -{ - return offsetof(struct bpf_cand_cache, cands[cnt]); -} - -static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, - struct bpf_cand_cache **cache, - int cache_size) -{ - struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands; - - if (*cc) { - bpf_free_cands_from_cache(*cc); - *cc = NULL; - } - new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL); - if (!new_cands) { - bpf_free_cands(cands); - return ERR_PTR(-ENOMEM); - } - /* strdup the name, since it will stay in cache. - * the cands->name points to strings in prog's BTF and the prog can be unloaded. - */ - new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); - bpf_free_cands(cands); - if (!new_cands->name) { - kfree(new_cands); - return ERR_PTR(-ENOMEM); - } - *cc = new_cands; - return new_cands; -} - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES -static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache, - int cache_size) -{ - struct bpf_cand_cache *cc; - int i, j; - - for (i = 0; i < cache_size; i++) { - cc = cache[i]; - if (!cc) - continue; - if (!btf) { - /* when new module is loaded purge all of module_cand_cache, - * since new module might have candidates with the name - * that matches cached cands. 
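[Editor's note: the bpf_core_is_flavor_sep()/bpf_core_essential_name_len() helpers removed in this block implement the CO-RE "flavor" convention, where a name like task_struct___2 is treated as a variant of task_struct. A userspace restatement of the two helpers, just to make the name trimming concrete:]

    #include <assert.h>
    #include <stddef.h>
    #include <string.h>

    /* X___Y pattern check, mirroring the removed kernel helper. */
    static int is_flavor_sep(const char *s)
    {
        return s[0] != '_' && s[1] == '_' && s[2] == '_' &&
               s[3] == '_' && s[4] != '_';
    }

    static size_t essential_name_len(const char *name)
    {
        size_t n = strlen(name);
        int i;

        for (i = (int)n - 5; i >= 0; i--)
            if (is_flavor_sep(name + i))
                return i + 1;
        return n;
    }

    int main(void)
    {
        assert(essential_name_len("task_struct___2") == strlen("task_struct"));
        assert(essential_name_len("task_struct") == strlen("task_struct"));
        return 0;
    }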
- */ - bpf_free_cands_from_cache(cc); - cache[i] = NULL; - continue; - } - /* when module is unloaded purge cache entries - * that match module's btf - */ - for (j = 0; j < cc->cnt; j++) - if (cc->cands[j].btf == btf) { - bpf_free_cands_from_cache(cc); - cache[i] = NULL; - break; - } - } - -} - -static void purge_cand_cache(struct btf *btf) -{ - mutex_lock(&cand_cache_mutex); - __purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE); - mutex_unlock(&cand_cache_mutex); -} -#endif - -static struct bpf_cand_cache * -bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, - int targ_start_id) -{ - struct bpf_cand_cache *new_cands; - const struct btf_type *t; - const char *targ_name; - size_t targ_essent_len; - int n, i; - - n = btf_nr_types(targ_btf); - for (i = targ_start_id; i < n; i++) { - t = btf_type_by_id(targ_btf, i); - if (btf_kind(t) != cands->kind) - continue; - - targ_name = btf_name_by_offset(targ_btf, t->name_off); - if (!targ_name) - continue; - - /* the resched point is before strncmp to make sure that search - * for non-existing name will have a chance to schedule(). - */ - cond_resched(); - - if (strncmp(cands->name, targ_name, cands->name_len) != 0) - continue; - - targ_essent_len = bpf_core_essential_name_len(targ_name); - if (targ_essent_len != cands->name_len) - continue; - - /* most of the time there is only one candidate for a given kind+name pair */ - new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); - if (!new_cands) { - bpf_free_cands(cands); - return ERR_PTR(-ENOMEM); - } - - memcpy(new_cands, cands, sizeof_cands(cands->cnt)); - bpf_free_cands(cands); - cands = new_cands; - cands->cands[cands->cnt].btf = targ_btf; - cands->cands[cands->cnt].id = i; - cands->cnt++; - } - return cands; -} - -static struct bpf_cand_cache * -bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id) -{ - struct bpf_cand_cache *cands, *cc, local_cand = {}; - const struct btf *local_btf = ctx->btf; - const struct btf_type *local_type; - const struct btf *main_btf; - size_t local_essent_len; - struct btf *mod_btf; - const char *name; - int id; - - main_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(main_btf)) - return ERR_CAST(main_btf); - - local_type = btf_type_by_id(local_btf, local_type_id); - if (!local_type) - return ERR_PTR(-EINVAL); - - name = btf_name_by_offset(local_btf, local_type->name_off); - if (str_is_empty(name)) - return ERR_PTR(-EINVAL); - local_essent_len = bpf_core_essential_name_len(name); - - cands = &local_cand; - cands->name = name; - cands->kind = btf_kind(local_type); - cands->name_len = local_essent_len; - - cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); - /* cands is a pointer to stack here */ - if (cc) { - if (cc->cnt) - return cc; - goto check_modules; - } - - /* Attempt to find target candidates in vmlinux BTF first */ - cands = bpf_core_add_cands(cands, main_btf, 1); - if (IS_ERR(cands)) - return ERR_CAST(cands); - - /* cands is a pointer to kmalloced memory here if cands->cnt > 0 */ - - /* populate cache even when cands->cnt == 0 */ - cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); - if (IS_ERR(cc)) - return ERR_CAST(cc); - - /* if vmlinux BTF has any candidate, don't go for module BTFs */ - if (cc->cnt) - return cc; - -check_modules: - /* cands is a pointer to stack here and cands->cnt == 0 */ - cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); - if (cc) - /* if cache has it return it even if cc->cnt == 0 */ - return cc; - - /* If 
candidate is not found in vmlinux's BTF then search in module's BTFs */ - spin_lock_bh(&btf_idr_lock); - idr_for_each_entry(&btf_idr, mod_btf, id) { - if (!btf_is_module(mod_btf)) - continue; - /* linear search could be slow hence unlock/lock - * the IDR to avoid holding it for too long - */ - btf_get(mod_btf); - spin_unlock_bh(&btf_idr_lock); - cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); - if (IS_ERR(cands)) { - btf_put(mod_btf); - return ERR_CAST(cands); - } - spin_lock_bh(&btf_idr_lock); - btf_put(mod_btf); - } - spin_unlock_bh(&btf_idr_lock); - /* cands is a pointer to kmalloced memory here if cands->cnt > 0 - * or pointer to stack if cands->cnt == 0. - * Copy it into the cache even when cands->cnt == 0 and - * return the result. - */ - return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); -} - -int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, - int relo_idx, void *insn) -{ - bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL; - struct bpf_core_cand_list cands = {}; - struct bpf_core_spec *specs; - int err; - - /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" - * into arrays of btf_ids of struct fields and array indices. - */ - specs = kcalloc(3, sizeof(*specs), GFP_KERNEL); - if (!specs) - return -ENOMEM; - - if (need_cands) { - struct bpf_cand_cache *cc; - int i; - - mutex_lock(&cand_cache_mutex); - cc = bpf_core_find_cands(ctx, relo->type_id); - if (IS_ERR(cc)) { - bpf_log(ctx->log, "target candidate search failed for %d\n", - relo->type_id); - err = PTR_ERR(cc); - goto out; - } - if (cc->cnt) { - cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); - if (!cands.cands) { - err = -ENOMEM; - goto out; - } - } - for (i = 0; i < cc->cnt; i++) { - bpf_log(ctx->log, - "CO-RE relocating %s %s: found target candidate [%d]\n", - btf_kind_str[cc->kind], cc->name, cc->cands[i].id); - cands.cands[i].btf = cc->cands[i].btf; - cands.cands[i].id = cc->cands[i].id; - } - cands.len = cc->cnt; - /* cand_cache_mutex needs to span the cache lookup and - * copy of btf pointer into bpf_core_cand_list, - * since module can be unloaded while bpf_core_apply_relo_insn - * is working with module's btf. - */ - } - - err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8, - relo, relo_idx, ctx->btf, &cands, specs); -out: - kfree(specs); - if (need_cands) { - kfree(cands.cands); - mutex_unlock(&cand_cache_mutex); - if (ctx->log->level & BPF_LOG_LEVEL2) - print_cand_cache(ctx->log); - } - return err; -} +BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 514b4681a9..7dbd68195a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -430,10 +430,10 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs, * Exactly one of @prog or @link can be non-null. * Must be called with cgroup_mutex held. 
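[Editor's note: the userspace-visible face of these attach paths is the bpf(2) BPF_PROG_ATTACH command. A hedged sketch; cgroup_fd and prog_fd are assumed to be descriptors opened elsewhere, and the attach type is only an example:]

    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Attach prog_fd to the cgroup behind cgroup_fd, letting it coexist
     * with other programs on the same cgroup (BPF_F_ALLOW_MULTI). */
    static int attach_cgroup_prog(int cgroup_fd, int prog_fd)
    {
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.target_fd = cgroup_fd;
        attr.attach_bpf_fd = prog_fd;
        attr.attach_type = BPF_CGROUP_INET_INGRESS;
        attr.attach_flags = BPF_F_ALLOW_MULTI;

        return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
    }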
*/ -static int __cgroup_bpf_attach(struct cgroup *cgrp, - struct bpf_prog *prog, struct bpf_prog *replace_prog, - struct bpf_cgroup_link *link, - enum bpf_attach_type type, u32 flags) +int __cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, + enum bpf_attach_type type, u32 flags) { u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); struct bpf_prog *old_prog = NULL; @@ -523,20 +523,6 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, return err; } -static int cgroup_bpf_attach(struct cgroup *cgrp, - struct bpf_prog *prog, struct bpf_prog *replace_prog, - struct bpf_cgroup_link *link, - enum bpf_attach_type type, - u32 flags) -{ - int ret; - - mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); - mutex_unlock(&cgroup_mutex); - return ret; -} - /* Swap updated BPF program for given link in effective program arrays across * all descendant cgroups. This function is guaranteed to succeed. */ @@ -686,14 +672,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, * propagate the change to descendants * @cgrp: The cgroup which descendants to traverse * @prog: A program to detach or NULL * @link: A link to detach or NULL * @type: Type of detach operation * * At most one of @prog or @link can be non-NULL. * Must be called with cgroup_mutex held. */ -static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_cgroup_link *link, enum bpf_attach_type type) +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type) { enum cgroup_bpf_attach_type atype; struct bpf_prog *old_prog; @@ -744,20 +730,9 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, return err; } -static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type) -{ - int ret; - - mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); - mutex_unlock(&cgroup_mutex); - return ret; -} - /* Must be called with cgroup_mutex held to avoid races. 
*/ -static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, - union bpf_attr __user *uattr) +int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr) { __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); enum bpf_attach_type type = attr->query.attach_type; @@ -814,17 +789,6 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, return ret; } -static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, - union bpf_attr __user *uattr) -{ - int ret; - - mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_query(cgrp, attr, uattr); - mutex_unlock(&cgroup_mutex); - return ret; -} - int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog) { @@ -1789,7 +1753,7 @@ static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index de3e5bc678..6e3ae90ad1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include @@ -390,13 +389,6 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, i = end_new; insn = prog->insnsi + end_old; } - if (bpf_pseudo_func(insn)) { - ret = bpf_adj_delta_to_imm(insn, pos, end_old, - end_new, i, probe_pass); - if (ret) - return ret; - continue; - } code = insn->code; if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || @@ -1574,8 +1566,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) if (unlikely(index >= array->map.max_entries)) goto out; - - if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT)) + if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) goto out; tail_call_cnt++; @@ -1892,7 +1883,7 @@ static void bpf_prog_select_func(struct bpf_prog *fp) /** * bpf_prog_select_runtime - select exec runtime for BPF program - * @fp: bpf_prog populated with BPF program + * @fp: bpf_prog populated with internal BPF program * @err: pointer to error variable * * Try to JIT eBPF program, if JIT is not available, use interpreter. 
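[Editor's note: the interpreter hunk above swaps the tail-call guard between "tail_call_cnt >= MAX_TAIL_CALL_CNT" and "tail_call_cnt > MAX_TAIL_CALL_CNT". With the same constant, ">" admits one more chained call than ">="; the series being reverted paired the ">=" with bumping MAX_TAIL_CALL_CNT from 32 to 33, keeping the effective limit at 33. A toy counter makes the off-by-one visible; the constant value here is an assumption about the current kernel:]

    #include <stdio.h>

    #define MAX_TAIL_CALL_CNT 33 /* assumed current kernel value */

    static unsigned int chained_calls(int strict)
    {
        unsigned int tail_call_cnt = 0, done = 0;

        for (;;) {
            if (strict ? tail_call_cnt >= MAX_TAIL_CALL_CNT
                       : tail_call_cnt > MAX_TAIL_CALL_CNT)
                break;
            tail_call_cnt++;
            done++;
        }
        return done;
    }

    int main(void)
    {
        printf(">= permits %u tail calls, > permits %u\n",
               chained_calls(1), chained_calls(0)); /* 33 vs 34 */
        return 0;
    }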
@@ -2272,9 +2263,6 @@ static void bpf_prog_free_deferred(struct work_struct *work) int i; aux = container_of(work, struct bpf_prog_aux, work); -#ifdef CONFIG_BPF_SYSCALL - bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab); -#endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); if (bpf_prog_is_dev_bound(aux)) @@ -2301,6 +2289,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) } } +/* Free internal BPF program */ void bpf_prog_free(struct bpf_prog *fp) { struct bpf_prog_aux *aux = fp->aux; @@ -2376,11 +2365,6 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) return NULL; } -const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void) -{ - return NULL; -} - u64 __weak bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b3e6b94222..585b2b77cc 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -195,7 +195,7 @@ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, } return; default: - bpf_warn_invalid_xdp_action(NULL, rcpu->prog, act); + bpf_warn_invalid_xdp_action(act); fallthrough; case XDP_ABORTED: trace_xdp_exception(skb->dev, rcpu->prog, act); @@ -254,7 +254,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, } break; default: - bpf_warn_invalid_xdp_action(NULL, rcpu->prog, act); + bpf_warn_invalid_xdp_action(act); fallthrough; case XDP_DROP: xdp_return_frame(xdpf); @@ -746,9 +746,15 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) list_add(&bq->flush_node, flush_list); } -int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx) { + struct xdp_frame *xdpf; + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + /* Info needed when constructing SKB on remote CPU */ xdpf->dev_rx = dev_rx; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index fe019dbdb3..f02d04540c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -348,7 +348,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, frames[nframes++] = xdpf; break; default: - bpf_warn_invalid_xdp_action(NULL, xdp_prog, act); + bpf_warn_invalid_xdp_action(act); fallthrough; case XDP_ABORTED: trace_xdp_exception(dev, xdp_prog, act); @@ -467,19 +467,24 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, bq->q[bq->count++] = xdpf; } -static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, +static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { + struct xdp_frame *xdpf; int err; if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; - err = xdp_ok_fwd_dev(dev, xdpf->len); + err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); if (unlikely(err)) return err; + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + bq_enqueue(dev, xdpf, dev_rx, xdp_prog); return 0; } @@ -502,7 +507,7 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev __skb_push(skb, skb->mac_len); break; default: - bpf_warn_invalid_xdp_action(NULL, dst->xdp_prog, act); + bpf_warn_invalid_xdp_action(act); fallthrough; case XDP_ABORTED: trace_xdp_exception(dst->dev, dst->xdp_prog, act); @@ -515,27 +520,27 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct 
bpf_dtab_netdev return act; } -int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx) { - return __xdp_enqueue(dev, xdpf, dev_rx, NULL); + return __xdp_enqueue(dev, xdp, dev_rx, NULL); } -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx) { struct net_device *dev = dst->dev; - return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog); + return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); } -static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) +static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp) { if (!obj || !obj->dev->netdev_ops->ndo_xdp_xmit) return false; - if (xdp_ok_fwd_dev(obj->dev, xdpf->len)) + if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data)) return false; return true; @@ -581,13 +586,14 @@ static int get_upper_ifindexes(struct net_device *dev, int *indexes) return n; } -int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, +int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dst, *last_dst = NULL; int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; + struct xdp_frame *xdpf; int num_excluded = 0; unsigned int i; int err; @@ -597,11 +603,15 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, excluded_devices[num_excluded++] = dev_rx->ifindex; } + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!is_valid_dst(dst, xdpf)) + if (!is_valid_dst(dst, xdp)) continue; if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) @@ -624,7 +634,7 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, head = dev_map_index_hash(dtab, i); hlist_for_each_entry_rcu(dst, head, index_hlist, lockdep_is_held(&dtab->index_lock)) { - if (!is_valid_dst(dst, xdpf)) + if (!is_valid_dst(dst, xdp)) continue; if (is_ifindex_excluded(excluded_devices, num_excluded, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index d29af9988f..32471ba027 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -668,7 +668,7 @@ static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, (void *(*)(struct bpf_map *map, void *key))NULL)); - *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); + *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, offsetof(struct htab_elem, key) + @@ -709,7 +709,7 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, (void *(*)(struct bpf_map *map, void *key))NULL)); - *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); + *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, offsetof(struct htab_elem, lru_node) + @@ -2049,7 +2049,7 @@ static const struct bpf_iter_seq_info iter_seq_info = { .seq_priv_size = sizeof(struct 
bpf_iter_seq_hash_map_info), }; -static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, +static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); @@ -2089,8 +2089,9 @@ static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_f val = elem->key + roundup_key_size; } num_elems++; - ret = callback_fn((u64)(long)map, (u64)(long)key, - (u64)(long)val, (u64)(long)callback_ctx, 0); + ret = BPF_CAST_CALL(callback_fn)((u64)(long)map, + (u64)(long)key, (u64)(long)val, + (u64)(long)callback_ctx, 0); /* return value: 0 - continue, 1 - stop and return */ if (ret) { rcu_read_unlock(); @@ -2396,7 +2397,7 @@ static int htab_of_map_gen_lookup(struct bpf_map *map, BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, (void *(*)(struct bpf_map *map, void *key))NULL)); - *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); + *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, offsetof(struct htab_elem, key) + diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 55c084251f..6f600cc95c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2,8 +2,6 @@ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com */ #include -#include -#include #include #include #include @@ -532,7 +530,7 @@ const struct bpf_func_proto bpf_strtol_proto = { .func = bpf_strtol, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_LONG, @@ -560,27 +558,13 @@ const struct bpf_func_proto bpf_strtoul_proto = { .func = bpf_strtoul, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_LONG, }; #endif -BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) -{ - return strncmp(s1, s2, s1_sz); -} - -const struct bpf_func_proto bpf_strncmp_proto = { - .func = bpf_strncmp, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, - .arg2_type = ARG_CONST_SIZE, - .arg3_type = ARG_PTR_TO_CONST_STR, -}; - BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, struct bpf_pidns_info *, nsdata, u32, size) { @@ -646,7 +630,7 @@ const struct bpf_func_proto bpf_event_output_data_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -683,7 +667,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) const struct bpf_func_proto bpf_per_cpu_ptr_proto = { .func = bpf_per_cpu_ptr, .gpl_only = false, - .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, .arg2_type = ARG_ANYTHING, }; @@ -696,7 +680,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) const struct bpf_func_proto bpf_this_cpu_ptr_proto = { .func = bpf_this_cpu_ptr, .gpl_only = false, - .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, }; @@ -995,13 +979,15 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return err; } +#define 
MAX_SNPRINTF_VARARGS 12 + BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, const void *, data, u32, data_len) { int err, num_args; u32 *bin_args; - if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || + if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 || (data_len && !data)) return -EINVAL; num_args = data_len / 8; @@ -1027,7 +1013,7 @@ const struct bpf_func_proto bpf_snprintf_proto = { .arg1_type = ARG_PTR_TO_MEM_OR_NULL, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_PTR_TO_CONST_STR, - .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM_OR_NULL, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -1072,11 +1058,10 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer); struct bpf_map *map = t->map; void *value = t->value; - bpf_callback_t callback_fn; + void *callback_fn; void *key; u32 idx; - BTF_TYPE_EMIT(struct bpf_timer); callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); if (!callback_fn) goto out; @@ -1098,7 +1083,8 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) key = value - round_up(map->key_size, 8); } - callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0); + BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key, + (u64)(long)value, 0, 0); /* The verifier checked that return value is zero. */ this_cpu_write(hrtimer_running, NULL); @@ -1393,10 +1379,6 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_query_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; - case BPF_FUNC_loop: - return &bpf_loop_proto; - case BPF_FUNC_strncmp: - return &bpf_strncmp_proto; default: break; } @@ -1453,8 +1435,6 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_snprintf_proto; case BPF_FUNC_task_pt_regs: return &bpf_task_pt_regs_proto; - case BPF_FUNC_trace_vprintk: - return bpf_get_trace_vprintk_proto(); default: return NULL; } diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 23f7f9d08a..035e9e3a71 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -163,7 +163,8 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key, return 0; } - new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size), + new = bpf_map_kmalloc_node(map, sizeof(struct bpf_storage_buffer) + + map->value_size, __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, map->numa_node); if (!new) diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index b0fa190b09..6a9542af42 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -174,9 +174,9 @@ static const struct bpf_iter_reg bpf_map_elem_reg_info = { .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_map_elem, key), - PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY }, + PTR_TO_RDONLY_BUF_OR_NULL }, { offsetof(struct bpf_iter__bpf_map_elem, value), - PTR_TO_BUF | PTR_MAYBE_NULL }, + PTR_TO_RDWR_BUF_OR_NULL }, }, }; diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 868cc2c438..542f275bf2 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include #include diff --git a/kernel/bpf/preload/.gitignore b/kernel/bpf/preload/.gitignore index 9452322902..856a4c5ad0 100644 --- a/kernel/bpf/preload/.gitignore +++ b/kernel/bpf/preload/.gitignore @@ -1,2 +1,4 @@ -/libbpf +/FEATURE-DUMP.libbpf +/bpf_helper_defs.h 
+/feature /bpf_preload_umd diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 1400ac5817..1951332dd1 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -1,35 +1,21 @@ # SPDX-License-Identifier: GPL-2.0 LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ -LIBBPF_OUT = $(abspath $(obj))/libbpf -LIBBPF_A = $(LIBBPF_OUT)/libbpf.a -LIBBPF_DESTDIR = $(LIBBPF_OUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include +LIBBPF_A = $(obj)/libbpf.a +LIBBPF_OUT = $(abspath $(obj)) # Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test # in tools/scripts/Makefile.include always succeeds when building the kernel # with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". -$(LIBBPF_A): | $(LIBBPF_OUT) - $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= \ - $(LIBBPF_OUT)/libbpf.a install_headers - -libbpf_hdrs: $(LIBBPF_A) - -.PHONY: libbpf_hdrs - -$(LIBBPF_OUT): - $(call msg,MKDIR,$@) - $(Q)mkdir -p $@ +$(LIBBPF_A): + $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ - -I $(LIBBPF_INCLUDE) -Wno-unused-result + -I $(srctree)/tools/lib/ -Wno-unused-result userprogs := bpf_preload_umd -clean-files := libbpf/ - -$(obj)/iterators/iterators.o: | libbpf_hdrs +clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/ bpf_preload_umd-objs := iterators/iterators.o bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index b8bd605112..28fa8c1440 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -1,26 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 OUTPUT := .output -abs_out := $(abspath $(OUTPUT)) - CLANG ?= clang LLC ?= llc LLVM_STRIP ?= llvm-strip - -TOOLS_PATH := $(abspath ../../../../tools) -BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool -BPFTOOL_OUTPUT := $(abs_out)/bpftool DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) - -LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf -LIBBPF_OUTPUT := $(abs_out)/libbpf -LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include -BPFOBJ := $(LIBBPF_OUTPUT)/libbpf.a - -INCLUDES := -I$(OUTPUT) -I$(LIBBPF_INCLUDE) -I$(TOOLS_PATH)/include/uapi +LIBBPF_SRC := $(abspath ../../../../tools/lib/bpf) +BPFOBJ := $(OUTPUT)/libbpf.a +BPF_INCLUDE := $(OUTPUT) +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../../../tools/lib) \ + -I$(abspath ../../../../tools/include/uapi) CFLAGS := -g -Wall +abs_out := $(abspath $(OUTPUT)) ifeq ($(V),1) Q = msg = @@ -52,18 +44,14 @@ $(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT) -c $(filter %.c,$^) -o $@ && \ $(LLVM_STRIP) -g $@ -$(OUTPUT) $(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): +$(OUTPUT): $(call msg,MKDIR,$@) - $(Q)mkdir -p $@ + $(Q)mkdir -p $(OUTPUT) -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ - OUTPUT=$(abspath $(dir $@))/ prefix= \ - DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers + OUTPUT=$(abspath $(dir $@))/ $(abspath $@) -$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \ - OUTPUT=$(BPFTOOL_OUTPUT)/ \ - LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ - 
LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \ - prefix= DESTDIR=$(abs_out)/ install-bin +$(DEFAULT_BPFTOOL): + $(Q)$(MAKE) $(submake_extras) -C ../../../../tools/bpf/bpftool \ + prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 556a769b5b..93a5539179 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -152,12 +152,16 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; + u64 array_size; if (!bpf_capable()) return ERR_PTR(-EPERM); + array_size = sizeof(*array); + array_size += (u64)attr->max_entries * sizeof(struct sock *); + /* allocate all map elements and zero-initialize them */ - array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node); + array = bpf_map_area_alloc(array_size, numa_node); if (!array) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 710ba9de12..f1c51c4566 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -444,7 +444,7 @@ const struct bpf_func_proto bpf_ringbuf_output_proto = { .func = bpf_ringbuf_output, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 22c8ae94e4..0dcaed4d3f 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -7,10 +7,10 @@ #include #include #include +#include #include #include #include "percpu_freelist.h" -#include "mmap_unlock_work.h" #define STACK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ @@ -31,6 +31,25 @@ struct bpf_stack_map { struct stack_map_bucket *buckets[]; }; +/* irq_work to run up_read() for build_id lookup in nmi context */ +struct stack_map_irq_work { + struct irq_work irq_work; + struct mm_struct *mm; +}; + +static void do_up_read(struct irq_work *entry) +{ + struct stack_map_irq_work *work; + + if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) + return; + + work = container_of(entry, struct stack_map_irq_work, irq_work); + mmap_read_unlock_non_owner(work->mm); +} + +static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); + static inline bool stack_map_use_build_id(struct bpf_map *map) { return (map->map_flags & BPF_F_STACK_BUILD_ID); @@ -130,13 +149,35 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, u64 *ips, u32 trace_nr, bool user) { int i; - struct mmap_unlock_irq_work *work = NULL; - bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); struct vm_area_struct *vma; + bool irq_work_busy = false; + struct stack_map_irq_work *work = NULL; - /* If the irq_work is in use, fall back to report ips. Same - * fallback is used for kernel stack (!user) on a stackmap with - * build_id. + if (irqs_disabled()) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + work = this_cpu_ptr(&up_read_work); + if (irq_work_is_busy(&work->irq_work)) { + /* cannot queue more up_read, fallback */ + irq_work_busy = true; + } + } else { + /* + * PREEMPT_RT does not allow to trylock mmap sem in + * interrupt disabled context. Force the fallback code. + */ + irq_work_busy = true; + } + } + + /* + * We cannot do up_read() when the irq is disabled, because of + * risk to deadlock with rq_lock. To do build_id lookup when the + * irqs are disabled, we need to run up_read() in irq_work. 
We use + * a percpu variable to do the irq_work. If the irq_work is + * already used by another lookup, we fall back to report ips. + * + * Same fallback is used for kernel stack (!user) on a stackmap + * with build_id. */ if (!user || !current || !current->mm || irq_work_busy || !mmap_read_trylock(current->mm)) { @@ -162,7 +203,19 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, - vma->vm_start; id_offs[i].status = BPF_STACK_BUILD_ID_VALID; } - bpf_mmap_unlock_mm(work, current->mm); + + if (!work) { + mmap_read_unlock(current->mm); + } else { + work->mm = current->mm; + + /* The lock will be released once we're out of interrupt + * context. Tell lockdep that we've released it now so + * it doesn't complain that we forgot to release it. + */ + rwsem_release(&current->mm->mmap_lock.dep_map, _RET_IP_); + irq_work_queue(&work->irq_work); + } } static struct perf_callchain_entry * @@ -490,7 +543,7 @@ const struct bpf_func_proto bpf_get_task_stack_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, - .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg1_btf_id = &btf_task_struct_ids[0], .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, @@ -667,3 +720,16 @@ const struct bpf_map_ops stack_trace_map_ops = { .map_btf_name = "bpf_stack_map", .map_btf_id = &stack_trace_map_btf_id, }; + +static int __init stack_map_init(void) +{ + int cpu; + struct stack_map_irq_work *work; + + for_each_possible_cpu(cpu) { + work = per_cpu_ptr(&up_read_work, cpu); + init_irq_work(&work->irq_work, do_up_read); + } + return 0; +} +subsys_initcall(stack_map_init); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ca70fe6fba..42490c39df 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2,7 +2,6 @@ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com */ #include -#include #include #include #include @@ -215,8 +214,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, err = bpf_fd_reuseport_array_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK || - map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { + map->map_type == BPF_MAP_TYPE_STACK) { err = map->ops->map_push_elem(map, value, flags); } else { rcu_read_lock(); @@ -255,8 +253,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { err = bpf_fd_reuseport_array_lookup_elem(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK || - map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { + map->map_type == BPF_MAP_TYPE_STACK) { err = map->ops->map_peek_elem(map, value); } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { /* struct_ops map requires directly updating "value" */ @@ -366,7 +363,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->max_entries = attr->max_entries; map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); - map->map_extra = attr->map_extra; } static int bpf_map_alloc_id(struct bpf_map *map) @@ -574,7 +570,6 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" - "map_extra:\t%#llx\n" "memlock:\t%lu\n" "map_id:\t%u\n" "frozen:\t%u\n", @@ -583,7 +578,6 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct
file *filp) map->value_size, map->max_entries, map->map_flags, - (unsigned long long)map->map_extra, bpf_map_memory_footprint(map), map->id, READ_ONCE(map->frozen)); @@ -827,7 +821,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return ret; } -#define BPF_MAP_CREATE_LAST_FIELD map_extra +#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -848,10 +842,6 @@ static int map_create(union bpf_attr *attr) return -EINVAL; } - if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && - attr->map_extra != 0) - return -EINVAL; - f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) return f_flags; @@ -1101,14 +1091,6 @@ static int map_lookup_elem(union bpf_attr *attr) if (!value) goto free_key; - if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { - if (copy_from_user(value, uvalue, value_size)) - err = -EFAULT; - else - err = bpf_map_copy_value(map, key, value, attr->flags); - goto free_value; - } - err = bpf_map_copy_value(map, key, value, attr->flags); if (err) goto free_value; @@ -1892,8 +1874,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) "prog_id:\t%u\n" "run_time_ns:\t%llu\n" "run_cnt:\t%llu\n" - "recursion_misses:\t%llu\n" - "verified_insns:\t%u\n", + "recursion_misses:\t%llu\n", prog->type, prog->jited, prog_tag, @@ -1901,8 +1882,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) prog->aux->id, stats.nsecs, stats.cnt, - stats.misses, - prog->aux->verified_insns); + stats.misses); } #endif @@ -2202,7 +2182,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size +#define BPF_PROG_LOAD_LAST_FIELD fd_array static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) { @@ -3671,8 +3651,6 @@ static int bpf_prog_get_info_by_fd(struct file *file, info.run_cnt = stats.cnt; info.recursion_misses = stats.misses; - info.verified_insns = prog->aux->verified_insns; - if (!bpf_capable()) { info.jited_prog_len = 0; info.xlated_prog_len = 0; @@ -3919,7 +3897,6 @@ static int bpf_map_get_info_by_fd(struct file *file, info.value_size = map->value_size; info.max_entries = map->max_entries; info.map_flags = map->map_flags; - info.map_extra = map->map_extra; memcpy(info.name, map->name, sizeof(map->name)); if (map->btf) { @@ -4776,7 +4753,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, }; @@ -4803,31 +4780,6 @@ static const struct bpf_func_proto bpf_sys_close_proto = { .arg1_type = ARG_ANYTHING, }; -BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) -{ - if (flags) - return -EINVAL; - - if (name_sz <= 1 || name[name_sz - 1]) - return -EINVAL; - - if (!bpf_dump_raw_ok(current_cred())) - return -EPERM; - - *res = kallsyms_lookup_name(name); - return *res ? 
0 : -ENOENT; -} - -const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { - .func = bpf_kallsyms_lookup_name, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, - .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, -}; - static const struct bpf_func_proto * syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -4838,8 +4790,6 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: return &bpf_sys_close_proto; - case BPF_FUNC_kallsyms_lookup_name: - return &bpf_kallsyms_lookup_name_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index d94696198e..b48750bfba 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -8,7 +8,6 @@ #include #include #include -#include "mmap_unlock_work.h" struct bpf_iter_seq_task_common { struct pid_namespace *ns; @@ -525,6 +524,10 @@ static const struct seq_operations task_vma_seq_ops = { .show = task_vma_seq_show, }; +BTF_ID_LIST(btf_task_file_ids) +BTF_ID(struct, file) +BTF_ID(struct, vm_area_struct) + static const struct bpf_iter_seq_info task_seq_info = { .seq_ops = &task_seq_ops, .init_seq_private = init_seq_pidns, @@ -583,88 +586,23 @@ static struct bpf_iter_reg task_vma_reg_info = { .seq_info = &task_vma_seq_info, }; -BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start, - bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags) -{ - struct mmap_unlock_irq_work *work = NULL; - struct vm_area_struct *vma; - bool irq_work_busy = false; - struct mm_struct *mm; - int ret = -ENOENT; - - if (flags) - return -EINVAL; - - if (!task) - return -ENOENT; - - mm = task->mm; - if (!mm) - return -ENOENT; - - irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); - - if (irq_work_busy || !mmap_read_trylock(mm)) - return -EBUSY; - - vma = find_vma(mm, start); - - if (vma && vma->vm_start <= start && vma->vm_end > start) { - callback_fn((u64)(long)task, (u64)(long)vma, - (u64)(long)callback_ctx, 0, 0); - ret = 0; - } - bpf_mmap_unlock_mm(work, mm); - return ret; -} - -const struct bpf_func_proto bpf_find_vma_proto = { - .func = bpf_find_vma, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_BTF_ID, - .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], - .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_FUNC, - .arg4_type = ARG_PTR_TO_STACK_OR_NULL, - .arg5_type = ARG_ANYTHING, -}; - -DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); - -static void do_mmap_read_unlock(struct irq_work *entry) -{ - struct mmap_unlock_irq_work *work; - - if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) - return; - - work = container_of(entry, struct mmap_unlock_irq_work, irq_work); - mmap_read_unlock_non_owner(work->mm); -} - static int __init task_iter_init(void) { - struct mmap_unlock_irq_work *work; - int ret, cpu; + int ret; - for_each_possible_cpu(cpu) { - work = per_cpu_ptr(&mmap_unlock_work, cpu); - init_irq_work(&work->irq_work, do_mmap_read_unlock); - } - - task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; + task_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; ret = bpf_iter_reg_target(&task_reg_info); if (ret) return ret; - task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; - task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE]; + 
task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; + task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[0]; ret = bpf_iter_reg_target(&task_file_reg_info); if (ret) return ret; - task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; - task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA]; + task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; + task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1]; return bpf_iter_reg_target(&task_vma_reg_info); } late_initcall(task_iter_init); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5e7edf9130..2660fbced9 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -10,7 +10,6 @@ #include #include #include -#include /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -27,14 +26,6 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; /* serializes access to trampoline_table */ static DEFINE_MUTEX(trampoline_mutex); -bool bpf_prog_has_trampoline(const struct bpf_prog *prog) -{ - enum bpf_attach_type eatype = prog->expected_attach_type; - - return eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN; -} - void *bpf_jit_alloc_exec_page(void) { void *image; @@ -535,7 +526,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) } #define NO_START_TIME 1 -static __always_inline u64 notrace bpf_prog_start_time(void) +static u64 notrace bpf_prog_start_time(void) { u64 start = NO_START_TIME; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a39eedecc9..670721e39c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4,7 +4,6 @@ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io */ #include -#include #include #include #include @@ -241,6 +240,12 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_KFUNC_CALL; } +static bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@ -294,15 +299,13 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, "verifier log line truncated - local buffer too short\n"); - if (log->level == BPF_LOG_KERNEL) { - bool newline = n > 0 && log->kbuf[n - 1] == '\n'; - - pr_err("BPF: %s%s", log->kbuf, newline ? 
"" : "\n"); - return; - } - n = min(log->len_total - log->len_used - 1, n); log->kbuf[n] = '\0'; + + if (log->level == BPF_LOG_KERNEL) { + pr_err("BPF:%s\n", log->kbuf); + return; + } if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) log->len_used += n; else @@ -442,6 +445,18 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_SOCK_COMMON; } +static bool reg_type_may_be_null(enum bpf_reg_type type) +{ + return type == PTR_TO_MAP_VALUE_OR_NULL || + type == PTR_TO_SOCKET_OR_NULL || + type == PTR_TO_SOCK_COMMON_OR_NULL || + type == PTR_TO_TCP_SOCK_OR_NULL || + type == PTR_TO_BTF_ID_OR_NULL || + type == PTR_TO_MEM_OR_NULL || + type == PTR_TO_RDONLY_BUF_OR_NULL || + type == PTR_TO_RDWR_BUF_OR_NULL; +} + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return reg->type == PTR_TO_MAP_VALUE && @@ -450,14 +465,12 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { - return base_type(type) == PTR_TO_SOCKET || - base_type(type) == PTR_TO_TCP_SOCK || - base_type(type) == PTR_TO_MEM; -} - -static bool type_is_rdonly_mem(u32 type) -{ - return type & MEM_RDONLY; + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCKET_OR_NULL || + type == PTR_TO_TCP_SOCK || + type == PTR_TO_TCP_SOCK_OR_NULL || + type == PTR_TO_MEM || + type == PTR_TO_MEM_OR_NULL; } static bool arg_type_may_be_refcounted(enum bpf_arg_type type) @@ -465,9 +478,14 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type) return type == ARG_PTR_TO_SOCK_COMMON; } -static bool type_may_be_null(u32 type) +static bool arg_type_may_be_null(enum bpf_arg_type type) { - return type & PTR_MAYBE_NULL; + return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || + type == ARG_PTR_TO_MEM_OR_NULL || + type == ARG_PTR_TO_CTX_OR_NULL || + type == ARG_PTR_TO_SOCKET_OR_NULL || + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || + type == ARG_PTR_TO_STACK_OR_NULL; } /* Determine whether the function releases some resources allocated by another @@ -527,56 +545,39 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } -/* string representation of 'enum bpf_reg_type' - * - * Note that reg_type_str() can not appear more than once in a single verbose() - * statement. 
- */ -static const char *reg_type_str(struct bpf_verifier_env *env, - enum bpf_reg_type type) -{ - char postfix[16] = {0}, prefix[16] = {0}; - static const char * const str[] = { - [NOT_INIT] = "?", - [SCALAR_VALUE] = "inv", - [PTR_TO_CTX] = "ctx", - [CONST_PTR_TO_MAP] = "map_ptr", - [PTR_TO_MAP_VALUE] = "map_value", - [PTR_TO_STACK] = "fp", - [PTR_TO_PACKET] = "pkt", - [PTR_TO_PACKET_META] = "pkt_meta", - [PTR_TO_PACKET_END] = "pkt_end", - [PTR_TO_FLOW_KEYS] = "flow_keys", - [PTR_TO_SOCKET] = "sock", - [PTR_TO_SOCK_COMMON] = "sock_common", - [PTR_TO_TCP_SOCK] = "tcp_sock", - [PTR_TO_TP_BUFFER] = "tp_buffer", - [PTR_TO_XDP_SOCK] = "xdp_sock", - [PTR_TO_BTF_ID] = "ptr_", - [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", - [PTR_TO_MEM] = "mem", - [PTR_TO_BUF] = "buf", - [PTR_TO_FUNC] = "func", - [PTR_TO_MAP_KEY] = "map_key", - }; - - if (type & PTR_MAYBE_NULL) { - if (base_type(type) == PTR_TO_BTF_ID || - base_type(type) == PTR_TO_PERCPU_BTF_ID) - strncpy(postfix, "or_null_", 16); - else - strncpy(postfix, "_or_null", 16); - } - - if (type & MEM_RDONLY) - strncpy(prefix, "rdonly_", 16); - if (type & MEM_ALLOC) - strncpy(prefix, "alloc_", 16); - - snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", - prefix, str[base_type(type)], postfix); - return env->type_str_buf; -} +/* string representation of 'enum bpf_reg_type' */ +static const char * const reg_type_str[] = { + [NOT_INIT] = "?", + [SCALAR_VALUE] = "inv", + [PTR_TO_CTX] = "ctx", + [CONST_PTR_TO_MAP] = "map_ptr", + [PTR_TO_MAP_VALUE] = "map_value", + [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", + [PTR_TO_STACK] = "fp", + [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_META] = "pkt_meta", + [PTR_TO_PACKET_END] = "pkt_end", + [PTR_TO_FLOW_KEYS] = "flow_keys", + [PTR_TO_SOCKET] = "sock", + [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", + [PTR_TO_SOCK_COMMON] = "sock_common", + [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", + [PTR_TO_TP_BUFFER] = "tp_buffer", + [PTR_TO_XDP_SOCK] = "xdp_sock", + [PTR_TO_BTF_ID] = "ptr_", + [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", + [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", + [PTR_TO_MEM] = "mem", + [PTR_TO_MEM_OR_NULL] = "mem_or_null", + [PTR_TO_RDONLY_BUF] = "rdonly_buf", + [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", + [PTR_TO_RDWR_BUF] = "rdwr_buf", + [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", +}; static char slot_type_char[] = { [STACK_INVALID] = '?', @@ -611,61 +612,8 @@ static const char *kernel_type_name(const struct btf* btf, u32 id) return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } -static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) -{ - env->scratched_regs |= 1U << regno; -} - -static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) -{ - env->scratched_stack_slots |= 1ULL << spi; -} - -static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) -{ - return (env->scratched_regs >> regno) & 1; -} - -static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) -{ - return (env->scratched_stack_slots >> regno) & 1; -} - -static bool verifier_state_scratched(const struct bpf_verifier_env *env) -{ - return env->scratched_regs || env->scratched_stack_slots; -} - -static void mark_verifier_state_clean(struct bpf_verifier_env *env) -{ - env->scratched_regs = 0U; - env->scratched_stack_slots = 0ULL; -} - -/* Used for printing the entire verifier state. 
*/ -static void mark_verifier_state_scratched(struct bpf_verifier_env *env) -{ - env->scratched_regs = ~0U; - env->scratched_stack_slots = ~0ULL; -} - -/* The reg state of a pointer or a bounded scalar was saved when - * it was spilled to the stack. - */ -static bool is_spilled_reg(const struct bpf_stack_state *stack) -{ - return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; -} - -static void scrub_spilled_slot(u8 *stype) -{ - if (*stype != STACK_INVALID) - *stype = STACK_MISC; -} - static void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state, - bool print_all) + const struct bpf_func_state *state) { const struct bpf_reg_state *reg; enum bpf_reg_type t; @@ -678,11 +626,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, t = reg->type; if (t == NOT_INIT) continue; - if (!print_all && !reg_scratched(env, i)) - continue; verbose(env, " R%d", i); print_liveness(env, reg->live); - verbose(env, "=%s", reg_type_str(env, t)); + verbose(env, "=%s", reg_type_str[t]); if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && @@ -690,8 +636,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); } else { - if (base_type(t) == PTR_TO_BTF_ID || - base_type(t) == PTR_TO_PERCPU_BTF_ID) + if (t == PTR_TO_BTF_ID || + t == PTR_TO_BTF_ID_OR_NULL || + t == PTR_TO_PERCPU_BTF_ID) verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) @@ -700,9 +647,10 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); - else if (base_type(t) == CONST_PTR_TO_MAP || - base_type(t) == PTR_TO_MAP_KEY || - base_type(t) == PTR_TO_MAP_VALUE) + else if (t == CONST_PTR_TO_MAP || + t == PTR_TO_MAP_KEY || + t == PTR_TO_MAP_VALUE || + t == PTR_TO_MAP_VALUE_OR_NULL) verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, reg->map_ptr->value_size); @@ -767,14 +715,12 @@ static void print_verifier_state(struct bpf_verifier_env *env, types_buf[BPF_REG_SIZE] = 0; if (!valid) continue; - if (!print_all && !stack_slot_scratched(env, i)) - continue; verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); print_liveness(env, state->stack[i].spilled_ptr.live); - if (is_spilled_reg(&state->stack[i])) { + if (state->stack[i].slot_type[0] == STACK_SPILL) { reg = &state->stack[i].spilled_ptr; t = reg->type; - verbose(env, "=%s", reg_type_str(env, t)); + verbose(env, "=%s", reg_type_str[t]); if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) @@ -794,26 +740,6 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->in_async_callback_fn) verbose(env, " async_cb"); verbose(env, "\n"); - mark_verifier_state_clean(env); -} - -static inline u32 vlog_alignment(u32 pos) -{ - return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), - BPF_LOG_MIN_ALIGNMENT) - pos - 1; -} - -static void print_insn_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state) -{ - if (env->prev_log_len && env->prev_log_len == env->log.len_used) { - /* remove new line character */ - bpf_vlog_reset(&env->log, env->prev_log_len - 1); - verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' '); - } else { - verbose(env, "%d:", env->insn_idx); - } - print_verifier_state(env, 
state, false); } /* copy array src of length n * size bytes to dst. dst is reallocated if it's too @@ -1207,7 +1133,8 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) { - if (base_type(reg->type) == PTR_TO_MAP_VALUE) { + switch (reg->type) { + case PTR_TO_MAP_VALUE_OR_NULL: { const struct bpf_map *map = reg->map_ptr; if (map->inner_map_meta) { @@ -1226,10 +1153,32 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) } else { reg->type = PTR_TO_MAP_VALUE; } - return; + break; + } + case PTR_TO_SOCKET_OR_NULL: + reg->type = PTR_TO_SOCKET; + break; + case PTR_TO_SOCK_COMMON_OR_NULL: + reg->type = PTR_TO_SOCK_COMMON; + break; + case PTR_TO_TCP_SOCK_OR_NULL: + reg->type = PTR_TO_TCP_SOCK; + break; + case PTR_TO_BTF_ID_OR_NULL: + reg->type = PTR_TO_BTF_ID; + break; + case PTR_TO_MEM_OR_NULL: + reg->type = PTR_TO_MEM; + break; + case PTR_TO_RDONLY_BUF_OR_NULL: + reg->type = PTR_TO_RDONLY_BUF; + break; + case PTR_TO_RDWR_BUF_OR_NULL: + reg->type = PTR_TO_RDWR_BUF; + break; + default: + WARN_ONCE(1, "unknown nullable register type"); } - - reg->type &= ~PTR_MAYBE_NULL; } static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) @@ -1587,7 +1536,6 @@ static void init_func_state(struct bpf_verifier_env *env, state->frameno = frameno; state->subprogno = subprogno; init_reg_state(env, state); - mark_verifier_state_scratched(env); } /* Similar to push_stack(), but for async callbacks */ @@ -1685,168 +1633,52 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return env->subprog_cnt - 1; } -#define MAX_KFUNC_DESCS 256 -#define MAX_KFUNC_BTFS 256 - struct bpf_kfunc_desc { struct btf_func_model func_model; u32 func_id; s32 imm; - u16 offset; -}; - -struct bpf_kfunc_btf { - struct btf *btf; - struct module *module; - u16 offset; }; +#define MAX_KFUNC_DESCS 256 struct bpf_kfunc_desc_tab { struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; u32 nr_descs; }; -struct bpf_kfunc_btf_tab { - struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS]; - u32 nr_descs; -}; - -static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) +static int kfunc_desc_cmp_by_id(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; const struct bpf_kfunc_desc *d1 = b; /* func_id is not greater than BTF_MAX_TYPE */ - return d0->func_id - d1->func_id ?: d0->offset - d1->offset; -} - -static int kfunc_btf_cmp_by_off(const void *a, const void *b) -{ - const struct bpf_kfunc_btf *d0 = a; - const struct bpf_kfunc_btf *d1 = b; - - return d0->offset - d1->offset; + return d0->func_id - d1->func_id; } static const struct bpf_kfunc_desc * -find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) { struct bpf_kfunc_desc desc = { .func_id = func_id, - .offset = offset, }; struct bpf_kfunc_desc_tab *tab; tab = prog->aux->kfunc_tab; return bsearch(&desc, tab->descs, tab->nr_descs, - sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); } -static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, - s16 offset, struct module **btf_modp) -{ - struct bpf_kfunc_btf kf_btf = { .offset = offset }; - struct bpf_kfunc_btf_tab *tab; - struct bpf_kfunc_btf *b; - struct module *mod; - struct btf *btf; - int btf_fd; - - tab = env->prog->aux->kfunc_btf_tab; - b = bsearch(&kf_btf, tab->descs, tab->nr_descs, - sizeof(tab->descs[0]), kfunc_btf_cmp_by_off); - if (!b) { - if (tab->nr_descs == MAX_KFUNC_BTFS) { - verbose(env, "too 
many different module BTFs\n"); - return ERR_PTR(-E2BIG); - } - - if (bpfptr_is_null(env->fd_array)) { - verbose(env, "kfunc offset > 0 without fd_array is invalid\n"); - return ERR_PTR(-EPROTO); - } - - if (copy_from_bpfptr_offset(&btf_fd, env->fd_array, - offset * sizeof(btf_fd), - sizeof(btf_fd))) - return ERR_PTR(-EFAULT); - - btf = btf_get_by_fd(btf_fd); - if (IS_ERR(btf)) { - verbose(env, "invalid module BTF fd specified\n"); - return btf; - } - - if (!btf_is_module(btf)) { - verbose(env, "BTF fd for kfunc is not a module BTF\n"); - btf_put(btf); - return ERR_PTR(-EINVAL); - } - - mod = btf_try_get_module(btf); - if (!mod) { - btf_put(btf); - return ERR_PTR(-ENXIO); - } - - b = &tab->descs[tab->nr_descs++]; - b->btf = btf; - b->module = mod; - b->offset = offset; - - sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_btf_cmp_by_off, NULL); - } - if (btf_modp) - *btf_modp = b->module; - return b->btf; -} - -void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) -{ - if (!tab) - return; - - while (tab->nr_descs--) { - module_put(tab->descs[tab->nr_descs].module); - btf_put(tab->descs[tab->nr_descs].btf); - } - kfree(tab); -} - -static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, - u32 func_id, s16 offset, - struct module **btf_modp) -{ - if (offset) { - if (offset < 0) { - /* In the future, this can be allowed to increase limit - * of fd index into fd_array, interpreted as u16. - */ - verbose(env, "negative offset disallowed for kernel module function call\n"); - return ERR_PTR(-EINVAL); - } - - return __find_kfunc_desc_btf(env, offset, btf_modp); - } - return btf_vmlinux ?: ERR_PTR(-ENOENT); -} - -static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) { const struct btf_type *func, *func_proto; - struct bpf_kfunc_btf_tab *btf_tab; struct bpf_kfunc_desc_tab *tab; struct bpf_prog_aux *prog_aux; struct bpf_kfunc_desc *desc; const char *func_name; - struct btf *desc_btf; unsigned long addr; int err; prog_aux = env->prog->aux; tab = prog_aux->kfunc_tab; - btf_tab = prog_aux->kfunc_btf_tab; if (!tab) { if (!btf_vmlinux) { verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); @@ -1874,29 +1706,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_tab = tab; } - /* func_id == 0 is always invalid, but instead of returning an error, be - * conservative and wait until the code elimination pass before returning - * error, so that invalid calls that get pruned out can be in BPF programs - * loaded from userspace. It is also required that offset be untouched - * for such calls. 
- */ - if (!func_id && !offset) return 0; - - if (!btf_tab && offset) { - btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); - if (!btf_tab) - return -ENOMEM; - prog_aux->kfunc_btf_tab = btf_tab; - } - - desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); - if (IS_ERR(desc_btf)) { - verbose(env, "failed to find BTF for kernel function\n"); - return PTR_ERR(desc_btf); - } - - if (find_kfunc_desc(env->prog, func_id, offset)) + if (find_kfunc_desc(env->prog, func_id)) return 0; if (tab->nr_descs == MAX_KFUNC_DESCS) { @@ -1904,20 +1714,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return -E2BIG; } - func = btf_type_by_id(desc_btf, func_id); + func = btf_type_by_id(btf_vmlinux, func_id); if (!func || !btf_type_is_func(func)) { verbose(env, "kernel btf_id %u is not a function\n", func_id); return -EINVAL; } - func_proto = btf_type_by_id(desc_btf, func->type); + func_proto = btf_type_by_id(btf_vmlinux, func->type); if (!func_proto || !btf_type_is_func_proto(func_proto)) { verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", func_id); return -EINVAL; } - func_name = btf_name_by_offset(desc_btf, func->name_off); + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); addr = kallsyms_lookup_name(func_name); if (!addr) { verbose(env, "cannot find address for kernel function %s\n", @@ -1927,14 +1737,13 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; - desc->imm = BPF_CALL_IMM(addr); - desc->offset = offset; - err = btf_distill_func_proto(&env->log, desc_btf, + desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base; + err = btf_distill_func_proto(&env->log, btf_vmlinux, func_proto, func_name, &desc->func_model); if (!err) sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_id_off, NULL); + kfunc_desc_cmp_by_id, NULL); return err; } @@ -2005,10 +1814,16 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) return -EPERM; } - if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) + if (bpf_pseudo_func(insn)) { ret = add_subprog(env, i + insn->imm + 1); - else - ret = add_kfunc_call(env, insn->imm, insn->off); + if (ret >= 0) + /* remember subprog */ + insn[1].imm = ret; + } else if (bpf_pseudo_call(insn)) { + ret = add_subprog(env, i + insn->imm + 1); + } else { + ret = add_kfunc_call(env, insn->imm); + } if (ret < 0) return ret; @@ -2091,7 +1906,7 @@ static int mark_reg_read(struct bpf_verifier_env *env, break; if (parent->live & REG_LIVE_DONE) { verbose(env, "verifier BUG type %s var_off %lld off %d\n", - reg_type_str(env, parent->type), + reg_type_str[parent->type], parent->var_off.value, parent->off); return -EFAULT; } @@ -2275,8 +2090,6 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return -EINVAL; } - mark_reg_scratched(env, regno); - reg = &regs[regno]; rw64 = is_reg64(env, insn, regno, reg, t); if (t == SRC_OP) { @@ -2346,17 +2159,12 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) { const struct btf_type *func; - struct btf *desc_btf; if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); - if (IS_ERR(desc_btf)) - return ""; - - func = btf_type_by_id(desc_btf, insn->imm); - return btf_name_by_offset(desc_btf, func->name_off); + func = btf_type_by_id(btf_vmlinux, insn->imm); + return
btf_name_by_offset(btf_vmlinux, func->name_off); } /* For given verifier state backtrack_insn() is called from the last insn to @@ -2381,7 +2189,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (insn->code == 0) return 0; - if (env->log.level & BPF_LOG_LEVEL2) { + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); verbose(env, "%d: ", idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); @@ -2431,6 +2239,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg != BPF_REG_FP) return 0; + if (BPF_SIZE(insn->code) != BPF_DW) + return 0; /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be @@ -2453,6 +2263,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, /* scalars can only be spilled into stack */ if (insn->dst_reg != BPF_REG_FP) return 0; + if (BPF_SIZE(insn->code) != BPF_DW) + return 0; spi = (-insn->off - 1) / BPF_REG_SIZE; if (spi >= 64) { verbose(env, "BUG spi %d\n", spi); @@ -2568,7 +2380,7 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, reg->precise = true; } for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { - if (!is_spilled_reg(&func->stack[j])) + if (func->stack[j].slot_type[0] != STACK_SPILL) continue; reg = &func->stack[j].spilled_ptr; if (reg->type != SCALAR_VALUE) @@ -2610,7 +2422,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, } while (spi >= 0) { - if (!is_spilled_reg(&func->stack[spi])) { + if (func->stack[spi].slot_type[0] != STACK_SPILL) { stack_mask = 0; break; } @@ -2635,7 +2447,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; - if (env->log.level & BPF_LOG_LEVEL2) + if (env->log.level & BPF_LOG_LEVEL) verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); for (i = last_idx;;) { if (skip_first) { @@ -2709,7 +2521,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, return 0; } - if (!is_spilled_reg(&func->stack[i])) { + if (func->stack[i].slot_type[0] != STACK_SPILL) { stack_mask &= ~(1ull << i); continue; } @@ -2722,11 +2534,11 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, new_marks = true; reg->precise = true; } - if (env->log.level & BPF_LOG_LEVEL2) { - verbose(env, "parent %s regs=%x stack=%llx marks:", + if (env->log.level & BPF_LOG_LEVEL) { + print_verifier_state(env, func); + verbose(env, "parent %s regs=%x stack=%llx marks\n", new_marks ? 
"didn't have" : "already had", reg_mask, stack_mask); - print_verifier_state(env, func, true); } if (!reg_mask && !stack_mask) @@ -2752,8 +2564,9 @@ static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) static bool is_spillable_regtype(enum bpf_reg_type type) { - switch (base_type(type)) { + switch (type) { case PTR_TO_MAP_VALUE: + case PTR_TO_MAP_VALUE_OR_NULL: case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: @@ -2762,13 +2575,21 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_FLOW_KEYS: case CONST_PTR_TO_MAP: case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: - case PTR_TO_BUF: + case PTR_TO_BTF_ID_OR_NULL: + case PTR_TO_RDONLY_BUF: + case PTR_TO_RDONLY_BUF_OR_NULL: + case PTR_TO_RDWR_BUF: + case PTR_TO_RDWR_BUF_OR_NULL: case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: + case PTR_TO_MEM_OR_NULL: case PTR_TO_FUNC: case PTR_TO_MAP_KEY: return true; @@ -2812,21 +2633,15 @@ static bool __is_pointer_value(bool allow_ptr_leaks, } static void save_register_state(struct bpf_func_state *state, - int spi, struct bpf_reg_state *reg, - int size) + int spi, struct bpf_reg_state *reg) { int i; state->stack[spi].spilled_ptr = *reg; - if (size == BPF_REG_SIZE) - state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; - for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--) - state->stack[spi].slot_type[i - 1] = STACK_SPILL; - - /* size < 8 bytes spill */ - for (; i; i--) - scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]); + for (i = 0; i < BPF_REG_SIZE; i++) + state->stack[spi].slot_type[i] = STACK_SPILL; } /* check_stack_{read,write}_fixed_off functions track spill/fill of registers, @@ -2873,8 +2688,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, env->insn_aux_data[insn_idx].sanitize_stack_spill = true; } - mark_stack_slot_scratched(env, spi); - if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && + if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { if (dst_reg != BPF_REG_FP) { /* The backtracking logic can only recognize explicit @@ -2887,7 +2701,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, if (err) return err; } - save_register_state(state, spi, reg, size); + save_register_state(state, spi, reg); } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -2899,16 +2713,16 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); return -EINVAL; } - save_register_state(state, spi, reg, size); + save_register_state(state, spi, reg); } else { u8 type = STACK_MISC; /* regular write of data into stack destroys any spilled ptr */ state->stack[spi].spilled_ptr.type = NOT_INIT; /* Mark slots as STACK_MISC if they belonged to spilled ptr. 
*/ - if (is_spilled_reg(&state->stack[spi])) + if (state->stack[spi].slot_type[0] == STACK_SPILL) for (i = 0; i < BPF_REG_SIZE; i++) - scrub_spilled_slot(&state->stack[spi].slot_type[i]); + state->stack[spi].slot_type[i] = STACK_MISC; /* only mark the slot as written if all 8 bytes were written * otherwise read propagation may incorrectly stop too soon @@ -2995,7 +2809,6 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, slot = -i - 1; spi = slot / BPF_REG_SIZE; stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; - mark_stack_slot_scratched(env, spi); if (!env->allow_ptr_leaks && *stype != NOT_INIT @@ -3112,52 +2925,31 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, struct bpf_func_state *state = vstate->frame[vstate->curframe]; int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; struct bpf_reg_state *reg; - u8 *stype, type; + u8 *stype; stype = reg_state->stack[spi].slot_type; reg = &reg_state->stack[spi].spilled_ptr; - if (is_spilled_reg(&reg_state->stack[spi])) { - u8 spill_size = 1; - - for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--) - spill_size++; - - if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) { + if (stype[0] == STACK_SPILL) { + if (size != BPF_REG_SIZE) { if (reg->type != SCALAR_VALUE) { verbose_linfo(env, env->insn_idx, "; "); verbose(env, "invalid size of register fill\n"); return -EACCES; } - - mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); - if (dst_regno < 0) - return 0; - - if (!(off % BPF_REG_SIZE) && size == spill_size) { - /* The earlier check_reg_arg() has decided the - * subreg_def for this insn. Save it first. - */ - s32 subreg_def = state->regs[dst_regno].subreg_def; - - state->regs[dst_regno] = *reg; - state->regs[dst_regno].subreg_def = subreg_def; - } else { - for (i = 0; i < size; i++) { - type = stype[(slot - i) % BPF_REG_SIZE]; - if (type == STACK_SPILL) - continue; - if (type == STACK_MISC) - continue; - verbose(env, "invalid read from stack off %d+%d size %d\n", - off, i, size); - return -EACCES; - } + if (dst_regno >= 0) { mark_reg_unknown(env, state->regs, dst_regno); + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; } - state->regs[dst_regno].live |= REG_LIVE_WRITTEN; + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); return 0; } + for (i = 1; i < BPF_REG_SIZE; i++) { + if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { + verbose(env, "corrupted spill memory\n"); + return -EACCES; + } + } if (dst_regno >= 0) { /* restore register state from stack */ @@ -3180,6 +2972,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, } mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); } else { + u8 type; + for (i = 0; i < size; i++) { type = stype[(slot - i) % BPF_REG_SIZE]; if (type == STACK_MISC) @@ -3412,8 +3206,11 @@ static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, /* We may have adjusted the register pointing to memory region, so we * need to try adding each of min_value and max_value to off * to make sure our theoretical access will be safe. - * - * The minimum value is only important with signed + */ + if (env->log.level & BPF_LOG_LEVEL) + print_verifier_state(env, state); + + /* The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. 
If we are using signed variables for our * index'es we need to make sure that whatever we use @@ -3608,7 +3405,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, */ *reg_type = info.reg_type; - if (base_type(*reg_type) == PTR_TO_BTF_ID) { + if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) { *btf = info.btf; *btf_id = info.btf_id; } else { @@ -3676,7 +3473,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, } verbose(env, "R%d invalid %s access off=%d size=%d\n", - regno, reg_type_str(env, reg->type), off, size); + regno, reg_type_str[reg->type], off, size); return -EACCES; } @@ -3971,17 +3768,16 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, } #endif -static int __check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, - bool fixed_off_ok) +int check_ctx_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) { - /* Access to this pointer-typed register or passing it to a helper - * is only allowed in its original, unmodified form. + /* Access to ctx or passing it to a helper is only allowed in + * its original, unmodified form. */ - if (!fixed_off_ok && reg->off) { - verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", - reg_type_str(env, reg->type), regno, reg->off); + if (reg->off) { + verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", + regno, reg->off); return -EACCES; } @@ -3989,20 +3785,13 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable %s access var_off=%s disallowed\n", - reg_type_str(env, reg->type), tn_buf); + verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); return -EACCES; } return 0; } -int check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno) -{ - return __check_ptr_off_reg(env, reg, regno, false); -} - static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg, @@ -4411,30 +4200,15 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn mark_reg_unknown(env, regs, value_regno); } } - } else if (base_type(reg->type) == PTR_TO_MEM) { - bool rdonly_mem = type_is_rdonly_mem(reg->type); - - if (type_may_be_null(reg->type)) { - verbose(env, "R%d invalid mem access '%s'\n", regno, - reg_type_str(env, reg->type)); - return -EACCES; - } - - if (t == BPF_WRITE && rdonly_mem) { - verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str(env, reg->type)); - return -EACCES; - } - + } else if (reg->type == PTR_TO_MEM) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into mem\n", value_regno); return -EACCES; } - err = check_mem_region_access(env, regno, off, size, reg->mem_size, false); - if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) + if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; @@ -4447,7 +4221,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return -EACCES; } - err = check_ptr_off_reg(env, reg, regno); + err = check_ctx_reg(env, reg, regno); if (err < 0) return err; @@ -4464,7 +4238,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else { 
mark_reg_known_zero(env, regs, value_regno); - if (type_may_be_null(reg_type)) + if (reg_type_may_be_null(reg_type)) regs[value_regno].id = ++env->id_gen; /* A load of ctx field could have different * actual load size with the one encoded in the @@ -4472,7 +4246,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * a sub-register. */ regs[value_regno].subreg_def = DEF_NOT_SUBREG; - if (base_type(reg_type) == PTR_TO_BTF_ID) { + if (reg_type == PTR_TO_BTF_ID || + reg_type == PTR_TO_BTF_ID_OR_NULL) { regs[value_regno].btf = btf; regs[value_regno].btf_id = btf_id; } @@ -4525,7 +4300,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (type_is_sk_pointer(reg->type)) { if (t == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str(env, reg->type)); + regno, reg_type_str[reg->type]); return -EACCES; } err = check_sock_access(env, insn_idx, regno, off, size, t); @@ -4541,32 +4316,26 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == CONST_PTR_TO_MAP) { err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno); - } else if (base_type(reg->type) == PTR_TO_BUF) { - bool rdonly_mem = type_is_rdonly_mem(reg->type); - const char *buf_info; - u32 *max_access; - - if (rdonly_mem) { - if (t == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str(env, reg->type)); - return -EACCES; - } - buf_info = "rdonly"; - max_access = &env->prog->aux->max_rdonly_access; - } else { - buf_info = "rdwr"; - max_access = &env->prog->aux->max_rdwr_access; + } else if (reg->type == PTR_TO_RDONLY_BUF) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str[reg->type]); + return -EACCES; } - err = check_buffer_access(env, reg, regno, off, size, false, - buf_info, max_access); - - if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) + "rdonly", + &env->prog->aux->max_rdonly_access); + if (!err && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_RDWR_BUF) { + err = check_buffer_access(env, reg, regno, off, size, false, + "rdwr", + &env->prog->aux->max_rdwr_access); + if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, - reg_type_str(env, reg->type)); + reg_type_str[reg->type]); return -EACCES; } @@ -4640,7 +4409,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i is_sk_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, - reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; } @@ -4780,17 +4549,17 @@ static int check_stack_range_initialized( goto mark; } - if (is_spilled_reg(&state->stack[spi]) && + if (state->stack[spi].slot_type[0] == STACK_SPILL && state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) goto mark; - if (is_spilled_reg(&state->stack[spi]) && + if (state->stack[spi].slot_type[0] == STACK_SPILL && (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || env->allow_ptr_leaks)) { if (clobber) { __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); for (j = 0; j < BPF_REG_SIZE; j++) - scrub_spilled_slot(&state->stack[spi].slot_type[j]); + state->stack[spi].slot_type[j] = STACK_MISC; } goto mark; } @@ -4823,10 +4592,8 @@ static int check_helper_mem_access(struct 
bpf_verifier_env *env, int regno, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; - const char *buf_info; - u32 *max_access; - switch (base_type(reg->type)) { + switch (reg->type) { case PTR_TO_PACKET: case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, @@ -4845,20 +4612,18 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed); - case PTR_TO_BUF: - if (type_is_rdonly_mem(reg->type)) { - if (meta && meta->raw_mode) - return -EACCES; - - buf_info = "rdonly"; - max_access = &env->prog->aux->max_rdonly_access; - } else { - buf_info = "rdwr"; - max_access = &env->prog->aux->max_rdwr_access; - } + case PTR_TO_RDONLY_BUF: + if (meta && meta->raw_mode) + return -EACCES; return check_buffer_access(env, reg, regno, reg->off, access_size, zero_size_allowed, - buf_info, max_access); + "rdonly", + &env->prog->aux->max_rdonly_access); + case PTR_TO_RDWR_BUF: + return check_buffer_access(env, reg, regno, reg->off, + access_size, zero_size_allowed, + "rdwr", + &env->prog->aux->max_rdwr_access); case PTR_TO_STACK: return check_stack_range_initialized( env, @@ -4870,9 +4635,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, register_is_null(reg)) return 0; - verbose(env, "R%d type=%s ", regno, - reg_type_str(env, reg->type)); - verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); + verbose(env, "R%d type=%s expected=%s\n", regno, + reg_type_str[reg->type], + reg_type_str[PTR_TO_STACK]); return -EACCES; } } @@ -4883,7 +4648,7 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, if (register_is_null(reg)) return 0; - if (type_may_be_null(reg->type)) { + if (reg_type_may_be_null(reg->type)) { /* Assuming that the register contains a value check if the memory * access is safe. Temporarily save and restore the register's state as * the conversion shouldn't be visible to a caller. 
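The hunks above and below all follow one pattern: they back out the composable register-type encoding (a base_type() plus PTR_MAYBE_NULL/MEM_RDONLY flags) in favour of the older dedicated enumerators such as PTR_TO_RDONLY_BUF, PTR_TO_RDWR_BUF and the *_OR_NULL variants. A minimal userspace sketch of the flag encoding being removed; the constants and helpers here are illustrative stand-ins, not the kernel definitions (the real masks are derived from BPF_BASE_TYPE_BITS in include/linux/bpf.h):

/* sketch.c — illustrative only; build with: cc -Wall sketch.c */
#include <stdio.h>

#define BPF_BASE_TYPE_BITS  8
#define BPF_BASE_TYPE_MASK  ((1U << BPF_BASE_TYPE_BITS) - 1)
#define PTR_MAYBE_NULL      (1U << BPF_BASE_TYPE_BITS)        /* pointer may be NULL */
#define MEM_RDONLY          (1U << (BPF_BASE_TYPE_BITS + 1))  /* memory is read-only */

enum { PTR_TO_MEM = 5 };  /* stand-in for one enum bpf_reg_type value */

static unsigned int base_type(unsigned int t) { return t & BPF_BASE_TYPE_MASK; }
static int type_may_be_null(unsigned int t)   { return !!(t & PTR_MAYBE_NULL); }
static int type_is_rdonly_mem(unsigned int t) { return !!(t & MEM_RDONLY); }

int main(void)
{
	/* One composed value covers what previously needed separate
	 * enumerators (PTR_TO_MEM, PTR_TO_MEM_OR_NULL, rdonly buffer
	 * variants, ...).
	 */
	unsigned int t = PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY;

	printf("base=%u may_be_null=%d rdonly=%d\n",
	       base_type(t), type_may_be_null(t), type_is_rdonly_mem(t));
	return 0;
}

With the flags folded out, a single switch on base_type() handles every variant; the restored code instead spells each combination out as its own case, as the hunks that follow show.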
@@ -5031,8 +4796,9 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { - return base_type(type) == ARG_PTR_TO_MEM || - base_type(type) == ARG_PTR_TO_UNINIT_MEM; + return type == ARG_PTR_TO_MEM || + type == ARG_PTR_TO_MEM_OR_NULL || + type == ARG_PTR_TO_UNINIT_MEM; } static bool arg_type_is_mem_size(enum bpf_arg_type type) @@ -5082,10 +4848,7 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env, return -EINVAL; } break; - case BPF_MAP_TYPE_BLOOM_FILTER: - if (meta->func_id == BPF_FUNC_map_peek_elem) - *arg_type = ARG_PTR_TO_MAP_VALUE; - break; + default: break; } @@ -5137,8 +4900,8 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, - PTR_TO_MEM | MEM_ALLOC, - PTR_TO_BUF, + PTR_TO_RDONLY_BUF, + PTR_TO_RDWR_BUF, }, }; @@ -5155,7 +4918,7 @@ static const struct bpf_reg_types int_ptr_types = { static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; +static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -5169,26 +4932,31 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, + [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, [ARG_CONST_SIZE] = &scalar_types, [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_MAP_PTR] = &const_map_ptr_types, [ARG_PTR_TO_CTX] = &context_types, + [ARG_PTR_TO_CTX_OR_NULL] = &context_types, [ARG_PTR_TO_SOCK_COMMON] = &sock_types, #ifdef CONFIG_NET [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, #endif [ARG_PTR_TO_SOCKET] = &fullsock_types, + [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, + [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, [ARG_PTR_TO_UNINIT_MEM] = &mem_types, [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, + [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, - [ARG_PTR_TO_STACK] = &stack_ptr_types, + [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, }; @@ -5202,27 +4970,12 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, const struct bpf_reg_types *compatible; int i, j; - compatible = compatible_reg_types[base_type(arg_type)]; + compatible = compatible_reg_types[arg_type]; if (!compatible) { verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); return -EFAULT; } - /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, - * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY - * - * Same 
for MAYBE_NULL: - * - * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, - * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL - * - * Therefore we fold these flags depending on the arg_type before comparison. - */ - if (arg_type & MEM_RDONLY) - type &= ~MEM_RDONLY; - if (arg_type & PTR_MAYBE_NULL) - type &= ~PTR_MAYBE_NULL; - for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { expected = compatible->types[i]; if (expected == NOT_INIT) @@ -5232,14 +4985,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, goto found; } - verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); + verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); for (j = 0; j + 1 < i; j++) - verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); - verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); + verbose(env, "%s, ", reg_type_str[compatible->types[j]]); + verbose(env, "%s\n", reg_type_str[compatible->types[j]]); return -EACCES; found: - if (reg->type == PTR_TO_BTF_ID) { + if (type == PTR_TO_BTF_ID) { if (!arg_btf_id) { if (!compatible->btf_id) { verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); @@ -5255,6 +5008,12 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, kernel_type_name(btf_vmlinux, *arg_btf_id)); return -EACCES; } + + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", + regno); + return -EACCES; + } } return 0; @@ -5292,14 +5051,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EACCES; } - if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || - base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + if (arg_type == ARG_PTR_TO_MAP_VALUE || + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || + arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { err = resolve_map_arg_type(env, meta, &arg_type); if (err) return err; } - if (register_is_null(reg) && type_may_be_null(arg_type)) + if (register_is_null(reg) && arg_type_may_be_null(arg_type)) /* A NULL register has a SCALAR_VALUE type, so skip * type checking. */ @@ -5309,33 +5069,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, if (err) return err; - switch ((u32)type) { - case SCALAR_VALUE: - /* Pointer types where reg offset is explicitly allowed: */ - case PTR_TO_PACKET: - case PTR_TO_PACKET_META: - case PTR_TO_MAP_KEY: - case PTR_TO_MAP_VALUE: - case PTR_TO_MEM: - case PTR_TO_MEM | MEM_RDONLY: - case PTR_TO_MEM | MEM_ALLOC: - case PTR_TO_BUF: - case PTR_TO_BUF | MEM_RDONLY: - case PTR_TO_STACK: - /* Some of the argument types nevertheless require a - * zero register offset. 
- */ - if (arg_type == ARG_PTR_TO_ALLOC_MEM) - goto force_off_check; - break; - /* All the rest must be rejected: */ - default: -force_off_check: - err = __check_ptr_off_reg(env, reg, regno, - type == PTR_TO_BTF_ID); + if (type == PTR_TO_CTX) { + err = check_ctx_reg(env, reg, regno); if (err < 0) return err; - break; } skip_type_check: @@ -5391,11 +5128,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL); - } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || - base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { - if (type_may_be_null(arg_type) && register_is_null(reg)) - return 0; - + } else if (arg_type == ARG_PTR_TO_MAP_VALUE || + (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && + !register_is_null(reg)) || + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity */ @@ -5687,11 +5423,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; - case BPF_MAP_TYPE_BLOOM_FILTER: - if (func_id != BPF_FUNC_map_peek_elem && - func_id != BPF_FUNC_map_push_elem) - goto error; - break; default: break; } @@ -5759,16 +5490,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; - case BPF_FUNC_map_pop_elem: - if (map->map_type != BPF_MAP_TYPE_QUEUE && - map->map_type != BPF_MAP_TYPE_STACK) - goto error; - break; case BPF_FUNC_map_peek_elem: + case BPF_FUNC_map_pop_elem: case BPF_FUNC_map_push_elem: if (map->map_type != BPF_MAP_TYPE_QUEUE && - map->map_type != BPF_MAP_TYPE_STACK && - map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) + map->map_type != BPF_MAP_TYPE_STACK) goto error; break; case BPF_FUNC_sk_storage_get: @@ -6118,9 +5844,9 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); - print_verifier_state(env, caller, true); + print_verifier_state(env, caller); verbose(env, "callee:\n"); - print_verifier_state(env, callee, true); + print_verifier_state(env, callee); } return 0; } @@ -6211,27 +5937,6 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return 0; } -static int set_loop_callback_state(struct bpf_verifier_env *env, - struct bpf_func_state *caller, - struct bpf_func_state *callee, - int insn_idx) -{ - /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, - * u64 flags); - * callback_fn(u32 index, void *callback_ctx); - */ - callee->regs[BPF_REG_1].type = SCALAR_VALUE; - callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; - - /* unused */ - __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); - __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); - __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); - - callee->in_callback_fn = true; - return 0; -} - static int set_timer_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -6261,33 +5966,6 @@ static int set_timer_callback_state(struct bpf_verifier_env *env, return 0; } -static int set_find_vma_callback_state(struct bpf_verifier_env *env, - struct bpf_func_state *caller, - struct bpf_func_state *callee, - int insn_idx) -{ - /* bpf_find_vma(struct task_struct *task, u64 addr, - * void *callback_fn, void *callback_ctx, u64 flags) - * (callback_fn)(struct task_struct *task, - * struct vm_area_struct *vma, void *callback_ctx); - 
*/ - callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; - - callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; - __mark_reg_known_zero(&callee->regs[BPF_REG_2]); - callee->regs[BPF_REG_2].btf = btf_vmlinux; - callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA], - - /* pointer to stack or null */ - callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; - - /* unused */ - __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); - __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); - callee->in_callback_fn = true; - return 0; -} - static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@ -6335,9 +6013,9 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) *insn_idx = callee->callsite + 1; if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); - print_verifier_state(env, callee, true); + print_verifier_state(env, callee); verbose(env, "to caller at %d:\n", *insn_idx); - print_verifier_state(env, caller, true); + print_verifier_state(env, caller); } /* clear everything in the callee */ free_func_state(callee); @@ -6503,11 +6181,13 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, static int check_get_func_ip(struct bpf_verifier_env *env) { + enum bpf_attach_type eatype = env->prog->expected_attach_type; enum bpf_prog_type type = resolve_prog_type(env->prog); int func_id = BPF_FUNC_get_func_ip; if (type == BPF_PROG_TYPE_TRACING) { - if (!bpf_prog_has_trampoline(env->prog)) { + if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT && + eatype != BPF_MODIFY_RETURN) { verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", func_id_name(func_id), func_id); return -ENOTSUPP; @@ -6526,8 +6206,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; - enum bpf_return_type ret_type; - enum bpf_type_flag ret_flag; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; int insn_idx = *insn_idx_p; @@ -6605,7 +6283,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } - if (is_release_function(func_id)) { + if (func_id == BPF_FUNC_tail_call) { + err = check_reference_leak(env); + if (err) { + verbose(env, "tail_call would lead to reference leak\n"); + return err; + } + } else if (is_release_function(func_id)) { err = release_reference(env, meta.ref_obj_id); if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", @@ -6616,46 +6300,34 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); - switch (func_id) { - case BPF_FUNC_tail_call: - err = check_reference_leak(env); - if (err) { - verbose(env, "tail_call would lead to reference leak\n"); - return err; - } - break; - case BPF_FUNC_get_local_storage: - /* check that flags argument in get_local_storage(map, flags) is 0, - * this is required because get_local_storage() can't return an error. 
- */ - if (!register_is_null(&regs[BPF_REG_2])) { - verbose(env, "get_local_storage() doesn't support non-zero flags\n"); - return -EINVAL; - } - break; - case BPF_FUNC_for_each_map_elem: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_map_elem_callback_state); - break; - case BPF_FUNC_timer_set_callback: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_timer_callback_state); - break; - case BPF_FUNC_find_vma: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_find_vma_callback_state); - break; - case BPF_FUNC_snprintf: - err = check_bpf_snprintf_call(env, regs); - break; - case BPF_FUNC_loop: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_loop_callback_state); - break; + /* check that flags argument in get_local_storage(map, flags) is 0, + * this is required because get_local_storage() can't return an error. + */ + if (func_id == BPF_FUNC_get_local_storage && + !register_is_null(&regs[BPF_REG_2])) { + verbose(env, "get_local_storage() doesn't support non-zero flags\n"); + return -EINVAL; } - if (err) - return err; + if (func_id == BPF_FUNC_for_each_map_elem) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); + if (err < 0) + return -EINVAL; + } + + if (func_id == BPF_FUNC_timer_set_callback) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_timer_callback_state); + if (err < 0) + return -EINVAL; + } + + if (func_id == BPF_FUNC_snprintf) { + err = check_bpf_snprintf_call(env, regs); + if (err < 0) + return err; + } /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { @@ -6667,14 +6339,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; /* update return register (already marked as written above) */ - ret_type = fn->ret_type; - ret_flag = type_flag(fn->ret_type); - if (ret_type == RET_INTEGER) { + if (fn->ret_type == RET_INTEGER) { /* sets type to SCALAR_VALUE */ mark_reg_unknown(env, regs, BPF_REG_0); - } else if (ret_type == RET_VOID) { + } else if (fn->ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; - } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) { + } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || + fn->ret_type == RET_PTR_TO_MAP_VALUE) { /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); /* remember map_ptr, so that check_map_access() @@ -6688,25 +6359,28 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].map_uid = meta.map_uid; - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; - if (!type_may_be_null(ret_type) && - map_value_has_spin_lock(meta.map_ptr)) { - regs[BPF_REG_0].id = ++env->id_gen; + if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; + if (map_value_has_spin_lock(meta.map_ptr)) + regs[BPF_REG_0].id = ++env->id_gen; + } else { + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; } - } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) { + } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; - } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) { + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; + } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = 
PTR_TO_SOCK_COMMON | ret_flag; - } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) { + regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; + } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; - } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) { + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; + } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].mem_size = meta.mem_size; - } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { const struct btf_type *t; mark_reg_known_zero(env, regs, BPF_REG_0); @@ -6724,30 +6398,29 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn tname, PTR_ERR(ret)); return -EINVAL; } - regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; + regs[BPF_REG_0].type = + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? + PTR_TO_MEM : PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].mem_size = tsize; } else { - /* MEM_RDONLY may be carried from ret_flag, but it - * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise - * it will confuse the check of PTR_TO_BTF_ID in - * check_mem_access(). - */ - ret_flag &= ~MEM_RDONLY; - - regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + regs[BPF_REG_0].type = + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? + PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; regs[BPF_REG_0].btf = meta.ret_btf; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } - } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || + fn->ret_type == RET_PTR_TO_BTF_ID) { int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? 
+ PTR_TO_BTF_ID : + PTR_TO_BTF_ID_OR_NULL; ret_btf_id = *fn->ret_btf_id; if (ret_btf_id == 0) { - verbose(env, "invalid return type %u of func %s#%d\n", - base_type(ret_type), func_id_name(func_id), - func_id); + verbose(env, "invalid return type %d of func %s#%d\n", + fn->ret_type, func_id_name(func_id), func_id); return -EINVAL; } /* current BPF helper definitions are only coming from @@ -6756,12 +6429,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].btf = btf_vmlinux; regs[BPF_REG_0].btf_id = ret_btf_id; } else { - verbose(env, "unknown return type %u of func %s#%d\n", - base_type(ret_type), func_id_name(func_id), func_id); + verbose(env, "unknown return type %d of func %s#%d\n", + fn->ret_type, func_id_name(func_id), func_id); return -EINVAL; } - if (type_may_be_null(regs[BPF_REG_0].type)) + if (reg_type_may_be_null(regs[BPF_REG_0].type)) regs[BPF_REG_0].id = ++env->id_gen; if (is_ptr_cast_function(func_id)) { @@ -6848,33 +6521,23 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; - struct module *btf_mod = NULL; const struct btf_param *args; - struct btf *desc_btf; int err; - /* skip for now, but return error when we find this in fixup_kfunc_call */ - if (!insn->imm) - return 0; - - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); - if (IS_ERR(desc_btf)) - return PTR_ERR(desc_btf); - func_id = insn->imm; - func = btf_type_by_id(desc_btf, func_id); - func_name = btf_name_by_offset(desc_btf, func->name_off); - func_proto = btf_type_by_id(desc_btf, func->type); + func = btf_type_by_id(btf_vmlinux, func_id); + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_proto = btf_type_by_id(btf_vmlinux, func->type); if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id, btf_mod)) { + !env->ops->check_kfunc_call(func_id)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); + err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); if (err) return err; @@ -6882,15 +6545,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(desc_btf, t->type, + ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, &ptr_type_id); if (!btf_type_is_struct(ptr_type)) { - ptr_type_name = btf_name_by_offset(desc_btf, + ptr_type_name = btf_name_by_offset(btf_vmlinux, ptr_type->name_off); verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", func_name, btf_type_str(ptr_type), @@ -6898,7 +6561,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].btf = btf_vmlinux; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); @@ -6909,7 +6572,7 @@ static int 
check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) for (i = 0; i < nargs; i++) { u32 regno = i + 1; - t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL); + t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); if (btf_type_is_ptr(t)) mark_btf_func_reg_size(env, regno, sizeof(void *)); else @@ -6970,25 +6633,25 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { verbose(env, "math between %s pointer and %lld is not allowed\n", - reg_type_str(env, type), val); + reg_type_str[type], val); return false; } if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { verbose(env, "%s pointer offset %d is not allowed\n", - reg_type_str(env, type), reg->off); + reg_type_str[type], reg->off); return false; } if (smin == S64_MIN) { verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", - reg_type_str(env, type)); + reg_type_str[type]); return false; } if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { verbose(env, "value %lld makes %s pointer be out of bounds\n", - smin, reg_type_str(env, type)); + smin, reg_type_str[type]); return false; } @@ -7365,13 +7028,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; } - if (ptr_reg->type & PTR_MAYBE_NULL) { + switch (ptr_reg->type) { + case PTR_TO_MAP_VALUE_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", - dst, reg_type_str(env, ptr_reg->type)); + dst, reg_type_str[ptr_reg->type]); return -EACCES; - } - - switch (base_type(ptr_reg->type)) { case CONST_PTR_TO_MAP: /* smin_val represents the known value */ if (known && smin_val == 0 && opcode == BPF_ADD) @@ -7382,10 +7043,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_SOCK_COMMON: case PTR_TO_TCP_SOCK: case PTR_TO_XDP_SOCK: +reject: verbose(env, "R%d pointer arithmetic on %s prohibited\n", - dst, reg_type_str(env, ptr_reg->type)); + dst, reg_type_str[ptr_reg->type]); return -EACCES; default: + if (reg_type_may_be_null(ptr_reg->type)) + goto reject; break; } @@ -8356,12 +8020,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, state); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, state); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } @@ -9106,7 +8770,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, struct bpf_reg_state *reg, u32 id, bool is_null) { - if (type_may_be_null(reg->type) && reg->id == id && + if (reg_type_may_be_null(reg->type) && reg->id == id && !WARN_ON_ONCE(!reg->id)) { if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0) || @@ -9484,7 +9148,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, */ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && - type_may_be_null(dst_reg->type)) { + reg_type_may_be_null(dst_reg->type)) { /* Mark all identical registers in each branch as either * safe or unknown depending R == 0 or R != 0 conditional. 
*/ @@ -9500,7 +9164,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EACCES; } if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_verifier_state(env, this_branch->frame[this_branch->curframe]); return 0; } @@ -9543,7 +9207,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->src_reg == BPF_PSEUDO_BTF_ID) { dst_reg->type = aux->btf_var.reg_type; - switch (base_type(dst_reg->type)) { + switch (dst_reg->type) { case PTR_TO_MEM: dst_reg->mem_size = aux->btf_var.mem_size; break; @@ -9561,8 +9225,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->src_reg == BPF_PSEUDO_FUNC) { struct bpf_prog_aux *aux = env->prog->aux; - u32 subprogno = find_subprog(env, - env->insn_idx + insn->imm + 1); + u32 subprogno = insn[1].imm; if (!aux->func_info) { verbose(env, "missing btf func_info\n"); @@ -9682,7 +9345,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } - err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg); + err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg); if (err < 0) return err; @@ -9741,7 +9404,7 @@ static int check_return_code(struct bpf_verifier_env *env) /* enforce return zero from async callbacks like timer */ if (reg->type != SCALAR_VALUE) { verbose(env, "In async callback the register R0 is not a known value (%s)\n", - reg_type_str(env, reg->type)); + reg_type_str[reg->type]); return -EINVAL; } @@ -9755,7 +9418,7 @@ static int check_return_code(struct bpf_verifier_env *env) if (is_subprog) { if (reg->type != SCALAR_VALUE) { verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", - reg_type_str(env, reg->type)); + reg_type_str[reg->type]); return -EINVAL; } return 0; } @@ -9819,7 +9482,7 @@ static int check_return_code(struct bpf_verifier_env *env) if (reg->type != SCALAR_VALUE) { verbose(env, "At program exit the register R0 is not a known value (%s)\n", - reg_type_str(env, reg->type)); + reg_type_str[reg->type]); return -EINVAL; } @@ -10402,78 +10065,6 @@ static int check_btf_line(struct bpf_verifier_env *env, return err; } -#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo) -#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE - -static int check_core_relo(struct bpf_verifier_env *env, - const union bpf_attr *attr, - bpfptr_t uattr) -{ - u32 i, nr_core_relo, ncopy, expected_size, rec_size; - struct bpf_core_relo core_relo = {}; - struct bpf_prog *prog = env->prog; - const struct btf *btf = prog->aux->btf; - struct bpf_core_ctx ctx = { - .log = &env->log, - .btf = btf, - }; - bpfptr_t u_core_relo; - int err; - - nr_core_relo = attr->core_relo_cnt; - if (!nr_core_relo) - return 0; - if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo)) - return -EINVAL; - - rec_size = attr->core_relo_rec_size; - if (rec_size < MIN_CORE_RELO_SIZE || - rec_size > MAX_CORE_RELO_SIZE || - rec_size % sizeof(u32)) - return -EINVAL; - - u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel); - expected_size = sizeof(struct bpf_core_relo); - ncopy = min_t(u32, expected_size, rec_size); - - /* Unlike func_info and line_info, copy and apply each CO-RE - * relocation record one at a time. 
- */ - for (i = 0; i < nr_core_relo; i++) { - /* future proofing when sizeof(bpf_core_relo) changes */ - err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size); - if (err) { - if (err == -E2BIG) { - verbose(env, "nonzero tailing record in core_relo"); - if (copy_to_bpfptr_offset(uattr, - offsetof(union bpf_attr, core_relo_rec_size), - &expected_size, sizeof(expected_size))) - err = -EFAULT; - } - break; - } - - if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) { - err = -EFAULT; - break; - } - - if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) { - verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n", - i, core_relo.insn_off, prog->len); - err = -EINVAL; - break; - } - - err = bpf_core_apply(&ctx, &core_relo, i, - &prog->insnsi[core_relo.insn_off / 8]); - if (err) - break; - bpfptr_add(&u_core_relo, rec_size); - } - return err; -} - static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, bpfptr_t uattr) @@ -10504,10 +10095,6 @@ static int check_btf_info(struct bpf_verifier_env *env, if (err) return err; - err = check_core_relo(env, attr, uattr); - if (err) - return err; - return 0; } @@ -10676,7 +10263,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return true; if (rcur->type == NOT_INIT) return false; - switch (base_type(rold->type)) { + switch (rold->type) { case SCALAR_VALUE: if (env->explore_alu_limits) return false; @@ -10698,22 +10285,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: - /* a PTR_TO_MAP_VALUE could be safe to use as a - * PTR_TO_MAP_VALUE_OR_NULL into the same map. - * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- - * checked, doing so could have affected others with the same - * id, and we can't check for that because we lost the id when - * we converted to a PTR_TO_MAP_VALUE. - */ - if (type_may_be_null(rold->type)) { - if (!type_may_be_null(rcur->type)) - return false; - if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) - return false; - /* Check our ids match any regs they're supposed to */ - return check_ids(rold->id, rcur->id, idmap); - } - /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. * 'id' is not compared, since it's only used for maps with @@ -10725,6 +10296,20 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); + case PTR_TO_MAP_VALUE_OR_NULL: + /* a PTR_TO_MAP_VALUE could be safe to use as a + * PTR_TO_MAP_VALUE_OR_NULL into the same map. + * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- + * checked, doing so could have affected others with the same + * id, and we can't check for that because we lost the id when + * we converted to a PTR_TO_MAP_VALUE. 
+ */ + if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) + return false; + if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) + return false; + /* Check our ids match any regs they're supposed to */ + return check_ids(rold->id, rcur->id, idmap); case PTR_TO_PACKET_META: case PTR_TO_PACKET: if (rcur->type != rold->type) @@ -10753,8 +10338,11 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, case PTR_TO_PACKET_END: case PTR_TO_FLOW_KEYS: case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: /* Only valid matches are exact, which memcmp() above * would have accepted @@ -10811,9 +10399,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, * return false to continue verification of this path */ return false; - if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) + if (i % BPF_REG_SIZE) continue; - if (!is_spilled_reg(&old->stack[spi])) + if (old->stack[spi].slot_type[0] != STACK_SPILL) continue; if (!regsafe(env, &old->stack[spi].spilled_ptr, &cur->stack[spi].spilled_ptr, idmap)) @@ -11020,7 +10608,7 @@ static int propagate_precision(struct bpf_verifier_env *env, } for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (!is_spilled_reg(&state->stack[i])) + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; state_reg = &state->stack[i].spilled_ptr; if (state_reg->type != SCALAR_VALUE || @@ -11280,13 +10868,17 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) /* Return true if it's OK to have the same insn return a different type. */ static bool reg_type_mismatch_ok(enum bpf_reg_type type) { - switch (base_type(type)) { + switch (type) { case PTR_TO_CTX: case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID_OR_NULL: return false; default: return true; @@ -11366,12 +10958,16 @@ static int do_check(struct bpf_verifier_env *env) if (need_resched()) cond_resched(); - if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) { - verbose(env, "\nfrom %d to %d%s:", - env->prev_insn_idx, env->insn_idx, - env->cur_state->speculative ? - " (speculative execution)" : ""); - print_verifier_state(env, state->frame[state->curframe], true); + if (env->log.level & BPF_LOG_LEVEL2 || + (env->log.level & BPF_LOG_LEVEL && do_print_state)) { + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "%d:", env->insn_idx); + else + verbose(env, "\nfrom %d to %d%s:", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? 
+ " (speculative execution)" : ""); + print_verifier_state(env, state->frame[state->curframe]); do_print_state = false; } @@ -11382,15 +10978,9 @@ static int do_check(struct bpf_verifier_env *env) .private_data = env, }; - if (verifier_state_scratched(env)) - print_insn_state(env, state->frame[state->curframe]); - verbose_linfo(env, env->insn_idx, "; "); - env->prev_log_len = env->log.len_used; verbose(env, "%d: ", env->insn_idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); - env->prev_insn_print_len = env->log.len_used - env->prev_log_len; - env->prev_log_len = env->log.len_used; } if (bpf_prog_is_dev_bound(env->prog->aux)) { @@ -11512,7 +11102,7 @@ static int do_check(struct bpf_verifier_env *env) if (is_ctx_reg(env, insn->dst_reg)) { verbose(env, "BPF_ST stores into R%d %s is not allowed\n", insn->dst_reg, - reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; } @@ -11529,8 +11119,7 @@ static int do_check(struct bpf_verifier_env *env) env->jmps_processed++; if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || - (insn->src_reg != BPF_PSEUDO_KFUNC_CALL - && insn->off != 0) || + insn->off != 0 || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || @@ -11599,7 +11188,6 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; process_bpf_exit: - mark_verifier_state_scratched(env); update_branch_counts(env, env->cur_state); err = pop_stack(env, &prev_insn_idx, &env->insn_idx, pop_log); @@ -11765,7 +11353,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, err = -EINVAL; goto err_put; } - aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; + aux->btf_var.reg_type = PTR_TO_MEM; aux->btf_var.mem_size = tsize; } else { aux->btf_var.reg_type = PTR_TO_BTF_ID; @@ -11925,9 +11513,6 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } break; case BPF_MAP_TYPE_RINGBUF: - case BPF_MAP_TYPE_INODE_STORAGE: - case BPF_MAP_TYPE_SK_STORAGE: - case BPF_MAP_TYPE_TASK_STORAGE: break; default: verbose(env, @@ -12815,9 +12400,14 @@ static int jit_subprogs(struct bpf_verifier_env *env) return 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { - if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) + if (bpf_pseudo_func(insn)) { + env->insn_aux_data[i].call_imm = insn->imm; + /* subprog is encoded in insn[1].imm */ continue; + } + if (!bpf_pseudo_call(insn)) + continue; /* Upon error here we cannot fall back to interpreter but * need a hard reject of the program. Thus -EFAULT is * propagated in any case. @@ -12838,12 +12428,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) env->insn_aux_data[i].call_imm = insn->imm; /* point imm to __bpf_call_base+1 from JITs point of view */ insn->imm = 1; - if (bpf_pseudo_func(insn)) - /* jit (e.g. x86_64) may emit fewer instructions - * if it learns a u32 imm is the same as a u64 imm. - * Force a non zero here. 
- */ - insn[1].imm = 1; } err = bpf_prog_alloc_jited_linfo(prog); @@ -12898,7 +12482,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; - func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@ -12928,7 +12511,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { if (bpf_pseudo_func(insn)) { - subprog = insn->off; + subprog = insn[1].imm; insn[0].imm = (u32)(long)func[subprog]->bpf_func; insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; continue; @@ -12936,7 +12519,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; - insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func); + insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - + __bpf_call_base; } /* we use the aux data to keep a list of the start addresses @@ -12979,8 +12563,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (bpf_pseudo_func(insn)) { insn[0].imm = env->insn_aux_data[i].call_imm; - insn[1].imm = insn->off; - insn->off = 0; + insn[1].imm = find_subprog(env, i + insn[0].imm + 1); continue; } if (!bpf_pseudo_call(insn)) @@ -13085,15 +12668,10 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, { const struct bpf_kfunc_desc *desc; - if (!insn->imm) { - verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); - return -EINVAL; - } - /* insn->imm has the btf func_id. Replace it with * an address (relative to __bpf_base_call). 
*/ - desc = find_kfunc_desc(env->prog, insn->imm, insn->off); + desc = find_kfunc_desc(env->prog, insn->imm); if (!desc) { verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", insn->imm); @@ -13111,7 +12689,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; - enum bpf_attach_type eatype = prog->expected_attach_type; bool expect_blinding = bpf_jit_blinding_enabled(prog); enum bpf_prog_type prog_type = resolve_prog_type(prog); struct bpf_insn *insn = prog->insnsi; @@ -13375,8 +12952,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || - insn->imm == BPF_FUNC_redirect_map || - insn->imm == BPF_FUNC_for_each_map_elem)) { + insn->imm == BPF_FUNC_redirect_map)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -13420,37 +12996,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); - BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, - (int (*)(struct bpf_map *map, - bpf_callback_t callback_fn, - void *callback_ctx, - u64 flags))NULL)); patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: - insn->imm = BPF_CALL_IMM(ops->map_lookup_elem); + insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - + __bpf_call_base; continue; case BPF_FUNC_map_update_elem: - insn->imm = BPF_CALL_IMM(ops->map_update_elem); + insn->imm = BPF_CAST_CALL(ops->map_update_elem) - + __bpf_call_base; continue; case BPF_FUNC_map_delete_elem: - insn->imm = BPF_CALL_IMM(ops->map_delete_elem); + insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - + __bpf_call_base; continue; case BPF_FUNC_map_push_elem: - insn->imm = BPF_CALL_IMM(ops->map_push_elem); + insn->imm = BPF_CAST_CALL(ops->map_push_elem) - + __bpf_call_base; continue; case BPF_FUNC_map_pop_elem: - insn->imm = BPF_CALL_IMM(ops->map_pop_elem); + insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - + __bpf_call_base; continue; case BPF_FUNC_map_peek_elem: - insn->imm = BPF_CALL_IMM(ops->map_peek_elem); + insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - + __bpf_call_base; continue; case BPF_FUNC_redirect_map: - insn->imm = BPF_CALL_IMM(ops->map_redirect); - continue; - case BPF_FUNC_for_each_map_elem: - insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); + insn->imm = BPF_CAST_CALL(ops->map_redirect) - + __bpf_call_base; continue; } @@ -13482,79 +13057,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env) continue; } - /* Implement bpf_get_func_arg inline. 
*/ - if (prog_type == BPF_PROG_TYPE_TRACING && - insn->imm == BPF_FUNC_get_func_arg) { - /* Load nr_args from ctx - 8 */ - insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); - insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); - insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); - insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); - insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); - insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); - insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); - insn_buf[7] = BPF_JMP_A(1); - insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); - cnt = 9; - - new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); - if (!new_prog) - return -ENOMEM; - - delta += cnt - 1; - env->prog = prog = new_prog; - insn = new_prog->insnsi + i + delta; - continue; - } - - /* Implement bpf_get_func_ret inline. */ - if (prog_type == BPF_PROG_TYPE_TRACING && - insn->imm == BPF_FUNC_get_func_ret) { - if (eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN) { - /* Load nr_args from ctx - 8 */ - insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); - insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); - insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); - insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); - insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); - insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0); - cnt = 6; - } else { - insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); - cnt = 1; - } - - new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); - if (!new_prog) - return -ENOMEM; - - delta += cnt - 1; - env->prog = prog = new_prog; - insn = new_prog->insnsi + i + delta; - continue; - } - - /* Implement get_func_arg_cnt inline. */ - if (prog_type == BPF_PROG_TYPE_TRACING && - insn->imm == BPF_FUNC_get_func_arg_cnt) { - /* Load nr_args from ctx - 8 */ - insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); - - new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); - if (!new_prog) - return -ENOMEM; - - env->prog = prog = new_prog; - insn = new_prog->insnsi + i + delta; - continue; - } - /* Implement bpf_get_func_ip inline. */ if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_ip) { - /* Load IP address from ctx - 16 */ - insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); + /* Load IP address from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); if (!new_prog) @@ -13668,7 +13175,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) mark_reg_known_zero(env, regs, i); else if (regs[i].type == SCALAR_VALUE) mark_reg_unknown(env, regs, i); - else if (base_type(regs[i].type) == PTR_TO_MEM) { + else if (regs[i].type == PTR_TO_MEM_OR_NULL) { const u32 mem_size = regs[i].mem_size; mark_reg_known_zero(env, regs, i); @@ -13862,7 +13369,7 @@ BTF_SET_START(btf_non_sleepable_error_inject) /* Three functions below can be called from sleepable and non-sleepable context. * Assume non-sleepable from bpf safety point of view. */ -BTF_ID(func, __filemap_add_folio) +BTF_ID(func, __add_to_page_cache_locked) BTF_ID(func, should_fail_alloc_page) BTF_ID(func, should_failslab) BTF_SET_END(btf_non_sleepable_error_inject) @@ -14263,8 +13770,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) } } - mark_verifier_state_clean(env); - if (IS_ERR(btf_vmlinux)) { /* Either gcc or pahole or kernel are broken. 
*/ verbose(env, "in-kernel BTF is malformed\n"); @@ -14371,7 +13876,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) env->verification_time = ktime_get_ns() - start_time; print_verification_stats(env); - env->prog->aux->verified_insns = env->insn_processed; if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index afc6c0e9c9..58900dc92a 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -63,6 +63,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) for_each_root(root) { struct cgroup *from_cgrp; + if (root == &cgrp_dfl_root) + continue; + spin_lock_irq(&css_set_lock); from_cgrp = task_cgroup_from_root(from, root); spin_unlock_irq(&css_set_lock); @@ -672,9 +675,11 @@ int proc_cgroupstats_show(struct seq_file *m, void *v) seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); /* - * Grab the subsystems state racily. No need to add avenue to - * cgroup_mutex contention. + * ideally we don't want subsystems moving around while we do this. + * cgroup_mutex is also necessary to guarantee an atomic snapshot of + * subsys/hierarchy state. */ + mutex_lock(&cgroup_mutex); for_each_subsys(ss, i) seq_printf(m, "%s\t%d\t%d\t%d\n", @@ -682,6 +687,7 @@ int proc_cgroupstats_show(struct seq_file *m, void *v) atomic_read(&ss->root->nr_cgrps), cgroup_ssid_enabled(i)); + mutex_unlock(&cgroup_mutex); return 0; } @@ -708,6 +714,8 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) kernfs_type(kn) != KERNFS_DIR) return -EINVAL; + mutex_lock(&cgroup_mutex); + /* * We aren't being called from kernfs and there's no guarantee on * @kn->priv's validity. For this and css_tryget_online_from_dir(), @@ -715,8 +723,9 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) */ rcu_read_lock(); cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); - if (!cgrp || !cgroup_tryget(cgrp)) { + if (!cgrp || cgroup_is_dead(cgrp)) { rcu_read_unlock(); + mutex_unlock(&cgroup_mutex); return -ENOENT; } rcu_read_unlock(); @@ -744,7 +753,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) } css_task_iter_end(&it); - cgroup_put(cgrp); + mutex_unlock(&cgroup_mutex); return 0; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index cd3890829d..4cb09fcdc1 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -30,7 +30,6 @@ #include "cgroup-internal.h" -#include #include #include #include @@ -2651,11 +2650,11 @@ void cgroup_migrate_add_src(struct css_set *src_cset, if (src_cset->dead) return; + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); + if (!list_empty(&src_cset->mg_preload_node)) return; - src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); - WARN_ON(src_cset->mg_src_cgrp); WARN_ON(src_cset->mg_dst_cgrp); WARN_ON(!list_empty(&src_cset->mg_tasks)); @@ -5748,7 +5747,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) /* Create the root cgroup state for this subsystem */ ss->root = &cgrp_dfl_root; - css = ss->css_alloc(NULL); + css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss)); /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); @@ -5980,20 +5979,17 @@ struct cgroup *cgroup_get_from_id(u64 id) struct kernfs_node *kn; struct cgroup *cgrp = NULL; + mutex_lock(&cgroup_mutex); kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id); if (!kn) - goto 
out; + goto out_unlock; - rcu_read_lock(); - - cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); - if (cgrp && !cgroup_tryget(cgrp)) + cgrp = kn->priv; + if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp)) cgrp = NULL; - - rcu_read_unlock(); - kernfs_put(kn); -out: +out_unlock: + mutex_unlock(&cgroup_mutex); return cgrp; } EXPORT_SYMBOL_GPL(cgroup_get_from_id); @@ -6175,20 +6171,6 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) if (ret) goto err; - /* - * Spawning a task directly into a cgroup works by passing a file - * descriptor to the target cgroup directory. This can even be an O_PATH - * file descriptor. But it can never be a cgroup.procs file descriptor. - * This was done on purpose so spawning into a cgroup could be - * conceptualized as an atomic - * - * fd = openat(dfd_cgroup, "cgroup.procs", ...); - * write(fd, <pid>, ...); - * - * sequence, i.e. it's a shorthand for the caller opening and writing - * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us - * to always use the caller's credentials. - */ ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, !(kargs->flags & CLONE_THREAD), current->nsproxy->cgroup_ns); @@ -6590,34 +6572,30 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) * * Find the cgroup at @path on the default hierarchy, increment its * reference count and return it. Returns pointer to the found cgroup on - * success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already - * been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory. + * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR) + * if @path points to a non-directory. */ struct cgroup *cgroup_get_from_path(const char *path) { struct kernfs_node *kn; - struct cgroup *cgrp = ERR_PTR(-ENOENT); + struct cgroup *cgrp; + + mutex_lock(&cgroup_mutex); kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path); - if (!kn) - goto out; - - if (kernfs_type(kn) != KERNFS_DIR) { - cgrp = ERR_PTR(-ENOTDIR); - goto out_kernfs; + if (kn) { + if (kernfs_type(kn) == KERNFS_DIR) { + cgrp = kn->priv; + cgroup_get_live(cgrp); + } else { + cgrp = ERR_PTR(-ENOTDIR); + } + kernfs_put(kn); + } else { + cgrp = ERR_PTR(-ENOENT); } - rcu_read_lock(); - - cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); - if (!cgrp || !cgroup_tryget(cgrp)) - cgrp = ERR_PTR(-ENOENT); - - rcu_read_unlock(); - -out_kernfs: - kernfs_put(kn); -out: + mutex_unlock(&cgroup_mutex); return cgrp; } EXPORT_SYMBOL_GPL(cgroup_get_from_path); @@ -6745,6 +6723,44 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #endif /* CONFIG_SOCK_CGROUP_DATA */ +#ifdef CONFIG_CGROUP_BPF +int cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, + enum bpf_attach_type type, + u32 flags) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); + mutex_unlock(&cgroup_mutex); + return ret; +} + +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); + mutex_unlock(&cgroup_mutex); + return ret; +} + +int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_query(cgrp, attr, uattr); + mutex_unlock(&cgroup_mutex); + return ret; +} +#endif /* CONFIG_CGROUP_BPF */ + #ifdef 
CONFIG_SYSFS static ssize_t show_delegatable_files(struct cftype *files, char *buf, ssize_t size, const char *prefix) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 5de1844801..f6794602ab 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -69,13 +69,6 @@ DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); -/* - * There could be abnormal cpuset configurations for cpu or memory - * node binding, add this key to provide a quick low-cost judgement - * of the situation. - */ -DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key); - /* See "Frequency meter" comments, below. */ struct fmeter { @@ -379,17 +372,6 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); -static inline void check_insane_mems_config(nodemask_t *nodes) -{ - if (!cpusets_insane_config() && - movable_only_nodes(nodes)) { - static_branch_enable(&cpusets_insane_config_key); - pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n" - "Cpuset allocations might fail even with a lot of memory available.\n", - nodemask_pr_args(nodes)); - } -} - /* * Cgroup v2 behavior is used on the "cpus" and "mems" control files when * on default hierarchy or when the cpuset_v2_mode flag is set by mounting @@ -590,35 +572,6 @@ static inline void free_cpuset(struct cpuset *cs) kfree(cs); } -/* - * validate_change_legacy() - Validate conditions specific to legacy (v1) - * behavior. - */ -static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial) -{ - struct cgroup_subsys_state *css; - struct cpuset *c, *par; - int ret; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - /* Each of our child cpusets must be a subset of us */ - ret = -EBUSY; - cpuset_for_each_child(c, css, cur) - if (!is_cpuset_subset(c, trial)) - goto out; - - /* On legacy hierarchy, we must be a subset of our parent cpuset. */ - ret = -EACCES; - par = parent_cs(cur); - if (par && !is_cpuset_subset(trial, par)) - goto out; - - ret = 0; -out: - return ret; -} - /* * validate_change() - Used to validate that any proposed cpuset change * follows the structural rules for cpusets. @@ -643,21 +596,28 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) { struct cgroup_subsys_state *css; struct cpuset *c, *par; - int ret = 0; + int ret; rcu_read_lock(); - if (!is_in_v2_mode()) - ret = validate_change_legacy(cur, trial); - if (ret) - goto out; + /* Each of our child cpusets must be a subset of us */ + ret = -EBUSY; + cpuset_for_each_child(c, css, cur) + if (!is_cpuset_subset(c, trial)) + goto out; /* Remaining checks don't apply to root cpuset */ + ret = 0; if (cur == &top_cpuset) goto out; par = parent_cs(cur); + /* On legacy hierarchy, we must be a subset of our parent cpuset. */ + ret = -EACCES; + if (!is_in_v2_mode() && !is_cpuset_subset(trial, par)) + goto out; + /* * If either I or some sibling (!= me) is exclusive, we can't * overlap @@ -1205,7 +1165,9 @@ enum subparts_cmd { * * Because of the implicit cpu exclusive nature of a partition root, * cpumask changes that violates the cpu exclusivity rule will not be - * permitted when checked by validate_change(). + * permitted when checked by validate_change(). The validate_change() + * function will also prevent any changes to the cpu list if it is not + * a superset of children's cpu lists. 
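The superset rule described above reduces to a bitmask subset test. A toy model, with plain masks standing in for cpumasks (all names here are invented for the demo):

#include <stdbool.h>
#include <stdio.h>

/* Toy subset test in the spirit of is_cpuset_subset(). */
static bool subset(unsigned long a, unsigned long b)
{
	return (a & ~b) == 0;		/* every bit of a is also in b */
}

int main(void)
{
	unsigned long child = 0x0f;	 /* child partition owns cpus 0-3 */
	unsigned long parent_new = 0x3c; /* proposed parent list: cpus 2-5 */

	/* prints 0: rejected, the new parent list no longer covers the child */
	printf("change allowed: %d\n", subset(child, parent_new));
	return 0;
}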
*/ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, struct cpumask *newmask, @@ -1917,8 +1879,6 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) goto done; - check_insane_mems_config(&trialcs->mems_allowed); - spin_lock_irq(&callback_lock); cs->mems_allowed = trialcs->mems_allowed; spin_unlock_irq(&callback_lock); @@ -3224,9 +3184,6 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); mems_updated = !nodes_equal(new_mems, cs->effective_mems); - if (mems_updated) - check_insane_mems_config(&new_mems); - if (is_in_v2_mode()) hotplug_update_tasks(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); @@ -3524,8 +3481,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) return cs; } -/* - * __cpuset_node_allowed - Can we allocate on a memory node? +/** + * cpuset_node_allowed - Can we allocate on a memory node? * @node: is this an allowed node? * @gfp_mask: memory allocation flags * @@ -3567,7 +3524,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) bool __cpuset_node_allowed(int node, gfp_t gfp_mask) { struct cpuset *cs; /* current cpuset ancestors */ - bool allowed; /* is allocation in zone z allowed? */ + int allowed; /* is allocation in zone z allowed? */ unsigned long flags; if (in_interrupt()) @@ -3696,8 +3653,8 @@ void cpuset_print_current_mems_allowed(void) int cpuset_memory_pressure_enabled __read_mostly; -/* - * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. +/** + * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. * * Keep a running average of the rate of synchronous (direct) * page reclaim efforts initiated by tasks in each cpuset. @@ -3712,7 +3669,7 @@ int cpuset_memory_pressure_enabled __read_mostly; * "memory_pressure". Value displayed is an integer * representing the recent rate of entry into the synchronous * (direct) page reclaim by any task attached to the cpuset. 
- */ + **/ void __cpuset_memory_pressure_bump(void) { diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index fe3e8a0eb7..ec02d963ca 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -157,6 +157,13 @@ int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, new_usage = atomic_long_add_return(amount, &res->usage); if (new_usage > READ_ONCE(res->max) || new_usage > READ_ONCE(misc_res_capacity[type])) { + if (!res->failed) { + pr_info("cgroup: charge rejected by the misc controller for %s resource in ", + misc_res_name[type]); + pr_cont_cgroup_path(i->css.cgroup); + pr_cont("\n"); + res->failed = true; + } ret = -EBUSY; goto err_charge; } @@ -164,11 +171,6 @@ int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, return 0; err_charge: - for (j = i; j; j = parent_misc(j)) { - atomic_long_inc(&j->res[type].events); - cgroup_file_notify(&j->events_file); - } - for (j = cg; j != i; j = parent_misc(j)) misc_cg_cancel_charge(type, j, amount); misc_cg_cancel_charge(type, i, amount); @@ -333,19 +335,6 @@ static int misc_cg_capacity_show(struct seq_file *sf, void *v) return 0; } -static int misc_events_show(struct seq_file *sf, void *v) -{ - struct misc_cg *cg = css_misc(seq_css(sf)); - unsigned long events, i; - - for (i = 0; i < MISC_CG_RES_TYPES; i++) { - events = atomic_long_read(&cg->res[i].events); - if (READ_ONCE(misc_res_capacity[i]) || events) - seq_printf(sf, "%s.max %lu\n", misc_res_name[i], events); - } - return 0; -} - /* Misc cgroup interface files */ static struct cftype misc_cg_files[] = { { @@ -364,12 +353,6 @@ static struct cftype misc_cg_files[] = { .seq_show = misc_cg_capacity_show, .flags = CFTYPE_ONLY_ON_ROOT, }, - { - .name = "events", - .flags = CFTYPE_NOT_ON_ROOT, - .file_offset = offsetof(struct misc_cg, events_file), - .seq_show = misc_events_show, - }, {} }; diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 9d331ba448..1486768f23 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -35,7 +35,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) * instead of NULL, we can tell whether @cgrp is on the list by * testing the next pointer for NULL. */ - if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next)) + if (cgroup_rstat_cpu(cgrp, cpu)->updated_next) return; raw_spin_lock_irqsave(cpu_lock, flags); @@ -88,7 +88,6 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, struct cgroup *root, int cpu) { struct cgroup_rstat_cpu *rstatc; - struct cgroup *parent; if (pos == root) return NULL; @@ -97,14 +96,10 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * We're gonna walk down to the first leaf and visit/remove it. We * can pick whatever unvisited node as the starting point. */ - if (!pos) { + if (!pos) pos = root; - /* return NULL if this subtree is not on-list */ - if (!cgroup_rstat_cpu(pos, cpu)->updated_next) - return NULL; - } else { + else pos = cgroup_parent(pos); - } /* walk down to the first leaf */ while (true) { @@ -120,25 +115,33 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * However, due to the way we traverse, @pos will be the first * child in most cases. The only exception is @root. 
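Both versions of the traversal below splice @pos out of its parent's singly linked updated_children list by walking pointer-to-pointer until they find the link that targets it. A minimal standalone model of that unlink (plain C, not kernel code):

#include <stdio.h>

struct node { const char *name; struct node *next; };

static void unlink_node(struct node **head, struct node *pos)
{
	struct node **nextp = head;

	while (*nextp != pos)		/* assumes pos is on the list */
		nextp = &(*nextp)->next;
	*nextp = pos->next;		/* splice pos out */
	pos->next = NULL;
}

int main(void)
{
	struct node b = { "B", NULL }, pos = { "pos", &b }, a = { "A", &pos };
	struct node *head = &a;

	unlink_node(&head, &pos);
	printf("%s -> %s\n", head->name, head->next->name);	/* A -> B */
	return 0;
}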
*/ - parent = cgroup_parent(pos); - if (parent) { - struct cgroup_rstat_cpu *prstatc; - struct cgroup **nextp; + if (rstatc->updated_next) { + struct cgroup *parent = cgroup_parent(pos); - prstatc = cgroup_rstat_cpu(parent, cpu); - nextp = &prstatc->updated_children; - while (*nextp != pos) { - struct cgroup_rstat_cpu *nrstatc; + if (parent) { + struct cgroup_rstat_cpu *prstatc; + struct cgroup **nextp; - nrstatc = cgroup_rstat_cpu(*nextp, cpu); - WARN_ON_ONCE(*nextp == parent); - nextp = &nrstatc->updated_next; + prstatc = cgroup_rstat_cpu(parent, cpu); + nextp = &prstatc->updated_children; + while (true) { + struct cgroup_rstat_cpu *nrstatc; + + nrstatc = cgroup_rstat_cpu(*nextp, cpu); + if (*nextp == pos) + break; + WARN_ON_ONCE(*nextp == parent); + nextp = &nrstatc->updated_next; + } + *nextp = rstatc->updated_next; } - *nextp = rstatc->updated_next; + + rstatc->updated_next = NULL; + return pos; } - rstatc->updated_next = NULL; - return pos; + /* only happens for @root */ + return NULL; } /* see cgroup_rstat_flush() */ diff --git a/kernel/delayacct.c b/kernel/delayacct.c index c5e8cea9e0..51530d5b15 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -100,10 +100,19 @@ void __delayacct_blkio_start(void) */ void __delayacct_blkio_end(struct task_struct *p) { - delayacct_end(&p->delays->lock, - &p->delays->blkio_start, - &p->delays->blkio_delay, - &p->delays->blkio_count); + struct task_delay_info *delays = p->delays; + u64 *total; + u32 *count; + + if (p->delays->flags & DELAYACCT_PF_SWAPIN) { + total = &delays->swapin_delay; + count = &delays->swapin_count; + } else { + total = &delays->blkio_delay; + count = &delays->blkio_count; + } + + delayacct_end(&delays->lock, &delays->blkio_start, total, count); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -155,13 +164,10 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; - tmp = d->compact_delay_total + tsk->delays->compact_delay; - d->compact_delay_total = (tmp < d->compact_delay_total) ? 
0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; d->freepages_count += tsk->delays->freepages_count; d->thrashing_count += tsk->delays->thrashing_count; - d->compact_count += tsk->delays->compact_count; raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; @@ -173,7 +179,8 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk) unsigned long flags; raw_spin_lock_irqsave(&tsk->delays->lock, flags); - ret = nsec_to_clock_t(tsk->delays->blkio_delay); + ret = nsec_to_clock_t(tsk->delays->blkio_delay + + tsk->delays->swapin_delay); raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return ret; } @@ -203,29 +210,3 @@ void __delayacct_thrashing_end(void) &current->delays->thrashing_delay, &current->delays->thrashing_count); } - -void __delayacct_swapin_start(void) -{ - current->delays->swapin_start = local_clock(); -} - -void __delayacct_swapin_end(void) -{ - delayacct_end(&current->delays->lock, - &current->delays->swapin_start, - &current->delays->swapin_delay, - &current->delays->swapin_count); -} - -void __delayacct_compact_start(void) -{ - current->delays->compact_start = local_clock(); -} - -void __delayacct_compact_end(void) -{ - delayacct_end(&current->delays->lock, - &current->delays->compact_start, - &current->delays->compact_delay, - &current->delays->compact_count); -} diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 375fb3c953..25fc85a7ae 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -40,6 +40,7 @@ static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr, { struct dma_coherent_mem *dma_mem; int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); void *mem_base; if (!size) @@ -52,7 +53,7 @@ static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr, dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); if (!dma_mem) goto out_unmap_membase; - dma_mem->bitmap = bitmap_zalloc(pages, GFP_KERNEL); + dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); if (!dma_mem->bitmap) goto out_free_dma_mem; @@ -80,7 +81,7 @@ static void dma_release_coherent_memory(struct dma_coherent_mem *mem) return; memunmap(mem->virt_base); - bitmap_free(mem->bitmap); + kfree(mem->bitmap); kfree(mem); } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 50f48e9e45..4c6c5e0635 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -75,45 +75,15 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } -static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) -{ - if (!force_dma_unencrypted(dev)) - return 0; - return set_memory_decrypted((unsigned long)vaddr, 1 << get_order(size)); -} - -static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) -{ - int ret; - - if (!force_dma_unencrypted(dev)) - return 0; - ret = set_memory_encrypted((unsigned long)vaddr, 1 << get_order(size)); - if (ret) - pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n"); - return ret; -} - static void __dma_direct_free_pages(struct device *dev, struct page *page, size_t size) { - if (swiotlb_free(dev, page, size)) + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + swiotlb_free(dev, page, size)) return; dma_free_contiguous(dev, page, size); } -static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size) -{ - struct page *page = swiotlb_alloc(dev, size); - - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - swiotlb_free(dev, page, size); - 
return NULL; - } - - return page; -} - static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp) { @@ -123,11 +93,18 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, WARN_ON_ONCE(!PAGE_ALIGNED(size)); - if (is_swiotlb_for_alloc(dev)) - return dma_direct_alloc_swiotlb(dev, size); - gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + is_swiotlb_for_alloc(dev)) { + page = swiotlb_alloc(dev, size); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __dma_direct_free_pages(dev, page, size); + return NULL; + } + return page; + } + page = dma_alloc_contiguous(dev, size, gfp); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); @@ -156,15 +133,6 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, return page; } -/* - * Check if a potentially blocking operations needs to dip into the atomic - * pools for the given device/gfp. - */ -static bool dma_direct_use_pool(struct device *dev, gfp_t gfp) -{ - return !gfpflags_allow_blocking(gfp) && !is_swiotlb_for_alloc(dev); -} - static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -172,9 +140,6 @@ static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, u64 phys_mask; void *ret; - if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))) - return NULL; - gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_mask); page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok); @@ -184,103 +149,64 @@ static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, return ret; } -static void *dma_direct_alloc_no_mapping(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - struct page *page; - - page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); - if (!page) - return NULL; - - /* remove any dirty cache lines on the kernel alias */ - if (!PageHighMem(page)) - arch_dma_prep_coherent(page, size); - - /* return the page pointer as the opaque cookie */ - *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); - return page; -} - void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - bool remap = false, set_uncached = false; struct page *page; void *ret; + int err; size = PAGE_ALIGN(size); if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) - return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); - - if (!dev_is_dma_coherent(dev)) { - /* - * Fallback to the arch handler if it exists. This should - * eventually go away. - */ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && - !is_swiotlb_for_alloc(dev)) - return arch_dma_alloc(dev, size, dma_handle, gfp, - attrs); - - /* - * If there is a global pool, always allocate from it for - * non-coherent devices. - */ - if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL)) - return dma_alloc_from_global_coherent(dev, size, - dma_handle); - - /* - * Otherwise remap if the architecture is asking for it. But - * given that remapping memory is a blocking operation we'll - * instead have to dip into the atomic pools. 
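The test that the removed dma_direct_use_pool() helper centralized comes down to a single gfp bit, as in this sketch (the flag value is invented for the demo; the kernel's gfpflags_allow_blocking() checks __GFP_DIRECT_RECLAIM):

#include <stdbool.h>
#include <stdio.h>

#define GFP_DIRECT_RECLAIM 0x400u	/* invented value for the demo */

/* Same shape as gfpflags_allow_blocking(): only allocations that may
 * enter direct reclaim are allowed to sleep; everyone else has to be
 * served from the pre-populated atomic pool. (Model only.) */
static bool allows_blocking(unsigned int gfp)
{
	return gfp & GFP_DIRECT_RECLAIM;
}

int main(void)
{
	unsigned int gfp_atomic = 0x0u;			/* no reclaim bit */
	unsigned int gfp_kernel = GFP_DIRECT_RECLAIM;

	printf("atomic ctx -> pool: %d\n", !allows_blocking(gfp_atomic));
	printf("kernel ctx -> pool: %d\n", !allows_blocking(gfp_kernel));
	return 0;
}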
- */ - remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP); - if (remap) { - if (dma_direct_use_pool(dev, gfp)) - return dma_direct_alloc_from_pool(dev, size, - dma_handle, gfp); - } else { - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED)) - return NULL; - set_uncached = true; - } + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); + if (!page) + return NULL; + /* remove any dirty cache lines on the kernel alias */ + if (!PageHighMem(page)) + arch_dma_prep_coherent(page, size); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + /* return the page pointer as the opaque cookie */ + return page; } + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) + return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); + + if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + !dev_is_dma_coherent(dev)) + return dma_alloc_from_global_coherent(dev, size, dma_handle); + /* - * Decrypting memory may block, so allocate the memory from the atomic - * pools if we can't block. + * Remapping or decrypting memory may block. If either is required and + * we can't block, allocate the memory from the atomic pools. + * If restricted DMA (i.e., is_swiotlb_for_alloc) is required, one must + * set up another device coherent pool by shared-dma-pool and use + * dma_alloc_from_dev_coherent instead. */ - if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + !gfpflags_allow_blocking(gfp) && + (force_dma_unencrypted(dev) || + (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev))) && + !is_swiotlb_for_alloc(dev)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; - if (PageHighMem(page)) { - /* - * Depending on the cma= arguments and per-arch setup, - * dma_alloc_contiguous could return highmem pages. - * Without remapping there is no way to return them here, so - * log an error and fail. - */ - if (!IS_ENABLED(CONFIG_DMA_REMAP)) { - dev_info(dev, "Rejecting highmem page from CMA.\n"); - goto out_free_pages; - } - remap = true; - set_uncached = false; - } - if (remap) { + if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev)) || + (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) { /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); @@ -290,27 +216,56 @@ void *dma_direct_alloc(struct device *dev, size_t size, __builtin_return_address(0)); if (!ret) goto out_free_pages; - } else { - ret = page_address(page); - if (dma_set_decrypted(dev, ret, size)) + if (force_dma_unencrypted(dev)) { + err = set_memory_decrypted((unsigned long)ret, + 1 << get_order(size)); + if (err) + goto out_free_pages; + } + memset(ret, 0, size); + goto done; + } + + if (PageHighMem(page)) { + /* + * Depending on the cma= arguments and per-arch setup + * dma_alloc_contiguous could return highmem pages. + * Without remapping there is no way to return them here, + * so log an error and fail. 
+ */ + dev_info(dev, "Rejecting highmem page from CMA.\n"); + goto out_free_pages; + } + + ret = page_address(page); + if (force_dma_unencrypted(dev)) { + err = set_memory_decrypted((unsigned long)ret, + 1 << get_order(size)); + if (err) goto out_free_pages; } memset(ret, 0, size); - if (set_uncached) { + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !dev_is_dma_coherent(dev)) { arch_dma_prep_coherent(page, size); ret = arch_dma_set_uncached(ret, size); if (IS_ERR(ret)) goto out_encrypt_pages; } - +done: *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; out_encrypt_pages: - if (dma_set_encrypted(dev, page_address(page), size)) - return NULL; + if (force_dma_unencrypted(dev)) { + err = set_memory_encrypted((unsigned long)page_address(page), + 1 << get_order(size)); + /* If memory cannot be re-encrypted, it must be leaked */ + if (err) + return NULL; + } out_free_pages: __dma_direct_free_pages(dev, page, size); return NULL; @@ -349,14 +304,13 @@ void dma_direct_free(struct device *dev, size_t size, dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) return; - if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { + if (force_dma_unencrypted(dev)) + set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); + + if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) vunmap(cpu_addr); - } else { - if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) - arch_dma_clear_uncached(cpu_addr, size); - if (dma_set_encrypted(dev, cpu_addr, 1 << page_order)) - return; - } + else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) + arch_dma_clear_uncached(cpu_addr, size); __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } @@ -367,7 +321,9 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, struct page *page; void *ret; - if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) && + !is_swiotlb_for_alloc(dev)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); page = __dma_direct_alloc_pages(dev, size, gfp); @@ -385,8 +341,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, } ret = page_address(page); - if (dma_set_decrypted(dev, ret, size)) - goto out_free_pages; + if (force_dma_unencrypted(dev)) { + if (set_memory_decrypted((unsigned long)ret, + 1 << get_order(size))) + goto out_free_pages; + } memset(ret, 0, size); *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; @@ -407,8 +366,9 @@ void dma_direct_free_pages(struct device *dev, size_t size, dma_free_from_pool(dev, vaddr, size)) return; - if (dma_set_encrypted(dev, vaddr, 1 << page_order)) - return; + if (force_dma_unencrypted(dev)) + set_memory_encrypted((unsigned long)vaddr, 1 << page_order); + __dma_direct_free_pages(dev, page, size); } diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 9478eccd1c..8349a9f2c3 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -296,6 +296,10 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; + /* Don't allow RAM to be mapped */ + if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) + return DMA_MAPPING_ERROR; + if (dma_map_direct(dev, ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); else if (ops->map_resource) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index bfc56cb217..87c40517e8 100644 --- a/kernel/dma/swiotlb.c 
+++ b/kernel/dma/swiotlb.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_DEBUG_FS #include @@ -50,7 +50,6 @@ #include #include -#include #include #include #include @@ -73,8 +72,6 @@ enum swiotlb_force swiotlb_force; struct io_tlb_mem io_tlb_default_mem; -phys_addr_t swiotlb_unencrypted_base; - /* * Max segment that we can provide which (if pages are contingous) will * not be bounced (unless SWIOTLB_FORCE is set). @@ -158,34 +155,6 @@ static inline unsigned long nr_slots(u64 val) return DIV_ROUND_UP(val, IO_TLB_SIZE); } -/* - * Remap swioltb memory in the unencrypted physical address space - * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP - * Isolation VMs). - */ -#ifdef CONFIG_HAS_IOMEM -static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes) -{ - void *vaddr = NULL; - - if (swiotlb_unencrypted_base) { - phys_addr_t paddr = mem->start + swiotlb_unencrypted_base; - - vaddr = memremap(paddr, bytes, MEMREMAP_WB); - if (!vaddr) - pr_err("Failed to map the unencrypted memory %pa size %lx.\n", - &paddr, bytes); - } - - return vaddr; -} -#else -static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes) -{ - return NULL; -} -#endif - /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. This function allows the architecture to @@ -203,12 +172,7 @@ void __init swiotlb_update_mem_attributes(void) vaddr = phys_to_virt(mem->start); bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - - mem->vaddr = swiotlb_mem_remap(mem, bytes); - if (!mem->vaddr) - mem->vaddr = vaddr; - - memset(mem->vaddr, 0, bytes); + memset(vaddr, 0, bytes); } static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, @@ -232,17 +196,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; } - - /* - * If swiotlb_unencrypted_base is set, the bounce buffer memory will - * be remapped and cleared in swiotlb_update_mem_attributes. - */ - if (swiotlb_unencrypted_base) - return; - memset(vaddr, 0, bytes); - mem->vaddr = vaddr; - return; } int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) @@ -293,7 +247,7 @@ swiotlb_init(int verbose) return; fail_free_mem: - memblock_free(tlb, bytes); + memblock_free_early(__pa(tlb), bytes); fail: pr_warn("Cannot allocate buffer"); } @@ -417,7 +371,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; + unsigned char *vaddr = phys_to_virt(tlb_addr); unsigned int tlb_offset, orig_addr_offset; if (orig_addr == INVALID_PHYS_ADDR) @@ -505,7 +459,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) * allocate a buffer from that IO TLB pool. 
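The slot accounting behind this search is plain round-up division over fixed-size slabs; a standalone sketch, assuming the mainline IO_TLB_SHIFT of 11 (2 KiB slots):

#include <stdio.h>

#define IO_TLB_SHIFT 11			/* mainline value: 2 KiB slots */
#define IO_TLB_SIZE  (1ul << IO_TLB_SHIFT)

/* Same rounding as the kernel's nr_slots() helper shown above. */
static unsigned long nr_slots(unsigned long val)
{
	return (val + IO_TLB_SIZE - 1) / IO_TLB_SIZE;
}

int main(void)
{
	/* e.g. a 5000-byte bounce buffer occupies three 2 KiB slots */
	printf("nr_slots(5000) = %lu\n", nr_slots(5000));
	return 0;
}

So a 5000-byte mapping consumes three contiguous slots, and the stride logic in swiotlb_find_slots() only has to find a run of three free ones with a compatible alignment.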
*/ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, - size_t alloc_size, unsigned int alloc_align_mask) + size_t alloc_size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned long boundary_mask = dma_get_seg_boundary(dev); @@ -529,7 +483,6 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; if (alloc_size >= PAGE_SIZE) stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); - stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1); spin_lock_irqsave(&mem->lock, flags); if (unlikely(nslots > mem->nslabs - mem->used)) @@ -588,8 +541,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, size_t mapping_size, size_t alloc_size, - unsigned int alloc_align_mask, enum dma_data_direction dir, - unsigned long attrs) + enum dma_data_direction dir, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned int offset = swiotlb_align_offset(dev, orig_addr); @@ -600,7 +552,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, if (!mem) panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + if (mem_encrypt_active()) pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); if (mapping_size > alloc_size) { @@ -609,8 +561,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } - index = swiotlb_find_slots(dev, orig_addr, - alloc_size + offset, alloc_align_mask); + index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, @@ -628,8 +579,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(mem->start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || - dir == DMA_BIDIRECTIONAL)) + (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } @@ -725,7 +675,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, swiotlb_force); - swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; @@ -809,7 +759,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) if (!mem) return NULL; - index = swiotlb_find_slots(dev, 0, size, 0); + index = swiotlb_find_slots(dev, 0, size); if (index == -1) return NULL; diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bad713684c..d5a61d565a 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -187,7 +187,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, /* Check if any of the above work has queued a deferred wakeup */ tick_nohz_user_enter_prepare(); - ti_work = read_thread_flags(); + ti_work = READ_ONCE(current_thread_info()->flags); } /* Return the latest work state for arch_exit_to_user_mode() */ @@ -196,7 +196,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, static void 
exit_to_user_mode_prepare(struct pt_regs *regs) { - unsigned long ti_work = read_thread_flags(); + unsigned long ti_work = READ_ONCE(current_thread_info()->flags); lockdep_assert_irqs_disabled(); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 96d476e06c..49972ee99a 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -26,7 +26,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ret) return ret; - ti_work = read_thread_flags(); + ti_work = READ_ONCE(current_thread_info()->flags); } while (ti_work & XFER_TO_GUEST_MODE_WORK || need_resched()); return 0; } @@ -43,7 +43,7 @@ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) * disabled in the inner loop before going into guest mode. No need * to disable interrupts here. */ - ti_work = read_thread_flags(); + ti_work = READ_ONCE(current_thread_info()->flags); if (!(ti_work & XFER_TO_GUEST_MODE_WORK)) return 0; diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 8591c180b5..3c022e33c1 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile @@ -1,5 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE) +endif + obj-y := core.o ring_buffer.o callchain.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_UPROBES) += uprobes.o + diff --git a/kernel/events/core.c b/kernel/events/core.c index 6859229497..b81652fc2c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1875,8 +1875,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) list_add_rcu(&event->event_entry, &ctx->event_list); ctx->nr_events++; - if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) - ctx->nr_user++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -2068,8 +2066,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) event->attach_state &= ~PERF_ATTACH_CONTEXT; ctx->nr_events--; - if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) - ctx->nr_user--; if (event->attr.inherit_stat) ctx->nr_stat--; @@ -6602,43 +6598,33 @@ static void perf_pending_event(struct irq_work *entry) perf_swevent_put_recursion_context(rctx); } -#ifdef CONFIG_GUEST_PERF_EVENTS +/* + * We assume there is only KVM supporting the callbacks. + * Later on, we might change it to a list if there is + * another virtualization implementation supporting the callbacks. + */ struct perf_guest_info_callbacks __rcu *perf_guest_cbs; -DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state); -DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip); -DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); - -void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) +int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) - return; + return -EBUSY; rcu_assign_pointer(perf_guest_cbs, cbs); - static_call_update(__perf_guest_state, cbs->state); - static_call_update(__perf_guest_get_ip, cbs->get_ip); - - /* Implementing ->handle_intel_pt_intr is optional. 
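For context on what the removed lines provided: DEFINE_STATIC_CALL_RET0() lets an unregistered or optional callback behave as a cheap "return 0" rather than a NULL pointer every caller must test. A plain function-pointer model of that contract (userspace sketch, not the static_call machinery; names invented):

#include <stdio.h>

static unsigned int ret0(void) { return 0; }

static unsigned int (*guest_state)(void) = ret0;

static void register_cbs(unsigned int (*state)(void))
{
	guest_state = state ? state : ret0;	/* optional member */
}

static unsigned int kvm_state(void) { return 3; }

int main(void)
{
	printf("unregistered: %u\n", guest_state());	/* 0 */
	register_cbs(kvm_state);
	printf("registered:   %u\n", guest_state());	/* 3 */
	return 0;
}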
*/ - if (cbs->handle_intel_pt_intr) - static_call_update(__perf_guest_handle_intel_pt_intr, - cbs->handle_intel_pt_intr); + return 0; } EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); -void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) +int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) - return; + return -EINVAL; rcu_assign_pointer(perf_guest_cbs, NULL); - static_call_update(__perf_guest_state, (void *)&__static_call_return0); - static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0); - static_call_update(__perf_guest_handle_intel_pt_intr, - (void *)&__static_call_return0); synchronize_rcu(); + return 0; } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); -#endif static void perf_output_sample_regs(struct perf_output_handle *handle, @@ -9197,36 +9183,6 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -void perf_report_aux_output_id(struct perf_event *event, u64 hw_id) -{ - struct perf_output_handle handle; - struct perf_sample_data sample; - struct perf_aux_event { - struct perf_event_header header; - u64 hw_id; - } rec; - int ret; - - if (event->parent) - event = event->parent; - - rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID; - rec.header.misc = 0; - rec.header.size = sizeof(rec); - rec.hw_id = hw_id; - - perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, &sample, event, rec.header.size); - - if (ret) - return; - - perf_output_put(&handle, rec); - perf_event__output_id_sample(event, &handle, &sample); - - perf_output_end(&handle); -} - static int __perf_event_account_interrupt(struct perf_event *event, int throttle) { @@ -13592,5 +13548,3 @@ struct cgroup_subsys perf_event_cgrp_subsys = { .threaded = true, }; #endif /* CONFIG_CGROUP_PERF */ - -DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 082832738c..228801e207 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -205,7 +205,12 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) static inline int get_recursion_context(int *recursion) { - unsigned char rctx = interrupt_context_level(); + unsigned int pc = preempt_count(); + unsigned char rctx = 0; + + rctx += !!(pc & (NMI_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); if (recursion[rctx]) return -1; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6357c3580d..af24dc3feb 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -167,8 +167,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, addr + PAGE_SIZE); if (new_page) { - err = mem_cgroup_charge(page_folio(new_page), vma->vm_mm, - GFP_KERNEL); + err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL); if (err) return err; } diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4a..91a43e57a3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,6 +48,7 @@ #include #include /* for audit_free() */ #include +#include #include #include #include @@ -63,7 +64,6 @@ #include #include #include -#include #include #include @@ -116,7 +116,7 @@ static void __exit_signal(struct task_struct *tsk) * then notify it: */ if (sig->notify_count > 0 && !--sig->notify_count) - wake_up_process(sig->group_exec_task); + 
wake_up_process(sig->group_exit_task); if (tsk == sig->curr_target) sig->curr_target = next_thread(tsk); @@ -168,7 +168,6 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); - kprobe_flush_task(tsk); perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); @@ -340,46 +339,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) } } -static void coredump_task_exit(struct task_struct *tsk) -{ - struct core_state *core_state; - - /* - * Serialize with any possible pending coredump. - * We must hold siglock around checking core_state - * and setting PF_POSTCOREDUMP. The core-inducing thread - * will increment ->nr_threads for each thread in the - * group without PF_POSTCOREDUMP set. - */ - spin_lock_irq(&tsk->sighand->siglock); - tsk->flags |= PF_POSTCOREDUMP; - core_state = tsk->signal->core_state; - spin_unlock_irq(&tsk->sighand->siglock); - if (core_state) { - struct core_thread self; - - self.task = current; - if (self.task->flags & PF_SIGNALED) - self.next = xchg(&core_state->dumper.next, &self); - else - self.task = NULL; - /* - * Implies mb(), the result of xchg() must be visible - * to core_state->dumper. - */ - if (atomic_dec_and_test(&core_state->nr_threads)) - complete(&core_state->startup); - - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!self.task) /* see coredump_finish() */ - break; - freezable_schedule(); - } - __set_current_state(TASK_RUNNING); - } -} - #ifdef CONFIG_MEMCG /* * A task is exiting. If it owned this mm, find a new owner for the mm. @@ -475,12 +434,47 @@ void mm_update_next_owner(struct mm_struct *mm) static void exit_mm(void) { struct mm_struct *mm = current->mm; + struct core_state *core_state; exit_mm_release(current, mm); if (!mm) return; sync_mm_rss(mm); + /* + * Serialize with any possible pending coredump. + * We must hold mmap_lock around checking core_state + * and clearing tsk->mm. The core-inducing thread + * will increment ->nr_threads for each thread in the + * group with ->mm != NULL. + */ mmap_read_lock(mm); + core_state = mm->core_state; + if (core_state) { + struct core_thread self; + + mmap_read_unlock(mm); + + self.task = current; + if (self.task->flags & PF_SIGNALED) + self.next = xchg(&core_state->dumper.next, &self); + else + self.task = NULL; + /* + * Implies mb(), the result of xchg() must be visible + * to core_state->dumper. + */ + if (atomic_dec_and_test(&core_state->nr_threads)) + complete(&core_state->startup); + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!self.task) /* see coredump_finish() */ + break; + freezable_schedule(); + } + __set_current_state(TASK_RUNNING); + mmap_read_lock(mm); + } mmgrab(mm); BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ @@ -697,7 +691,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) /* mt-exec, de_thread() is waiting for group leader */ if (unlikely(tsk->signal->notify_count < 0)) - wake_up_process(tsk->signal->group_exec_task); + wake_up_process(tsk->signal->group_exit_task); write_unlock_irq(&tasklist_lock); list_for_each_entry_safe(p, n, &dead, ptrace_entry) { @@ -735,29 +729,54 @@ void __noreturn do_exit(long code) struct task_struct *tsk = current; int group_dead; + /* + * We can get here from a kernel oops, sometimes with preemption off. + * Start by checking for critical errors. + * Then fix up important state like USER_DS and preemption. + * Then do everything else. 
+ */ + WARN_ON(blk_needs_flush_plug(tsk)); + if (unlikely(in_interrupt())) + panic("Aiee, killing interrupt handler!"); + if (unlikely(!tsk->pid)) + panic("Attempted to kill the idle task!"); + /* - * If do_dead is called because this processes oopsed, it's possible + * If do_exit is called because this processes oopsed, it's possible * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before * continuing. Amongst other possible reasons, this is to prevent * mm_release()->clear_child_tid() from writing to a user-controlled * kernel address. - * - * On uptodate architectures force_uaccess_begin is a noop. On - * architectures that still have set_fs/get_fs in addition to handling - * oopses handles kernel threads that run as set_fs(KERNEL_DS) by - * default. */ force_uaccess_begin(); + if (unlikely(in_atomic())) { + pr_info("note: %s[%d] exited with preempt_count %d\n", + current->comm, task_pid_nr(current), + preempt_count()); + preempt_count_set(PREEMPT_ENABLED); + } + + profile_task_exit(tsk); kcov_task_exit(tsk); - coredump_task_exit(tsk); ptrace_event(PTRACE_EVENT_EXIT, code); validate_creds_for_do_exit(tsk); + /* + * We're taking recursive faults here in do_exit. Safest is to just + * leave this task alone and wait for reboot. + */ + if (unlikely(tsk->flags & PF_EXITING)) { + pr_alert("Fixing recursive fault but reboot is needed!\n"); + futex_exit_recursive(tsk); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + } + io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ @@ -856,46 +875,16 @@ void __noreturn do_exit(long code) lockdep_free_task(tsk); do_task_dead(); } +EXPORT_SYMBOL_GPL(do_exit); -void __noreturn make_task_dead(int signr) +void complete_and_exit(struct completion *comp, long code) { - /* - * Take the task off the cpu after something catastrophic has - * happened. - * - * We can get here from a kernel oops, sometimes with preemption off. - * Start by checking for critical errors. - * Then fix up important state like USER_DS and preemption. - * Then do everything else. - */ - struct task_struct *tsk = current; + if (comp) + complete(comp); - if (unlikely(in_interrupt())) - panic("Aiee, killing interrupt handler!"); - if (unlikely(!tsk->pid)) - panic("Attempted to kill the idle task!"); - - if (unlikely(in_atomic())) { - pr_info("note: %s[%d] exited with preempt_count %d\n", - current->comm, task_pid_nr(current), - preempt_count()); - preempt_count_set(PREEMPT_ENABLED); - } - - /* - * We're taking recursive faults here in make_task_dead. Safest is to just - * leave this task alone and wait for reboot. - */ - if (unlikely(tsk->flags & PF_EXITING)) { - pr_alert("Fixing recursive fault but reboot is needed!\n"); - futex_exit_recursive(tsk); - tsk->exit_state = EXIT_DEAD; - refcount_inc(&tsk->rcu_users); - do_task_dead(); - } - - do_exit(signr); + do_exit(code); } +EXPORT_SYMBOL(complete_and_exit); SYSCALL_DEFINE1(exit, int, error_code) { @@ -911,19 +900,17 @@ do_group_exit(int exit_code) { struct signal_struct *sig = current->signal; - if (sig->flags & SIGNAL_GROUP_EXIT) + BUG_ON(exit_code & 0x80); /* core dumps don't get here */ + + if (signal_group_exit(sig)) exit_code = sig->group_exit_code; - else if (sig->group_exec_task) - exit_code = 0; else if (!thread_group_empty(current)) { struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); - if (sig->flags & SIGNAL_GROUP_EXIT) + if (signal_group_exit(sig)) /* Another thread got here before we took the lock. 
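Both the removed coredump_task_exit() and the re-added exit_mm() path register the exiting thread with the coredump master the same way: one xchg() pushes it onto the dumper list with no lock held. A C11-atomics model of that push (names and types invented for the demo):

#include <stdatomic.h>
#include <stdio.h>

struct core_thread {
	int id;
	struct core_thread *next;
};

static _Atomic(struct core_thread *) dumper_head;

static void push(struct core_thread *self)
{
	/* one atomic swap: no lock, safe against concurrent exits */
	self->next = atomic_exchange(&dumper_head, self);
}

int main(void)
{
	struct core_thread a = { 1, NULL }, b = { 2, NULL };

	push(&a);
	push(&b);
	for (struct core_thread *t = atomic_load(&dumper_head); t; t = t->next)
		printf("thread %d\n", t->id);	/* 2 then 1 */
	return 0;
}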
*/ exit_code = sig->group_exit_code; - else if (sig->group_exec_task) - exit_code = 0; else { sig->group_exit_code = exit_code; sig->flags = SIGNAL_GROUP_EXIT; @@ -1018,8 +1005,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) return 0; if (unlikely(wo->wo_flags & WNOWAIT)) { - status = (p->signal->flags & SIGNAL_GROUP_EXIT) - ? p->signal->group_exit_code : p->exit_code; + status = p->exit_code; get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); diff --git a/kernel/extable.c b/kernel/extable.c index b6f330f0fe..b0ea5eb0c3 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -62,13 +62,40 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } +int init_kernel_text(unsigned long addr) +{ + if (addr >= (unsigned long)_sinittext && + addr < (unsigned long)_einittext) + return 1; + return 0; +} + int notrace core_kernel_text(unsigned long addr) { - if (is_kernel_text(addr)) + if (addr >= (unsigned long)_stext && + addr < (unsigned long)_etext) return 1; - if (system_state < SYSTEM_FREEING_INITMEM && - is_kernel_inittext(addr)) + if (system_state < SYSTEM_RUNNING && + init_kernel_text(addr)) + return 1; + return 0; +} + +/** + * core_kernel_data - tell if addr points to kernel data + * @addr: address to test + * + * Returns true if @addr passed in is from the core kernel data + * section. + * + * Note: On some archs it may return true for core RODATA, and false + * for others. But will always be true for core RW data. + */ +int core_kernel_data(unsigned long addr) +{ + if (addr >= (unsigned long)_sdata && + addr < (unsigned long)_edata) return 1; return 0; } @@ -85,7 +112,7 @@ int __kernel_text_address(unsigned long addr) * Since we are after the module-symbols check, there's * no danger of address overlap: */ - if (is_kernel_inittext(addr)) + if (init_kernel_text(addr)) return 1; return 0; } diff --git a/kernel/fork.c b/kernel/fork.c index a024bf6254..89475c994c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -77,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -366,14 +366,12 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) *new = data_race(*orig); INIT_LIST_HEAD(&new->anon_vma_chain); new->vm_next = new->vm_prev = NULL; - dup_vma_anon_name(orig, new); } return new; } void vm_area_free(struct vm_area_struct *vma) { - free_vma_anon_name(vma); kmem_cache_free(vm_area_cachep, vma); } @@ -757,7 +755,9 @@ void __put_task_struct(struct task_struct *tsk) delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); sched_core_free(tsk); - free_task(tsk); + + if (!profile_handoff_task(tsk)) + free_task(tsk); } EXPORT_SYMBOL_GPL(__put_task_struct); @@ -951,7 +951,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - tsk->worker_private = NULL; + tsk->pf_io_worker = NULL; account_kernel_stack(tsk, 1); @@ -1044,6 +1044,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, seqcount_init(&mm->write_protect_seq); mmap_init_lock(mm); INIT_LIST_HEAD(&mm->mmlist); + mm->core_state = NULL; mm_pgtables_bytes_init(mm); mm->map_count = 0; mm->locked_vm = 0; @@ -1391,7 +1392,8 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm) * purposes. 
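The guard that follows is the kernel half of the CLONE_CHILD_CLEARTID contract pthread_join() relies on: zero the registered word, then futex-wake anyone sleeping on it. A userspace sketch of the wake step (Linux-only, via raw syscall since glibc has no futex wrapper):

#include <linux/futex.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static int tid_word = 1234;	/* stand-in for *clear_child_tid */

int main(void)
{
	tid_word = 0;		/* the put_user(0, tid) step */
	/* wake one FUTEX_WAIT-er blocked on the word, if any */
	long n = syscall(SYS_futex, &tid_word, FUTEX_WAKE, 1,
			 NULL, NULL, 0);

	printf("woke %ld waiter(s)\n", n);
	return 0;
}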
*/ if (tsk->clear_child_tid) { - if (atomic_read(&mm->mm_users) > 1) { + if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && + atomic_read(&mm->mm_users) > 1) { /* * We don't check the error code - if userspace has * not set up a proper pointer then tough luck. @@ -1557,6 +1559,32 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk) return error; } +static int copy_io(unsigned long clone_flags, struct task_struct *tsk) +{ +#ifdef CONFIG_BLOCK + struct io_context *ioc = current->io_context; + struct io_context *new_ioc; + + if (!ioc) + return 0; + /* + * Share io context with parent, if CLONE_IO is set + */ + if (clone_flags & CLONE_IO) { + ioc_task_link(ioc); + tsk->io_context = ioc; + } else if (ioprio_valid(ioc->ioprio)) { + new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); + if (unlikely(!new_ioc)) + return -ENOMEM; + + new_ioc->ioprio = ioc->ioprio; + put_io_context(new_ioc); + } +#endif + return 0; +} + static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) { struct sighand_struct *sig; @@ -2007,6 +2035,12 @@ static __latent_entropy struct task_struct *copy_process( siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); } + /* + * This _must_ happen before we call free_task(), i.e. before we jump + * to any of the bad_fork_* labels. This is to avoid freeing + * p->set_child_tid which is (ab)used as a kthread's data pointer for + * kernel threads (PF_KTHREAD). + */ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; /* * Clear TID on mm_release()? @@ -2087,16 +2121,12 @@ static __latent_entropy struct task_struct *copy_process( p->io_context = NULL; audit_set_context(p, NULL); cgroup_fork(p); - if (p->flags & PF_KTHREAD) { - if (!set_kthread_struct(p)) - goto bad_fork_cleanup_delayacct; - } #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; - goto bad_fork_cleanup_delayacct; + goto bad_fork_cleanup_threadgroup_lock; } #endif #ifdef CONFIG_CPUSETS @@ -2443,8 +2473,8 @@ static __latent_entropy struct task_struct *copy_process( lockdep_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); +bad_fork_cleanup_threadgroup_lock: #endif -bad_fork_cleanup_delayacct: delayacct_tsk_free(p); bad_fork_cleanup_count: dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); @@ -3008,7 +3038,7 @@ int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, int ksys_unshare(unsigned long unshare_flags) { struct fs_struct *fs, *new_fs = NULL; - struct files_struct *new_fd = NULL; + struct files_struct *fd, *new_fd = NULL; struct cred *new_cred = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; @@ -3095,8 +3125,11 @@ int ksys_unshare(unsigned long unshare_flags) spin_unlock(&fs->lock); } - if (new_fd) - swap(current->files, new_fd); + if (new_fd) { + fd = current->files; + current->files = new_fd; + new_fd = fd; + } task_unlock(current); diff --git a/kernel/futex.c b/kernel/futex.c index 2f72a221d8..c15ad276fd 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Fast Userspace Mutexes (which I call "Futexes!"). * (C) Rusty Russell, IBM 2002 @@ -29,42 +30,15 @@ * * "The futexes are also cursed." * "But they come in a choice of three flavours!" 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include -#include -#include -#include +#include #include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include +#include #include @@ -144,8 +118,7 @@ * * Where (A) orders the waiters increment and the futex value read through * atomic operations (see hb_waiters_inc) and where (B) orders the write - * to futex and the waiters read -- this is done by the barriers for both - * shared and private futexes in get_futex_key_refs(). + * to futex and the waiters read (see hb_waiters_pending()). * * This yields the following case (where X:=waiters, Y:=futex): * @@ -171,8 +144,10 @@ * double_lock_hb() and double_unlock_hb(), respectively. */ -#ifndef CONFIG_HAVE_FUTEX_CMPXCHG -int __read_mostly futex_cmpxchg_enabled; +#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +#define futex_cmpxchg_enabled 1 +#else +static int __read_mostly futex_cmpxchg_enabled; #endif /* @@ -204,10 +179,10 @@ struct futex_pi_state { /* * The PI object: */ - struct rt_mutex pi_mutex; + struct rt_mutex_base pi_mutex; struct task_struct *owner; - atomic_t refcount; + refcount_t refcount; union futex_key key; } __randomize_layout; @@ -222,8 +197,10 @@ struct futex_pi_state { * @rt_waiter: rt_waiter storage for use with requeue_pi * @requeue_pi_key: the requeue_pi target futex key * @bitset: bitset for the optional bitmasked wakeup + * @requeue_state: State field for futex_requeue_pi() + * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) * - * We use this hashed waitqueue, instead of a normal wait_queue_t, so + * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so * we can wake only the relevant ones (hashed queues may be shared). * * A futex_q has a woken state, just like tasks have TASK_RUNNING. @@ -244,12 +221,68 @@ struct futex_q { struct rt_mutex_waiter *rt_waiter; union futex_key *requeue_pi_key; u32 bitset; + atomic_t requeue_state; +#ifdef CONFIG_PREEMPT_RT + struct rcuwait requeue_wait; +#endif } __randomize_layout; +/* + * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an + * underlying rtmutex. The task which is about to be requeued could have + * just woken up (timeout, signal). After the wake up the task has to + * acquire the hash bucket lock, which is held by the requeue code. As a task + * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking + * and the hash bucket lock blocking would collide and corrupt state. + * + * On !PREEMPT_RT this is not a problem and everything could be serialized + * on the hash bucket lock, but aside from having the benefit of common code, + * this allows us to avoid doing the requeue when the task is already on the + * way out and taking the hash bucket lock of the original uaddr1 when the + * requeue has been completed.
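As an aside on the futex_cmpxchg_enabled hunk above: defining the flag as the literal 1 when CONFIG_HAVE_FUTEX_CMPXCHG is set lets the compiler constant-fold away every fallback branch, while the #else keeps a runtime variable for architectures that must probe. A hedged sketch of the pattern, with CONFIG_HAVE_FOO as a made-up stand-in for a Kconfig symbol:

/* CONFIG_HAVE_FOO is an illustrative stand-in for a Kconfig symbol. */
#ifdef CONFIG_HAVE_FOO
#define foo_enabled     1       /* compile-time constant */
#else
static int foo_enabled;         /* probed once during early init */
#endif

int foo_op(void)
{
        if (!foo_enabled)       /* dead code when foo_enabled is #defined to 1 */
                return -38;     /* i.e. -ENOSYS */
        return 0;
}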
+ * + * The following state transitions are valid: + * + * On the waiter side: + * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE + * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT + * + * On the requeue side: + * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IN_PROGRESS + * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED + * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed) + * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED + * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed) + * + * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this + * signals that the waiter is already on the way out. It also means that + * the waiter is still on the 'wait' futex, i.e. uaddr1. + * + * The waiter side signals early wakeup to the requeue side either through + * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending + * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately + * proceed to take the hash bucket lock of uaddr1. If it set the state to WAIT, + * which means the wakeup is interleaving with a requeue in progress, it has + * to wait for the requeue side to change the state. Either to DONE/LOCKED + * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex + * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by + * the requeue side when the requeue attempt failed via deadlock detection + * and therefore the waiter q is still on the uaddr1 futex. + */ +enum { + Q_REQUEUE_PI_NONE = 0, + Q_REQUEUE_PI_IGNORE, + Q_REQUEUE_PI_IN_PROGRESS, + Q_REQUEUE_PI_WAIT, + Q_REQUEUE_PI_DONE, + Q_REQUEUE_PI_LOCKED, +}; + static const struct futex_q futex_q_init = { /* list gets initialized in queue_me() */ - .key = FUTEX_KEY_INIT, - .bitset = FUTEX_BITSET_MATCH_ANY + .key = FUTEX_KEY_INIT, + .bitset = FUTEX_BITSET_MATCH_ANY, + .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE), }; /* @@ -316,12 +349,8 @@ static int __init fail_futex_debugfs(void) if (IS_ERR(dir)) return PTR_ERR(dir); - if (!debugfs_create_bool("ignore-private", mode, dir, - &fail_futex.ignore_private)) { - debugfs_remove_recursive(dir); - return -ENOMEM; - } - + debugfs_create_bool("ignore-private", mode, dir, + &fail_futex.ignore_private); return 0; } @@ -336,16 +365,9 @@ static inline bool should_fail_futex(bool fshared) } #endif /* CONFIG_FAIL_FUTEX */ -static inline void futex_get_mm(union futex_key *key) -{ - atomic_inc(&key->private.mm->mm_count); - /* - * Ensure futex_get_mm() implies a full barrier such that - * get_futex_key() implies a full barrier. This is relied upon - * as smp_mb(); (B), see the ordering comment above. - */ - smp_mb__after_atomic(); -} +#ifdef CONFIG_COMPAT +static void compat_exit_robust_list(struct task_struct *curr); +#endif /* * Reflects a new waiter being added to the waitqueue. @@ -375,6 +397,10 @@ static inline void hb_waiters_dec(struct futex_hash_bucket *hb) static inline int hb_waiters_pending(struct futex_hash_bucket *hb) { #ifdef CONFIG_SMP + /* + * Full barrier (B), see the ordering comment above.
+ */ + smp_mb(); return atomic_read(&hb->waiters); #else return 1; @@ -390,9 +416,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb) */ static struct futex_hash_bucket *hash_futex(union futex_key *key) { - u32 hash = jhash2((u32*)&key->both.word, - (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); + return &futex_queues[hash & (futex_hashsize - 1)]; } @@ -412,90 +438,109 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) && key1->both.offset == key2->both.offset); } -/* - * Take a reference to the resource addressed by a key. - * Can be called while holding spinlocks. +enum futex_access { + FUTEX_READ, + FUTEX_WRITE +}; + +/** + * futex_setup_timer - set up the sleeping hrtimer. + * @time: ptr to the given timeout value + * @timeout: the hrtimer_sleeper structure to be set up + * @flags: futex flags + * @range_ns: optional range in ns * + * Return: Initialized hrtimer_sleeper structure or NULL if no timeout + * value given */ -static void get_futex_key_refs(union futex_key *key) +static inline struct hrtimer_sleeper * +futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, + int flags, u64 range_ns) { - if (!key->both.ptr) - return; + if (!time) + return NULL; + hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ? + CLOCK_REALTIME : CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); /* - * On MMU less systems futexes are always "private" as there is no per - * process address space. We need the smp wmb nevertheless - yes, - * arch/blackfin has MMU less SMP ... + * If range_ns is 0, calling hrtimer_set_expires_range_ns() is + * effectively the same as calling hrtimer_set_expires(). */ - if (!IS_ENABLED(CONFIG_MMU)) { - smp_mb(); /* explicit smp_mb(); (B) */ - return; - } + hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns); - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - ihold(key->shared.inode); /* implies smp_mb(); (B) */ - break; - case FUT_OFF_MMSHARED: - futex_get_mm(key); /* implies smp_mb(); (B) */ - break; - default: - /* - * Private futexes do not hold reference on an inode or - * mm, therefore the only purpose of calling get_futex_key_refs - * is because we need the barrier for the lockless waiter check. - */ - smp_mb(); /* explicit smp_mb(); (B) */ - } + return timeout; } /* - * Drop a reference to the resource addressed by a key. - * The hash bucket spinlock must not be held. This is - * a no-op for private futexes, see comment in the get - * counterpart. + * Generate a machine wide unique identifier for this inode. + * + * This relies on u64 not wrapping in the life-time of the machine; which with + * 1ns resolution means almost 585 years. + * + * This further relies on the fact that a well formed program will not unmap + * the file while it has a (shared) futex waiting on it. This mapping will have + * a file reference which pins the mount and inode. + * + * If for some reason an inode gets evicted and read back in again, it will get + * a new sequence number and will _NOT_ match, even though it is the exact same + * file. + * + * It is important that match_futex() will never have a false-positive, esp. + * for PI futexes that can mess up the state. The above argues that false-negatives + * are only possible for malformed programs. 
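A user-space C11 sketch of the allocate-once scheme that get_inode_sequence_number() implements just below: a global counter hands out non-zero IDs, the first thread to install one into the per-object slot wins, and every later caller adopts the winner's value. All names here are illustrative:

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t id_seq;         /* global, monotonically increasing */

static uint64_t object_get_id(_Atomic uint64_t *slot)
{
        uint64_t old = atomic_load(slot);

        if (old)                        /* fast path: already assigned */
                return old;

        for (;;) {
                uint64_t new = atomic_fetch_add(&id_seq, 1) + 1;

                if (new == 0)           /* 0 is reserved as "unset" */
                        continue;

                old = 0;
                if (atomic_compare_exchange_strong(slot, &old, new))
                        return new;     /* we installed our ID */
                return old;             /* lost the race; adopt the winner's */
        }
}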
*/ -static void drop_futex_key_refs(union futex_key *key) +static u64 get_inode_sequence_number(struct inode *inode) { - if (!key->both.ptr) { - /* If we're here then we tried to put a key we failed to get */ - WARN_ON_ONCE(1); - return; - } + static atomic64_t i_seq; + u64 old; - if (!IS_ENABLED(CONFIG_MMU)) - return; + /* Does the inode already have a sequence number? */ + old = atomic64_read(&inode->i_sequence); + if (likely(old)) + return old; - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - iput(key->shared.inode); - break; - case FUT_OFF_MMSHARED: - mmdrop(key->private.mm); - break; + for (;;) { + u64 new = atomic64_add_return(1, &i_seq); + if (WARN_ON_ONCE(!new)) + continue; + + old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); + if (old) + return old; + return new; } } /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex - * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED + * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED * @key: address where result is stored. - * @rw: mapping needs to be read/write (values: VERIFY_READ, - * VERIFY_WRITE) + * @rw: mapping needs to be read/write (values: FUTEX_READ, + * FUTEX_WRITE) * * Return: a negative error code or 0 * - * The key words are stored in *key on success. + * The key words are stored in @key on success. * - * For shared mappings, it's (page->index, file_inode(vma->vm_file), - * offset_within_page). For private mappings, it's (uaddr, current->mm). - * We can usually work out the index without swapping in the page. + * For shared mappings (when @fshared), the key is: + * + * ( inode->i_sequence, page->index, offset_within_page ) + * + * [ also see get_inode_sequence_number() ] + * + * For private mappings (or when !@fshared), the key is: + * + * ( current->mm, address, 0 ) + * + * This allows (cross process, where applicable) identification of the futex + * without keeping the page pinned for the duration of the FUTEX_WAIT. * * lock_page() might sleep, the caller should not hold a spinlock. */ -static int -get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) +static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, + enum futex_access rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; @@ -503,11 +548,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) struct address_space *mapping; int err, ro = 0; -#ifdef CONFIG_PAX_SEGMEXEC - if ((mm->pax_flags & MF_PAX_SEGMEXEC) && address >= SEGMEXEC_TASK_SIZE) - return -EFAULT; -#endif - /* * The futex address must be "naturally" aligned. 
*/ @@ -516,7 +556,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) return -EINVAL; address -= key->both.offset; - if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) + if (unlikely(!access_ok(uaddr, sizeof(u32)))) return -EFAULT; if (unlikely(should_fail_futex(fshared))) @@ -532,21 +572,20 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) if (!fshared) { key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); /* implies smp_mb(); (B) */ return 0; } again: /* Ignore any VERIFY_READ mapping (futex common case) */ - if (unlikely(should_fail_futex(fshared))) + if (unlikely(should_fail_futex(true))) return -EFAULT; - err = get_user_pages_fast(address, 1, 1, &page); + err = get_user_pages_fast(address, 1, FOLL_WRITE, &page); /* * If write access is not required (eg. FUTEX_WAIT), try * and get read-only access. */ - if (err == -EFAULT && rw == VERIFY_READ) { + if (err == -EFAULT && rw == FUTEX_READ) { err = get_user_pages_fast(address, 1, 0, &page); ro = 1; } @@ -626,7 +665,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) * A RO anonymous page will never change and thus doesn't make * sense for futex operations. */ - if (unlikely(should_fail_futex(fshared)) || ro) { + if (unlikely(should_fail_futex(true)) || ro) { err = -EFAULT; goto out; } @@ -635,8 +674,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); /* implies smp_mb(); (B) */ - } else { struct inode *inode; @@ -668,36 +705,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) goto again; } - /* - * Take a reference unless it is about to be freed. Previously - * this reference was taken by ihold under the page lock - * pinning the inode in place so i_lock was unnecessary. The - * only way for this check to fail is if the inode was - * truncated in parallel so warn for now if this happens. - * - * We are not calling into get_futex_key_refs() in file-backed - * cases, therefore a successful atomic_inc return below will - * guarantee that get_futex_key() will still imply smp_mb(); (B). - */ - if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) { - rcu_read_unlock(); - put_page(page); - - goto again; - } - - /* Should be impossible but lets be paranoid for now */ - if (WARN_ON_ONCE(inode->i_mapping != mapping)) { - err = -EFAULT; - rcu_read_unlock(); - iput(inode); - - goto out; - } - key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = inode; - key->shared.pgoff = basepage_index(tail); + key->shared.i_seq = get_inode_sequence_number(inode); + key->shared.pgoff = page_to_pgoff(tail); rcu_read_unlock(); } @@ -706,11 +716,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) return err; } -static inline void put_futex_key(union futex_key *key) -{ - drop_futex_key_refs(key); -} - /** * fault_in_user_writeable() - Fault in user address and verify RW access * @uaddr: pointer to faulting user space address @@ -728,10 +733,10 @@ static int fault_in_user_writeable(u32 __user *uaddr) struct mm_struct *mm = current->mm; int ret; - down_read(&mm->mmap_sem); - ret = fixup_user_fault(current, mm, (unsigned long)uaddr, + mmap_read_lock(mm); + ret = fixup_user_fault(mm, (unsigned long)uaddr, FAULT_FLAG_WRITE, NULL); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); return ret < 0 ? 
ret : 0; } @@ -797,7 +802,7 @@ static int refill_pi_state_cache(void) INIT_LIST_HEAD(&pi_state->list); /* pi_mutex gets initialized later */ pi_state->owner = NULL; - atomic_set(&pi_state->refcount, 1); + refcount_set(&pi_state->refcount, 1); pi_state->key = FUTEX_KEY_INIT; current->pi_state_cache = pi_state; @@ -805,7 +810,7 @@ static int refill_pi_state_cache(void) return 0; } -static struct futex_pi_state * alloc_pi_state(void) +static struct futex_pi_state *alloc_pi_state(void) { struct futex_pi_state *pi_state = current->pi_state_cache; @@ -815,18 +820,44 @@ static struct futex_pi_state * alloc_pi_state(void) return pi_state; } +static void pi_state_update_owner(struct futex_pi_state *pi_state, + struct task_struct *new_owner) +{ + struct task_struct *old_owner = pi_state->owner; + + lockdep_assert_held(&pi_state->pi_mutex.wait_lock); + + if (old_owner) { + raw_spin_lock(&old_owner->pi_lock); + WARN_ON(list_empty(&pi_state->list)); + list_del_init(&pi_state->list); + raw_spin_unlock(&old_owner->pi_lock); + } + + if (new_owner) { + raw_spin_lock(&new_owner->pi_lock); + WARN_ON(!list_empty(&pi_state->list)); + list_add(&pi_state->list, &new_owner->pi_state_list); + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); + } +} + +static void get_pi_state(struct futex_pi_state *pi_state) +{ + WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); +} + /* * Drops a reference to the pi_state object and frees or caches it * when the last reference is gone. - * - * Must be called with the hb lock held. */ static void put_pi_state(struct futex_pi_state *pi_state) { if (!pi_state) return; - if (!atomic_dec_and_test(&pi_state->refcount)) + if (!refcount_dec_and_test(&pi_state->refcount)) return; /* @@ -834,51 +865,36 @@ static void put_pi_state(struct futex_pi_state *pi_state) * and has cleaned up the pi_state already */ if (pi_state->owner) { - raw_spin_lock_irq(&pi_state->owner->pi_lock); - list_del_init(&pi_state->list); - raw_spin_unlock_irq(&pi_state->owner->pi_lock); + unsigned long flags; - rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); + raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); + pi_state_update_owner(pi_state, NULL); + rt_mutex_proxy_unlock(&pi_state->pi_mutex); + raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } - if (current->pi_state_cache) + if (current->pi_state_cache) { kfree(pi_state); - else { + } else { /* * pi_state->list is already empty. * clear pi_state->owner. * refcount is at 0 - put it back to 1. */ pi_state->owner = NULL; - atomic_set(&pi_state->refcount, 1); + refcount_set(&pi_state->refcount, 1); current->pi_state_cache = pi_state; } } -/* - * Look up the task based on what TID userspace gave us. - * We dont trust it. - */ -static struct task_struct * futex_find_get_task(pid_t pid) -{ - struct task_struct *p; - - rcu_read_lock(); - p = find_task_by_vpid(pid); - if (p) - get_task_struct(p); - - rcu_read_unlock(); - - return p; -} +#ifdef CONFIG_FUTEX_PI /* * This task is holding PI mutexes at exit time => bad. * Kernel cleans up PI-state, but userspace is likely hosed. * (Robust-futex cleanup is separate and might save the day for userspace.) 
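The "Robust-futex cleanup" mentioned above is the set_robust_list() mechanism: user space registers a per-thread list head once, and at exit the kernel walks it and sets FUTEX_OWNER_DIED in any lock words the dead task still owned. A minimal registration sketch (one head per thread in real use, and glibc normally does this for you):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* An empty robust list points at itself. */
static struct robust_list_head head = {
        .list           = { &head.list },
        .futex_offset   = 0,    /* offset from list entry to lock word */
};

static long register_robust_list(void)
{
        return syscall(SYS_set_robust_list, &head, sizeof(head));
}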
*/ -void exit_pi_state_list(struct task_struct *curr) +static void exit_pi_state_list(struct task_struct *curr) { struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; @@ -894,22 +910,41 @@ void exit_pi_state_list(struct task_struct *curr) */ raw_spin_lock_irq(&curr->pi_lock); while (!list_empty(head)) { - next = head->next; pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; hb = hash_futex(&key); + + /* + * We can race against put_pi_state() removing itself from the + * list (a waiter going away). put_pi_state() will first + * decrement the reference count and then modify the list, so + * it's possible to see the list entry but fail this reference + * acquire. + * + * In that case, drop the locks to let put_pi_state() make + * progress and retry the loop. + */ + if (!refcount_inc_not_zero(&pi_state->refcount)) { + raw_spin_unlock_irq(&curr->pi_lock); + cpu_relax(); + raw_spin_lock_irq(&curr->pi_lock); + continue; + } raw_spin_unlock_irq(&curr->pi_lock); spin_lock(&hb->lock); - - raw_spin_lock_irq(&curr->pi_lock); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_lock(&curr->pi_lock); /* * We dropped the pi-lock, so re-check whether this * task still owns the PI-state: */ if (head->next != next) { + /* retain curr->pi_lock for the loop invariant */ + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); + put_pi_state(pi_state); continue; } @@ -917,16 +952,21 @@ void exit_pi_state_list(struct task_struct *curr) WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); pi_state->owner = NULL; - raw_spin_unlock_irq(&curr->pi_lock); - - rt_mutex_unlock(&pi_state->pi_mutex); + raw_spin_unlock(&curr->pi_lock); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); + rt_mutex_futex_unlock(&pi_state->pi_mutex); + put_pi_state(pi_state); + raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); } +#else +static inline void exit_pi_state_list(struct task_struct *curr) { } +#endif /* * We need to check the following states: @@ -950,7 +990,7 @@ void exit_pi_state_list(struct task_struct *curr) * [10] Found | Found | task | !=taskTID | 0/1 | Invalid * * [1] Indicates that the kernel can acquire the futex atomically. We - * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. + * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. * * [2] Valid, if TID does not belong to a kernel thread. If no matching * thread is found then it indicates that the owner TID has died. @@ -975,7 +1015,42 @@ void exit_pi_state_list(struct task_struct *curr) * FUTEX_OWNER_DIED bit. See [4] * * [10] There is no transient state which leaves owner and user space - * TID out of sync. + * TID out of sync. Except one error case where the kernel is denied + * write access to the user address, see fixup_pi_state_owner().
+ * + * + * Serialization and lifetime rules: + * + * hb->lock: + * + * hb -> futex_q, relation + * futex_q -> pi_state, relation + * + * (cannot be raw because hb can contain arbitrary amount + * of futex_q's) + * + * pi_mutex->wait_lock: + * + * {uval, pi_state} + * + * (and pi_mutex 'obviously') + * + * p->pi_lock: + * + * p->pi_state_list -> pi_state->list, relation + * pi_mutex->owner -> pi_state->owner, relation + * + * pi_state->refcount: + * + * pi_state lifetime + * + * + * Lock order: + * + * hb->lock + * pi_mutex->wait_lock + * p->pi_lock + * */ /* @@ -983,10 +1058,13 @@ void exit_pi_state_list(struct task_struct *curr) * the pi_state against the user space value. If correct, attach to * it. */ -static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, +static int attach_to_pi_state(u32 __user *uaddr, u32 uval, + struct futex_pi_state *pi_state, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; + u32 uval2; + int ret; /* * Userspace might have messed up non-PI and PI futexes [3] @@ -994,7 +1072,37 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, if (unlikely(!pi_state)) return -EINVAL; - WARN_ON(!atomic_read(&pi_state->refcount)); + /* + * We get here with hb->lock held, and having found a + * futex_top_waiter(). This means that futex_lock_pi() of said futex_q + * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), + * which in turn means that futex_lock_pi() still has a reference on + * our pi_state. + * + * The waiter holding a reference on @pi_state also protects against + * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() + * and futex_wait_requeue_pi() as it cannot go to 0 and consequently + * free pi_state before we can take a reference ourselves. + */ + WARN_ON(!refcount_read(&pi_state->refcount)); + + /* + * Now that we have a pi_state, we can acquire wait_lock + * and do the state validation. + */ + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + + /* + * Since {uval, pi_state} is serialized by wait_lock, and our current + * uval was read without holding it, it can have changed. Verify it + * still is what we expect it to be, otherwise retry the entire + * operation. + */ + if (get_futex_value_locked(&uval2, uaddr)) + goto out_efault; + + if (uval != uval2) + goto out_eagain; /* * Handle the owner died case: @@ -1011,11 +1119,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * is not 0. Inconsistent state. [5] */ if (pid) - return -EINVAL; + goto out_einval; /* * Take a ref on the state and return success. [4] */ - goto out_state; + goto out_attach; } /* @@ -1027,14 +1135,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * Take a ref on the state and return success. [6] */ if (!pid) - goto out_state; + goto out_attach; } else { /* * If the owner died bit is not set, then the pi_state * must have an owner. [7] */ if (!pi_state->owner) - return -EINVAL; + goto out_einval; } /* @@ -1043,63 +1151,128 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * user space TID. 
[9/10] */ if (pid != task_pid_vnr(pi_state->owner)) - return -EINVAL; -out_state: - atomic_inc(&pi_state->refcount); + goto out_einval; + +out_attach: + get_pi_state(pi_state); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); *ps = pi_state; return 0; + +out_einval: + ret = -EINVAL; + goto out_error; + +out_eagain: + ret = -EAGAIN; + goto out_error; + +out_efault: + ret = -EFAULT; + goto out_error; + +out_error: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + return ret; } -/* - * Lookup the task for the TID provided from user space and attach to - * it after doing proper sanity checks. +/** + * wait_for_owner_exiting - Block until the owner has exited + * @ret: owner's current futex lock status + * @exiting: Pointer to the exiting task + * + * Caller must hold a refcount on @exiting. */ -static int attach_to_pi_owner(u32 uval, union futex_key *key, - struct futex_pi_state **ps) +static void wait_for_owner_exiting(int ret, struct task_struct *exiting) { - pid_t pid = uval & FUTEX_TID_MASK; - struct futex_pi_state *pi_state; - struct task_struct *p; - - /* - * We are the first waiter - try to look up the real owner and attach - * the new pi_state to it, but bail out when TID = 0 [1] - */ - if (!pid) - return -ESRCH; - p = futex_find_get_task(pid); - if (!p) - return -ESRCH; - - if (unlikely(p->flags & PF_KTHREAD)) { - put_task_struct(p); - return -EPERM; + if (ret != -EBUSY) { + WARN_ON_ONCE(exiting); + return; } + if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) + return; + + mutex_lock(&exiting->futex_exit_mutex); /* - * We need to look at the task state flags to figure out, - * whether the task is exiting. To protect against the do_exit - * change of the task flags, we do this protected by - * p->pi_lock: + * No point in doing state checking here. If the waiter got here + * while the task was in exec()->exec_futex_release() then it can + * have any FUTEX_STATE_* value when the waiter has acquired the + * mutex. OK, if running, EXITING or DEAD if it reached exit() + * already. Highly unlikely and not a problem. Just one more round + * through the futex maze. */ - raw_spin_lock_irq(&p->pi_lock); - if (unlikely(p->flags & PF_EXITING)) { - /* - * The task is on the way out. When PF_EXITPIDONE is - * set, we know that the task has finished the - * cleanup: - */ - int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; + mutex_unlock(&exiting->futex_exit_mutex); - raw_spin_unlock_irq(&p->pi_lock); - put_task_struct(p); - return ret; - } + put_task_struct(exiting); +} +static int handle_exit_race(u32 __user *uaddr, u32 uval, + struct task_struct *tsk) +{ + u32 uval2; + + /* + * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the + * caller that the alleged owner is busy. + */ + if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) + return -EBUSY; + + /* + * Reread the user space value to handle the following situation: + * + * CPU0 CPU1 + * + * sys_exit() sys_futex() + * do_exit() futex_lock_pi() + * futex_lock_pi_atomic() + * exit_signals(tsk) No waiters: + * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID + * mm_release(tsk) Set waiter bit + * exit_robust_list(tsk) { *uaddr = 0x80000PID; + * Set owner died attach_to_pi_owner() { + * *uaddr = 0xC0000000; tsk = get_task(PID); + * } if (!tsk->flags & PF_EXITING) { + * ... 
attach(); + * tsk->futex_state = } else { + * FUTEX_STATE_DEAD; if (tsk->futex_state != + * FUTEX_STATE_DEAD) + * return -EAGAIN; + * return -ESRCH; <--- FAIL + * } + * + * Returning ESRCH unconditionally is wrong here because the + * user space value has been changed by the exiting task. + * + * The same logic applies to the case where the exiting task is + * already gone. + */ + if (get_futex_value_locked(&uval2, uaddr)) + return -EFAULT; + + /* If the user space value has changed, try again. */ + if (uval2 != uval) + return -EAGAIN; + + /* + * The exiting task did not have a robust list, the robust list was + * corrupted or the user space value in *uaddr is simply bogus. + * Give up and tell user space. + */ + return -ESRCH; +} + +static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, + struct futex_pi_state **ps) +{ /* * No existing pi state. First waiter. [2] + * + * This creates pi_state, we have hb->lock held, this means nothing can + * observe this state, wait_lock is irrelevant. */ - pi_state = alloc_pi_state(); + struct futex_pi_state *pi_state = alloc_pi_state(); /* * Initialize the pi_mutex in locked state and make @p @@ -1112,46 +1285,95 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &p->pi_state_list); + /* + * Assignment without holding pi_state->pi_mutex.wait_lock is safe + * because there is no concurrency as the object is not published yet. + */ pi_state->owner = p; + + *ps = pi_state; +} +/* + * Lookup the task for the TID provided from user space and attach to + * it after doing proper sanity checks. + */ +static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, + struct futex_pi_state **ps, + struct task_struct **exiting) +{ + pid_t pid = uval & FUTEX_TID_MASK; + struct task_struct *p; + + /* + * We are the first waiter - try to look up the real owner and attach + * the new pi_state to it, but bail out when TID = 0 [1] + * + * The !pid check is paranoid. None of the call sites should end up + * with pid == 0, but better safe than sorry. Let the caller retry + */ + if (!pid) + return -EAGAIN; + p = find_get_task_by_vpid(pid); + if (!p) + return handle_exit_race(uaddr, uval, NULL); + + if (unlikely(p->flags & PF_KTHREAD)) { + put_task_struct(p); + return -EPERM; + } + + /* + * We need to look at the task state to figure out, whether the + * task is exiting. To protect against the change of the task state + * in futex_exit_release(), we do this protected by p->pi_lock: + */ + raw_spin_lock_irq(&p->pi_lock); + if (unlikely(p->futex_state != FUTEX_STATE_OK)) { + /* + * The task is on the way out. When the futex state is + * FUTEX_STATE_DEAD, we know that the task has finished + * the cleanup: + */ + int ret = handle_exit_race(uaddr, uval, p); + + raw_spin_unlock_irq(&p->pi_lock); + /* + * If the owner task is between FUTEX_STATE_EXITING and + * FUTEX_STATE_DEAD then store the task pointer and keep + * the reference on the task struct. The calling code will + * drop all locks, wait for the task to reach + * FUTEX_STATE_DEAD and then drop the refcount. This is + * required to prevent a live lock when the current task + * preempted the exiting task between the two states. 
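A pthreads sketch of the handshake that wait_for_owner_exiting() performs above: the exiting task holds an exit mutex across its entire teardown window, so a waiter that got -EBUSY blocks on that mutex instead of retrying in a tight loop, which is exactly the live lock the comment warns about. All names are illustrative:

#include <pthread.h>

struct owner {
        pthread_mutex_t exit_mutex;     /* held across the teardown window */
        int             futex_state;    /* OK / EXITING / DEAD */
};

/* Waiter side: block until the owner has left its teardown section,
 * then go back and retry the whole lock attempt from scratch. */
static void wait_for_owner_exit(struct owner *o)
{
        pthread_mutex_lock(&o->exit_mutex);
        /* No state check needed; whatever state the owner reached,
         * one more trip through the retry loop sorts it out. */
        pthread_mutex_unlock(&o->exit_mutex);
}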
+ */ + if (ret == -EBUSY) + *exiting = p; + else + put_task_struct(p); + return ret; + } + + __attach_to_pi_owner(p, key, ps); raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); - *ps = pi_state; - return 0; } -static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, - union futex_key *key, struct futex_pi_state **ps) -{ - struct futex_q *match = futex_top_waiter(hb, key); - - /* - * If there is a waiter on that futex, validate it and - * attach to the pi_state when the validation succeeds. - */ - if (match) - return attach_to_pi_state(uval, match->pi_state, ps); - - /* - * We are the first waiter - try to look up the owner based on - * @uval and attach to it. - */ - return attach_to_pi_owner(uval, key, ps); -} - static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) { - u32 uninitialized_var(curval); + int err; + u32 curval; if (unlikely(should_fail_futex(true))) return -EFAULT; - if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) - return -EFAULT; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (unlikely(err)) + return err; - /*If user space value changed, let the caller retry */ + /* If user space value changed, let the caller retry */ return curval != uval ? -EAGAIN : 0; } @@ -1164,22 +1386,30 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) * lookup * @task: the task to perform the atomic lock work for. This will * be "current" except in the case of requeue pi. + * @exiting: Pointer to store the task pointer of the owner task + * which is in the middle of exiting * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Return: - * 0 - ready to wait; - * 1 - acquired the lock; - * <0 - error + * - 0 - ready to wait; + * - 1 - acquired the lock; + * - <0 - error * - * The hb->lock and futex_key refs shall be held by the caller. + * The hb->lock must be held by the caller. + * + * @exiting is only set when the return value is -EBUSY. If so, this holds + * a refcount on the exiting task on return and the caller needs to drop it + * after waiting for the exit to complete. */ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, - struct task_struct *task, int set_waiters) + struct task_struct *task, + struct task_struct **exiting, + int set_waiters) { u32 uval, newval, vpid = task_pid_vnr(task); - struct futex_q *match; + struct futex_q *top_waiter; int ret; /* @@ -1205,9 +1435,9 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * Lookup existing state first. If it exists, try to attach to * its pi_state. */ - match = futex_top_waiter(hb, key); - if (match) - return attach_to_pi_state(uval, match->pi_state, ps); + top_waiter = futex_top_waiter(hb, key); + if (top_waiter) + return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); /* * No waiter and user TID is 0. We are here because the @@ -1228,8 +1458,26 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, newval |= FUTEX_WAITERS; ret = lock_pi_update_atomic(uaddr, uval, newval); - /* If the take over worked, return 1 */ - return ret < 0 ? ret : 1; + if (ret) + return ret; + + /* + * If the waiter bit was requested the caller also needs PI + * state attached to the new owner of the user space futex. + * + * @task is guaranteed to be alive and it cannot be exiting + * because it is either sleeping or waiting in + * futex_requeue_pi_wakeup_sync(). 
+ * + * No need to do the full attach_to_pi_owner() exercise + * because @task is known and valid. + */ + if (set_waiters) { + raw_spin_lock_irq(&task->pi_lock); + __attach_to_pi_owner(task, key, ps); + raw_spin_unlock_irq(&task->pi_lock); + } + return 1; } /* @@ -1246,7 +1494,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * attach to the owner. If that fails, no harm done, we only * set the FUTEX_WAITERS bit in the user space variable. */ - return attach_to_pi_owner(uval, key, ps); + return attach_to_pi_owner(uaddr, newval, key, ps, exiting); } /** @@ -1259,9 +1507,9 @@ static void __unqueue_futex(struct futex_q *q) { struct futex_hash_bucket *hb; - if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr)) - || WARN_ON(plist_node_empty(&q->list))) + if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) return; + lockdep_assert_held(q->lock_ptr); hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); plist_del(&q->list, &hb->chain); @@ -1281,66 +1529,66 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) return; - /* - * Queue the task for later wakeup for after we've released - * the hb->lock. wake_q_add() grabs reference to p. - */ - wake_q_add(wake_q, p); + get_task_struct(p); __unqueue_futex(q); /* - * The waiting task can free the futex_q as soon as - * q->lock_ptr = NULL is written, without taking any locks. A - * memory barrier is required here to prevent the following - * store to lock_ptr from getting ahead of the plist_del. + * The waiting task can free the futex_q as soon as q->lock_ptr = NULL + * is written, without taking any locks. This is possible in the event + * of a spurious wakeup, for example. A memory barrier is required here + * to prevent the following store to lock_ptr from getting ahead of the + * plist_del in __unqueue_futex(). */ - smp_wmb(); - q->lock_ptr = NULL; + smp_store_release(&q->lock_ptr, NULL); + + /* + * Queue the task for later wakeup for after we've released + * the hb->lock. + */ + wake_q_add_safe(wake_q, p); } -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, - struct futex_hash_bucket *hb) +/* + * Caller must hold a reference on @pi_state. + */ +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) { + struct rt_mutex_waiter *top_waiter; struct task_struct *new_owner; - struct futex_pi_state *pi_state = this->pi_state; - u32 uninitialized_var(curval), newval; - WAKE_Q(wake_q); - bool deboost; + bool postunlock = false; + DEFINE_RT_WAKE_Q(wqh); + u32 curval, newval; int ret = 0; - if (!pi_state) - return -EINVAL; + top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); + if (WARN_ON_ONCE(!top_waiter)) { + /* + * As per the comment in futex_unlock_pi() this should not happen. + * + * When this happens, give up our locks and try again, giving + * the futex_lock_pi() instance time to complete, either by + * waiting on the rtmutex or removing itself from the futex + * queue. + */ + ret = -EAGAIN; + goto out_unlock; + } + + new_owner = top_waiter->task; /* - * If current does not own the pi_state then the futex is - * inconsistent and user space fiddled with the futex value. 
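For orientation, wake_futex_pi() below is the kernel half of the PI unlock protocol. The user-space half, sketched here with illustrative helper names: cmpxchg the TID to 0 on the uncontended fast path, and only call FUTEX_UNLOCK_PI when the WAITERS bit forces the slow path (FUTEX_PRIVATE_FLAG handling omitted):

#include <linux/futex.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static void pi_mutex_unlock(_Atomic uint32_t *futex)
{
        uint32_t tid = (uint32_t)syscall(SYS_gettid);

        /* Fast path: we own it and nobody waits, so TID -> 0 suffices. */
        if (atomic_compare_exchange_strong(futex, &tid, 0))
                return;

        /* FUTEX_WAITERS (or FUTEX_OWNER_DIED) is set: only the kernel can
         * hand the lock word and the rt_mutex over to the top waiter. */
        syscall(SYS_futex, futex, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}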
- */ - if (pi_state->owner != current) - return -EINVAL; - - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); - new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); - - /* - * It is possible that the next waiter (the one that brought - * this owner to the kernel) timed out and is no longer - * waiting on the lock. - */ - if (!new_owner) - new_owner = this->task; - - /* - * We pass it to the next owner. The WAITERS bit is always - * kept enabled while there is PI state around. We cleanup the - * owner died bit, because we are the owner. + * We pass it to the next owner. The WAITERS bit is always kept + * enabled while there is PI state around. We clean up the owner + * died bit, because we are the owner. */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - if (unlikely(should_fail_futex(true))) + if (unlikely(should_fail_futex(true))) { ret = -EFAULT; + goto out_unlock; + } - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { - ret = -EFAULT; - } else if (curval != uval) { + ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (!ret && (curval != uval)) { /* * If an unconditional UNLOCK_PI operation (user space did not * try the TID->0 transition) raced with a waiter setting the @@ -1352,38 +1600,24 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, else ret = -EINVAL; } - if (ret) { - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - return ret; + + if (!ret) { + /* + * This is a point of no return; once we modified the uval + * there is no going back and subsequent operations must + * not fail. + */ + pi_state_update_owner(pi_state, new_owner); + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh); } - raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); - raw_spin_unlock(&pi_state->owner->pi_lock); - - raw_spin_lock(&new_owner->pi_lock); - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &new_owner->pi_state_list); - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); - +out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + if (postunlock) + rt_mutex_postunlock(&wqh); - /* - * First unlock HB so the waiter does not spin on it once he got woken - * up. Second wake up the waiter before the priority is adjusted. If we - * deboost first (and lose our higher priority), then the task might get - * scheduled away before the wake up can take place.
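The futex_atomic_op_inuser() helper added a bit further down unpacks the FUTEX_WAKE_OP operand word. For reference, its bit layout, and the uapi FUTEX_OP() macro that builds it (the encode_wake_op() wrapper is illustrative):

#include <linux/futex.h>
#include <stdint.h>

/*
 * FUTEX_WAKE_OP operand layout as decoded by futex_atomic_op_inuser():
 *
 *   [31]    FUTEX_OP_OPARG_SHIFT (oparg is a shift count, not a value)
 *   [30:28] op     (SET/ADD/OR/ANDN/XOR)
 *   [27:24] cmp    (EQ/NE/LT/LE/GT/GE)
 *   [23:12] oparg  (12 bits, sign-extended)
 *   [11:0]  cmparg (12 bits, sign-extended)
 */
static uint32_t encode_wake_op(void)
{
        /* "*uaddr2 += 1, and also wake uaddr2 waiters if the old value
         * was greater than 0", built with the uapi FUTEX_OP() macro. */
        return FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_GT, 0);
}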
- */ - spin_unlock(&hb->lock); - wake_up_q(&wake_q); - if (deboost) - rt_mutex_adjust_prio(current); - - return 0; + return ret; } /* @@ -1420,20 +1654,20 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; int ret; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ); if (unlikely(ret != 0)) - goto out; + return ret; hb = hash_futex(&key); /* Make sure we really have tasks to wakeup */ if (!hb_waiters_pending(hb)) - goto out_put_key; + return ret; spin_lock(&hb->lock); @@ -1456,12 +1690,55 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) spin_unlock(&hb->lock); wake_up_q(&wake_q); -out_put_key: - put_futex_key(&key); -out: return ret; } +static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) +{ + unsigned int op = (encoded_op & 0x70000000) >> 28; + unsigned int cmp = (encoded_op & 0x0f000000) >> 24; + int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); + int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); + int oldval, ret; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { + if (oparg < 0 || oparg > 31) { + char comm[sizeof(current->comm)]; + /* + * kill this print and return -EINVAL when userspace + * is sane again + */ + pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n", + get_task_comm(comm, current), oparg); + oparg &= 31; + } + oparg = 1 << oparg; + } + + pagefault_disable(); + ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); + pagefault_enable(); + if (ret) + return ret; + + switch (cmp) { + case FUTEX_OP_CMP_EQ: + return oldval == cmparg; + case FUTEX_OP_CMP_NE: + return oldval != cmparg; + case FUTEX_OP_CMP_LT: + return oldval < cmparg; + case FUTEX_OP_CMP_GE: + return oldval >= cmparg; + case FUTEX_OP_CMP_LE: + return oldval <= cmparg; + case FUTEX_OP_CMP_GT: + return oldval > cmparg; + default: + return -ENOSYS; + } +} + /* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: @@ -1474,15 +1751,15 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; int ret, op_ret; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); if (unlikely(ret != 0)) - goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); + return ret; + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) - goto out_put_key1; + return ret; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -1491,32 +1768,27 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { - double_unlock_hb(hb1, hb2); -#ifndef CONFIG_MMU - /* - * we don't get EFAULT from MMU faults if we don't have an MMU, - * but we might get them from range checking - */ - ret = op_ret; - goto out_put_keys; -#endif - - if (unlikely(op_ret != -EFAULT)) { + if (!IS_ENABLED(CONFIG_MMU) || + unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { + /* + * we don't get EFAULT from MMU faults if we don't have + * an MMU, but we might get them from 
range checking + */ ret = op_ret; - goto out_put_keys; + return ret; } - ret = fault_in_user_writeable(uaddr2); - if (ret) - goto out_put_keys; + if (op_ret == -EFAULT) { + ret = fault_in_user_writeable(uaddr2); + if (ret) + return ret; + } + cond_resched(); if (!(flags & FLAGS_SHARED)) goto retry_private; - - put_futex_key(&key2); - put_futex_key(&key1); goto retry; } @@ -1551,11 +1823,6 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, out_unlock: double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); -out_put_keys: - put_futex_key(&key2); -out_put_key1: - put_futex_key(&key1); -out: return ret; } @@ -1582,10 +1849,111 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; } - get_futex_key_refs(key2); q->key = *key2; } +static inline bool futex_requeue_pi_prepare(struct futex_q *q, + struct futex_pi_state *pi_state) +{ + int old, new; + + /* + * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has + * already set Q_REQUEUE_PI_IGNORE to signal that requeue should + * ignore the waiter. + */ + old = atomic_read_acquire(&q->requeue_state); + do { + if (old == Q_REQUEUE_PI_IGNORE) + return false; + + /* + * futex_proxy_trylock_atomic() might have set it to + * IN_PROGRESS and an interleaved early wake to WAIT. + * + * Adding an extra state for that trylock was considered, + * but that would just add more conditionals + * all over the place for a dubious value. + */ + if (old != Q_REQUEUE_PI_NONE) + break; + + new = Q_REQUEUE_PI_IN_PROGRESS; + } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); + + q->pi_state = pi_state; + return true; +} + +static inline void futex_requeue_pi_complete(struct futex_q *q, int locked) +{ + int old, new; + + old = atomic_read_acquire(&q->requeue_state); + do { + if (old == Q_REQUEUE_PI_IGNORE) + return; + + if (locked >= 0) { + /* Requeue succeeded. Set DONE or LOCKED */ + WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS && + old != Q_REQUEUE_PI_WAIT); + new = Q_REQUEUE_PI_DONE + locked; + } else if (old == Q_REQUEUE_PI_IN_PROGRESS) { + /* Deadlock, no early wakeup interleave */ + new = Q_REQUEUE_PI_NONE; + } else { + /* Deadlock, early wakeup interleave. */ + WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT); + new = Q_REQUEUE_PI_IGNORE; + } + } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); + +#ifdef CONFIG_PREEMPT_RT + /* If the waiter interleaved with the requeue, let it know */ + if (unlikely(old == Q_REQUEUE_PI_WAIT)) + rcuwait_wake_up(&q->requeue_wait); +#endif +} + +static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q) +{ + int old, new; + + old = atomic_read_acquire(&q->requeue_state); + do { + /* Is requeue done already? */ + if (old >= Q_REQUEUE_PI_DONE) + return old; + + /* + * If not done, then tell the requeue code to either ignore + * the waiter or to wake it up once the requeue is done. + */ + new = Q_REQUEUE_PI_WAIT; + if (old == Q_REQUEUE_PI_NONE) + new = Q_REQUEUE_PI_IGNORE; + } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); + + /* If the requeue was in progress, wait for it to complete */ + if (old == Q_REQUEUE_PI_IN_PROGRESS) { +#ifdef CONFIG_PREEMPT_RT + rcuwait_wait_event(&q->requeue_wait, + atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT, + TASK_UNINTERRUPTIBLE); +#else + (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT); +#endif + } + + /* + * Requeue is now either prohibited or complete. Reread state + * because during the wait above it might have changed.
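The three futex_requeue_pi_*() helpers above share one idiom: read the state, compute the transition, and atomic_try_cmpxchg() it in, looping because a failed try_cmpxchg refreshes the expected value. A stripped-down C11 rendering of the loop in futex_requeue_pi_prepare() (enum values abbreviated):

#include <stdatomic.h>
#include <stdbool.h>

enum { PI_NONE, PI_IGNORE, PI_IN_PROGRESS };

static bool requeue_prepare(_Atomic int *state)
{
        int old = atomic_load_explicit(state, memory_order_acquire);

        do {
                if (old == PI_IGNORE)   /* waiter is on the way out */
                        return false;
                if (old != PI_NONE)     /* e.g. already IN_PROGRESS */
                        break;
                /* old == PI_NONE: try to claim the transition. On failure
                 * 'old' is refreshed with the current value and we loop. */
        } while (!atomic_compare_exchange_weak(state, &old, PI_IN_PROGRESS));

        return true;
}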
Nothing + * will modify q->requeue_state after this point. + */ + return atomic_read(&q->requeue_state); +} + /** * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue * @q: the futex_q @@ -1593,18 +1961,31 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, * @hb: the hash_bucket of the requeue target futex * * During futex_requeue, with requeue_pi=1, it is possible to acquire the - * target futex if it is uncontended or via a lock steal. Set the futex_q key - * to the requeue target futex so the waiter can detect the wakeup on the right - * futex, but remove it from the hb and NULL the rt_waiter so it can detect - * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock - * to protect access to the pi_state to fixup the owner later. Must be called - * with both q->lock_ptr and hb->lock held. + * target futex if it is uncontended or via a lock steal. + * + * 1) Set @q::key to the requeue target futex key so the waiter can detect + * the wakeup on the right futex. + * + * 2) Dequeue @q from the hash bucket. + * + * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock + * acquisition. + * + * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that + * the waiter has to fixup the pi state. + * + * 5) Complete the requeue state so the waiter can make progress. After + * this point the waiter task can return from the syscall immediately in + * case that the pi state does not have to be fixed up. + * + * 6) Wake the waiter task. + * + * Must be called with both q->lock_ptr and hb->lock held. */ static inline void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, struct futex_hash_bucket *hb) { - get_futex_key_refs(key); q->key = *key; __unqueue_futex(q); @@ -1614,6 +1995,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, q->lock_ptr = &hb->lock; + /* Signal locked state to the waiter */ + futex_requeue_pi_complete(q, 1); wake_up_state(q->task, TASK_NORMAL); } @@ -1625,6 +2008,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * @key1: the from futex key * @key2: the to futex key * @ps: address to store the pi_state pointer + * @exiting: Pointer to store the task pointer of the owner task + * which is in the middle of exiting * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Try and get the lock on behalf of the top waiter if we can do it atomically. @@ -1632,20 +2017,24 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. * hb1 and hb2 must be held by the caller. * + * @exiting is only set when the return value is -EBUSY. If so, this holds + * a refcount on the exiting task on return and the caller needs to drop it + * after waiting for the exit to complete. 
+ * * Return: - * 0 - failed to acquire the lock atomically; - * >0 - acquired the lock, return value is vpid of the top_waiter - * <0 - error + * - 0 - failed to acquire the lock atomically; + * - >0 - acquired the lock, return value is vpid of the top_waiter + * - <0 - error */ -static int futex_proxy_trylock_atomic(u32 __user *pifutex, - struct futex_hash_bucket *hb1, - struct futex_hash_bucket *hb2, - union futex_key *key1, union futex_key *key2, - struct futex_pi_state **ps, int set_waiters) +static int +futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, union futex_key *key1, + union futex_key *key2, struct futex_pi_state **ps, + struct task_struct **exiting, int set_waiters) { struct futex_q *top_waiter = NULL; u32 curval; - int ret, vpid; + int ret; if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; @@ -1658,7 +2047,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * If the caller intends to requeue more than 1 waiter to pifutex, * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, * as we have means to handle the possible fault. If not, don't set - * the bit unecessarily as it will force the subsequent unlock to enter + * the bit unnecessarily as it will force the subsequent unlock to enter * the kernel. */ top_waiter = futex_top_waiter(hb1, key1); @@ -1667,21 +2056,52 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, if (!top_waiter) return 0; + /* + * Ensure that this is a waiter sitting in futex_wait_requeue_pi() + * and waiting on the 'waitqueue' futex which is always !PI. + */ + if (!top_waiter->rt_waiter || top_waiter->pi_state) + return -EINVAL; + /* Ensure we requeue to the expected futex. */ if (!match_futex(top_waiter->requeue_pi_key, key2)) return -EINVAL; + /* Ensure that this does not race against an early wakeup */ + if (!futex_requeue_pi_prepare(top_waiter, NULL)) + return -EAGAIN; + /* - * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in - * the contended case or if set_waiters is 1. The pi_state is returned - * in ps in contended cases. + * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit + * in the contended case or if @set_waiters is true. + * + * In the contended case PI state is attached to the lock owner. If + * the user space lock can be acquired then PI state is attached to + * the new owner (@top_waiter->task) when @set_waiters is true. */ - vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, - set_waiters); + exiting, set_waiters); if (ret == 1) { + /* + * Lock was acquired in user space and PI state was + * attached to @top_waiter->task. That means state is fully + * consistent and the waiter can return to user space + * immediately after the wakeup. + */ requeue_pi_wake_futex(top_waiter, key2, hb2); - return vpid; + } else if (ret < 0) { + /* Rewind top_waiter::requeue_state */ + futex_requeue_pi_complete(top_waiter, ret); + } else { + /* + * futex_lock_pi_atomic() did not acquire the user space + * futex, but managed to establish the proxy lock and pi + * state. top_waiter::requeue_state cannot be fixed up here + * because the waiter is not enqueued on the rtmutex + * yet. This is handled at the callsite depending on the + * result of rt_mutex_start_proxy_lock() which is + * guaranteed to be reached with this function returning 0. 
+ */ } return ret; } @@ -1701,19 +2121,31 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * uaddr2 atomically on behalf of the top waiter. * * Return: - * >=0 - on success, the number of tasks requeued or woken; - * <0 - on error + * - >=0 - on success, the number of tasks requeued or woken; + * - <0 - on error */ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; - int drop_count = 0, task_count = 0, ret; + int task_count = 0, ret; struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); + + if (nr_wake < 0 || nr_requeue < 0) + return -EINVAL; + + /* + * When PI not supported: return -ENOSYS if requeue_pi is true, + * consequently the compiler knows requeue_pi is always false past + * this point which will optimize away all the conditional code + * further down. + */ + if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi) + return -ENOSYS; if (requeue_pi) { /* @@ -1723,43 +2155,53 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, if (uaddr1 == uaddr2) return -EINVAL; + /* + * futex_requeue() allows the caller to define the number + * of waiters to wake up via the @nr_wake argument. With + * REQUEUE_PI, waking up more than one waiter creates + * more problems than it solves. Waking up a waiter only + * makes sense if the PI futex @uaddr2 is uncontended as + * this allows the requeue code to acquire the futex + * @uaddr2 before waking the waiter. The waiter can then + * return to user space without further action. A secondary + * wakeup would just make the futex_wait_requeue_pi() + * handling more complex, because that code would have to + * look up pi_state and do more or less all the handling + * which the requeue code has to do for the to-be-requeued + * waiters. So restrict the number of waiters to wake to + * one, and only wake it up when the PI futex is + * uncontended. Otherwise requeue it and let the unlock of + * the PI futex handle the wakeup. + * + * All REQUEUE_PI users, e.g. pthread_cond_signal() and + * pthread_cond_broadcast(), must use nr_wake=1. + */ + if (nr_wake != 1) + return -EINVAL; + /* * requeue_pi requires a pi_state, try to allocate it now * without any locks in case it fails. */ if (refill_pi_state_cache()) return -ENOMEM; - /* - * requeue_pi must wake as many tasks as it can, up to nr_wake - * + nr_requeue, since it acquires the rt_mutex prior to - * returning to userspace, so as to not leave the rt_mutex with - * waiters and no owner. However, second and third wake-ups - * cannot be predicted as they involve race conditions with the - * first wake and a fault while looking up the pi_state. Both - * pthread_cond_signal() and pthread_cond_broadcast() should - * use nr_wake=1. - */ - if (nr_wake != 1) - return -EINVAL; } retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); if (unlikely(ret != 0)) - goto out; + return ret; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, - requeue_pi ? VERIFY_WRITE : VERIFY_READ); + requeue_pi ? FUTEX_WRITE : FUTEX_READ); if (unlikely(ret != 0)) - goto out_put_key1; + return ret; /* * The check above which compares uaddrs is not sufficient for * shared futexes.
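To make the nr_wake == 1 rule above concrete, here is the user-space call shape it constrains, sketched with an illustrative wrapper: a condvar-style broadcast wakes at most one waiter and requeues the rest onto the PI futex:

#include <limits.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static long cond_broadcast_requeue(uint32_t *cond, uint32_t *pi_lock,
                                   uint32_t expected)
{
        /*
         * val is nr_wake (must be 1 for REQUEUE_PI), the timeout slot
         * carries nr_requeue, and val3 is the expected *cond value;
         * the kernel returns -EAGAIN if it changed under us.
         */
        return syscall(SYS_futex, cond, FUTEX_CMP_REQUEUE_PI, 1,
                       (unsigned long)INT_MAX, pi_lock, expected);
}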
We need to compare the keys: */ - if (requeue_pi && match_futex(&key1, &key2)) { - ret = -EINVAL; - goto out_put_keys; - } + if (requeue_pi && match_futex(&key1, &key2)) + return -EINVAL; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -1779,13 +2221,11 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, ret = get_user(curval, uaddr1); if (ret) - goto out_put_keys; + return ret; if (!(flags & FLAGS_SHARED)) goto retry_private; - put_futex_key(&key2); - put_futex_key(&key1); goto retry; } if (curval != *cmpval) { @@ -1794,70 +2234,96 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, } } - if (requeue_pi && (task_count - nr_wake < nr_requeue)) { + if (requeue_pi) { + struct task_struct *exiting = NULL; + /* * Attempt to acquire uaddr2 and wake the top waiter. If we * intend to requeue waiters, force setting the FUTEX_WAITERS * bit. We force this here where we are able to easily handle * faults rather in the requeue loop below. + * + * Updates topwaiter::requeue_state if a top waiter exists. */ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, - &key2, &pi_state, nr_requeue); + &key2, &pi_state, + &exiting, nr_requeue); /* - * At this point the top_waiter has either taken uaddr2 or is - * waiting on it. If the former, then the pi_state will not - * exist yet, look it up one more time to ensure we have a - * reference to it. If the lock was taken, ret contains the - * vpid of the top waiter task. - * If the lock was not taken, we have pi_state and an initial - * refcount on it. In case of an error we have nothing. + * At this point the top_waiter has either taken uaddr2 or + * is waiting on it. In both cases pi_state has been + * established and an initial refcount on it. In case of an + * error there's nothing. + * + * The top waiter's requeue_state is up to date: + * + * - If the lock was acquired atomically (ret == 1), then + * the state is Q_REQUEUE_PI_LOCKED. + * + * The top waiter has been dequeued and woken up and can + * return to user space immediately. The kernel/user + * space state is consistent. In case that there must be + * more waiters requeued the WAITERS bit in the user + * space futex is set so the top waiter task has to go + * into the syscall slowpath to unlock the futex. This + * will block until this requeue operation has been + * completed and the hash bucket locks have been + * dropped. + * + * - If the trylock failed with an error (ret < 0) then + * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing + * happened", or Q_REQUEUE_PI_IGNORE when there was an + * interleaved early wakeup. + * + * - If the trylock did not succeed (ret == 0) then the + * state is either Q_REQUEUE_PI_IN_PROGRESS or + * Q_REQUEUE_PI_WAIT if an early wakeup interleaved. + * This will be cleaned up in the loop below, which + * cannot fail because futex_proxy_trylock_atomic() did + * the same sanity checks for requeue_pi as the loop + * below does. */ - if (ret > 0) { - WARN_ON(pi_state); - drop_count++; - task_count++; - /* - * If we acquired the lock, then the user space value - * of uaddr2 should be vpid. It cannot be changed by - * the top waiter as it is blocked on hb2 lock if it - * tries to do so. If something fiddled with it behind - * our back the pi state lookup might unearth it. So - * we rather use the known value than rereading and - * handing potential crap to lookup_pi_state. - * - * If that call succeeds then we have pi_state and an - * initial refcount on it. 
- */ - ret = lookup_pi_state(ret, hb2, &key2, &pi_state); - } - switch (ret) { case 0: /* We hold a reference on the pi state. */ break; - /* If the above failed, then pi_state is NULL */ + case 1: + /* + * futex_proxy_trylock_atomic() acquired the user space + * futex. Adjust task_count. + */ + task_count++; + ret = 0; + break; + + /* + * If the above failed, then pi_state is NULL and + * waiter::requeue_state is correct. + */ case -EFAULT: double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); - put_futex_key(&key2); - put_futex_key(&key1); ret = fault_in_user_writeable(uaddr2); if (!ret) goto retry; - goto out; + return ret; + case -EBUSY: case -EAGAIN: /* * Two reasons for this: - * - Owner is exiting and we just wait for the + * - EBUSY: Owner is exiting and we just wait for the * exit to complete. - * - The user space value changed. + * - EAGAIN: The user space value changed. */ double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); - put_futex_key(&key2); - put_futex_key(&key1); + /* + * Handle the case where the owner is in the middle of + * exiting. Wait for the exit to complete otherwise + * this task might loop forever, aka. live lock. + */ + wait_for_owner_exiting(ret, exiting); cond_resched(); goto retry; default: @@ -1873,7 +2339,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, continue; /* - * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always + * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always * be paired with each other and no other futex ops. * * We should never be requeueing a futex_q with a pi_state, @@ -1886,18 +2352,17 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, break; } - /* - * Wake nr_wake waiters. For requeue_pi, if we acquired the - * lock, we already woke the top_waiter. If not, it will be - * woken by futex_unlock_pi(). - */ - if (++task_count <= nr_wake && !requeue_pi) { - mark_wake_futex(&wake_q, this); + /* Plain futexes just wake or requeue and are done */ + if (!requeue_pi) { + if (++task_count <= nr_wake) + mark_wake_futex(&wake_q, this); + else + requeue_futex(this, hb1, hb2, &key2); continue; } /* Ensure we requeue to the expected futex for requeue_pi. */ - if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { + if (!match_futex(this->requeue_pi_key, &key2)) { ret = -EINVAL; break; } @@ -1905,56 +2370,66 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, /* * Requeue nr_requeue waiters and possibly one more in the case * of requeue_pi if we couldn't acquire the lock atomically. + * + * Prepare the waiter to take the rt_mutex. Take a refcount + * on the pi_state and store the pointer in the futex_q + * object of the waiter. */ - if (requeue_pi) { + get_pi_state(pi_state); + + /* Don't requeue when the waiter is already on the way out. */ + if (!futex_requeue_pi_prepare(this, pi_state)) { /* - * Prepare the waiter to take the rt_mutex. Take a - * refcount on the pi_state and store the pointer in - * the futex_q object of the waiter. + * Early woken waiter signaled that it is on the + * way out. Drop the pi_state reference and try the + * next waiter. @this->pi_state is still NULL. */ - atomic_inc(&pi_state->refcount); - this->pi_state = pi_state; - ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, - this->rt_waiter, - this->task); - if (ret == 1) { - /* - * We got the lock. We do neither drop the - * refcount on pi_state nor clear - * this->pi_state because the waiter needs the - * pi_state for cleaning up the user space - * value. 
It will drop the refcount after - * doing so. - */ - requeue_pi_wake_futex(this, &key2, hb2); - drop_count++; - continue; - } else if (ret) { - /* - * rt_mutex_start_proxy_lock() detected a - * potential deadlock when we tried to queue - * that waiter. Drop the pi_state reference - * which we took above and remove the pointer - * to the state from the waiters futex_q - * object. - */ - this->pi_state = NULL; - put_pi_state(pi_state); - /* - * We stop queueing more waiters and let user - * space deal with the mess. - */ - break; - } + put_pi_state(pi_state); + continue; + } + + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, + this->rt_waiter, + this->task); + + if (ret == 1) { + /* + * We got the lock. We do neither drop the refcount + * on pi_state nor clear this->pi_state because the + * waiter needs the pi_state for cleaning up the + * user space value. It will drop the refcount + * after doing so. this::requeue_state is updated + * in the wakeup as well. + */ + requeue_pi_wake_futex(this, &key2, hb2); + task_count++; + } else if (!ret) { + /* Waiter is queued, move it to hb2 */ + requeue_futex(this, hb1, hb2, &key2); + futex_requeue_pi_complete(this, 0); + task_count++; + } else { + /* + * rt_mutex_start_proxy_lock() detected a potential + * deadlock when we tried to queue that waiter. + * Drop the pi_state reference which we took above + * and remove the pointer to the state from the + * waiters futex_q object. + */ + this->pi_state = NULL; + put_pi_state(pi_state); + futex_requeue_pi_complete(this, ret); + /* + * We stop queueing more waiters and let user space + * deal with the mess. + */ + break; } - requeue_futex(this, hb1, hb2, &key2); - drop_count++; } /* - * We took an extra initial reference to the pi_state either - * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We - * need to drop it here again. + * We took an extra initial reference to the pi_state in + * futex_proxy_trylock_atomic(). We need to drop it here again. */ put_pi_state(pi_state); @@ -1962,21 +2437,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); hb_waiters_dec(hb2); - - /* - * drop_futex_key_refs() must be called outside the spinlocks. During - * the requeue we moved futex_q's from the hash bucket at key1 to the - * one at key2 and updated their key pointer. We no longer need to - * hold the references to key1. - */ - while (--drop_count >= 0) - drop_futex_key_refs(&key1); - -out_put_keys: - put_futex_key(&key2); -out_put_key1: - put_futex_key(&key1); -out: return ret ? ret : task_count; } @@ -1996,11 +2456,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) * decrement the counter at queue_unlock() when some error has * occurred and we don't end up adding the task to the list. */ - hb_waiters_inc(hb); + hb_waiters_inc(hb); /* implies smp_mb(); (A) */ q->lock_ptr = &hb->lock; - spin_lock(&hb->lock); /* implies smp_mb(); (A) */ + spin_lock(&hb->lock); return hb; } @@ -2012,6 +2472,25 @@ queue_unlock(struct futex_hash_bucket *hb) hb_waiters_dec(hb); } +static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) +{ + int prio; + + /* + * The priority used to register this element is + * - either the real thread-priority for the real-time threads + * (i.e. threads with a priority lower than MAX_RT_PRIO) + * - or MAX_RT_PRIO for non-RT threads. + * Thus, all RT-threads are woken first in priority order, and + * the others are woken last, in FIFO order. 
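A standalone illustration of this wakeup-order rule, with hypothetical normal_prio values (lower value means higher priority; the RT range is 0..MAX_RT_PRIO-1, everything else clamps to MAX_RT_PRIO and stays FIFO among equals):

#include <stdio.h>

#define MAX_RT_PRIO 100

static int plist_prio(int normal_prio)
{
        return normal_prio < MAX_RT_PRIO ? normal_prio : MAX_RT_PRIO;
}

int main(void)
{
        int waiters[] = { 10, 50, 120, 120 };  /* two RT, two SCHED_OTHER */

        for (int i = 0; i < 4; i++)
                printf("normal_prio %3d -> plist prio %3d\n",
                       waiters[i], plist_prio(waiters[i]));
        return 0;
}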
+ */ + prio = min(current->normal_prio, MAX_RT_PRIO); + + plist_node_init(&q->list, prio); + plist_add(&q->list, &hb->chain); + q->task = current; +} + /** * queue_me() - Enqueue the futex_q on the futex_hash_bucket * @q: The futex_q to enqueue @@ -2027,21 +2506,7 @@ queue_unlock(struct futex_hash_bucket *hb) static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) __releases(&hb->lock) { - int prio; - - /* - * The priority used to register this element is - * - either the real thread-priority for the real-time threads - * (i.e. threads with a priority lower than MAX_RT_PRIO) - * - or MAX_RT_PRIO for non-RT threads. - * Thus, all RT-threads are woken first in priority order, and - * the others are woken last, in FIFO order. - */ - prio = min(current->normal_prio, MAX_RT_PRIO); - - plist_node_init(&q->list, prio); - plist_add(&q->list, &hb->chain); - q->task = current; + __queue_me(q, hb); spin_unlock(&hb->lock); } @@ -2053,8 +2518,8 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) * be paired with exactly one earlier call to queue_me(). * * Return: - * 1 - if the futex_q was still queued (and we removed unqueued it); - * 0 - if the futex_q was already removed by the waking thread + * - 1 - if the futex_q was still queued (and we removed unqueued it); + * - 0 - if the futex_q was already removed by the waking thread */ static int unqueue_me(struct futex_q *q) { @@ -2096,72 +2561,115 @@ static int unqueue_me(struct futex_q *q) ret = 1; } - drop_futex_key_refs(&q->key); return ret; } /* - * PI futexes can not be requeued and must remove themself from the - * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry - * and dropped here. + * PI futexes can not be requeued and must remove themselves from the + * hash bucket. The hash bucket lock (i.e. lock_ptr) is held. */ static void unqueue_me_pi(struct futex_q *q) - __releases(q->lock_ptr) { __unqueue_futex(q); BUG_ON(!q->pi_state); put_pi_state(q->pi_state); q->pi_state = NULL; - - spin_unlock(q->lock_ptr); } -/* - * Fixup the pi_state owner with the new owner. - * - * Must be called with hash bucket lock held and mm->sem held for non - * private futexes. - */ -static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner) +static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, + struct task_struct *argowner) { - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; - struct task_struct *oldowner = pi_state->owner; - u32 uval, uninitialized_var(curval), newval; - int ret; + struct task_struct *oldowner, *newowner; + u32 uval, curval, newval, newtid; + int err = 0; - /* Owner died? */ - if (!pi_state->owner) - newtid |= FUTEX_OWNER_DIED; + oldowner = pi_state->owner; /* - * We are here either because we stole the rtmutex from the - * previous highest priority waiter or we are the highest priority - * waiter but failed to get the rtmutex the first time. - * We have to replace the newowner TID in the user space variable. + * We are here because either: + * + * - we stole the lock and pi_state->owner needs updating to reflect + * that (@argowner == current), + * + * or: + * + * - someone stole our lock and we need to fix things to point to the + * new owner (@argowner == NULL). + * + * Either way, we have to replace the TID in the user space variable. * This must be atomic as we have to preserve the owner died bit here. 
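The user-space side of the protocol being fixed up here is, per the documented PI futex ABI, a TID-based compare-and-swap fast path that only enters the kernel on contention. A minimal sketch, with no robust-list or owner-died handling:

/*
 * PI futex fast path: 0 <-> TID transitions in user space, syscall
 * only when contended. Minimal sketch of the documented ABI.
 */
#define _GNU_SOURCE
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

static void pi_lock(_Atomic uint32_t *futex)
{
        uint32_t zero = 0;

        /* Uncontended fast path: 0 -> TID, no syscall. */
        if (atomic_compare_exchange_strong(futex, &zero, (uint32_t)gettid()))
                return;
        /* Contended: the kernel sets FUTEX_WAITERS and handles boosting. */
        syscall(SYS_futex, futex, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

static void pi_unlock(_Atomic uint32_t *futex)
{
        uint32_t tid = (uint32_t)gettid();

        /* Uncontended fast path: TID -> 0; flag bits force the slow path. */
        if (atomic_compare_exchange_strong(futex, &tid, 0))
                return;
        syscall(SYS_futex, futex, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}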
* * Note: We write the user space value _before_ changing the pi_state * because we can fault here. Imagine swapped out pages or a fork * that marked all the anonymous memory readonly for cow. * - * Modifying pi_state _before_ the user space value would - * leave the pi_state in an inconsistent state when we fault - * here, because we need to drop the hash bucket lock to - * handle the fault. This might be observed in the PID check - * in lookup_pi_state. + * Modifying pi_state _before_ the user space value would leave the + * pi_state in an inconsistent state when we fault here, because we + * need to drop the locks to handle the fault. This might be observed + * in the PID checks when attaching to PI state . */ retry: - if (get_futex_value_locked(&uval, uaddr)) - goto handle_fault; + if (!argowner) { + if (oldowner != current) { + /* + * We raced against a concurrent self; things are + * already fixed up. Nothing to do. + */ + return 0; + } - while (1) { + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { + /* We got the lock. pi_state is correct. Tell caller. */ + return 1; + } + + /* + * The trylock just failed, so either there is an owner or + * there is a higher priority waiter than this one. + */ + newowner = rt_mutex_owner(&pi_state->pi_mutex); + /* + * If the higher priority waiter has not yet taken over the + * rtmutex then newowner is NULL. We can't return here with + * that state because it's inconsistent vs. the user space + * state. So drop the locks and try again. It's a valid + * situation and not any different from the other retry + * conditions. + */ + if (unlikely(!newowner)) { + err = -EAGAIN; + goto handle_err; + } + } else { + WARN_ON_ONCE(argowner != current); + if (oldowner == current) { + /* + * We raced against a concurrent self; things are + * already fixed up. Nothing to do. + */ + return 1; + } + newowner = argowner; + } + + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; + + err = get_futex_value_locked(&uval, uaddr); + if (err) + goto handle_err; + + for (;;) { newval = (uval & FUTEX_OWNER_DIED) | newtid; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - goto handle_fault; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (err) + goto handle_err; + if (curval == uval) break; uval = curval; @@ -2171,48 +2679,88 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * We fixed up user space. Now we need to fix the pi_state * itself. */ - if (pi_state->owner != NULL) { - raw_spin_lock_irq(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); - raw_spin_unlock_irq(&pi_state->owner->pi_lock); - } + pi_state_update_owner(pi_state, newowner); - pi_state->owner = newowner; - - raw_spin_lock_irq(&newowner->pi_lock); - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &newowner->pi_state_list); - raw_spin_unlock_irq(&newowner->pi_lock); - return 0; + return argowner == current; /* - * To handle the page fault we need to drop the hash bucket - * lock here. That gives the other task (either the highest priority - * waiter itself or the task which stole the rtmutex) the - * chance to try the fixup of the pi_state. So once we are - * back from handling the fault we need to check the pi_state - * after reacquiring the hash bucket lock and before trying to - * do another fixup. When the fixup has been done already we - * simply return. 
+ * In order to reschedule or handle a page fault, we need to drop the + * locks here. In the case of a fault, this gives the other task + * (either the highest priority waiter itself or the task which stole + * the rtmutex) the chance to try the fixup of the pi_state. So once we + * are back from handling the fault we need to check the pi_state after + * reacquiring the locks and before trying to do another fixup. When + * the fixup has been done already we simply return. + * + * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely + * drop hb->lock since the caller owns the hb -> futex_q relation. + * Dropping the pi_mutex->wait_lock requires the state revalidate. */ -handle_fault: +handle_err: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(q->lock_ptr); - ret = fault_in_user_writeable(uaddr); + switch (err) { + case -EFAULT: + err = fault_in_user_writeable(uaddr); + break; + + case -EAGAIN: + cond_resched(); + err = 0; + break; + + default: + WARN_ON_ONCE(1); + break; + } spin_lock(q->lock_ptr); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); /* * Check if someone else fixed it for us: */ if (pi_state->owner != oldowner) - return 0; + return argowner == current; - if (ret) - return ret; + /* Retry if err was -EAGAIN or the fault in succeeded */ + if (!err) + goto retry; - goto retry; + /* + * fault_in_user_writeable() failed so user state is immutable. At + * best we can make the kernel state consistent but user state will + * be most likely hosed and any subsequent unlock operation will be + * rejected due to PI futex rule [10]. + * + * Ensure that the rtmutex owner is also the pi_state owner despite + * the user space value claiming something different. There is no + * point in unlocking the rtmutex if current is the owner as it + * would need to wait until the next waiter has taken the rtmutex + * to guarantee consistent state. Keep it simple. Userspace asked + * for this wreckaged state. + * + * The rtmutex has an owner - either current or some other + * task. See the EAGAIN loop above. + */ + pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); + + return err; +} + +static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, + struct task_struct *argowner) +{ + struct futex_pi_state *pi_state = q->pi_state; + int ret; + + lockdep_assert_held(q->lock_ptr); + + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + ret = __fixup_pi_state_owner(uaddr, q, argowner); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + return ret; } static long futex_wait_restart(struct restart_block *restart); @@ -2228,66 +2776,45 @@ static long futex_wait_restart(struct restart_block *restart); * acquire the lock. Must be called with the hb lock held. * * Return: - * 1 - success, lock taken; - * 0 - success, lock not taken; - * <0 - on error (-EFAULT) + * - 1 - success, lock taken; + * - 0 - success, lock not taken; + * - <0 - on error (-EFAULT) */ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) { - struct task_struct *owner; - int ret = 0; - if (locked) { /* * Got the lock. We might not be the anticipated owner if we * did a lock-steal - fix up the PI-state in that case: + * + * Speculative pi_state->owner read (we don't hold wait_lock); + * since we own the lock pi_state->owner == current is the + * stable state, anything else needs more attention. 
*/ if (q->pi_state->owner != current) - ret = fixup_pi_state_owner(uaddr, q, current); - goto out; + return fixup_pi_state_owner(uaddr, q, current); + return 1; } /* - * Catch the rare case, where the lock was released when we were on the - * way back before we locked the hash bucket. + * If we didn't get the lock; check if anybody stole it from us. In + * that case, we need to fix up the uval to point to them instead of + * us, otherwise bad things happen. [10] + * + * Another speculative read; pi_state->owner == current is unstable + * but needs our attention. */ - if (q->pi_state->owner == current) { - /* - * Try to get the rt_mutex now. This might fail as some other - * task acquired the rt_mutex after we removed ourself from the - * rt_mutex waiters list. - */ - if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { - locked = 1; - goto out; - } - - /* - * pi_state is incorrect, some other task did a lock steal and - * we returned due to timeout or signal without taking the - * rt_mutex. Too late. - */ - raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); - owner = rt_mutex_owner(&q->pi_state->pi_mutex); - if (!owner) - owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); - raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); - ret = fixup_pi_state_owner(uaddr, q, owner); - goto out; - } + if (q->pi_state->owner == current) + return fixup_pi_state_owner(uaddr, q, NULL); /* * Paranoia check. If we did not take the lock, then we should not be - * the owner of the rt_mutex. + * the owner of the rt_mutex. Warn and establish consistent state. */ - if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) - printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " - "pi-state %p\n", ret, - q->pi_state->pi_mutex.owner, - q->pi_state->owner); + if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) + return fixup_pi_state_owner(uaddr, q, current); -out: - return ret ? ret : locked; + return 0; } /** @@ -2310,7 +2837,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, /* Arm the timer */ if (timeout) - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); /* * If we have been removed from the hash list, then another task @@ -2338,12 +2865,11 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * * Setup the futex_q and locate the hash_bucket. Get the futex value and * compare it with the expected value. Handle atomic faults internally. - * Return with the hb lock held and a q.key reference on success, and unlocked - * with no q.key reference on failure. + * Return with the hb lock held on success, and unlocked on failure. * * Return: - * 0 - uaddr contains val and hb has been locked; - * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked + * - 0 - uaddr contains val and hb has been locked; + * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked */ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, struct futex_q *q, struct futex_hash_bucket **hb) @@ -2370,7 +2896,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * while the syscall executes. 
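The matching user-space pattern is the load/compare/FUTEX_WAIT loop: EAGAIN from the syscall just means the value changed between the load and the kernel's re-read, and spurious wakeups are handled by looping. A minimal sketch assuming C11 atomics:

/*
 * Block while *futex == val; re-check on EAGAIN, EINTR and spurious
 * wakeups. Minimal sketch, no timeout handling.
 */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>
#include <errno.h>

static void futex_wait_while(_Atomic uint32_t *futex, uint32_t val)
{
        while (atomic_load(futex) == val) {
                if (syscall(SYS_futex, futex, FUTEX_WAIT, val,
                            NULL, NULL, 0) == -1 &&
                    errno != EAGAIN && errno != EINTR)
                        break;
        }
}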
*/ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); if (unlikely(ret != 0)) return ret; @@ -2384,12 +2910,11 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, ret = get_user(uval, uaddr); if (ret) - goto out; + return ret; if (!(flags & FLAGS_SHARED)) goto retry_private; - put_futex_key(&q->key); goto retry; } @@ -2398,16 +2923,13 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, ret = -EWOULDBLOCK; } -out: - if (ret) - put_futex_key(&q->key); return ret; } static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) { - struct hrtimer_sleeper timeout, *to = NULL; + struct hrtimer_sleeper timeout, *to; struct restart_block *restart; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; @@ -2417,21 +2939,12 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, return -EINVAL; q.bitset = bitset; - if (abs_time) { - to = &timeout; - - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? - CLOCK_REALTIME : CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); - hrtimer_init_sleeper(to, current); - hrtimer_set_expires_range_ns(&to->timer, *abs_time, - current->timer_slack_ns); - } - + to = futex_setup_timer(abs_time, &timeout, flags, + current->timer_slack_ns); retry: /* - * Prepare to wait on uaddr. On success, holds hb lock and increments - * q.key refs. + * Prepare to wait on uaddr. On success, it holds hb->lock and q + * is initialized. */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) @@ -2442,7 +2955,6 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, /* If we were woken (and unqueued), we succeeded, whatever. 
*/ ret = 0; - /* unqueue_me() drops q.key ref */ if (!unqueue_me(&q)) goto out; ret = -ETIMEDOUT; @@ -2461,14 +2973,13 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, goto out; restart = ¤t->restart_block; - restart->fn = futex_wait_restart; restart->futex.uaddr = uaddr; restart->futex.val = val; - restart->futex.time = abs_time->tv64; + restart->futex.time = *abs_time; restart->futex.bitset = bitset; restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; - ret = -ERESTART_RESTARTBLOCK; + ret = set_restart_fn(restart, futex_wait_restart); out: if (to) { @@ -2485,7 +2996,7 @@ static long futex_wait_restart(struct restart_block *restart) ktime_t t, *tp = NULL; if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { - t.tv64 = restart->futex.time; + t = restart->futex.time; tp = &t; } restart->fn = do_no_restart_syscall; @@ -2507,31 +3018,31 @@ static long futex_wait_restart(struct restart_block *restart) static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { - struct hrtimer_sleeper timeout, *to = NULL; + struct hrtimer_sleeper timeout, *to; + struct task_struct *exiting = NULL; + struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; int res, ret; + if (!IS_ENABLED(CONFIG_FUTEX_PI)) + return -ENOSYS; + if (refill_pi_state_cache()) return -ENOMEM; - if (time) { - to = &timeout; - hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, - HRTIMER_MODE_ABS); - hrtimer_init_sleeper(to, current); - hrtimer_set_expires(&to->timer, *time); - } + to = futex_setup_timer(time, &timeout, flags, 0); retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; retry_private: hb = queue_lock(&q); - ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); + ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, + &exiting, 0); if (unlikely(ret)) { /* * Atomic work succeeded and we got the lock, @@ -2544,15 +3055,21 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, goto out_unlock_put_key; case -EFAULT: goto uaddr_faulted; + case -EBUSY: case -EAGAIN: /* * Two reasons for this: - * - Task is exiting and we just wait for the + * - EBUSY: Task is exiting and we just wait for the * exit to complete. - * - The user space value changed. + * - EAGAIN: The user space value changed. */ queue_unlock(hb); - put_futex_key(&q.key); + /* + * Handle the case where the owner is in the middle of + * exiting. Wait for the exit to complete otherwise + * this task might loop forever, aka. live lock. + */ + wait_for_owner_exiting(ret, exiting); cond_resched(); goto retry; default: @@ -2560,56 +3077,95 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, } } + WARN_ON(!q.pi_state); + /* * Only actually queue now that the atomic ops are done: */ - queue_me(&q, hb); + __queue_me(&q, hb); - WARN_ON(!q.pi_state); - /* - * Block on the PI mutex: - */ - if (!trylock) { - ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); - } else { - ret = rt_mutex_trylock(&q.pi_state->pi_mutex); + if (trylock) { + ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); /* Fixup the trylock return value: */ ret = ret ? 
0 : -EWOULDBLOCK; + goto no_block; } + rt_mutex_init_waiter(&rt_waiter); + + /* + * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not + * hold it while doing rt_mutex_start_proxy(), because then it will + * include hb->lock in the blocking chain, even through we'll not in + * fact hold it while blocking. This will lead it to report -EDEADLK + * and BUG when futex_unlock_pi() interleaves with this. + * + * Therefore acquire wait_lock while holding hb->lock, but drop the + * latter before calling __rt_mutex_start_proxy_lock(). This + * interleaves with futex_unlock_pi() -- which does a similar lock + * handoff -- such that the latter can observe the futex_q::pi_state + * before __rt_mutex_start_proxy_lock() is done. + */ + raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); + spin_unlock(q.lock_ptr); + /* + * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter + * such that futex_unlock_pi() is guaranteed to observe the waiter when + * it sees the futex_q::pi_state. + */ + ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); + raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); + + if (ret) { + if (ret == 1) + ret = 0; + goto cleanup; + } + + if (unlikely(to)) + hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); + + ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); + +cleanup: spin_lock(q.lock_ptr); + /* + * If we failed to acquire the lock (deadlock/signal/timeout), we must + * first acquire the hb->lock before removing the lock from the + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait + * lists consistent. + * + * In particular; it is important that futex_unlock_pi() can not + * observe this inconsistency. + */ + if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) + ret = 0; + +no_block: /* * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. */ res = fixup_owner(uaddr, &q, !ret); /* - * If fixup_owner() returned an error, proprogate that. If it acquired + * If fixup_owner() returned an error, propagate that. If it acquired * the lock, clear our -ETIMEDOUT or -EINTR. */ if (res) ret = (res < 0) ? res : 0; - /* - * If fixup_owner() faulted and was unable to handle the fault, unlock - * it and return the fault to userspace. - */ - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) - rt_mutex_unlock(&q.pi_state->pi_mutex); - - /* Unqueue and drop the lock */ unqueue_me_pi(&q); - - goto out_put_key; + spin_unlock(q.lock_ptr); + goto out; out_unlock_put_key: queue_unlock(hb); -out_put_key: - put_futex_key(&q.key); out: - if (to) + if (to) { + hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); + } return ret != -EINTR ? 
ret : -ERESTARTNOINTR; uaddr_faulted: @@ -2617,12 +3173,11 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ret = fault_in_user_writeable(uaddr); if (ret) - goto out_put_key; + goto out; if (!(flags & FLAGS_SHARED)) goto retry_private; - put_futex_key(&q.key); goto retry; } @@ -2633,12 +3188,15 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, */ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) { - u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current); + u32 curval, uval, vpid = task_pid_vnr(current); union futex_key key = FUTEX_KEY_INIT; struct futex_hash_bucket *hb; - struct futex_q *match; + struct futex_q *top_waiter; int ret; + if (!IS_ENABLED(CONFIG_FUTEX_PI)) + return -ENOSYS; + retry: if (get_user(uval, uaddr)) return -EFAULT; @@ -2648,7 +3206,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE); if (ret) return ret; @@ -2660,15 +3218,45 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * all and we at least want to know if user space fiddled * with the futex value instead of blindly unlocking. */ - match = futex_top_waiter(hb, &key); - if (match) { - ret = wake_futex_pi(uaddr, uval, match, hb); + top_waiter = futex_top_waiter(hb, &key); + if (top_waiter) { + struct futex_pi_state *pi_state = top_waiter->pi_state; + + ret = -EINVAL; + if (!pi_state) + goto out_unlock; + /* - * In case of success wake_futex_pi dropped the hash - * bucket lock. + * If current does not own the pi_state then the futex is + * inconsistent and user space fiddled with the futex value. + */ + if (pi_state->owner != current) + goto out_unlock; + + get_pi_state(pi_state); + /* + * By taking wait_lock while still holding hb->lock, we ensure + * there is no point where we hold neither; and therefore + * wake_futex_pi() must observe a state consistent with what we + * observed. + * + * In particular; this forces __rt_mutex_start_proxy() to + * complete such that we're guaranteed to observe the + * rt_waiter. Also see the WARN in wake_futex_pi(). + */ + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + spin_unlock(&hb->lock); + + /* drops pi_state->pi_mutex.wait_lock */ + ret = wake_futex_pi(uaddr, uval, pi_state); + + put_pi_state(pi_state); + + /* + * Success, we're done! No tricky corner cases. */ if (!ret) - goto out_putkey; + return ret; /* * The atomic access to the futex value generated a * pagefault, so retry the user-access and the wakeup: @@ -2679,16 +3267,13 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * A unconditional UNLOCK_PI op raced against a waiter * setting the FUTEX_WAITERS bit. Try again. */ - if (ret == -EAGAIN) { - spin_unlock(&hb->lock); - put_futex_key(&key); - goto retry; - } + if (ret == -EAGAIN) + goto pi_retry; /* * wake_futex_pi has detected invalid state. Tell user * space. */ - goto out_unlock; + return ret; } /* @@ -2698,8 +3283,20 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * preserve the WAITERS bit not the OWNER_DIED one. We are the * owner. 
*/ - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) - goto pi_faulted; + if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) { + spin_unlock(&hb->lock); + switch (ret) { + case -EFAULT: + goto pi_faulted; + + case -EAGAIN: + goto pi_retry; + + default: + WARN_ON_ONCE(1); + return ret; + } + } /* * If uval has changed, let user space handle it. @@ -2708,13 +3305,13 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) out_unlock: spin_unlock(&hb->lock); -out_putkey: - put_futex_key(&key); return ret; +pi_retry: + cond_resched(); + goto retry; + pi_faulted: - spin_unlock(&hb->lock); - put_futex_key(&key); ret = fault_in_user_writeable(uaddr); if (!ret) @@ -2724,27 +3321,22 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) } /** - * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex + * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex * @hb: the hash_bucket futex_q was original enqueued on * @q: the futex_q woken while waiting to be requeued - * @key2: the futex_key of the requeue target futex * @timeout: the timeout associated with the wait (NULL if none) * - * Detect if the task was woken on the initial futex as opposed to the requeue - * target futex. If so, determine if it was a timeout or a signal that caused - * the wakeup and return the appropriate error code to the caller. Must be - * called with the hb lock held. + * Determine the cause for the early wakeup. * * Return: - * 0 = no early wakeup detected; - * <0 = -ETIMEDOUT or -ERESTARTNOINTR + * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR */ static inline int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, - struct futex_q *q, union futex_key *key2, + struct futex_q *q, struct hrtimer_sleeper *timeout) { - int ret = 0; + int ret; /* * With the hb lock held, we avoid races while we process the wakeup. @@ -2753,22 +3345,21 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * It can't be requeued from uaddr2 to something else since we don't * support a PI aware source futex for requeue. */ - if (!match_futex(&q->key, key2)) { - WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); - /* - * We were woken prior to requeue by a timeout or a signal. - * Unqueue the futex_q and determine which it was. - */ - plist_del(&q->list, &hb->chain); - hb_waiters_dec(hb); + WARN_ON_ONCE(&hb->lock != q->lock_ptr); - /* Handle spurious wakeups gracefully */ - ret = -EWOULDBLOCK; - if (timeout && !timeout->task) - ret = -ETIMEDOUT; - else if (signal_pending(current)) - ret = -ERESTARTNOINTR; - } + /* + * We were woken prior to requeue by a timeout or a signal. + * Unqueue the futex_q and determine which it was. + */ + plist_del(&q->list, &hb->chain); + hb_waiters_dec(hb); + + /* Handle spurious wakeups gracefully */ + ret = -EWOULDBLOCK; + if (timeout && !timeout->task) + ret = -ETIMEDOUT; + else if (signal_pending(current)) + ret = -ERESTARTNOINTR; return ret; } @@ -2809,46 +3400,40 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * If 4 or 7, we cleanup and return with -ETIMEDOUT. 
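For orientation, the two requeue-PI operations pair up in user space roughly the way a condvar implementation over a PI mutex would use them. cond_wait_pi() and cond_signal_pi() below are hypothetical helper names, @seq stands in for a condition generation counter, and per the nr_wake restriction in futex_requeue() the signal side always passes nr_wake == 1:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stddef.h>

/* Waiter: block on the condvar word, wake up owning the PI mutex. */
static long cond_wait_pi(uint32_t *cond, uint32_t seq, uint32_t *pi_mutex)
{
        return syscall(SYS_futex, cond, FUTEX_WAIT_REQUEUE_PI, seq,
                       NULL /* no timeout */, pi_mutex, 0);
}

/*
 * Signaler: wake at most one waiter and requeue the rest onto the
 * PI mutex; nr_requeue travels in the timeout argument slot.
 */
static long cond_signal_pi(uint32_t *cond, uint32_t seq, uint32_t *pi_mutex,
                           int nr_requeue)
{
        return syscall(SYS_futex, cond, FUTEX_CMP_REQUEUE_PI, 1,
                       (unsigned long)nr_requeue, pi_mutex, seq);
}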
* * Return: - * 0 - On success; - * <0 - On error + * - 0 - On success; + * - <0 - On error */ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset, u32 __user *uaddr2) { - struct hrtimer_sleeper timeout, *to = NULL; + struct hrtimer_sleeper timeout, *to; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; + struct rt_mutex_base *pi_mutex; int res, ret; + if (!IS_ENABLED(CONFIG_FUTEX_PI)) + return -ENOSYS; + if (uaddr == uaddr2) return -EINVAL; if (!bitset) return -EINVAL; - if (abs_time) { - to = &timeout; - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? - CLOCK_REALTIME : CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); - hrtimer_init_sleeper(to, current); - hrtimer_set_expires_range_ns(&to->timer, *abs_time, - current->timer_slack_ns); - } + to = futex_setup_timer(abs_time, &timeout, flags, + current->timer_slack_ns); /* * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ - debug_rt_mutex_init_waiter(&rt_waiter); - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); - RB_CLEAR_NODE(&rt_waiter.tree_entry); - rt_waiter.task = NULL; + rt_mutex_init_waiter(&rt_waiter); - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; @@ -2857,12 +3442,12 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, q.requeue_pi_key = &key2; /* - * Prepare to wait on uaddr. On success, increments q.key (key1) ref - * count. + * Prepare to wait on uaddr. On success, it holds hb->lock and q + * is initialized. */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) - goto out_key2; + goto out; /* * The check above which compares uaddrs is not sufficient for @@ -2871,99 +3456,81 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (match_futex(&q.key, &key2)) { queue_unlock(hb); ret = -EINVAL; - goto out_put_keys; + goto out; } /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); - spin_lock(&hb->lock); - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); - spin_unlock(&hb->lock); - if (ret) - goto out_put_keys; + switch (futex_requeue_pi_wakeup_sync(&q)) { + case Q_REQUEUE_PI_IGNORE: + /* The waiter is still on uaddr1 */ + spin_lock(&hb->lock); + ret = handle_early_requeue_pi_wakeup(hb, &q, to); + spin_unlock(&hb->lock); + break; - /* - * In order for us to be here, we know our q.key == key2, and since - * we took the hb->lock above, we also know that futex_requeue() has - * completed and we no longer have to concern ourselves with a wakeup - * race with the atomic proxy lock acquisition by the requeue code. The - * futex_requeue dropped our key1 reference and incremented our key2 - * reference count. - */ - - /* Check if the requeue code acquired the second futex for us. */ - if (!q.rt_waiter) { - /* - * Got the lock. We might not be the anticipated owner if we - * did a lock-steal - fix up the PI-state in that case. 
- */ + case Q_REQUEUE_PI_LOCKED: + /* The requeue acquired the lock */ if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); - ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) - rt_mutex_unlock(&q.pi_state->pi_mutex); + ret = fixup_owner(uaddr2, &q, true); /* - * Drop the reference to the pi state which - * the requeue_pi() code acquired for us. + * Drop the reference to the pi state which the + * requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); spin_unlock(q.lock_ptr); + /* + * Adjust the return value. It's either -EFAULT or + * success (1) but the caller expects 0 for success. + */ + ret = ret < 0 ? ret : 0; } - } else { - struct rt_mutex *pi_mutex; + break; - /* - * We have been woken up by futex_unlock_pi(), a timeout, or a - * signal. futex_unlock_pi() will not destroy the lock_ptr nor - * the pi_state. - */ - WARN_ON(!q.pi_state); + case Q_REQUEUE_PI_DONE: + /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */ pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); - debug_rt_mutex_free_waiter(&rt_waiter); + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); + /* Current is not longer pi_blocked_on */ spin_lock(q.lock_ptr); + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) + ret = 0; + + debug_rt_mutex_free_waiter(&rt_waiter); /* * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. */ res = fixup_owner(uaddr2, &q, !ret); /* - * If fixup_owner() returned an error, proprogate that. If it + * If fixup_owner() returned an error, propagate that. If it * acquired the lock, clear -ETIMEDOUT or -EINTR. */ if (res) ret = (res < 0) ? res : 0; - /* - * If fixup_pi_state_owner() faulted and was unable to handle - * the fault, unlock the rt_mutex and return the fault to - * userspace. - */ - if (ret && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - - /* Unqueue and drop the lock. */ unqueue_me_pi(&q); - } + spin_unlock(q.lock_ptr); - if (ret == -EINTR) { - /* - * We've already been requeued, but cannot restart by calling - * futex_lock_pi() directly. We could restart this syscall, but - * it would detect that the user space "val" changed and return - * -EWOULDBLOCK. Save the overhead of the restart and return - * -EWOULDBLOCK directly. - */ - ret = -EWOULDBLOCK; + if (ret == -EINTR) { + /* + * We've already been requeued, but cannot restart + * by calling futex_lock_pi() directly. We could + * restart this syscall, but it would detect that + * the user space "val" changed and return + * -EWOULDBLOCK. Save the overhead of the restart + * and return -EWOULDBLOCK directly. 
+ */ + ret = -EWOULDBLOCK; + } + break; + default: + BUG(); } -out_put_keys: - put_futex_key(&q.key); -out_key2: - put_futex_key(&key2); - out: if (to) { hrtimer_cancel(&to->timer); @@ -3053,54 +3620,115 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, return ret; } +/* Constants for the pending_op argument of handle_futex_death */ +#define HANDLE_DEATH_PENDING true +#define HANDLE_DEATH_LIST false + /* * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ -int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) +static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, + bool pi, bool pending_op) { - u32 uval, uninitialized_var(nval), mval; + u32 uval, nval, mval; + int err; + + /* Futex address must be 32bit aligned */ + if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) + return -1; retry: if (get_user(uval, uaddr)) return -1; - if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) { - /* - * Ok, this dying thread is truly holding a futex - * of interest. Set the OWNER_DIED bit atomically - * via cmpxchg, and if the value had FUTEX_WAITERS - * set, wake up a waiter (if any). (We have to do a - * futex_wake() even if OWNER_DIED is already set - - * to handle the rare but possible case of recursive - * thread-death.) The rest of the cleanup is done in - * userspace. - */ - mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - /* - * We are not holding a lock here, but we want to have - * the pagefault_disable/enable() protection because - * we want to handle the fault gracefully. If the - * access fails we try to fault in the futex with R/W - * verification via get_user_pages. get_user() above - * does not guarantee R/W access. If that fails we - * give up and leave the futex locked. - */ - if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { + /* + * Special case for regular (non PI) futexes. The unlock path in + * user space has two race scenarios: + * + * 1. The unlock path releases the user space futex value and + * before it can execute the futex() syscall to wake up + * waiters it is killed. + * + * 2. A woken up waiter is killed before it can acquire the + * futex in user space. + * + * In both cases the TID validation below prevents a wakeup of + * potential waiters which can cause these waiters to block + * forever. + * + * In both cases the following conditions are met: + * + * 1) task->robust_list->list_op_pending != NULL + * @pending_op == true + * 2) User space futex value == 0 + * 3) Regular futex: @pi == false + * + * If these conditions are met, it is safe to attempt waking up a + * potential waiter without touching the user space futex value and + * trying to set the OWNER_DIED bit. The user space futex value is + * uncontended and the rest of the user space mutex state is + * consistent, so a woken waiter will just take over the + * uncontended futex. Setting the OWNER_DIED bit would create + * inconsistent state and malfunction of the user space owner died + * handling. + */ + if (pending_op && !pi && !uval) { + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + return 0; + } + + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) + return 0; + + /* + * Ok, this dying thread is truly holding a futex + * of interest. Set the OWNER_DIED bit atomically + * via cmpxchg, and if the value had FUTEX_WAITERS + * set, wake up a waiter (if any). (We have to do a + * futex_wake() even if OWNER_DIED is already set - + * to handle the rare but possible case of recursive + * thread-death.) 
The rest of the cleanup is done in + * userspace. + */ + mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; + + /* + * We are not holding a lock here, but we want to have + * the pagefault_disable/enable() protection because + * we want to handle the fault gracefully. If the + * access fails we try to fault in the futex with R/W + * verification via get_user_pages. get_user() above + * does not guarantee R/W access. If that fails we + * give up and leave the futex locked. + */ + if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) { + switch (err) { + case -EFAULT: if (fault_in_user_writeable(uaddr)) return -1; goto retry; - } - if (nval != uval) + + case -EAGAIN: + cond_resched(); goto retry; - /* - * Wake robust non-PI futexes here. The wakeup of - * PI futexes happens in exit_pi_state(): - */ - if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + default: + WARN_ON_ONCE(1); + return err; + } } + + if (nval != uval) + goto retry; + + /* + * Wake robust non-PI futexes here. The wakeup of + * PI futexes happens in exit_pi_state(): + */ + if (!pi && (uval & FUTEX_WAITERS)) + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + return 0; } @@ -3128,12 +3756,12 @@ static inline int fetch_robust_entry(struct robust_list __user **entry, * * We silently return on any sign of list-walking problem. */ -void exit_robust_list(struct task_struct *curr) +static void exit_robust_list(struct task_struct *curr) { struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; - unsigned int uninitialized_var(next_pi); + unsigned int next_pi; unsigned long futex_offset; int rc; @@ -3169,10 +3797,11 @@ void exit_robust_list(struct task_struct *curr) * A pending lock might already be on the list, so * don't process it twice: */ - if (entry != pending) + if (entry != pending) { if (handle_futex_death((void __user *)entry + futex_offset, - curr, pi)) + curr, pi, HANDLE_DEATH_LIST)) return; + } if (rc) return; entry = next_entry; @@ -3186,9 +3815,118 @@ void exit_robust_list(struct task_struct *curr) cond_resched(); } - if (pending) + if (pending) { handle_futex_death((void __user *)pending + futex_offset, - curr, pip); + curr, pip, HANDLE_DEATH_PENDING); + } +} + +static void futex_cleanup(struct task_struct *tsk) +{ + if (unlikely(tsk->robust_list)) { + exit_robust_list(tsk); + tsk->robust_list = NULL; + } + +#ifdef CONFIG_COMPAT + if (unlikely(tsk->compat_robust_list)) { + compat_exit_robust_list(tsk); + tsk->compat_robust_list = NULL; + } +#endif + + if (unlikely(!list_empty(&tsk->pi_state_list))) + exit_pi_state_list(tsk); +} + +/** + * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD + * @tsk: task to set the state on + * + * Set the futex exit state of the task lockless. The futex waiter code + * observes that state when a task is exiting and loops until the task has + * actually finished the futex cleanup. The worst case for this is that the + * waiter runs through the wait loop until the state becomes visible. + * + * This is called from the recursive fault handling path in do_exit(). + * + * This is best effort. Either the futex exit code has run already or + * not. If the OWNER_DIED bit has been set on the futex then the waiter can + * take it over. If not, the problem is pushed back to user space. 
If the + * futex exit code did not run yet, then an already queued waiter might + * block forever, but there is nothing which can be done about that. + */ +void futex_exit_recursive(struct task_struct *tsk) +{ + /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ + if (tsk->futex_state == FUTEX_STATE_EXITING) + mutex_unlock(&tsk->futex_exit_mutex); + tsk->futex_state = FUTEX_STATE_DEAD; +} + +static void futex_cleanup_begin(struct task_struct *tsk) +{ + /* + * Prevent various race issues against a concurrent incoming waiter + * including live locks by forcing the waiter to block on + * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in + * attach_to_pi_owner(). + */ + mutex_lock(&tsk->futex_exit_mutex); + + /* + * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. + * + * This ensures that all subsequent checks of tsk->futex_state in + * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with + * tsk->pi_lock held. + * + * It guarantees also that a pi_state which was queued right before + * the state change under tsk->pi_lock by a concurrent waiter must + * be observed in exit_pi_state_list(). + */ + raw_spin_lock_irq(&tsk->pi_lock); + tsk->futex_state = FUTEX_STATE_EXITING; + raw_spin_unlock_irq(&tsk->pi_lock); +} + +static void futex_cleanup_end(struct task_struct *tsk, int state) +{ + /* + * Lockless store. The only side effect is that an observer might + * take another loop until it becomes visible. + */ + tsk->futex_state = state; + /* + * Drop the exit protection. This unblocks waiters which observed + * FUTEX_STATE_EXITING to reevaluate the state. + */ + mutex_unlock(&tsk->futex_exit_mutex); +} + +void futex_exec_release(struct task_struct *tsk) +{ + /* + * The state handling is done for consistency, but in the case of + * exec() there is no way to prevent further damage as the PID stays + * the same. But for the unlikely and arguably buggy case that a + * futex is held on exec(), this provides at least as much state + * consistency protection which is possible. + */ + futex_cleanup_begin(tsk); + futex_cleanup(tsk); + /* + * Reset the state to FUTEX_STATE_OK. The task is alive and about + * exec a new binary. 
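The robust list that futex_cleanup() walks here is registered from user space once per thread via sys_set_robust_list(). A minimal sketch of that registration, omitting the list_op_pending maintenance around acquire/release that a real lock implementation needs:

/*
 * Publish an (initially empty) robust list head for this thread.
 * Minimal sketch of the documented ABI; futex_offset == 0 assumes
 * the futex word sits at the list entry's own address.
 */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stddef.h>

static struct robust_list_head robust_head = {
        .list            = { .next = &robust_head.list }, /* empty list */
        .futex_offset    = 0,
        .list_op_pending = NULL,
};

static long register_robust_list(void)
{
        return syscall(SYS_set_robust_list, &robust_head,
                       sizeof(robust_head));
}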
+ */ + futex_cleanup_end(tsk, FUTEX_STATE_OK); +} + +void futex_exit_release(struct task_struct *tsk) +{ + futex_cleanup_begin(tsk); + futex_cleanup(tsk); + futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, @@ -3202,13 +3940,14 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, if (op & FUTEX_CLOCK_REALTIME) { flags |= FLAGS_CLOCKRT; - if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \ - cmd != FUTEX_WAIT_REQUEUE_PI) + if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && + cmd != FUTEX_LOCK_PI2) return -ENOSYS; } switch (cmd) { case FUTEX_LOCK_PI: + case FUTEX_LOCK_PI2: case FUTEX_UNLOCK_PI: case FUTEX_TRYLOCK_PI: case FUTEX_WAIT_REQUEUE_PI: @@ -3220,10 +3959,12 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; + fallthrough; case FUTEX_WAIT_BITSET: return futex_wait(uaddr, flags, val, timeout, val3); case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; + fallthrough; case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: @@ -3233,6 +3974,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, case FUTEX_WAKE_OP: return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); case FUTEX_LOCK_PI: + flags |= FLAGS_CLOCKRT; + fallthrough; + case FUTEX_LOCK_PI2: return futex_lock_pi(uaddr, flags, timeout, 0); case FUTEX_UNLOCK_PI: return futex_unlock_pi(uaddr, flags); @@ -3248,47 +3992,239 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return -ENOSYS; } +static __always_inline bool futex_cmd_has_timeout(u32 cmd) +{ + switch (cmd) { + case FUTEX_WAIT: + case FUTEX_LOCK_PI: + case FUTEX_LOCK_PI2: + case FUTEX_WAIT_BITSET: + case FUTEX_WAIT_REQUEUE_PI: + return true; + } + return false; +} + +static __always_inline int +futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) +{ + if (!timespec64_valid(ts)) + return -EINVAL; + + *t = timespec64_to_ktime(*ts); + if (cmd == FUTEX_WAIT) + *t = ktime_add_safe(ktime_get(), *t); + else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) + *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); + return 0; +} SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct timespec __user *, utime, u32 __user *, uaddr2, - u32, val3) + const struct __kernel_timespec __user *, utime, + u32 __user *, uaddr2, u32, val3) { - struct timespec ts; + int ret, cmd = op & FUTEX_CMD_MASK; ktime_t t, *tp = NULL; - u32 val2 = 0; - int cmd = op & FUTEX_CMD_MASK; + struct timespec64 ts; - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || - cmd == FUTEX_WAIT_REQUEUE_PI)) { + if (utime && futex_cmd_has_timeout(cmd)) { if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) return -EFAULT; - if (copy_from_user(&ts, utime, sizeof(ts)) != 0) + if (get_timespec64(&ts, utime)) return -EFAULT; - if (!timespec_valid(&ts)) - return -EINVAL; - - t = timespec_to_ktime(ts); - if (cmd == FUTEX_WAIT) - t = ktime_add_safe(ktime_get(), t); + ret = futex_init_timeout(cmd, op, &ts, &t); + if (ret) + return ret; tp = &t; } - /* - * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. - * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. 
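An illustrative caller for the convention this removed comment describes: for the requeue commands (and FUTEX_WAKE_OP) the fourth syscall argument is not a timespec pointer but a plain integer, which is why do_futex() now receives it as an unsigned long. On success the syscall returns the number of waiters woken plus requeued:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <limits.h>

/* Wake one waiter on @uaddr, requeue all others onto @uaddr2. */
static long wake_one_requeue_rest(uint32_t *uaddr, uint32_t *uaddr2,
                                  uint32_t expected)
{
        /* nr_requeue is smuggled through the utime argument slot. */
        return syscall(SYS_futex, uaddr, FUTEX_CMP_REQUEUE, 1,
                       (unsigned long)INT_MAX, uaddr2, expected);
}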
- */ - if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (u32) (unsigned long) utime; - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } +#ifdef CONFIG_COMPAT +/* + * Fetch a robust-list pointer. Bit 0 signals PI futexes: + */ +static inline int +compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, + compat_uptr_t __user *head, unsigned int *pi) +{ + if (get_user(*uentry, head)) + return -EFAULT; + + *entry = compat_ptr((*uentry) & ~1); + *pi = (unsigned int)(*uentry) & 1; + + return 0; +} + +static void __user *futex_uaddr(struct robust_list __user *entry, + compat_long_t futex_offset) +{ + compat_uptr_t base = ptr_to_compat(entry); + void __user *uaddr = compat_ptr(base + futex_offset); + + return uaddr; +} + +/* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. + * + * We silently return on any sign of list-walking problem. + */ +static void compat_exit_robust_list(struct task_struct *curr) +{ + struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct robust_list __user *entry, *next_entry, *pending; + unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; + unsigned int next_pi; + compat_uptr_t uentry, next_uentry, upending; + compat_long_t futex_offset; + int rc; + + if (!futex_cmpxchg_enabled) + return; + + /* + * Fetch the list head (which was registered earlier, via + * sys_set_robust_list()): + */ + if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) + return; + /* + * Fetch the relative futex offset: + */ + if (get_user(futex_offset, &head->futex_offset)) + return; + /* + * Fetch any possibly pending lock-add first, and handle it + * if it exists: + */ + if (compat_fetch_robust_entry(&upending, &pending, + &head->list_op_pending, &pip)) + return; + + next_entry = NULL; /* avoid warning with gcc */ + while (entry != (struct robust_list __user *) &head->list) { + /* + * Fetch the next entry in the list before calling + * handle_futex_death: + */ + rc = compat_fetch_robust_entry(&next_uentry, &next_entry, + (compat_uptr_t __user *)&entry->next, &next_pi); + /* + * A pending lock might already be on the list, so + * dont process it twice: + */ + if (entry != pending) { + void __user *uaddr = futex_uaddr(entry, futex_offset); + + if (handle_futex_death(uaddr, curr, pi, + HANDLE_DEATH_LIST)) + return; + } + if (rc) + return; + uentry = next_uentry; + entry = next_entry; + pi = next_pi; + /* + * Avoid excessively long or circular lists: + */ + if (!--limit) + break; + + cond_resched(); + } + if (pending) { + void __user *uaddr = futex_uaddr(pending, futex_offset); + + handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); + } +} + +COMPAT_SYSCALL_DEFINE2(set_robust_list, + struct compat_robust_list_head __user *, head, + compat_size_t, len) +{ + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + if (unlikely(len != sizeof(*head))) + return -EINVAL; + + current->compat_robust_list = head; + + return 0; +} + +COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + compat_uptr_t __user *, head_ptr, + compat_size_t __user *, len_ptr) +{ + struct compat_robust_list_head __user *head; + unsigned long ret; + struct task_struct *p; + + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + rcu_read_lock(); + + ret = -ESRCH; + if (!pid) + p = current; + else { + p = find_task_by_vpid(pid); + if (!p) 
+ goto err_unlock; + } + + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) + goto err_unlock; + + head = p->compat_robust_list; + rcu_read_unlock(); + + if (put_user(sizeof(*head), len_ptr)) + return -EFAULT; + return put_user(ptr_to_compat(head), head_ptr); + +err_unlock: + rcu_read_unlock(); + + return ret; +} +#endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_COMPAT_32BIT_TIME +SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, + u32, val3) +{ + int ret, cmd = op & FUTEX_CMD_MASK; + ktime_t t, *tp = NULL; + struct timespec64 ts; + + if (utime && futex_cmd_has_timeout(cmd)) { + if (get_old_timespec32(&ts, utime)) + return -EFAULT; + ret = futex_init_timeout(cmd, op, &ts, &t); + if (ret) + return ret; + tp = &t; + } + + return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); +} +#endif /* CONFIG_COMPAT_32BIT_TIME */ + static void __init futex_detect_cmpxchg(void) { #ifndef CONFIG_HAVE_FUTEX_CMPXCHG u32 curval; - mm_segment_t oldfs; /* * This will fail and we want it. Some arch implementations do @@ -3300,11 +4236,8 @@ static void __init futex_detect_cmpxchg(void) * implementation, the non-functional ones will return * -ENOSYS. */ - oldfs = get_fs(); - set_fs(USER_DS); if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) futex_cmpxchg_enabled = 1; - set_fs(oldfs); #endif } diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 04f4ebdc3c..053447183a 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -4,6 +4,7 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS + depends on !CC_IS_CLANG || CLANG_VERSION >= 110000 depends on !ARCH_WANTS_NO_INSTR || CC_HAS_NO_PROFILE_FN_ATTR select CONSTRUCTORS default n diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 52501e5f76..9888e2bc8c 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -63,9 +63,7 @@ static struct task_struct *watchdog_task; * Should we dump all CPUs backtraces in a hung task event? * Defaults to 0, can be changed via sysctl. 
*/ -static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace; -#else -#define sysctl_hung_task_all_cpu_backtrace 0 +unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace; #endif /* CONFIG_SMP */ /* @@ -224,13 +222,11 @@ static long hung_timeout_jiffies(unsigned long last_checked, MAX_SCHEDULE_TIMEOUT; } -#ifdef CONFIG_SYSCTL /* * Process updating of timeout sysctl */ -static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) +int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -245,76 +241,6 @@ static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, return ret; } -/* - * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs - * and hung_task_check_interval_secs - */ -static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ); -static struct ctl_table hung_task_sysctls[] = { -#ifdef CONFIG_SMP - { - .procname = "hung_task_all_cpu_backtrace", - .data = &sysctl_hung_task_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ - { - .procname = "hung_task_panic", - .data = &sysctl_hung_task_panic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "hung_task_check_count", - .data = &sysctl_hung_task_check_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, - { - .procname = "hung_task_timeout_secs", - .data = &sysctl_hung_task_timeout_secs, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_dohung_task_timeout_secs, - .extra2 = (void *)&hung_task_timeout_max, - }, - { - .procname = "hung_task_check_interval_secs", - .data = &sysctl_hung_task_check_interval_secs, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_dohung_task_timeout_secs, - .extra2 = (void *)&hung_task_timeout_max, - }, - { - .procname = "hung_task_warnings", - .data = &sysctl_hung_task_warnings, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_NEG_ONE, - }, - {} -}; - -static void __init hung_task_sysctl_init(void) -{ - register_sysctl_init("kernel", hung_task_sysctls); -} -#else -#define hung_task_sysctl_init() do { } while (0) -#endif /* CONFIG_SYSCTL */ - - static atomic_t reset_hung_task = ATOMIC_INIT(0); void reset_hung_task_detector(void) @@ -384,7 +310,6 @@ static int __init hung_task_init(void) pm_notifier(hungtask_pm_notify, 0); watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); - hung_task_sysctl_init(); return 0; } diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 10929eda98..fbc54c2a7f 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -97,6 +97,9 @@ config GENERIC_MSI_IRQ_DOMAIN config IRQ_MSI_IOMMU bool +config HANDLE_DOMAIN_IRQ + bool + config IRQ_TIMINGS bool @@ -141,10 +144,3 @@ config GENERIC_IRQ_MULTI_HANDLER bool help Allow to specify the low level IRQ handler at run time. 
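The hung_task.c hunk above folds these knobs back into the central kernel/sysctl.c table; what it deletes is the per-subsystem registration pattern. A minimal sketch of that pattern follows, using a made-up demo_flag knob — the ctl_table fields, the proc_dointvec_minmax handler, and the SYSCTL_ZERO/SYSCTL_ONE bounds are taken from the deleted table itself; everything named demo_* is illustrative:

#include <linux/sysctl.h>

static unsigned int sysctl_demo_flag;

static struct ctl_table demo_sysctls[] = {
	{
		.procname	= "demo_flag",
		.data		= &sysctl_demo_flag,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,	/* clamp writes to 0..1 */
		.extra2		= SYSCTL_ONE,
	},
	{}	/* empty entry terminates the table */
};

static void __init demo_sysctl_init(void)
{
	/* exposes /proc/sys/kernel/demo_flag */
	register_sysctl_init("kernel", demo_sysctls);
}

Reverting to the central table trades this per-file locality for a single registration point at boot.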
- -# Cavium Octeon is the last system to use this deprecated option -# Do not even think of enabling this on any new platform -config DEPRECATED_IRQ_CPU_ONOFFLINE - bool - depends on CAVIUM_OCTEON_SOC - default CAVIUM_OCTEON_SOC diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c093246630..a98bcfc4be 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -575,6 +575,8 @@ EXPORT_SYMBOL_GPL(handle_simple_irq); */ void handle_untracked_irq(struct irq_desc *desc) { + unsigned int flags = 0; + raw_spin_lock(&desc->lock); if (!irq_may_run(desc)) @@ -591,7 +593,7 @@ void handle_untracked_irq(struct irq_desc *desc) irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock(&desc->lock); - __handle_irq_event_percpu(desc); + __handle_irq_event_percpu(desc, &flags); raw_spin_lock(&desc->lock); irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); @@ -1120,7 +1122,6 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) } EXPORT_SYMBOL_GPL(irq_modify_status); -#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE /** * irq_cpu_online - Invoke all irq_cpu_online functions. * @@ -1180,7 +1181,6 @@ void irq_cpu_offline(void) raw_spin_unlock_irqrestore(&desc->lock, flags); } } -#endif #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index f0862eb6b5..cc7cdd26e2 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -25,7 +25,6 @@ static DEFINE_RAW_SPINLOCK(gc_lock); void irq_gc_noop(struct irq_data *d) { } -EXPORT_SYMBOL_GPL(irq_gc_noop); /** * irq_gc_mask_disable_reg - Mask chip via disable register @@ -45,7 +44,6 @@ void irq_gc_mask_disable_reg(struct irq_data *d) *ct->mask_cache &= ~mask; irq_gc_unlock(gc); } -EXPORT_SYMBOL_GPL(irq_gc_mask_disable_reg); /** * irq_gc_mask_set_bit - Mask chip via setting bit in mask register @@ -105,7 +103,6 @@ void irq_gc_unmask_enable_reg(struct irq_data *d) *ct->mask_cache |= mask; irq_gc_unlock(gc); } -EXPORT_SYMBOL_GPL(irq_gc_unmask_enable_reg); /** * irq_gc_ack_set_bit - Ack pending interrupt via setting bit @@ -451,7 +448,7 @@ static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) } -const struct irq_domain_ops irq_generic_chip_ops = { +struct irq_domain_ops irq_generic_chip_ops = { .map = irq_map_generic_chip, .unmap = irq_unmap_generic_chip, .xlate = irq_domain_xlate_onetwocell, diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 9489f93b3d..221d80c31e 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -14,8 +14,6 @@ #include #include -#include - #include #include "internals.h" @@ -136,7 +134,7 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) wake_up_process(action->thread); } -irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags) { irqreturn_t retval = IRQ_NONE; unsigned int irq = desc->irq_data.irq; @@ -174,6 +172,10 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) } __irq_wake_thread(desc, action); + + fallthrough; /* to add to randomness */ + case IRQ_HANDLED: + *flags |= action->flags; break; default: @@ -189,10 +191,11 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval; + unsigned int flags = 0; - retval = __handle_irq_event_percpu(desc); + retval = __handle_irq_event_percpu(desc, &flags); - add_interrupt_randomness(desc->irq_data.irq); + add_interrupt_randomness(desc->irq_data.irq, flags); if 
(!irq_settings_no_debug(desc)) note_interrupt(desc, retval); @@ -223,20 +226,4 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) handle_arch_irq = handle_irq; return 0; } - -/** - * generic_handle_arch_irq - root irq handler for architectures which do no - * entry accounting themselves - * @regs: Register file coming from the low-level handling code - */ -asmlinkage void noinstr generic_handle_arch_irq(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - - irq_enter(); - old_regs = set_irq_regs(regs); - handle_arch_irq(regs); - set_irq_regs(old_regs); - irq_exit(); -} #endif diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 99cbdf55a8..54363527fe 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -103,7 +103,7 @@ extern int __irq_get_irqchip_state(struct irq_data *data, extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); -irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc); +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event(struct irq_desc *desc); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2267e6527d..4e3c29bb60 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -646,16 +646,13 @@ int handle_irq_desc(struct irq_desc *desc) generic_handle_irq_desc(desc); return 0; } +EXPORT_SYMBOL_GPL(handle_irq_desc); /** * generic_handle_irq - Invoke the handler for a particular irq * @irq: The irq number to handle * - * Returns: 0 on success, or -EINVAL if conversion has failed - * - * This function must be called from an IRQ context with irq regs - * initialized. - */ + */ int generic_handle_irq(unsigned int irq) { return handle_irq_desc(irq_to_desc(irq)); @@ -665,38 +662,88 @@ EXPORT_SYMBOL_GPL(generic_handle_irq); #ifdef CONFIG_IRQ_DOMAIN /** * generic_handle_domain_irq - Invoke the handler for a HW irq belonging - * to a domain. + * to a domain, usually for a non-root interrupt + * controller * @domain: The domain where to perform the lookup * @hwirq: The HW irq number to convert to a logical one * * Returns: 0 on success, or -EINVAL if conversion has failed * - * This function must be called from an IRQ context with irq regs - * initialized. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { - WARN_ON_ONCE(!in_irq()); return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); +#ifdef CONFIG_HANDLE_DOMAIN_IRQ /** - * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging - * to a domain. + * handle_domain_irq - Invoke the handler for a HW irq belonging to a domain, + * usually for a root interrupt controller * @domain: The domain where to perform the lookup * @hwirq: The HW irq number to convert to a logical one + * @regs: Register file coming from the low-level handling code * * Returns: 0 on success, or -EINVAL if conversion has failed - * - * This function must be called from an NMI context with irq regs - * initialized. 
- **/ -int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq) + */ +int handle_domain_irq(struct irq_domain *domain, + unsigned int hwirq, struct pt_regs *regs) { - WARN_ON_ONCE(!in_nmi()); - return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); + struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; + int ret = 0; + + irq_enter(); + + /* The irqdomain code provides boundary checks */ + desc = irq_resolve_mapping(domain, hwirq); + if (likely(desc)) + handle_irq_desc(desc); + else + ret = -EINVAL; + + irq_exit(); + set_irq_regs(old_regs); + return ret; } + +/** + * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * @regs: Register file coming from the low-level handling code + * + * This function must be called from an NMI context. + * + * Returns: 0 on success, or -EINVAL if conversion has failed + */ +int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, + struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; + int ret = 0; + + /* + * NMI context needs to be set up earlier in order to deal with tracing. + */ + WARN_ON(!in_nmi()); + + desc = irq_resolve_mapping(domain, hwirq); + + /* + * ack_bad_irq is not NMI-safe, just report + * an invalid interrupt. + */ + if (likely(desc)) + handle_irq_desc(desc); + else + ret = -EINVAL; + + set_irq_regs(old_regs); + return ret; +} +#endif #endif /* Dynamic interrupt handling */ diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index bf38c546aa..4d8fc65cf3 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -744,8 +744,9 @@ static int irq_domain_translate(struct irq_domain *d, return 0; } -void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, - unsigned int count, struct irq_fwspec *fwspec) +static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, + unsigned int count, + struct irq_fwspec *fwspec) { int i; @@ -755,7 +756,6 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, for (i = 0; i < count; i++) fwspec->param[i] = args[i]; } -EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) { @@ -1502,7 +1502,6 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, irq_free_descs(virq, nr_irqs); return ret; } -EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); /* The irq_data was moved, fix the revmap to refer to the new location */ static void irq_domain_fix_revmap(struct irq_data *d) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f23ffd3038..27667e82ec 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -486,8 +486,7 @@ int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) } EXPORT_SYMBOL_GPL(irq_force_affinity); -int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, - bool setaffinity) +int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); @@ -496,11 +495,12 @@ int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, return -EINVAL; desc->affinity_hint = m; irq_put_desc_unlock(desc, flags); - if (m && setaffinity) + /* set the initial affinity to prevent every interrupt from being on CPU0 */ + if (m) __irq_set_affinity(irq, m, false); return 0; }
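With the manage.c hunk above, irq_set_affinity_hint() again both records the hint (visible in /proc/irq/<n>/affinity_hint) and applies it as the interrupt's initial affinity. Typical driver-side use, as a hedged sketch — struct foo_dev and its fields are illustrative, not part of this patch:

/* spread one vector per queue across the online CPUs */
static void foo_spread_irqs(struct foo_dev *fd)
{
	int i;

	for (i = 0; i < fd->nr_queues; i++)
		irq_set_affinity_hint(fd->queue_irq[i],
				      cpumask_of(i % num_online_cpus()));
}

/* the core stores the bare cpumask pointer, so drivers conventionally
 * clear the hint again before free_irq() to avoid leaving it stale */
static void foo_clear_irq_hints(struct foo_dev *fd)
{
	int i;

	for (i = 0; i < fd->nr_queues; i++)
		irq_set_affinity_hint(fd->queue_irq[i], NULL);
}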
-EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); +EXPORT_SYMBOL_GPL(irq_set_affinity_hint); static void irq_affinity_notify(struct work_struct *work) { @@ -1259,8 +1259,6 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); - sched_set_fifo(current); - if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) handler_fn = irq_forced_thread_fn; @@ -1426,6 +1424,8 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) if (IS_ERR(t)) return PTR_ERR(t); + sched_set_fifo(t); + /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code @@ -2827,7 +2827,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. * - * This function should be called with migration disabled if the + * This function should be called with preemption disabled if the * interrupt controller has per-cpu registers. */ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 2bdfce5eda..7f350ae59c 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -14,15 +14,12 @@ #include #include #include -#include #include #include "internals.h" -static inline int msi_sysfs_create_group(struct device *dev); - /** - * msi_alloc_desc - Allocate an initialized msi_desc + * alloc_msi_entry - Allocate an initialized msi_desc * @dev: Pointer to the device for which this is allocated * @nvec: The number of vectors used in this entry * @affinity: Optional pointer to an affinity mask array size of @nvec @@ -32,134 +29,34 @@ static inline int msi_sysfs_create_group(struct device *dev); * * Return: pointer to allocated &msi_desc on success or %NULL on failure */ -static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec, - const struct irq_affinity_desc *affinity) +struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, + const struct irq_affinity_desc *affinity) { - struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + struct msi_desc *desc; + desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) return NULL; + INIT_LIST_HEAD(&desc->list); desc->dev = dev; desc->nvec_used = nvec; if (affinity) { - desc->affinity = kmemdup(affinity, nvec * sizeof(*desc->affinity), GFP_KERNEL); + desc->affinity = kmemdup(affinity, + nvec * sizeof(*desc->affinity), GFP_KERNEL); if (!desc->affinity) { kfree(desc); return NULL; } } + return desc; } -static void msi_free_desc(struct msi_desc *desc) +void free_msi_entry(struct msi_desc *entry) { - kfree(desc->affinity); - kfree(desc); -} - -static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, unsigned int index) -{ - int ret; - - desc->msi_index = index; - ret = xa_insert(&md->__store, index, desc, GFP_KERNEL); - if (ret) - msi_free_desc(desc); - return ret; -} - -/** - * msi_add_msi_desc - Allocate and initialize a MSI descriptor - * @dev: Pointer to the device for which the descriptor is allocated - * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor - * - * Return: 0 on success or an appropriate failure code. - */ -int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc) -{ - struct msi_desc *desc; - - lockdep_assert_held(&dev->msi.data->mutex); - - desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity); - if (!desc) - return -ENOMEM; - - /* Copy type specific data to the new descriptor. 
*/ - desc->pci = init_desc->pci; - return msi_insert_desc(dev->msi.data, desc, init_desc->msi_index); -} - -/** - * msi_add_simple_msi_descs - Allocate and initialize MSI descriptors - * @dev: Pointer to the device for which the descriptors are allocated - * @index: Index for the first MSI descriptor - * @ndesc: Number of descriptors to allocate - * - * Return: 0 on success or an appropriate failure code. - */ -static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsigned int ndesc) -{ - unsigned int idx, last = index + ndesc - 1; - struct msi_desc *desc; - int ret; - - lockdep_assert_held(&dev->msi.data->mutex); - - for (idx = index; idx <= last; idx++) { - desc = msi_alloc_desc(dev, 1, NULL); - if (!desc) - goto fail_mem; - ret = msi_insert_desc(dev->msi.data, desc, idx); - if (ret) - goto fail; - } - return 0; - -fail_mem: - ret = -ENOMEM; -fail: - msi_free_msi_descs_range(dev, MSI_DESC_NOTASSOCIATED, index, last); - return ret; -} - -static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) -{ - switch (filter) { - case MSI_DESC_ALL: - return true; - case MSI_DESC_NOTASSOCIATED: - return !desc->irq; - case MSI_DESC_ASSOCIATED: - return !!desc->irq; - } - WARN_ON_ONCE(1); - return false; -} - -/** - * msi_free_msi_descs_range - Free MSI descriptors of a device - * @dev: Device to free the descriptors - * @filter: Descriptor state filter - * @first_index: Index to start freeing from - * @last_index: Last index to be freed - */ -void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, - unsigned int first_index, unsigned int last_index) -{ - struct xarray *xa = &dev->msi.data->__store; - struct msi_desc *desc; - unsigned long idx; - - lockdep_assert_held(&dev->msi.data->mutex); - - xa_for_each_range(xa, idx, desc, first_index, last_index) { - if (msi_desc_match(desc, filter)) { - xa_erase(xa, idx); - msi_free_desc(desc); - } - } + kfree(entry->affinity); + kfree(entry); } void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) @@ -175,289 +72,138 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) } EXPORT_SYMBOL_GPL(get_cached_msi_msg); -static void msi_device_data_release(struct device *dev, void *res) -{ - struct msi_device_data *md = res; - - WARN_ON_ONCE(!xa_empty(&md->__store)); - xa_destroy(&md->__store); - dev->msi.data = NULL; -} - -/** - * msi_setup_device_data - Setup MSI device data - * @dev: Device for which MSI device data should be set up - * - * Return: 0 on success, appropriate error code otherwise - * - * This can be called more than once for @dev. If the MSI device data is - * already allocated the call succeeds. The allocated memory is - * automatically released when the device is destroyed. 
- */ -int msi_setup_device_data(struct device *dev) -{ - struct msi_device_data *md; - int ret; - - if (dev->msi.data) - return 0; - - md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL); - if (!md) - return -ENOMEM; - - ret = msi_sysfs_create_group(dev); - if (ret) { - devres_free(md); - return ret; - } - - xa_init(&md->__store); - mutex_init(&md->mutex); - dev->msi.data = md; - devres_add(dev, md); - return 0; -} - -/** - * msi_lock_descs - Lock the MSI descriptor storage of a device - * @dev: Device to operate on - */ -void msi_lock_descs(struct device *dev) -{ - mutex_lock(&dev->msi.data->mutex); -} -EXPORT_SYMBOL_GPL(msi_lock_descs); - -/** - * msi_unlock_descs - Unlock the MSI descriptor storage of a device - * @dev: Device to operate on - */ -void msi_unlock_descs(struct device *dev) -{ - /* Invalidate the index wich was cached by the iterator */ - dev->msi.data->__iter_idx = MSI_MAX_INDEX; - mutex_unlock(&dev->msi.data->mutex); -} -EXPORT_SYMBOL_GPL(msi_unlock_descs); - -static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter) -{ - struct msi_desc *desc; - - xa_for_each_start(&md->__store, md->__iter_idx, desc, md->__iter_idx) { - if (msi_desc_match(desc, filter)) - return desc; - } - md->__iter_idx = MSI_MAX_INDEX; - return NULL; -} - -/** - * msi_first_desc - Get the first MSI descriptor of a device - * @dev: Device to operate on - * @filter: Descriptor state filter - * - * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() - * must be invoked before the call. - * - * Return: Pointer to the first MSI descriptor matching the search - * criteria, NULL if none found. - */ -struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter) -{ - struct msi_device_data *md = dev->msi.data; - - if (WARN_ON_ONCE(!md)) - return NULL; - - lockdep_assert_held(&md->mutex); - - md->__iter_idx = 0; - return msi_find_desc(md, filter); -} -EXPORT_SYMBOL_GPL(msi_first_desc); - -/** - * msi_next_desc - Get the next MSI descriptor of a device - * @dev: Device to operate on - * - * The first invocation of msi_next_desc() has to be preceeded by a - * successful invocation of __msi_first_desc(). Consecutive invocations are - * only valid if the previous one was successful. All these operations have - * to be done within the same MSI mutex held region. - * - * Return: Pointer to the next MSI descriptor matching the search - * criteria, NULL if none found. - */ -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter) -{ - struct msi_device_data *md = dev->msi.data; - - if (WARN_ON_ONCE(!md)) - return NULL; - - lockdep_assert_held(&md->mutex); - - if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX) - return NULL; - - md->__iter_idx++; - return msi_find_desc(md, filter); -} -EXPORT_SYMBOL_GPL(msi_next_desc); - -/** - * msi_get_virq - Return Linux interrupt number of a MSI interrupt - * @dev: Device to operate on - * @index: MSI interrupt index to look for (0-based) - * - * Return: The Linux interrupt number on success (> 0), 0 if not found - */ -unsigned int msi_get_virq(struct device *dev, unsigned int index) -{ - struct msi_desc *desc; - unsigned int ret = 0; - bool pcimsi; - - if (!dev->msi.data) - return 0; - - pcimsi = dev_is_pci(dev) ? to_pci_dev(dev)->msi_enabled : false; - - msi_lock_descs(dev); - desc = xa_load(&dev->msi.data->__store, pcimsi ? 0 : index); - if (desc && desc->irq) { - /* - * PCI-MSI has only one descriptor for multiple interrupts. 
- * PCI-MSIX and platform MSI use a descriptor per - * interrupt. - */ - if (pcimsi) { - if (index < desc->nvec_used) - ret = desc->irq + index; - } else { - ret = desc->irq; - } - } - msi_unlock_descs(dev); - return ret; -} -EXPORT_SYMBOL_GPL(msi_get_virq); - -#ifdef CONFIG_SYSFS -static struct attribute *msi_dev_attrs[] = { - NULL -}; - -static const struct attribute_group msi_irqs_group = { - .name = "msi_irqs", - .attrs = msi_dev_attrs, -}; - -static inline int msi_sysfs_create_group(struct device *dev) -{ - return devm_device_add_group(dev, &msi_irqs_group); -} - static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - /* MSI vs. MSIX is per device not per interrupt */ - bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false; + struct msi_desc *entry; + bool is_msix = false; + unsigned long irq; + int retval; + + retval = kstrtoul(attr->attr.name, 10, &irq); + if (retval) + return retval; + + entry = irq_get_msi_desc(irq); + if (!entry) + return -ENODEV; + + if (dev_is_pci(dev)) + is_msix = entry->msi_attrib.is_msix; return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi"); } -static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) +/** + * msi_populate_sysfs - Populate msi_irqs sysfs entries for devices + * @dev: The device (PCI, platform etc) that will get sysfs entries + * + * Return attribute_group ** so that specific bus MSI can save it to + * somewhere while initializing msi irqs. If the device has no MSI irqs, + * return NULL; if it fails to populate sysfs, return ERR_PTR + */ +const struct attribute_group **msi_populate_sysfs(struct device *dev) { - struct device_attribute *attrs = desc->sysfs_attrs; + const struct attribute_group **msi_irq_groups; + struct attribute **msi_attrs, *msi_attr; + struct device_attribute *msi_dev_attr; + struct attribute_group *msi_irq_group; + struct msi_desc *entry; + int ret = -ENOMEM; + int num_msi = 0; + int count = 0; int i; - if (!attrs) - return; + /* Determine how many msi entries we have */ + for_each_msi_entry(entry, dev) + num_msi += entry->nvec_used; + if (!num_msi) + return NULL; - desc->sysfs_attrs = NULL; - for (i = 0; i < desc->nvec_used; i++) { - if (attrs[i].show) - sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name); - kfree(attrs[i].attr.name); - } - kfree(attrs); -} + /* Dynamically create the MSI attributes for the device */ + msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL); + if (!msi_attrs) + return ERR_PTR(-ENOMEM); -static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) -{ - struct device_attribute *attrs; - int ret, i; + for_each_msi_entry(entry, dev) { + for (i = 0; i < entry->nvec_used; i++) { + msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL); + if (!msi_dev_attr) + goto error_attrs; + msi_attrs[count] = &msi_dev_attr->attr; - attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return -ENOMEM; - - desc->sysfs_attrs = attrs; - for (i = 0; i < desc->nvec_used; i++) { - sysfs_attr_init(&attrs[i].attr); - attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i); - if (!attrs[i].attr.name) { - ret = -ENOMEM; - goto fail; - } - - attrs[i].attr.mode = 0444; - attrs[i].show = msi_mode_show; - - ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name); - if (ret) { - attrs[i].show = NULL; - goto fail + sysfs_attr_init(&msi_dev_attr->attr); + msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d", + entry->irq + i); + if
(!msi_dev_attr->attr.name) + goto error_attrs; + msi_dev_attr->attr.mode = 0444; + msi_dev_attr->show = msi_mode_show; + ++count; + } } - return 0; -fail: - msi_sysfs_remove_desc(dev, desc); - return ret; -} + msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL); + if (!msi_irq_group) + goto error_attrs; + msi_irq_group->name = "msi_irqs"; + msi_irq_group->attrs = msi_attrs; -#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS -/** - * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device - * @dev: The device (PCI, platform etc) which will get sysfs entries - */ -int msi_device_populate_sysfs(struct device *dev) -{ - struct msi_desc *desc; - int ret; + msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL); + if (!msi_irq_groups) + goto error_irq_group; + msi_irq_groups[0] = msi_irq_group; - msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) { - if (desc->sysfs_attrs) - continue; - ret = msi_sysfs_populate_desc(dev, desc); - if (ret) - return ret; + ret = sysfs_create_groups(&dev->kobj, msi_irq_groups); + if (ret) + goto error_irq_groups; + + return msi_irq_groups; + +error_irq_groups: + kfree(msi_irq_groups); +error_irq_group: + kfree(msi_irq_group); +error_attrs: + count = 0; + msi_attr = msi_attrs[count]; + while (msi_attr) { + msi_dev_attr = container_of(msi_attr, struct device_attribute, attr); + kfree(msi_attr->name); + kfree(msi_dev_attr); + ++count; + msi_attr = msi_attrs[count]; } - return 0; + kfree(msi_attrs); + return ERR_PTR(ret); } /** - * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device - * @dev: The device (PCI, platform etc) for which to remove - * sysfs entries + * msi_destroy_sysfs - Destroy msi_irqs sysfs entries for devices + * @dev: The device (PCI, platform etc) whose sysfs entries will be removed + * @msi_irq_groups: attribute_group for device msi_irqs entries */ -void msi_device_destroy_sysfs(struct device *dev) +void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups) { - struct msi_desc *desc; + struct device_attribute *dev_attr; + struct attribute **msi_attrs; + int count = 0; - msi_for_each_desc(desc, dev, MSI_DESC_ALL) - msi_sysfs_remove_desc(dev, desc); + if (msi_irq_groups) { + sysfs_remove_groups(&dev->kobj, msi_irq_groups); + msi_attrs = msi_irq_groups[0]->attrs; + while (msi_attrs[count]) { + dev_attr = container_of(msi_attrs[count], + struct device_attribute, attr); + kfree(dev_attr->attr.name); + kfree(dev_attr); + ++count; + } + kfree(msi_attrs); + kfree(msi_irq_groups[0]); + kfree(msi_irq_groups); + } } -#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK */ -#else /* CONFIG_SYSFS */ -static inline int msi_sysfs_create_group(struct device *dev) { return 0; } -static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; } -static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { } -#endif /* !CONFIG_SYSFS */ #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN static inline void irq_chip_write_msi_msg(struct irq_data *data, @@ -710,38 +456,43 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, } int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, - int virq_base, int nvec, msi_alloc_info_t *arg) + int virq, int nvec, msi_alloc_info_t *arg) { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; struct msi_desc *desc; - int ret, virq; + int ret = 0; - msi_lock_descs(dev); - ret = msi_add_simple_msi_descs(dev, virq_base, nvec); - if (ret) - goto unlock; +
for_each_msi_entry(desc, dev) { + /* Don't even try the multi-MSI brain damage. */ + if (WARN_ON(!desc->irq || desc->nvec_used != 1)) { + ret = -EINVAL; + break; + } - for (virq = virq_base; virq < virq_base + nvec; virq++) { - desc = xa_load(&dev->msi.data->__store, virq); - desc->irq = virq; + if (!(desc->irq >= virq && desc->irq < (virq + nvec))) + continue; ops->set_desc(arg, desc); - ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); + /* Assumes the domain mutex is held! */ + ret = irq_domain_alloc_irqs_hierarchy(domain, desc->irq, 1, + arg); if (ret) - goto fail; + break; - irq_set_msi_desc(virq, desc); + irq_set_msi_desc_off(desc->irq, 0, desc); + } + + if (ret) { + /* Mop up the damage */ + for_each_msi_entry(desc, dev) { + if (!(desc->irq >= virq && desc->irq < (virq + nvec))) + continue; + + irq_domain_free_irqs_common(domain, desc->irq, 1); + } } - msi_unlock_descs(dev); - return 0; -fail: - for (--virq; virq >= virq_base; virq--) - irq_domain_free_irqs_common(domain, virq, 1); - msi_free_msi_descs_range(dev, MSI_DESC_ALL, virq_base, virq_base + nvec - 1); -unlock: - msi_unlock_descs(dev); return ret; } @@ -780,59 +531,8 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, * Checking the first MSI descriptor is sufficient. MSIX supports * masking and MSI does so when the can_mask attribute is set. */ - desc = msi_first_desc(dev, MSI_DESC_ALL); - return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask; -} - -static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc, - int allocated) -{ - switch(domain->bus_token) { - case DOMAIN_BUS_PCI_MSI: - case DOMAIN_BUS_VMD_MSI: - if (IS_ENABLED(CONFIG_PCI_MSI)) - break; - fallthrough; - default: - return -ENOSPC; - } - - /* Let a failed PCI multi MSI allocation retry */ - if (desc->nvec_used > 1) - return 1; - - /* If there was a successful allocation let the caller know */ - return allocated ? allocated : -ENOSPC; -} - -#define VIRQ_CAN_RESERVE 0x01 -#define VIRQ_ACTIVATE 0x02 -#define VIRQ_NOMASK_QUIRK 0x04 - -static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags) -{ - struct irq_data *irqd = irq_domain_get_irq_data(domain, virq); - int ret; - - if (!(vflags & VIRQ_CAN_RESERVE)) { - irqd_clr_can_reserve(irqd); - if (vflags & VIRQ_NOMASK_QUIRK) - irqd_set_msi_nomask_quirk(irqd); - } - - if (!(vflags & VIRQ_ACTIVATE)) - return 0; - - ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE); - if (ret) - return ret; - /* - * If the interrupt uses reservation mode, clear the activated bit - * so request_irq() will assign the final vector. 
- */ - if (vflags & VIRQ_CAN_RESERVE) - irqd_clr_activated(irqd); - return 0; + desc = first_msi_entry(dev); + return desc->msi_attrib.is_msix || desc->msi_attrib.can_mask; } int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, @@ -840,103 +540,83 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; - msi_alloc_info_t arg = { }; - unsigned int vflags = 0; + struct irq_data *irq_data; struct msi_desc *desc; - int allocated = 0; + msi_alloc_info_t arg = { }; int i, ret, virq; + bool can_reserve; ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); if (ret) return ret; + for_each_msi_entry(desc, dev) { + ops->set_desc(&arg, desc); + + virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, + dev_to_node(dev), &arg, false, + desc->affinity); + if (virq < 0) { + ret = -ENOSPC; + if (ops->handle_error) + ret = ops->handle_error(domain, desc, ret); + if (ops->msi_finish) + ops->msi_finish(&arg, ret); + return ret; + } + + for (i = 0; i < desc->nvec_used; i++) { + irq_set_msi_desc_off(virq, i, desc); + irq_debugfs_copy_devname(virq + i, dev); + } + } + + if (ops->msi_finish) + ops->msi_finish(&arg, 0); + + can_reserve = msi_check_reservation_mode(domain, info, dev); + /* * This flag is set by the PCI layer as we need to activate * the MSI entries before the PCI layer enables MSI in the * card. Otherwise the card latches a random msi message. */ - if (info->flags & MSI_FLAG_ACTIVATE_EARLY) - vflags |= VIRQ_ACTIVATE; + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) + goto skip_activate; - /* - * Interrupt can use a reserved vector and will not occupy - * a real device vector until the interrupt is requested. - */ - if (msi_check_reservation_mode(domain, info, dev)) { - vflags |= VIRQ_CAN_RESERVE; - /* - * MSI affinity setting requires a special quirk (X86) when - * reservation mode is active. - */ - if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) - vflags |= VIRQ_NOMASK_QUIRK; + for_each_msi_vector(desc, i, dev) { + if (desc->irq == i) { + virq = desc->irq; + dev_dbg(dev, "irq [%d-%d] for MSI\n", + virq, virq + desc->nvec_used - 1); + } + + irq_data = irq_domain_get_irq_data(domain, i); + if (!can_reserve) { + irqd_clr_can_reserve(irq_data); + if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) + irqd_set_msi_nomask_quirk(irq_data); + } + ret = irq_domain_activate_irq(irq_data, can_reserve); + if (ret) + goto cleanup; } - msi_for_each_desc(desc, dev, MSI_DESC_NOTASSOCIATED) { - ops->set_desc(&arg, desc); - - virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, - dev_to_node(dev), &arg, false, - desc->affinity); - if (virq < 0) - return msi_handle_pci_fail(domain, desc, allocated); - - for (i = 0; i < desc->nvec_used; i++) { - irq_set_msi_desc_off(virq, i, desc); - irq_debugfs_copy_devname(virq + i, dev); - ret = msi_init_virq(domain, virq + i, vflags); - if (ret) - return ret; +skip_activate: + /* + * If these interrupts use reservation mode, clear the activated bit + * so request_irq() will assign the final vector. 
+ */ + if (can_reserve) { + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); + irqd_clr_activated(irq_data); } - if (info->flags & MSI_FLAG_DEV_SYSFS) { - ret = msi_sysfs_populate_desc(dev, desc); - if (ret) - return ret; - } - allocated++; } return 0; -} -static int msi_domain_add_simple_msi_descs(struct msi_domain_info *info, - struct device *dev, - unsigned int num_descs) -{ - if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS)) - return 0; - - return msi_add_simple_msi_descs(dev, 0, num_descs); -} - -/** - * msi_domain_alloc_irqs_descs_locked - Allocate interrupts from a MSI interrupt domain - * @domain: The domain to allocate from - * @dev: Pointer to device struct of the device for which the interrupts - * are allocated - * @nvec: The number of interrupts to allocate - * - * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() - * pair. Use this for MSI irqdomains which implement their own vector - * allocation/free. - * - * Return: %0 on success or an error code. - */ -int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, - int nvec) -{ - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - int ret; - - lockdep_assert_held(&dev->msi.data->mutex); - - ret = msi_domain_add_simple_msi_descs(info, dev, nvec); - if (ret) - return ret; - - ret = ops->domain_alloc_irqs(domain, dev, nvec); - if (ret) - msi_domain_free_irqs_descs_locked(domain, dev); +cleanup: + msi_domain_free_irqs(domain, dev); return ret; } @@ -949,65 +629,38 @@ int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device * * Return: %0 on success or an error code. */ -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec) -{ - int ret; - - msi_lock_descs(dev); - ret = msi_domain_alloc_irqs_descs_locked(domain, dev, nvec); - msi_unlock_descs(dev); - return ret; -} - -void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) -{ - struct msi_domain_info *info = domain->host_data; - struct irq_data *irqd; - struct msi_desc *desc; - int i; - - /* Only handle MSI entries which have an interrupt associated */ - msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) { - /* Make sure all interrupts are deactivated */ - for (i = 0; i < desc->nvec_used; i++) { - irqd = irq_domain_get_irq_data(domain, desc->irq + i); - if (irqd && irqd_is_activated(irqd)) - irq_domain_deactivate_irq(irqd); - } - - irq_domain_free_irqs(desc->irq, desc->nvec_used); - if (info->flags & MSI_FLAG_DEV_SYSFS) - msi_sysfs_remove_desc(dev, desc); - desc->irq = 0; - } -} - -static void msi_domain_free_msi_descs(struct msi_domain_info *info, - struct device *dev) -{ - if (info->flags & MSI_FLAG_FREE_MSI_DESCS) - msi_free_msi_descs(dev); -} - -/** - * msi_domain_free_irqs_descs_locked - Free interrupts from a MSI interrupt @domain associated to @dev - * @domain: The domain to managing the interrupts - * @dev: Pointer to device struct of the device for which the interrupts - * are free - * - * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() - * pair. Use this for MSI irqdomains which implement their own vector - * allocation. 
- */ -void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev) +int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, + int nvec) { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; - lockdep_assert_held(&dev->msi.data->mutex); + return ops->domain_alloc_irqs(domain, dev, nvec); +} - ops->domain_free_irqs(domain, dev); - msi_domain_free_msi_descs(info, dev); +void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +{ + struct irq_data *irq_data; + struct msi_desc *desc; + int i; + + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); + if (irqd_is_activated(irq_data)) + irq_domain_deactivate_irq(irq_data); + } + + for_each_msi_entry(desc, dev) { + /* + * We might have failed to allocate an MSI early + * enough that there is no IRQ associated to this + * entry. If that's the case, don't do anything. + */ + if (desc->irq) { + irq_domain_free_irqs(desc->irq, desc->nvec_used); + desc->irq = 0; + } + } } /** @@ -1018,9 +671,10 @@ void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device */ void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) { - msi_lock_descs(dev); - msi_domain_free_irqs_descs_locked(domain, dev); - msi_unlock_descs(dev); + struct msi_domain_info *info = domain->host_data; + struct msi_domain_ops *ops = info->ops; + + return ops->domain_free_irqs(domain, dev); } /** diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 02b2daf074..c481d84583 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -447,10 +447,6 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); static int __init irqfixup_setup(char *str) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); - return 1; - } irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); @@ -463,10 +459,6 @@ module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); - return 1; - } irqfixup = 2; printk(KERN_WARNING "Misrouted IRQ fixup and polling support " "enabled\n"); diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f7df715ec2..db8c248ebc 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -18,36 +18,11 @@ #include #include #include -#include #include #include static DEFINE_PER_CPU(struct llist_head, raised_list); static DEFINE_PER_CPU(struct llist_head, lazy_list); -static DEFINE_PER_CPU(struct task_struct *, irq_workd); - -static void wake_irq_workd(void) -{ - struct task_struct *tsk = __this_cpu_read(irq_workd); - - if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk) - wake_up_process(tsk); -} - -#ifdef CONFIG_SMP -static void irq_work_wake(struct irq_work *entry) -{ - wake_irq_workd(); -} - -static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) = - IRQ_WORK_INIT_HARD(irq_work_wake); -#endif - -static int irq_workd_should_run(unsigned int cpu) -{ - return !llist_empty(this_cpu_ptr(&lazy_list)); -} /* * Claim the entry so that no one else will poke at it. 
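The irq_work.c hunks that follow drop the PREEMPT_RT irq_workd per-CPU threads and return to the plain two-list scheme: IRQ_WORK_LAZY items sit on lazy_list until the next tick, everything else goes on raised_list behind the arch IPI. The caller-side API is the same in both schemes; a minimal usage sketch, where the demo_* names are illustrative:

#include <linux/irq_work.h>

static void demo_work_fn(struct irq_work *work)
{
	/* runs in hard interrupt context once the arch IPI fires */
	pr_info("demo: deferred work ran\n");
}

static DEFINE_IRQ_WORK(demo_work, demo_work_fn);

/* callable from contexts where sleeping or taking locks is
 * impossible (e.g. NMI or deep in the scheduler) */
void demo_poke(void)
{
	/* returns false if demo_work was already pending */
	irq_work_queue(&demo_work);
}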
@@ -77,29 +52,15 @@ void __weak arch_irq_work_raise(void) /* Enqueue on current CPU, work must already be claimed and preempt disabled */ static void __irq_work_queue_local(struct irq_work *work) { - struct llist_head *list; - bool rt_lazy_work = false; - bool lazy_work = false; - int work_flags; - - work_flags = atomic_read(&work->node.a_flags); - if (work_flags & IRQ_WORK_LAZY) - lazy_work = true; - else if (IS_ENABLED(CONFIG_PREEMPT_RT) && - !(work_flags & IRQ_WORK_HARD_IRQ)) - rt_lazy_work = true; - - if (lazy_work || rt_lazy_work) - list = this_cpu_ptr(&lazy_list); - else - list = this_cpu_ptr(&raised_list); - - if (!llist_add(&work->node.llist, list)) - return; - /* If the work is "lazy", handle it from next tick if any */ - if (!lazy_work || tick_nohz_tick_stopped()) - arch_irq_work_raise(); + if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) { + if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) && + tick_nohz_tick_stopped()) + arch_irq_work_raise(); + } else { + if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list))) + arch_irq_work_raise(); + } } /* Enqueue the irq work @work on the current CPU */ @@ -143,34 +104,17 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) if (cpu != smp_processor_id()) { /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); - - /* - * On PREEMPT_RT the items which are not marked as - * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work - * item is used on the remote CPU to wake the thread. - */ - if (IS_ENABLED(CONFIG_PREEMPT_RT) && - !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) { - - if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu))) - goto out; - - work = &per_cpu(irq_work_wakeup, cpu); - if (!irq_work_claim(work)) - goto out; - } - __smp_call_single_queue(cpu, &work->node.llist); } else { __irq_work_queue_local(work); } -out: preempt_enable(); return true; #endif /* CONFIG_SMP */ } + bool irq_work_needs_cpu(void) { struct llist_head *raised, *lazy; @@ -216,10 +160,6 @@ void irq_work_single(void *arg) * else claimed it meanwhile. */ (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY); - - if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || - !arch_irq_work_has_interrupt()) - rcuwait_wake_up(&work->irqwait); } static void irq_work_run_list(struct llist_head *list) @@ -227,12 +167,7 @@ static void irq_work_run_list(struct llist_head *list) struct irq_work *work, *tmp; struct llist_node *llnode; - /* - * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed - * in a per-CPU thread in preemptible context. Only the items which are - * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context. 
- */ - BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT)); + BUG_ON(!irqs_disabled()); if (llist_empty(list)) return; @@ -249,10 +184,7 @@ static void irq_work_run_list(struct llist_head *list) void irq_work_run(void) { irq_work_run_list(this_cpu_ptr(&raised_list)); - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - irq_work_run_list(this_cpu_ptr(&lazy_list)); - else - wake_irq_workd(); + irq_work_run_list(this_cpu_ptr(&lazy_list)); } EXPORT_SYMBOL_GPL(irq_work_run); @@ -262,11 +194,7 @@ void irq_work_tick(void) if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) irq_work_run_list(raised); - - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - irq_work_run_list(this_cpu_ptr(&lazy_list)); - else - wake_irq_workd(); + irq_work_run_list(this_cpu_ptr(&lazy_list)); } /* @@ -276,42 +204,8 @@ void irq_work_tick(void) void irq_work_sync(struct irq_work *work) { lockdep_assert_irqs_enabled(); - might_sleep(); - - if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || - !arch_irq_work_has_interrupt()) { - rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), - TASK_UNINTERRUPTIBLE); - return; - } while (irq_work_is_busy(work)) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); - -static void run_irq_workd(unsigned int cpu) -{ - irq_work_run_list(this_cpu_ptr(&lazy_list)); -} - -static void irq_workd_setup(unsigned int cpu) -{ - sched_set_fifo_low(current); -} - -static struct smp_hotplug_thread irqwork_threads = { - .store = &irq_workd, - .setup = irq_workd_setup, - .thread_should_run = irq_workd_should_run, - .thread_fn = run_irq_workd, - .thread_comm = "irq_work/%u", -}; - -static __init int irq_work_init_threads(void) -{ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - BUG_ON(smpboot_register_percpu_thread(&irqwork_threads)); - return 0; -} -early_initcall(irq_work_init_threads); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 951c93216f..0ba87982d0 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -164,46 +164,26 @@ static unsigned long kallsyms_sym_address(int idx) return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; } -static bool cleanup_symbol_name(char *s) +#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN) +/* + * LLVM appends a hash to static function names when ThinLTO and CFI are + * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b. + * This causes confusion and potentially breaks user space tools, so we + * strip the suffix from expanded symbol names. + */ +static inline bool cleanup_symbol_name(char *s) { char *res; - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return false; - - /* - * LLVM appends various suffixes for local functions and variables that - * must be promoted to global scope as part of LTO. This can break - * hooking of static functions with kprobes. '.' is not a valid - * character in an identifier in C. Suffixes observed: - * - foo.llvm.[0-9a-f]+ - * - foo.[0-9a-f]+ - * - foo.[0-9a-f]+.cfi_jt - */ - res = strchr(s, '.'); - if (res) { - *res = '\0'; - return true; - } - - if (!IS_ENABLED(CONFIG_CFI_CLANG) || - !IS_ENABLED(CONFIG_LTO_CLANG_THIN) || - CONFIG_CLANG_VERSION >= 130000) - return false; - - /* - * Prior to LLVM 13, the following suffixes were observed when thinLTO - * and CFI are both enabled: - * - foo$[0-9]+ - */ res = strrchr(s, '$'); - if (res) { + if (res) *res = '\0'; - return true; - } - return false; + return res != NULL; } +#else +static inline bool cleanup_symbol_name(char *s) { return false; } +#endif /* Lookup the address for this symbol. Returns 0 if not found. 
*/ unsigned long kallsyms_lookup_name(const char *name) @@ -243,7 +223,6 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, ret = fn(data, namebuf, NULL, kallsyms_sym_address(i)); if (ret != 0) return ret; - cond_resched(); } return 0; } diff --git a/kernel/kcov.c b/kernel/kcov.c index 36ca640c4f..80bfe71bbe 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -88,7 +88,6 @@ static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); struct kcov_percpu_data { void *irq_area; - local_lock_t lock; unsigned int saved_mode; unsigned int saved_size; @@ -97,9 +96,7 @@ struct kcov_percpu_data { int saved_sequence; }; -static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = { - .lock = INIT_LOCAL_LOCK(lock), -}; +static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); /* Must be called with kcov_remote_lock locked. */ static struct kcov_remote *kcov_remote_find(u64 handle) @@ -827,7 +824,7 @@ void kcov_remote_start(u64 handle) if (!in_task() && !in_serving_softirq()) return; - local_lock_irqsave(&kcov_percpu_data.lock, flags); + local_irq_save(flags); /* * Check that kcov_remote_start() is not called twice in background @@ -835,7 +832,7 @@ void kcov_remote_start(u64 handle) */ mode = READ_ONCE(t->kcov_mode); if (WARN_ON(in_task() && kcov_mode_enabled(mode))) { - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + local_irq_restore(flags); return; } /* @@ -844,15 +841,14 @@ void kcov_remote_start(u64 handle) * happened while collecting coverage from a background thread. */ if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) { - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + local_irq_restore(flags); return; } spin_lock(&kcov_remote_lock); remote = kcov_remote_find(handle); if (!remote) { - spin_unlock(&kcov_remote_lock); - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + spin_unlock_irqrestore(&kcov_remote_lock, flags); return; } kcov_debug("handle = %llx, context: %s\n", handle, @@ -873,19 +869,19 @@ void kcov_remote_start(u64 handle) size = CONFIG_KCOV_IRQ_AREA_SIZE; area = this_cpu_ptr(&kcov_percpu_data)->irq_area; } - spin_unlock(&kcov_remote_lock); + spin_unlock_irqrestore(&kcov_remote_lock, flags); /* Can only happen when in_task(). */ if (!area) { - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); area = vmalloc(size * sizeof(unsigned long)); if (!area) { kcov_put(kcov); return; } - local_lock_irqsave(&kcov_percpu_data.lock, flags); } + local_irq_save(flags); + /* Reset coverage size. */ *(u64 *)area = 0; @@ -895,7 +891,7 @@ void kcov_remote_start(u64 handle) } kcov_start(t, kcov, size, area, mode, sequence); - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + local_irq_restore(flags); } EXPORT_SYMBOL(kcov_remote_start); @@ -969,12 +965,12 @@ void kcov_remote_stop(void) if (!in_task() && !in_serving_softirq()) return; - local_lock_irqsave(&kcov_percpu_data.lock, flags); + local_irq_save(flags); mode = READ_ONCE(t->kcov_mode); barrier(); if (!kcov_mode_enabled(mode)) { - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + local_irq_restore(flags); return; } /* @@ -982,12 +978,12 @@ void kcov_remote_stop(void) * actually found the remote handle and started collecting coverage. */ if (in_serving_softirq() && !t->kcov_softirq) { - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + local_irq_restore(flags); return; } /* Make sure that kcov_softirq is only set when in softirq. 
*/ if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) { - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + local_irq_restore(flags); return; } @@ -1017,7 +1013,7 @@ void kcov_remote_stop(void) spin_unlock(&kcov_remote_lock); } - local_unlock_irqrestore(&kcov_percpu_data.lock, flags); + local_irq_restore(flags); /* Get in kcov_remote_start(). */ kcov_put(kcov); @@ -1038,8 +1034,8 @@ static int __init kcov_init(void) int cpu; for_each_possible_cpu(cpu) { - void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE * - sizeof(unsigned long), cpu_to_node(cpu)); + void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE * + sizeof(unsigned long)); if (!area) return -ENOMEM; per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area; diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index 4f35d1bced..c2bb07f5bc 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -8,12 +8,9 @@ CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \ - $(call cc-option,-mno-outline-atomics) \ -fno-stack-protector -DDISABLE_BRANCH_PROFILING obj-y := core.o debugfs.o report.o - -KCSAN_INSTRUMENT_BARRIERS_selftest.o := y obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index fe12dfe254..76e67d1e02 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -40,17 +40,15 @@ module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644); module_param_named(skip_watch, kcsan_skip_watch, long, 0644); module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444); -#ifdef CONFIG_KCSAN_WEAK_MEMORY -static bool kcsan_weak_memory = true; -module_param_named(weak_memory, kcsan_weak_memory, bool, 0644); -#else -#define kcsan_weak_memory false -#endif - bool kcsan_enabled; /* Per-CPU kcsan_ctx for interrupts */ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { + .disable_count = 0, + .atomic_next = 0, + .atomic_nest_count = 0, + .in_flat_atomic = false, + .access_mask = 0, .scoped_accesses = {LIST_POISON1, NULL}, }; @@ -204,29 +202,22 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } -static __always_inline void -check_access(const volatile void *ptr, size_t size, int type, unsigned long ip); - /* Check scoped accesses; never inline because this is a slow-path! */ static noinline void kcsan_check_scoped_accesses(void) { struct kcsan_ctx *ctx = get_ctx(); + struct list_head *prev_save = ctx->scoped_accesses.prev; struct kcsan_scoped_access *scoped_access; - if (ctx->disable_scoped) - return; - - ctx->disable_scoped++; - list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) { - check_access(scoped_access->ptr, scoped_access->size, - scoped_access->type, scoped_access->ip); - } - ctx->disable_scoped--; + ctx->scoped_accesses.prev = NULL; /* Avoid recursion. */ + list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) + __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type); + ctx->scoped_accesses.prev = prev_save; } /* Rules for generic atomic accesses. Called from fast-path. 
*/ static __always_inline bool -is_atomic(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int type) +is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { if (type & KCSAN_ACCESS_ATOMIC) return true; @@ -263,7 +254,7 @@ is_atomic(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int type } static __always_inline bool -should_watch(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int type) +should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { /* * Never set up watchpoints when memory operations are atomic. @@ -272,7 +263,7 @@ should_watch(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int t * should not count towards skipped instructions, and (2) to actually * decrement kcsan_atomic_next for consecutive instruction stream. */ - if (is_atomic(ctx, ptr, size, type)) + if (is_atomic(ptr, size, type, ctx)) return false; if (this_cpu_dec_return(kcsan_skip) >= 0) @@ -329,21 +320,6 @@ static void delay_access(int type) udelay(delay); } -/* - * Reads the instrumented memory for value change detection; value change - * detection is currently done for accesses up to a size of 8 bytes. - */ -static __always_inline u64 read_instrumented_memory(const volatile void *ptr, size_t size) -{ - switch (size) { - case 1: return READ_ONCE(*(const u8 *)ptr); - case 2: return READ_ONCE(*(const u16 *)ptr); - case 4: return READ_ONCE(*(const u32 *)ptr); - case 8: return READ_ONCE(*(const u64 *)ptr); - default: return 0; /* Ignore; we do not diff the values. */ - } -} - void kcsan_save_irqtrace(struct task_struct *task) { #ifdef CONFIG_TRACE_IRQFLAGS @@ -358,76 +334,6 @@ void kcsan_restore_irqtrace(struct task_struct *task) #endif } -static __always_inline int get_kcsan_stack_depth(void) -{ -#ifdef CONFIG_KCSAN_WEAK_MEMORY - return current->kcsan_stack_depth; -#else - BUILD_BUG(); - return 0; -#endif -} - -static __always_inline void add_kcsan_stack_depth(int val) -{ -#ifdef CONFIG_KCSAN_WEAK_MEMORY - current->kcsan_stack_depth += val; -#else - BUILD_BUG(); -#endif -} - -static __always_inline struct kcsan_scoped_access *get_reorder_access(struct kcsan_ctx *ctx) -{ -#ifdef CONFIG_KCSAN_WEAK_MEMORY - return ctx->disable_scoped ? NULL : &ctx->reorder_access; -#else - return NULL; -#endif -} - -static __always_inline bool -find_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, - int type, unsigned long ip) -{ - struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx); - - if (!reorder_access) - return false; - - /* - * Note: If accesses are repeated while reorder_access is identical, - * never matches the new access, because !(type & KCSAN_ACCESS_SCOPED). - */ - return reorder_access->ptr == ptr && reorder_access->size == size && - reorder_access->type == type && reorder_access->ip == ip; -} - -static inline void -set_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, - int type, unsigned long ip) -{ - struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx); - - if (!reorder_access || !kcsan_weak_memory) - return; - - /* - * To avoid nested interrupts or scheduler (which share kcsan_ctx) - * reading an inconsistent reorder_access, ensure that the below has - * exclusive access to reorder_access by disallowing concurrent use. 
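For reference, the read_instrumented_memory() helper deleted above only dispatches on the access size: 1-, 2-, 4- and 8-byte accesses participate in value-change detection, and every other size reads back as 0 and is never diffed. As a standalone sketch with userspace types:

        #include <stdint.h>
        #include <stddef.h>

        static uint64_t read_instrumented(const volatile void *ptr, size_t size)
        {
                switch (size) {
                case 1: return *(const volatile uint8_t  *)ptr;
                case 2: return *(const volatile uint16_t *)ptr;
                case 4: return *(const volatile uint32_t *)ptr;
                case 8: return *(const volatile uint64_t *)ptr;
                default: return 0;      /* never diffed */
                }
        }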
- */ - ctx->disable_scoped++; - barrier(); - reorder_access->ptr = ptr; - reorder_access->size = size; - reorder_access->type = type | KCSAN_ACCESS_SCOPED; - reorder_access->ip = ip; - reorder_access->stack_depth = get_kcsan_stack_depth(); - barrier(); - ctx->disable_scoped--; -} - /* * Pull everything together: check_access() below contains the performance * critical operations; the fast-path (including check_access) functions should @@ -444,7 +350,6 @@ set_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, static noinline void kcsan_found_watchpoint(const volatile void *ptr, size_t size, int type, - unsigned long ip, atomic_long_t *watchpoint, long encoded_watchpoint) { @@ -466,10 +371,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, * The access_mask check relies on value-change comparison. To avoid * reporting a race where e.g. the writer set up the watchpoint, but the * reader has access_mask!=0, we have to ignore the found watchpoint. - * - * reorder_access is never created from an access with access_mask set. */ - if (ctx->access_mask && !find_reorder_access(ctx, ptr, size, type, ip)) + if (ctx->access_mask) return; /* @@ -493,7 +396,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, if (consumed) { kcsan_save_irqtrace(current); - kcsan_report_set_info(ptr, size, type, ip, watchpoint - watchpoints); + kcsan_report_set_info(ptr, size, type, watchpoint - watchpoints); kcsan_restore_irqtrace(current); } else { /* @@ -513,19 +416,17 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, } static noinline void -kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned long ip) +kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) { const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0; atomic_long_t *watchpoint; u64 old, new, diff; + unsigned long access_mask; enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; - bool interrupt_watcher = kcsan_interrupt_watcher; unsigned long ua_flags = user_access_save(); struct kcsan_ctx *ctx = get_ctx(); - unsigned long access_mask = ctx->access_mask; unsigned long irq_flags = 0; - bool is_reorder_access; /* * Always reset kcsan_skip counter in slow-path to avoid underflow; see @@ -548,33 +449,13 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned goto out; } - /* - * The local CPU cannot observe reordering of its own accesses, and - * therefore we need to take care of 2 cases to avoid false positives: - * - * 1. Races of the reordered access with interrupts. To avoid, if - * the current access is reorder_access, disable interrupts. - * 2. Avoid races of scoped accesses from nested interrupts (below). - */ - is_reorder_access = find_reorder_access(ctx, ptr, size, type, ip); - if (is_reorder_access) - interrupt_watcher = false; - /* - * Avoid races of scoped accesses from nested interrupts (or scheduler). - * Assume setting up a watchpoint for a non-scoped (normal) access that - * also conflicts with a current scoped access. In a nested interrupt, - * which shares the context, it would check a conflicting scoped access. - * To avoid, disable scoped access checking. - */ - ctx->disable_scoped++; - /* * Save and restore the IRQ state trace touched by KCSAN, since KCSAN's * runtime is entered for every memory access, and potentially useful * information is lost if dirtied by KCSAN. 
*/ kcsan_save_irqtrace(current); - if (!interrupt_watcher) + if (!kcsan_interrupt_watcher) local_irq_save(irq_flags); watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); @@ -595,7 +476,23 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned * Read the current value, to later check and infer a race if the data * was modified via a non-instrumented access, e.g. from a device. */ - old = is_reorder_access ? 0 : read_instrumented_memory(ptr, size); + old = 0; + switch (size) { + case 1: + old = READ_ONCE(*(const u8 *)ptr); + break; + case 2: + old = READ_ONCE(*(const u16 *)ptr); + break; + case 4: + old = READ_ONCE(*(const u32 *)ptr); + break; + case 8: + old = READ_ONCE(*(const u64 *)ptr); + break; + default: + break; /* ignore; we do not diff the values */ + } /* * Delay this thread, to increase probability of observing a racy @@ -607,16 +504,23 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned * Re-read value, and check if it is as expected; if not, we infer a * racy access. */ - if (!is_reorder_access) { - new = read_instrumented_memory(ptr, size); - } else { - /* - * Reordered accesses cannot be used for value change detection, - * because the memory location may no longer be accessible and - * could result in a fault. - */ - new = 0; - access_mask = 0; + access_mask = ctx->access_mask; + new = 0; + switch (size) { + case 1: + new = READ_ONCE(*(const u8 *)ptr); + break; + case 2: + new = READ_ONCE(*(const u16 *)ptr); + break; + case 4: + new = READ_ONCE(*(const u32 *)ptr); + break; + case 8: + new = READ_ONCE(*(const u64 *)ptr); + break; + default: + break; /* ignore; we do not diff the values */ } diff = old ^ new; @@ -664,8 +568,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); - kcsan_report_known_origin(ptr, size, type, ip, - value_change, watchpoint - watchpoints, + kcsan_report_known_origin(ptr, size, type, value_change, + watchpoint - watchpoints, old, new, access_mask); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ @@ -674,10 +578,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned if (is_assert) atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); - if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) { - kcsan_report_unknown_origin(ptr, size, type, ip, - old, new, access_mask); - } + if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) + kcsan_report_unknown_origin(ptr, size, type, old, new, access_mask); } /* @@ -686,27 +588,18 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned */ remove_watchpoint(watchpoint); atomic_long_dec(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]); - out_unlock: - if (!interrupt_watcher) + if (!kcsan_interrupt_watcher) local_irq_restore(irq_flags); kcsan_restore_irqtrace(current); - ctx->disable_scoped--; - - /* - * Reordered accesses cannot be used for value change detection, - * therefore never consider for reordering if access_mask is set. - * ASSERT_EXCLUSIVE are not real accesses, ignore them as well. 
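The pair of switch statements reinstated above implements KCSAN's value-change heuristic: snapshot the memory, stall inside delay_access(), re-read, and treat any change in the watched bits as evidence of a race even when no other thread hit the watchpoint. Condensed into one hypothetical helper:

        #include <stdint.h>
        #include <stdbool.h>

        static bool infer_race(uint64_t old, uint64_t new_val, uint64_t access_mask)
        {
                uint64_t diff = old ^ new_val;

                if (access_mask)
                        diff &= access_mask;    /* only bits covered by the mask count */
                return diff != 0;               /* changed => KCSAN_VALUE_CHANGE_TRUE */
        }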
- */ - if (!access_mask && !is_assert) - set_reorder_access(ctx, ptr, size, type, ip); out: user_access_restore(ua_flags); } -static __always_inline void -check_access(const volatile void *ptr, size_t size, int type, unsigned long ip) +static __always_inline void check_access(const volatile void *ptr, size_t size, + int type) { + const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; atomic_long_t *watchpoint; long encoded_watchpoint; @@ -717,14 +610,12 @@ check_access(const volatile void *ptr, size_t size, int type, unsigned long ip) if (unlikely(size == 0)) return; -again: /* * Avoid user_access_save in fast-path: find_watchpoint is safe without * user_access_save, as the address that ptr points to is only used to * check if a watchpoint exists; ptr is never dereferenced. */ - watchpoint = find_watchpoint((unsigned long)ptr, size, - !(type & KCSAN_ACCESS_WRITE), + watchpoint = find_watchpoint((unsigned long)ptr, size, !is_write, &encoded_watchpoint); /* * It is safe to check kcsan_is_enabled() after find_watchpoint in the @@ -734,46 +625,14 @@ check_access(const volatile void *ptr, size_t size, int type, unsigned long ip) */ if (unlikely(watchpoint != NULL)) - kcsan_found_watchpoint(ptr, size, type, ip, watchpoint, encoded_watchpoint); + kcsan_found_watchpoint(ptr, size, type, watchpoint, + encoded_watchpoint); else { struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */ - if (unlikely(should_watch(ctx, ptr, size, type))) { - kcsan_setup_watchpoint(ptr, size, type, ip); - return; - } - - if (!(type & KCSAN_ACCESS_SCOPED)) { - struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx); - - if (reorder_access) { - /* - * reorder_access check: simulates reordering of - * the access after subsequent operations. - */ - ptr = reorder_access->ptr; - type = reorder_access->type; - ip = reorder_access->ip; - /* - * Upon a nested interrupt, this context's - * reorder_access can be modified (shared ctx). - * We know that upon return, reorder_access is - * always invalidated by setting size to 0 via - * __tsan_func_exit(). Therefore we must read - * and check size after the other fields. - */ - barrier(); - size = READ_ONCE(reorder_access->size); - if (size) - goto again; - } - } - - /* - * Always checked last, right before returning from runtime; - * if reorder_access is valid, checked after it was checked. - */ - if (unlikely(ctx->scoped_accesses.prev)) + if (unlikely(should_watch(ptr, size, type, ctx))) + kcsan_setup_watchpoint(ptr, size, type); + else if (unlikely(ctx->scoped_accesses.prev)) kcsan_check_scoped_accesses(); } } @@ -898,7 +757,7 @@ kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, { struct kcsan_ctx *ctx = get_ctx(); - check_access(ptr, size, type, _RET_IP_); + __kcsan_check_access(ptr, size, type); ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */ @@ -906,7 +765,6 @@ kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, sa->ptr = ptr; sa->size = size; sa->type = type; - sa->ip = _RET_IP_; if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. 
*/ INIT_LIST_HEAD(&ctx->scoped_accesses); @@ -938,32 +796,16 @@ void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) ctx->disable_count--; - check_access(sa->ptr, sa->size, sa->type, sa->ip); + __kcsan_check_access(sa->ptr, sa->size, sa->type); } EXPORT_SYMBOL(kcsan_end_scoped_access); void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { - check_access(ptr, size, type, _RET_IP_); + check_access(ptr, size, type); } EXPORT_SYMBOL(__kcsan_check_access); -#define DEFINE_MEMORY_BARRIER(name, order_before_cond) \ - void __kcsan_##name(void) \ - { \ - struct kcsan_scoped_access *sa = get_reorder_access(get_ctx()); \ - if (!sa) \ - return; \ - if (order_before_cond) \ - sa->size = 0; \ - } \ - EXPORT_SYMBOL(__kcsan_##name) - -DEFINE_MEMORY_BARRIER(mb, true); -DEFINE_MEMORY_BARRIER(wmb, sa->type & (KCSAN_ACCESS_WRITE | KCSAN_ACCESS_COMPOUND)); -DEFINE_MEMORY_BARRIER(rmb, !(sa->type & KCSAN_ACCESS_WRITE) || (sa->type & KCSAN_ACCESS_COMPOUND)); -DEFINE_MEMORY_BARRIER(release, true); - /* * KCSAN uses the same instrumentation that is emitted by supported compilers * for ThreadSanitizer (TSAN). @@ -981,7 +823,7 @@ DEFINE_MEMORY_BARRIER(release, true); void __tsan_read##size(void *ptr); \ void __tsan_read##size(void *ptr) \ { \ - check_access(ptr, size, 0, _RET_IP_); \ + check_access(ptr, size, 0); \ } \ EXPORT_SYMBOL(__tsan_read##size); \ void __tsan_unaligned_read##size(void *ptr) \ @@ -990,7 +832,7 @@ DEFINE_MEMORY_BARRIER(release, true); void __tsan_write##size(void *ptr); \ void __tsan_write##size(void *ptr) \ { \ - check_access(ptr, size, KCSAN_ACCESS_WRITE, _RET_IP_); \ + check_access(ptr, size, KCSAN_ACCESS_WRITE); \ } \ EXPORT_SYMBOL(__tsan_write##size); \ void __tsan_unaligned_write##size(void *ptr) \ @@ -1000,8 +842,7 @@ DEFINE_MEMORY_BARRIER(release, true); void __tsan_read_write##size(void *ptr) \ { \ check_access(ptr, size, \ - KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE, \ - _RET_IP_); \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE); \ } \ EXPORT_SYMBOL(__tsan_read_write##size); \ void __tsan_unaligned_read_write##size(void *ptr) \ @@ -1017,14 +858,14 @@ DEFINE_TSAN_READ_WRITE(16); void __tsan_read_range(void *ptr, size_t size); void __tsan_read_range(void *ptr, size_t size) { - check_access(ptr, size, 0, _RET_IP_); + check_access(ptr, size, 0); } EXPORT_SYMBOL(__tsan_read_range); void __tsan_write_range(void *ptr, size_t size); void __tsan_write_range(void *ptr, size_t size) { - check_access(ptr, size, KCSAN_ACCESS_WRITE, _RET_IP_); + check_access(ptr, size, KCSAN_ACCESS_WRITE); } EXPORT_SYMBOL(__tsan_write_range); @@ -1045,8 +886,7 @@ EXPORT_SYMBOL(__tsan_write_range); IS_ALIGNED((unsigned long)ptr, size); \ if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic) \ return; \ - check_access(ptr, size, is_atomic ? KCSAN_ACCESS_ATOMIC : 0, \ - _RET_IP_); \ + check_access(ptr, size, is_atomic ? KCSAN_ACCESS_ATOMIC : 0); \ } \ EXPORT_SYMBOL(__tsan_volatile_read##size); \ void __tsan_unaligned_volatile_read##size(void *ptr) \ @@ -1061,8 +901,7 @@ EXPORT_SYMBOL(__tsan_write_range); return; \ check_access(ptr, size, \ KCSAN_ACCESS_WRITE | \ - (is_atomic ? KCSAN_ACCESS_ATOMIC : 0), \ - _RET_IP_); \ + (is_atomic ? KCSAN_ACCESS_ATOMIC : 0)); \ } \ EXPORT_SYMBOL(__tsan_volatile_write##size); \ void __tsan_unaligned_volatile_write##size(void *ptr) \ @@ -1076,56 +915,19 @@ DEFINE_TSAN_VOLATILE_READ_WRITE(8); DEFINE_TSAN_VOLATILE_READ_WRITE(16); /* - * Function entry and exit are used to determine the validty of reorder_access. 
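The DEFINE_MEMORY_BARRIER() block removed above is how barrier modeling worked: a barrier invalidates the pending reorder_access (by zeroing its size) when it orders that kind of access, e.g. wmb() only invalidates pending writes. A compact model under assumed flag values (the real constants live in kcsan-checks.h):

        #include <stddef.h>

        #define KCSAN_ACCESS_WRITE      0x1     /* illustrative values */
        #define KCSAN_ACCESS_COMPOUND   0x2

        struct pending { int type; size_t size; };      /* size == 0: nothing pending */

        /* mb() and release: order everything. */
        static void model_mb(struct pending *p)
        {
                p->size = 0;
        }

        /* wmb(): orders writes, including read-writes. */
        static void model_wmb(struct pending *p)
        {
                if (p->type & (KCSAN_ACCESS_WRITE | KCSAN_ACCESS_COMPOUND))
                        p->size = 0;
        }

        /* rmb(): orders reads, including read-writes. */
        static void model_rmb(struct pending *p)
        {
                if (!(p->type & KCSAN_ACCESS_WRITE) || (p->type & KCSAN_ACCESS_COMPOUND))
                        p->size = 0;
        }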
- * Reordering of the access ends at the end of the function scope where the - * access happened. This is done for two reasons: - * - * 1. Artificially limits the scope where missing barriers are detected. - * This minimizes false positives due to uninstrumented functions that - * contain the required barriers but were missed. - * - * 2. Simplifies generating the stack trace of the access. + * The below are not required by KCSAN, but can still be emitted by the + * compiler. */ void __tsan_func_entry(void *call_pc); -noinline void __tsan_func_entry(void *call_pc) +void __tsan_func_entry(void *call_pc) { - if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY)) - return; - - add_kcsan_stack_depth(1); } EXPORT_SYMBOL(__tsan_func_entry); - void __tsan_func_exit(void); -noinline void __tsan_func_exit(void) +void __tsan_func_exit(void) { - struct kcsan_scoped_access *reorder_access; - - if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY)) - return; - - reorder_access = get_reorder_access(get_ctx()); - if (!reorder_access) - goto out; - - if (get_kcsan_stack_depth() <= reorder_access->stack_depth) { - /* - * Access check to catch cases where write without a barrier - * (supposed release) was last access in function: because - * instrumentation is inserted before the real access, a data - * race due to the write giving up a c-s would only be caught if - * we do the conflicting access after. - */ - check_access(reorder_access->ptr, reorder_access->size, - reorder_access->type, reorder_access->ip); - reorder_access->size = 0; - reorder_access->stack_depth = INT_MIN; - } -out: - add_kcsan_stack_depth(-1); } EXPORT_SYMBOL(__tsan_func_exit); - void __tsan_init(void); void __tsan_init(void) { @@ -1148,21 +950,12 @@ EXPORT_SYMBOL(__tsan_init); * functions, whose job is to also execute the operation itself. 
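__tsan_func_entry() and __tsan_func_exit() go back to empty stubs above because -fsanitize=thread emits calls to them in every instrumented function regardless, so the runtime must keep exporting the symbols even when it ignores them. Conceptually, the compiler performs the rewrite below (illustrative only; real codegen varies, and the stub runtime is defined here so the snippet links standalone):

        /* Stub runtime: */
        void __tsan_func_entry(void *call_pc) { (void)call_pc; }
        void __tsan_func_exit(void) { }
        void __tsan_read4(void *ptr) { (void)ptr; }

        /* With -fsanitize=thread, this: */
        int get(int *p)
        {
                return *p;
        }

        /* conceptually becomes this: */
        int get_instrumented(int *p)
        {
                __tsan_func_entry(__builtin_return_address(0));
                __tsan_read4(p);
                int ret = *p;
                __tsan_func_exit();
                return ret;
        }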
*/ -static __always_inline void kcsan_atomic_builtin_memorder(int memorder) -{ - if (memorder == __ATOMIC_RELEASE || - memorder == __ATOMIC_SEQ_CST || - memorder == __ATOMIC_ACQ_REL) - __kcsan_release(); -} - #define DEFINE_TSAN_ATOMIC_LOAD_STORE(bits) \ u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder); \ u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder) \ { \ - kcsan_atomic_builtin_memorder(memorder); \ if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ - check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_ATOMIC, _RET_IP_); \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_ATOMIC); \ } \ return __atomic_load_n(ptr, memorder); \ } \ @@ -1170,10 +963,9 @@ static __always_inline void kcsan_atomic_builtin_memorder(int memorder) void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder); \ void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder) \ { \ - kcsan_atomic_builtin_memorder(memorder); \ if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ - KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC, _RET_IP_); \ + KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ } \ __atomic_store_n(ptr, v, memorder); \ } \ @@ -1183,11 +975,10 @@ static __always_inline void kcsan_atomic_builtin_memorder(int memorder) u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder); \ u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder) \ { \ - kcsan_atomic_builtin_memorder(memorder); \ if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ - KCSAN_ACCESS_ATOMIC, _RET_IP_); \ + KCSAN_ACCESS_ATOMIC); \ } \ return __atomic_##op##suffix(ptr, v, memorder); \ } \ @@ -1216,11 +1007,10 @@ static __always_inline void kcsan_atomic_builtin_memorder(int memorder) int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp, \ u##bits val, int mo, int fail_mo) \ { \ - kcsan_atomic_builtin_memorder(mo); \ if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ - KCSAN_ACCESS_ATOMIC, _RET_IP_); \ + KCSAN_ACCESS_ATOMIC); \ } \ return __atomic_compare_exchange_n(ptr, exp, val, weak, mo, fail_mo); \ } \ @@ -1232,11 +1022,10 @@ static __always_inline void kcsan_atomic_builtin_memorder(int memorder) u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \ int mo, int fail_mo) \ { \ - kcsan_atomic_builtin_memorder(mo); \ if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ - KCSAN_ACCESS_ATOMIC, _RET_IP_); \ + KCSAN_ACCESS_ATOMIC); \ } \ __atomic_compare_exchange_n(ptr, &exp, val, 0, mo, fail_mo); \ return exp; \ @@ -1264,47 +1053,10 @@ DEFINE_TSAN_ATOMIC_OPS(64); void __tsan_atomic_thread_fence(int memorder); void __tsan_atomic_thread_fence(int memorder) { - kcsan_atomic_builtin_memorder(memorder); __atomic_thread_fence(memorder); } EXPORT_SYMBOL(__tsan_atomic_thread_fence); -/* - * In instrumented files, we emit instrumentation for barriers by mapping the - * kernel barriers to an __atomic_signal_fence(), which is interpreted specially - * and otherwise has no relation to a real __atomic_signal_fence(). No known - * kernel code uses __atomic_signal_fence(). 
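One property of the DEFINE_TSAN_ATOMIC_* wrappers above is easy to miss: the compiler replaces the entire atomic builtin with the call, so each wrapper must perform the operation as well as check it. A userspace C11 model of the load wrapper (stub names invented):

        #include <stdatomic.h>
        #include <stdint.h>
        #include <stddef.h>

        static void check_access_stub(const volatile void *ptr, size_t size, int type)
        {
                (void)ptr; (void)size; (void)type;      /* would call into the KCSAN runtime */
        }

        static uint32_t tsan_atomic32_load(const volatile _Atomic uint32_t *ptr, memory_order mo)
        {
                check_access_stub(ptr, sizeof(*ptr), /* KCSAN_ACCESS_ATOMIC */ 0x4);
                return atomic_load_explicit(ptr, mo);   /* the access itself */
        }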
- * - * Since fsanitize=thread instrumentation handles __atomic_signal_fence(), which - * are turned into calls to __tsan_atomic_signal_fence(), such instrumentation - * can be disabled via the __no_kcsan function attribute (vs. an explicit call - * which could not). When __no_kcsan is requested, __atomic_signal_fence() - * generates no code. - * - * Note: The result of using __atomic_signal_fence() with KCSAN enabled is - * potentially limiting the compiler's ability to reorder operations; however, - * if barriers were instrumented with explicit calls (without LTO), the compiler - * couldn't optimize much anyway. The result of a hypothetical architecture - * using __atomic_signal_fence() in normal code would be KCSAN false negatives. - */ void __tsan_atomic_signal_fence(int memorder); -noinline void __tsan_atomic_signal_fence(int memorder) -{ - switch (memorder) { - case __KCSAN_BARRIER_TO_SIGNAL_FENCE_mb: - __kcsan_mb(); - break; - case __KCSAN_BARRIER_TO_SIGNAL_FENCE_wmb: - __kcsan_wmb(); - break; - case __KCSAN_BARRIER_TO_SIGNAL_FENCE_rmb: - __kcsan_rmb(); - break; - case __KCSAN_BARRIER_TO_SIGNAL_FENCE_release: - __kcsan_release(); - break; - default: - break; - } -} +void __tsan_atomic_signal_fence(int memorder) { } EXPORT_SYMBOL(__tsan_atomic_signal_fence); diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index ae33c2a7f0..f36e25c497 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -121,7 +121,7 @@ enum kcsan_value_change { * to be consumed by the reporting thread. No report is printed yet. */ void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_type, - unsigned long ip, int watchpoint_idx); + int watchpoint_idx); /* * The calling thread observed that the watchpoint it set up was hit and @@ -129,14 +129,14 @@ void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_typ * thread. */ void kcsan_report_known_origin(const volatile void *ptr, size_t size, int access_type, - unsigned long ip, enum kcsan_value_change value_change, - int watchpoint_idx, u64 old, u64 new, u64 mask); + enum kcsan_value_change value_change, int watchpoint_idx, + u64 old, u64 new, u64 mask); /* * No other thread was observed to race with the access, but the data value * before and after the stall differs. Reports a race of "unknown origin". */ void kcsan_report_unknown_origin(const volatile void *ptr, size_t size, int access_type, - unsigned long ip, u64 old, u64 new, u64 mask); + u64 old, u64 new, u64 mask); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index a36fca063a..dc55fd5a36 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -16,12 +16,9 @@ #define pr_fmt(fmt) "kcsan_test: " fmt #include -#include -#include #include #include #include -#include #include #include #include @@ -32,11 +29,6 @@ #include #include -#define KCSAN_TEST_REQUIRES(test, cond) do { \ - if (!(cond)) \ - kunit_skip((test), "Test requires: " #cond); \ -} while (0) - #ifdef CONFIG_CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE #define __KCSAN_ACCESS_RW(alt) (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE) #else @@ -154,7 +146,7 @@ struct expect_report { /* Check observed report matches information in @r. 
*/ __no_kcsan -static bool __report_matches(const struct expect_report *r) +static bool report_matches(const struct expect_report *r) { const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; bool ret = false; @@ -213,12 +205,10 @@ static bool __report_matches(const struct expect_report *r) "read-write" : "write") : "read"); - const bool is_atomic = (ty & KCSAN_ACCESS_ATOMIC); - const bool is_scoped = (ty & KCSAN_ACCESS_SCOPED); const char *const access_type_aux = - (is_atomic && is_scoped) ? " (marked, reordered)" - : (is_atomic ? " (marked)" - : (is_scoped ? " (reordered)" : "")); + (ty & KCSAN_ACCESS_ATOMIC) ? + " (marked)" : + ((ty & KCSAN_ACCESS_SCOPED) ? " (scoped)" : ""); if (i == 1) { /* Access 2 */ @@ -256,40 +246,6 @@ static bool __report_matches(const struct expect_report *r) return ret; } -static __always_inline const struct expect_report * -__report_set_scoped(struct expect_report *r, int accesses) -{ - BUILD_BUG_ON(accesses > 3); - - if (accesses & 1) - r->access[0].type |= KCSAN_ACCESS_SCOPED; - else - r->access[0].type &= ~KCSAN_ACCESS_SCOPED; - - if (accesses & 2) - r->access[1].type |= KCSAN_ACCESS_SCOPED; - else - r->access[1].type &= ~KCSAN_ACCESS_SCOPED; - - return r; -} - -__no_kcsan -static bool report_matches_any_reordered(struct expect_report *r) -{ - return __report_matches(__report_set_scoped(r, 0)) || - __report_matches(__report_set_scoped(r, 1)) || - __report_matches(__report_set_scoped(r, 2)) || - __report_matches(__report_set_scoped(r, 3)); -} - -#ifdef CONFIG_KCSAN_WEAK_MEMORY -/* Due to reordering accesses, any access may appear as "(reordered)". */ -#define report_matches report_matches_any_reordered -#else -#define report_matches __report_matches -#endif - /* ===== Test kernels ===== */ static long test_sink; @@ -300,8 +256,6 @@ static struct { long val[8]; } test_struct; static DEFINE_SEQLOCK(test_seqlock); -static DEFINE_SPINLOCK(test_spinlock); -static DEFINE_MUTEX(test_mutex); /* * Helper to avoid compiler optimizing out reads, and to generate source values @@ -310,16 +264,6 @@ static DEFINE_MUTEX(test_mutex); __no_kcsan static noinline void sink_value(long v) { WRITE_ONCE(test_sink, v); } -/* - * Generates a delay and some accesses that enter the runtime but do not produce - * data races. - */ -static noinline void test_delay(int iter) -{ - while (iter--) - sink_value(READ_ONCE(test_sink)); -} - static noinline void test_kernel_read(void) { sink_value(test_var); } static noinline void test_kernel_write(void) @@ -389,10 +333,7 @@ static noinline void test_kernel_assert_bits_nochange(void) ASSERT_EXCLUSIVE_BITS(test_var, ~TEST_CHANGE_BITS); } -/* - * Scoped assertions do trigger anywhere in scope. However, the report should - * still only point at the start of the scope. - */ +/* To check that scoped assertions do trigger anywhere in scope. 
*/ static noinline void test_enter_scope(void) { int x = 0; @@ -481,239 +422,19 @@ static noinline void test_kernel_xor_1bit(void) kcsan_nestable_atomic_end(); } -#define TEST_KERNEL_LOCKED(name, acquire, release) \ - static noinline void test_kernel_##name(void) \ - { \ - long *flag = &test_struct.val[0]; \ - long v = 0; \ - if (!(acquire)) \ - return; \ - while (v++ < 100) { \ - test_var++; \ - barrier(); \ - } \ - release; \ - test_delay(10); \ - } - -TEST_KERNEL_LOCKED(with_memorder, - cmpxchg_acquire(flag, 0, 1) == 0, - smp_store_release(flag, 0)); -TEST_KERNEL_LOCKED(wrong_memorder, - cmpxchg_relaxed(flag, 0, 1) == 0, - WRITE_ONCE(*flag, 0)); -TEST_KERNEL_LOCKED(atomic_builtin_with_memorder, - __atomic_compare_exchange_n(flag, &v, 1, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED), - __atomic_store_n(flag, 0, __ATOMIC_RELEASE)); -TEST_KERNEL_LOCKED(atomic_builtin_wrong_memorder, - __atomic_compare_exchange_n(flag, &v, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED), - __atomic_store_n(flag, 0, __ATOMIC_RELAXED)); - /* ===== Test cases ===== */ -/* - * Tests that various barriers have the expected effect on internal state. Not - * exhaustive on atomic_t operations. Unlike the selftest, also checks for - * too-strict barrier instrumentation; these can be tolerated, because it does - * not cause false positives, but at least we should be aware of such cases. - */ -static void test_barrier_nothreads(struct kunit *test) -{ -#ifdef CONFIG_KCSAN_WEAK_MEMORY - struct kcsan_scoped_access *reorder_access = &current->kcsan_ctx.reorder_access; -#else - struct kcsan_scoped_access *reorder_access = NULL; -#endif - arch_spinlock_t arch_spinlock = __ARCH_SPIN_LOCK_UNLOCKED; - atomic_t dummy; - - KCSAN_TEST_REQUIRES(test, reorder_access != NULL); - KCSAN_TEST_REQUIRES(test, IS_ENABLED(CONFIG_SMP)); - -#define __KCSAN_EXPECT_BARRIER(access_type, barrier, order_before, name) \ - do { \ - reorder_access->type = (access_type) | KCSAN_ACCESS_SCOPED; \ - reorder_access->size = sizeof(test_var); \ - barrier; \ - KUNIT_EXPECT_EQ_MSG(test, reorder_access->size, \ - order_before ? 0 : sizeof(test_var), \ - "improperly instrumented type=(" #access_type "): " name); \ - } while (0) -#define KCSAN_EXPECT_READ_BARRIER(b, o) __KCSAN_EXPECT_BARRIER(0, b, o, #b) -#define KCSAN_EXPECT_WRITE_BARRIER(b, o) __KCSAN_EXPECT_BARRIER(KCSAN_ACCESS_WRITE, b, o, #b) -#define KCSAN_EXPECT_RW_BARRIER(b, o) __KCSAN_EXPECT_BARRIER(KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE, b, o, #b) - - /* - * Lockdep initialization can strengthen certain locking operations due - * to calling into instrumented files; "warm up" our locks. - */ - spin_lock(&test_spinlock); - spin_unlock(&test_spinlock); - mutex_lock(&test_mutex); - mutex_unlock(&test_mutex); - - /* Force creating a valid entry in reorder_access first. */ - test_var = 0; - while (test_var++ < 1000000 && reorder_access->size != sizeof(test_var)) - __kcsan_check_read(&test_var, sizeof(test_var)); - KUNIT_ASSERT_EQ(test, reorder_access->size, sizeof(test_var)); - - kcsan_nestable_atomic_begin(); /* No watchpoints in called functions.
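The deleted TEST_KERNEL_LOCKED() kernels build a toy spinlock out of cmpxchg; the wrong_memorder variants take and release it with relaxed ordering, which is exactly the missing acquire/release pairing the removed barrier tests expected CONFIG_KCSAN_WEAK_MEMORY builds to report. A C11 userspace equivalent of the two flavors (sketch, names invented):

        #include <stdatomic.h>

        static atomic_long flag;        /* 0: unlocked, 1: locked */
        static long var;

        static void locked_correct(void)
        {
                long unlocked = 0;

                if (!atomic_compare_exchange_strong_explicit(&flag, &unlocked, 1,
                                                             memory_order_acquire,
                                                             memory_order_relaxed))
                        return;
                var++;                  /* critical section */
                atomic_store_explicit(&flag, 0, memory_order_release);
        }

        static void locked_racy(void)
        {
                long unlocked = 0;

                if (!atomic_compare_exchange_strong_explicit(&flag, &unlocked, 1,
                                                             memory_order_relaxed,
                                                             memory_order_relaxed))
                        return;
                var++;                  /* may be reordered outside the "lock" */
                atomic_store_explicit(&flag, 0, memory_order_relaxed);
        }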
*/ - - KCSAN_EXPECT_READ_BARRIER(mb(), true); - KCSAN_EXPECT_READ_BARRIER(wmb(), false); - KCSAN_EXPECT_READ_BARRIER(rmb(), true); - KCSAN_EXPECT_READ_BARRIER(smp_mb(), true); - KCSAN_EXPECT_READ_BARRIER(smp_wmb(), false); - KCSAN_EXPECT_READ_BARRIER(smp_rmb(), true); - KCSAN_EXPECT_READ_BARRIER(dma_wmb(), false); - KCSAN_EXPECT_READ_BARRIER(dma_rmb(), true); - KCSAN_EXPECT_READ_BARRIER(smp_mb__before_atomic(), true); - KCSAN_EXPECT_READ_BARRIER(smp_mb__after_atomic(), true); - KCSAN_EXPECT_READ_BARRIER(smp_mb__after_spinlock(), true); - KCSAN_EXPECT_READ_BARRIER(smp_store_mb(test_var, 0), true); - KCSAN_EXPECT_READ_BARRIER(smp_load_acquire(&test_var), false); - KCSAN_EXPECT_READ_BARRIER(smp_store_release(&test_var, 0), true); - KCSAN_EXPECT_READ_BARRIER(xchg(&test_var, 0), true); - KCSAN_EXPECT_READ_BARRIER(xchg_release(&test_var, 0), true); - KCSAN_EXPECT_READ_BARRIER(xchg_relaxed(&test_var, 0), false); - KCSAN_EXPECT_READ_BARRIER(cmpxchg(&test_var, 0, 0), true); - KCSAN_EXPECT_READ_BARRIER(cmpxchg_release(&test_var, 0, 0), true); - KCSAN_EXPECT_READ_BARRIER(cmpxchg_relaxed(&test_var, 0, 0), false); - KCSAN_EXPECT_READ_BARRIER(atomic_read(&dummy), false); - KCSAN_EXPECT_READ_BARRIER(atomic_read_acquire(&dummy), false); - KCSAN_EXPECT_READ_BARRIER(atomic_set(&dummy, 0), false); - KCSAN_EXPECT_READ_BARRIER(atomic_set_release(&dummy, 0), true); - KCSAN_EXPECT_READ_BARRIER(atomic_add(1, &dummy), false); - KCSAN_EXPECT_READ_BARRIER(atomic_add_return(1, &dummy), true); - KCSAN_EXPECT_READ_BARRIER(atomic_add_return_acquire(1, &dummy), false); - KCSAN_EXPECT_READ_BARRIER(atomic_add_return_release(1, &dummy), true); - KCSAN_EXPECT_READ_BARRIER(atomic_add_return_relaxed(1, &dummy), false); - KCSAN_EXPECT_READ_BARRIER(atomic_fetch_add(1, &dummy), true); - KCSAN_EXPECT_READ_BARRIER(atomic_fetch_add_acquire(1, &dummy), false); - KCSAN_EXPECT_READ_BARRIER(atomic_fetch_add_release(1, &dummy), true); - KCSAN_EXPECT_READ_BARRIER(atomic_fetch_add_relaxed(1, &dummy), false); - KCSAN_EXPECT_READ_BARRIER(test_and_set_bit(0, &test_var), true); - KCSAN_EXPECT_READ_BARRIER(test_and_clear_bit(0, &test_var), true); - KCSAN_EXPECT_READ_BARRIER(test_and_change_bit(0, &test_var), true); - KCSAN_EXPECT_READ_BARRIER(clear_bit_unlock(0, &test_var), true); - KCSAN_EXPECT_READ_BARRIER(__clear_bit_unlock(0, &test_var), true); - KCSAN_EXPECT_READ_BARRIER(arch_spin_lock(&arch_spinlock), false); - KCSAN_EXPECT_READ_BARRIER(arch_spin_unlock(&arch_spinlock), true); - KCSAN_EXPECT_READ_BARRIER(spin_lock(&test_spinlock), false); - KCSAN_EXPECT_READ_BARRIER(spin_unlock(&test_spinlock), true); - KCSAN_EXPECT_READ_BARRIER(mutex_lock(&test_mutex), false); - KCSAN_EXPECT_READ_BARRIER(mutex_unlock(&test_mutex), true); - - KCSAN_EXPECT_WRITE_BARRIER(mb(), true); - KCSAN_EXPECT_WRITE_BARRIER(wmb(), true); - KCSAN_EXPECT_WRITE_BARRIER(rmb(), false); - KCSAN_EXPECT_WRITE_BARRIER(smp_mb(), true); - KCSAN_EXPECT_WRITE_BARRIER(smp_wmb(), true); - KCSAN_EXPECT_WRITE_BARRIER(smp_rmb(), false); - KCSAN_EXPECT_WRITE_BARRIER(dma_wmb(), true); - KCSAN_EXPECT_WRITE_BARRIER(dma_rmb(), false); - KCSAN_EXPECT_WRITE_BARRIER(smp_mb__before_atomic(), true); - KCSAN_EXPECT_WRITE_BARRIER(smp_mb__after_atomic(), true); - KCSAN_EXPECT_WRITE_BARRIER(smp_mb__after_spinlock(), true); - KCSAN_EXPECT_WRITE_BARRIER(smp_store_mb(test_var, 0), true); - KCSAN_EXPECT_WRITE_BARRIER(smp_load_acquire(&test_var), false); - KCSAN_EXPECT_WRITE_BARRIER(smp_store_release(&test_var, 0), true); - KCSAN_EXPECT_WRITE_BARRIER(xchg(&test_var, 0), true); - 
KCSAN_EXPECT_WRITE_BARRIER(xchg_release(&test_var, 0), true); - KCSAN_EXPECT_WRITE_BARRIER(xchg_relaxed(&test_var, 0), false); - KCSAN_EXPECT_WRITE_BARRIER(cmpxchg(&test_var, 0, 0), true); - KCSAN_EXPECT_WRITE_BARRIER(cmpxchg_release(&test_var, 0, 0), true); - KCSAN_EXPECT_WRITE_BARRIER(cmpxchg_relaxed(&test_var, 0, 0), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_read(&dummy), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_read_acquire(&dummy), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_set(&dummy, 0), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_set_release(&dummy, 0), true); - KCSAN_EXPECT_WRITE_BARRIER(atomic_add(1, &dummy), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_add_return(1, &dummy), true); - KCSAN_EXPECT_WRITE_BARRIER(atomic_add_return_acquire(1, &dummy), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_add_return_release(1, &dummy), true); - KCSAN_EXPECT_WRITE_BARRIER(atomic_add_return_relaxed(1, &dummy), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_fetch_add(1, &dummy), true); - KCSAN_EXPECT_WRITE_BARRIER(atomic_fetch_add_acquire(1, &dummy), false); - KCSAN_EXPECT_WRITE_BARRIER(atomic_fetch_add_release(1, &dummy), true); - KCSAN_EXPECT_WRITE_BARRIER(atomic_fetch_add_relaxed(1, &dummy), false); - KCSAN_EXPECT_WRITE_BARRIER(test_and_set_bit(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(test_and_clear_bit(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(test_and_change_bit(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(clear_bit_unlock(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(__clear_bit_unlock(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(arch_spin_lock(&arch_spinlock), false); - KCSAN_EXPECT_WRITE_BARRIER(arch_spin_unlock(&arch_spinlock), true); - KCSAN_EXPECT_WRITE_BARRIER(spin_lock(&test_spinlock), false); - KCSAN_EXPECT_WRITE_BARRIER(spin_unlock(&test_spinlock), true); - KCSAN_EXPECT_WRITE_BARRIER(mutex_lock(&test_mutex), false); - KCSAN_EXPECT_WRITE_BARRIER(mutex_unlock(&test_mutex), true); - - KCSAN_EXPECT_RW_BARRIER(mb(), true); - KCSAN_EXPECT_RW_BARRIER(wmb(), true); - KCSAN_EXPECT_RW_BARRIER(rmb(), true); - KCSAN_EXPECT_RW_BARRIER(smp_mb(), true); - KCSAN_EXPECT_RW_BARRIER(smp_wmb(), true); - KCSAN_EXPECT_RW_BARRIER(smp_rmb(), true); - KCSAN_EXPECT_RW_BARRIER(dma_wmb(), true); - KCSAN_EXPECT_RW_BARRIER(dma_rmb(), true); - KCSAN_EXPECT_RW_BARRIER(smp_mb__before_atomic(), true); - KCSAN_EXPECT_RW_BARRIER(smp_mb__after_atomic(), true); - KCSAN_EXPECT_RW_BARRIER(smp_mb__after_spinlock(), true); - KCSAN_EXPECT_RW_BARRIER(smp_store_mb(test_var, 0), true); - KCSAN_EXPECT_RW_BARRIER(smp_load_acquire(&test_var), false); - KCSAN_EXPECT_RW_BARRIER(smp_store_release(&test_var, 0), true); - KCSAN_EXPECT_RW_BARRIER(xchg(&test_var, 0), true); - KCSAN_EXPECT_RW_BARRIER(xchg_release(&test_var, 0), true); - KCSAN_EXPECT_RW_BARRIER(xchg_relaxed(&test_var, 0), false); - KCSAN_EXPECT_RW_BARRIER(cmpxchg(&test_var, 0, 0), true); - KCSAN_EXPECT_RW_BARRIER(cmpxchg_release(&test_var, 0, 0), true); - KCSAN_EXPECT_RW_BARRIER(cmpxchg_relaxed(&test_var, 0, 0), false); - KCSAN_EXPECT_RW_BARRIER(atomic_read(&dummy), false); - KCSAN_EXPECT_RW_BARRIER(atomic_read_acquire(&dummy), false); - KCSAN_EXPECT_RW_BARRIER(atomic_set(&dummy, 0), false); - KCSAN_EXPECT_RW_BARRIER(atomic_set_release(&dummy, 0), true); - KCSAN_EXPECT_RW_BARRIER(atomic_add(1, &dummy), false); - KCSAN_EXPECT_RW_BARRIER(atomic_add_return(1, &dummy), true); - KCSAN_EXPECT_RW_BARRIER(atomic_add_return_acquire(1, &dummy), false); - KCSAN_EXPECT_RW_BARRIER(atomic_add_return_release(1, &dummy), true); - 
KCSAN_EXPECT_RW_BARRIER(atomic_add_return_relaxed(1, &dummy), false); - KCSAN_EXPECT_RW_BARRIER(atomic_fetch_add(1, &dummy), true); - KCSAN_EXPECT_RW_BARRIER(atomic_fetch_add_acquire(1, &dummy), false); - KCSAN_EXPECT_RW_BARRIER(atomic_fetch_add_release(1, &dummy), true); - KCSAN_EXPECT_RW_BARRIER(atomic_fetch_add_relaxed(1, &dummy), false); - KCSAN_EXPECT_RW_BARRIER(test_and_set_bit(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(test_and_clear_bit(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(test_and_change_bit(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(clear_bit_unlock(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(__clear_bit_unlock(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(arch_spin_lock(&arch_spinlock), false); - KCSAN_EXPECT_RW_BARRIER(arch_spin_unlock(&arch_spinlock), true); - KCSAN_EXPECT_RW_BARRIER(spin_lock(&test_spinlock), false); - KCSAN_EXPECT_RW_BARRIER(spin_unlock(&test_spinlock), true); - KCSAN_EXPECT_RW_BARRIER(mutex_lock(&test_mutex), false); - KCSAN_EXPECT_RW_BARRIER(mutex_unlock(&test_mutex), true); - -#ifdef clear_bit_unlock_is_negative_byte - KCSAN_EXPECT_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); -#endif - kcsan_nestable_atomic_end(); -} - /* Simple test with normal data race. */ __no_kcsan static void test_basic(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, }, }; - struct expect_report never = { + static const struct expect_report never = { .access = { { test_kernel_read, &test_var, sizeof(test_var), 0 }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, @@ -738,14 +459,14 @@ static void test_basic(struct kunit *test) __no_kcsan static void test_concurrent_races(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { /* NULL will match any address. */ { test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, { test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(0) }, }, }; - struct expect_report never = { + static const struct expect_report never = { .access = { { test_kernel_rmw_array, NULL, 0, 0 }, { test_kernel_rmw_array, NULL, 0, 0 }, @@ -767,24 +488,17 @@ static void test_concurrent_races(struct kunit *test) __no_kcsan static void test_novalue_change(struct kunit *test) { - struct expect_report expect_rw = { + const struct expect_report expect = { .access = { { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, }, }; - struct expect_report expect_ww = { - .access = { - { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, - { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, - }, - }; bool match_expect = false; - test_kernel_write_nochange(); /* Reset value. 
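All of the cases in this file share one driver shape: begin_test_checks() starts the two access kernels on the torture threads, and the loop polls the captured console output until the report matches or the time budget runs out. In outline (names as in this file):

        begin_test_checks(test_kernel_write, test_kernel_read);
        do {
                match_expect = report_matches(&expect);   /* poll captured report */
        } while (!end_test_checks(match_expect));         /* stops on match or timeout */
        KUNIT_EXPECT_TRUE(test, match_expect);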
*/ begin_test_checks(test_kernel_write_nochange, test_kernel_read); do { - match_expect = report_matches(&expect_rw) || report_matches(&expect_ww); + match_expect = report_matches(&expect); } while (!end_test_checks(match_expect)); if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY)) KUNIT_EXPECT_FALSE(test, match_expect); @@ -799,24 +513,17 @@ static void test_novalue_change(struct kunit *test) __no_kcsan static void test_novalue_change_exception(struct kunit *test) { - struct expect_report expect_rw = { + const struct expect_report expect = { .access = { { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, }, }; - struct expect_report expect_ww = { - .access = { - { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, - { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, - }, - }; bool match_expect = false; - test_kernel_write_nochange_rcu(); /* Reset value. */ begin_test_checks(test_kernel_write_nochange_rcu, test_kernel_read); do { - match_expect = report_matches(&expect_rw) || report_matches(&expect_ww); + match_expect = report_matches(&expect); } while (!end_test_checks(match_expect)); KUNIT_EXPECT_TRUE(test, match_expect); } @@ -825,7 +532,7 @@ static void test_novalue_change_exception(struct kunit *test) __no_kcsan static void test_unknown_origin(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_read, &test_var, sizeof(test_var), 0 }, { NULL }, @@ -847,7 +554,7 @@ static void test_unknown_origin(struct kunit *test) __no_kcsan static void test_write_write_assume_atomic(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, @@ -873,7 +580,7 @@ static void test_write_write_assume_atomic(struct kunit *test) __no_kcsan static void test_write_write_struct(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, @@ -895,7 +602,7 @@ static void test_write_write_struct(struct kunit *test) __no_kcsan static void test_write_write_struct_part(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, { test_kernel_write_struct_part, &test_struct.val[3], sizeof(test_struct.val[3]), KCSAN_ACCESS_WRITE }, @@ -927,7 +634,7 @@ static void test_read_atomic_write_atomic(struct kunit *test) __no_kcsan static void test_read_plain_atomic_write(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_read, &test_var, sizeof(test_var), 0 }, { test_kernel_write_atomic, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC }, @@ -935,7 +642,8 @@ static void test_read_plain_atomic_write(struct kunit *test) }; bool match_expect = false; - KCSAN_TEST_REQUIRES(test, !IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)); + if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) + return; begin_test_checks(test_kernel_read, test_kernel_write_atomic); do { @@ -948,7 +656,7 @@ static void 
test_read_plain_atomic_write(struct kunit *test) __no_kcsan static void test_read_plain_atomic_rmw(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_read, &test_var, sizeof(test_var), 0 }, { test_kernel_atomic_rmw, &test_var, sizeof(test_var), @@ -957,7 +665,8 @@ static void test_read_plain_atomic_rmw(struct kunit *test) }; bool match_expect = false; - KCSAN_TEST_REQUIRES(test, !IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)); + if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) + return; begin_test_checks(test_kernel_read, test_kernel_atomic_rmw); do { @@ -970,13 +679,13 @@ static void test_read_plain_atomic_rmw(struct kunit *test) __no_kcsan static void test_zero_size_access(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, }, }; - struct expect_report never = { + const struct expect_report never = { .access = { { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, { test_kernel_read_struct_zero_size, &test_struct.val[3], 0, 0 }, @@ -1010,7 +719,7 @@ static void test_data_race(struct kunit *test) __no_kcsan static void test_assert_exclusive_writer(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, @@ -1028,7 +737,7 @@ static void test_assert_exclusive_writer(struct kunit *test) __no_kcsan static void test_assert_exclusive_access(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, @@ -1046,19 +755,19 @@ static void test_assert_exclusive_access(struct kunit *test) __no_kcsan static void test_assert_exclusive_access_writer(struct kunit *test) { - struct expect_report expect_access_writer = { + const struct expect_report expect_access_writer = { .access = { { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, }, }; - struct expect_report expect_access_access = { + const struct expect_report expect_access_access = { .access = { { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, }, }; - struct expect_report never = { + const struct expect_report never = { .access = { { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, @@ -1082,7 +791,7 @@ static void test_assert_exclusive_access_writer(struct kunit *test) __no_kcsan static void test_assert_exclusive_bits_change(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_assert_bits_change, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, { test_kernel_change_bits, &test_var, sizeof(test_var), @@ -1113,43 +822,43 @@ static void 
test_assert_exclusive_bits_nochange(struct kunit *test) __no_kcsan static void test_assert_exclusive_writer_scoped(struct kunit *test) { - struct expect_report expect_start = { + const struct expect_report expect_start = { .access = { { test_kernel_assert_writer_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED }, { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, }, }; - struct expect_report expect_inscope = { + const struct expect_report expect_anywhere = { .access = { { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED }, { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, }, }; bool match_expect_start = false; - bool match_expect_inscope = false; + bool match_expect_anywhere = false; begin_test_checks(test_kernel_assert_writer_scoped, test_kernel_write_nochange); do { match_expect_start |= report_matches(&expect_start); - match_expect_inscope |= report_matches(&expect_inscope); - } while (!end_test_checks(match_expect_inscope)); + match_expect_anywhere |= report_matches(&expect_anywhere); + } while (!end_test_checks(match_expect_start && match_expect_anywhere)); KUNIT_EXPECT_TRUE(test, match_expect_start); - KUNIT_EXPECT_FALSE(test, match_expect_inscope); + KUNIT_EXPECT_TRUE(test, match_expect_anywhere); } __no_kcsan static void test_assert_exclusive_access_scoped(struct kunit *test) { - struct expect_report expect_start1 = { + const struct expect_report expect_start1 = { .access = { { test_kernel_assert_access_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, }, }; - struct expect_report expect_start2 = { + const struct expect_report expect_start2 = { .access = { expect_start1.access[0], expect_start1.access[0] }, }; - struct expect_report expect_inscope = { + const struct expect_report expect_inscope = { .access = { { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, @@ -1163,9 +872,9 @@ static void test_assert_exclusive_access_scoped(struct kunit *test) do { match_expect_start |= report_matches(&expect_start1) || report_matches(&expect_start2); match_expect_inscope |= report_matches(&expect_inscope); - } while (!end_test_checks(match_expect_inscope)); + } while (!end_test_checks(match_expect_start && match_expect_inscope)); KUNIT_EXPECT_TRUE(test, match_expect_start); - KUNIT_EXPECT_FALSE(test, match_expect_inscope); + KUNIT_EXPECT_TRUE(test, match_expect_inscope); } /* @@ -1254,7 +963,7 @@ static void test_atomic_builtins(struct kunit *test) __no_kcsan static void test_1bit_value_change(struct kunit *test) { - struct expect_report expect = { + const struct expect_report expect = { .access = { { test_kernel_read, &test_var, sizeof(test_var), 0 }, { test_kernel_xor_1bit, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, @@ -1274,90 +983,6 @@ static void test_1bit_value_change(struct kunit *test) KUNIT_EXPECT_TRUE(test, match); } -__no_kcsan -static void test_correct_barrier(struct kunit *test) -{ - struct expect_report expect = { - .access = { - { test_kernel_with_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, - { test_kernel_with_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(0) }, - }, - }; - bool match_expect = false; - - test_struct.val[0] = 0; /* init unlocked */ - 
begin_test_checks(test_kernel_with_memorder, test_kernel_with_memorder); - do { - match_expect = report_matches_any_reordered(&expect); - } while (!end_test_checks(match_expect)); - KUNIT_EXPECT_FALSE(test, match_expect); -} - -__no_kcsan -static void test_missing_barrier(struct kunit *test) -{ - struct expect_report expect = { - .access = { - { test_kernel_wrong_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, - { test_kernel_wrong_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(0) }, - }, - }; - bool match_expect = false; - - test_struct.val[0] = 0; /* init unlocked */ - begin_test_checks(test_kernel_wrong_memorder, test_kernel_wrong_memorder); - do { - match_expect = report_matches_any_reordered(&expect); - } while (!end_test_checks(match_expect)); - if (IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY)) - KUNIT_EXPECT_TRUE(test, match_expect); - else - KUNIT_EXPECT_FALSE(test, match_expect); -} - -__no_kcsan -static void test_atomic_builtins_correct_barrier(struct kunit *test) -{ - struct expect_report expect = { - .access = { - { test_kernel_atomic_builtin_with_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, - { test_kernel_atomic_builtin_with_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(0) }, - }, - }; - bool match_expect = false; - - test_struct.val[0] = 0; /* init unlocked */ - begin_test_checks(test_kernel_atomic_builtin_with_memorder, - test_kernel_atomic_builtin_with_memorder); - do { - match_expect = report_matches_any_reordered(&expect); - } while (!end_test_checks(match_expect)); - KUNIT_EXPECT_FALSE(test, match_expect); -} - -__no_kcsan -static void test_atomic_builtins_missing_barrier(struct kunit *test) -{ - struct expect_report expect = { - .access = { - { test_kernel_atomic_builtin_wrong_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, - { test_kernel_atomic_builtin_wrong_memorder, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(0) }, - }, - }; - bool match_expect = false; - - test_struct.val[0] = 0; /* init unlocked */ - begin_test_checks(test_kernel_atomic_builtin_wrong_memorder, - test_kernel_atomic_builtin_wrong_memorder); - do { - match_expect = report_matches_any_reordered(&expect); - } while (!end_test_checks(match_expect)); - if (IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY)) - KUNIT_EXPECT_TRUE(test, match_expect); - else - KUNIT_EXPECT_FALSE(test, match_expect); -} - /* * Generate thread counts for all test cases. Values generated are in interval * [2, 5] followed by exponentially increasing thread counts from 8 to 32. 
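The comment above fully determines the generated sequence; as a standalone check (hypothetical, plain C):

        #include <stdio.h>

        int main(void)
        {
                for (long n = 2; n <= 5; ++n)           /* interval [2, 5] */
                        printf("threads=%ld\n", n);
                for (long n = 8; n <= 32; n *= 2)       /* 8, 16, 32 */
                        printf("threads=%ld\n", n);
                return 0;
        }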
@@ -1407,7 +1032,6 @@ static const void *nthreads_gen_params(const void *prev, char *desc) #define KCSAN_KUNIT_CASE(test_name) KUNIT_CASE_PARAM(test_name, nthreads_gen_params) static struct kunit_case kcsan_test_cases[] = { - KUNIT_CASE(test_barrier_nothreads), KCSAN_KUNIT_CASE(test_basic), KCSAN_KUNIT_CASE(test_concurrent_races), KCSAN_KUNIT_CASE(test_novalue_change), @@ -1432,10 +1056,6 @@ static struct kunit_case kcsan_test_cases[] = { KCSAN_KUNIT_CASE(test_seqlock_noreport), KCSAN_KUNIT_CASE(test_atomic_builtins), KCSAN_KUNIT_CASE(test_1bit_value_change), - KCSAN_KUNIT_CASE(test_correct_barrier), - KCSAN_KUNIT_CASE(test_missing_barrier), - KCSAN_KUNIT_CASE(test_atomic_builtins_correct_barrier), - KCSAN_KUNIT_CASE(test_atomic_builtins_missing_barrier), {}, }; @@ -1500,9 +1120,6 @@ static int test_init(struct kunit *test) observed.nlines = 0; spin_unlock_irqrestore(&observed.lock, flags); - if (strstr(test->name, "nothreads")) - return 0; - if (!torture_init_begin((char *)test->name, 1)) return -EBUSY; @@ -1545,9 +1162,6 @@ static void test_exit(struct kunit *test) struct task_struct **stop_thread; int i; - if (strstr(test->name, "nothreads")) - return; - if (torture_cleanup_begin()) return; @@ -1610,7 +1224,7 @@ static void kcsan_test_exit(void) tracepoint_synchronize_unregister(); } -late_initcall_sync(kcsan_test_init); +late_initcall(kcsan_test_init); module_exit(kcsan_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 6779440404..21137929d4 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -32,7 +31,6 @@ struct access_info { int access_type; int task_pid; int cpu_id; - unsigned long ip; }; /* @@ -215,9 +213,9 @@ static const char *get_access_type(int type) if (type & KCSAN_ACCESS_ASSERT) { if (type & KCSAN_ACCESS_SCOPED) { if (type & KCSAN_ACCESS_WRITE) - return "assert no accesses (reordered)"; + return "assert no accesses (scoped)"; else - return "assert no writes (reordered)"; + return "assert no writes (scoped)"; } else { if (type & KCSAN_ACCESS_WRITE) return "assert no accesses"; @@ -240,17 +238,13 @@ static const char *get_access_type(int type) case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "read-write (marked)"; case KCSAN_ACCESS_SCOPED: - return "read (reordered)"; + return "read (scoped)"; case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: - return "read (marked, reordered)"; + return "read (marked, scoped)"; case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: - return "write (reordered)"; + return "write (scoped)"; case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: - return "write (marked, reordered)"; - case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE: - return "read-write (reordered)"; - case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: - return "read-write (marked, reordered)"; + return "write (marked, scoped)"; default: BUG(); } @@ -306,52 +300,6 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries return skip; } -/* - * Skips to the first entry that matches the function of @ip, and then replaces - * that entry with @ip, returning the entries to skip with @replaced containing - * the replaced entry. 
- */ -static int -replace_stack_entry(unsigned long stack_entries[], int num_entries, unsigned long ip, - unsigned long *replaced) -{ - unsigned long symbolsize, offset; - unsigned long target_func; - int skip; - - if (kallsyms_lookup_size_offset(ip, &symbolsize, &offset)) - target_func = ip - offset; - else - goto fallback; - - for (skip = 0; skip < num_entries; ++skip) { - unsigned long func = stack_entries[skip]; - - if (!kallsyms_lookup_size_offset(func, &symbolsize, &offset)) - goto fallback; - func -= offset; - - if (func == target_func) { - *replaced = stack_entries[skip]; - stack_entries[skip] = ip; - return skip; - } - } - -fallback: - /* Should not happen; the resulting stack trace is likely misleading. */ - WARN_ONCE(1, "Cannot find frame for %pS in stack trace", (void *)ip); - return get_stack_skipnr(stack_entries, num_entries); -} - -static int -sanitize_stack_entries(unsigned long stack_entries[], int num_entries, unsigned long ip, - unsigned long *replaced) -{ - return ip ? replace_stack_entry(stack_entries, num_entries, ip, replaced) : - get_stack_skipnr(stack_entries, num_entries); -} - /* Compares symbolized strings of addr1 and addr2. */ static int sym_strcmp(void *addr1, void *addr2) { @@ -364,14 +312,6 @@ static int sym_strcmp(void *addr1, void *addr2) return strncmp(buf1, buf2, sizeof(buf1)); } -static void -print_stack_trace(unsigned long stack_entries[], int num_entries, unsigned long reordered_to) -{ - stack_trace_print(stack_entries, num_entries, 0); - if (reordered_to) - pr_err(" |\n +-> reordered to: %pS\n", (void *)reordered_to); -} - static void print_verbose_info(struct task_struct *task) { if (!task) @@ -387,15 +327,13 @@ static void print_verbose_info(struct task_struct *task) static void print_report(enum kcsan_value_change value_change, const struct access_info *ai, - struct other_info *other_info, + const struct other_info *other_info, u64 old, u64 new, u64 mask) { - unsigned long reordered_to = 0; unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); - int skipnr = sanitize_stack_entries(stack_entries, num_stack_entries, ai->ip, &reordered_to); + int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); unsigned long this_frame = stack_entries[skipnr]; - unsigned long other_reordered_to = 0; unsigned long other_frame = 0; int other_skipnr = 0; /* silence uninit warnings */ @@ -406,9 +344,8 @@ static void print_report(enum kcsan_value_change value_change, return; if (other_info) { - other_skipnr = sanitize_stack_entries(other_info->stack_entries, - other_info->num_stack_entries, - other_info->ai.ip, &other_reordered_to); + other_skipnr = get_stack_skipnr(other_info->stack_entries, + other_info->num_stack_entries); other_frame = other_info->stack_entries[other_skipnr]; /* @value_change is only known for the other thread */ @@ -448,9 +385,10 @@ static void print_report(enum kcsan_value_change value_change, other_info->ai.cpu_id); /* Print the other thread's stack trace. */ - print_stack_trace(other_info->stack_entries + other_skipnr, + stack_trace_print(other_info->stack_entries + other_skipnr, other_info->num_stack_entries - other_skipnr, - other_reordered_to); + 0); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) print_verbose_info(other_info->task); @@ -464,7 +402,9 @@ static void print_report(enum kcsan_value_change value_change, get_thread_desc(ai->task_pid), ai->cpu_id); } /* Print stack trace of this thread. 
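replace_stack_entry(), deleted above, swaps the imprecise stack entry for the precise instruction pointer of a reordered access: resolve @ip to its containing function, find the first stack entry inside that same function, and overwrite it. A userspace sketch of the same search, with a tiny fixed symbol table standing in for kallsyms_lookup_size_offset() (all names and addresses invented):

#include <stdbool.h>
#include <stdio.h>

struct sym { unsigned long start, size; };

static const struct sym symtab[] = {
	{ 0x1000, 0x100 },	/* "funcA" */
	{ 0x2000, 0x200 },	/* "funcB" */
};

/* Map an address to the start of its containing function. */
static bool lookup(unsigned long addr, unsigned long *start)
{
	for (unsigned int i = 0; i < sizeof(symtab) / sizeof(symtab[0]); i++) {
		if (addr >= symtab[i].start &&
		    addr < symtab[i].start + symtab[i].size) {
			*start = symtab[i].start;
			return true;
		}
	}
	return false;
}

static int replace_entry(unsigned long *stack, int n, unsigned long ip)
{
	unsigned long target, func;

	if (!lookup(ip, &target))
		return -1;		/* real code falls back to plain skipnr */
	for (int i = 0; i < n; i++) {
		if (lookup(stack[i], &func) && func == target) {
			stack[i] = ip;	/* swap in the precise ip */
			return i;	/* number of entries to skip */
		}
	}
	return -1;
}

int main(void)
{
	unsigned long stack[] = { 0x2010, 0x1080, 0x1234 };
	int skip = replace_entry(stack, 3, 0x1040);	/* 0x1040 is inside "funcA" */

	printf("skip=%d entry=%#lx\n", skip, stack[1]);	/* skip=1 entry=0x1040 */
	return 0;
}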
*/ - print_stack_trace(stack_entries + skipnr, num_stack_entries - skipnr, reordered_to); + stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, + 0); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) print_verbose_info(current); @@ -636,23 +576,21 @@ static bool prepare_report_consumer(unsigned long *flags, } static struct access_info prepare_access_info(const volatile void *ptr, size_t size, - int access_type, unsigned long ip) + int access_type) { return (struct access_info) { .ptr = ptr, .size = size, .access_type = access_type, .task_pid = in_task() ? task_pid_nr(current) : -1, - .cpu_id = raw_smp_processor_id(), - /* Only replace stack entry with @ip if scoped access. */ - .ip = (access_type & KCSAN_ACCESS_SCOPED) ? ip : 0, + .cpu_id = raw_smp_processor_id() }; } void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_type, - unsigned long ip, int watchpoint_idx) + int watchpoint_idx) { - const struct access_info ai = prepare_access_info(ptr, size, access_type, ip); + const struct access_info ai = prepare_access_info(ptr, size, access_type); unsigned long flags; kcsan_disable_current(); @@ -665,10 +603,10 @@ void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_typ } void kcsan_report_known_origin(const volatile void *ptr, size_t size, int access_type, - unsigned long ip, enum kcsan_value_change value_change, - int watchpoint_idx, u64 old, u64 new, u64 mask) + enum kcsan_value_change value_change, int watchpoint_idx, + u64 old, u64 new, u64 mask) { - const struct access_info ai = prepare_access_info(ptr, size, access_type, ip); + const struct access_info ai = prepare_access_info(ptr, size, access_type); struct other_info *other_info = &other_infos[watchpoint_idx]; unsigned long flags = 0; @@ -699,9 +637,9 @@ void kcsan_report_known_origin(const volatile void *ptr, size_t size, int access } void kcsan_report_unknown_origin(const volatile void *ptr, size_t size, int access_type, - unsigned long ip, u64 old, u64 new, u64 mask) + u64 old, u64 new, u64 mask) { - const struct access_info ai = prepare_access_info(ptr, size, access_type, ip); + const struct access_info ai = prepare_access_info(ptr, size, access_type); unsigned long flags; kcsan_disable_current(); diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c index 75712959c8..7f29cb0f5e 100644 --- a/kernel/kcsan/selftest.c +++ b/kernel/kcsan/selftest.c @@ -7,15 +7,10 @@ #define pr_fmt(fmt) "kcsan: " fmt -#include -#include #include -#include #include #include #include -#include -#include #include #include "encoding.h" @@ -23,7 +18,7 @@ #define ITERS_PER_TEST 2000 /* Test requirements. */ -static bool __init test_requires(void) +static bool test_requires(void) { /* random should be initialized for the below tests */ return prandom_u32() + prandom_u32() != 0; @@ -33,18 +28,14 @@ static bool __init test_requires(void) * Test watchpoint encode and decode: check that encoding some access's info, * and then subsequent decode preserves the access's info. 
*/ -static bool __init test_encode_decode(void) +static bool test_encode_decode(void) { int i; for (i = 0; i < ITERS_PER_TEST; ++i) { size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1; bool is_write = !!prandom_u32_max(2); - unsigned long verif_masked_addr; - long encoded_watchpoint; - bool verif_is_write; unsigned long addr; - size_t verif_size; prandom_bytes(&addr, sizeof(addr)); if (addr < PAGE_SIZE) @@ -53,37 +44,53 @@ static bool __init test_encode_decode(void) if (WARN_ON(!check_encodable(addr, size))) return false; - encoded_watchpoint = encode_watchpoint(addr, size, is_write); + /* Encode and decode */ + { + const long encoded_watchpoint = + encode_watchpoint(addr, size, is_write); + unsigned long verif_masked_addr; + size_t verif_size; + bool verif_is_write; - /* Check special watchpoints */ - if (WARN_ON(decode_watchpoint(INVALID_WATCHPOINT, &verif_masked_addr, &verif_size, &verif_is_write))) - return false; - if (WARN_ON(decode_watchpoint(CONSUMED_WATCHPOINT, &verif_masked_addr, &verif_size, &verif_is_write))) - return false; + /* Check special watchpoints */ + if (WARN_ON(decode_watchpoint( + INVALID_WATCHPOINT, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + if (WARN_ON(decode_watchpoint( + CONSUMED_WATCHPOINT, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; - /* Check decoding watchpoint returns same data */ - if (WARN_ON(!decode_watchpoint(encoded_watchpoint, &verif_masked_addr, &verif_size, &verif_is_write))) - return false; - if (WARN_ON(verif_masked_addr != (addr & WATCHPOINT_ADDR_MASK))) - goto fail; - if (WARN_ON(verif_size != size)) - goto fail; - if (WARN_ON(is_write != verif_is_write)) - goto fail; + /* Check decoding watchpoint returns same data */ + if (WARN_ON(!decode_watchpoint( + encoded_watchpoint, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + if (WARN_ON(verif_masked_addr != + (addr & WATCHPOINT_ADDR_MASK))) + goto fail; + if (WARN_ON(verif_size != size)) + goto fail; + if (WARN_ON(is_write != verif_is_write)) + goto fail; - continue; + continue; fail: - pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n", - __func__, is_write ? "write" : "read", size, addr, encoded_watchpoint, - verif_is_write ? "write" : "read", verif_size, verif_masked_addr); - return false; + pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n", + __func__, is_write ? "write" : "read", size, + addr, encoded_watchpoint, + verif_is_write ? "write" : "read", verif_size, + verif_masked_addr); + return false; + } } return true; } /* Test access matching function. */ -static bool __init test_matching_access(void) +static bool test_matching_access(void) { if (WARN_ON(!matching_access(10, 1, 10, 1))) return false; @@ -108,143 +115,6 @@ static bool __init test_matching_access(void) return true; } -/* - * Correct memory barrier instrumentation is critical to avoiding false - * positives: simple test to check at boot certain barriers are always properly - * instrumented. See kcsan_test for a more complete test. 
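test_encode_decode() above only needs the round-trip property: pack (addr, size, is_write) into one word, decode it back, and compare against the masked original. A minimal userspace model under an assumed bit layout (write flag in bit 0, a 4-bit size field, masked address above; the real layout lives in kernel/kcsan/encoding.h and differs):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define ADDR_MASK	(~0xfffUL)	/* stand-in for WATCHPOINT_ADDR_MASK */
#define INVALID_WP	(-1L)		/* stand-in for INVALID_WATCHPOINT */

static long encode(unsigned long addr, unsigned long size, bool is_write)
{
	/* size <= 15 in this model, so it fits below the masked address bits */
	return (long)((addr & ADDR_MASK) | (size << 1) | is_write);
}

static bool decode(long wp, unsigned long *addr, unsigned long *size, bool *is_write)
{
	if (wp == INVALID_WP)
		return false;		/* special watchpoints never decode */
	*addr = (unsigned long)wp & ADDR_MASK;
	*size = ((unsigned long)wp >> 1) & 0xf;
	*is_write = wp & 1;
	return true;
}

int main(void)
{
	unsigned long addr, size;
	bool w;

	assert(!decode(INVALID_WP, &addr, &size, &w));
	assert(decode(encode(0x12345678UL, 8, true), &addr, &size, &w));
	assert(addr == (0x12345678UL & ADDR_MASK) && size == 8 && w);
	puts("round-trip ok");
	return 0;
}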
- */ -static DEFINE_SPINLOCK(test_spinlock); -static bool __init test_barrier(void) -{ -#ifdef CONFIG_KCSAN_WEAK_MEMORY - struct kcsan_scoped_access *reorder_access = ¤t->kcsan_ctx.reorder_access; -#else - struct kcsan_scoped_access *reorder_access = NULL; -#endif - bool ret = true; - arch_spinlock_t arch_spinlock = __ARCH_SPIN_LOCK_UNLOCKED; - atomic_t dummy; - long test_var; - - if (!reorder_access || !IS_ENABLED(CONFIG_SMP)) - return true; - -#define __KCSAN_CHECK_BARRIER(access_type, barrier, name) \ - do { \ - reorder_access->type = (access_type) | KCSAN_ACCESS_SCOPED; \ - reorder_access->size = 1; \ - barrier; \ - if (reorder_access->size != 0) { \ - pr_err("improperly instrumented type=(" #access_type "): " name "\n"); \ - ret = false; \ - } \ - } while (0) -#define KCSAN_CHECK_READ_BARRIER(b) __KCSAN_CHECK_BARRIER(0, b, #b) -#define KCSAN_CHECK_WRITE_BARRIER(b) __KCSAN_CHECK_BARRIER(KCSAN_ACCESS_WRITE, b, #b) -#define KCSAN_CHECK_RW_BARRIER(b) __KCSAN_CHECK_BARRIER(KCSAN_ACCESS_WRITE | KCSAN_ACCESS_COMPOUND, b, #b) - - kcsan_nestable_atomic_begin(); /* No watchpoints in called functions. */ - - KCSAN_CHECK_READ_BARRIER(mb()); - KCSAN_CHECK_READ_BARRIER(rmb()); - KCSAN_CHECK_READ_BARRIER(smp_mb()); - KCSAN_CHECK_READ_BARRIER(smp_rmb()); - KCSAN_CHECK_READ_BARRIER(dma_rmb()); - KCSAN_CHECK_READ_BARRIER(smp_mb__before_atomic()); - KCSAN_CHECK_READ_BARRIER(smp_mb__after_atomic()); - KCSAN_CHECK_READ_BARRIER(smp_mb__after_spinlock()); - KCSAN_CHECK_READ_BARRIER(smp_store_mb(test_var, 0)); - KCSAN_CHECK_READ_BARRIER(smp_store_release(&test_var, 0)); - KCSAN_CHECK_READ_BARRIER(xchg(&test_var, 0)); - KCSAN_CHECK_READ_BARRIER(xchg_release(&test_var, 0)); - KCSAN_CHECK_READ_BARRIER(cmpxchg(&test_var, 0, 0)); - KCSAN_CHECK_READ_BARRIER(cmpxchg_release(&test_var, 0, 0)); - KCSAN_CHECK_READ_BARRIER(atomic_set_release(&dummy, 0)); - KCSAN_CHECK_READ_BARRIER(atomic_add_return(1, &dummy)); - KCSAN_CHECK_READ_BARRIER(atomic_add_return_release(1, &dummy)); - KCSAN_CHECK_READ_BARRIER(atomic_fetch_add(1, &dummy)); - KCSAN_CHECK_READ_BARRIER(atomic_fetch_add_release(1, &dummy)); - KCSAN_CHECK_READ_BARRIER(test_and_set_bit(0, &test_var)); - KCSAN_CHECK_READ_BARRIER(test_and_clear_bit(0, &test_var)); - KCSAN_CHECK_READ_BARRIER(test_and_change_bit(0, &test_var)); - KCSAN_CHECK_READ_BARRIER(clear_bit_unlock(0, &test_var)); - KCSAN_CHECK_READ_BARRIER(__clear_bit_unlock(0, &test_var)); - arch_spin_lock(&arch_spinlock); - KCSAN_CHECK_READ_BARRIER(arch_spin_unlock(&arch_spinlock)); - spin_lock(&test_spinlock); - KCSAN_CHECK_READ_BARRIER(spin_unlock(&test_spinlock)); - - KCSAN_CHECK_WRITE_BARRIER(mb()); - KCSAN_CHECK_WRITE_BARRIER(wmb()); - KCSAN_CHECK_WRITE_BARRIER(smp_mb()); - KCSAN_CHECK_WRITE_BARRIER(smp_wmb()); - KCSAN_CHECK_WRITE_BARRIER(dma_wmb()); - KCSAN_CHECK_WRITE_BARRIER(smp_mb__before_atomic()); - KCSAN_CHECK_WRITE_BARRIER(smp_mb__after_atomic()); - KCSAN_CHECK_WRITE_BARRIER(smp_mb__after_spinlock()); - KCSAN_CHECK_WRITE_BARRIER(smp_store_mb(test_var, 0)); - KCSAN_CHECK_WRITE_BARRIER(smp_store_release(&test_var, 0)); - KCSAN_CHECK_WRITE_BARRIER(xchg(&test_var, 0)); - KCSAN_CHECK_WRITE_BARRIER(xchg_release(&test_var, 0)); - KCSAN_CHECK_WRITE_BARRIER(cmpxchg(&test_var, 0, 0)); - KCSAN_CHECK_WRITE_BARRIER(cmpxchg_release(&test_var, 0, 0)); - KCSAN_CHECK_WRITE_BARRIER(atomic_set_release(&dummy, 0)); - KCSAN_CHECK_WRITE_BARRIER(atomic_add_return(1, &dummy)); - KCSAN_CHECK_WRITE_BARRIER(atomic_add_return_release(1, &dummy)); - KCSAN_CHECK_WRITE_BARRIER(atomic_fetch_add(1, &dummy)); - 
KCSAN_CHECK_WRITE_BARRIER(atomic_fetch_add_release(1, &dummy)); - KCSAN_CHECK_WRITE_BARRIER(test_and_set_bit(0, &test_var)); - KCSAN_CHECK_WRITE_BARRIER(test_and_clear_bit(0, &test_var)); - KCSAN_CHECK_WRITE_BARRIER(test_and_change_bit(0, &test_var)); - KCSAN_CHECK_WRITE_BARRIER(clear_bit_unlock(0, &test_var)); - KCSAN_CHECK_WRITE_BARRIER(__clear_bit_unlock(0, &test_var)); - arch_spin_lock(&arch_spinlock); - KCSAN_CHECK_WRITE_BARRIER(arch_spin_unlock(&arch_spinlock)); - spin_lock(&test_spinlock); - KCSAN_CHECK_WRITE_BARRIER(spin_unlock(&test_spinlock)); - - KCSAN_CHECK_RW_BARRIER(mb()); - KCSAN_CHECK_RW_BARRIER(wmb()); - KCSAN_CHECK_RW_BARRIER(rmb()); - KCSAN_CHECK_RW_BARRIER(smp_mb()); - KCSAN_CHECK_RW_BARRIER(smp_wmb()); - KCSAN_CHECK_RW_BARRIER(smp_rmb()); - KCSAN_CHECK_RW_BARRIER(dma_wmb()); - KCSAN_CHECK_RW_BARRIER(dma_rmb()); - KCSAN_CHECK_RW_BARRIER(smp_mb__before_atomic()); - KCSAN_CHECK_RW_BARRIER(smp_mb__after_atomic()); - KCSAN_CHECK_RW_BARRIER(smp_mb__after_spinlock()); - KCSAN_CHECK_RW_BARRIER(smp_store_mb(test_var, 0)); - KCSAN_CHECK_RW_BARRIER(smp_store_release(&test_var, 0)); - KCSAN_CHECK_RW_BARRIER(xchg(&test_var, 0)); - KCSAN_CHECK_RW_BARRIER(xchg_release(&test_var, 0)); - KCSAN_CHECK_RW_BARRIER(cmpxchg(&test_var, 0, 0)); - KCSAN_CHECK_RW_BARRIER(cmpxchg_release(&test_var, 0, 0)); - KCSAN_CHECK_RW_BARRIER(atomic_set_release(&dummy, 0)); - KCSAN_CHECK_RW_BARRIER(atomic_add_return(1, &dummy)); - KCSAN_CHECK_RW_BARRIER(atomic_add_return_release(1, &dummy)); - KCSAN_CHECK_RW_BARRIER(atomic_fetch_add(1, &dummy)); - KCSAN_CHECK_RW_BARRIER(atomic_fetch_add_release(1, &dummy)); - KCSAN_CHECK_RW_BARRIER(test_and_set_bit(0, &test_var)); - KCSAN_CHECK_RW_BARRIER(test_and_clear_bit(0, &test_var)); - KCSAN_CHECK_RW_BARRIER(test_and_change_bit(0, &test_var)); - KCSAN_CHECK_RW_BARRIER(clear_bit_unlock(0, &test_var)); - KCSAN_CHECK_RW_BARRIER(__clear_bit_unlock(0, &test_var)); - arch_spin_lock(&arch_spinlock); - KCSAN_CHECK_RW_BARRIER(arch_spin_unlock(&arch_spinlock)); - spin_lock(&test_spinlock); - KCSAN_CHECK_RW_BARRIER(spin_unlock(&test_spinlock)); - -#ifdef clear_bit_unlock_is_negative_byte - KCSAN_CHECK_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); - KCSAN_CHECK_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); - KCSAN_CHECK_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); -#endif - kcsan_nestable_atomic_end(); - - return ret; -} - static int __init kcsan_selftest(void) { int passed = 0; @@ -262,7 +132,6 @@ static int __init kcsan_selftest(void) RUN_TEST(test_requires); RUN_TEST(test_encode_decode); RUN_TEST(test_matching_access); - RUN_TEST(test_barrier); pr_info("selftest: %d/%d tests passed\n", passed, total); if (passed != total) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 68480f7311..5a5d192a89 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -81,7 +81,7 @@ int kexec_should_crash(struct task_struct *p) if (crash_kexec_post_notifiers) return 0; /* - * There are 4 panic() calls in make_task_dead() path, each of which + * There are 4 panic() calls in do_exit() path, each of which * corresponds to each of these 4 conditions. 
*/ if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 8347fc158d..33400ff051 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -556,11 +556,6 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf, if (kbuf->image->type == KEXEC_TYPE_CRASH) return func(&crashk_res, kbuf); - /* - * Using MEMBLOCK_NONE will properly skip MEMBLOCK_DRIVER_MANAGED. See - * IORESOURCE_SYSRAM_DRIVER_MANAGED handling in - * locate_mem_hole_callback(). - */ if (kbuf->top_down) { for_each_free_mem_range_reverse(i, NUMA_NO_NODE, MEMBLOCK_NONE, &mstart, &mend, NULL) { diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 94cab8c9ce..2ef90d1569 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Kernel Probes (KProbes) + * kernel/kprobes.c * * Copyright (C) IBM Corporation, 2002, 2004 * @@ -17,9 +18,6 @@ * and Prasanna S Panchamukhi * added function-return probes. */ - -#define pr_fmt(fmt) "kprobes: " fmt - #include #include #include @@ -48,24 +46,21 @@ #define KPROBE_HASH_BITS 6 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) -#if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL) -#define kprobe_sysctls_init() do { } while (0) -#endif static int kprobes_initialized; /* kprobe_table can be accessed by - * - Normal hlist traversal and RCU add/del under 'kprobe_mutex' is held. + * - Normal hlist traversal and RCU add/del under kprobe_mutex is held. * Or * - RCU hlist traversal under disabling preempt (breakpoint handlers) */ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; -/* NOTE: change this value only with 'kprobe_mutex' held */ +/* NOTE: change this value only with kprobe_mutex held */ static bool kprobes_all_disarmed; -/* This protects 'kprobe_table' and 'optimizing_list' */ +/* This protects kprobe_table and optimizing_list */ static DEFINE_MUTEX(kprobe_mutex); -static DEFINE_PER_CPU(struct kprobe *, kprobe_instance); +static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; kprobe_opcode_t * __weak kprobe_lookup_name(const char *name, unsigned int __unused) @@ -73,15 +68,12 @@ kprobe_opcode_t * __weak kprobe_lookup_name(const char *name, return ((kprobe_opcode_t *)(kallsyms_lookup_name(name))); } -/* - * Blacklist -- list of 'struct kprobe_blacklist_entry' to store info where - * kprobes can not probe. - */ +/* Blacklist -- list of struct kprobe_blacklist_entry */ static LIST_HEAD(kprobe_blacklist); #ifdef __ARCH_WANT_KPROBES_INSN_SLOT /* - * 'kprobe::ainsn.insn' points to the copy of the instruction to be + * kprobe->ainsn.insn points to the copy of the instruction to be * single-stepped. x86_64, POWER4 and above have no-exec support and * stepping on the instruction on a vmalloced/kmalloced/data page * is a recipe for disaster @@ -112,12 +104,6 @@ enum kprobe_slot_state { void __weak *alloc_insn_page(void) { - /* - * Use module_alloc() so this page is within +/- 2GB of where the - * kernel image and loaded module images reside. This is required - * for most of the architectures. - * (e.g. x86-64 needs this to handle the %rip-relative fixups.) 
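The +/-2GB constraint mentioned above exists because x86-64 %rip-relative instructions encode a signed 32-bit displacement measured from the end of the instruction; a copied instruction only works if its target is still within that reach. A self-contained check of the reachability condition (addresses invented):

#include <stdint.h>
#include <stdio.h>

static int rel32_reachable(uint64_t dst_insn_end, uint64_t target)
{
	int64_t disp = (int64_t)(target - dst_insn_end);

	return disp >= INT32_MIN && disp <= INT32_MAX;
}

int main(void)
{
	uint64_t text = 0xffffffff81000000ULL;	/* example text base */

	printf("%d\n", rel32_reachable(text + 0x100000, text));	/* 1: in range */
	printf("%d\n", rel32_reachable(text + (3ULL << 30), text));	/* 0: >2GB away */
	return 0;
}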
- */ return module_alloc(PAGE_SIZE); } @@ -153,7 +139,6 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) list_for_each_entry_rcu(kip, &c->pages, list) { if (kip->nused < slots_per_page(c)) { int i; - for (i = 0; i < slots_per_page(c); i++) { if (kip->slot_used[i] == SLOT_CLEAN) { kip->slot_used[i] = SLOT_USED; @@ -179,6 +164,11 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) if (!kip) goto out; + /* + * Use module_alloc so this page is within +/- 2GB of where the + * kernel image and loaded module images reside. This is required + * so x86_64 can correctly handle the %rip-relative fixups. + */ kip->insns = c->alloc(); if (!kip->insns) { kfree(kip); @@ -201,8 +191,8 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) return slot; } -/* Return true if all garbages are collected, otherwise false. */ -static bool collect_one_slot(struct kprobe_insn_page *kip, int idx) +/* Return 1 if all garbages are collected, otherwise 0. */ +static int collect_one_slot(struct kprobe_insn_page *kip, int idx) { kip->slot_used[idx] = SLOT_CLEAN; kip->nused--; @@ -226,9 +216,9 @@ static bool collect_one_slot(struct kprobe_insn_page *kip, int idx) kip->cache->free(kip->insns); kfree(kip); } - return true; + return 1; } - return false; + return 0; } static int collect_garbage_slots(struct kprobe_insn_cache *c) @@ -240,7 +230,6 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c) list_for_each_entry_safe(kip, next, &c->pages, list) { int i; - if (kip->ngarbage == 0) continue; kip->ngarbage = 0; /* we will collect all garbages */ @@ -321,7 +310,7 @@ int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, list_for_each_entry_rcu(kip, &c->pages, list) { if ((*symnum)--) continue; - strscpy(sym, c->sym, KSYM_NAME_LEN); + strlcpy(sym, c->sym, KSYM_NAME_LEN); *type = 't'; *value = (unsigned long)kip->insns; ret = 0; @@ -369,9 +358,9 @@ static inline void reset_kprobe_instance(void) /* * This routine is called either: - * - under the 'kprobe_mutex' - during kprobe_[un]register(). - * OR - * - with preemption disabled - from architecture specific code. + * - under the kprobe_mutex - during kprobe_[un]register() + * OR + * - with preemption disabled - from arch/xxx/kernel/kprobes.c */ struct kprobe *get_kprobe(void *addr) { @@ -391,20 +380,22 @@ NOKPROBE_SYMBOL(get_kprobe); static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); -/* Return true if 'p' is an aggregator */ -static inline bool kprobe_aggrprobe(struct kprobe *p) +/* Return true if the kprobe is an aggregator */ +static inline int kprobe_aggrprobe(struct kprobe *p) { return p->pre_handler == aggr_pre_handler; } -/* Return true if 'p' is unused */ -static inline bool kprobe_unused(struct kprobe *p) +/* Return true(!0) if the kprobe is unused */ +static inline int kprobe_unused(struct kprobe *p) { return kprobe_aggrprobe(p) && kprobe_disabled(p) && list_empty(&p->list); } -/* Keep all fields in the kprobe consistent. */ +/* + * Keep all fields in the kprobe consistent + */ static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p) { memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t)); @@ -412,11 +403,11 @@ static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p) } #ifdef CONFIG_OPTPROBES -/* NOTE: This is protected by 'kprobe_mutex'. */ +/* NOTE: change this value only with kprobe_mutex held */ static bool kprobes_allow_optimization; /* - * Call all 'kprobe::pre_handler' on the list, but ignores its return value. 
+ * Call all pre_handler on the list, but ignores its return value. * This must be called from arch-dep optimized caller. */ void opt_pre_handler(struct kprobe *p, struct pt_regs *regs) @@ -444,7 +435,7 @@ static void free_aggr_kprobe(struct kprobe *p) kfree(op); } -/* Return true if the kprobe is ready for optimization. */ +/* Return true(!0) if the kprobe is ready for optimization. */ static inline int kprobe_optready(struct kprobe *p) { struct optimized_kprobe *op; @@ -457,8 +448,8 @@ static inline int kprobe_optready(struct kprobe *p) return 0; } -/* Return true if the kprobe is disarmed. Note: p must be on hash list */ -static inline bool kprobe_disarmed(struct kprobe *p) +/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */ +static inline int kprobe_disarmed(struct kprobe *p) { struct optimized_kprobe *op; @@ -471,32 +462,32 @@ static inline bool kprobe_disarmed(struct kprobe *p) return kprobe_disabled(p) && list_empty(&op->list); } -/* Return true if the probe is queued on (un)optimizing lists */ -static bool kprobe_queued(struct kprobe *p) +/* Return true(!0) if the probe is queued on (un)optimizing lists */ +static int kprobe_queued(struct kprobe *p) { struct optimized_kprobe *op; if (kprobe_aggrprobe(p)) { op = container_of(p, struct optimized_kprobe, kp); if (!list_empty(&op->list)) - return true; + return 1; } - return false; + return 0; } /* * Return an optimized kprobe whose optimizing code replaces - * instructions including 'addr' (exclude breakpoint). + * instructions including addr (exclude breakpoint). */ -static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr) +static struct kprobe *get_optimized_kprobe(unsigned long addr) { int i; struct kprobe *p = NULL; struct optimized_kprobe *op; /* Don't check i == 0, since that is a breakpoint case. */ - for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++) - p = get_kprobe(addr - i); + for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++) + p = get_kprobe((void *)(addr - i)); if (p && kprobe_optready(p)) { op = container_of(p, struct optimized_kprobe, kp); @@ -507,7 +498,7 @@ static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr) return NULL; } -/* Optimization staging list, protected by 'kprobe_mutex' */ +/* Optimization staging list, protected by kprobe_mutex */ static LIST_HEAD(optimizing_list); static LIST_HEAD(unoptimizing_list); static LIST_HEAD(freeing_list); @@ -518,20 +509,20 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); /* * Optimize (replace a breakpoint with a jump) kprobes listed on - * 'optimizing_list'. + * optimizing_list. */ static void do_optimize_kprobes(void) { lockdep_assert_held(&text_mutex); /* - * The optimization/unoptimization refers 'online_cpus' via - * stop_machine() and cpu-hotplug modifies the 'online_cpus'. - * And same time, 'text_mutex' will be held in cpu-hotplug and here. - * This combination can cause a deadlock (cpu-hotplug tries to lock - * 'text_mutex' but stop_machine() can not be done because - * the 'online_cpus' has been changed) - * To avoid this deadlock, caller must have locked cpu-hotplug - * for preventing cpu-hotplug outside of 'text_mutex' locking. + * The optimization/unoptimization refers online_cpus via + * stop_machine() and cpu-hotplug modifies online_cpus. + * And same time, text_mutex will be held in cpu-hotplug and here. 
+ * This combination can cause a deadlock (cpu-hotplug try to lock + * text_mutex but stop_machine can not be done because online_cpus + * has been changed) + * To avoid this deadlock, caller must have locked cpu hotplug + * for preventing cpu-hotplug outside of text_mutex locking. */ lockdep_assert_cpus_held(); @@ -545,7 +536,7 @@ static void do_optimize_kprobes(void) /* * Unoptimize (replace a jump with a breakpoint and remove the breakpoint - * if need) kprobes listed on 'unoptimizing_list'. + * if need) kprobes listed on unoptimizing_list. */ static void do_unoptimize_kprobes(void) { @@ -560,7 +551,7 @@ static void do_unoptimize_kprobes(void) return; arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); - /* Loop on 'freeing_list' for disarming */ + /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { /* Switching from detour code to origin */ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; @@ -571,7 +562,7 @@ static void do_unoptimize_kprobes(void) /* * Remove unused probes from hash list. After waiting * for synchronization, these probes are reclaimed. - * (reclaiming is done by do_free_cleaned_kprobes().) + * (reclaiming is done by do_free_cleaned_kprobes.) */ hlist_del_rcu(&op->kp.hlist); } else @@ -579,7 +570,7 @@ static void do_unoptimize_kprobes(void) } } -/* Reclaim all kprobes on the 'freeing_list' */ +/* Reclaim all kprobes on the free_list */ static void do_free_cleaned_kprobes(void) { struct optimized_kprobe *op, *tmp; @@ -651,9 +642,9 @@ void wait_for_kprobe_optimizer(void) while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) { mutex_unlock(&kprobe_mutex); - /* This will also make 'optimizing_work' execute immmediately */ + /* this will also make optimizing_work execute immmediately */ flush_delayed_work(&optimizing_work); - /* 'optimizing_work' might not have been queued yet, relax */ + /* @optimizing_work might not have been queued yet, relax */ cpu_relax(); mutex_lock(&kprobe_mutex); @@ -684,7 +675,7 @@ static void optimize_kprobe(struct kprobe *p) (kprobe_disabled(p) || kprobes_all_disarmed)) return; - /* kprobes with 'post_handler' can not be optimized */ + /* kprobes with post_handler can not be optimized */ if (p->post_handler) return; @@ -704,10 +695,7 @@ static void optimize_kprobe(struct kprobe *p) } op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - /* - * On the 'unoptimizing_list' and 'optimizing_list', - * 'op' must have OPTIMIZED flag - */ + /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ if (WARN_ON_ONCE(!list_empty(&op->list))) return; @@ -777,7 +765,7 @@ static int reuse_unused_kprobe(struct kprobe *ap) WARN_ON_ONCE(list_empty(&op->list)); /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; - /* Optimize it again. (remove from 'op->list') */ + /* Optimize it again (remove from op->list) */ if (!kprobe_optready(ap)) return -EINVAL; @@ -827,7 +815,7 @@ static void prepare_optimized_kprobe(struct kprobe *p) __prepare_optimized_kprobe(op, p); } -/* Allocate new optimized_kprobe and try to prepare optimized instructions. */ +/* Allocate new optimized_kprobe and try to prepare optimized instructions */ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) { struct optimized_kprobe *op; @@ -846,19 +834,19 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); /* - * Prepare an optimized_kprobe and optimize it. - * NOTE: 'p' must be a normal registered kprobe. 
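For context on the optimization machinery above: get_optimized_kprobe() must notice when a new probe address lands inside the multi-byte jump emitted for an earlier optimized probe, which is why it scans backwards from the address rather than only checking an exact match. A userspace model of that scan (table, length, and names invented):

#include <stdio.h>

#define MAX_PATCHED_LEN 5	/* e.g. the size of a jmp rel32 on x86 */

struct probe { unsigned long addr; int optimized; };

static struct probe probes[] = { { 0x1000, 1 }, { 0x1010, 0 } };

static struct probe *find_probe(unsigned long addr)
{
	for (unsigned int i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
		if (probes[i].addr == addr)
			return &probes[i];
	return NULL;
}

static struct probe *find_covering_optimized(unsigned long addr)
{
	/* i == 0 is the plain breakpoint-at-addr case, handled elsewhere */
	for (int i = 1; i < MAX_PATCHED_LEN; i++) {
		struct probe *p = find_probe(addr - i);

		if (p)	/* found the nearest preceding probe; covered only if optimized */
			return p->optimized ? p : NULL;
	}
	return NULL;
}

int main(void)
{
	printf("%s\n", find_covering_optimized(0x1003) ? "covered" : "clear");	/* covered */
	printf("%s\n", find_covering_optimized(0x1008) ? "covered" : "clear");	/* clear */
	return 0;
}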
+ * Prepare an optimized_kprobe and optimize it + * NOTE: p must be a normal registered kprobe */ static void try_to_optimize_kprobe(struct kprobe *p) { struct kprobe *ap; struct optimized_kprobe *op; - /* Impossible to optimize ftrace-based kprobe. */ + /* Impossible to optimize ftrace-based kprobe */ if (kprobe_ftrace(p)) return; - /* For preparing optimization, jump_label_text_reserved() is called. */ + /* For preparing optimization, jump_label_text_reserved() is called */ cpus_read_lock(); jump_label_lock(); mutex_lock(&text_mutex); @@ -869,14 +857,14 @@ static void try_to_optimize_kprobe(struct kprobe *p) op = container_of(ap, struct optimized_kprobe, kp); if (!arch_prepared_optinsn(&op->optinsn)) { - /* If failed to setup optimizing, fallback to kprobe. */ + /* If failed to setup optimizing, fallback to kprobe */ arch_remove_optimized_kprobe(op); kfree(op); goto out; } init_aggr_kprobe(ap, p); - optimize_kprobe(ap); /* This just kicks optimizer thread. */ + optimize_kprobe(ap); /* This just kicks optimizer thread */ out: mutex_unlock(&text_mutex); @@ -891,7 +879,7 @@ static void optimize_all_kprobes(void) unsigned int i; mutex_lock(&kprobe_mutex); - /* If optimization is already allowed, just return. */ + /* If optimization is already allowed, just return */ if (kprobes_allow_optimization) goto out; @@ -904,7 +892,7 @@ static void optimize_all_kprobes(void) optimize_kprobe(p); } cpus_read_unlock(); - pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n"); + printk(KERN_INFO "Kprobes globally optimized\n"); out: mutex_unlock(&kprobe_mutex); } @@ -917,7 +905,7 @@ static void unoptimize_all_kprobes(void) unsigned int i; mutex_lock(&kprobe_mutex); - /* If optimization is already prohibited, just return. */ + /* If optimization is already prohibited, just return */ if (!kprobes_allow_optimization) { mutex_unlock(&kprobe_mutex); return; @@ -935,16 +923,16 @@ static void unoptimize_all_kprobes(void) cpus_read_unlock(); mutex_unlock(&kprobe_mutex); - /* Wait for unoptimizing completion. */ + /* Wait for unoptimizing completion */ wait_for_kprobe_optimizer(); - pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n"); + printk(KERN_INFO "Kprobes globally unoptimized\n"); } static DEFINE_MUTEX(kprobe_sysctl_mutex); -static int sysctl_kprobes_optimization; -static int proc_kprobes_optimization_handler(struct ctl_table *table, - int write, void *buffer, - size_t *length, loff_t *ppos) +int sysctl_kprobes_optimization; +int proc_kprobes_optimization_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, + loff_t *ppos) { int ret; @@ -960,35 +948,15 @@ static int proc_kprobes_optimization_handler(struct ctl_table *table, return ret; } - -static struct ctl_table kprobe_sysctls[] = { - { - .procname = "kprobes-optimization", - .data = &sysctl_kprobes_optimization, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_kprobes_optimization_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - {} -}; - -static void __init kprobe_sysctls_init(void) -{ - register_sysctl_init("debug", kprobe_sysctls); -} #endif /* CONFIG_SYSCTL */ -/* Put a breakpoint for a probe. */ +/* Put a breakpoint for a probe. Must be called with text_mutex locked */ static void __arm_kprobe(struct kprobe *p) { struct kprobe *_p; - lockdep_assert_held(&text_mutex); - - /* Find the overlapping optimized kprobes. 
*/ - _p = get_optimized_kprobe(p->addr); + /* Check collision with other optimized kprobes */ + _p = get_optimized_kprobe((unsigned long)p->addr); if (unlikely(_p)) /* Fallback to unoptimized kprobe */ unoptimize_kprobe(_p, true); @@ -997,29 +965,22 @@ static void __arm_kprobe(struct kprobe *p) optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ } -/* Remove the breakpoint of a probe. */ +/* Remove the breakpoint of a probe. Must be called with text_mutex locked */ static void __disarm_kprobe(struct kprobe *p, bool reopt) { struct kprobe *_p; - lockdep_assert_held(&text_mutex); - /* Try to unoptimize */ unoptimize_kprobe(p, kprobes_all_disarmed); if (!kprobe_queued(p)) { arch_disarm_kprobe(p); - /* If another kprobe was blocked, re-optimize it. */ - _p = get_optimized_kprobe(p->addr); + /* If another kprobe was blocked, optimize it. */ + _p = get_optimized_kprobe((unsigned long)p->addr); if (unlikely(_p) && reopt) optimize_kprobe(_p); } - /* - * TODO: Since unoptimization and real disarming will be done by - * the worker thread, we can not check whether another probe are - * unoptimized because of this probe here. It should be re-optimized - * by the worker thread. - */ + /* TODO: reoptimize others after unoptimized this probe */ } #else /* !CONFIG_OPTPROBES */ @@ -1042,7 +1003,7 @@ static int reuse_unused_kprobe(struct kprobe *ap) * unregistered. * Thus there should be no chance to reuse unused kprobe. */ - WARN_ON_ONCE(1); + printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); return -EINVAL; } @@ -1072,21 +1033,34 @@ static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = { static int kprobe_ipmodify_enabled; static int kprobe_ftrace_enabled; +/* Must ensure p->addr is really on ftrace */ +static int prepare_kprobe(struct kprobe *p) +{ + if (!kprobe_ftrace(p)) + return arch_prepare_kprobe(p); + + return arch_prepare_kprobe_ftrace(p); +} + +/* Caller must lock kprobe_mutex */ static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, int *cnt) { int ret = 0; - lockdep_assert_held(&kprobe_mutex); - ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0); - if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret)) + if (ret) { + pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n", + p->addr, ret); return ret; + } if (*cnt == 0) { ret = register_ftrace_function(ops); - if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret)) + if (ret) { + pr_debug("Failed to init kprobe-ftrace (%d)\n", ret); goto err_ftrace; + } } (*cnt)++; @@ -1110,23 +1084,22 @@ static int arm_kprobe_ftrace(struct kprobe *p) ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); } +/* Caller must lock kprobe_mutex */ static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, int *cnt) { int ret = 0; - lockdep_assert_held(&kprobe_mutex); - if (*cnt == 1) { ret = unregister_ftrace_function(ops); - if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret)) + if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret)) return ret; } (*cnt)--; ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0); - WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n", + WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n", p->addr, ret); return ret; } @@ -1140,6 +1113,11 @@ static int disarm_kprobe_ftrace(struct kprobe *p) ipmodify ? 
&kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); } #else /* !CONFIG_KPROBES_ON_FTRACE */ +static inline int prepare_kprobe(struct kprobe *p) +{ + return arch_prepare_kprobe(p); +} + static inline int arm_kprobe_ftrace(struct kprobe *p) { return -ENODEV; @@ -1151,15 +1129,7 @@ static inline int disarm_kprobe_ftrace(struct kprobe *p) } #endif -static int prepare_kprobe(struct kprobe *p) -{ - /* Must ensure p->addr is really on ftrace */ - if (kprobe_ftrace(p)) - return arch_prepare_kprobe_ftrace(p); - - return arch_prepare_kprobe(p); -} - +/* Arm a kprobe with text_mutex */ static int arm_kprobe(struct kprobe *kp) { if (unlikely(kprobe_ftrace(kp))) @@ -1174,6 +1144,7 @@ static int arm_kprobe(struct kprobe *kp) return 0; } +/* Disarm a kprobe with text_mutex */ static int disarm_kprobe(struct kprobe *kp, bool reopt) { if (unlikely(kprobe_ftrace(kp))) @@ -1223,17 +1194,17 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, } NOKPROBE_SYMBOL(aggr_post_handler); -/* Walks the list and increments 'nmissed' if 'p' has child probes. */ +/* Walks the list and increments nmissed count for multiprobe case */ void kprobes_inc_nmissed_count(struct kprobe *p) { struct kprobe *kp; - if (!kprobe_aggrprobe(p)) { p->nmissed++; } else { list_for_each_entry_rcu(kp, &p->list, list) kp->nmissed++; } + return; } NOKPROBE_SYMBOL(kprobes_inc_nmissed_count); @@ -1251,9 +1222,9 @@ static void recycle_rp_inst(struct kretprobe_instance *ri) { struct kretprobe *rp = get_kretprobe(ri); - if (likely(rp)) + if (likely(rp)) { freelist_add(&ri->freelist, &rp->freelist); - else + } else call_rcu(&ri->rcu, free_rp_inst_rcu); } NOKPROBE_SYMBOL(recycle_rp_inst); @@ -1279,10 +1250,10 @@ void kprobe_busy_end(void) } /* - * This function is called from delayed_put_task_struct() when a task is - * dead and cleaned up to recycle any kretprobe instances associated with - * this task. These left over instances represent probed functions that - * have been called but will never return. + * This function is called from finish_task_switch when task tk becomes dead, + * so that we can recycle any function-return probe instances associated + * with this task. These left over instances represent probed functions + * that have been called but will never return. */ void kprobe_flush_task(struct task_struct *tk) { @@ -1328,7 +1299,7 @@ static inline void free_rp_inst(struct kretprobe *rp) } } -/* Add the new probe to 'ap->list'. */ +/* Add the new probe to ap->list */ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) { if (p->post_handler) @@ -1342,12 +1313,12 @@ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) } /* - * Fill in the required fields of the aggregator kprobe. Replace the - * earlier kprobe in the hlist with the aggregator kprobe. + * Fill in the required fields of the "manager kprobe". Replace the + * earlier kprobe in the hlist with the manager kprobe */ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) { - /* Copy the insn slot of 'p' to 'ap'. */ + /* Copy p's insn slot to ap */ copy_kprobe(p, ap); flush_insn_slot(ap); ap->addr = p->addr; @@ -1365,7 +1336,8 @@ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) } /* - * This registers the second or subsequent kprobe at the same address. 
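The aggregator ("manager") kprobe described in the surrounding comments is how several probes coexist at one address: the first registration is promoted to an aggregator whose handler walks a list of children, and later registrations at the same address just append to that list. A simplified userspace model (the kernel additionally tracks per-child disabled/gone state):

#include <stdio.h>

struct probe {
	void (*handler)(void);
	struct probe *next;	/* children hanging off the aggregator */
};

static void aggr_handler(struct probe *ap)
{
	for (struct probe *p = ap->next; p; p = p->next)
		p->handler();	/* the kernel also skips disabled children */
}

static void h1(void) { puts("handler 1"); }
static void h2(void) { puts("handler 2"); }

int main(void)
{
	struct probe ap = { 0 };			/* aggregator at one address */
	struct probe p1 = { h1, NULL }, p2 = { h2, NULL };

	p1.next = &p2;
	ap.next = &p1;		/* register_aggr_kprobe() analogue */
	aggr_handler(&ap);	/* one trap, both handlers run */
	return 0;
}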
+ * This is the second or subsequent kprobe at the address - handle + * the intricacies */ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) { @@ -1379,7 +1351,7 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) mutex_lock(&text_mutex); if (!kprobe_aggrprobe(orig_p)) { - /* If 'orig_p' is not an 'aggr_kprobe', create new one. */ + /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ ap = alloc_aggr_kprobe(orig_p); if (!ap) { ret = -ENOMEM; @@ -1404,8 +1376,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) if (ret) /* * Even if fail to allocate new slot, don't need to - * free the 'ap'. It will be used next time, or - * freed by unregister_kprobe(). + * free aggr_probe. It will be used next time, or + * freed by unregister_kprobe. */ goto out; @@ -1420,7 +1392,7 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) | KPROBE_FLAG_DISABLED; } - /* Copy the insn slot of 'p' to 'ap'. */ + /* Copy ap's insn slot to p */ copy_kprobe(ap, p); ret = add_new_kprobe(ap, p); @@ -1446,7 +1418,7 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) bool __weak arch_within_kprobe_blacklist(unsigned long addr) { - /* The '__kprobes' functions and entry code must not be probed. */ + /* The __kprobes marked functions and entry code must not be probed */ return addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end; } @@ -1458,8 +1430,8 @@ static bool __within_kprobe_blacklist(unsigned long addr) if (arch_within_kprobe_blacklist(addr)) return true; /* - * If 'kprobe_blacklist' is defined, check the address and - * reject any probe registration in the prohibited area. + * If there exists a kprobe_blacklist, verify and + * fail any probe registration in the prohibited area */ list_for_each_entry(ent, &kprobe_blacklist, list) { if (addr >= ent->start_addr && addr < ent->end_addr) @@ -1489,7 +1461,7 @@ bool within_kprobe_blacklist(unsigned long addr) } /* - * If 'symbol_name' is specified, look it up and add the 'offset' + * If we have a symbol_name argument, look it up and add the offset field * to it. This way, we can specify a relative address to a symbol. * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. @@ -1519,10 +1491,7 @@ static kprobe_opcode_t *kprobe_addr(struct kprobe *p) return _kprobe_addr(p->addr, p->symbol_name, p->offset); } -/* - * Check the 'p' is valid and return the aggregator kprobe - * at the same address. - */ +/* Check passed kprobe is valid and return kprobe in kprobe_table. */ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { struct kprobe *ap, *list_p; @@ -1560,7 +1529,7 @@ static inline int warn_kprobe_rereg(struct kprobe *p) return ret; } -static int check_ftrace_location(struct kprobe *p) +int __weak arch_check_ftrace_location(struct kprobe *p) { unsigned long ftrace_addr; @@ -1583,7 +1552,7 @@ static int check_kprobe_address_safe(struct kprobe *p, { int ret; - ret = check_ftrace_location(p); + ret = arch_check_ftrace_location(p); if (ret) return ret; jump_label_lock(); @@ -1599,7 +1568,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if 'p' is probing a module. 
*/ + /* Check if are we probing a module */ *probed_mod = __module_text_address((unsigned long) p->addr); if (*probed_mod) { /* @@ -1612,7 +1581,7 @@ static int check_kprobe_address_safe(struct kprobe *p, } /* - * If the module freed '.init.text', we couldn't insert + * If the module freed .init.text, we couldn't insert * kprobes in there. */ if (within_module_init((unsigned long)p->addr, *probed_mod) && @@ -1659,7 +1628,7 @@ int register_kprobe(struct kprobe *p) old_p = get_kprobe(p->addr); if (old_p) { - /* Since this may unoptimize 'old_p', locking 'text_mutex'. */ + /* Since this may unoptimize old_p, locking text_mutex. */ ret = register_aggr_kprobe(old_p, p); goto out; } @@ -1698,8 +1667,8 @@ int register_kprobe(struct kprobe *p) } EXPORT_SYMBOL_GPL(register_kprobe); -/* Check if all probes on the 'ap' are disabled. */ -static bool aggr_kprobe_disabled(struct kprobe *ap) +/* Check if all probes on the aggrprobe are disabled */ +static int aggr_kprobe_disabled(struct kprobe *ap) { struct kprobe *kp; @@ -1708,21 +1677,20 @@ static bool aggr_kprobe_disabled(struct kprobe *ap) list_for_each_entry(kp, &ap->list, list) if (!kprobe_disabled(kp)) /* - * Since there is an active probe on the list, - * we can't disable this 'ap'. + * There is an active probe on the list. + * We can't disable this ap. */ - return false; + return 0; - return true; + return 1; } +/* Disable one kprobe: Make sure called under kprobe_mutex is locked */ static struct kprobe *__disable_kprobe(struct kprobe *p) { struct kprobe *orig_p; int ret; - lockdep_assert_held(&kprobe_mutex); - /* Get an original kprobe for return */ orig_p = __get_valid_kprobe(p); if (unlikely(orig_p == NULL)) @@ -1736,7 +1704,7 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { /* - * If 'kprobes_all_disarmed' is set, 'orig_p' + * If kprobes_all_disarmed is set, orig_p * should have already been disarmed, so * skip unneed disarming process. */ @@ -1882,105 +1850,53 @@ static struct notifier_block kprobe_exceptions_nb = { .priority = 0x7fffffff /* we need to be notified first */ }; +unsigned long __weak arch_deref_entry_point(void *entry) +{ + return (unsigned long)entry; +} + #ifdef CONFIG_KRETPROBES -/* This assumes the 'tsk' is the current task or the is not running. */ -static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk, - struct llist_node **cur) -{ - struct kretprobe_instance *ri = NULL; - struct llist_node *node = *cur; - - if (!node) - node = tsk->kretprobe_instances.first; - else - node = node->next; - - while (node) { - ri = container_of(node, struct kretprobe_instance, llist); - if (ri->ret_addr != kretprobe_trampoline_addr()) { - *cur = node; - return ri->ret_addr; - } - node = node->next; - } - return NULL; -} -NOKPROBE_SYMBOL(__kretprobe_find_ret_addr); - -/** - * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe - * @tsk: Target task - * @fp: A frame pointer - * @cur: a storage of the loop cursor llist_node pointer for next call - * - * Find the correct return address modified by a kretprobe on @tsk in unsigned - * long type. If it finds the return address, this returns that address value, - * or this returns 0. - * The @tsk must be 'current' or a task which is not running. @fp is a hint - * to get the currect return address - which is compared with the - * kretprobe_instance::fp field. The @cur is a loop cursor for searching the - * kretprobe return addresses on the @tsk. 
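The search this kerneldoc describes boils down to walking the task's per-frame instance list, skipping entries whose saved return address is itself the trampoline (nested returns), and matching on the saved frame pointer. A userspace model of that bookkeeping (trampoline marker and addresses invented, recycling omitted):

#include <stdio.h>

#define TRAMPOLINE 0xdeadUL	/* stand-in for kretprobe_trampoline_addr() */

struct rp_inst {
	unsigned long ret_addr;	/* displaced original return address */
	void *fp;		/* frame pointer at entry, used for matching */
	struct rp_inst *next;
};

static unsigned long find_ret_addr(struct rp_inst *head, void *fp)
{
	for (struct rp_inst *ri = head; ri; ri = ri->next) {
		if (ri->ret_addr == TRAMPOLINE)
			continue;	/* deeper call also returning via trampoline */
		if (ri->fp == fp)
			return ri->ret_addr;
	}
	return 0;
}

int main(void)
{
	int outer_frame, inner_frame;
	struct rp_inst outer = { 0x4000, &outer_frame, NULL };
	struct rp_inst inner = { 0x3000, &inner_frame, &outer };

	/* the innermost instance sits at the head of the list */
	printf("%#lx\n", find_ret_addr(&inner, &inner_frame));	/* 0x3000 */
	printf("%#lx\n", find_ret_addr(&inner, &outer_frame));	/* 0x4000 */
	return 0;
}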
The '*@cur' should be NULL at the - * first call, but '@cur' itself must NOT NULL. - */ -unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, - struct llist_node **cur) -{ - struct kretprobe_instance *ri = NULL; - kprobe_opcode_t *ret; - - if (WARN_ON_ONCE(!cur)) - return 0; - - do { - ret = __kretprobe_find_ret_addr(tsk, cur); - if (!ret) - break; - ri = container_of(*cur, struct kretprobe_instance, llist); - } while (ri->fp != fp); - - return (unsigned long)ret; -} -NOKPROBE_SYMBOL(kretprobe_find_ret_addr); - -void __weak arch_kretprobe_fixup_return(struct pt_regs *regs, - kprobe_opcode_t *correct_ret_addr) -{ - /* - * Do nothing by default. Please fill this to update the fake return - * address on the stack with the correct one on each arch if possible. - */ -} - unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, + void *trampoline_address, void *frame_pointer) { kprobe_opcode_t *correct_ret_addr = NULL; struct kretprobe_instance *ri = NULL; - struct llist_node *first, *node = NULL; + struct llist_node *first, *node; struct kretprobe *rp; - /* Find correct address and all nodes for this frame. */ - correct_ret_addr = __kretprobe_find_ret_addr(current, &node); - if (!correct_ret_addr) { - pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n"); - BUG_ON(1); + /* Find all nodes for this frame. */ + first = node = current->kretprobe_instances.first; + while (node) { + ri = container_of(node, struct kretprobe_instance, llist); + + BUG_ON(ri->fp != frame_pointer); + + if (ri->ret_addr != trampoline_address) { + correct_ret_addr = ri->ret_addr; + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + goto found; + } + + node = node->next; } + pr_err("Oops! Kretprobe fails to find correct return address.\n"); + BUG_ON(1); - /* - * Set the return address as the instruction pointer, because if the - * user handler calls stack_trace_save_regs() with this 'regs', - * the stack trace will start from the instruction pointer. - */ - instruction_pointer_set(regs, (unsigned long)correct_ret_addr); +found: + /* Unlink all nodes for this frame. */ + current->kretprobe_instances.first = node->next; + node->next = NULL; - /* Run the user handler of the nodes. */ - first = current->kretprobe_instances.first; + /* Run them.. */ while (first) { ri = container_of(first, struct kretprobe_instance, llist); - - if (WARN_ON_ONCE(ri->fp != frame_pointer)) - break; + first = first->next; rp = get_kretprobe(ri); if (rp && rp->handler) { @@ -1991,23 +1907,6 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, rp->handler(ri, regs); __this_cpu_write(current_kprobe, prev); } - if (first == node) - break; - - first = first->next; - } - - arch_kretprobe_fixup_return(regs, correct_ret_addr); - - /* Unlink all nodes for this frame. */ - first = current->kretprobe_instances.first; - current->kretprobe_instances.first = node->next; - node->next = NULL; - - /* Recycle free instances. 
*/ - while (first) { - ri = container_of(first, struct kretprobe_instance, llist); - first = first->next; recycle_rp_inst(ri); } @@ -2092,7 +1991,7 @@ int register_kretprobe(struct kretprobe *rp) if (ret) return ret; - /* If only 'rp->kp.addr' is specified, check reregistering kprobes */ + /* If only rp->kp.addr is specified, check reregistering kprobes */ if (rp->kp.addr && warn_kprobe_rereg(&rp->kp)) return -EINVAL; @@ -2200,13 +2099,13 @@ EXPORT_SYMBOL_GPL(unregister_kretprobes); #else /* CONFIG_KRETPROBES */ int register_kretprobe(struct kretprobe *rp) { - return -EOPNOTSUPP; + return -ENOSYS; } EXPORT_SYMBOL_GPL(register_kretprobe); int register_kretprobes(struct kretprobe **rps, int num) { - return -EOPNOTSUPP; + return -ENOSYS; } EXPORT_SYMBOL_GPL(register_kretprobes); @@ -2255,7 +2154,7 @@ static void kill_kprobe(struct kprobe *p) /* * The module is going away. We should disarm the kprobe which * is using ftrace, because ftrace framework is still available at - * 'MODULE_STATE_GOING' notification. + * MODULE_STATE_GOING notification. */ if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) disarm_kprobe_ftrace(p); @@ -2318,7 +2217,8 @@ EXPORT_SYMBOL_GPL(enable_kprobe); /* Caller must NOT call this in usual path. This is only for critical case */ void dump_kprobe(struct kprobe *kp) { - pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n", + pr_err("Dumping kprobe:\n"); + pr_err("Name: %s\nOffset: %x\nAddress: %pS\n", kp->symbol_name, kp->offset, kp->addr); } NOKPROBE_SYMBOL(dump_kprobe); @@ -2420,7 +2320,7 @@ static int __init populate_kprobe_blacklist(unsigned long *start, int ret; for (iter = start; iter < end; iter++) { - entry = (unsigned long)dereference_symbol_descriptor((void *)*iter); + entry = arch_deref_entry_point((void *)*iter); ret = kprobe_add_ksym_blacklist(entry); if (ret == -EINVAL) continue; @@ -2428,13 +2328,13 @@ static int __init populate_kprobe_blacklist(unsigned long *start, return ret; } - /* Symbols in '__kprobes_text' are blacklisted */ + /* Symbols in __kprobes_text are blacklisted */ ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start, (unsigned long)__kprobes_text_end); if (ret) return ret; - /* Symbols in 'noinstr' section are blacklisted */ + /* Symbols in noinstr section are blacklisted */ ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start, (unsigned long)__noinstr_text_end); @@ -2506,9 +2406,9 @@ static int kprobes_module_callback(struct notifier_block *nb, return NOTIFY_DONE; /* - * When 'MODULE_STATE_GOING' was notified, both of module '.text' and - * '.init.text' sections would be freed. When 'MODULE_STATE_LIVE' was - * notified, only '.init.text' section would be freed. We need to + * When MODULE_STATE_GOING was notified, both of module .text and + * .init.text sections would be freed. When MODULE_STATE_LIVE was + * notified, only .init.text section would be freed. We need to * disable kprobes which have been inserted in the sections. */ mutex_lock(&kprobe_mutex); @@ -2525,9 +2425,9 @@ static int kprobes_module_callback(struct notifier_block *nb, * * Note, this will also move any optimized probes * that are pending to be removed from their - * corresponding lists to the 'freeing_list' and + * corresponding lists to the freeing_list and * will not be touched by the delayed - * kprobe_optimizer() work handler. + * kprobe_optimizer work handler. 
*/ kill_kprobe(p); } @@ -2543,6 +2443,10 @@ static struct notifier_block kprobe_module_nb = { .priority = 0 }; +/* Markers of _kprobe_blacklist section */ +extern unsigned long __start_kprobe_blacklist[]; +extern unsigned long __stop_kprobe_blacklist[]; + void kprobe_free_init_mem(void) { void *start = (void *)(&__init_begin); @@ -2553,7 +2457,7 @@ void kprobe_free_init_mem(void) mutex_lock(&kprobe_mutex); - /* Kill all kprobes on initmem because the target code has been freed. */ + /* Kill all kprobes on initmem */ for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry(p, head, hlist) { @@ -2576,8 +2480,10 @@ static int __init init_kprobes(void) err = populate_kprobe_blacklist(__start_kprobe_blacklist, __stop_kprobe_blacklist); - if (err) - pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err); + if (err) { + pr_err("kprobes: failed to populate blacklist: %d\n", err); + pr_err("Please take care of using kprobes.\n"); + } if (kretprobe_blacklist_size) { /* lookup the function address from its name */ @@ -2585,7 +2491,7 @@ static int __init init_kprobes(void) kretprobe_blacklist[i].addr = kprobe_lookup_name(kretprobe_blacklist[i].name, 0); if (!kretprobe_blacklist[i].addr) - pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n", + printk("kretprobe: lookup failed: %s\n", kretprobe_blacklist[i].name); } } @@ -2594,7 +2500,7 @@ static int __init init_kprobes(void) kprobes_all_disarmed = false; #if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) - /* Init 'kprobe_optinsn_slots' for allocation */ + /* Init kprobe_optinsn_slots for allocation */ kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; #endif @@ -2605,7 +2511,9 @@ static int __init init_kprobes(void) err = register_module_notifier(&kprobe_module_nb); kprobes_initialized = (err == 0); - kprobe_sysctls_init(); + + if (!err) + init_test_probes(); return err; } early_initcall(init_kprobes); @@ -2726,7 +2634,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) list_entry(v, struct kprobe_blacklist_entry, list); /* - * If '/proc/kallsyms' is not showing kernel address, we won't + * If /proc/kallsyms is not showing kernel address, we won't * show them here either. */ if (!kallsyms_show_value(m->file->f_cred)) @@ -2787,7 +2695,7 @@ static int arm_all_kprobes(void) } if (errors) - pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n", + pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n", errors, total); else pr_info("Kprobes globally enabled\n"); @@ -2830,7 +2738,7 @@ static int disarm_all_kprobes(void) } if (errors) - pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. 
Please check which kprobes are kept enabled via debugfs.\n", + pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n", errors, total); else pr_info("Kprobes globally disabled\n"); @@ -2865,14 +2773,30 @@ static ssize_t read_enabled_file_bool(struct file *file, static ssize_t write_enabled_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - bool enable; - int ret; + char buf[32]; + size_t buf_size; + int ret = 0; - ret = kstrtobool_from_user(user_buf, count, &enable); - if (ret) - return ret; + buf_size = min(count, (sizeof(buf)-1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + switch (buf[0]) { + case 'y': + case 'Y': + case '1': + ret = arm_all_kprobes(); + break; + case 'n': + case 'N': + case '0': + ret = disarm_all_kprobes(); + break; + default: + return -EINVAL; + } - ret = enable ? arm_all_kprobes() : disarm_all_kprobes(); if (ret) return ret; diff --git a/kernel/kthread.c b/kernel/kthread.c index 38c6dd822d..5b37a85671 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -52,7 +52,6 @@ struct kthread_create_info struct kthread { unsigned long flags; unsigned int cpu; - int result; int (*threadfn)(void *); void *data; mm_segment_t oldfs; @@ -61,8 +60,6 @@ struct kthread { #ifdef CONFIG_BLK_CGROUP struct cgroup_subsys_state *blkcg_css; #endif - /* To store the full name if task comm is truncated. */ - char *full_name; }; enum KTHREAD_BITS { @@ -74,7 +71,7 @@ enum KTHREAD_BITS { static inline struct kthread *to_kthread(struct task_struct *k) { WARN_ON(!(k->flags & PF_KTHREAD)); - return k->worker_private; + return (__force void *)k->set_child_tid; } /* @@ -82,7 +79,7 @@ static inline struct kthread *to_kthread(struct task_struct *k) * * Per construction; when: * - * (p->flags & PF_KTHREAD) && p->worker_private + * (p->flags & PF_KTHREAD) && p->set_child_tid * * the task is both a kthread and struct kthread is persistent. However * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and @@ -90,41 +87,26 @@ static inline struct kthread *to_kthread(struct task_struct *k) */ static inline struct kthread *__to_kthread(struct task_struct *p) { - void *kthread = p->worker_private; + void *kthread = (__force void *)p->set_child_tid; if (kthread && !(p->flags & PF_KTHREAD)) kthread = NULL; return kthread; } -void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk) -{ - struct kthread *kthread = to_kthread(tsk); - - if (!kthread || !kthread->full_name) { - __get_task_comm(buf, buf_size, tsk); - return; - } - - strscpy_pad(buf, kthread->full_name, buf_size); -} - -bool set_kthread_struct(struct task_struct *p) +void set_kthread_struct(struct task_struct *p) { struct kthread *kthread; - if (WARN_ON_ONCE(to_kthread(p))) - return false; + if (__to_kthread(p)) + return; kthread = kzalloc(sizeof(*kthread), GFP_KERNEL); - if (!kthread) - return false; - - init_completion(&kthread->exited); - init_completion(&kthread->parked); - p->vfork_done = &kthread->exited; - - p->worker_private = kthread; - return true; + /* + * We abuse ->set_child_tid to avoid the new member and because it + * can't be wrongly copied by copy_process(). We also rely on fact + * that the caller can't exec, so PF_KTHREAD can't be cleared. 
+ */ + p->set_child_tid = (__force void __user *)kthread; } void free_kthread_struct(struct task_struct *k) @@ -132,17 +114,13 @@ void free_kthread_struct(struct task_struct *k) struct kthread *kthread; /* - * Can be NULL if kmalloc() in set_kthread_struct() failed. + * Can be NULL if this kthread was created by kernel_thread() + * or if kmalloc() in kthread() failed. */ kthread = to_kthread(k); - if (!kthread) - return; - #ifdef CONFIG_BLK_CGROUP - WARN_ON_ONCE(kthread->blkcg_css); + WARN_ON_ONCE(kthread && kthread->blkcg_css); #endif - k->worker_private = NULL; - kfree(kthread->full_name); kfree(kthread); } @@ -290,47 +268,8 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -/** - * kthread_exit - Cause the current kthread to return @result to kthread_stop(). - * @result: The integer value to return to kthread_stop(). - * - * While kthread_exit can be called directly, it exists so that - * functions which do some additional work in non-modular code such as - * module_put_and_kthread_exit can be implemented. - * - * Does not return. - */ -void __noreturn kthread_exit(long result) -{ - struct kthread *kthread = to_kthread(current); - kthread->result = result; - do_exit(0); -} - -/** - * kthread_complete_and_exit - Exit the current kthread. - * @comp: Completion to complete - * @code: The integer value to return to kthread_stop(). - * - * If present, complete @comp and return the code to kthread_stop(). - * - * A kernel thread whose module may be removed after the completion of - * @comp can use this function to exit safely. - * - * Does not return. - */ -void __noreturn kthread_complete_and_exit(struct completion *comp, long code) -{ - if (comp) - complete(comp); - - kthread_exit(code); -} -EXPORT_SYMBOL(kthread_complete_and_exit); - static int kthread(void *_create) { - static const struct sched_param param = { .sched_priority = 0 }; /* Copy data: it's on kthread's stack */ struct kthread_create_info *create = _create; int (*threadfn)(void *data) = create->threadfn; @@ -339,24 +278,27 @@ static int kthread(void *_create) struct kthread *self; int ret; + set_kthread_struct(current); self = to_kthread(current); /* If user was SIGKILLed, I release the structure. */ done = xchg(&create->done, NULL); if (!done) { kfree(create); - kthread_exit(-EINTR); + do_exit(-EINTR); + } + + if (!self) { + create->result = ERR_PTR(-ENOMEM); + complete(done); + do_exit(-ENOMEM); } self->threadfn = threadfn; self->data = data; - - /* - * The new thread inherited kthreadd's priority and CPU mask. Reset - * back to default in case they have been changed. - */ - sched_setscheduler_nocheck(current, SCHED_NORMAL, &param); - set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD)); + init_completion(&self->exited); + init_completion(&self->parked); + current->vfork_done = &self->exited; /* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); @@ -376,7 +318,7 @@ static int kthread(void *_create) __kthread_parkme(self); ret = threadfn(data); } - kthread_exit(ret); + do_exit(ret); } /* called from kernel_clone() to get node information for about to be created task */ @@ -455,24 +397,22 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), } task = create->result; if (!IS_ERR(task)) { + static const struct sched_param param = { .sched_priority = 0 }; char name[TASK_COMM_LEN]; - va_list aq; - int len; /* * task is already visible to other tasks, so updating * COMM must be protected. 
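The "- va_list aq" lines above encode a general C rule worth making explicit: a va_list may only be traversed once, so the sizing pass has to run on a va_copy() while the original list stays untouched for the second, unabridged formatting pass. A minimal sketch of the pattern (generic; buf, fmt and full are illustrative names):

	va_list aq;
	char buf[16], *full = NULL;
	int len;

	va_copy(aq, args);			/* snapshot: vsnprintf() exhausts its copy */
	len = vsnprintf(buf, sizeof(buf), fmt, aq);
	va_end(aq);
	if (len >= sizeof(buf))			/* truncated: format again, full length */
		full = kvasprintf(GFP_KERNEL, fmt, args);
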
*/ - va_copy(aq, args); - len = vsnprintf(name, sizeof(name), namefmt, aq); - va_end(aq); - if (len >= TASK_COMM_LEN) { - struct kthread *kthread = to_kthread(task); - - /* leave it truncated when out of memory. */ - kthread->full_name = kvasprintf(GFP_KERNEL, namefmt, args); - } + vsnprintf(name, sizeof(name), namefmt, args); set_task_comm(task, name); + /* + * root may have changed our (kthreadd's) priority or CPU mask. + * The kernel thread should not inherit these properties. + */ + sched_setscheduler_nocheck(task, SCHED_NORMAL, &param); + set_cpus_allowed_ptr(task, + housekeeping_cpumask(HK_FLAG_KTHREAD)); } kfree(create); return task; @@ -493,7 +433,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE. * When woken, the thread will run @threadfn() with @data as its - * argument. @threadfn() can either return directly if it is a + * argument. @threadfn() can either call do_exit() directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero @@ -583,7 +523,6 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), to_kthread(p)->cpu = cpu; return p; } -EXPORT_SYMBOL(kthread_create_on_cpu); void kthread_set_per_cpu(struct task_struct *k, int cpu) { @@ -688,7 +627,7 @@ EXPORT_SYMBOL_GPL(kthread_park); * instead of calling wake_up_process(): the thread will exit without * calling threadfn(). * - * If threadfn() may call kthread_exit() itself, the caller must ensure + * If threadfn() may call do_exit() itself, the caller must ensure * task_struct can't go away. 
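Both variants of kthread() above serve the same user-visible lifecycle; for reference, the canonical pattern is (a minimal sketch; worker_fn and its sleep are illustrative):

	static int worker_fn(void *data)
	{
		while (!kthread_should_stop()) {
			/* ... one unit of work ... */
			schedule_timeout_interruptible(HZ);
		}
		return 0;	/* handed back through kthread_stop() */
	}

	/* creation side, inside some init path */
	struct task_struct *t = kthread_run(worker_fn, NULL, "worker/%d", 0);
	if (!IS_ERR(t))
		ret = kthread_stop(t);	/* old code reads k->exit_code, new code kthread->result */
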
* * Returns the result of threadfn(), or %-EINTR if wake_up_process() @@ -707,7 +646,7 @@ int kthread_stop(struct task_struct *k) kthread_unpark(k); wake_up_process(k); wait_for_completion(&kthread->exited); - ret = kthread->result; + ret = k->exit_code; put_task_struct(k); trace_sched_kthread_stop_ret(ret); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec46..335d988bd8 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -862,11 +862,14 @@ static void klp_init_object_early(struct klp_patch *patch, list_add_tail(&obj->node, &patch->obj_list); } -static void klp_init_patch_early(struct klp_patch *patch) +static int klp_init_patch_early(struct klp_patch *patch) { struct klp_object *obj; struct klp_func *func; + if (!patch->objs) + return -EINVAL; + INIT_LIST_HEAD(&patch->list); INIT_LIST_HEAD(&patch->obj_list); kobject_init(&patch->kobj, &klp_ktype_patch); @@ -876,12 +879,20 @@ static void klp_init_patch_early(struct klp_patch *patch) init_completion(&patch->finish); klp_for_each_object_static(patch, obj) { + if (!obj->funcs) + return -EINVAL; + klp_init_object_early(patch, obj); klp_for_each_func_static(obj, func) { klp_init_func_early(obj, func); } } + + if (!try_module_get(patch->mod)) + return -ENODEV; + + return 0; } static int klp_init_patch(struct klp_patch *patch) @@ -1013,17 +1024,10 @@ static int __klp_enable_patch(struct klp_patch *patch) int klp_enable_patch(struct klp_patch *patch) { int ret; - struct klp_object *obj; - if (!patch || !patch->mod || !patch->objs) + if (!patch || !patch->mod) return -EINVAL; - klp_for_each_object_static(patch, obj) { - if (!obj->funcs) - return -EINVAL; - } - - if (!is_livepatch_module(patch->mod)) { pr_err("module %s is not marked as a livepatch module\n", patch->mod->name); @@ -1047,13 +1051,12 @@ int klp_enable_patch(struct klp_patch *patch) return -EINVAL; } - if (!try_module_get(patch->mod)) { + ret = klp_init_patch_early(patch); + if (ret) { mutex_unlock(&klp_mutex); - return -ENODEV; + return ret; } - klp_init_patch_early(patch); - ret = klp_init_patch(patch); if (ret) goto err; diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index fe316c021d..e8029aea67 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -49,15 +49,14 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); - /* - * The ftrace_test_recursion_trylock() will disable preemption, - * which is required for the variant of synchronize_rcu() that is - * used to allow patching functions where RCU is not watching. - * See klp_synchronize_transition() for more details. - */ bit = ftrace_test_recursion_trylock(ip, parent_ip); if (WARN_ON_ONCE(bit < 0)) return; + /* + * A variant of synchronize_rcu() is used to allow patching functions + * where RCU is not watching, see klp_synchronize_transition(). 
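The bracket restored below leans on a general RCU rule: a preemption-disabled region counts as an RCU-sched read-side critical section, so a writer that waits with the matching synchronize_rcu() variant outlives every such region. A generic sketch (shared_ptr, newp and oldp are hypothetical):

	/* reader side, e.g. inside the ftrace handler */
	preempt_disable_notrace();
	p = rcu_dereference_sched(shared_ptr);	/* stays valid until re-enable */
	/* ... use p ... */
	preempt_enable_notrace();

	/* writer side */
	rcu_assign_pointer(shared_ptr, newp);
	synchronize_rcu();			/* waits out all preempt-off readers */
	kfree(oldp);
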
+ */ + preempt_disable_notrace(); func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, stack_node); @@ -121,6 +120,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, klp_arch_set_pc(fregs, (unsigned long)func->new_func); unlock: + preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 5683ac0d25..291b857a6e 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -13,6 +13,7 @@ #include "core.h" #include "patch.h" #include "transition.h" +#include "../sched/sched.h" #define MAX_STACK_ENTRIES 100 #define STACK_ERR_BUF_SIZE 128 @@ -239,7 +240,7 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries, * Determine whether it's safe to transition the task to the target patch state * by looking for any to-be-patched or to-be-unpatched functions on its stack. */ -static int klp_check_stack(struct task_struct *task, const char **oldname) +static int klp_check_stack(struct task_struct *task, char *err_buf) { static unsigned long entries[MAX_STACK_ENTRIES]; struct klp_object *obj; @@ -247,8 +248,12 @@ static int klp_check_stack(struct task_struct *task, const char **oldname) int ret, nr_entries; ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries)); - if (ret < 0) - return -EINVAL; + if (ret < 0) { + snprintf(err_buf, STACK_ERR_BUF_SIZE, + "%s: %s:%d has an unreliable stack\n", + __func__, task->comm, task->pid); + return ret; + } nr_entries = ret; klp_for_each_object(klp_transition_patch, obj) { @@ -257,8 +262,11 @@ static int klp_check_stack(struct task_struct *task, const char **oldname) klp_for_each_func(obj, func) { ret = klp_check_stack_func(func, entries, nr_entries); if (ret) { - *oldname = func->old_name; - return -EADDRINUSE; + snprintf(err_buf, STACK_ERR_BUF_SIZE, + "%s: %s:%d is sleeping on function %s\n", + __func__, task->comm, task->pid, + func->old_name); + return ret; } } } @@ -266,22 +274,6 @@ static int klp_check_stack(struct task_struct *task, const char **oldname) return 0; } -static int klp_check_and_switch_task(struct task_struct *task, void *arg) -{ - int ret; - - if (task_curr(task) && task != current) - return -EBUSY; - - ret = klp_check_stack(task, arg); - if (ret) - return ret; - - clear_tsk_thread_flag(task, TIF_PATCH_PENDING); - task->patch_state = klp_target_state; - return 0; -} - /* * Try to safely switch a task to the target patch state. If it's currently * running, or it's sleeping on a to-be-patched or to-be-unpatched function, or @@ -289,8 +281,13 @@ static int klp_check_and_switch_task(struct task_struct *task, void *arg) */ static bool klp_try_switch_task(struct task_struct *task) { - const char *old_name; + static char err_buf[STACK_ERR_BUF_SIZE]; + struct rq *rq; + struct rq_flags flags; int ret; + bool success = false; + + err_buf[0] = '\0'; /* check if this task has already switched over */ if (task->patch_state == klp_target_state) @@ -308,31 +305,36 @@ static bool klp_try_switch_task(struct task_struct *task) * functions. If all goes well, switch the task to the target patch * state. 
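The err_buf plumbing restored below exists because printk() can take console locks that must not nest under the runqueue lock; the shape of the workaround (an illustrative sketch, the condition is hypothetical):

	char buf[STACK_ERR_BUF_SIZE];

	buf[0] = '\0';
	rq = task_rq_lock(task, &flags);
	if (!switchable)	/* hypothetical condition */
		snprintf(buf, sizeof(buf), "%s:%d is running\n", task->comm, task->pid);
	task_rq_unlock(rq, task, &flags);
	if (buf[0] != '\0')
		pr_debug("%s", buf);	/* only after the rq lock is dropped */
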
*/ - ret = task_call_func(task, klp_check_and_switch_task, &old_name); - switch (ret) { - case 0: /* success */ - break; + rq = task_rq_lock(task, &flags); - case -EBUSY: /* klp_check_and_switch_task() */ - pr_debug("%s: %s:%d is running\n", - __func__, task->comm, task->pid); - break; - case -EINVAL: /* klp_check_and_switch_task() */ - pr_debug("%s: %s:%d has an unreliable stack\n", - __func__, task->comm, task->pid); - break; - case -EADDRINUSE: /* klp_check_and_switch_task() */ - pr_debug("%s: %s:%d is sleeping on function %s\n", - __func__, task->comm, task->pid, old_name); - break; - - default: - pr_debug("%s: Unknown error code (%d) when trying to switch %s:%d\n", - __func__, ret, task->comm, task->pid); - break; + if (task_running(rq, task) && task != current) { + snprintf(err_buf, STACK_ERR_BUF_SIZE, + "%s: %s:%d is running\n", __func__, task->comm, + task->pid); + goto done; } - return !ret; + ret = klp_check_stack(task, err_buf); + if (ret) + goto done; + + success = true; + + clear_tsk_thread_flag(task, TIF_PATCH_PENDING); + task->patch_state = klp_target_state; + +done: + task_rq_unlock(rq, task, &flags); + + /* + * Due to console deadlock issues, pr_debug() can't be used while + * holding the task rq lock. Instead we have to use a temporary buffer + * and print the debug message after releasing the lock. + */ + if (err_buf[0] != '\0') + pr_debug("%s", err_buf); + + return success; } /* @@ -413,11 +415,8 @@ void klp_try_complete_transition(void) for_each_possible_cpu(cpu) { task = idle_task(cpu); if (cpu_online(cpu)) { - if (!klp_try_switch_task(task)) { + if (!klp_try_switch_task(task)) complete = false; - /* Make idle task go through the main loop. */ - wake_up_if_idle(cpu); - } } else if (task->patch_state != klp_target_state) { /* offline idle tasks can be switched immediately */ clear_tsk_thread_flag(task, TIF_PATCH_PENDING); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f8a0212189..92127296cf 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -788,21 +788,6 @@ static int very_verbose(struct lock_class *class) * Is this the address of a static object: */ #ifdef __KERNEL__ -/* - * Check if an address is part of freed initmem. After initmem is freed, - * memory can be allocated from it, and such allocations would then have - * addresses within the range [_stext, _end]. - */ -#ifndef arch_is_kernel_initmem_freed -static int arch_is_kernel_initmem_freed(unsigned long addr) -{ - if (system_state < SYSTEM_FREEING_INITMEM) - return 0; - - return init_section_contains((void *)addr, 1); -} -#endif - static int static_obj(const void *obj) { unsigned long start = (unsigned long) &_stext, @@ -818,6 +803,9 @@ static int static_obj(const void *obj) if ((addr >= start) && (addr < end)) return 1; + if (arch_is_kernel_data(addr)) + return 1; + /* * in-kernel percpu var? */ @@ -4683,7 +4671,7 @@ print_lock_invalid_wait_context(struct task_struct *curr, /* * Verify the wait_type context. * - * This check validates we take locks in the right wait-type order; that is it + * This check validates we takes locks in the right wait-type order; that is it * ensures that we do not take mutexes inside spinlocks and do not attempt to * acquire spinlocks inside raw_spinlocks and the sort. * @@ -5485,7 +5473,6 @@ static noinstr void check_flags(unsigned long flags) } } -#ifndef CONFIG_PREEMPT_RT /* * We dont accurately track softirq state in e.g. 
* hardirq contexts (such as on 4KSTACKS), so only @@ -5500,7 +5487,6 @@ static noinstr void check_flags(unsigned long flags) DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } -#endif if (!debug_locks) print_irqtrace_events(current); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 9c2fb613a5..7c5a4a087c 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -1022,23 +1022,23 @@ static int __init lock_torture_init(void) if (onoff_interval > 0) { firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ, NULL); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (shuffle_interval > 0) { firsterr = torture_shuffle_init(shuffle_interval); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (shutdown_secs > 0) { firsterr = torture_shutdown_init(shutdown_secs, lock_torture_cleanup); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (stutter > 0) { firsterr = torture_stutter_init(stutter, stutter); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } @@ -1047,7 +1047,7 @@ static int __init lock_torture_init(void) sizeof(writer_tasks[0]), GFP_KERNEL); if (writer_tasks == NULL) { - TOROUT_ERRSTRING("writer_tasks: Out of memory"); + VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -1058,7 +1058,7 @@ static int __init lock_torture_init(void) sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { - TOROUT_ERRSTRING("reader_tasks: Out of memory"); + VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory"); kfree(writer_tasks); writer_tasks = NULL; firsterr = -ENOMEM; @@ -1082,7 +1082,7 @@ static int __init lock_torture_init(void) /* Create writer. */ firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i], writer_tasks[i]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; create_reader: @@ -1091,13 +1091,13 @@ static int __init lock_torture_init(void) /* Create reader. */ firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j], reader_tasks[j]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, stats_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } torture_init_end(); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 5e3585950e..d456579d09 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -94,9 +94,6 @@ static inline unsigned long __owner_flags(unsigned long owner) return owner & MUTEX_FLAGS; } -/* - * Returns: __mutex_owner(lock) on failure or NULL on success. - */ static inline struct task_struct *__mutex_trylock_common(struct mutex *lock, bool handoff) { unsigned long owner, curr = (unsigned long)current; @@ -351,23 +348,21 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, { bool ret = true; - lockdep_assert_preemption_disabled(); - + rcu_read_lock(); while (__mutex_owner(lock) == owner) { /* * Ensure we emit the owner->on_cpu, dereference _after_ - * checking lock->owner still matches owner. And we already - * disabled preemption which is equal to the RCU read-side - * crital section in optimistic spinning code. Thus the - * task_strcut structure won't go away during the spinning - * period + * checking lock->owner still matches owner. If that fails, + * owner might point to freed memory. If it still matches, + * the rcu_read_lock() ensures the memory stays valid. 
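Both spinning sites in this file open-code the same two-part test; the rwsem.c hunk further down re-adds it as a local helper, reproduced here for reference:

	static inline bool owner_on_cpu(struct task_struct *owner)
	{
		/*
		 * Skip spinning if the lock holder is not on a CPU, or if its
		 * CPU has been preempted by the hypervisor: no forward
		 * progress can be expected from spinning in either case.
		 */
		return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
	}
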
*/ barrier(); /* * Use vcpu_is_preempted to detect lock holder preemption issue. */ - if (!owner_on_cpu(owner) || need_resched()) { + if (!owner->on_cpu || need_resched() || + vcpu_is_preempted(task_cpu(owner))) { ret = false; break; } @@ -379,6 +374,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, cpu_relax(); } + rcu_read_unlock(); return ret; } @@ -391,19 +387,19 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) struct task_struct *owner; int retval = 1; - lockdep_assert_preemption_disabled(); - if (need_resched()) return 0; - /* - * We already disabled preemption which is equal to the RCU read-side - * crital section in optimistic spinning code. Thus the task_strcut - * structure won't go away during the spinning period. - */ + rcu_read_lock(); owner = __mutex_owner(lock); + + /* + * As lock holder preemption issue, we both skip spinning if task is not + * on cpu or its cpu is preempted + */ if (owner) - retval = owner_on_cpu(owner); + retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); + rcu_read_unlock(); /* * If lock->owner is not set, the mutex has been released. Return true @@ -740,44 +736,6 @@ __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true); } -/** - * ww_mutex_trylock - tries to acquire the w/w mutex with optional acquire context - * @ww: mutex to lock - * @ww_ctx: optional w/w acquire context - * - * Trylocks a mutex with the optional acquire context; no deadlock detection is - * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. - * - * Unlike ww_mutex_lock, no deadlock handling is performed. However, if a @ctx is - * specified, -EALREADY handling may happen in calls to ww_mutex_trylock. - * - * A mutex acquired with this function must be released with ww_mutex_unlock. - */ -int ww_mutex_trylock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) -{ - if (!ww_ctx) - return mutex_trylock(&ww->base); - - MUTEX_WARN_ON(ww->base.magic != &ww->base); - - /* - * Reset the wounded flag after a kill. No other process can - * race and wound us here, since they can't have a valid owner - * pointer if we don't have any locks held. 
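For orientation, the API this trylock variant belongs to is used like so (a generic wound/wait sketch; my_class and the objects a and b are illustrative):

	static DEFINE_WD_CLASS(my_class);
	struct ww_acquire_ctx ctx;
	int err;

	ww_acquire_init(&ctx, &my_class);
	err = ww_mutex_lock(&a->lock, &ctx);
	if (!err && (err = ww_mutex_lock(&b->lock, &ctx)) == -EDEADLK) {
		ww_mutex_unlock(&a->lock);		/* wounded: back off */
		ww_mutex_lock_slow(&b->lock, &ctx);	/* sleep until b is ours */
		err = ww_mutex_lock(&a->lock, &ctx);	/* then retake a */
	}
	if (!err)
		ww_acquire_done(&ctx);
	/* ... critical section, ww_mutex_unlock() both, ww_acquire_fini(&ctx) ... */
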
- */ - if (ww_ctx->acquired == 0) - ww_ctx->wounded = 0; - - if (__mutex_trylock(&ww->base)) { - ww_mutex_set_context_fastpath(ww, ww_ctx); - mutex_acquire_nest(&ww->base.dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_); - return 1; - } - - return 0; -} -EXPORT_SYMBOL(ww_mutex_trylock); - #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 8555c4efe9..ea5a701ab2 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -446,24 +446,17 @@ static __always_inline void rt_mutex_adjust_prio(struct task_struct *p) } /* RT mutex specific wake_q wrappers */ -static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh, - struct task_struct *task, - unsigned int wake_state) -{ - if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) { - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) - WARN_ON_ONCE(wqh->rtlock_task); - get_task_struct(task); - wqh->rtlock_task = task; - } else { - wake_q_add(&wqh->head, task); - } -} - static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh, struct rt_mutex_waiter *w) { - rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state); + if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) { + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(wqh->rtlock_task); + get_task_struct(w->task); + wqh->rtlock_task = w->task; + } else { + wake_q_add(&wqh->head, w->task); + } } static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh) @@ -1103,11 +1096,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, * the other will detect the deadlock and return -EDEADLOCK, * which is wrong, as the other waiter is not in a deadlock * situation. - * - * Except for ww_mutex, in that case the chain walk must already deal - * with spurious cycles, see the comments at [3] and [6]. 
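The owner == task test that follows is the degenerate one-lock case of the cycle the priority-inheritance chain walk hunts for; spelled out:

	/*
	 * Self deadlock:  T1 owns M1, then blocks on M1 again.
	 * ABBA deadlock:  T1 owns M1 and blocks on M2,
	 *                 T2 owns M2 and blocks on M1;
	 *                 walking M2's owner chain from T1 leads back to T1,
	 *                 so the walk returns -EDEADLK to one of the waiters.
	 */
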
*/ - if (owner == task && !(build_ww_mutex() && ww_ctx)) + if (owner == task) return -EDEADLK; raw_spin_lock(&task->pi_lock); @@ -1382,8 +1372,9 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * for CONFIG_PREEMPT_RCU=y) * - the VCPU on which owner runs is preempted */ - if (!owner_on_cpu(owner) || need_resched() || - !rt_mutex_waiter_is_top_waiter(lock, waiter)) { + if (!owner->on_cpu || need_resched() || + !rt_mutex_waiter_is_top_waiter(lock, waiter) || + vcpu_is_preempted(task_cpu(owner))) { res = false; break; } diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index 900220941c..5c9299aaab 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -21,13 +21,12 @@ int max_lock_depth = 1024; */ static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock, unsigned int state, - struct lockdep_map *nest_lock, unsigned int subclass) { int ret; might_sleep(); - mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_); + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ret = __rt_mutex_lock(&lock->rtmutex, state); if (ret) mutex_release(&lock->dep_map, _RET_IP_); @@ -49,16 +48,10 @@ EXPORT_SYMBOL(rt_mutex_base_init); */ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass); + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); } EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); -void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock) -{ - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0); -} -EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock); - #else /* !CONFIG_DEBUG_LOCK_ALLOC */ /** @@ -68,7 +61,7 @@ EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock); */ void __sched rt_mutex_lock(struct rt_mutex *lock) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0); + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock); #endif @@ -84,25 +77,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); */ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { - return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0); + return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -/** - * rt_mutex_lock_killable - lock a rt_mutex killable - * - * @lock: the rt_mutex to be locked - * - * Returns: - * 0 on success - * -EINTR when interrupted by a signal - */ -int __sched rt_mutex_lock_killable(struct rt_mutex *lock) -{ - return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0); -} -EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); - /** * rt_mutex_trylock - try to lock a rt_mutex * diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 6fd3162e40..88191f6e25 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -59,7 +59,8 @@ static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb) * set. 
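The ordering argument in the trylock below is a standard acquire/release pairing, summarized:

	/*
	 * reader (trylock): atomic_try_cmpxchg(&rwb->readers, &r, r + 1)
	 *   - fully ordered when it succeeds, which implies the ACQUIRE
	 *     needed for the critical section that follows;
	 * writer (unlock):  atomic_add_return_release(READER_BIAS - bias,
	 *                                             &rwb->readers)
	 *   - the RELEASE makes the writer's updates visible to any reader
	 *     whose cmpxchg succeeds afterwards.
	 */
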
*/ for (r = atomic_read(&rwb->readers); r < 0;) { - if (likely(atomic_try_cmpxchg_acquire(&rwb->readers, &r, r + 1))) + /* Fully-ordered if cmpxchg() succeeds, provides ACQUIRE */ + if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1))) return 1; } return 0; @@ -147,7 +148,6 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, { struct rt_mutex_base *rtm = &rwb->rtmutex; struct task_struct *owner; - DEFINE_RT_WAKE_Q(wqh); raw_spin_lock_irq(&rtm->wait_lock); /* @@ -158,12 +158,9 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, */ owner = rt_mutex_owner(rtm); if (owner) - rt_mutex_wake_q_add_task(&wqh, owner, state); + wake_up_state(owner, state); - /* Pairs with the preempt_enable in rt_mutex_wake_up_q() */ - preempt_disable(); raw_spin_unlock_irq(&rtm->wait_lock); - rt_mutex_wake_up_q(&wqh); } static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, @@ -186,7 +183,7 @@ static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias, /* * _release() is needed in case that reader is in fast path, pairing - * with atomic_try_cmpxchg_acquire() in rwbase_read_trylock(). + * with atomic_try_cmpxchg() in rwbase_read_trylock(), provides RELEASE */ (void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers); raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 69aba4abe1..e63f740c2c 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -56,6 +56,7 @@ * * A fast path reader optimistic lock stealing is supported when the rwsem * is previously owned by a writer and the following conditions are met: + * - OSQ is empty * - rwsem is not currently writer owned * - the handoff isn't set. */ @@ -508,7 +509,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, /* * Limit # of readers that can be woken up per wakeup call. */ - if (unlikely(woken >= MAX_READERS_WAKEUP)) + if (woken >= MAX_READERS_WAKEUP) break; } @@ -658,6 +659,15 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) return false; } +static inline bool owner_on_cpu(struct task_struct *owner) +{ + /* + * As lock holder preemption issue, we both skip spinning if + * task is not on cpu or its cpu is preempted + */ + return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); +} + static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; @@ -670,10 +680,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) } preempt_disable(); - /* - * Disable preemption is equal to the RCU read-side crital section, - * thus the task_strcut structure won't go away. - */ + rcu_read_lock(); owner = rwsem_owner_flags(sem, &flags); /* * Don't check the read-owner as the entry may be stale. 
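For readers tracing the owner checks here: rwsem packs the owner task and two state bits into a single word, and the decoder looks like this (a sketch assuming the field layout these kernels use near the top of rwsem.c):

	#define RWSEM_READER_OWNED	(1UL << 0)
	#define RWSEM_NONSPINNABLE	(1UL << 1)
	#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

	static inline struct task_struct *
	rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
	{
		unsigned long owner = atomic_long_read(&sem->owner);

		*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
		return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
	}
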
@@ -681,6 +688,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) if ((flags & RWSEM_NONSPINNABLE) || (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner))) ret = false; + rcu_read_unlock(); preempt_enable(); lockevent_cond_inc(rwsem_opt_fail, !ret); @@ -708,13 +716,12 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) unsigned long flags, new_flags; enum owner_state state; - lockdep_assert_preemption_disabled(); - owner = rwsem_owner_flags(sem, &flags); state = rwsem_owner_state(owner, flags); if (state != OWNER_WRITER) return state; + rcu_read_lock(); for (;;) { /* * When a waiting writer set the handoff flag, it may spin @@ -732,9 +739,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) * Ensure we emit the owner->on_cpu, dereference _after_ * checking sem->owner still matches owner, if that fails, * owner might point to free()d memory, if it still matches, - * our spinning context already disabled preemption which is - * equal to RCU read-side crital section ensures the memory - * stays valid. + * the rcu_read_lock() ensures the memory stays valid. */ barrier(); @@ -745,6 +750,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) cpu_relax(); } + rcu_read_unlock(); return state; } @@ -1239,14 +1245,17 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); - tmp = atomic_long_read(&sem->count); - while (!(tmp & RWSEM_READ_FAILED_MASK)) { + /* + * Optimize for the case when the rwsem is not locked at all. + */ + tmp = RWSEM_UNLOCKED_VALUE; + do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, - tmp + RWSEM_READER_BIAS)) { + tmp + RWSEM_READER_BIAS)) { rwsem_set_reader_owned(sem); return 1; } - } + } while (!(tmp & RWSEM_READ_FAILED_MASK)); return 0; } diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 7f49baaa49..c5830cfa37 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -300,16 +300,6 @@ void __lockfunc _raw_write_lock(rwlock_t *lock) __raw_write_lock(lock); } EXPORT_SYMBOL(_raw_write_lock); - -#ifndef CONFIG_DEBUG_LOCK_ALLOC -#define __raw_write_lock_nested(lock, subclass) __raw_write_lock(((void)(subclass), (lock))) -#endif - -void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass) -{ - __raw_write_lock_nested(lock, subclass); -} -EXPORT_SYMBOL(_raw_write_lock_nested); #endif #ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE @@ -388,7 +378,8 @@ unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, local_irq_save(flags); preempt_disable(); spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); + LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock, + do_raw_spin_lock_flags, &flags); return flags; } EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested); diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 48a19ed848..d2912e44d6 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -24,17 +24,6 @@ #define RT_MUTEX_BUILD_SPINLOCKS #include "rtmutex.c" -/* - * __might_resched() skips the state check as rtlocks are state - * preserving. Take RCU nesting into account as spin/read/write_lock() can - * legitimately nest into an RCU read side critical section. 
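Concretely, the nesting the removed comment describes, which must stay legal on PREEMPT_RT even though the lock may sleep internally (my_lock is illustrative):

	rcu_read_lock();
	spin_lock(&my_lock);	/* rtmutex-backed on RT: can block to acquire */
	/* ... */
	spin_unlock(&my_lock);
	rcu_read_unlock();
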
- */ -#define RTLOCK_RESCHED_OFFSETS \ - (rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT) - -#define rtlock_might_resched() \ - __might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS) - static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) { if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) @@ -43,7 +32,7 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) static __always_inline void __rt_spin_lock(spinlock_t *lock) { - rtlock_might_resched(); + ___might_sleep(__FILE__, __LINE__, 0); rtlock_lock(&lock->lock); rcu_read_lock(); migrate_disable(); @@ -221,7 +210,7 @@ EXPORT_SYMBOL(rt_write_trylock); void __sched rt_read_lock(rwlock_t *rwlock) { - rtlock_might_resched(); + ___might_sleep(__FILE__, __LINE__, 0); rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); rcu_read_lock(); @@ -231,7 +220,7 @@ EXPORT_SYMBOL(rt_read_lock); void __sched rt_write_lock(rwlock_t *rwlock) { - rtlock_might_resched(); + ___might_sleep(__FILE__, __LINE__, 0); rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); rcu_read_lock(); @@ -239,18 +228,6 @@ void __sched rt_write_lock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_write_lock); -#ifdef CONFIG_DEBUG_LOCK_ALLOC -void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) -{ - rtlock_might_resched(); - rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_); - rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); - rcu_read_lock(); - migrate_disable(); -} -EXPORT_SYMBOL(rt_write_lock_nested); -#endif - void __sched rt_read_unlock(rwlock_t *rwlock) { rwlock_release(&rwlock->dep_map, _RET_IP_); @@ -269,6 +246,12 @@ void __sched rt_write_unlock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_write_unlock); +int __sched rt_rwlock_is_contended(rwlock_t *rwlock) +{ + return rw_base_is_contended(&rwlock->rwbase); +} +EXPORT_SYMBOL(rt_rwlock_is_contended); + #ifdef CONFIG_DEBUG_LOCK_ALLOC void __rt_rwlock_init(rwlock_t *rwlock, const char *name, struct lock_class_key *key) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 353004155d..3e82f449b4 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -16,15 +16,6 @@ static DEFINE_WD_CLASS(ww_class); struct workqueue_struct *wq; -#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH -#define ww_acquire_init_noinject(a, b) do { \ - ww_acquire_init((a), (b)); \ - (a)->deadlock_inject_countdown = ~0U; \ - } while (0) -#else -#define ww_acquire_init_noinject(a, b) ww_acquire_init((a), (b)) -#endif - struct test_mutex { struct work_struct work; struct ww_mutex mutex; @@ -45,7 +36,7 @@ static void test_mutex_work(struct work_struct *work) wait_for_completion(&mtx->go); if (mtx->flags & TEST_MTX_TRY) { - while (!ww_mutex_trylock(&mtx->mutex, NULL)) + while (!ww_mutex_trylock(&mtx->mutex)) cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); @@ -118,39 +109,19 @@ static int test_mutex(void) return 0; } -static int test_aa(bool trylock) +static int test_aa(void) { struct ww_mutex mutex; struct ww_acquire_ctx ctx; int ret; - const char *from = trylock ? 
"trylock" : "lock"; ww_mutex_init(&mutex, &ww_class); ww_acquire_init(&ctx, &ww_class); - if (!trylock) { - ret = ww_mutex_lock(&mutex, &ctx); - if (ret) { - pr_err("%s: initial lock failed!\n", __func__); - goto out; - } - } else { - ret = !ww_mutex_trylock(&mutex, &ctx); - if (ret) { - pr_err("%s: initial trylock failed!\n", __func__); - goto out; - } - } + ww_mutex_lock(&mutex, &ctx); - if (ww_mutex_trylock(&mutex, NULL)) { - pr_err("%s: trylocked itself without context from %s!\n", __func__, from); - ww_mutex_unlock(&mutex); - ret = -EINVAL; - goto out; - } - - if (ww_mutex_trylock(&mutex, &ctx)) { - pr_err("%s: trylocked itself with context from %s!\n", __func__, from); + if (ww_mutex_trylock(&mutex)) { + pr_err("%s: trylocked itself!\n", __func__); ww_mutex_unlock(&mutex); ret = -EINVAL; goto out; @@ -158,17 +129,17 @@ static int test_aa(bool trylock) ret = ww_mutex_lock(&mutex, &ctx); if (ret != -EALREADY) { - pr_err("%s: missed deadlock for recursing, ret=%d from %s\n", - __func__, ret, from); + pr_err("%s: missed deadlock for recursing, ret=%d\n", + __func__, ret); if (!ret) ww_mutex_unlock(&mutex); ret = -EINVAL; goto out; } - ww_mutex_unlock(&mutex); ret = 0; out: + ww_mutex_unlock(&mutex); ww_acquire_fini(&ctx); return ret; } @@ -179,7 +150,7 @@ struct test_abba { struct ww_mutex b_mutex; struct completion a_ready; struct completion b_ready; - bool resolve, trylock; + bool resolve; int result; }; @@ -189,13 +160,8 @@ static void test_abba_work(struct work_struct *work) struct ww_acquire_ctx ctx; int err; - ww_acquire_init_noinject(&ctx, &ww_class); - if (!abba->trylock) - ww_mutex_lock(&abba->b_mutex, &ctx); - else - WARN_ON(!ww_mutex_trylock(&abba->b_mutex, &ctx)); - - WARN_ON(READ_ONCE(abba->b_mutex.ctx) != &ctx); + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&abba->b_mutex, &ctx); complete(&abba->b_ready); wait_for_completion(&abba->a_ready); @@ -215,7 +181,7 @@ static void test_abba_work(struct work_struct *work) abba->result = err; } -static int test_abba(bool trylock, bool resolve) +static int test_abba(bool resolve) { struct test_abba abba; struct ww_acquire_ctx ctx; @@ -226,18 +192,12 @@ static int test_abba(bool trylock, bool resolve) INIT_WORK_ONSTACK(&abba.work, test_abba_work); init_completion(&abba.a_ready); init_completion(&abba.b_ready); - abba.trylock = trylock; abba.resolve = resolve; schedule_work(&abba.work); - ww_acquire_init_noinject(&ctx, &ww_class); - if (!trylock) - ww_mutex_lock(&abba.a_mutex, &ctx); - else - WARN_ON(!ww_mutex_trylock(&abba.a_mutex, &ctx)); - - WARN_ON(READ_ONCE(abba.a_mutex.ctx) != &ctx); + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&abba.a_mutex, &ctx); complete(&abba.a_ready); wait_for_completion(&abba.b_ready); @@ -289,7 +249,7 @@ static void test_cycle_work(struct work_struct *work) struct ww_acquire_ctx ctx; int err, erra = 0; - ww_acquire_init_noinject(&ctx, &ww_class); + ww_acquire_init(&ctx, &ww_class); ww_mutex_lock(&cycle->a_mutex, &ctx); complete(cycle->a_signal); @@ -621,9 +581,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) static int __init test_ww_mutex_init(void) { int ncpus = num_online_cpus(); - int ret, i; - - printk(KERN_INFO "Beginning ww mutex selftests\n"); + int ret; wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0); if (!wq) @@ -633,19 +591,17 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = test_aa(false); + ret = test_aa(); if (ret) return ret; - ret = test_aa(true); + ret = test_abba(false); if (ret) return ret; - for (i = 0; i < 4; i++) { - ret 
= test_abba(i & 1, i & 2); - if (ret) - return ret; - } + ret = test_abba(true); + if (ret) + return ret; ret = test_cycle(ncpus); if (ret) @@ -663,7 +619,6 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - printk(KERN_INFO "All ww mutex selftests passed\n"); return 0; } diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c index d1473c6241..3f1fff7d27 100644 --- a/kernel/locking/ww_rt_mutex.c +++ b/kernel/locking/ww_rt_mutex.c @@ -9,31 +9,6 @@ #define WW_RT #include "rtmutex.c" -int ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) -{ - struct rt_mutex *rtm = &lock->base; - - if (!ww_ctx) - return rt_mutex_trylock(rtm); - - /* - * Reset the wounded flag after a kill. No other process can - * race and wound us here, since they can't have a valid owner - * pointer if we don't have any locks held. - */ - if (ww_ctx->acquired == 0) - ww_ctx->wounded = 0; - - if (__rt_mutex_trylock(&rtm->rtmutex)) { - ww_mutex_set_context_fastpath(lock, ww_ctx); - mutex_acquire_nest(&rtm->dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_); - return 1; - } - - return 0; -} -EXPORT_SYMBOL(ww_mutex_trylock); - static int __sched __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, unsigned int state, unsigned long ip) diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 8c381c9906..33783abc37 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -22,11 +22,6 @@ struct load_info { bool sig_ok; #ifdef CONFIG_KALLSYMS unsigned long mod_kallsyms_init_off; -#endif -#ifdef CONFIG_MODULE_DECOMPRESS - struct page **pages; - unsigned int max_pages; - unsigned int used_pages; #endif struct { unsigned int sym, str, mod, vers, info, pcpu; @@ -34,17 +29,3 @@ struct load_info { }; extern int mod_verify_sig(const void *mod, struct load_info *info); - -#ifdef CONFIG_MODULE_DECOMPRESS -int module_decompress(struct load_info *info, const void *buf, size_t size); -void module_decompress_cleanup(struct load_info *info); -#else -static inline int module_decompress(struct load_info *info, - const void *buf, size_t size) -{ - return -EOPNOTSUPP; -} -static inline void module_decompress_cleanup(struct load_info *info) -{ -} -#endif diff --git a/kernel/module.c b/kernel/module.c index 46a5c2ed19..83991c2d5a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -337,12 +337,12 @@ static inline void add_taint_module(struct module *mod, unsigned flag, * A thread that wants to hold a reference to a module only while it * is running can call this to safely exit. nfsd and lockd use this. */ -void __noreturn __module_put_and_kthread_exit(struct module *mod, long code) +void __noreturn __module_put_and_exit(struct module *mod, long code) { module_put(mod); - kthread_exit(code); + do_exit(code); } -EXPORT_SYMBOL(__module_put_and_kthread_exit); +EXPORT_SYMBOL(__module_put_and_exit); /* Find a module section: 0 means not found. */ static unsigned int find_sec(const struct load_info *info, const char *name) @@ -958,6 +958,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, } } + /* Stop the machine so refcounts can't move and disable module. 
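The helper renamed above serves module-owned kernel threads, nfsd- and lockd-style, via the module_put_and_exit() wrapper around THIS_MODULE; a generic sketch of a call site (svc_should_run and do_service are hypothetical):

	static int svc_fn(void *data)
	{
		while (svc_should_run())
			do_service();

		/*
		 * Drop the reference pinning this module and terminate in one
		 * step, so the module text cannot be freed while the thread
		 * is still executing inside it. Never returns.
		 */
		module_put_and_exit(0);
	}
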
*/ ret = try_stop_module(mod, flags, &forced); if (ret != 0) goto out; @@ -2883,13 +2884,12 @@ static int module_sig_check(struct load_info *info, int flags) const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; const char *reason; const void *mod = info->hdr; - bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS | - MODULE_INIT_IGNORE_VERMAGIC); + /* - * Do not allow mangled modules as a module with version information - * removed is no longer the module that was signed. + * Require flags == 0, as a module with version information + * removed is no longer the module that was signed */ - if (!mangled_module && + if (flags == 0 && info->len > markerlen && memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ @@ -2942,11 +2942,7 @@ static int module_sig_check(struct load_info *info, int flags) static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) { -#if defined(CONFIG_64BIT) - unsigned long long secend; -#else unsigned long secend; -#endif /* * Check for both overflow and offset/size being @@ -2971,29 +2967,14 @@ static int elf_validity_check(struct load_info *info) Elf_Shdr *shdr, *strhdr; int err; - if (info->len < sizeof(*(info->hdr))) { - pr_err("Invalid ELF header len %lu\n", info->len); - goto no_exec; - } + if (info->len < sizeof(*(info->hdr))) + return -ENOEXEC; - if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { - pr_err("Invalid ELF header magic: != %s\n", ELFMAG); - goto no_exec; - } - if (info->hdr->e_type != ET_REL) { - pr_err("Invalid ELF header type: %u != %u\n", - info->hdr->e_type, ET_REL); - goto no_exec; - } - if (!elf_check_arch(info->hdr)) { - pr_err("Invalid architecture in ELF header: %u\n", - info->hdr->e_machine); - goto no_exec; - } - if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { - pr_err("Invalid ELF section header size\n"); - goto no_exec; - } + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 + || info->hdr->e_type != ET_REL + || !elf_check_arch(info->hdr) + || info->hdr->e_shentsize != sizeof(Elf_Shdr)) + return -ENOEXEC; /* * e_shnum is 16 bits, and sizeof(Elf_Shdr) is @@ -3002,10 +2983,8 @@ static int elf_validity_check(struct load_info *info) */ if (info->hdr->e_shoff >= info->len || (info->hdr->e_shnum * sizeof(Elf_Shdr) > - info->len - info->hdr->e_shoff)) { - pr_err("Invalid ELF section header overflow\n"); - goto no_exec; - } + info->len - info->hdr->e_shoff)) + return -ENOEXEC; info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; @@ -3013,19 +2992,13 @@ static int elf_validity_check(struct load_info *info) * Verify if the section name table index is valid. */ if (info->hdr->e_shstrndx == SHN_UNDEF - || info->hdr->e_shstrndx >= info->hdr->e_shnum) { - pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", - info->hdr->e_shstrndx, info->hdr->e_shstrndx, - info->hdr->e_shnum); - goto no_exec; - } + || info->hdr->e_shstrndx >= info->hdr->e_shnum) + return -ENOEXEC; strhdr = &info->sechdrs[info->hdr->e_shstrndx]; err = validate_section_offset(info, strhdr); - if (err < 0) { - pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type); + if (err < 0) return err; - } /* * The section name table must be NUL-terminated, as required @@ -3033,10 +3006,8 @@ static int elf_validity_check(struct load_info *info) * strings in the section safe. 
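A worked bound for the shnum check earlier in this function, assuming 64-bit section headers:

	/*
	 * e_shnum is a u16 and sizeof(Elf64_Shdr) == 64, so
	 *     e_shnum * sizeof(Elf_Shdr) <= 65535 * 64 = 4194240,
	 * which fits easily in 32 bits: the multiplication in the
	 * e_shoff bounds check cannot overflow.
	 */
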
*/ info->secstrings = (void *)info->hdr + strhdr->sh_offset; - if (info->secstrings[strhdr->sh_size - 1] != '\0') { - pr_err("ELF Spec violation: section name table isn't null terminated\n"); - goto no_exec; - } + if (info->secstrings[strhdr->sh_size - 1] != '\0') + return -ENOEXEC; /* * The code assumes that section 0 has a length of zero and @@ -3044,11 +3015,8 @@ static int elf_validity_check(struct load_info *info) */ if (info->sechdrs[0].sh_type != SHT_NULL || info->sechdrs[0].sh_size != 0 - || info->sechdrs[0].sh_addr != 0) { - pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", - info->sechdrs[0].sh_type); - goto no_exec; - } + || info->sechdrs[0].sh_addr != 0) + return -ENOEXEC; for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; @@ -3058,12 +3026,8 @@ static int elf_validity_check(struct load_info *info) continue; case SHT_SYMTAB: if (shdr->sh_link == SHN_UNDEF - || shdr->sh_link >= info->hdr->e_shnum) { - pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", - shdr->sh_link, shdr->sh_link, - info->hdr->e_shnum); - goto no_exec; - } + || shdr->sh_link >= info->hdr->e_shnum) + return -ENOEXEC; fallthrough; default: err = validate_section_offset(info, shdr); @@ -3085,9 +3049,6 @@ static int elf_validity_check(struct load_info *info) } return 0; - -no_exec: - return -ENOEXEC; } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) @@ -3174,12 +3135,9 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, return err; } -static void free_copy(struct load_info *info, int flags) +static void free_copy(struct load_info *info) { - if (flags & MODULE_INIT_COMPRESSED_FILE) - module_decompress_cleanup(info); - else - vfree(info->hdr); + vfree(info->hdr); } static int rewrite_section_headers(struct load_info *info, int flags) @@ -3967,8 +3925,10 @@ static int load_module(struct load_info *info, const char __user *uargs, * sections. */ err = elf_validity_check(info); - if (err) + if (err) { + pr_err("Module has invalid ELF structures\n"); goto free_copy; + } /* * Everything checks out, so set up the section info @@ -4113,7 +4073,7 @@ static int load_module(struct load_info *info, const char __user *uargs, } /* Get rid of temporary copy. */ - free_copy(info, flags); + free_copy(info); /* Done! 
*/ trace_module_load(mod); @@ -4162,7 +4122,7 @@ static int load_module(struct load_info *info, const char __user *uargs, module_deallocate(mod, info); free_copy: - free_copy(info, flags); + free_copy(info); return err; } @@ -4189,8 +4149,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { struct load_info info = { }; - void *buf = NULL; - int len; + void *hdr = NULL; int err; err = may_init_module(); @@ -4200,24 +4159,15 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS - |MODULE_INIT_IGNORE_VERMAGIC - |MODULE_INIT_COMPRESSED_FILE)) + |MODULE_INIT_IGNORE_VERMAGIC)) return -EINVAL; - len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL, + err = kernel_read_file_from_fd(fd, 0, &hdr, INT_MAX, NULL, READING_MODULE); - if (len < 0) - return len; - - if (flags & MODULE_INIT_COMPRESSED_FILE) { - err = module_decompress(&info, buf, len); - vfree(buf); /* compressed data is no longer needed */ - if (err) - return err; - } else { - info.hdr = buf; - info.len = len; - } + if (err < 0) + return err; + info.hdr = hdr; + info.len = err; return load_module(&info, uargs, flags); } @@ -4497,8 +4447,6 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, mod, kallsyms_symbol_value(sym)); if (ret != 0) goto out; - - cond_resched(); } } out: diff --git a/kernel/notifier.c b/kernel/notifier.c index ba005ebf47..b8251dc0bc 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -20,13 +20,12 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); */ static int notifier_chain_register(struct notifier_block **nl, - struct notifier_block *n) + struct notifier_block *n) { while ((*nl) != NULL) { if (unlikely((*nl) == n)) { - WARN(1, "notifier callback %ps already registered", - n->notifier_call); - return -EEXIST; + WARN(1, "double register detected"); + return 0; } if (n->priority > (*nl)->priority) break; @@ -135,7 +134,7 @@ static int notifier_call_chain_robust(struct notifier_block **nl, * * Adds a notifier to an atomic notifier chain. * - * Returns 0 on success, %-EEXIST on error. + * Currently always returns zero. */ int atomic_notifier_chain_register(struct atomic_notifier_head *nh, struct notifier_block *n) @@ -217,7 +216,7 @@ NOKPROBE_SYMBOL(atomic_notifier_call_chain); * Adds a notifier to a blocking notifier chain. * Must be called in process context. * - * Returns 0 on success, %-EEXIST on error. + * Currently always returns zero. */ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, struct notifier_block *n) @@ -336,7 +335,7 @@ EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); * Adds a notifier to a raw notifier chain. * All locking must be provided by the caller. * - * Returns 0 on success, %-EEXIST on error. + * Currently always returns zero. */ int raw_notifier_chain_register(struct raw_notifier_head *nh, struct notifier_block *n) @@ -407,7 +406,7 @@ EXPORT_SYMBOL_GPL(raw_notifier_call_chain); * Adds a notifier to an SRCU notifier chain. * Must be called in process context. * - * Returns 0 on success, %-EEXIST on error. + * Currently always returns zero. 
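For reference, registration on any of these chain flavors has the same shape (a generic sketch; the chain, handler and MY_EVENT are illustrative):

	static BLOCKING_NOTIFIER_HEAD(my_chain);

	static int my_handler(struct notifier_block *nb, unsigned long action,
			      void *data)
	{
		return NOTIFY_OK;	/* or NOTIFY_DONE / NOTIFY_STOP */
	}

	static struct notifier_block my_nb = {
		.notifier_call	= my_handler,
		.priority	= 0,	/* higher priority runs earlier */
	};

	blocking_notifier_chain_register(&my_chain, &my_nb);
	blocking_notifier_call_chain(&my_chain, MY_EVENT, NULL);
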
*/ int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *n) diff --git a/kernel/panic.c b/kernel/panic.c index 55b50e052e..cefd7d8236 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -32,7 +32,6 @@ #include #include #include -#include <trace/events/error_report.h> #include #define PANIC_TIMER_STEP 100 @@ -534,9 +533,26 @@ void oops_enter(void) trigger_all_cpu_backtrace(); } +/* + * 64-bit random ID for oopses: + */ +static u64 oops_id; + +static int init_oops_id(void) +{ + if (!oops_id) + get_random_bytes(&oops_id, sizeof(oops_id)); + else + oops_id++; + + return 0; +} +late_initcall(init_oops_id); + static void print_oops_end_marker(void) { - pr_warn("---[ end trace %016llx ]---\n", 0ULL); + init_oops_id(); + pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); } /* @@ -593,7 +609,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint, print_irqtrace_events(current); print_oops_end_marker(); - trace_error_report_end(ERROR_DETECTOR_WARN, (unsigned long)caller); /* Just a warning, don't kill lockdep. */ add_taint(taint, LOCKDEP_STILL_OK); diff --git a/kernel/params.c b/kernel/params.c index 5b92310425..8299bd764e 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -926,9 +926,9 @@ static const struct sysfs_ops module_sysfs_ops = { .store = module_attr_store, }; -static int uevent_filter(struct kobject *kobj) +static int uevent_filter(struct kset *kset, struct kobject *kobj) { - const struct kobj_type *ktype = get_ktype(kobj); + struct kobj_type *ktype = get_ktype(kobj); if (ktype == &module_ktype) return 1; diff --git a/kernel/pid.c b/kernel/pid.c index 2fc0a16ec7..efe87db446 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -539,42 +539,6 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) return pid; } -/** - * pidfd_get_task() - Get the task associated with a pidfd - * - * @pidfd: pidfd for which to get the task - * @flags: flags associated with this pidfd - * - * Return the task associated with @pidfd. The function takes a reference on - * the returned task. The caller is responsible for releasing that reference. - * - * Currently, the process identified by @pidfd is always a thread-group leader. - * This restriction currently exists for all aspects of pidfds including pidfd - * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling - * (only supports thread group leaders). - * - * Return: On success, the task_struct associated with the pidfd. - * On error, a negative errno number will be returned. - */ -struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) -{ - unsigned int f_flags; - struct pid *pid; - struct task_struct *task; - - pid = pidfd_get_pid(pidfd, &f_flags); - if (IS_ERR(pid)) - return ERR_CAST(pid); - - task = get_pid_task(pid, PIDTYPE_TGID); - put_pid(pid); - if (!task) - return ERR_PTR(-ESRCH); - - *flags = f_flags; - return task; -} - /** * pidfd_create() - Create a new pid file descriptor. * diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 0153b0ca7b..97e62469a6 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -2,7 +2,7 @@ /* * Energy Model of devices * - * Copyright (c) 2018-2021, Arm ltd. + * Copyright (c) 2018-2020, Arm ltd. * Written by: Quentin Perret, Arm ltd. * Improvements provided by: Lukasz Luba, Arm ltd. 
*/ @@ -10,7 +10,6 @@ #define pr_fmt(fmt) "energy_model: " fmt #include -#include <linux/cpufreq.h> #include #include #include @@ -43,7 +42,6 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) debugfs_create_ulong("frequency", 0444, d, &ps->frequency); debugfs_create_ulong("power", 0444, d, &ps->power); debugfs_create_ulong("cost", 0444, d, &ps->cost); - debugfs_create_ulong("inefficient", 0444, d, &ps->flags); } static int em_debug_cpus_show(struct seq_file *s, void *unused) @@ -57,8 +55,7 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); static int em_debug_units_show(struct seq_file *s, void *unused) { struct em_perf_domain *pd = s->private; - char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ? - "milliWatts" : "bogoWatts"; + char *units = pd->milliwatts ? "milliWatts" : "bogoWatts"; seq_printf(s, "%s\n", units); @@ -66,17 +63,6 @@ } DEFINE_SHOW_ATTRIBUTE(em_debug_units); -static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused) -{ - struct em_perf_domain *pd = s->private; - int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0; - - seq_printf(s, "%d\n", enabled); - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies); - static void em_debug_create_pd(struct device *dev) { struct dentry *d; @@ -90,8 +76,6 @@ static void em_debug_create_pd(struct device *dev) &em_debug_cpus_fops); debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops); - debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd, - &em_debug_skip_inefficiencies_fops); /* Create a sub-directory for each performance state */ for (i = 0; i < dev->em_pd->nr_perf_states; i++) @@ -178,7 +162,6 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].cost = div64_u64(fmax * power_res, table[i].frequency); if (table[i].cost >= prev_cost) { - table[i].flags = EM_PERF_STATE_INEFFICIENT; dev_dbg(dev, "EM: OPP:%lu is inefficient\n", table[i].frequency); } else { @@ -232,43 +215,6 @@ static int em_create_pd(struct device *dev, int nr_states, return 0; } -static void em_cpufreq_update_efficiencies(struct device *dev) -{ - struct em_perf_domain *pd = dev->em_pd; - struct em_perf_state *table; - struct cpufreq_policy *policy; - int found = 0; - int i; - - if (!_is_cpu_device(dev) || !pd) - return; - - policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); - if (!policy) { - dev_warn(dev, "EM: Access to CPUFreq policy failed"); - return; - } - - table = pd->table; - - for (i = 0; i < pd->nr_perf_states; i++) { - if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT)) - continue; - - if (!cpufreq_table_set_inefficient(policy, table[i].frequency)) - found++; - } - - if (!found) - return; - - /* - * Efficiencies have been installed in CPUFreq, inefficient frequencies - * will be skipped. The EM can do the same. 
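The metric behind these hunks is cost = fmax * power / freq, i.e. energy per unit of work at each OPP; a state gets flagged when it is no cheaper than a faster one. A worked example with made-up numbers:

	/*
	 * fmax = 2000 MHz (hypothetical table):
	 *   OPP @ 1500 MHz, 900 mW: cost = 2000 * 900 / 1500 = 1200
	 *   OPP @ 1000 MHz, 700 mW: cost = 2000 * 700 / 1000 = 1400
	 * 1400 >= 1200, so the 1000 MHz state is inefficient: the same
	 * work costs less energy at 1500 MHz and finishes sooner.
	 */
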
- */ - pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES; -} - /** * em_pd_get() - Return the performance domain for a device * @dev : Device to find the performance domain for @@ -382,10 +328,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, if (ret) goto unlock; - if (milliwatts) - dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS; - - em_cpufreq_update_efficiencies(dev); + dev->em_pd->milliwatts = milliwatts; em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e6af502c2f..b0888e9224 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -300,7 +300,7 @@ static int create_image(int platform_mode) if (error || hibernation_test(TEST_PLATFORM)) goto Platform_finish; - error = pm_sleep_disable_secondary_cpus(); + error = suspend_disable_secondary_cpus(); if (error || hibernation_test(TEST_CPUS)) goto Enable_cpus; @@ -342,7 +342,7 @@ static int create_image(int platform_mode) local_irq_enable(); Enable_cpus: - pm_sleep_enable_secondary_cpus(); + suspend_enable_secondary_cpus(); /* Allow architectures to do nosmt-specific post-resume dances */ if (!in_suspend) @@ -466,8 +466,6 @@ static int resume_target_kernel(bool platform_mode) if (error) goto Cleanup; - cpuidle_pause(); - error = hibernate_resume_nonboot_cpu_disable(); if (error) goto Enable_cpus; @@ -511,7 +509,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_enable(); Enable_cpus: - pm_sleep_enable_secondary_cpus(); + suspend_enable_secondary_cpus(); Cleanup: platform_restore_cleanup(platform_mode); @@ -589,7 +587,7 @@ int hibernation_platform_enter(void) if (error) goto Platform_finish; - error = pm_sleep_disable_secondary_cpus(); + error = suspend_disable_secondary_cpus(); if (error) goto Enable_cpus; @@ -611,7 +609,7 @@ int hibernation_platform_enter(void) local_irq_enable(); Enable_cpus: - pm_sleep_enable_secondary_cpus(); + suspend_enable_secondary_cpus(); Platform_finish: hibernation_ops->finish(); diff --git a/kernel/power/power.h b/kernel/power/power.h index b4f4339432..778bf431ec 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -4,8 +4,6 @@ #include #include #include -#include <linux/cpu.h> -#include <linux/cpuidle.h> struct swsusp_info { struct new_utsname uts; @@ -170,7 +168,6 @@ extern int swsusp_swap_in_use(void); #define SF_PLATFORM_MODE 1 #define SF_NOCOMPRESS_MODE 2 #define SF_CRC32_MODE 4 -#define SF_HW_SIG 8 /* kernel/power/hibernate.c */ extern int swsusp_check(void); @@ -313,15 +310,3 @@ extern int pm_wake_lock(const char *buf); extern int pm_wake_unlock(const char *buf); #endif /* !CONFIG_PM_WAKELOCKS */ - -static inline int pm_sleep_disable_secondary_cpus(void) -{ - cpuidle_pause(); - return suspend_disable_secondary_cpus(); -} - -static inline void pm_sleep_enable_secondary_cpus(void) -{ - suspend_enable_secondary_cpus(); - cpuidle_resume(); -} diff --git a/kernel/power/process.c b/kernel/power/process.c index 11b570fcf0..ee78a39463 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -94,7 +94,7 @@ static int try_to_freeze_tasks(bool user_only) todo - wq_busy, wq_busy); if (wq_busy) - show_all_workqueues(); + show_workqueue_state(); if (!wakeup || pm_debug_messages_on) { read_lock(&tasklist_lock); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index ad10359030..f3a1086f7c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -36,8 +36,6 @@ #define HIBERNATE_SIG "S1SUSPEND" -u32 swsusp_hardware_signature; - /* * When reading an {un,}compressed image, we 
may restore pages in place, * in which case some architectures need these pages cleaning before they @@ -106,8 +104,7 @@ struct swap_map_handle { struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) - - sizeof(u32) - sizeof(u32)]; - u32 hw_sig; + sizeof(u32)]; u32 crc32; sector_t image; unsigned int flags; /* Flags to pass to the "boot" kernel */ @@ -315,6 +312,7 @@ static int hib_wait_io(struct hib_bio_batch *hb) /* * Saving part */ + static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; @@ -326,10 +324,6 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); swsusp_header->image = handle->first_sector; - if (swsusp_hardware_signature) { - swsusp_header->hw_sig = swsusp_hardware_signature; - flags |= SF_HW_SIG; - } swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; @@ -711,19 +705,22 @@ static int save_image_lzo(struct swap_map_handle *handle, goto out_clean; } - data = vzalloc(array_size(nr_threads, sizeof(*data))); + data = vmalloc(array_size(nr_threads, sizeof(*data))); if (!data) { pr_err("Failed to allocate LZO data\n"); ret = -ENOMEM; goto out_clean; } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct cmp_data, go)); - crc = kzalloc(sizeof(*crc), GFP_KERNEL); + crc = kmalloc(sizeof(*crc), GFP_KERNEL); if (!crc) { pr_err("Failed to allocate crc\n"); ret = -ENOMEM; goto out_clean; } + memset(crc, 0, offsetof(struct crc_data, go)); /* * Start the compression threads. @@ -1201,19 +1198,22 @@ static int load_image_lzo(struct swap_map_handle *handle, goto out_clean; } - data = vzalloc(array_size(nr_threads, sizeof(*data))); + data = vmalloc(array_size(nr_threads, sizeof(*data))); if (!data) { pr_err("Failed to allocate LZO data\n"); ret = -ENOMEM; goto out_clean; } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct dec_data, go)); - crc = kzalloc(sizeof(*crc), GFP_KERNEL); + crc = kmalloc(sizeof(*crc), GFP_KERNEL); if (!crc) { pr_err("Failed to allocate crc\n"); ret = -ENOMEM; goto out_clean; } + memset(crc, 0, offsetof(struct crc_data, go)); clean_pages_on_decompress = true; @@ -1543,12 +1543,6 @@ int swsusp_check(void) } else { error = -EINVAL; } - if (!error && swsusp_header->flags & SF_HW_SIG && - swsusp_header->hw_sig != swsusp_hardware_signature) { - pr_info("Suspend image hardware signature mismatch (%08x now %08x); aborting resume.\n", - swsusp_header->hw_sig, swsusp_hardware_signature); - error = -EINVAL; - } put: if (error) diff --git a/kernel/power/user.c b/kernel/power/user.c index ad241b4ff6..740723bb38 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -177,7 +177,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, if (res <= 0) goto unlock; } else { - res = PAGE_SIZE; + res = PAGE_SIZE - pg_offp; } if (!data_of(data->handle)) { diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index f5b388e810..d118739874 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -2,8 +2,5 @@ obj-y = printk.o obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o +obj-$(CONFIG_PRINTK) += printk_ringbuffer.o obj-$(CONFIG_PRINTK_INDEX) += index.o - -obj-$(CONFIG_PRINTK) += printk_support.o -printk_support-y := printk_ringbuffer.o -printk_support-$(CONFIG_SYSCTL) += sysctl.o diff --git a/kernel/printk/index.c 
b/kernel/printk/index.c index c85be186a7..d3709408de 100644 --- a/kernel/printk/index.c +++ b/kernel/printk/index.c @@ -26,9 +26,10 @@ static struct pi_entry *pi_get_entry(const struct module *mod, loff_t pos) if (mod) { entries = mod->printk_index_start; nr_entries = mod->printk_index_size; - } else + } #endif - { + + if (!mod) { /* vmlinux, comes from linker symbols */ entries = __start_printk_index; nr_entries = __stop_printk_index - __start_printk_index; diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index d947ca6c84..9f3ed2fdb7 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -4,14 +4,6 @@ */ #include -#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) -void __init printk_sysctl_init(void); -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -#else -#define printk_sysctl_init() do { } while (0) -#endif - #ifdef CONFIG_PRINTK /* Flags for a single printk record. */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 82abfaf3c2..99221b016c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -171,7 +171,7 @@ static int __init control_devkmsg(char *str) __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; -#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) + int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -210,7 +210,6 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, return 0; } -#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */ /* Number of registered extended console drivers. */ static int nr_ext_console_drivers; @@ -281,6 +280,7 @@ static struct console *exclusive_console; static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int preferred_console = -1; +static bool has_preferred_console; int console_set_on_cmdline; EXPORT_SYMBOL(console_set_on_cmdline); @@ -847,7 +847,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) return err; } - user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); + user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); if (!user) return -ENOMEM; @@ -875,7 +875,7 @@ static int devkmsg_release(struct inode *inode, struct file *file) ratelimit_state_exit(&user->rs); mutex_destroy(&user->lock); - kvfree(user); + kfree(user); return 0; } @@ -1166,9 +1166,9 @@ void __init setup_log_buf(int early) return; err_free_descs: - memblock_free(new_descs, new_descs_size); + memblock_free_ptr(new_descs, new_descs_size); err_free_log_buf: - memblock_free(new_log_buf, new_log_buf_len); + memblock_free_ptr(new_log_buf, new_log_buf_len); } static bool __read_mostly ignore_loglevel; @@ -2066,7 +2066,6 @@ u16 printk_parse_prefix(const char *text, int *level, return prefix_len; } -__printf(5, 0) static u16 printk_sprint(char *text, u16 size, int facility, enum printk_info_flags *flags, const char *fmt, va_list args) @@ -2861,8 +2860,7 @@ early_param("keep_bootcon", keep_bootcon_setup); * Care need to be taken with consoles that are statically * enabled such as netconsole */ -static int try_enable_preferred_console(struct console *newcon, - bool user_specified) +static int try_enable_new_console(struct console *newcon, bool user_specified) { struct console_cmdline *c; int i, err; @@ -2892,8 +2890,10 @@ static int try_enable_preferred_console(struct console *newcon, return err; } newcon->flags |= CON_ENABLED; - if (i == preferred_console) + if (i == preferred_console) { 
newcon->flags |= CON_CONSDEV; + has_preferred_console = true; + } return 0; } @@ -2908,21 +2908,6 @@ static int try_enable_preferred_console(struct console *newcon, return -ENOENT; } -/* Try to enable the console unconditionally */ -static void try_enable_default_console(struct console *newcon) -{ - if (newcon->index < 0) - newcon->index = 0; - - if (newcon->setup && newcon->setup(newcon, NULL) != 0) - return; - - newcon->flags |= CON_ENABLED; - - if (newcon->device) - newcon->flags |= CON_CONSDEV; -} - /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to @@ -2944,56 +2929,59 @@ static void try_enable_default_console(struct console *newcon) */ void register_console(struct console *newcon) { - struct console *con; - bool bootcon_enabled = false; - bool realcon_enabled = false; + struct console *bcon = NULL; int err; - for_each_console(con) { - if (WARN(con == newcon, "console '%s%d' already registered\n", - con->name, con->index)) + for_each_console(bcon) { + if (WARN(bcon == newcon, "console '%s%d' already registered\n", + bcon->name, bcon->index)) return; } - for_each_console(con) { - if (con->flags & CON_BOOT) - bootcon_enabled = true; - else - realcon_enabled = true; + /* + * before we register a new CON_BOOT console, make sure we don't + * already have a valid console + */ + if (newcon->flags & CON_BOOT) { + for_each_console(bcon) { + if (!(bcon->flags & CON_BOOT)) { + pr_info("Too late to register bootconsole %s%d\n", + newcon->name, newcon->index); + return; + } + } } - /* Do not register boot consoles when there already is a real one. */ - if (newcon->flags & CON_BOOT && realcon_enabled) { - pr_info("Too late to register bootconsole %s%d\n", - newcon->name, newcon->index); - return; - } + if (console_drivers && console_drivers->flags & CON_BOOT) + bcon = console_drivers; + + if (!has_preferred_console || bcon || !console_drivers) + has_preferred_console = preferred_console >= 0; /* - * See if we want to enable this console driver by default. - * - * Nope when a console is preferred by the command line, device - * tree, or SPCR. - * - * The first real console with tty binding (driver) wins. More - * consoles might get enabled before the right one is found. - * - * Note that a console with tty binding will have CON_CONSDEV - * flag set and will be first in the list. + * See if we want to use this console driver. If we + * didn't select a console we take the first one + * that registers here. */ - if (preferred_console < 0) { - if (!console_drivers || !console_drivers->device || - console_drivers->flags & CON_BOOT) { - try_enable_default_console(newcon); + if (!has_preferred_console) { + if (newcon->index < 0) + newcon->index = 0; + if (newcon->setup == NULL || + newcon->setup(newcon, NULL) == 0) { + newcon->flags |= CON_ENABLED; + if (newcon->device) { + newcon->flags |= CON_CONSDEV; + has_preferred_console = true; + } } } /* See if this console matches one we selected on the command line */ - err = try_enable_preferred_console(newcon, true); + err = try_enable_new_console(newcon, true); /* If not, try to match against the platform default(s) */ if (err == -ENOENT) - err = try_enable_preferred_console(newcon, false); + err = try_enable_new_console(newcon, false); /* printk() messages are not printed to the Braille console. 
*/ if (err || newcon->flags & CON_BRL) @@ -3005,10 +2993,8 @@ void register_console(struct console *newcon) * the real console are the same physical device, it's annoying to * see the beginning boot messages twice */ - if (bootcon_enabled && - ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { + if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) newcon->flags &= ~CON_PRINTBUFFER; - } /* * Put this console in the list - keep the @@ -3064,15 +3050,15 @@ void register_console(struct console *newcon) pr_info("%sconsole [%s%d] enabled\n", (newcon->flags & CON_BOOT) ? "boot" : "" , newcon->name, newcon->index); - if (bootcon_enabled && + if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && !keep_bootcon) { /* We need to iterate through all boot consoles, to make * sure we print everything out, before we unregister them. */ - for_each_console(con) - if (con->flags & CON_BOOT) - unregister_console(con); + for_each_console(bcon) + if (bcon->flags & CON_BOOT) + unregister_console(bcon); } } EXPORT_SYMBOL(register_console); @@ -3212,7 +3198,6 @@ static int __init printk_late_init(void) ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", console_cpu_notify, NULL); WARN_ON(ret < 0); - printk_sysctl_init(); return 0; } late_initcall(printk_late_init); diff --git a/kernel/profile.c b/kernel/profile.c index 37640a0bd8..eb9c7f0f5a 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -133,6 +133,79 @@ int __ref profile_init(void) return -ENOMEM; } +/* Profile event notifications */ + +static BLOCKING_NOTIFIER_HEAD(task_exit_notifier); +static ATOMIC_NOTIFIER_HEAD(task_free_notifier); +static BLOCKING_NOTIFIER_HEAD(munmap_notifier); + +void profile_task_exit(struct task_struct *task) +{ + blocking_notifier_call_chain(&task_exit_notifier, 0, task); +} + +int profile_handoff_task(struct task_struct *task) +{ + int ret; + ret = atomic_notifier_call_chain(&task_free_notifier, 0, task); + return (ret == NOTIFY_OK) ? 
1 : 0; +} + +void profile_munmap(unsigned long addr) +{ + blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr); +} + +int task_handoff_register(struct notifier_block *n) +{ + return atomic_notifier_chain_register(&task_free_notifier, n); +} +EXPORT_SYMBOL_GPL(task_handoff_register); + +int task_handoff_unregister(struct notifier_block *n) +{ + return atomic_notifier_chain_unregister(&task_free_notifier, n); +} +EXPORT_SYMBOL_GPL(task_handoff_unregister); + +int profile_event_register(enum profile_type type, struct notifier_block *n) +{ + int err = -EINVAL; + + switch (type) { + case PROFILE_TASK_EXIT: + err = blocking_notifier_chain_register( + &task_exit_notifier, n); + break; + case PROFILE_MUNMAP: + err = blocking_notifier_chain_register( + &munmap_notifier, n); + break; + } + + return err; +} +EXPORT_SYMBOL_GPL(profile_event_register); + +int profile_event_unregister(enum profile_type type, struct notifier_block *n) +{ + int err = -EINVAL; + + switch (type) { + case PROFILE_TASK_EXIT: + err = blocking_notifier_chain_unregister( + &task_exit_notifier, n); + break; + case PROFILE_MUNMAP: + err = blocking_notifier_chain_unregister( + &munmap_notifier, n); + break; + } + + return err; +} +EXPORT_SYMBOL_GPL(profile_event_unregister); + #if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) /* * Each cpu has a pair of open-addressed hashtables for pending diff --git a/kernel/ptrace.c b/kernel/ptrace.c index eea265082e..f8589bf8d7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -419,6 +419,8 @@ static int ptrace_attach(struct task_struct *task, long request, if (task->ptrace) goto unlock_tasklist; + if (seize) + flags |= PT_SEIZED; task->ptrace = flags; ptrace_link(task, current); diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index bf8e341e75..3128b7cf8e 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -112,7 +112,7 @@ config RCU_STALL_COMMON making these warnings mandatory for the tree variants. config RCU_NEED_SEGCBLIST - def_bool ( TREE_RCU || TREE_SRCU || TASKS_RCU_GENERIC ) + def_bool ( TREE_RCU || TREE_SRCU ) config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" @@ -169,6 +169,24 @@ config RCU_FANOUT_LEAF Take the default if unsure. +config RCU_FAST_NO_HZ + bool "Accelerate last non-dyntick-idle CPU's grace periods" + depends on NO_HZ_COMMON && SMP && RCU_EXPERT + default n + help + This option permits CPUs to enter dynticks-idle state even if + they have RCU callbacks queued, and prevents RCU from waking + these CPUs up more than roughly once every four jiffies (by + default, you can adjust this using the rcutree.rcu_idle_gp_delay + parameter), thus improving energy efficiency. On the other + hand, this option increases the duration of RCU grace periods, + for example, slowing down synchronize_rcu(). + + Say Y if energy efficiency is critically important, and you + don't care about increased grace-period durations. + + Say N if you are unsure. + config RCU_BOOST bool "Enable RCU priority boosting" depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 81145c3ece..aaa111237b 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -261,14 +261,16 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) } /* - * Mark the specified rcu_segcblist structure as offloaded (or not) + * Mark the specified rcu_segcblist structure as offloaded. 
*/ void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload) { - if (offload) - rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED); - else + if (offload) { + rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY); + rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED); + } else { rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED); + } } /* diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index e373fbe44d..9a19328ff2 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -80,14 +80,11 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) return rcu_segcblist_test_flags(rsclp, SEGCBLIST_ENABLED); } -/* - * Is the specified rcu_segcblist NOCB offloaded (or in the middle of the - * [de]offloading process)? - */ +/* Is the specified rcu_segcblist offloaded, or is SEGCBLIST_SOFTIRQ_ONLY set? */ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp) { if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && - rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING)) + !rcu_segcblist_test_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY)) return true; return false; @@ -95,8 +92,9 @@ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp) static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp) { - if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && - !rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE)) + int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED; + + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && (rsclp->flags & flags) == flags) return true; return false; diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 5e4f1f83d3..2cc34a22a5 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -50,8 +50,8 @@ MODULE_AUTHOR("Paul E. McKenney "); pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s) #define VERBOSE_SCALEOUT_STRING(s) \ do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0) -#define SCALEOUT_ERRSTRING(s) \ - pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s) +#define VERBOSE_SCALEOUT_ERRSTRING(s) \ + do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s); } while (0) /* * The intended use cases for the nreaders and nwriters module parameters @@ -514,11 +514,11 @@ rcu_scale_cleanup(void) * during the mid-boot phase, so have to wait till the end. 
*/ if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) - SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); + VERBOSE_SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); if (rcu_gp_is_normal() && gp_exp) - SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); + VERBOSE_SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); if (gp_exp && gp_async) - SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); + VERBOSE_SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); if (torture_cleanup_begin()) return; @@ -758,7 +758,7 @@ kfree_scale_init(void) init_waitqueue_head(&shutdown_wq); firsterr = torture_create_kthread(kfree_scale_shutdown, NULL, shutdown_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; schedule_timeout_uninterruptible(1); } @@ -775,7 +775,7 @@ kfree_scale_init(void) for (i = 0; i < kfree_nrealthreads; i++) { firsterr = torture_create_kthread(kfree_scale_thread, (void *)i, kfree_reader_tasks[i]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } @@ -838,21 +838,21 @@ rcu_scale_init(void) init_waitqueue_head(&shutdown_wq); firsterr = torture_create_kthread(rcu_scale_shutdown, NULL, shutdown_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; schedule_timeout_uninterruptible(1); } reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { - SCALEOUT_ERRSTRING("out of memory"); + VERBOSE_SCALEOUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } for (i = 0; i < nrealreaders; i++) { firsterr = torture_create_kthread(rcu_scale_reader, (void *)i, reader_tasks[i]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders) @@ -865,7 +865,7 @@ rcu_scale_init(void) kcalloc(nrealwriters, sizeof(*writer_n_durations), GFP_KERNEL); if (!writer_tasks || !writer_durations || !writer_n_durations) { - SCALEOUT_ERRSTRING("out of memory"); + VERBOSE_SCALEOUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -879,7 +879,7 @@ rcu_scale_init(void) } firsterr = torture_create_kthread(rcu_scale_writer, (void *)i, writer_tasks[i]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } torture_init_end(); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 422f7e4cc0..f922937eb3 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -54,18 +54,15 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); /* Bits for ->extendables field, extendables param, and related definitions. */ -#define RCUTORTURE_RDR_SHIFT_1 8 /* Put SRCU index in upper bits. */ -#define RCUTORTURE_RDR_MASK_1 (1 << RCUTORTURE_RDR_SHIFT_1) -#define RCUTORTURE_RDR_SHIFT_2 9 /* Put SRCU index in upper bits. */ -#define RCUTORTURE_RDR_MASK_2 (1 << RCUTORTURE_RDR_SHIFT_2) +#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */ +#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1) #define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */ #define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */ #define RCUTORTURE_RDR_PREEMPT 0x04 /* ... disabling preemption. */ #define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */ #define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */ -#define RCUTORTURE_RDR_RCU_1 0x20 /* ... entering another RCU reader. */ -#define RCUTORTURE_RDR_RCU_2 0x40 /* ... 
entering another RCU reader. */ -#define RCUTORTURE_RDR_NBITS 7 /* Number of bits defined above. */ +#define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */ +#define RCUTORTURE_RDR_NBITS 6 /* Number of bits defined above. */ #define RCUTORTURE_MAX_EXTEND \ (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \ RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED) @@ -79,7 +76,7 @@ torture_param(int, fqs_duration, 0, "Duration of fqs bursts (us), 0 to disable"); torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); -torture_param(int, fwd_progress, 1, "Test grace-period forward progress"); +torture_param(bool, fwd_progress, 1, "Test grace-period forward progress"); torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait"); torture_param(int, fwd_progress_holdoff, 60, "Time between forward-progress tests (s)"); @@ -146,7 +143,7 @@ static struct task_struct *stats_task; static struct task_struct *fqs_task; static struct task_struct *boost_tasks[NR_CPUS]; static struct task_struct *stall_task; -static struct task_struct **fwd_prog_tasks; +static struct task_struct *fwd_prog_task; static struct task_struct **barrier_cbs_tasks; static struct task_struct *barrier_task; static struct task_struct *read_exit_task; @@ -348,12 +345,10 @@ struct rcu_torture_ops { void (*gp_kthread_dbg)(void); bool (*check_boost_failed)(unsigned long gp_state, int *cpup); int (*stall_dur)(void); - long cbflood_max; int irq_capable; int can_boost; int extendables; int slow_gps; - int no_pi_lock; const char *name; }; @@ -675,7 +670,6 @@ static struct rcu_torture_ops srcu_ops = { .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .irq_capable = 1, - .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcu" }; @@ -709,7 +703,6 @@ static struct rcu_torture_ops srcud_ops = { .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .irq_capable = 1, - .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcud" }; @@ -730,7 +723,6 @@ static struct rcu_torture_ops busted_srcud_ops = { .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .irq_capable = 1, - .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .extendables = RCUTORTURE_MAX_EXTEND, .name = "busted_srcud" }; @@ -842,7 +834,6 @@ static struct rcu_torture_ops tasks_rude_ops = { .call = call_rcu_tasks_rude, .cb_barrier = rcu_barrier_tasks_rude, .gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread, - .cbflood_max = 50000, .fqs = NULL, .stats = NULL, .irq_capable = 1, @@ -883,7 +874,6 @@ static struct rcu_torture_ops tasks_tracing_ops = { .call = call_rcu_tasks_trace, .cb_barrier = rcu_barrier_tasks_trace, .gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread, - .cbflood_max = 50000, .fqs = NULL, .stats = NULL, .irq_capable = 1, @@ -1433,15 +1423,13 @@ static void rcutorture_one_extend(int *readstate, int newstate, struct rt_read_seg *rtrsp) { unsigned long flags; - int idxnew1 = -1; - int idxnew2 = -1; - int idxold1 = *readstate; - int idxold2 = idxold1; + int idxnew = -1; + int idxold = *readstate; int statesnew = ~*readstate & newstate; int statesold = *readstate & ~newstate; - WARN_ON_ONCE(idxold2 < 0); - WARN_ON_ONCE((idxold2 >> RCUTORTURE_RDR_SHIFT_2) > 1); + WARN_ON_ONCE(idxold < 0); + WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1); rtrsp->rt_readstate = newstate; /* First, put new protection in place to avoid critical-section gap. 
*/ @@ -1455,10 +1443,8 @@ static void rcutorture_one_extend(int *readstate, int newstate, preempt_disable(); if (statesnew & RCUTORTURE_RDR_SCHED) rcu_read_lock_sched(); - if (statesnew & RCUTORTURE_RDR_RCU_1) - idxnew1 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_1; - if (statesnew & RCUTORTURE_RDR_RCU_2) - idxnew2 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_2; + if (statesnew & RCUTORTURE_RDR_RCU) + idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT; /* * Next, remove old protection, in decreasing order of strength @@ -1477,20 +1463,12 @@ static void rcutorture_one_extend(int *readstate, int newstate, local_bh_enable(); if (statesold & RCUTORTURE_RDR_RBH) rcu_read_unlock_bh(); - if (statesold & RCUTORTURE_RDR_RCU_2) { - cur_ops->readunlock((idxold2 >> RCUTORTURE_RDR_SHIFT_2) & 0x1); - WARN_ON_ONCE(idxnew2 != -1); - idxold2 = 0; - } - if (statesold & RCUTORTURE_RDR_RCU_1) { - bool lockit; + if (statesold & RCUTORTURE_RDR_RCU) { + bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); - lockit = !cur_ops->no_pi_lock && !statesnew && !(torture_random(trsp) & 0xffff); if (lockit) raw_spin_lock_irqsave(&current->pi_lock, flags); - cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1); - WARN_ON_ONCE(idxnew1 != -1); - idxold1 = 0; + cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT); if (lockit) raw_spin_unlock_irqrestore(&current->pi_lock, flags); } @@ -1500,19 +1478,13 @@ static void rcutorture_one_extend(int *readstate, int newstate, cur_ops->read_delay(trsp, rtrsp); /* Update the reader state. */ - if (idxnew1 == -1) - idxnew1 = idxold1 & RCUTORTURE_RDR_MASK_1; - WARN_ON_ONCE(idxnew1 < 0); - if (WARN_ON_ONCE((idxnew1 >> RCUTORTURE_RDR_SHIFT_1) > 1)) - pr_info("Unexpected idxnew1 value of %#x\n", idxnew1); - if (idxnew2 == -1) - idxnew2 = idxold2 & RCUTORTURE_RDR_MASK_2; - WARN_ON_ONCE(idxnew2 < 0); - WARN_ON_ONCE((idxnew2 >> RCUTORTURE_RDR_SHIFT_2) > 1); - *readstate = idxnew1 | idxnew2 | newstate; - WARN_ON_ONCE(*readstate < 0); - if (WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT_2) > 1)) - pr_info("Unexpected idxnew2 value of %#x\n", idxnew2); + if (idxnew == -1) + idxnew = idxold & ~RCUTORTURE_RDR_MASK; + WARN_ON_ONCE(idxnew < 0); + WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1); + *readstate = idxnew | newstate; + WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) < 0); + WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) > 1); } /* Return the biggest extendables mask given current RCU and boot parameters. */ @@ -1522,7 +1494,7 @@ static int rcutorture_extend_mask_max(void) WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND); mask = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables; - mask = mask | RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2; + mask = mask | RCUTORTURE_RDR_RCU; return mask; } @@ -1537,21 +1509,13 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ; unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; - WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT_1); + WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ if (!(randmask1 & 0x7)) mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); - // Can't have nested RCU reader without outer RCU reader. 
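// (For reference: in the dual-reader encoding being removed here, the
// low-order bits 0x01-0x40 select the protection types, while the
// SRCU reader indexes returned by ->readlock() are parked above them
// at bits RCUTORTURE_RDR_SHIFT_1 (8) and RCUTORTURE_RDR_SHIFT_2 (9).
// The fix-up just below preserves the invariant that the nested
// RDR_RCU_2 reader only ever runs inside an outer RDR_RCU_1 reader.)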
- if (!(mask & RCUTORTURE_RDR_RCU_1) && (mask & RCUTORTURE_RDR_RCU_2)) { - if (oldmask & RCUTORTURE_RDR_RCU_1) - mask &= ~RCUTORTURE_RDR_RCU_2; - else - mask |= RCUTORTURE_RDR_RCU_1; - } - /* * Can't enable bh w/irq disabled. */ @@ -1571,7 +1535,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) mask |= oldmask & bhs; } - return mask ?: RCUTORTURE_RDR_RCU_1; + return mask ?: RCUTORTURE_RDR_RCU; } /* @@ -1665,7 +1629,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) rcu_torture_writer_state, cookie, cur_ops->get_gp_state()); rcutorture_one_extend(&readstate, 0, trsp, rtrsp); - WARN_ON_ONCE(readstate); + WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK); // This next splat is expected behavior if leakpointer, especially // for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels. WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1); @@ -2031,8 +1995,9 @@ static int rcutorture_booster_init(unsigned int cpu) mutex_lock(&boost_mutex); rcu_torture_disable_rt_throttle(); VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task"); - boost_tasks[cpu] = kthread_run_on_cpu(rcu_torture_boost, NULL, - cpu, "rcu_torture_boost_%u"); + boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, + cpu_to_node(cpu), + "rcu_torture_boost"); if (IS_ERR(boost_tasks[cpu])) { retval = PTR_ERR(boost_tasks[cpu]); VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed"); @@ -2041,6 +2006,8 @@ static int rcutorture_booster_init(unsigned int cpu) mutex_unlock(&boost_mutex); return retval; } + kthread_bind(boost_tasks[cpu], cpu); + wake_up_process(boost_tasks[cpu]); mutex_unlock(&boost_mutex); return 0; } @@ -2161,13 +2128,10 @@ struct rcu_fwd { unsigned long rcu_fwd_startat; struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST]; unsigned long rcu_launder_gp_seq_start; - int rcu_fwd_id; }; static DEFINE_MUTEX(rcu_fwd_mutex); static struct rcu_fwd *rcu_fwds; -static unsigned long rcu_fwd_seq; -static atomic_long_t rcu_fwd_max_cbs; static bool rcu_fwd_emergency_stop; static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) @@ -2180,9 +2144,8 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--) if (rfp->n_launders_hist[i].n_launders > 0) break; - mutex_lock(&rcu_fwd_mutex); // Serialize histograms. - pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):", - __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat); + pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):", + __func__, jiffies - rfp->rcu_fwd_startat); gps_old = rfp->rcu_launder_gp_seq_start; for (j = 0; j <= i; j++) { gps = rfp->n_launders_hist[j].launder_gp_seq; @@ -2193,7 +2156,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) gps_old = gps; } pr_cont("\n"); - mutex_unlock(&rcu_fwd_mutex); } /* Callback function for continuous-flood RCU callbacks. 
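Each invocation of this callback links itself back onto the rcu_fwd
list; rcu_torture_fwd_prog_cr() below then either re-posts
("launders") such a recycled callback or allocates and posts a fresh
one, keeping a steady flood in flight so that callback-invocation
forward progress can be measured.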
*/ @@ -2319,8 +2281,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, cver = READ_ONCE(rcu_torture_current_version) - cver; gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps); WARN_ON(!cver && gps < 2); - pr_alert("%s: %d Duration %ld cver %ld gps %ld\n", __func__, - rfp->rcu_fwd_id, dur, cver, gps); + pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps); } if (selfpropcb) { WRITE_ONCE(fcs.stop, 1); @@ -2388,7 +2349,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) rfp->rcu_fwd_cb_head = rfcpn; n_launders++; n_launders_sa++; - } else if (!cur_ops->cbflood_max || cur_ops->cbflood_max > n_max_cbs) { + } else { rfcp = kmalloc(sizeof(*rfcp), GFP_KERNEL); if (WARN_ON_ONCE(!rfcp)) { schedule_timeout_interruptible(1); @@ -2398,11 +2359,8 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) n_launders_sa = 0; rfcp->rfc_gps = 0; rfcp->rfc_rfp = rfp; - } else { - rfcp = NULL; } - if (rfcp) - cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); + cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs); if (tick_nohz_full_enabled()) { local_irq_save(flags); @@ -2426,7 +2384,6 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) n_launders + n_max_cbs - n_launders_cb_snap, n_launders, n_launders_sa, n_max_gps, n_max_cbs, cver, gps); - atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs); rcu_torture_fwd_cb_hist(rfp); } schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */ @@ -2442,8 +2399,6 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) static int rcutorture_oom_notify(struct notifier_block *self, unsigned long notused, void *nfreed) { - int i; - long ncbs; struct rcu_fwd *rfp; mutex_lock(&rcu_fwd_mutex); @@ -2454,26 +2409,18 @@ static int rcutorture_oom_notify(struct notifier_block *self, } WARN(1, "%s invoked upon OOM during forward-progress testing.\n", __func__); - for (i = 0; i < fwd_progress; i++) { - rcu_torture_fwd_cb_hist(&rfp[i]); - rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp[i].rcu_fwd_startat)) / 2); - } + rcu_torture_fwd_cb_hist(rfp); + rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp->rcu_fwd_startat)) / 2); WRITE_ONCE(rcu_fwd_emergency_stop, true); smp_mb(); /* Emergency stop before free and wait to avoid hangs. */ - ncbs = 0; - for (i = 0; i < fwd_progress; i++) - ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); - pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs); + pr_info("%s: Freed %lu RCU callbacks.\n", + __func__, rcu_torture_fwd_prog_cbfree(rfp)); rcu_barrier(); - ncbs = 0; - for (i = 0; i < fwd_progress; i++) - ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); - pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs); + pr_info("%s: Freed %lu RCU callbacks.\n", + __func__, rcu_torture_fwd_prog_cbfree(rfp)); rcu_barrier(); - ncbs = 0; - for (i = 0; i < fwd_progress; i++) - ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); - pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs); + pr_info("%s: Freed %lu RCU callbacks.\n", + __func__, rcu_torture_fwd_prog_cbfree(rfp)); smp_mb(); /* Frees before return to avoid redoing OOM. */ (*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */ pr_info("%s returning after OOM processing.\n", __func__); @@ -2488,10 +2435,7 @@ static struct notifier_block rcutorture_oom_nb = { /* Carry out grace-period forward-progress testing. 
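The kthread below alternates the duration-based test in
rcu_torture_fwd_prog_nr() with the callback-flood test in
rcu_torture_fwd_prog_cr(), sleeping for fwd_progress_holdoff seconds
between rounds; the flood test is deferred until in-kernel boot has
ended.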
*/ static int rcu_torture_fwd_prog(void *args) { - bool firsttime = true; - long max_cbs; int oldnice = task_nice(current); - unsigned long oldseq = READ_ONCE(rcu_fwd_seq); struct rcu_fwd *rfp = args; int tested = 0; int tested_tries = 0; @@ -2501,38 +2445,21 @@ static int rcu_torture_fwd_prog(void *args) if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST)) set_user_nice(current, MAX_NICE); do { - if (!rfp->rcu_fwd_id) { - schedule_timeout_interruptible(fwd_progress_holdoff * HZ); - WRITE_ONCE(rcu_fwd_emergency_stop, false); - if (!firsttime) { - max_cbs = atomic_long_xchg(&rcu_fwd_max_cbs, 0); - pr_alert("%s n_max_cbs: %ld\n", __func__, max_cbs); - } - firsttime = false; - WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1); - } else { - while (READ_ONCE(rcu_fwd_seq) == oldseq) - schedule_timeout_interruptible(1); - oldseq = READ_ONCE(rcu_fwd_seq); - } - pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id); - if (rcu_inkernel_boot_has_ended() && torture_num_online_cpus() > rfp->rcu_fwd_id) - rcu_torture_fwd_prog_cr(rfp); - if ((cur_ops->stall_dur && cur_ops->stall_dur() > 0) && - (!IS_ENABLED(CONFIG_TINY_RCU) || - (rcu_inkernel_boot_has_ended() && - torture_num_online_cpus() > rfp->rcu_fwd_id))) + schedule_timeout_interruptible(fwd_progress_holdoff * HZ); + WRITE_ONCE(rcu_fwd_emergency_stop, false); + if (!IS_ENABLED(CONFIG_TINY_RCU) || + rcu_inkernel_boot_has_ended()) rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries); + if (rcu_inkernel_boot_has_ended()) + rcu_torture_fwd_prog_cr(rfp); /* Avoid slow periods, better to test when busy. */ if (stutter_wait("rcu_torture_fwd_prog")) sched_set_normal(current, oldnice); } while (!torture_must_stop()); /* Short runs might not contain a valid forward-progress attempt. */ - if (!rfp->rcu_fwd_id) { - WARN_ON(!tested && tested_tries >= 5); - pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries); - } + WARN_ON(!tested && tested_tries >= 5); + pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries); torture_kthread_stopping("rcu_torture_fwd_prog"); return 0; } @@ -2540,29 +2467,18 @@ static int rcu_torture_fwd_prog(void *args) /* If forward-progress checking is requested and feasible, spawn the thread. */ static int __init rcu_torture_fwd_prog_init(void) { - int i; - int ret = 0; struct rcu_fwd *rfp; if (!fwd_progress) return 0; /* Not requested, so don't do it. */ - if (fwd_progress >= nr_cpu_ids) { - VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Limiting fwd_progress to # CPUs.\n"); - fwd_progress = nr_cpu_ids; - } else if (fwd_progress < 0) { - fwd_progress = nr_cpu_ids; - } if ((!cur_ops->sync && !cur_ops->call) || - (!cur_ops->cbflood_max && (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0)) || - cur_ops == &rcu_busted_ops) { + !cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || cur_ops == &rcu_busted_ops) { VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test"); - fwd_progress = 0; return 0; } if (stall_cpu > 0) { VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing"); - fwd_progress = 0; - if (IS_MODULE(CONFIG_RCU_TORTURE_TEST)) + if (IS_MODULE(CONFIG_RCU_TORTURE_TEST)) return -EINVAL; /* In module, can fail back to user. */ WARN_ON(1); /* Make sure rcutorture notices conflict. 
*/ return 0; @@ -2571,51 +2487,29 @@ static int __init rcu_torture_fwd_prog_init(void) fwd_progress_holdoff = 1; if (fwd_progress_div <= 0) fwd_progress_div = 4; - rfp = kcalloc(fwd_progress, sizeof(*rfp), GFP_KERNEL); - fwd_prog_tasks = kcalloc(fwd_progress, sizeof(*fwd_prog_tasks), GFP_KERNEL); - if (!rfp || !fwd_prog_tasks) { - kfree(rfp); - kfree(fwd_prog_tasks); - fwd_prog_tasks = NULL; - fwd_progress = 0; + rfp = kzalloc(sizeof(*rfp), GFP_KERNEL); + if (!rfp) return -ENOMEM; - } - for (i = 0; i < fwd_progress; i++) { - spin_lock_init(&rfp[i].rcu_fwd_lock); - rfp[i].rcu_fwd_cb_tail = &rfp[i].rcu_fwd_cb_head; - rfp[i].rcu_fwd_id = i; - } + spin_lock_init(&rfp->rcu_fwd_lock); + rfp->rcu_fwd_cb_tail = &rfp->rcu_fwd_cb_head; mutex_lock(&rcu_fwd_mutex); rcu_fwds = rfp; mutex_unlock(&rcu_fwd_mutex); register_oom_notifier(&rcutorture_oom_nb); - for (i = 0; i < fwd_progress; i++) { - ret = torture_create_kthread(rcu_torture_fwd_prog, &rcu_fwds[i], fwd_prog_tasks[i]); - if (ret) { - fwd_progress = i; - return ret; - } - } - return 0; + return torture_create_kthread(rcu_torture_fwd_prog, rfp, fwd_prog_task); } static void rcu_torture_fwd_prog_cleanup(void) { - int i; struct rcu_fwd *rfp; - if (!rcu_fwds || !fwd_prog_tasks) - return; - for (i = 0; i < fwd_progress; i++) - torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_tasks[i]); - unregister_oom_notifier(&rcutorture_oom_nb); - mutex_lock(&rcu_fwd_mutex); + torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task); rfp = rcu_fwds; + mutex_lock(&rcu_fwd_mutex); rcu_fwds = NULL; mutex_unlock(&rcu_fwd_mutex); + unregister_oom_notifier(&rcutorture_oom_nb); kfree(rfp); - kfree(fwd_prog_tasks); - fwd_prog_tasks = NULL; } /* Callback function for RCU barrier testing. */ @@ -2852,7 +2746,7 @@ static int rcu_torture_read_exit(void *unused) &trs, "%s", "rcu_torture_read_exit_child"); if (IS_ERR(tsp)) { - TOROUT_ERRSTRING("out of memory"); + VERBOSE_TOROUT_ERRSTRING("out of memory"); errexit = true; tsp = NULL; break; @@ -2876,7 +2770,7 @@ static int rcu_torture_read_exit(void *unused) static int rcu_torture_read_exit_init(void) { if (read_exit_burst <= 0) - return 0; + return -EINVAL; init_waitqueue_head(&read_exit_wq); read_exit_child_stop = false; read_exit_child_stopped = false; @@ -2954,7 +2848,7 @@ rcu_torture_cleanup(void) rcutorture_seq_diff(gp_seq, start_gp_seq)); torture_stop_kthread(rcu_torture_stats, stats_task); torture_stop_kthread(rcu_torture_fqs, fqs_task); - if (rcu_torture_can_boost() && rcutor_hp >= 0) + if (rcu_torture_can_boost()) cpuhp_remove_state(rcutor_hp); /* @@ -3172,14 +3066,14 @@ rcu_torture_init(void) rcu_torture_write_types(); firsterr = torture_create_kthread(rcu_torture_writer, NULL, writer_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; if (nfakewriters > 0) { fakewriter_tasks = kcalloc(nfakewriters, sizeof(fakewriter_tasks[0]), GFP_KERNEL); if (fakewriter_tasks == NULL) { - TOROUT_ERRSTRING("out of memory"); + VERBOSE_TOROUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -3187,7 +3081,7 @@ rcu_torture_init(void) for (i = 0; i < nfakewriters; i++) { firsterr = torture_create_kthread(rcu_torture_fakewriter, NULL, fakewriter_tasks[i]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), @@ -3195,7 +3089,7 @@ rcu_torture_init(void) rcu_torture_reader_mbchk = kcalloc(nrealreaders, sizeof(*rcu_torture_reader_mbchk), GFP_KERNEL); if (!reader_tasks || !rcu_torture_reader_mbchk) { - TOROUT_ERRSTRING("out of 
memory"); + VERBOSE_TOROUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -3203,7 +3097,7 @@ rcu_torture_init(void) rcu_torture_reader_mbchk[i].rtc_chkrdr = -1; firsterr = torture_create_kthread(rcu_torture_reader, (void *)i, reader_tasks[i]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } nrealnocbers = nocbs_nthreads; @@ -3214,7 +3108,7 @@ rcu_torture_init(void) if (nrealnocbers > 0) { nocb_tasks = kcalloc(nrealnocbers, sizeof(nocb_tasks[0]), GFP_KERNEL); if (nocb_tasks == NULL) { - TOROUT_ERRSTRING("out of memory"); + VERBOSE_TOROUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -3223,18 +3117,18 @@ rcu_torture_init(void) } for (i = 0; i < nrealnocbers; i++) { firsterr = torture_create_kthread(rcu_nocb_toggle, NULL, nocb_tasks[i]); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (stat_interval > 0) { firsterr = torture_create_kthread(rcu_torture_stats, NULL, stats_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (test_no_idle_hz && shuffle_interval > 0) { firsterr = torture_shuffle_init(shuffle_interval * HZ); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (stutter < 0) @@ -3244,7 +3138,7 @@ rcu_torture_init(void) t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ; firsterr = torture_stutter_init(stutter * HZ, t); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (fqs_duration < 0) @@ -3253,7 +3147,7 @@ rcu_torture_init(void) /* Create the fqs thread */ firsterr = torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (test_boost_interval < 1) @@ -3267,9 +3161,9 @@ rcu_torture_init(void) firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE", rcutorture_booster_init, rcutorture_booster_cleanup); - rcutor_hp = firsterr; - if (torture_init_error(firsterr)) + if (firsterr < 0) goto unwind; + rcutor_hp = firsterr; // Testing RCU priority boosting requires rcutorture do // some serious abuse. Counter this by running ksoftirqd @@ -3288,23 +3182,23 @@ rcu_torture_init(void) } shutdown_jiffies = jiffies + shutdown_secs * HZ; firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, rcutorture_sync); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; firsterr = rcu_torture_stall_init(); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; firsterr = rcu_torture_fwd_prog_init(); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; firsterr = rcu_torture_barrier_init(); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; firsterr = rcu_torture_read_exit_init(); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; if (object_debug) rcu_test_debug_objects(); diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 5489ff7f47..66dc14cf56 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -44,10 +44,7 @@ pr_alert("%s" SCALE_FLAG s, scale_type, ## x) #define VERBOSE_SCALEOUT(s, x...) 
\ - do { \ - if (verbose) \ - pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ - } while (0) + do { if (verbose) pr_alert("%s" SCALE_FLAG s, scale_type, ## x); } while (0) static atomic_t verbose_batch_ctr; @@ -57,11 +54,12 @@ do { \ (verbose_batched <= 0 || \ !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \ schedule_timeout_uninterruptible(1); \ - pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ + pr_alert("%s" SCALE_FLAG s, scale_type, ## x); \ } \ } while (0) -#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x) +#define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \ + do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Joel Fernandes (Google) "); @@ -606,7 +604,7 @@ static u64 process_durations(int n) char *buf; u64 sum = 0; - buf = kmalloc(800 + 64, GFP_KERNEL); + buf = kmalloc(128 + nreaders * 32, GFP_KERNEL); if (!buf) return 0; buf[0] = 0; @@ -619,15 +617,13 @@ static u64 process_durations(int n) if (i % 5 == 0) strcat(buf, "\n"); - if (strlen(buf) >= 800) { - pr_alert("%s", buf); - buf[0] = 0; - } strcat(buf, buf1); sum += rt->last_duration_ns; } - pr_alert("%s\n", buf); + strcat(buf, "\n"); + + SCALEOUT("%s\n", buf); kfree(buf); return sum; @@ -641,6 +637,7 @@ static u64 process_durations(int n) // point all the timestamps are printed. static int main_func(void *arg) { + bool errexit = false; int exp, r; char buf1[64]; char *buf; @@ -651,10 +648,10 @@ static int main_func(void *arg) VERBOSE_SCALEOUT("main_func task started"); result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL); - buf = kzalloc(800 + 64, GFP_KERNEL); + buf = kzalloc(64 + nruns * 32, GFP_KERNEL); if (!result_avg || !buf) { - SCALEOUT_ERRSTRING("out of memory"); - goto oom_exit; + VERBOSE_SCALEOUT_ERRSTRING("out of memory"); + errexit = true; } if (holdoff) schedule_timeout_interruptible(holdoff * HZ); @@ -666,6 +663,8 @@ static int main_func(void *arg) // Start exp readers up per experiment for (exp = 0; exp < nruns && !torture_must_stop(); exp++) { + if (errexit) + break; if (torture_must_stop()) goto end; @@ -699,23 +698,26 @@ static int main_func(void *arg) // Print the average of all experiments SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n"); - pr_alert("Runs\tTime(ns)\n"); + if (!errexit) { + buf[0] = 0; + strcat(buf, "\n"); + strcat(buf, "Runs\tTime(ns)\n"); + } + for (exp = 0; exp < nruns; exp++) { u64 avg; u32 rem; + if (errexit) + break; avg = div_u64_rem(result_avg[exp], 1000, &rem); sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem); strcat(buf, buf1); - if (strlen(buf) >= 800) { - pr_alert("%s", buf); - buf[0] = 0; - } } - pr_alert("%s", buf); + if (!errexit) + SCALEOUT("%s", buf); -oom_exit: // This will shutdown everything including us. 
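// (shutdown_start is the flag that the ref_scale_shutdown kthread,
// created in ref_scale_init() below, waits on; once it is set, that
// kthread is expected to tear the test down and power the machine
// off -- hence "including us".  The wakeup path itself is not visible
// in this hunk.)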
if (shutdown) { shutdown_start = 1; @@ -822,7 +824,7 @@ ref_scale_init(void) init_waitqueue_head(&shutdown_wq); firsterr = torture_create_kthread(ref_scale_shutdown, NULL, shutdown_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; schedule_timeout_uninterruptible(1); } @@ -839,17 +841,17 @@ ref_scale_init(void) reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]), GFP_KERNEL); if (!reader_tasks) { - SCALEOUT_ERRSTRING("out of memory"); + VERBOSE_SCALEOUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } - VERBOSE_SCALEOUT("Starting %d reader threads", nreaders); + VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders); for (i = 0; i < nreaders; i++) { firsterr = torture_create_kthread(ref_scale_reader, (void *)i, reader_tasks[i].task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; init_waitqueue_head(&(reader_tasks[i].wq)); @@ -858,7 +860,7 @@ ref_scale_init(void) // Main Task init_waitqueue_head(&main_wq); firsterr = torture_create_kthread(main_func, NULL, main_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; torture_init_end(); diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 92c002d654..a0ba2ed49b 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -99,7 +99,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); - if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task()) + if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) swake_up_one(&ssp->srcu_wq); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index d64f0b1d8c..6591914af4 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -6,7 +6,6 @@ */ #ifdef CONFIG_TASKS_RCU_GENERIC -#include "rcu_segcblist.h" //////////////////////////////////////////////////////////////////////// // @@ -20,34 +19,12 @@ typedef void (*postscan_func_t)(struct list_head *hop); typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp); typedef void (*postgp_func_t)(struct rcu_tasks *rtp); -/** - * struct rcu_tasks_percpu - Per-CPU component of definition for a Tasks-RCU-like mechanism. - * @cblist: Callback list. - * @lock: Lock protecting per-CPU callback list. - * @rtp_jiffies: Jiffies counter value for statistics. - * @rtp_n_lock_retries: Rough lock-contention statistic. - * @rtp_work: Work queue for invoking callbacks. - * @rtp_irq_work: IRQ work queue for deferred wakeups. - * @barrier_q_head: RCU callback for barrier operation. - * @cpu: CPU number corresponding to this entry. - * @rtpp: Pointer to the rcu_tasks structure. - */ -struct rcu_tasks_percpu { - struct rcu_segcblist cblist; - raw_spinlock_t __private lock; - unsigned long rtp_jiffies; - unsigned long rtp_n_lock_retries; - struct work_struct rtp_work; - struct irq_work rtp_irq_work; - struct rcu_head barrier_q_head; - int cpu; - struct rcu_tasks *rtpp; -}; - /** * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism. + * @cbs_head: Head of callback list. + * @cbs_tail: Tail pointer for callback list. * @cbs_wq: Wait queue allowing new callback to get kthread's attention. - * @cbs_gbl_lock: Lock protecting callback list. + * @cbs_lock: Lock protecting callback list. * @kthread_ptr: This flavor's grace-period/callback-invocation kthread. * @gp_func: This flavor's grace-period-wait function. * @gp_state: Grace period's most recent state transition (debugging). 
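(Aside for readers new to the idiom: the cbs_head/cbs_tail pair documented above is the classic tail-pointer queue -- head points at the first callback and tail at the pointer to store through on the next enqueue, so both enqueue and detach-all are O(1) with no empty-list special case. A minimal user-space sketch of the idiom follows; the names and types are illustrative, not the kernel's own.)

#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *cb);
};

struct cb_list {
	struct cb *head;	/* First queued callback, or NULL. */
	struct cb **tail;	/* &head while empty, else &last->next. */
};

static void cb_list_init(struct cb_list *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

/* O(1) enqueue; the empty and non-empty cases are the same store. */
static void cb_enqueue(struct cb_list *l, struct cb *cb)
{
	cb->next = NULL;
	*l->tail = cb;
	l->tail = &cb->next;
}

/* O(1) detach of the whole pending list, as a grace-period kthread
 * would do before invoking callbacks with the lock dropped. */
static struct cb *cb_splice_all(struct cb_list *l)
{
	struct cb *list = l->head;

	cb_list_init(l);
	return list;
}

static void say_hi(struct cb *cb)
{
	printf("callback %p invoked\n", (void *)cb);
}

int main(void)
{
	struct cb_list l;
	struct cb a = { .func = say_hi }, b = { .func = say_hi };
	struct cb *list, *next;

	cb_list_init(&l);
	cb_enqueue(&l, &a);
	cb_enqueue(&l, &b);
	for (list = cb_splice_all(&l); list; list = next) {
		next = list->next;	/* Fetch before invoking: real
					 * callbacks often free themselves. */
		list->func(list);
	}
	return 0;
}

The restored call_rcu_tasks_generic() further down performs the same two stores under cbs_lock.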
@@ -55,7 +32,7 @@ struct rcu_tasks_percpu { * @init_fract: Initial backoff sleep interval. * @gp_jiffies: Time of last @gp_state transition. * @gp_start: Most recent grace-period start in jiffies. - * @tasks_gp_seq: Number of grace periods completed since boot. + * @n_gps: Number of grace periods completed since boot. * @n_ipis: Number of IPIs sent to encourage grace periods to end. * @n_ipis_fails: Number of IPI-send failures. * @pregp_func: This flavor's pre-grace-period function (optional). @@ -64,27 +41,20 @@ struct rcu_tasks_percpu { * @holdouts_func: This flavor's holdout-list scan function (optional). * @postgp_func: This flavor's post-grace-period function (optional). * @call_func: This flavor's call_rcu()-equivalent function. - * @rtpcpu: This flavor's rcu_tasks_percpu structure. - * @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks. - * @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing. - * @percpu_dequeue_lim: Number of per-CPU callback queues in use for dequeuing. - * @percpu_dequeue_gpseq: RCU grace-period number to propagate enqueue limit to dequeuers. - * @barrier_q_mutex: Serialize barrier operations. - * @barrier_q_count: Number of queues being waited on. - * @barrier_q_completion: Barrier wait/wakeup mechanism. - * @barrier_q_seq: Sequence number for barrier operations. * @name: This flavor's textual name. * @kname: This flavor's kthread name. */ struct rcu_tasks { + struct rcu_head *cbs_head; + struct rcu_head **cbs_tail; struct wait_queue_head cbs_wq; - raw_spinlock_t cbs_gbl_lock; + raw_spinlock_t cbs_lock; int gp_state; int gp_sleep; int init_fract; unsigned long gp_jiffies; unsigned long gp_start; - unsigned long tasks_gp_seq; + unsigned long n_gps; unsigned long n_ipis; unsigned long n_ipis_fails; struct task_struct *kthread_ptr; @@ -95,40 +65,20 @@ struct rcu_tasks { holdouts_func_t holdouts_func; postgp_func_t postgp_func; call_rcu_func_t call_func; - struct rcu_tasks_percpu __percpu *rtpcpu; - int percpu_enqueue_shift; - int percpu_enqueue_lim; - int percpu_dequeue_lim; - unsigned long percpu_dequeue_gpseq; - struct mutex barrier_q_mutex; - atomic_t barrier_q_count; - struct completion barrier_q_completion; - unsigned long barrier_q_seq; char *name; char *kname; }; -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp); - -#define DEFINE_RCU_TASKS(rt_name, gp, call, n) \ -static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \ - .rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup), \ -}; \ -static struct rcu_tasks rt_name = \ -{ \ - .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \ - .cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \ - .gp_func = gp, \ - .call_func = call, \ - .rtpcpu = &rt_name ## __percpu, \ - .name = n, \ - .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \ - .percpu_enqueue_lim = 1, \ - .percpu_dequeue_lim = 1, \ - .barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \ - .barrier_q_seq = (0UL - 50UL) << RCU_SEQ_CTR_SHIFT, \ - .kname = #rt_name, \ +#define DEFINE_RCU_TASKS(rt_name, gp, call, n) \ +static struct rcu_tasks rt_name = \ +{ \ + .cbs_tail = &rt_name.cbs_head, \ + .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \ + .cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_lock), \ + .gp_func = gp, \ + .call_func = call, \ + .name = n, \ + .kname = #rt_name, \ } /* Track exiting tasks in order to allow them to be waited for. 
*/ @@ -144,15 +94,6 @@ module_param(rcu_task_ipi_delay, int, 0644); static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT; module_param(rcu_task_stall_timeout, int, 0644); -static int rcu_task_enqueue_lim __read_mostly = -1; -module_param(rcu_task_enqueue_lim, int, 0444); - -static bool rcu_task_cb_adjust; -static int rcu_task_contend_lim __read_mostly = 100; -module_param(rcu_task_contend_lim, int, 0444); -static int rcu_task_collapse_lim __read_mostly = 10; -module_param(rcu_task_collapse_lim, int, 0444); - /* RCU tasks grace-period state for debugging. */ #define RTGS_INIT 0 #define RTGS_WAIT_WAIT_CBS 1 @@ -187,8 +128,6 @@ static const char * const rcu_tasks_gp_state_names[] = { // // Generic code. -static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp); - /* Record grace-period phase and time. */ static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate) { @@ -209,110 +148,23 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp) } #endif /* #ifndef CONFIG_TINY_RCU */ -// Initialize per-CPU callback lists for the specified flavor of -// Tasks RCU. -static void cblist_init_generic(struct rcu_tasks *rtp) -{ - int cpu; - unsigned long flags; - int lim; - int shift; - - raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); - if (rcu_task_enqueue_lim < 0) { - rcu_task_enqueue_lim = 1; - rcu_task_cb_adjust = true; - pr_info("%s: Setting adjustable number of callback queues.\n", __func__); - } else if (rcu_task_enqueue_lim == 0) { - rcu_task_enqueue_lim = 1; - } - lim = rcu_task_enqueue_lim; - - if (lim > nr_cpu_ids) - lim = nr_cpu_ids; - shift = ilog2(nr_cpu_ids / lim); - if (((nr_cpu_ids - 1) >> shift) >= lim) - shift++; - WRITE_ONCE(rtp->percpu_enqueue_shift, shift); - WRITE_ONCE(rtp->percpu_dequeue_lim, lim); - smp_store_release(&rtp->percpu_enqueue_lim, lim); - for_each_possible_cpu(cpu) { - struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); - - WARN_ON_ONCE(!rtpcp); - if (cpu) - raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock)); - raw_spin_lock_rcu_node(rtpcp); // irqs already disabled. - if (rcu_segcblist_empty(&rtpcp->cblist)) - rcu_segcblist_init(&rtpcp->cblist); - INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq); - rtpcp->cpu = cpu; - rtpcp->rtpp = rtp; - raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled. - } - raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags); - pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim)); -} - -// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic(). -static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp) -{ - struct rcu_tasks *rtp; - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work); - - rtp = rtpcp->rtpp; - wake_up(&rtp->cbs_wq); -} - // Enqueue a callback for the specified flavor of Tasks RCU. static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, struct rcu_tasks *rtp) { unsigned long flags; - unsigned long j; - bool needadjust = false; bool needwake; - struct rcu_tasks_percpu *rtpcp; rhp->next = NULL; rhp->func = func; - local_irq_save(flags); - rcu_read_lock(); - rtpcp = per_cpu_ptr(rtp->rtpcpu, - smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift)); - if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled. - raw_spin_lock_rcu_node(rtpcp); // irqs already disabled. 
- j = jiffies; - if (rtpcp->rtp_jiffies != j) { - rtpcp->rtp_jiffies = j; - rtpcp->rtp_n_lock_retries = 0; - } - if (rcu_task_cb_adjust && ++rtpcp->rtp_n_lock_retries > rcu_task_contend_lim && - READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids) - needadjust = true; // Defer adjustment to avoid deadlock. - } - if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) { - raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled. - cblist_init_generic(rtp); - raw_spin_lock_rcu_node(rtpcp); // irqs already disabled. - } - needwake = rcu_segcblist_empty(&rtpcp->cblist); - rcu_segcblist_enqueue(&rtpcp->cblist, rhp); - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); - if (unlikely(needadjust)) { - raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); - if (rtp->percpu_enqueue_lim != nr_cpu_ids) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); - WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids); - smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); - pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); - } - raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags); - } - rcu_read_unlock(); + raw_spin_lock_irqsave(&rtp->cbs_lock, flags); + needwake = !rtp->cbs_head; + WRITE_ONCE(*rtp->cbs_tail, rhp); + rtp->cbs_tail = &rhp->next; + raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags); /* We can't create the thread unless interrupts are enabled. */ if (needwake && READ_ONCE(rtp->kthread_ptr)) - irq_work_queue(&rtpcp->rtp_irq_work); + wake_up(&rtp->cbs_wq); } // Wait for a grace period for the specified flavor of Tasks RCU. @@ -326,173 +178,12 @@ static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp) wait_rcu_gp(rtp->call_func); } -// RCU callback function for rcu_barrier_tasks_generic(). -static void rcu_barrier_tasks_generic_cb(struct rcu_head *rhp) -{ - struct rcu_tasks *rtp; - struct rcu_tasks_percpu *rtpcp; - - rtpcp = container_of(rhp, struct rcu_tasks_percpu, barrier_q_head); - rtp = rtpcp->rtpp; - if (atomic_dec_and_test(&rtp->barrier_q_count)) - complete(&rtp->barrier_q_completion); -} - -// Wait for all in-flight callbacks for the specified RCU Tasks flavor. -// Operates in a manner similar to rcu_barrier(). -static void rcu_barrier_tasks_generic(struct rcu_tasks *rtp) -{ - int cpu; - unsigned long flags; - struct rcu_tasks_percpu *rtpcp; - unsigned long s = rcu_seq_snap(&rtp->barrier_q_seq); - - mutex_lock(&rtp->barrier_q_mutex); - if (rcu_seq_done(&rtp->barrier_q_seq, s)) { - smp_mb(); - mutex_unlock(&rtp->barrier_q_mutex); - return; - } - rcu_seq_start(&rtp->barrier_q_seq); - init_completion(&rtp->barrier_q_completion); - atomic_set(&rtp->barrier_q_count, 2); - for_each_possible_cpu(cpu) { - if (cpu >= smp_load_acquire(&rtp->percpu_dequeue_lim)) - break; - rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); - rtpcp->barrier_q_head.func = rcu_barrier_tasks_generic_cb; - raw_spin_lock_irqsave_rcu_node(rtpcp, flags); - if (rcu_segcblist_entrain(&rtpcp->cblist, &rtpcp->barrier_q_head)) - atomic_inc(&rtp->barrier_q_count); - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); - } - if (atomic_sub_and_test(2, &rtp->barrier_q_count)) - complete(&rtp->barrier_q_completion); - wait_for_completion(&rtp->barrier_q_completion); - rcu_seq_end(&rtp->barrier_q_seq); - mutex_unlock(&rtp->barrier_q_mutex); -} - -// Advance callbacks and indicate whether either a grace period or -// callback invocation is needed. 
-static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) -{ - int cpu; - unsigned long flags; - long n; - long ncbs = 0; - long ncbsnz = 0; - int needgpcb = 0; - - for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_dequeue_lim); cpu++) { - struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); - - /* Advance and accelerate any new callbacks. */ - if (!rcu_segcblist_n_cbs(&rtpcp->cblist)) - continue; - raw_spin_lock_irqsave_rcu_node(rtpcp, flags); - // Should we shrink down to a single callback queue? - n = rcu_segcblist_n_cbs(&rtpcp->cblist); - if (n) { - ncbs += n; - if (cpu > 0) - ncbsnz += n; - } - rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq)); - (void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq)); - if (rcu_segcblist_pend_cbs(&rtpcp->cblist)) - needgpcb |= 0x3; - if (!rcu_segcblist_empty(&rtpcp->cblist)) - needgpcb |= 0x1; - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); - } - - // Shrink down to a single callback queue if appropriate. - // This is done in two stages: (1) If there are no more than - // rcu_task_collapse_lim callbacks on CPU 0 and none on any other - // CPU, limit enqueueing to CPU 0. (2) After an RCU grace period, - // if there has not been an increase in callbacks, limit dequeuing - // to CPU 0. Note the matching RCU read-side critical section in - // call_rcu_tasks_generic(). - if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) { - raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); - if (rtp->percpu_enqueue_lim > 1) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); - smp_store_release(&rtp->percpu_enqueue_lim, 1); - rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu(); - pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name); - } - raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags); - } - if (rcu_task_cb_adjust && !ncbsnz && - poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq)) { - raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); - if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) { - WRITE_ONCE(rtp->percpu_dequeue_lim, 1); - pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name); - } - raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags); - } - - return needgpcb; -} - -// Advance callbacks and invoke any that are ready. 
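/*
 * Illustrative sketch: the rcu_tasks_invoke_cbs() removed just below fanned
 * callback invocation out as a binary tree, with each worker kicking the
 * workers for CPUs 2*cpu+1 and 2*cpu+2 (when below the dequeue limit)
 * before draining its own queue, so wakeups propagate in O(log n) waves
 * instead of all being issued from one CPU.  A toy of just the fan-out
 * index logic; fanout_kick() and its kick() callback are hypothetical:
 */
static void fanout_kick(int cpu, int lim, void (*kick)(int cpu))
{
	int next = cpu * 2 + 1;

	if (next < lim) {
		kick(next);		/* Left child. */
		next++;
		if (next < lim)
			kick(next);	/* Right child. */
	}
}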
-static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu *rtpcp) -{ - int cpu; - int cpunext; - unsigned long flags; - int len; - struct rcu_head *rhp; - struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); - struct rcu_tasks_percpu *rtpcp_next; - - cpu = rtpcp->cpu; - cpunext = cpu * 2 + 1; - if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) { - rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext); - queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work); - cpunext++; - if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) { - rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext); - queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work); - } - } - - if (rcu_segcblist_empty(&rtpcp->cblist)) - return; - raw_spin_lock_irqsave_rcu_node(rtpcp, flags); - rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq)); - rcu_segcblist_extract_done_cbs(&rtpcp->cblist, &rcl); - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); - len = rcl.len; - for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) { - local_bh_disable(); - rhp->func(rhp); - local_bh_enable(); - cond_resched(); - } - raw_spin_lock_irqsave_rcu_node(rtpcp, flags); - rcu_segcblist_add_len(&rtpcp->cblist, -len); - (void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq)); - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); -} - -// Workqueue flood to advance callbacks and invoke any that are ready. -static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp) -{ - struct rcu_tasks *rtp; - struct rcu_tasks_percpu *rtpcp = container_of(wp, struct rcu_tasks_percpu, rtp_work); - - rtp = rtpcp->rtpp; - rcu_tasks_invoke_cbs(rtp, rtpcp); -} - /* RCU-tasks kthread that detects grace periods and invokes callbacks. */ static int __noreturn rcu_tasks_kthread(void *arg) { - int needgpcb; + unsigned long flags; + struct rcu_head *list; + struct rcu_head *next; struct rcu_tasks *rtp = arg; /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ @@ -508,22 +199,42 @@ static int __noreturn rcu_tasks_kthread(void *arg) for (;;) { set_tasks_gp_state(rtp, RTGS_WAIT_CBS); - /* If there were none, wait a bit and start over. */ - wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp))); + /* Pick up any new callbacks. */ + raw_spin_lock_irqsave(&rtp->cbs_lock, flags); + smp_mb__after_spinlock(); // Order updates vs. GP. + list = rtp->cbs_head; + rtp->cbs_head = NULL; + rtp->cbs_tail = &rtp->cbs_head; + raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags); - if (needgpcb & 0x2) { - // Wait for one grace period. - set_tasks_gp_state(rtp, RTGS_WAIT_GP); - rtp->gp_start = jiffies; - rcu_seq_start(&rtp->tasks_gp_seq); - rtp->gp_func(rtp); - rcu_seq_end(&rtp->tasks_gp_seq); + /* If there were none, wait a bit and start over. */ + if (!list) { + wait_event_interruptible(rtp->cbs_wq, + READ_ONCE(rtp->cbs_head)); + if (!rtp->cbs_head) { + WARN_ON(signal_pending(current)); + set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS); + schedule_timeout_idle(HZ/10); + } + continue; } - /* Invoke callbacks. */ - set_tasks_gp_state(rtp, RTGS_INVOKE_CBS); - rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0)); + // Wait for one grace period. + set_tasks_gp_state(rtp, RTGS_WAIT_GP); + rtp->gp_start = jiffies; + rtp->gp_func(rtp); + rtp->n_gps++; + /* Invoke the callbacks. 
*/ + set_tasks_gp_state(rtp, RTGS_INVOKE_CBS); + while (list) { + next = list->next; + local_bh_disable(); + list->func(list); + local_bh_enable(); + list = next; + cond_resched(); + } /* Paranoid sleep to keep this from entering a tight loop */ schedule_timeout_idle(rtp->gp_sleep); } @@ -568,15 +279,14 @@ static void __init rcu_tasks_bootup_oddness(void) /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s) { - struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each... pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n", rtp->kname, tasks_gp_state_getname(rtp), data_race(rtp->gp_state), jiffies - data_race(rtp->gp_jiffies), - data_race(rcu_seq_current(&rtp->tasks_gp_seq)), + data_race(rtp->n_gps), data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis), ".k"[!!data_race(rtp->kthread_ptr)], - ".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))], + ".C"[!!data_race(rtp->cbs_head)], s); } #endif // #ifndef CONFIG_TINY_RCU @@ -658,7 +368,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) //////////////////////////////////////////////////////////////////////// // // Simple variant of RCU whose quiescent states are voluntary context -// switch, cond_resched_tasks_rcu_qs(), user-space execution, and idle. +// switch, cond_resched_rcu_qs(), user-space execution, and idle. // As such, grace periods can take one good long time. There are no // read-side primitives similar to rcu_read_lock() and rcu_read_unlock() // because this implementation is intended to get the system into a safe @@ -701,10 +411,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) // exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU // read-side critical sections waited for by rcu_tasks_postscan(). // -// Pre-grace-period update-side code is ordered before the grace -// via the raw_spin_lock.*rcu_node(). Pre-grace-period read-side code -// is ordered before the grace period via synchronize_rcu() call in -// rcu_tasks_pregp_step() and by the scheduler's locks and interrupt +// Pre-grace-period update-side code is ordered before the grace via the +// ->cbs_lock and the smp_mb__after_spinlock(). Pre-grace-period read-side +// code is ordered before the grace period via synchronize_rcu() call +// in rcu_tasks_pregp_step() and by the scheduler's locks and interrupt // disabling. /* Pre-grace-period preparation. */ @@ -829,7 +539,7 @@ DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks"); * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_tasks() assumes * that the read-side critical sections end at a voluntary context - * switch (not a preemption!), cond_resched_tasks_rcu_qs(), entry into idle, + * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle, * or transition to usermode execution. As such, there are no read-side * primitives analogous to rcu_read_lock() and rcu_read_unlock() because * this primitive is intended to determine that all tasks have passed @@ -876,13 +586,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks); */ void rcu_barrier_tasks(void) { - rcu_barrier_tasks_generic(&rcu_tasks); + /* There is only one callback queue, so this is easy. 
;-) */ + synchronize_rcu_tasks(); } EXPORT_SYMBOL_GPL(rcu_barrier_tasks); static int __init rcu_spawn_tasks_kthread(void) { - cblist_init_generic(&rcu_tasks); rcu_tasks.gp_sleep = HZ / 10; rcu_tasks.init_fract = HZ / 10; rcu_tasks.pregp_func = rcu_tasks_pregp_step; @@ -967,11 +677,11 @@ DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude, * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_tasks_rude() * assumes that the read-side critical sections end at context switch, - * cond_resched_tasks_rcu_qs(), or transition to usermode execution (as - * usermode execution is schedulable). As such, there are no read-side - * primitives analogous to rcu_read_lock() and rcu_read_unlock() because - * this primitive is intended to determine that all tasks have passed - * through a safe state, not so much for data-structure synchronization. + * cond_resched_rcu_qs(), or transition to usermode execution. As such, + * there are no read-side primitives analogous to rcu_read_lock() and + * rcu_read_unlock() because this primitive is intended to determine + * that all tasks have passed through a safe state, not so much for + * data-structure synchronization. * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. @@ -989,8 +699,8 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks_rude); * grace period has elapsed, in other words after all currently * executing rcu-tasks read-side critical sections have elapsed. These * read-side critical sections are delimited by calls to schedule(), - * cond_resched_tasks_rcu_qs(), userspace execution (which is a schedulable - * context), and (in theory, anyway) cond_resched(). + * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory, + * anyway) cond_resched(). * * This is a very specialized primitive, intended only for a few uses in * tracing and other situations requiring manipulation of function preambles @@ -1014,13 +724,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude); */ void rcu_barrier_tasks_rude(void) { - rcu_barrier_tasks_generic(&rcu_tasks_rude); + /* There is only one callback queue, so this is easy. ;-) */ + synchronize_rcu_tasks_rude(); } EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude); static int __init rcu_spawn_tasks_rude_kthread(void) { - cblist_init_generic(&rcu_tasks_rude); rcu_tasks_rude.gp_sleep = HZ / 10; rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude); return 0; @@ -1047,7 +757,7 @@ EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread); // 2. Protects code in the idle loop, exception entry/exit, and // CPU-hotplug code paths, similar to the capabilities of SRCU. // -// 3. Avoids expensive read-side instructions, having overhead similar +// 3. Avoids expensive read-side instruction, having overhead similar // to that of Preemptible RCU. // // There are of course downsides. The grace-period code can send IPIs to @@ -1137,7 +847,7 @@ static void rcu_read_unlock_iw(struct irq_work *iwp) static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw); /* If we are the last reader, wake up the grace-period kthread. */ -void rcu_read_unlock_trace_special(struct task_struct *t) +void rcu_read_unlock_trace_special(struct task_struct *t, int nesting) { int nq = READ_ONCE(t->trc_reader_special.b.need_qs); @@ -1147,7 +857,7 @@ void rcu_read_unlock_trace_special(struct task_struct *t) // Update .need_qs before ->trc_reader_nesting for irq/NMI handlers. 
if (nq) WRITE_ONCE(t->trc_reader_special.b.need_qs, false); - WRITE_ONCE(t->trc_reader_nesting, 0); + WRITE_ONCE(t->trc_reader_nesting, nesting); if (nq && atomic_dec_and_test(&trc_n_readers_need_end)) irq_work_queue(&rcu_tasks_trace_iw); } @@ -1179,24 +889,32 @@ static void trc_read_check_handler(void *t_in) // If the task is no longer running on this CPU, leave. if (unlikely(texp != t)) { + if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) + wake_up(&trc_wait); goto reset_ipi; // Already on holdout list, so will check later. } // If the task is not in a read-side critical section, and // if this is the last reader, awaken the grace-period kthread. if (likely(!READ_ONCE(t->trc_reader_nesting))) { + if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) + wake_up(&trc_wait); + // Mark as checked after decrement to avoid false + // positives on the above WARN_ON_ONCE(). WRITE_ONCE(t->trc_reader_checked, true); goto reset_ipi; } // If we are racing with an rcu_read_unlock_trace(), try again later. - if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) + if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) { + if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) + wake_up(&trc_wait); goto reset_ipi; + } WRITE_ONCE(t->trc_reader_checked, true); // Get here if the task is in a read-side critical section. Set // its state so that it will awaken the grace-period kthread upon // exit from that critical section. - atomic_inc(&trc_n_readers_need_end); // One more to wait on. WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); @@ -1209,10 +927,10 @@ static void trc_read_check_handler(void *t_in) } /* Callback function for scheduler to check locked-down task. */ -static int trc_inspect_reader(struct task_struct *t, void *arg) +static bool trc_inspect_reader(struct task_struct *t, void *arg) { int cpu = task_cpu(t); - int nesting; + bool in_qs = false; bool ofl = cpu_is_offline(cpu); if (task_curr(t)) { @@ -1220,7 +938,7 @@ static int trc_inspect_reader(struct task_struct *t, void *arg) // If no chance of heavyweight readers, do it the hard way. if (!ofl && !IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) - return -EINVAL; + return false; // If heavyweight readers are enabled on the remote task, // we can inspect its state despite its currently running. @@ -1228,22 +946,22 @@ static int trc_inspect_reader(struct task_struct *t, void *arg) n_heavy_reader_attempts++; if (!ofl && // Check for "running" idle tasks on offline CPUs. !rcu_dynticks_zero_in_eqs(cpu, &t->trc_reader_nesting)) - return -EINVAL; // No quiescent state, do it the hard way. + return false; // No quiescent state, do it the hard way. n_heavy_reader_updates++; if (ofl) n_heavy_reader_ofl_updates++; - nesting = 0; + in_qs = true; } else { // The task is not running, so C-language access is safe. - nesting = t->trc_reader_nesting; + in_qs = likely(!t->trc_reader_nesting); } - // If not exiting a read-side critical section, mark as checked - // so that the grace-period kthread will remove it from the - // holdout list. - t->trc_reader_checked = nesting >= 0; - if (nesting <= 0) - return nesting ? -EINVAL : 0; // If in QS, done, otherwise try again later. + // Mark as checked so that the grace-period kthread will + // remove it from the holdout list. + t->trc_reader_checked = true; + + if (in_qs) + return true; // Already in quiescent state, done!!! 
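/*
 * Illustrative sketch: trc_inspect_reader() above keys entirely off
 * t->trc_reader_nesting, where zero means the task is quiescent, positive
 * means it is inside an rcu_read_lock_trace() section, and a transient
 * negative value flags an rcu_read_unlock_trace() still in flight (hence
 * the "try again later" path in the IPI handler).  A schematic reader
 * under those conventions -- the real primitives in
 * include/linux/rcupdate_trace.h are more careful than this:
 */
static void toy_read_lock(struct task_struct *t)
{
	WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
}

static void toy_read_unlock(struct task_struct *t)
{
	int nesting = READ_ONCE(t->trc_reader_nesting) - 1;

	if (nesting || likely(!READ_ONCE(t->trc_reader_special.s))) {
		/* Still nested, or nobody is waiting on this task. */
		WRITE_ONCE(t->trc_reader_nesting, nesting);
		return;
	}
	/* Outermost unlock with ->need_qs set: report the QS, maybe wake GP kthread. */
	rcu_read_unlock_trace_special(t, nesting);
}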
// The task is in a read-side critical section, so set up its // state so that it will awaken the grace-period kthread upon exit @@ -1251,7 +969,7 @@ static int trc_inspect_reader(struct task_struct *t, void *arg) atomic_inc(&trc_n_readers_need_end); // One more to wait on. WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); - return 0; + return true; } /* Attempt to extract the state for the specified task. */ @@ -1273,7 +991,7 @@ static void trc_wait_for_one_reader(struct task_struct *t, // Attempt to nail down the task for inspection. get_task_struct(t); - if (!task_call_func(t, trc_inspect_reader, NULL)) { + if (try_invoke_on_locked_down_task(t, trc_inspect_reader, NULL)) { put_task_struct(t); return; } @@ -1281,7 +999,7 @@ static void trc_wait_for_one_reader(struct task_struct *t, // If this task is not yet on the holdout list, then we are in // an RCU read-side critical section. Otherwise, the invocation of - // trc_add_holdout() that added it to the list did the necessary + // rcu_add_holdout() that added it to the list did the necessary // get_task_struct(). Either way, the task cannot be freed out // from under this code. @@ -1296,17 +1014,21 @@ static void trc_wait_for_one_reader(struct task_struct *t, if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0) return; + atomic_inc(&trc_n_readers_need_end); per_cpu(trc_ipi_to_cpu, cpu) = true; t->trc_ipi_to_cpu = cpu; rcu_tasks_trace.n_ipis++; - if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) { + if (smp_call_function_single(cpu, + trc_read_check_handler, t, 0)) { // Just in case there is some other reason for // failure than the target CPU being offline. - WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n", - __func__, cpu); rcu_tasks_trace.n_ipis_fails++; per_cpu(trc_ipi_to_cpu, cpu) = false; - t->trc_ipi_to_cpu = -1; + t->trc_ipi_to_cpu = cpu; + if (atomic_dec_and_test(&trc_n_readers_need_end)) { + WARN_ON_ONCE(1); + wake_up(&trc_wait); + } } } } @@ -1363,50 +1085,25 @@ static void rcu_tasks_trace_postscan(struct list_head *hop) // Any tasks that exit after this point will set ->trc_reader_checked. } -/* Communicate task state back to the RCU tasks trace stall warning request. */ -struct trc_stall_chk_rdr { - int nesting; - int ipi_to_cpu; - u8 needqs; -}; - -static int trc_check_slow_task(struct task_struct *t, void *arg) -{ - struct trc_stall_chk_rdr *trc_rdrp = arg; - - if (task_curr(t)) - return false; // It is running, so decline to inspect it. - trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting); - trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu); - trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs); - return true; -} - /* Show the state of a task stalling the current RCU tasks trace GP. */ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport) { int cpu; - struct trc_stall_chk_rdr trc_rdr; - bool is_idle_tsk = is_idle_task(t); if (*firstreport) { pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n"); *firstreport = false; } + // FIXME: This should attempt to use try_invoke_on_nonrunning_task(). 
cpu = task_cpu(t); - if (!task_call_func(t, trc_check_slow_task, &trc_rdr)) - pr_alert("P%d: %c\n", - t->pid, - ".i"[is_idle_tsk]); - else - pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n", - t->pid, - ".I"[trc_rdr.ipi_to_cpu >= 0], - ".i"[is_idle_tsk], - ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)], - trc_rdr.nesting, - " N"[!!trc_rdr.needqs], - cpu); + pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n", + t->pid, + ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0], + ".i"[is_idle_task(t)], + ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)], + READ_ONCE(t->trc_reader_nesting), + " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)], + cpu); sched_show_task(t); } @@ -1436,8 +1133,7 @@ static void check_all_holdout_tasks_trace(struct list_head *hop, trc_wait_for_one_reader(t, hop); // If check succeeded, remove this task from the list. - if (smp_load_acquire(&t->trc_ipi_to_cpu) == -1 && - READ_ONCE(t->trc_reader_checked)) + if (READ_ONCE(t->trc_reader_checked)) trc_del_holdout(t); else if (needreport) show_stalled_task_trace(t, firstreport); @@ -1447,34 +1143,20 @@ static void check_all_holdout_tasks_trace(struct list_head *hop, cpus_read_unlock(); if (needreport) { - if (*firstreport) + if (firstreport) pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n"); show_stalled_ipi_trace(); } } -static void rcu_tasks_trace_empty_fn(void *unused) -{ -} - /* Wait for grace period to complete and provide ordering. */ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp) { - int cpu; bool firstreport; struct task_struct *g, *t; LIST_HEAD(holdouts); long ret; - // Wait for any lingering IPI handlers to complete. Note that - // if a CPU has gone offline or transitioned to userspace in the - // meantime, all IPI handlers should have been drained beforehand. - // Yes, this assumes that CPUs process IPIs in order. If that ever - // changes, there will need to be a recheck and/or timed wait. - for_each_online_cpu(cpu) - if (WARN_ON_ONCE(smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))) - smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1); - // Remove the safety count. smp_mb__before_atomic(); // Order vs. earlier atomics atomic_dec(&trc_n_readers_need_end); @@ -1517,7 +1199,7 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t) WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); WRITE_ONCE(t->trc_reader_nesting, 0); if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs))) - rcu_read_unlock_trace_special(t); + rcu_read_unlock_trace_special(t, 0); } /** @@ -1525,11 +1207,15 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t) * @rhp: structure to be used for queueing the RCU updates. * @func: actual callback function to be invoked after the grace period * - * The callback function will be invoked some time after a trace rcu-tasks - * grace period elapses, in other words after all currently executing - * trace rcu-tasks read-side critical sections have completed. These - * read-side critical sections are delimited by calls to rcu_read_lock_trace() - * and rcu_read_unlock_trace(). + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_tasks_trace() + * assumes that the read-side critical sections end at context switch, + * cond_resched_rcu_qs(), or transition to usermode execution. 
As such, + * there are no read-side primitives analogous to rcu_read_lock() and + * rcu_read_unlock() because this primitive is intended to determine + * that all tasks have passed through a safe state, not so much for + * data-structure synchronization. * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. @@ -1545,7 +1231,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks_trace); * * Control will return to the caller some time after a trace rcu-tasks * grace period has elapsed, in other words after all currently executing - * trace rcu-tasks read-side critical sections have elapsed. These read-side + * rcu-tasks read-side critical sections have elapsed. These read-side * critical sections are delimited by calls to rcu_read_lock_trace() * and rcu_read_unlock_trace(). * @@ -1572,13 +1258,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace); */ void rcu_barrier_tasks_trace(void) { - rcu_barrier_tasks_generic(&rcu_tasks_trace); + /* There is only one callback queue, so this is easy. ;-) */ + synchronize_rcu_tasks_trace(); } EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace); static int __init rcu_spawn_tasks_trace_kthread(void) { - cblist_init_generic(&rcu_tasks_trace); if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) { rcu_tasks_trace.gp_sleep = HZ / 10; rcu_tasks_trace.init_fract = HZ / 10; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283..4ca6d5b199 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -79,7 +79,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU - .cblist.flags = SEGCBLIST_RCU_CORE, + .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, #endif }; static struct rcu_state rcu_state = { @@ -624,6 +624,7 @@ static noinstr void rcu_eqs_enter(bool user) instrumentation_begin(); trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); + rcu_prepare_for_idle(); rcu_preempt_deferred_qs(current); // instrumentation for the noinstr rcu_dynticks_eqs_enter() @@ -767,6 +768,9 @@ noinstr void rcu_nmi_exit(void) trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ + if (!in_nmi()) + rcu_prepare_for_idle(); + // instrumentation for the noinstr rcu_dynticks_eqs_enter() instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); instrumentation_end(); @@ -868,6 +872,7 @@ static void noinstr rcu_eqs_exit(bool user) // instrumentation for the noinstr rcu_dynticks_eqs_exit() instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); + rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdp->dynticks_nesting, 1); @@ -1009,6 +1014,12 @@ noinstr void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); // ... but is watching here. 
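/*
 * Illustrative sketch: the eqs_enter/eqs_exit calls in these hunks flip a
 * per-CPU ->dynticks counter whose convention in this era is that an odd
 * value means RCU is watching the CPU and an even value means it is in an
 * extended quiescent state (idle or userspace).  A remote CPU can therefore
 * prove a quiescent state by snapshotting an even value, or by observing
 * the counter move between two reads.  A toy model of that protocol
 * (simplified; toy_* names are hypothetical):
 */
static atomic_t toy_dynticks = ATOMIC_INIT(1);	/* Odd: RCU is watching. */

static void toy_eqs_enter(void) { atomic_inc(&toy_dynticks); }	/* -> even */
static void toy_eqs_exit(void)  { atomic_inc(&toy_dynticks); }	/* -> odd  */

/* Remote snapshot check: an even value means the CPU is in an EQS. */
static bool toy_in_eqs(int snap)
{
	return !(snap & 0x1);
}

/* Any counter movement since the snapshot also implies a quiescent state. */
static bool toy_eqs_passed(int snap)
{
	return atomic_read(&toy_dynticks) != snap;
}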
+ if (!in_nmi()) { + instrumentation_begin(); + rcu_cleanup_after_idle(); + instrumentation_end(); + } + instrumentation_begin(); // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs() instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks)); @@ -1075,24 +1086,6 @@ void rcu_irq_enter_irqson(void) local_irq_restore(flags); } -/* - * Check to see if any future non-offloaded RCU-related work will need - * to be done by the current CPU, even if none need be done immediately, - * returning 1 if so. This function is part of the RCU implementation; - * it is -not- an exported member of the RCU API. This is used by - * the idle-entry code to figure out whether it is safe to disable the - * scheduler-clock interrupt. - * - * Just check whether or not this CPU has non-offloaded RCU callbacks - * queued. - */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) -{ - *nextevt = KTIME_MAX; - return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && - !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); -} - /* * If any sort of urgency was applied to the current CPU (for example, * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order @@ -1226,6 +1219,8 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { unsigned long jtsq; + bool *rnhqp; + bool *ruqp; struct rcu_node *rnp = rdp->mynode; /* @@ -1290,15 +1285,17 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * is set way high. */ jtsq = READ_ONCE(jiffies_to_sched_qs); - if (!READ_ONCE(rdp->rcu_need_heavy_qs) && + ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu); + rnhqp = per_cpu_ptr(&rcu_data.rcu_need_heavy_qs, rdp->cpu); + if (!READ_ONCE(*rnhqp) && (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || time_after(jiffies, rcu_state.jiffies_resched) || rcu_state.cbovld)) { - WRITE_ONCE(rdp->rcu_need_heavy_qs, true); + WRITE_ONCE(*rnhqp, true); /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ - smp_store_release(&rdp->rcu_urgent_qs, true); + smp_store_release(ruqp, true); } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) { - WRITE_ONCE(rdp->rcu_urgent_qs, true); + WRITE_ONCE(*ruqp, true); } /* @@ -1312,7 +1309,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (tick_nohz_full_cpu(rdp->cpu) && (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || rcu_state.cbovld)) { - WRITE_ONCE(rdp->rcu_urgent_qs, true); + WRITE_ONCE(*ruqp, true); resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); } @@ -1474,7 +1471,7 @@ static void rcu_gp_kthread_wake(void) { struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); - if ((current == t && !in_hardirq() && !in_serving_softirq()) || + if ((current == t && !in_irq() && !in_serving_softirq()) || !READ_ONCE(rcu_state.gp_flags) || !t) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); @@ -1783,8 +1780,6 @@ static noinline_for_stack bool rcu_gp_init(void) */ WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF); rcu_for_each_leaf_node(rnp) { - // Wait for CPU-hotplug operations that might have - // started before this grace period did. smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values. 
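/*
 * Illustrative sketch: the ->ofl_seq read just below is one half of a
 * sequence-count handshake.  The CPU-hotplug path bumps the count to an
 * odd value while an online/offline operation is in flight and back to
 * even when it completes, so grace-period initialization can wait out any
 * concurrent operation by spinning until the observed odd value advances.
 * Hypothetical helper names; the full barrier pairing mirrors the
 * smp_mb() calls in rcu_cpu_starting() and rcu_gp_init():
 */
static void hotplug_op(unsigned long *seqp)
{
	WRITE_ONCE(*seqp, *seqp + 1);	/* Odd: operation in flight. */
	smp_mb();			/* Order the operation after the odd value. */
	/* ... bring the CPU up or down ... */
	smp_mb();			/* Order the operation before the even value. */
	WRITE_ONCE(*seqp, *seqp + 1);	/* Even: operation complete. */
}

static void gp_wait_for_hotplug(unsigned long *seqp)
{
	unsigned long firstseq;

	smp_mb();	/* Pair with the barriers around the odd/even updates. */
	firstseq = READ_ONCE(*seqp);
	if (firstseq & 0x1)			/* An operation is in flight... */
		while (firstseq == READ_ONCE(*seqp))
			cpu_relax();		/* ...wait for it to advance. */
	smp_mb();	/* Order GP initialization after the observed transition. */
}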
firstseq = READ_ONCE(rnp->ofl_seq); if (firstseq & 0x1) @@ -2285,7 +2280,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp) unsigned long flags; unsigned long mask; bool needwake = false; - bool needacc = false; + const bool offloaded = rcu_rdp_is_offloaded(rdp); struct rcu_node *rnp; WARN_ON_ONCE(rdp->cpu != smp_processor_id()); @@ -2312,30 +2307,15 @@ rcu_report_qs_rdp(struct rcu_data *rdp) /* * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. - * - * NOCB kthreads have their own way to deal with that... */ - if (!rcu_rdp_is_offloaded(rdp)) { + if (!offloaded) needwake = rcu_accelerate_cbs(rnp, rdp); - } else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) { - /* - * ...but NOCB kthreads may miss or delay callbacks acceleration - * if in the middle of a (de-)offloading process. - */ - needacc = true; - } rcu_disable_urgency_upon_qs(rdp); rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); /* ^^^ Released rnp->lock */ if (needwake) rcu_gp_kthread_wake(); - - if (needacc) { - rcu_nocb_lock_irqsave(rdp, flags); - rcu_accelerate_cbs_unlocked(rnp, rdp); - rcu_nocb_unlock_irqrestore(rdp, flags); - } } } @@ -2379,7 +2359,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp) int rcutree_dying_cpu(unsigned int cpu) { bool blkd; - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) @@ -2467,6 +2447,7 @@ static void rcu_do_batch(struct rcu_data *rdp) int div; bool __maybe_unused empty; unsigned long flags; + const bool offloaded = rcu_rdp_is_offloaded(rdp); struct rcu_head *rhp; struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); long bl, count = 0; @@ -2484,17 +2465,18 @@ static void rcu_do_batch(struct rcu_data *rdp) } /* - * Extract the list of ready callbacks, disabling IRQs to prevent + * Extract the list of ready callbacks, disabling to prevent * races with call_rcu() from interrupt handlers. Leave the * callback counts, as rcu_barrier() needs to be conservative. */ - rcu_nocb_lock_irqsave(rdp, flags); + local_irq_save(flags); + rcu_nocb_lock(rdp); WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); pending = rcu_segcblist_n_cbs(&rdp->cblist); div = READ_ONCE(rcu_divisor); div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div; bl = max(rdp->blimit, pending >> div); - if (in_serving_softirq() && unlikely(bl > 100)) { + if (unlikely(bl > 100)) { long rrn = READ_ONCE(rcu_resched_ns); rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn; @@ -2503,7 +2485,7 @@ static void rcu_do_batch(struct rcu_data *rdp) trace_rcu_batch_start(rcu_state.name, rcu_segcblist_n_cbs(&rdp->cblist), bl); rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); - if (rcu_rdp_is_offloaded(rdp)) + if (offloaded) rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued")); @@ -2531,21 +2513,18 @@ static void rcu_do_batch(struct rcu_data *rdp) /* * Stop only if limit reached and CPU has something to do. */ - if (in_serving_softirq()) { - if (count >= bl && (need_resched() || !is_idle_task(current))) - break; - /* - * Make sure we don't spend too much time here and deprive other - * softirq vectors of CPU cycles. - */ - if (unlikely(tlimit)) { - /* only call local_clock() every 32 callbacks */ - if (likely((count & 31) || local_clock() < tlimit)) - continue; - /* Exceeded the time limit, so leave. 
*/ - break; - } - } else { + if (count >= bl && !offloaded && + (need_resched() || + (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) + break; + if (unlikely(tlimit)) { + /* only call local_clock() every 32 callbacks */ + if (likely((count & 31) || local_clock() < tlimit)) + continue; + /* Exceeded the time limit, so leave. */ + break; + } + if (!in_serving_softirq()) { local_bh_enable(); lockdep_assert_irqs_enabled(); cond_resched_tasks_rcu_qs(); @@ -2554,7 +2533,8 @@ static void rcu_do_batch(struct rcu_data *rdp) } } - rcu_nocb_lock_irqsave(rdp, flags); + local_irq_save(flags); + rcu_nocb_lock(rdp); rdp->n_cbs_invoked += count; trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(), is_idle_task(current), rcu_is_callbacks_kthread()); @@ -2588,6 +2568,9 @@ static void rcu_do_batch(struct rcu_data *rdp) rcu_nocb_unlock_irqrestore(rdp, flags); + /* Re-invoke RCU core processing if there are callbacks remaining. */ + if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist)) + invoke_rcu_core(); tick_dep_clear_task(current, TICK_DEP_BIT_RCU); } @@ -2726,23 +2709,6 @@ static __latent_entropy void rcu_core(void) unsigned long flags; struct rcu_data *rdp = raw_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; - /* - * On RT rcu_core() can be preempted when IRQs aren't disabled. - * Therefore this function can race with concurrent NOCB (de-)offloading - * on this CPU and the below condition must be considered volatile. - * However if we race with: - * - * _ Offloading: In the worst case we accelerate or process callbacks - * concurrently with NOCB kthreads. We are guaranteed to - * call rcu_nocb_lock() if that happens. - * - * _ Deoffloading: In the worst case we miss callbacks acceleration or - * processing. This is fine because the early stage - * of deoffloading invokes rcu_core() after setting - * SEGCBLIST_RCU_CORE. So we guarantee that we'll process - * what could have been dismissed without the need to wait - * for the next rcu_pending() check in the next jiffy. - */ const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist); if (cpu_is_offline(smp_processor_id())) @@ -2751,7 +2717,7 @@ static __latent_entropy void rcu_core(void) WARN_ON_ONCE(!rdp->beenonline); /* Report any deferred quiescent states if preemption enabled. */ - if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) { + if (!(preempt_count() & PREEMPT_MASK)) { rcu_preempt_deferred_qs(current); } else if (rcu_preempt_need_deferred_qs(current)) { set_tsk_need_resched(current); @@ -2774,12 +2740,8 @@ static __latent_entropy void rcu_core(void) /* If there are callbacks ready, invoke them. */ if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) && - likely(READ_ONCE(rcu_scheduler_fully_active))) { + likely(READ_ONCE(rcu_scheduler_fully_active))) rcu_do_batch(rdp); - /* Re-invoke RCU core processing if there are callbacks remaining. */ - if (rcu_segcblist_ready_cbs(&rdp->cblist)) - invoke_rcu_core(); - } /* Do any needed deferred wakeups of rcuo kthreads. */ do_nocb_deferred_wakeup(rdp); @@ -3023,7 +2985,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) head->func = func; head->next = NULL; local_irq_save(flags); - kasan_record_aux_stack_noalloc(head); + kasan_record_aux_stack(head); rdp = this_cpu_ptr(&rcu_data); /* Add the callback to our list. 
*/ @@ -3588,7 +3550,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) return; } - kasan_record_aux_stack_noalloc(ptr); + kasan_record_aux_stack(ptr); success = add_ptr_to_bulk_krc_lock(&krcp, &flags, ptr, !head); if (!success) { run_page_cache_worker(krcp); @@ -4170,6 +4132,7 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->blimit = blimit; rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */ + rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ /* @@ -4289,7 +4252,6 @@ void rcu_cpu_starting(unsigned int cpu) mask = rdp->grpmask; WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); - rcu_dynticks_eqs_online(); smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). raw_spin_lock_irqsave_rcu_node(rnp, flags); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); @@ -4335,7 +4297,9 @@ void rcu_report_dead(unsigned int cpu) do_nocb_deferred_wakeup(rdp); /* QS for any half-done expedited grace period. */ - rcu_report_exp_rdp(rdp); + preempt_disable(); + rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); + preempt_enable(); rcu_preempt_deferred_qs(current); /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd..305cf6aeb4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -157,6 +157,7 @@ struct rcu_data { bool core_needs_qs; /* Core waits for quiescent state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible ->gp_seq wrap. */ + bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */ bool cpu_started; /* RCU watching this onlining CPU. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ @@ -188,6 +189,11 @@ struct rcu_data { bool rcu_urgent_qs; /* GP old need light quiescent state. */ bool rcu_forced_tick; /* Forced tick to provide QS. */ bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */ +#ifdef CONFIG_RCU_FAST_NO_HZ + unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */ + unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */ + int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ +#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ /* 4) rcu_barrier(), OOM callbacks, and expediting. */ struct rcu_head barrier_head; @@ -221,11 +227,8 @@ struct rcu_data { struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */ bool nocb_cb_sleep; /* Is the nocb CB thread asleep? */ struct task_struct *nocb_cb_kthread; - struct list_head nocb_head_rdp; /* - * Head of rcu_data list in wakeup chain, - * if rdp_gp. - */ - struct list_head nocb_entry_rdp; /* rcu_data node in wakeup chain. */ + struct rcu_data *nocb_next_cb_rdp; + /* Next rcu_data in wakeup chain. */ /* The following fields are used by CB kthread, hence new cacheline. 
*/ struct rcu_data *nocb_gp_rdp ____cacheline_internodealigned_in_smp; @@ -416,6 +419,8 @@ static bool rcu_is_callbacks_kthread(void); static void rcu_cpu_kthread_setup(unsigned int cpu); static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp); static void __init rcu_spawn_boost_kthreads(void); +static void rcu_cleanup_after_idle(void); +static void rcu_prepare_for_idle(void); static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_need_deferred_qs(struct task_struct *t); static void rcu_preempt_deferred_qs(struct task_struct *t); @@ -442,16 +447,12 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp); #ifdef CONFIG_RCU_NOCB_CPU static void __init rcu_organize_nocb_kthreads(void); - -/* - * Disable IRQs before checking offloaded state so that local - * locking is safe against concurrent de-offloading. - */ -#define rcu_nocb_lock_irqsave(rdp, flags) \ -do { \ - local_irq_save(flags); \ - if (rcu_segcblist_is_offloaded(&(rdp)->cblist)) \ - raw_spin_lock(&(rdp)->nocb_lock); \ +#define rcu_nocb_lock_irqsave(rdp, flags) \ +do { \ + if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) \ + local_irq_save(flags); \ + else \ + raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags)); \ } while (0) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ #define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 237a79989a..16f94118ca 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -255,7 +255,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, */ static void rcu_report_exp_rdp(struct rcu_data *rdp) { - WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); + WRITE_ONCE(rdp->exp_deferred_qs, false); rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true); } @@ -507,15 +507,13 @@ static void synchronize_rcu_expedited_wait(void) if (rdp->rcu_forced_tick_exp) continue; rdp->rcu_forced_tick_exp = true; - preempt_disable(); - if (cpu_online(cpu)) - tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP); - preempt_enable(); + tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP); } } j = READ_ONCE(jiffies_till_first_fqs); if (synchronize_rcu_expedited_wait_once(j + HZ)) return; + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)); } for (;;) { @@ -659,7 +657,7 @@ static void rcu_exp_handler(void *unused) rcu_dynticks_curr_cpu_in_eqs()) { rcu_report_exp_rdp(rdp); } else { - WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); + rdp->exp_deferred_qs = true; set_tsk_need_resched(t); set_preempt_need_resched(); } @@ -681,7 +679,7 @@ static void rcu_exp_handler(void *unused) if (depth > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); if (rnp->expmask & rdp->grpmask) { - WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); + rdp->exp_deferred_qs = true; t->rcu_read_unlock_special.b.exp_hint = true; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -763,7 +761,7 @@ static void sync_sched_exp_online_cleanup(int cpu) my_cpu = get_cpu(); /* Quiescent state either not needed or already requested, leave. */ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || - READ_ONCE(rdp->cpu_no_qs.b.exp)) { + rdp->cpu_no_qs.b.exp) { put_cpu(); return; } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index eeafb546a7..8fdf44f852 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -60,22 +60,16 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. 
* If the list is invalid, a warning is emitted and all CPUs are offloaded. */ - -static bool rcu_nocb_is_setup; - static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); - if (*str == '=') { - if (cpulist_parse(++str, rcu_nocb_mask)) { - pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n"); - cpumask_setall(rcu_nocb_mask); - } + if (cpulist_parse(str, rcu_nocb_mask)) { + pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n"); + cpumask_setall(rcu_nocb_mask); } - rcu_nocb_is_setup = true; return 1; } -__setup("rcu_nocbs", rcu_nocb_setup); +__setup("rcu_nocbs=", rcu_nocb_setup); static int __init parse_rcu_nocb_poll(char *arg) { @@ -555,6 +549,7 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rcu_nocb_unlock_irqrestore(rdp, flags); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); } + return; } /* @@ -631,21 +626,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) * and the global grace-period kthread are awakened if needed. */ WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp); - /* - * An rcu_data structure is removed from the list after its - * CPU is de-offloaded and added to the list before that CPU is - * (re-)offloaded. If the following loop happens to be referencing - * that rcu_data structure during the time that the corresponding - * CPU is de-offloaded and then immediately re-offloaded, this - * loop's rdp pointer will be carried to the end of the list by - * the resulting pair of list operations. This can cause the loop - * to skip over some of the rcu_data structures that were supposed - * to have been scanned. Fortunately a new iteration through the - * entire loop is forced after a given CPU's rcu_data structure - * is added to the list, so the skipped-over rcu_data structures - * won't be ignored for long. - */ - list_for_each_entry_rcu(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp, 1) { + for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) { bool needwake_state = false; if (!nocb_gp_enabled_cb(rdp)) @@ -786,7 +767,6 @@ static int rcu_nocb_gp_kthread(void *arg) static inline bool nocb_cb_can_run(struct rcu_data *rdp) { u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB; - return rcu_segcblist_test_flags(&rdp->cblist, flags); } @@ -809,18 +789,6 @@ static void nocb_cb_wait(struct rcu_data *rdp) bool can_sleep = true; struct rcu_node *rnp = rdp->mynode; - do { - swait_event_interruptible_exclusive(rdp->nocb_cb_wq, - nocb_cb_wait_cond(rdp)); - - // VVV Ensure CB invocation follows _sleep test. - if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^ - WARN_ON(signal_pending(current)); - trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty")); - } - } while (!nocb_cb_can_run(rdp)); - - local_irq_save(flags); rcu_momentary_dyntick_idle(); local_irq_restore(flags); @@ -873,6 +841,17 @@ static void nocb_cb_wait(struct rcu_data *rdp) if (needwake_state) swake_up_one(&rdp->nocb_state_wq); + + do { + swait_event_interruptible_exclusive(rdp->nocb_cb_wq, + nocb_cb_wait_cond(rdp)); + + // VVV Ensure CB invocation follows _sleep test. + if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^ + WARN_ON(signal_pending(current)); + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty")); + } + } while (!nocb_cb_can_run(rdp)); } /* @@ -1011,33 +990,22 @@ static long rcu_nocb_rdp_deoffload(void *arg) * will refuse to put anything into the bypass. */ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); - /* - * Start with invoking rcu_core() early. 
This way if the current thread - * happens to preempt an ongoing call to rcu_core() in the middle, - * leaving some work dismissed because rcu_core() still thinks the rdp is - * completely offloaded, we are guaranteed a nearby future instance of - * rcu_core() to catch up. - */ - rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE); - invoke_rcu_core(); ret = rdp_offload_toggle(rdp, false, flags); swait_event_exclusive(rdp->nocb_state_wq, !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP)); - /* Stop nocb_gp_wait() from iterating over this structure. */ - list_del_rcu(&rdp->nocb_entry_rdp); /* * Lock one last time to acquire latest callback updates from kthreads * so we can later handle callbacks locally without locking. */ rcu_nocb_lock_irqsave(rdp, flags); /* - * Theoretically we could clear SEGCBLIST_LOCKING after the nocb + * Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb * lock is released but how about being paranoid for once? */ - rcu_segcblist_clear_flags(cblist, SEGCBLIST_LOCKING); + rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY); /* - * Without SEGCBLIST_LOCKING, we can't use + * With SEGCBLIST_SOFTIRQ_ONLY, we can't use * rcu_nocb_unlock_irqrestore() anymore. */ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); @@ -1089,26 +1057,15 @@ static long rcu_nocb_rdp_offload(void *arg) return -EINVAL; pr_info("Offloading %d\n", rdp->cpu); - /* - * Cause future nocb_gp_wait() invocations to iterate over - * structure, resetting ->nocb_gp_sleep and waking up the related - * "rcuog". Since nocb_gp_wait() in turn locks ->nocb_gp_lock - * before setting ->nocb_gp_sleep again, we are guaranteed to - * iterate this newly added structure before "rcuog" goes to - * sleep again. - */ - list_add_tail_rcu(&rdp->nocb_entry_rdp, &rdp->nocb_gp_rdp->nocb_head_rdp); - - /* - * Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING - * is set. + * Can't use rcu_nocb_lock_irqsave() while we are in + * SEGCBLIST_SOFTIRQ_ONLY mode. */ raw_spin_lock_irqsave(&rdp->nocb_lock, flags); /* * We didn't take the nocb lock while working on the - * rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode). + * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode. * Every modifications that have been done previously on * rdp->cblist must be visible remotely by the nocb kthreads * upon wake up after reading the cblist flags. @@ -1127,14 +1084,6 @@ static long rcu_nocb_rdp_offload(void *arg) rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) && rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)); - /* - * All kthreads are ready to work, we can finally relieve rcu_core() and - * enable nocb bypass. 
- */ - rcu_nocb_lock_irqsave(rdp, flags); - rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE); - rcu_nocb_unlock_irqrestore(rdp, flags); - return ret; } @@ -1173,17 +1122,13 @@ void __init rcu_init_nohz(void) need_rcu_nocb_mask = true; #endif /* #if defined(CONFIG_NO_HZ_FULL) */ - if (need_rcu_nocb_mask) { - if (!cpumask_available(rcu_nocb_mask)) { - if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { - pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); - return; - } + if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) { + if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { + pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); + return; } - rcu_nocb_is_setup = true; } - - if (!rcu_nocb_is_setup) + if (!cpumask_available(rcu_nocb_mask)) return; #if defined(CONFIG_NO_HZ_FULL) @@ -1209,8 +1154,8 @@ void __init rcu_init_nohz(void) if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); rcu_segcblist_offload(&rdp->cblist, true); - rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP); - rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE); + rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB); + rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP); } rcu_organize_nocb_kthreads(); } @@ -1233,17 +1178,17 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread * for this CPU's group has not yet been created, spawn it as well. */ -static void rcu_spawn_cpu_nocb_kthread(int cpu) +static void rcu_spawn_one_nocb_kthread(int cpu) { struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_data *rdp_gp; struct task_struct *t; - if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup) - return; - - /* If there already is an rcuo kthread, then nothing to do. */ - if (rdp->nocb_cb_kthread) + /* + * If this isn't a no-CBs CPU or if it already has an rcuo kthread, + * then nothing to do. + */ + if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread) return; /* If we didn't spawn the GP kthread first, reorganize! */ @@ -1265,6 +1210,16 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); } +/* + * If the specified CPU is a no-CBs CPU that does not already have its + * rcuo kthread, spawn it. + */ +static void rcu_spawn_cpu_nocb_kthread(int cpu) +{ + if (rcu_scheduler_fully_active) + rcu_spawn_one_nocb_kthread(cpu); +} + /* * Once the scheduler is running, spawn rcuo kthreads for all online * no-CBs CPUs. This assumes that the early_initcall()s happen before @@ -1275,10 +1230,8 @@ static void __init rcu_spawn_nocb_kthreads(void) { int cpu; - if (rcu_nocb_is_setup) { - for_each_online_cpu(cpu) - rcu_spawn_cpu_nocb_kthread(cpu); - } + for_each_online_cpu(cpu) + rcu_spawn_cpu_nocb_kthread(cpu); } /* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */ @@ -1298,6 +1251,7 @@ static void __init rcu_organize_nocb_kthreads(void) int nl = 0; /* Next GP kthread. */ struct rcu_data *rdp; struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */ + struct rcu_data *rdp_prev = NULL; if (!cpumask_available(rcu_nocb_mask)) return; @@ -1311,14 +1265,14 @@ static void __init rcu_organize_nocb_kthreads(void) * Should the corresponding CPU come online in the future, then * we will spawn the needed set of rcu_nocb_kthread() kthreads. 
*/ - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, rcu_nocb_mask) { rdp = per_cpu_ptr(&rcu_data, cpu); if (rdp->cpu >= nl) { /* New GP kthread, set up for CBs & next GP. */ gotnocbs = true; nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; + rdp->nocb_gp_rdp = rdp; rdp_gp = rdp; - INIT_LIST_HEAD(&rdp->nocb_head_rdp); if (dump_tree) { if (!firsttime) pr_cont("%s\n", gotnocbscbs @@ -1331,12 +1285,12 @@ static void __init rcu_organize_nocb_kthreads(void) } else { /* Another CB kthread, link to previous GP kthread. */ gotnocbscbs = true; + rdp->nocb_gp_rdp = rdp_gp; + rdp_prev->nocb_next_cb_rdp = rdp; if (dump_tree) pr_cont(" %d", cpu); } - rdp->nocb_gp_rdp = rdp_gp; - if (cpumask_test_cpu(cpu, rcu_nocb_mask)) - list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp); + rdp_prev = rdp; } if (gotnocbs && dump_tree) pr_cont("%s\n", gotnocbscbs ? "" : " (self only)"); @@ -1398,7 +1352,6 @@ static void show_rcu_nocb_state(struct rcu_data *rdp) { char bufw[20]; char bufr[20]; - struct rcu_data *nocb_next_rdp; struct rcu_segcblist *rsclp = &rdp->cblist; bool waslocked; bool wassleep; @@ -1406,16 +1359,11 @@ static void show_rcu_nocb_state(struct rcu_data *rdp) if (rdp->nocb_gp_rdp == rdp) show_rcu_nocb_gp_state(rdp); - nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp, - &rdp->nocb_entry_rdp, - typeof(*rdp), - nocb_entry_rdp); - sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]); sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]); pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n", rdp->cpu, rdp->nocb_gp_rdp->cpu, - nocb_next_rdp ? nocb_next_rdp->cpu : -1, + rdp->nocb_next_cb_rdp ? rdp->nocb_next_cb_rdp->cpu : -1, "kK"[!!rdp->nocb_cb_kthread], "bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)], "cC"[!!atomic_read(&rdp->nocb_lock_contended)], diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68..0d21a5cdc7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -16,7 +16,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) { /* - * In order to read the offloaded state of an rdp in a safe + * In order to read the offloaded state of an rdp is a safe * and stable way and prevent from its value to be changed * under us, we must either hold the barrier mutex, the cpu * hotplug lock (read or write) or the nocb lock. 
Local @@ -51,10 +51,12 @@ static void __init rcu_bootup_announce_oddness(void) RCU_FANOUT); if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); + if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) + pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); if (IS_ENABLED(CONFIG_PROVE_RCU)) pr_info("\tRCU lockdep checking is enabled.\n"); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) - pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n"); + pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n"); if (RCU_NUM_LVLS >= 4) pr_info("\tFour(or more)-level hierarchy is enabled.\n"); if (RCU_FANOUT_LEAF != 16) @@ -86,13 +88,13 @@ static void __init rcu_bootup_announce_oddness(void) if (rcu_kick_kthreads) pr_info("\tKick kthreads if too-long grace period.\n"); if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) - pr_info("\tRCU callback double-/use-after-free debug is enabled.\n"); + pr_info("\tRCU callback double-/use-after-free debug enabled.\n"); if (gp_preinit_delay) pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay); if (gp_init_delay) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); if (gp_cleanup_delay) - pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay); + pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay); if (!use_softirq) pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n"); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) @@ -258,10 +260,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * no need to check for a subsequent expedited GP. (Though we are * still in a quiescent state in any case.) */ - if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp) + if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs) rcu_report_exp_rdp(rdp); else - WARN_ON_ONCE(rdp->cpu_no_qs.b.exp); + WARN_ON_ONCE(rdp->exp_deferred_qs); } /* @@ -275,16 +277,12 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * current task, there might be any number of other tasks blocked while * in an RCU read-side critical section. * - * Unlike non-preemptible-RCU, quiescent state reports for expedited - * grace periods are handled separately via deferred quiescent states - * and context switch events. - * * Callers to this function must disable preemption. */ static void rcu_qs(void) { RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n"); - if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) { + if (__this_cpu_read(rcu_data.cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_preempt"), __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs")); @@ -352,7 +350,7 @@ void rcu_note_context_switch(bool preempt) * means that we continue to block the current grace period. */ rcu_qs(); - if (rdp->cpu_no_qs.b.exp) + if (rdp->exp_deferred_qs) rcu_report_exp_rdp(rdp); rcu_tasks_qs(current, preempt); trace_rcu_utilization(TPS("End context switch")); @@ -479,7 +477,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ special = t->rcu_read_unlock_special; rdp = this_cpu_ptr(&rcu_data); - if (!special.s && !rdp->cpu_no_qs.b.exp) { + if (!special.s && !rdp->exp_deferred_qs) { local_irq_restore(flags); return; } @@ -499,7 +497,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) * tasks are handled when removing the task from the * blocked-tasks list below. 
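Several hunks above switch between reading the combined cpu_no_qs.s word and its individual .b.norm byte, with the reverted side splitting the expedited flag back out into rdp->exp_deferred_qs. A userspace sketch of the bits-vs-set union idiom the .s read relies on, with a layout assumed to mirror the kernel's union rcu_noqs:

/*
 * Userspace sketch of the bits-vs-set union behind the cpu_no_qs.s and
 * cpu_no_qs.b.norm reads toggled above. Endianness of the combined word
 * does not matter because callers only test it against zero.
 */
#include <stdint.h>
#include <stdio.h>

union rcu_noqs {
    struct {
        uint8_t norm;   /* normal grace period still needs a QS */
        uint8_t exp;    /* expedited grace period still needs a QS */
    } b;                /* Bits. */
    uint16_t s;         /* Set of bits. */
};

int main(void)
{
    union rcu_noqs q = { .s = 0 };

    q.b.norm = 1;                 /* normal QS still outstanding */
    if (q.s)                      /* one load tests both flags */
        printf("some quiescent state still needed (s=%#x)\n", (unsigned)q.s);

    q.b.norm = 0;
    printf("all QSes reported: s=%#x\n", (unsigned)q.s);
    return 0;
}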
*/ - if (rdp->cpu_no_qs.b.exp) + if (rdp->exp_deferred_qs) rcu_report_exp_rdp(rdp); /* Clean up if blocked during RCU read-side critical section. */ @@ -582,7 +580,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) || + return (__this_cpu_read(rcu_data.exp_deferred_qs) || READ_ONCE(t->rcu_read_unlock_special.s)) && rcu_preempt_depth() == 0; } @@ -644,7 +642,7 @@ static void rcu_read_unlock_special(struct task_struct *t) (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled && t->rcu_blocked_node); // Need to defer quiescent state until everything is enabled. - if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) { + if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) { // Using softirq, safe to awaken, and either the // wakeup is free or there is either an expedited // GP in flight or a potential need to deboost. @@ -816,7 +814,8 @@ void rcu_read_unlock_strict(void) { struct rcu_data *rdp; - if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) + if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) || + irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) return; rdp = this_cpu_ptr(&rcu_data); rcu_report_qs_rdp(rdp); @@ -847,8 +846,10 @@ static void rcu_qs(void) trace_rcu_grace_period(TPS("rcu_sched"), __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs")); __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false); - if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp)) - rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); + if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp)) + return; + __this_cpu_write(rcu_data.cpu_no_qs.b.exp, false); + rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); } /* @@ -925,18 +926,7 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { return false; } - -// Except that we do need to respond to a request by an expedited grace -// period for a quiescent state from this CPU. Note that requests from -// tasks are handled when removing the task from the blocked-tasks list -// below. -static void rcu_preempt_deferred_qs(struct task_struct *t) -{ - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - if (rdp->cpu_no_qs.b.exp) - rcu_report_exp_rdp(rdp); -} +static void rcu_preempt_deferred_qs(struct task_struct *t) { } /* * Because there is no preemptible RCU, there can be no readers blocked, @@ -1164,6 +1154,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) /* * Create an RCU-boost kthread for the specified node if one does not * already exist. We only create this kthread for preemptible RCU. + * Returns zero if all is well, a negated errno otherwise. */ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) { @@ -1214,9 +1205,8 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) if ((mask & leaf_node_cpu_bit(rnp, cpu)) && cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); - cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU)); if (cpumask_weight(cm) == 0) - cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU)); + cpumask_setall(cm); set_cpus_allowed_ptr(t, cm); free_cpumask_var(cm); } @@ -1264,6 +1254,201 @@ static void __init rcu_spawn_boost_kthreads(void) #endif /* #else #ifdef CONFIG_RCU_BOOST */ +#if !defined(CONFIG_RCU_FAST_NO_HZ) + +/* + * Check to see if any future non-offloaded RCU-related work will need + * to be done by the current CPU, even if none need be done immediately, + * returning 1 if so. 
This function is part of the RCU implementation; + * it is -not- an exported member of the RCU API. + * + * Because we do not have RCU_FAST_NO_HZ, just check whether or not this + * CPU has RCU callbacks queued. + */ +int rcu_needs_cpu(u64 basemono, u64 *nextevt) +{ + *nextevt = KTIME_MAX; + return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && + !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); +} + +/* + * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up + * after it. + */ +static void rcu_cleanup_after_idle(void) +{ +} + +/* + * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, + * is nothing. + */ +static void rcu_prepare_for_idle(void) +{ +} + +#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ + +/* + * This code is invoked when a CPU goes idle, at which point we want + * to have the CPU do everything required for RCU so that it can enter + * the energy-efficient dyntick-idle mode. + * + * The following preprocessor symbol controls this: + * + * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted + * to sleep in dyntick-idle mode with RCU callbacks pending. This + * is sized to be roughly one RCU grace period. Those energy-efficiency + * benchmarkers who might otherwise be tempted to set this to a large + * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your + * system. And if you are -that- concerned about energy efficiency, + * just power the system down and be done with it! + * + * The value below works well in practice. If future workloads require + * adjustment, they can be converted into kernel config parameters, though + * making the state machine smarter might be a better option. + */ +#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ + +static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; +module_param(rcu_idle_gp_delay, int, 0644); + +/* + * Try to advance callbacks on the current CPU, but only if it has been + * awhile since the last time we did so. Afterwards, if there are any + * callbacks ready for immediate invocation, return true. + */ +static bool __maybe_unused rcu_try_advance_all_cbs(void) +{ + bool cbs_ready = false; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp; + + /* Exit early if we advanced recently. */ + if (jiffies == rdp->last_advance_all) + return false; + rdp->last_advance_all = jiffies; + + rnp = rdp->mynode; + + /* + * Don't bother checking unless a grace period has + * completed since we last checked and there are + * callbacks not yet ready to invoke. + */ + if ((rcu_seq_completed_gp(rdp->gp_seq, + rcu_seq_current(&rnp->gp_seq)) || + unlikely(READ_ONCE(rdp->gpwrap))) && + rcu_segcblist_pend_cbs(&rdp->cblist)) + note_gp_changes(rdp); + + if (rcu_segcblist_ready_cbs(&rdp->cblist)) + cbs_ready = true; + return cbs_ready; +} + +/* + * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready + * to invoke. If the CPU has callbacks, try to advance them. Tell the + * caller about what to set the timeout. + * + * The caller must have disabled interrupts. + */ +int rcu_needs_cpu(u64 basemono, u64 *nextevt) +{ + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + unsigned long dj; + + lockdep_assert_irqs_disabled(); + + /* If no non-offloaded callbacks, RCU doesn't need the CPU. */ + if (rcu_segcblist_empty(&rdp->cblist) || + rcu_rdp_is_offloaded(rdp)) { + *nextevt = KTIME_MAX; + return 0; + } + + /* Attempt to advance callbacks. */ + if (rcu_try_advance_all_cbs()) { + /* Some ready to invoke, so initiate later invocation.
*/ + invoke_rcu_core(); + return 1; + } + rdp->last_accelerate = jiffies; + + /* Request timer and round. */ + dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies; + + *nextevt = basemono + dj * TICK_NSEC; + return 0; +} + +/* + * Prepare a CPU for idle from an RCU perspective. The first major task is to + * sense whether nohz mode has been enabled or disabled via sysfs. The second + * major task is to accelerate (that is, assign grace-period numbers to) any + * recently arrived callbacks. + * + * The caller must have disabled interrupts. + */ +static void rcu_prepare_for_idle(void) +{ + bool needwake; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp; + int tne; + + lockdep_assert_irqs_disabled(); + if (rcu_rdp_is_offloaded(rdp)) + return; + + /* Handle nohz enablement switches conservatively. */ + tne = READ_ONCE(tick_nohz_active); + if (tne != rdp->tick_nohz_enabled_snap) { + if (!rcu_segcblist_empty(&rdp->cblist)) + invoke_rcu_core(); /* force nohz to see update. */ + rdp->tick_nohz_enabled_snap = tne; + return; + } + if (!tne) + return; + + /* + * If we have not yet accelerated this jiffy, accelerate all + * callbacks on this CPU. + */ + if (rdp->last_accelerate == jiffies) + return; + rdp->last_accelerate = jiffies; + if (rcu_segcblist_pend_cbs(&rdp->cblist)) { + rnp = rdp->mynode; + raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ + needwake = rcu_accelerate_cbs(rnp, rdp); + raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ + if (needwake) + rcu_gp_kthread_wake(); + } +} + +/* + * Clean up for exit from idle. Attempt to advance callbacks based on + * any grace periods that elapsed while the CPU was idle, and if any + * callbacks are now ready to invoke, initiate invocation. + */ +static void rcu_cleanup_after_idle(void) +{ + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + + lockdep_assert_irqs_disabled(); + if (rcu_rdp_is_offloaded(rdp)) + return; + if (rcu_try_advance_all_cbs()) + invoke_rcu_core(); +} + +#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ + /* * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the * grace-period kthread will do force_quiescent_state() processing? @@ -1271,7 +1456,7 @@ static void __init rcu_spawn_boost_kthreads(void) * CPU unless the grace period has extended for too long. * * This code relies on the fact that all NO_HZ_FULL CPUs are also - * RCU_NOCB_CPU CPUs. + * CONFIG_RCU_NOCB_CPU CPUs. */ static bool rcu_nohz_full_cpu(void) { diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 21bebf7c90..677ee3d867 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -240,16 +240,16 @@ struct rcu_stall_chk_rdr { * Report out the state of a not-running task that is stalling the * current RCU grace period. */ -static int check_slow_task(struct task_struct *t, void *arg) +static bool check_slow_task(struct task_struct *t, void *arg) { struct rcu_stall_chk_rdr *rscrp = arg; if (task_curr(t)) - return -EBUSY; // It is running, so decline to inspect it. + return false; // It is running, so decline to inspect it. 
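In the CONFIG_RCU_FAST_NO_HZ rcu_needs_cpu() above, a CPU with pending but not-yet-ready callbacks asks to be woken at the next multiple of rcu_idle_gp_delay jiffies. A userspace model of that timeout arithmetic; the kernel's round_up() requires a power-of-two alignment, which the default delay of 4 satisfies, and TICK_NSEC is given an assumed 1 ms value:

/*
 * Userspace model of the timeout arithmetic in the CONFIG_RCU_FAST_NO_HZ
 * version of rcu_needs_cpu(): the next event is pushed out to the next
 * multiple of rcu_idle_gp_delay jiffies.
 */
#include <stdio.h>

#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)   /* y must be 2^n */
#define TICK_NSEC 1000000ULL                           /* assumed 1000 HZ */

int main(void)
{
    unsigned long jiffies = 1003;        /* illustrative current time */
    int rcu_idle_gp_delay = 4;           /* RCU_IDLE_GP_DELAY */
    unsigned long long basemono = 0;

    unsigned long dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay)
                       - jiffies;
    unsigned long long nextevt = basemono + dj * TICK_NSEC;

    /* 1003 + 4 = 1007, rounded up to 1008, so sleep 5 jiffies */
    printf("dj=%lu nextevt=%llu ns\n", dj, nextevt);
    return 0;
}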
rscrp->nesting = t->rcu_read_lock_nesting; rscrp->rs = t->rcu_read_unlock_special; rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry); - return 0; + return true; } /* @@ -283,7 +283,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); while (i) { t = ts[--i]; - if (task_call_func(t, check_slow_task, &rscr)) + if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr)) pr_cont(" P%d", t->pid); else pr_cont(" P%d/%d:%c%c%c%c", @@ -347,6 +347,26 @@ static void rcu_dump_cpu_stacks(void) } } +#ifdef CONFIG_RCU_FAST_NO_HZ + +static void print_cpu_stall_fast_no_hz(char *cp, int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d", + rdp->last_accelerate & 0xffff, jiffies & 0xffff, + !!rdp->tick_nohz_enabled_snap); +} + +#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ + +static void print_cpu_stall_fast_no_hz(char *cp, int cpu) +{ + *cp = '\0'; +} + +#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ + static const char * const gp_state_names[] = { [RCU_GP_IDLE] = "RCU_GP_IDLE", [RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS", @@ -388,12 +408,13 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp) * of RCU grace periods that this CPU is ignorant of, for example, "1" * if the CPU was aware of the previous grace period. * - * Also print out idle info. + * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. */ static void print_cpu_stall_info(int cpu) { unsigned long delta; bool falsepositive; + char fast_no_hz[72]; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); char *ticks_title; unsigned long ticks_value; @@ -411,10 +432,11 @@ static void print_cpu_stall_info(int cpu) ticks_title = "ticks this GP"; ticks_value = rdp->ticks_this_gp; } + print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); falsepositive = rcu_is_gp_kthread_starving(NULL) && rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)); - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], @@ -427,6 +449,7 @@ static void print_cpu_stall_info(int cpu) rdp->dynticks_nesting, rdp->dynticks_nmi_nesting, rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, + fast_no_hz, falsepositive ? " (false positive?)" : ""); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 156892c22b..c21b38cc25 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -54,11 +54,11 @@ #define MODULE_PARAM_PREFIX "rcupdate." #ifndef CONFIG_TINY_RCU -module_param(rcu_expedited, int, 0444); -module_param(rcu_normal, int, 0444); +module_param(rcu_expedited, int, 0); +module_param(rcu_normal, int, 0); static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT); -#if !defined(CONFIG_PREEMPT_RT) || defined(CONFIG_NO_HZ_FULL) -module_param(rcu_normal_after_boot, int, 0444); +#ifndef CONFIG_PREEMPT_RT +module_param(rcu_normal_after_boot, int, 0); #endif #endif /* #ifndef CONFIG_TINY_RCU */ @@ -247,7 +247,7 @@ struct lockdep_map rcu_lock_map = { .name = "rcu_read_lock", .key = &rcu_lock_key, .wait_type_outer = LD_WAIT_FREE, - .wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_RT implies PREEMPT_RCU */ + .wait_type_inner = LD_WAIT_CONFIG, /* XXX PREEMPT_RCU ? 
*/ }; EXPORT_SYMBOL_GPL(rcu_lock_map); @@ -256,7 +256,7 @@ struct lockdep_map rcu_bh_lock_map = { .name = "rcu_read_lock_bh", .key = &rcu_bh_lock_key, .wait_type_outer = LD_WAIT_FREE, - .wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_RT makes BH preemptible. */ + .wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_LOCK also makes BH preemptible */ }; EXPORT_SYMBOL_GPL(rcu_bh_lock_map); diff --git a/kernel/reboot.c b/kernel/reboot.c index 6bcc5d6a65..f7440c0c7e 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -33,7 +33,6 @@ EXPORT_SYMBOL(cad_pid); #define DEFAULT_REBOOT_MODE #endif enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; -EXPORT_SYMBOL_GPL(reboot_mode); enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED; /* @@ -360,6 +359,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, case LINUX_REBOOT_CMD_HALT: kernel_halt(); do_exit(0); + panic("cannot halt"); case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); diff --git a/kernel/resource.c b/kernel/resource.c index e9646d1a41..dd5ac10ff8 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -73,18 +73,6 @@ static struct resource *next_resource(struct resource *p) return p->sibling; } -static struct resource *next_resource_skip_children(struct resource *p) -{ - while (!p->sibling && p->parent) - p = p->parent; - return p->sibling; -} - -#define for_each_resource(_root, _p, _skip_children) \ - for ((_p) = (_root)->child; (_p); \ - (_p) = (_skip_children) ? next_resource_skip_children(_p) : \ - next_resource(_p)) - static void *r_next(struct seq_file *m, void *v, loff_t *pos) { struct resource *p = v; @@ -99,7 +87,7 @@ enum { MAX_IORES_LEVEL = 5 }; static void *r_start(struct seq_file *m, loff_t *pos) __acquires(resource_lock) { - struct resource *p = pde_data(file_inode(m->file)); + struct resource *p = PDE_DATA(file_inode(m->file)); loff_t l = 0; read_lock(&resource_lock); for (p = p->child; p && l < *pos; p = r_next(m, p, &l)) @@ -115,7 +103,7 @@ static void r_stop(struct seq_file *m, void *v) static int r_show(struct seq_file *m, void *v) { - struct resource *root = pde_data(file_inode(m->file)); + struct resource *root = PDE_DATA(file_inode(m->file)); struct resource *r = v, *p; unsigned long long start, end; int width = root->end < 0x10000 ? 4 : 8; @@ -1725,49 +1713,37 @@ static int strict_iomem_checks; #endif /* - * Check if an address is exclusive to the kernel and must not be mapped to - * user space, for example, via /dev/mem. - * - * Returns true if exclusive to the kernel, otherwise returns false. + * check if an address is reserved in the iomem resource tree + * returns true if reserved, false if not reserved. */ bool iomem_is_exclusive(u64 addr) { - const unsigned int exclusive_system_ram = IORESOURCE_SYSTEM_RAM | - IORESOURCE_EXCLUSIVE; - bool skip_children = false, err = false; + struct resource *p = &iomem_resource; + bool err = false; + loff_t l; int size = PAGE_SIZE; - struct resource *p; + + if (!strict_iomem_checks) + return false; addr = addr & PAGE_MASK; read_lock(&resource_lock); - for_each_resource(&iomem_resource, p, skip_children) { + for (p = p->child; p ; p = r_next(NULL, p, &l)) { + /* + * We can probably skip the resources without + * IORESOURCE_IO attribute? + */ if (p->start >= addr + size) break; - if (p->end < addr) { - skip_children = true; + if (p->end < addr) continue; - } - skip_children = false; - - /* - * IORESOURCE_SYSTEM_RAM resources are exclusive if - * IORESOURCE_EXCLUSIVE is set, even if they - * are not busy and even if "iomem=relaxed" is set. 
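The resource.c hunk continuing below reverts iomem_is_exclusive() to its older per-resource test: only busy resources can be exclusive, and a busy one is exclusive when CONFIG_IO_STRICT_DEVMEM is enabled or IORESOURCE_EXCLUSIVE is set. A userspace rendering of that predicate (flag bit values assumed for illustration):

/*
 * Userspace rendering of the reverted iomem_is_exclusive() per-resource
 * test: unclaimed resources are never exclusive; busy ones are exclusive
 * under strict devmem or an explicit IORESOURCE_EXCLUSIVE mark.
 */
#include <stdbool.h>
#include <stdio.h>

#define IORESOURCE_BUSY      (1u << 0)   /* assumed bit values */
#define IORESOURCE_EXCLUSIVE (1u << 1)

static bool resource_is_exclusive(unsigned int flags, bool strict_devmem)
{
    if (!(flags & IORESOURCE_BUSY))
        return false;                    /* unclaimed: not exclusive */
    return strict_devmem || (flags & IORESOURCE_EXCLUSIVE);
}

int main(void)
{
    printf("busy+exclusive: %d\n",
           resource_is_exclusive(IORESOURCE_BUSY | IORESOURCE_EXCLUSIVE, false));
    printf("busy only, relaxed devmem: %d\n",
           resource_is_exclusive(IORESOURCE_BUSY, false));
    return 0;
}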
The - * responsible driver dynamically adds/removes system RAM within - * such an area and uncontrolled access is dangerous. - */ - if ((p->flags & exclusive_system_ram) == exclusive_system_ram) { - err = true; - break; - } - /* * A resource is exclusive if IORESOURCE_EXCLUSIVE is set * or CONFIG_IO_STRICT_DEVMEM is enabled and the * resource is busy. */ - if (!strict_iomem_checks || !(p->flags & IORESOURCE_BUSY)) + if ((p->flags & IORESOURCE_BUSY) == 0) continue; if (IS_ENABLED(CONFIG_IO_STRICT_DEVMEM) || p->flags & IORESOURCE_EXCLUSIVE) { diff --git a/kernel/scftorture.c b/kernel/scftorture.c index dcb0410950..64a08288b1 100644 --- a/kernel/scftorture.c +++ b/kernel/scftorture.c @@ -38,10 +38,14 @@ #define SCFTORT_STRING "scftorture" #define SCFTORT_FLAG SCFTORT_STRING ": " -#define VERBOSE_SCFTORTOUT(s, x...) \ - do { if (verbose) pr_alert(SCFTORT_FLAG s "\n", ## x); } while (0) +#define SCFTORTOUT(s, x...) \ + pr_alert(SCFTORT_FLAG s, ## x) -#define SCFTORTOUT_ERRSTRING(s, x...) pr_alert(SCFTORT_FLAG "!!! " s "\n", ## x) +#define VERBOSE_SCFTORTOUT(s, x...) \ + do { if (verbose) pr_alert(SCFTORT_FLAG s, ## x); } while (0) + +#define VERBOSE_SCFTORTOUT_ERRSTRING(s, x...) \ + do { if (verbose) pr_alert(SCFTORT_FLAG "!!! " s, ## x); } while (0) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney "); @@ -337,7 +341,6 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra cpu = torture_random(trsp) % nr_cpu_ids; scfp->n_resched++; resched_cpu(cpu); - this_cpu_inc(scf_invoked_count); } break; case SCF_PRIM_SINGLE: @@ -550,18 +553,18 @@ static int __init scf_torture_init(void) scftorture_print_module_parms("Start of test"); - if (weight_resched <= 0 && - weight_single <= 0 && weight_single_rpc <= 0 && weight_single_wait <= 0 && - weight_many <= 0 && weight_many_wait <= 0 && - weight_all <= 0 && weight_all_wait <= 0) { - weight_resched1 = weight_resched == 0 ? 0 : 2 * nr_cpu_ids; - weight_single1 = weight_single == 0 ? 0 : 2 * nr_cpu_ids; - weight_single_rpc1 = weight_single_rpc == 0 ? 0 : 2 * nr_cpu_ids; - weight_single_wait1 = weight_single_wait == 0 ? 0 : 2 * nr_cpu_ids; - weight_many1 = weight_many == 0 ? 0 : 2; - weight_many_wait1 = weight_many_wait == 0 ? 0 : 2; - weight_all1 = weight_all == 0 ? 0 : 1; - weight_all_wait1 = weight_all_wait == 0 ? 
0 : 1; + if (weight_resched == -1 && + weight_single == -1 && weight_single_rpc == -1 && weight_single_wait == -1 && + weight_many == -1 && weight_many_wait == -1 && + weight_all == -1 && weight_all_wait == -1) { + weight_resched1 = 2 * nr_cpu_ids; + weight_single1 = 2 * nr_cpu_ids; + weight_single_rpc1 = 2 * nr_cpu_ids; + weight_single_wait1 = 2 * nr_cpu_ids; + weight_many1 = 2; + weight_many_wait1 = 2; + weight_all1 = 1; + weight_all_wait1 = 1; } else { if (weight_resched == -1) weight_resched1 = 0; @@ -580,17 +583,17 @@ static int __init scf_torture_init(void) if (weight_all_wait == -1) weight_all_wait1 = 0; } - if (weight_resched1 == 0 && weight_single1 == 0 && weight_single_rpc1 == 0 && - weight_single_wait1 == 0 && weight_many1 == 0 && weight_many_wait1 == 0 && + if (weight_single1 == 0 && weight_single_rpc1 == 0 && weight_single_wait1 == 0 && + weight_many1 == 0 && weight_many_wait1 == 0 && weight_all1 == 0 && weight_all_wait1 == 0) { - SCFTORTOUT_ERRSTRING("all zero weights makes no sense"); + VERBOSE_SCFTORTOUT_ERRSTRING("all zero weights makes no sense"); firsterr = -EINVAL; goto unwind; } if (IS_BUILTIN(CONFIG_SCF_TORTURE_TEST)) scf_sel_add(weight_resched1, SCF_PRIM_RESCHED, false); else if (weight_resched1) - SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored"); + VERBOSE_SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored"); scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false); scf_sel_add(weight_single_rpc1, SCF_PRIM_SINGLE_RPC, true); scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true); @@ -602,17 +605,17 @@ static int __init scf_torture_init(void) if (onoff_interval > 0) { firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, NULL); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (shutdown_secs > 0) { firsterr = torture_shutdown_init(shutdown_secs, scf_torture_cleanup); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (stutter > 0) { firsterr = torture_stutter_init(stutter, stutter); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } @@ -621,24 +624,24 @@ static int __init scf_torture_init(void) nthreads = num_online_cpus(); scf_stats_p = kcalloc(nthreads, sizeof(scf_stats_p[0]), GFP_KERNEL); if (!scf_stats_p) { - SCFTORTOUT_ERRSTRING("out of memory"); + VERBOSE_SCFTORTOUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } - VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads", nthreads); + VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads\n", nthreads); atomic_set(&n_started, nthreads); for (i = 0; i < nthreads; i++) { scf_stats_p[i].cpu = i; firsterr = torture_create_kthread(scftorture_invoker, (void *)&scf_stats_p[i], scf_stats_p[i].task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } if (stat_interval > 0) { firsterr = torture_create_kthread(scf_torture_stats, NULL, scf_torture_stats_task); - if (torture_init_error(firsterr)) + if (firsterr) goto unwind; } @@ -648,10 +651,6 @@ static int __init scf_torture_init(void) unwind: torture_init_end(); scf_torture_cleanup(); - if (shutdown_secs) { - WARN_ON(!IS_MODULE(CONFIG_SCF_TORTURE_TEST)); - kernel_power_off(); - } return firsterr; } diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index c83b37af15..978fcfca58 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -3,18 +3,15 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE) endif -# The compilers are complaining about unused variables inside an if(0) scope -# block. 
This is daft, shut them up. -ccflags-y += $(call cc-disable-warning, unused-but-set-variable) - # These files are disabled because they produce non-interesting flaky coverage # that is not a function of syscall inputs. E.g. involuntary context switches. KCOV_INSTRUMENT := n -# Disable KCSAN to avoid excessive noise and performance degradation. To avoid -# false positives ensure barriers implied by sched functions are instrumented. +# There are numerous data races here, however, most of them are due to plain accesses. +# This would make it even harder for syzbot to find reproducers, because these +# bugs trigger without specific input. Disable by default, but should re-enable +# eventually. KCSAN_SANITIZE := n -KCSAN_INSTRUMENT_BARRIERS := y ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9745613d53..a0747eaa2d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -13,7 +13,7 @@ #include "sched.h" #include -#include + #include #include @@ -74,11 +74,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. */ -#ifdef CONFIG_PREEMPT_RT -const_debug unsigned int sysctl_sched_nr_migrate = 8; -#else const_debug unsigned int sysctl_sched_nr_migrate = 32; -#endif /* * period over which we measure -rt task CPU usage in us. @@ -144,7 +140,7 @@ static inline bool __sched_core_less(struct task_struct *a, struct task_struct * return false; /* flip prio, so high prio is leftmost */ - if (prio_less(b, a, !!task_rq(a)->core->core_forceidle_count)) + if (prio_less(b, a, task_rq(a)->core->core_forceidle)) return true; return false; @@ -181,23 +177,15 @@ void sched_core_enqueue(struct rq *rq, struct task_struct *p) rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less); } -void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) +void sched_core_dequeue(struct rq *rq, struct task_struct *p) { rq->core->core_task_seq++; - if (sched_core_enqueued(p)) { - rb_erase(&p->core_node, &rq->core_tree); - RB_CLEAR_NODE(&p->core_node); - } + if (!sched_core_enqueued(p)) + return; - /* - * Migrating the last task off the cpu, with the cpu in forced idle - * state. Reschedule to create an accounting edge for forced idle, - * and re-examine whether the core is still in forced idle state. 
- */ - if (!(flags & DEQUEUE_SAVE) && rq->nr_running == 1 && - rq->core->core_forceidle_count && rq->curr == rq->idle) - resched_curr(rq); + rb_erase(&p->core_node, &rq->core_tree); + RB_CLEAR_NODE(&p->core_node); } /* @@ -288,8 +276,6 @@ static void __sched_core_flip(bool enabled) for_each_cpu(t, smt_mask) cpu_rq(t)->core_enabled = enabled; - cpu_rq(cpu)->core->core_forceidle_start = 0; - sched_core_unlock(cpu, &flags); cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask); @@ -374,8 +360,7 @@ void sched_core_put(void) #else /* !CONFIG_SCHED_CORE */ static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { } -static inline void -sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { } +static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { } #endif /* CONFIG_SCHED_CORE */ @@ -1977,25 +1962,6 @@ bool sched_task_on_rq(struct task_struct *p) return task_on_rq_queued(p); } -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long ip = 0; - unsigned int state; - - if (!p || p == current) - return 0; - - /* Only get wchan if task is blocked and we can keep it that way. */ - raw_spin_lock_irq(&p->pi_lock); - state = READ_ONCE(p->__state); - smp_rmb(); /* see try_to_wake_up() */ - if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq) - ip = __get_wchan(p); - raw_spin_unlock_irq(&p->pi_lock); - - return ip; -} - static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) { if (!(flags & ENQUEUE_NOCLOCK)) @@ -2016,7 +1982,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) { if (sched_core_enabled(rq)) - sched_core_dequeue(rq, p, flags); + sched_core_dequeue(rq, p); if (!(flags & DEQUEUE_NOCLOCK)) update_rq_clock(rq); @@ -2184,9 +2150,6 @@ void migrate_enable(void) return; } - if (WARN_ON_ONCE(!p->migration_disabled)) - return; - /* * Ensure stop_task runs either before or after this, and that * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). 
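sched_core_enqueue() above inserts into rq->core_tree ordered by __sched_core_less(), which per the surrounding function body compares core_cookie first and then flips the priority comparison ("flip prio, so high prio is leftmost"), so that sched_core_find() can locate the best task for a given cookie. A qsort()-based sketch of the same ordering (the numeric higher-is-better prio is a local convention standing in for prio_less()):

/*
 * Userspace sketch of the core-tree ordering: tasks sort by cookie first,
 * then by descending priority within each cookie.
 */
#include <stdio.h>
#include <stdlib.h>

struct task { unsigned long cookie; int prio; };   /* higher prio = better */

static int core_less(const void *pa, const void *pb)
{
    const struct task *a = pa, *b = pb;

    if (a->cookie != b->cookie)
        return a->cookie < b->cookie ? -1 : 1;
    return b->prio - a->prio;          /* flipped: high prio leftmost */
}

int main(void)
{
    struct task tasks[] = { {2, 1}, {1, 5}, {1, 9}, {2, 7} };
    int n = sizeof(tasks) / sizeof(tasks[0]);

    qsort(tasks, n, sizeof(tasks[0]), core_less);
    for (int i = 0; i < n; i++)
        printf("cookie=%lu prio=%d\n", tasks[i].cookie, tasks[i].prio);
    return 0;
}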
@@ -3288,7 +3251,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state ktime_t to = NSEC_PER_SEC / HZ; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); + schedule_hrtimeout(&to, HRTIMER_MODE_REL); continue; } @@ -3526,11 +3489,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) #ifdef CONFIG_SMP if (cpu == rq->cpu) { __schedstat_inc(rq->ttwu_local); - __schedstat_inc(p->stats.nr_wakeups_local); + __schedstat_inc(p->se.statistics.nr_wakeups_local); } else { struct sched_domain *sd; - __schedstat_inc(p->stats.nr_wakeups_remote); + __schedstat_inc(p->se.statistics.nr_wakeups_remote); rcu_read_lock(); for_each_domain(rq->cpu, sd) { if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { @@ -3542,14 +3505,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) } if (wake_flags & WF_MIGRATED) - __schedstat_inc(p->stats.nr_wakeups_migrate); + __schedstat_inc(p->se.statistics.nr_wakeups_migrate); #endif /* CONFIG_SMP */ __schedstat_inc(rq->ttwu_count); - __schedstat_inc(p->stats.nr_wakeups); + __schedstat_inc(p->se.statistics.nr_wakeups); if (wake_flags & WF_SYNC) - __schedstat_inc(p->stats.nr_wakeups_sync); + __schedstat_inc(p->se.statistics.nr_wakeups_sync); } /* @@ -3728,11 +3691,15 @@ void wake_up_if_idle(int cpu) if (!is_idle_task(rcu_dereference(rq->curr))) goto out; - rq_lock_irqsave(rq, &rf); - if (is_idle_task(rq->curr)) - resched_curr(rq); - /* Else CPU is not idle, do nothing here: */ - rq_unlock_irqrestore(rq, &rf); + if (set_nr_if_polling(rq->idle)) { + trace_sched_wake_idle_without_ipi(cpu); + } else { + rq_lock_irqsave(rq, &rf); + if (is_idle_task(rq->curr)) + smp_send_reschedule(cpu); + /* Else CPU is not idle, do nothing here: */ + rq_unlock_irqrestore(rq, &rf); + } out: rcu_read_unlock(); @@ -4142,61 +4109,46 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) } /** - * task_call_func - Invoke a function on task in fixed state + * try_invoke_on_locked_down_task - Invoke a function on task in fixed state * @p: Process for which the function is to be invoked, can be @current. * @func: Function to invoke. * @arg: Argument to function. * - * Fix the task in it's current state by avoiding wakeups and or rq operations - * and call @func(@arg) on it. This function can use ->on_rq and task_curr() - * to work out what the state is, if required. Given that @func can be invoked - * with a runqueue lock held, it had better be quite lightweight. + * If the specified task can be quickly locked into a definite state + * (either sleeping or on a given runqueue), arrange to keep it in that + * state while invoking @func(@arg). This function can use ->on_rq and + * task_curr() to work out what the state is, if required. Given that + * @func can be invoked with a runqueue lock held, it had better be quite + * lightweight. * * Returns: - * Whatever @func returns + * @false if the task slipped out from under the locks. + * @true if the task was locked onto a runqueue or is sleeping. + * However, @func can override this by returning @false. 
*/ -int task_call_func(struct task_struct *p, task_call_f func, void *arg) +bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) { - struct rq *rq = NULL; - unsigned int state; struct rq_flags rf; - int ret; + bool ret = false; + struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); - - state = READ_ONCE(p->__state); - - /* - * Ensure we load p->on_rq after p->__state, otherwise it would be - * possible to, falsely, observe p->on_rq == 0. - * - * See try_to_wake_up() for a longer comment. - */ - smp_rmb(); - - /* - * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when - * the task is blocked. Make sure to check @state since ttwu() can drop - * locks at the end, see ttwu_queue_wakelist(). - */ - if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) + if (p->on_rq) { rq = __task_rq_lock(p, &rf); - - /* - * At this point the task is pinned; either: - * - blocked and we're holding off wakeups (pi->lock) - * - woken, and we're holding off enqueue (rq->lock) - * - queued, and we're holding off schedule (rq->lock) - * - running, and we're holding off de-schedule (rq->lock) - * - * The called function (@func) can use: task_curr(), p->on_rq and - * p->__state to differentiate between these states. - */ - ret = func(p, arg); - - if (rq) + if (task_rq(p) == rq) + ret = func(p, arg); rq_unlock(rq, &rf); - + } else { + switch (READ_ONCE(p->__state)) { + case TASK_RUNNING: + case TASK_WAKING: + break; + default: + smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). + if (!p->on_rq) + ret = func(p, arg); + } + } raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); return ret; } @@ -4247,7 +4199,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #ifdef CONFIG_SCHEDSTATS /* Even if schedstat is disabled, there should not be garbage */ - memset(&p->stats, 0, sizeof(p->stats)); + memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif RB_CLEAR_NODE(&p->dl.rb_node); @@ -4897,12 +4849,18 @@ static struct rq *finish_task_switch(struct task_struct *prev) */ if (mm) { membarrier_mm_sync_core_before_usermode(mm); - mmdrop_sched(mm); + mmdrop(mm); } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); + /* + * Remove function-return probe instances associated with this + * task and put them back on the free list. + */ + kprobe_flush_task(prev); + /* Task is done with its stack. */ put_task_stack(prev); @@ -5267,7 +5225,6 @@ void scheduler_tick(void) if (sched_feat(LATENCY_WARN)) resched_latency = cpu_resched_latency(rq); calc_global_load_tick(rq); - sched_core_tick(rq); rq_unlock(rq, &rf); @@ -5636,7 +5593,8 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) return p; } - BUG(); /* The idle class should always have a runnable task. */ + /* The idle class should always have a runnable task: */ + BUG(); } #ifdef CONFIG_SCHED_CORE @@ -5658,18 +5616,54 @@ static inline bool cookie_match(struct task_struct *a, struct task_struct *b) return a->core_cookie == b->core_cookie; } -static inline struct task_struct *pick_task(struct rq *rq) +// XXX fairness/fwd progress conditions +/* + * Returns + * - NULL if there is no runnable task for this class. + * - the highest priority task for this runqueue if it matches + * rq->core->core_cookie or its priority is greater than max. + * - Else returns idle_task. 
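For reference, a much-simplified userspace analogue of the try_invoke_on_locked_down_task() contract restored above: a single pthread mutex stands in for the pi_lock/runqueue locking, a boolean stands in for the task-state checks, and the combined result is false when the task slipped out from under the lock or the callback itself declined:

/*
 * Simplified model: the callback runs only while the "task" is pinned in
 * a stable (not-running) state under the lock.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_task {
    pthread_mutex_t lock;   /* stands in for p->pi_lock / rq lock */
    bool running;           /* stands in for TASK_RUNNING / on_rq checks */
    int nesting;            /* per-task state the callback inspects */
};

static bool try_invoke(struct fake_task *t,
                       bool (*func)(struct fake_task *, void *), void *arg)
{
    bool ret = false;

    pthread_mutex_lock(&t->lock);
    if (!t->running)            /* only inspect a task pinned off-CPU */
        ret = func(t, arg);
    pthread_mutex_unlock(&t->lock);
    return ret;
}

static bool read_nesting(struct fake_task *t, void *arg)
{
    *(int *)arg = t->nesting;
    return true;
}

int main(void)
{
    struct fake_task t = { PTHREAD_MUTEX_INITIALIZER, false, 3 };
    int nesting;

    if (try_invoke(&t, read_nesting, &nesting))
        printf("sampled nesting=%d\n", nesting);
    else
        printf("task slipped out from under the lock\n");
    return 0;
}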
+ */ +static struct task_struct * +pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max, bool in_fi) { - const struct sched_class *class; - struct task_struct *p; + struct task_struct *class_pick, *cookie_pick; + unsigned long cookie = rq->core->core_cookie; - for_each_class(class) { - p = class->pick_task(rq); - if (p) - return p; + class_pick = class->pick_task(rq); + if (!class_pick) + return NULL; + + if (!cookie) { + /* + * If class_pick is tagged, return it only if it has + * higher priority than max. + */ + if (max && class_pick->core_cookie && + prio_less(class_pick, max, in_fi)) + return idle_sched_class.pick_task(rq); + + return class_pick; } - BUG(); /* The idle class should always have a runnable task. */ + /* + * If class_pick is idle or matches cookie, return early. + */ + if (cookie_equals(class_pick, cookie)) + return class_pick; + + cookie_pick = sched_core_find(rq, cookie); + + /* + * If class > max && class > cookie, it is the highest priority task on + * the core (so far) and it must be selected, otherwise we must go with + * the cookie pick in order to satisfy the constraint. + */ + if (prio_less(cookie_pick, class_pick, in_fi) && + (!max || prio_less(max, class_pick, in_fi))) + return class_pick; + + return cookie_pick; } extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi); @@ -5677,13 +5671,11 @@ extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_f static struct task_struct * pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { - struct task_struct *next, *p, *max = NULL; + struct task_struct *next, *max = NULL; + const struct sched_class *class; const struct cpumask *smt_mask; bool fi_before = false; - bool core_clock_updated = (rq == rq->core); - unsigned long cookie; - int i, cpu, occ = 0; - struct rq *rq_i; + int i, j, cpu, occ = 0; bool need_sync; if (!sched_core_enabled(rq)) @@ -5733,18 +5725,10 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) /* reset state */ rq->core->core_cookie = 0UL; - if (rq->core->core_forceidle_count) { - if (!core_clock_updated) { - update_rq_clock(rq->core); - core_clock_updated = true; - } - sched_core_account_forceidle(rq); - /* reset after accounting force idle */ - rq->core->core_forceidle_start = 0; - rq->core->core_forceidle_count = 0; - rq->core->core_forceidle_occupation = 0; + if (rq->core->core_forceidle) { need_sync = true; fi_before = true; + rq->core->core_forceidle = false; } /* @@ -5764,7 +5748,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * and there are no cookied tasks running on siblings. */ if (!need_sync) { - next = pick_task(rq); + for_each_class(class) { + next = class->pick_task(rq); + if (next) + break; + } + if (!next->core_cookie) { rq->core_pick = NULL; /* @@ -5777,62 +5766,77 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } } - /* - * For each thread: do the regular task pick and find the max prio task - * amongst them. - * - * Tie-break prio towards the current CPU - */ - for_each_cpu_wrap(i, smt_mask, cpu) { - rq_i = cpu_rq(i); + for_each_cpu(i, smt_mask) { + struct rq *rq_i = cpu_rq(i); - /* - * Current cpu always has its clock updated on entrance to - * pick_next_task(). If the current cpu is not the core, - * the core may also have been updated above. 
- */ - if (i != cpu && (rq_i != rq->core || !core_clock_updated)) + rq_i->core_pick = NULL; + + if (i != cpu) update_rq_clock(rq_i); - - p = rq_i->core_pick = pick_task(rq_i); - if (!max || prio_less(max, p, fi_before)) - max = p; } - cookie = rq->core->core_cookie = max->core_cookie; - /* - * For each thread: try and find a runnable task that matches @max or - * force idle. + * Try and select tasks for each sibling in descending sched_class + * order. */ - for_each_cpu(i, smt_mask) { - rq_i = cpu_rq(i); - p = rq_i->core_pick; + for_each_class(class) { +again: + for_each_cpu_wrap(i, smt_mask, cpu) { + struct rq *rq_i = cpu_rq(i); + struct task_struct *p; - if (!cookie_equals(p, cookie)) { - p = NULL; - if (cookie) - p = sched_core_find(rq_i, cookie); + if (rq_i->core_pick) + continue; + + /* + * If this sibling doesn't yet have a suitable task to + * run; ask for the most eligible task, given the + * highest priority task already selected for this + * core. + */ + p = pick_task(rq_i, class, max, fi_before); if (!p) - p = idle_sched_class.pick_task(rq_i); - } + continue; - rq_i->core_pick = p; + if (!is_task_rq_idle(p)) + occ++; - if (p == rq_i->idle) { - if (rq_i->nr_running) { - rq->core->core_forceidle_count++; + rq_i->core_pick = p; + if (rq_i->idle == p && rq_i->nr_running) { + rq->core->core_forceidle = true; if (!fi_before) rq->core->core_forceidle_seq++; } - } else { - occ++; - } - } - if (schedstat_enabled() && rq->core->core_forceidle_count) { - rq->core->core_forceidle_start = rq_clock(rq->core); - rq->core->core_forceidle_occupation = occ; + /* + * If this new candidate is of higher priority than the + * previous; and they're incompatible; we need to wipe + * the slate and start over. pick_task makes sure that + * p's priority is more than max if it doesn't match + * max's cookie. + * + * NOTE: this is a linear max-filter and is thus bounded + * in execution time. + */ + if (!max || !cookie_match(max, p)) { + struct task_struct *old_max = max; + + rq->core->core_cookie = p->core_cookie; + max = p; + + if (old_max) { + rq->core->core_forceidle = false; + for_each_cpu(j, smt_mask) { + if (j == i) + continue; + + cpu_rq(j)->core_pick = NULL; + } + occ = 1; + goto again; + } + } + } } rq->core->core_pick_seq = rq->core->core_task_seq; @@ -5851,7 +5855,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * non-matching user state. */ for_each_cpu(i, smt_mask) { - rq_i = cpu_rq(i); + struct rq *rq_i = cpu_rq(i); /* * An online sibling might have gone offline before a task @@ -5871,8 +5875,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * 1 0 1 * 1 1 0 */ - if (!(fi_before && rq->core->core_forceidle_count)) - task_vruntime_update(rq_i, rq_i->core_pick, !!rq->core->core_forceidle_count); + if (!(fi_before && rq->core->core_forceidle)) + task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle); rq_i->core_pick->core_occupation = occ; @@ -6076,19 +6080,11 @@ static void sched_core_cpu_deactivate(unsigned int cpu) goto unlock; /* copy the shared state to the new leader */ - core_rq->core_task_seq = rq->core_task_seq; - core_rq->core_pick_seq = rq->core_pick_seq; - core_rq->core_cookie = rq->core_cookie; - core_rq->core_forceidle_count = rq->core_forceidle_count; - core_rq->core_forceidle_seq = rq->core_forceidle_seq; - core_rq->core_forceidle_occupation = rq->core_forceidle_occupation; - - /* - * Accounting edge for forced idle is handled in pick_next_task(). 
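The selection loop restored above is the "linear max-filter" it describes: each SMT sibling proposes a pick, an incompatible-cookie pick that beats the current max wipes all prior selections and restarts, and incompatible picks that do not beat it are forced idle. A userspace model of just that control flow (priorities and cookies are made-up values):

/*
 * Userspace model of the pick_next_task() max-filter restart loop.
 */
#include <stdio.h>

#define NR_SIBLINGS 3

struct pick { int prio; unsigned long cookie; };    /* higher prio wins */

static const struct pick idle = { 0, 0 };

int main(void)
{
    struct pick cand[NR_SIBLINGS] = { {2, 0xaa}, {5, 0xbb}, {1, 0xbb} };
    const struct pick *sel[NR_SIBLINGS] = { 0 };
    const struct pick *max = 0;

restart:
    for (int i = 0; i < NR_SIBLINGS; i++) {
        const struct pick *p = &cand[i];

        if (sel[i])
            continue;

        if (max && p->cookie != max->cookie) {
            if (p->prio <= max->prio) {
                sel[i] = &idle;     /* incompatible, not better: force idle */
                continue;
            }
            max = p;                /* higher-prio incompatible pick: */
            for (int j = 0; j < NR_SIBLINGS; j++)
                sel[j] = 0;         /* wipe the slate... */
            sel[i] = p;
            goto restart;           /* ...and start over */
        }

        sel[i] = p;
        if (!max || p->prio > max->prio)
            max = p;
    }

    for (int i = 0; i < NR_SIBLINGS; i++)
        printf("sibling %d runs prio=%d cookie=%#lx\n",
               i, sel[i]->prio, sel[i]->cookie);
    return 0;
}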
- * Don't need another one here, since the hotplug thread shouldn't - * have a cookie. - */ - core_rq->core_forceidle_start = 0; + core_rq->core_task_seq = rq->core_task_seq; + core_rq->core_pick_seq = rq->core_pick_seq; + core_rq->core_cookie = rq->core_cookie; + core_rq->core_forceidle = rq->core_forceidle; + core_rq->core_forceidle_seq = rq->core_forceidle_seq; /* install new leader */ for_each_cpu(t, smt_mask) { @@ -6336,14 +6332,20 @@ static inline void sched_submit_work(struct task_struct *tsk) task_flags = tsk->flags; /* - * If a worker goes to sleep, notify and ask workqueue whether it - * wants to wake up a task to maintain concurrency. + * If a worker went to sleep, notify and ask workqueue whether + * it wants to wake up a task to maintain concurrency. + * As this function is called inside the schedule() context, + * we disable preemption to avoid it calling schedule() again + * in the possible wakeup of a kworker and because wq_worker_sleeping() + * requires it. */ if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { + preempt_disable(); if (task_flags & PF_WQ_WORKER) wq_worker_sleeping(tsk); else io_wq_worker_sleeping(tsk); + preempt_enable_no_resched(); } if (tsk_is_pi_blocked(tsk)) @@ -6354,7 +6356,7 @@ static inline void sched_submit_work(struct task_struct *tsk) * make sure to submit it to avoid deadlocks. */ if (blk_needs_flush_plug(tsk)) - blk_flush_plug(tsk->plug, true); + blk_schedule_flush_plug(tsk); } static void sched_update_worker(struct task_struct *tsk) @@ -6597,13 +6599,12 @@ EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); */ enum { - preempt_dynamic_undefined = -1, - preempt_dynamic_none, + preempt_dynamic_none = 0, preempt_dynamic_voluntary, preempt_dynamic_full, }; -int preempt_dynamic_mode = preempt_dynamic_undefined; +int preempt_dynamic_mode = preempt_dynamic_full; int sched_dynamic_mode(const char *str) { @@ -6676,27 +6677,7 @@ static int __init setup_preempt_mode(char *str) } __setup("preempt=", setup_preempt_mode); -static void __init preempt_dynamic_init(void) -{ - if (preempt_dynamic_mode == preempt_dynamic_undefined) { - if (IS_ENABLED(CONFIG_PREEMPT_NONE)) { - sched_dynamic_update(preempt_dynamic_none); - } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { - sched_dynamic_update(preempt_dynamic_voluntary); - } else { - /* Default static call setting, nothing to do */ - WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); - preempt_dynamic_mode = preempt_dynamic_full; - pr_info("Dynamic Preempt: full\n"); - } - } -} - -#else /* !CONFIG_PREEMPT_DYNAMIC */ - -static inline void preempt_dynamic_init(void) { } - -#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ +#endif /* CONFIG_PREEMPT_DYNAMIC */ /* * This is the entry point to schedule() from kernel preemption @@ -7177,7 +7158,7 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs, unsigned long sched_cpu_util(int cpu, unsigned long max) { - return effective_cpu_util(cpu, cpu_util_cfs(cpu), max, + return effective_cpu_util(cpu, cpu_util_cfs(cpu_rq(cpu)), max, ENERGY_UTIL, NULL); } #endif /* CONFIG_SMP */ @@ -8380,8 +8361,7 @@ int io_schedule_prepare(void) int old_iowait = current->in_iowait; current->in_iowait = 1; - if (current->plug) - blk_flush_plug(current->plug, true); + blk_schedule_flush_plug(current); return old_iowait; } @@ -8565,7 +8545,7 @@ void sched_show_task(struct task_struct *p) rcu_read_unlock(); pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", free, task_pid_nr(p), ppid, - read_task_thread_flags(p)); + (unsigned long)task_thread_info(p)->flags); print_worker_info(KERN_INFO, p); 
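sched_dynamic_mode(), visible as context above, maps the preempt= boot string onto these mode constants. A plausible userspace rendering, assuming the usual strcmp() dispatch with an -EINVAL fallback for unrecognized strings:

/*
 * Userspace sketch of the preempt= string-to-mode mapping; the mode
 * values mirror the reverted enum above.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

enum { preempt_dynamic_none = 0, preempt_dynamic_voluntary, preempt_dynamic_full };

static int sched_dynamic_mode(const char *str)
{
    if (!strcmp(str, "none"))
        return preempt_dynamic_none;
    if (!strcmp(str, "voluntary"))
        return preempt_dynamic_voluntary;
    if (!strcmp(str, "full"))
        return preempt_dynamic_full;
    return -EINVAL;               /* assumed fallback for unknown strings */
}

int main(void)
{
    printf("preempt=voluntary -> %d\n", sched_dynamic_mode("voluntary"));
    printf("preempt=bogus -> %d\n", sched_dynamic_mode("bogus"));
    return 0;
}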
print_stop_info(KERN_INFO, p); @@ -8644,6 +8624,14 @@ void __init init_idle(struct task_struct *idle, int cpu) __sched_fork(0, idle); + /* + * The idle task doesn't need the kthread struct to function, but it + * is dressed up as a per-CPU kthread and thus needs to play the part + * if we want to avoid special-casing it in code that deals with per-CPU + * kthreads. + */ + set_kthread_struct(idle); + raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_rq_lock(rq); @@ -9446,9 +9434,7 @@ void __init sched_init(void) rq->core_pick = NULL; rq->core_enabled = 0; rq->core_tree = RB_ROOT; - rq->core_forceidle_count = 0; - rq->core_forceidle_occupation = 0; - rq->core_forceidle_start = 0; + rq->core_forceidle = false; rq->core_cookie = 0UL; #endif @@ -9462,14 +9448,6 @@ void __init sched_init(void) mmgrab(&init_mm); enter_lazy_tlb(&init_mm, current); - /* - * The idle task doesn't need the kthread struct to function, but it - * is dressed up as a per-CPU kthread and thus needs to play the part - * if we want to avoid special-casing it in code that deals with per-CPU - * kthreads. - */ - WARN_ON(!set_kthread_struct(current)); - /* * Make us the idle thread. Technically, schedule() should not be * called from this thread, however somewhere below it might be, @@ -9490,14 +9468,18 @@ void __init sched_init(void) init_uclamp(); - preempt_dynamic_init(); - scheduler_running = 1; } #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +static inline int preempt_count_equals(int preempt_offset) +{ + int nested = preempt_count() + rcu_preempt_depth(); -void __might_sleep(const char *file, int line) + return (nested == preempt_offset); +} + +void __might_sleep(const char *file, int line, int preempt_offset) { unsigned int state = get_current_state(); /* @@ -9511,32 +9493,11 @@ void __might_sleep(const char *file, int line) (void *)current->task_state_change, (void *)current->task_state_change); - __might_resched(file, line, 0); + ___might_sleep(file, line, preempt_offset); } EXPORT_SYMBOL(__might_sleep); -static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) -{ - if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) - return; - - if (preempt_count() == preempt_offset) - return; - - pr_err("Preemption disabled at:"); - print_ip_sym(KERN_ERR, ip); -} - -static inline bool resched_offsets_ok(unsigned int offsets) -{ - unsigned int nested = preempt_count(); - - nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; - - return nested == offsets; -} - -void __might_resched(const char *file, int line, unsigned int offsets) +void ___might_sleep(const char *file, int line, int preempt_offset) { /* Ratelimiting timestamp: */ static unsigned long prev_jiffy; @@ -9546,7 +9507,7 @@ void __might_resched(const char *file, int line, unsigned int offsets) /* WARN_ON_ONCE() by default, no rate limit required: */ rcu_sleep_check(); - if ((resched_offsets_ok(offsets) && !irqs_disabled() && + if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && !is_idle_task(current) && !current->non_block_count) || system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || oops_in_progress) @@ -9559,33 +9520,29 @@ void __might_resched(const char *file, int line, unsigned int offsets) /* Save this before calling printk(), since that will clobber it: */ preempt_disable_ip = get_preempt_disable_ip(current); - pr_err("BUG: sleeping function called from invalid context at %s:%d\n", - file, line); - pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), current->non_block_count, - 
current->pid, current->comm); - pr_err("preempt_count: %x, expected: %x\n", preempt_count(), - offsets & MIGHT_RESCHED_PREEMPT_MASK); - - if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { - pr_err("RCU nest depth: %d, expected: %u\n", - rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); - } + printk(KERN_ERR + "BUG: sleeping function called from invalid context at %s:%d\n", + file, line); + printk(KERN_ERR + "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), current->non_block_count, + current->pid, current->comm); if (task_stack_end_corrupted(current)) - pr_emerg("Thread overran stack, or stack corrupted\n"); + printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); debug_show_held_locks(current); if (irqs_disabled()) print_irqtrace_events(current); - - print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, - preempt_disable_ip); - + if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) + && !preempt_count_equals(preempt_offset)) { + pr_err("Preemption disabled at:"); + print_ip_sym(KERN_ERR, preempt_disable_ip); + } dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } -EXPORT_SYMBOL(__might_resched); +EXPORT_SYMBOL(___might_sleep); void __cant_sleep(const char *file, int line, int preempt_offset) { @@ -9666,9 +9623,9 @@ void normalize_rt_tasks(void) continue; p->se.exec_start = 0; - schedstat_set(p->stats.wait_start, 0); - schedstat_set(p->stats.sleep_start, 0); - schedstat_set(p->stats.block_start, 0); + schedstat_set(p->se.statistics.wait_start, 0); + schedstat_set(p->se.statistics.sleep_start, 0); + schedstat_set(p->se.statistics.block_start, 0); if (!dl_task(p) && !rt_task(p)) { /* @@ -10536,21 +10493,15 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v) seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time); if (schedstat_enabled() && tg != &root_task_group) { - struct sched_statistics *stats; u64 ws = 0; int i; - for_each_possible_cpu(i) { - stats = __schedstats_from_se(tg->se[i]); - ws += schedstat_val(stats->wait_sum); - } + for_each_possible_cpu(i) + ws += schedstat_val(tg->se[i]->statistics.wait_sum); seq_printf(sf, "wait_sum %llu\n", ws); } - seq_printf(sf, "nr_bursts %d\n", cfs_b->nr_burst); - seq_printf(sf, "burst_time %llu\n", cfs_b->burst_time); - return 0; } #endif /* CONFIG_CFS_BANDWIDTH */ @@ -10666,20 +10617,16 @@ static int cpu_extra_stat_show(struct seq_file *sf, { struct task_group *tg = css_tg(css); struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; - u64 throttled_usec, burst_usec; + u64 throttled_usec; throttled_usec = cfs_b->throttled_time; do_div(throttled_usec, NSEC_PER_USEC); - burst_usec = cfs_b->burst_time; - do_div(burst_usec, NSEC_PER_USEC); seq_printf(sf, "nr_periods %d\n" "nr_throttled %d\n" - "throttled_usec %llu\n" - "nr_bursts %d\n" - "burst_usec %llu\n", + "throttled_usec %llu\n", cfs_b->nr_periods, cfs_b->nr_throttled, - throttled_usec, cfs_b->nr_burst, burst_usec); + throttled_usec); } #endif return 0; diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index c8746a9a7a..9a80e9a474 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -11,7 +11,7 @@ struct sched_core_cookie { refcount_t refcnt; }; -static unsigned long sched_core_alloc_cookie(void) +unsigned long sched_core_alloc_cookie(void) { struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL); if (!ck) @@ -23,7 +23,7 @@ static unsigned long sched_core_alloc_cookie(void) return (unsigned long)ck; } -static void sched_core_put_cookie(unsigned long cookie) +void 
sched_core_put_cookie(unsigned long cookie) { struct sched_core_cookie *ptr = (void *)cookie; @@ -33,7 +33,7 @@ static void sched_core_put_cookie(unsigned long cookie) } } -static unsigned long sched_core_get_cookie(unsigned long cookie) +unsigned long sched_core_get_cookie(unsigned long cookie) { struct sched_core_cookie *ptr = (void *)cookie; @@ -53,8 +53,7 @@ static unsigned long sched_core_get_cookie(unsigned long cookie) * * Returns: the old cookie */ -static unsigned long sched_core_update_cookie(struct task_struct *p, - unsigned long cookie) +unsigned long sched_core_update_cookie(struct task_struct *p, unsigned long cookie) { unsigned long old_cookie; struct rq_flags rf; @@ -73,7 +72,7 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, enqueued = sched_core_enqueued(p); if (enqueued) - sched_core_dequeue(rq, p, DEQUEUE_SAVE); + sched_core_dequeue(rq, p); old_cookie = p->core_cookie; p->core_cookie = cookie; @@ -85,10 +84,6 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, * If task is currently running, it may not be compatible anymore after * the cookie change, so enter the scheduler on its CPU to schedule it * away. - * - * Note that it is possible that as a result of this cookie change, the - * core has now entered/left forced idle state. Defer accounting to the - * next scheduling edge, rather than always forcing a reschedule here. */ if (task_running(rq, p)) resched_curr(rq); @@ -139,10 +134,6 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, if (!static_branch_likely(&sched_smt_present)) return -ENODEV; - BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID); - BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID); - BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID); - if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 || (cmd != PR_SCHED_CORE_GET && uaddr)) return -EINVAL; @@ -236,63 +227,3 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, return err; } -#ifdef CONFIG_SCHEDSTATS - -/* REQUIRES: rq->core's clock recently updated. */ -void __sched_core_account_forceidle(struct rq *rq) -{ - const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq)); - u64 delta, now = rq_clock(rq->core); - struct rq *rq_i; - struct task_struct *p; - int i; - - lockdep_assert_rq_held(rq); - - WARN_ON_ONCE(!rq->core->core_forceidle_count); - - if (rq->core->core_forceidle_start == 0) - return; - - delta = now - rq->core->core_forceidle_start; - if (unlikely((s64)delta <= 0)) - return; - - rq->core->core_forceidle_start = now; - - if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) { - /* can't be forced idle without a running task */ - } else if (rq->core->core_forceidle_count > 1 || - rq->core->core_forceidle_occupation > 1) { - /* - * For larger SMT configurations, we need to scale the charged - * forced idle amount since there can be more than one forced - * idle sibling and more than one running cookied task. 
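The cookie helpers un-static-ed above implement a refcounted cookie whose value is just the address of a refcounted allocation, so tasks sharing a cookie share the allocation. A userspace sketch of that lifecycle, with a plain counter standing in for refcount_t:

/*
 * Userspace sketch (illustrative, not the kernel API) of the core_sched.c
 * cookie scheme: alloc returns the allocation's address with one
 * reference; get takes another reference; put frees on the last drop.
 */
#include <stdio.h>
#include <stdlib.h>

struct cookie { unsigned refcnt; };

static unsigned long cookie_alloc(void)
{
    struct cookie *ck = malloc(sizeof(*ck));

    if (!ck)
        return 0;
    ck->refcnt = 1;
    return (unsigned long)ck;
}

static unsigned long cookie_get(unsigned long cookie)
{
    struct cookie *ck = (void *)cookie;

    if (ck)
        ck->refcnt++;
    return cookie;
}

static void cookie_put(unsigned long cookie)
{
    struct cookie *ck = (void *)cookie;

    if (ck && --ck->refcnt == 0)
        free(ck);
}

int main(void)
{
    unsigned long c = cookie_alloc();          /* task A creates a cookie */
    unsigned long shared = cookie_get(c);      /* task B shares it */

    printf("tasks share cookie %#lx\n", shared);
    cookie_put(shared);                        /* B exits */
    cookie_put(c);                             /* A exits: last put frees */
    return 0;
}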
- */ - delta *= rq->core->core_forceidle_count; - delta = div_u64(delta, rq->core->core_forceidle_occupation); - } - - for_each_cpu(i, smt_mask) { - rq_i = cpu_rq(i); - p = rq_i->core_pick ?: rq_i->curr; - - if (p == rq_i->idle) - continue; - - __schedstat_add(p->stats.core_forceidle_sum, delta); - } -} - -void __sched_core_tick(struct rq *rq) -{ - if (!rq->core->core_forceidle_count) - return; - - if (rq != rq->core) - update_rq_clock(rq->core); - - __sched_core_account_forceidle(rq); -} - -#endif /* CONFIG_SCHEDSTATS */ diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 3d06c5e422..ab67d97a84 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -103,8 +103,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, * We allow index == CPUACCT_STAT_NSTATS here to read * the sum of usages. */ - if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS)) - return 0; + BUG_ON(index > CPUACCT_STAT_NSTATS); #ifndef CONFIG_64BIT /* @@ -261,30 +260,25 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V) static int cpuacct_stats_show(struct seq_file *sf, void *v) { struct cpuacct *ca = css_ca(seq_css(sf)); - struct task_cputime cputime; - u64 val[CPUACCT_STAT_NSTATS]; + s64 val[CPUACCT_STAT_NSTATS]; int cpu; int stat; - memset(&cputime, 0, sizeof(cputime)); + memset(val, 0, sizeof(val)); for_each_possible_cpu(cpu) { u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; - cputime.utime += cpustat[CPUTIME_USER]; - cputime.utime += cpustat[CPUTIME_NICE]; - cputime.stime += cpustat[CPUTIME_SYSTEM]; - cputime.stime += cpustat[CPUTIME_IRQ]; - cputime.stime += cpustat[CPUTIME_SOFTIRQ]; - - cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu); + val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; + val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; + val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; + val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; + val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; } - cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime, - &val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]); - for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { - seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat], - nsec_to_clock_t(val[stat])); + seq_printf(sf, "%s %lld\n", + cpuacct_stat_desc[stat], + (long long)nsec_to_clock_t(val[stat])); } return 0; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 26778884d9..e7af188573 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -168,7 +168,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) sg_cpu->max = max; sg_cpu->bw_dl = cpu_bw_dl(rq); - sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, + sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max, FREQUENCY_UTIL, NULL); } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index b7ec42732b..042a6dbce8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -615,8 +615,7 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) .sum_exec_runtime = p->se.sum_exec_runtime, }; - if (task_cputime(p, &cputime.utime, &cputime.stime)) - cputime.sum_exec_runtime = task_sched_runtime(p); + task_cputime(p, &cputime.utime, &cputime.stime); cputime_adjust(&cputime, &p->prev_cputime, ut, st); } EXPORT_SYMBOL_GPL(task_cputime_adjusted); @@ -829,21 +828,19 @@ u64 task_gtime(struct task_struct *t) * add up the pending nohz execution time since the last * cputime snapshot. 
*/ -bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) +void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { struct vtime *vtime = &t->vtime; unsigned int seq; u64 delta; - int ret; if (!vtime_accounting_enabled()) { *utime = t->utime; *stime = t->stime; - return false; + return; } do { - ret = false; seq = read_seqcount_begin(&vtime->seqcount); *utime = t->utime; @@ -853,7 +850,6 @@ bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) if (vtime->state < VTIME_SYS) continue; - ret = true; delta = vtime_delta(vtime); /* @@ -865,8 +861,6 @@ bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) else *utime += vtime->utime + delta; } while (read_seqcount_retry(&vtime->seqcount, seq)); - - return ret; } static int vtime_state_fetch(struct vtime *vtime, int cpu) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d2c072b0ef..e94314633b 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1265,10 +1265,8 @@ static void update_curr_dl(struct rq *rq) return; } - schedstat_set(curr->stats.exec_max, - max(curr->stats.exec_max, delta_exec)); - - trace_sched_stat_runtime(curr, delta_exec, 0); + schedstat_set(curr->se.statistics.exec_max, + max(curr->se.statistics.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); @@ -1474,82 +1472,6 @@ static inline bool __dl_less(struct rb_node *a, const struct rb_node *b) return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline); } -static inline struct sched_statistics * -__schedstats_from_dl_se(struct sched_dl_entity *dl_se) -{ - return &dl_task_of(dl_se)->stats; -} - -static inline void -update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se) -{ - struct sched_statistics *stats; - - if (!schedstat_enabled()) - return; - - stats = __schedstats_from_dl_se(dl_se); - __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats); -} - -static inline void -update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se) -{ - struct sched_statistics *stats; - - if (!schedstat_enabled()) - return; - - stats = __schedstats_from_dl_se(dl_se); - __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats); -} - -static inline void -update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se) -{ - struct sched_statistics *stats; - - if (!schedstat_enabled()) - return; - - stats = __schedstats_from_dl_se(dl_se); - __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats); -} - -static inline void -update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, - int flags) -{ - if (!schedstat_enabled()) - return; - - if (flags & ENQUEUE_WAKEUP) - update_stats_enqueue_sleeper_dl(dl_rq, dl_se); -} - -static inline void -update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, - int flags) -{ - struct task_struct *p = dl_task_of(dl_se); - - if (!schedstat_enabled()) - return; - - if ((flags & DEQUEUE_SLEEP)) { - unsigned int state; - - state = READ_ONCE(p->__state); - if (state & TASK_INTERRUPTIBLE) - __schedstat_set(p->stats.sleep_start, - rq_clock(rq_of_dl_rq(dl_rq))); - - if (state & TASK_UNINTERRUPTIBLE) - __schedstat_set(p->stats.block_start, - rq_clock(rq_of_dl_rq(dl_rq))); - } -} - static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); @@ -1580,8 +1502,6 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) { 
BUG_ON(on_dl_rq(dl_se)); - update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags); - /* * If this is a wakeup or a new instance, the scheduling * parameters of the task might need updating. Otherwise, @@ -1678,9 +1598,6 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) return; } - check_schedstat_required(); - update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl); - enqueue_dl_entity(&p->dl, flags); if (!task_current(rq, p) && p->nr_cpus_allowed > 1) @@ -1689,7 +1606,6 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) { - update_stats_dequeue_dl(&rq->dl, &p->dl, flags); dequeue_dl_entity(&p->dl); dequeue_pushable_dl_task(rq, p); } @@ -1909,12 +1825,7 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p) static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first) { - struct sched_dl_entity *dl_se = &p->dl; - struct dl_rq *dl_rq = &rq->dl; - p->se.exec_start = rq_clock_task(rq); - if (on_dl_rq(&p->dl)) - update_stats_wait_end_dl(dl_rq, dl_se); /* You can't push away the running task */ dequeue_pushable_dl_task(rq, p); @@ -1971,12 +1882,6 @@ static struct task_struct *pick_next_task_dl(struct rq *rq) static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { - struct sched_dl_entity *dl_se = &p->dl; - struct dl_rq *dl_rq = &rq->dl; - - if (on_dl_rq(&p->dl)) - update_stats_wait_start_dl(dl_rq, dl_se); - update_curr_dl(rq); update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index aa29211de1..17a653b670 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -311,7 +311,6 @@ static __init int sched_init_debug(void) debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); - debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity); debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); @@ -449,11 +448,9 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group struct sched_entity *se = tg->se[cpu]; #define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) -#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \ - #F, (long long)schedstat_val(stats->F)) +#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F)) #define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) -#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \ - #F, SPLIT_NS((long long)schedstat_val(stats->F))) +#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F))) if (!se) return; @@ -463,19 +460,16 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group PN(se->sum_exec_runtime); if (schedstat_enabled()) { - struct sched_statistics *stats; - stats = __schedstats_from_se(se); - - PN_SCHEDSTAT(wait_start); - PN_SCHEDSTAT(sleep_start); - PN_SCHEDSTAT(block_start); - PN_SCHEDSTAT(sleep_max); - PN_SCHEDSTAT(block_max); - PN_SCHEDSTAT(exec_max); - PN_SCHEDSTAT(slice_max); - PN_SCHEDSTAT(wait_max); - PN_SCHEDSTAT(wait_sum); - P_SCHEDSTAT(wait_count); + PN_SCHEDSTAT(se->statistics.wait_start); + 
PN_SCHEDSTAT(se->statistics.sleep_start); + PN_SCHEDSTAT(se->statistics.block_start); + PN_SCHEDSTAT(se->statistics.sleep_max); + PN_SCHEDSTAT(se->statistics.block_max); + PN_SCHEDSTAT(se->statistics.exec_max); + PN_SCHEDSTAT(se->statistics.slice_max); + PN_SCHEDSTAT(se->statistics.wait_max); + PN_SCHEDSTAT(se->statistics.wait_sum); + P_SCHEDSTAT(se->statistics.wait_count); } P(se->load.weight); @@ -541,11 +535,10 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) (long long)(p->nvcsw + p->nivcsw), p->prio); - SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld", - SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)), + SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", + SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)), SPLIT_NS(p->se.sum_exec_runtime), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); + SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime))); #ifdef CONFIG_NUMA_BALANCING SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); @@ -621,8 +614,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running); - SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", - cfs_rq->idle_nr_running); SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running", cfs_rq->idle_h_nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); @@ -819,7 +810,6 @@ static void sched_debug_header(struct seq_file *m) SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); - PN(sysctl_sched_idle_min_granularity); PN(sysctl_sched_wakeup_granularity); P(sysctl_sched_child_runs_first); P(sysctl_sched_features); @@ -964,8 +954,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, "---------------------------------------------------------" "----------\n"); -#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->stats.F)) -#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F)) +#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F)) +#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F)) PN(se.exec_start); PN(se.vruntime); @@ -978,34 +968,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, if (schedstat_enabled()) { u64 avg_atom, avg_per_cpu; - PN_SCHEDSTAT(sum_sleep_runtime); - PN_SCHEDSTAT(sum_block_runtime); - PN_SCHEDSTAT(wait_start); - PN_SCHEDSTAT(sleep_start); - PN_SCHEDSTAT(block_start); - PN_SCHEDSTAT(sleep_max); - PN_SCHEDSTAT(block_max); - PN_SCHEDSTAT(exec_max); - PN_SCHEDSTAT(slice_max); - PN_SCHEDSTAT(wait_max); - PN_SCHEDSTAT(wait_sum); - P_SCHEDSTAT(wait_count); - PN_SCHEDSTAT(iowait_sum); - P_SCHEDSTAT(iowait_count); - P_SCHEDSTAT(nr_migrations_cold); - P_SCHEDSTAT(nr_failed_migrations_affine); - P_SCHEDSTAT(nr_failed_migrations_running); - P_SCHEDSTAT(nr_failed_migrations_hot); - P_SCHEDSTAT(nr_forced_migrations); - P_SCHEDSTAT(nr_wakeups); - P_SCHEDSTAT(nr_wakeups_sync); - P_SCHEDSTAT(nr_wakeups_migrate); - P_SCHEDSTAT(nr_wakeups_local); - P_SCHEDSTAT(nr_wakeups_remote); - P_SCHEDSTAT(nr_wakeups_affine); - P_SCHEDSTAT(nr_wakeups_affine_attempts); - P_SCHEDSTAT(nr_wakeups_passive); - P_SCHEDSTAT(nr_wakeups_idle); + PN_SCHEDSTAT(se.statistics.sum_sleep_runtime); + PN_SCHEDSTAT(se.statistics.wait_start); + PN_SCHEDSTAT(se.statistics.sleep_start); + 
PN_SCHEDSTAT(se.statistics.block_start); + PN_SCHEDSTAT(se.statistics.sleep_max); + PN_SCHEDSTAT(se.statistics.block_max); + PN_SCHEDSTAT(se.statistics.exec_max); + PN_SCHEDSTAT(se.statistics.slice_max); + PN_SCHEDSTAT(se.statistics.wait_max); + PN_SCHEDSTAT(se.statistics.wait_sum); + P_SCHEDSTAT(se.statistics.wait_count); + PN_SCHEDSTAT(se.statistics.iowait_sum); + P_SCHEDSTAT(se.statistics.iowait_count); + P_SCHEDSTAT(se.statistics.nr_migrations_cold); + P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine); + P_SCHEDSTAT(se.statistics.nr_failed_migrations_running); + P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot); + P_SCHEDSTAT(se.statistics.nr_forced_migrations); + P_SCHEDSTAT(se.statistics.nr_wakeups); + P_SCHEDSTAT(se.statistics.nr_wakeups_sync); + P_SCHEDSTAT(se.statistics.nr_wakeups_migrate); + P_SCHEDSTAT(se.statistics.nr_wakeups_local); + P_SCHEDSTAT(se.statistics.nr_wakeups_remote); + P_SCHEDSTAT(se.statistics.nr_wakeups_affine); + P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts); + P_SCHEDSTAT(se.statistics.nr_wakeups_passive); + P_SCHEDSTAT(se.statistics.nr_wakeups_idle); avg_atom = p->se.sum_exec_runtime; if (nr_switches) @@ -1023,10 +1012,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, __PN(avg_atom); __PN(avg_per_cpu); - -#ifdef CONFIG_SCHED_CORE - PN_SCHEDSTAT(core_forceidle_sum); -#endif } __P(nr_switches); @@ -1075,7 +1060,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, void proc_sched_set_task(struct task_struct *p) { #ifdef CONFIG_SCHEDSTATS - memset(&p->stats, 0, sizeof(p->stats)); + memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5146163bfa..6420580f27 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -59,14 +59,6 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; unsigned int sysctl_sched_min_granularity = 750000ULL; static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; -/* - * Minimal preemption granularity for CPU-bound SCHED_IDLE tasks. - * Applies only when SCHED_IDLE tasks compete with normal tasks. - * - * (default: 0.75 msec) - */ -unsigned int sysctl_sched_idle_min_granularity = 750000ULL; - /* * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity */ @@ -673,8 +665,6 @@ static u64 __sched_period(unsigned long nr_running) return sysctl_sched_latency; } -static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq); - /* * We calculate the wall-time slice from the period by taking a part * proportional to the weight. 
@@ -684,8 +674,6 @@ static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq); static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) { unsigned int nr_running = cfs_rq->nr_running; - struct sched_entity *init_se = se; - unsigned int min_gran; u64 slice; if (sched_feat(ALT_PERIOD)) @@ -696,13 +684,12 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) for_each_sched_entity(se) { struct load_weight *load; struct load_weight lw; - struct cfs_rq *qcfs_rq; - qcfs_rq = cfs_rq_of(se); - load = &qcfs_rq->load; + cfs_rq = cfs_rq_of(se); + load = &cfs_rq->load; if (unlikely(!se->on_rq)) { - lw = qcfs_rq->load; + lw = cfs_rq->load; update_load_add(&lw, se->load.weight); load = &lw; @@ -710,14 +697,8 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) slice = __calc_delta(slice, se->load.weight, load); } - if (sched_feat(BASE_SLICE)) { - if (se_is_idle(init_se) && !sched_idle_cfs_rq(cfs_rq)) - min_gran = sysctl_sched_idle_min_granularity; - else - min_gran = sysctl_sched_min_granularity; - - slice = max_t(u64, slice, min_gran); - } + if (sched_feat(BASE_SLICE)) + slice = max(slice, (u64)sysctl_sched_min_granularity); return slice; } @@ -856,13 +837,8 @@ static void update_curr(struct cfs_rq *cfs_rq) curr->exec_start = now; - if (schedstat_enabled()) { - struct sched_statistics *stats; - - stats = __schedstats_from_se(curr); - __schedstat_set(stats->exec_max, - max(delta_exec, stats->exec_max)); - } + schedstat_set(curr->statistics.exec_max, + max(delta_exec, curr->statistics.exec_max)); curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq->exec_clock, delta_exec); @@ -887,70 +863,137 @@ static void update_curr_fair(struct rq *rq) } static inline void -update_stats_wait_start_fair(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct sched_statistics *stats; - struct task_struct *p = NULL; + u64 wait_start, prev_wait_start; if (!schedstat_enabled()) return; - stats = __schedstats_from_se(se); + wait_start = rq_clock(rq_of(cfs_rq)); + prev_wait_start = schedstat_val(se->statistics.wait_start); - if (entity_is_task(se)) - p = task_of(se); + if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) && + likely(wait_start > prev_wait_start)) + wait_start -= prev_wait_start; - __update_stats_wait_start(rq_of(cfs_rq), p, stats); + __schedstat_set(se->statistics.wait_start, wait_start); } static inline void -update_stats_wait_end_fair(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct sched_statistics *stats; - struct task_struct *p = NULL; + struct task_struct *p; + u64 delta; if (!schedstat_enabled()) return; - stats = __schedstats_from_se(se); - /* * When the sched_schedstat changes from 0 to 1, some sched se * maybe already in the runqueue, the se->statistics.wait_start * will be 0.So it will let the delta wrong. We need to avoid this * scenario. */ - if (unlikely(!schedstat_val(stats->wait_start))) + if (unlikely(!schedstat_val(se->statistics.wait_start))) return; - if (entity_is_task(se)) - p = task_of(se); + delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start); - __update_stats_wait_end(rq_of(cfs_rq), p, stats); + if (entity_is_task(se)) { + p = task_of(se); + if (task_on_rq_migrating(p)) { + /* + * Preserve migrating task's wait time so wait_start + * time stamp can be adjusted to accumulate wait time + * prior to migration. 
+ */ + __schedstat_set(se->statistics.wait_start, delta); + return; + } + trace_sched_stat_wait(p, delta); + } + + __schedstat_set(se->statistics.wait_max, + max(schedstat_val(se->statistics.wait_max), delta)); + __schedstat_inc(se->statistics.wait_count); + __schedstat_add(se->statistics.wait_sum, delta); + __schedstat_set(se->statistics.wait_start, 0); } static inline void -update_stats_enqueue_sleeper_fair(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct sched_statistics *stats; struct task_struct *tsk = NULL; + u64 sleep_start, block_start; if (!schedstat_enabled()) return; - stats = __schedstats_from_se(se); + sleep_start = schedstat_val(se->statistics.sleep_start); + block_start = schedstat_val(se->statistics.block_start); if (entity_is_task(se)) tsk = task_of(se); - __update_stats_enqueue_sleeper(rq_of(cfs_rq), tsk, stats); + if (sleep_start) { + u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start; + + if ((s64)delta < 0) + delta = 0; + + if (unlikely(delta > schedstat_val(se->statistics.sleep_max))) + __schedstat_set(se->statistics.sleep_max, delta); + + __schedstat_set(se->statistics.sleep_start, 0); + __schedstat_add(se->statistics.sum_sleep_runtime, delta); + + if (tsk) { + account_scheduler_latency(tsk, delta >> 10, 1); + trace_sched_stat_sleep(tsk, delta); + } + } + if (block_start) { + u64 delta = rq_clock(rq_of(cfs_rq)) - block_start; + + if ((s64)delta < 0) + delta = 0; + + if (unlikely(delta > schedstat_val(se->statistics.block_max))) + __schedstat_set(se->statistics.block_max, delta); + + __schedstat_set(se->statistics.block_start, 0); + __schedstat_add(se->statistics.sum_sleep_runtime, delta); + + if (tsk) { + if (tsk->in_iowait) { + __schedstat_add(se->statistics.iowait_sum, delta); + __schedstat_inc(se->statistics.iowait_count); + trace_sched_stat_iowait(tsk, delta); + } + + trace_sched_stat_blocked(tsk, delta); + + /* + * Blocking time is in units of nanosecs, so shift by + * 20 to get a milliseconds-range estimation of the + * amount of time that the task spent sleeping: + */ + if (unlikely(prof_on == SLEEP_PROFILING)) { + profile_hits(SLEEP_PROFILING, + (void *)get_wchan(tsk), + delta >> 20); + } + account_scheduler_latency(tsk, delta >> 10, 0); + } + } } /* * Task is being enqueued - update stats: */ static inline void -update_stats_enqueue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { if (!schedstat_enabled()) return; @@ -960,14 +1003,14 @@ update_stats_enqueue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int fl * a dequeue/enqueue event is a NOP) */ if (se != cfs_rq->curr) - update_stats_wait_start_fair(cfs_rq, se); + update_stats_wait_start(cfs_rq, se); if (flags & ENQUEUE_WAKEUP) - update_stats_enqueue_sleeper_fair(cfs_rq, se); + update_stats_enqueue_sleeper(cfs_rq, se); } static inline void -update_stats_dequeue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { if (!schedstat_enabled()) @@ -978,7 +1021,7 @@ update_stats_dequeue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int fl * waiting task: */ if (se != cfs_rq->curr) - update_stats_wait_end_fair(cfs_rq, se); + update_stats_wait_end(cfs_rq, se); if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) { struct task_struct *tsk = task_of(se); @@ -987,10 +1030,10 @@ update_stats_dequeue_fair(struct cfs_rq *cfs_rq, struct 
sched_entity *se, int fl /* XXX racy against TTWU */ state = READ_ONCE(tsk->__state); if (state & TASK_INTERRUPTIBLE) - __schedstat_set(tsk->stats.sleep_start, + __schedstat_set(se->statistics.sleep_start, rq_clock(rq_of(cfs_rq))); if (state & TASK_UNINTERRUPTIBLE) - __schedstat_set(tsk->stats.block_start, + __schedstat_set(se->statistics.block_start, rq_clock(rq_of(cfs_rq))); } } @@ -1038,12 +1081,11 @@ struct numa_group { unsigned long total_faults; unsigned long max_faults_cpu; /* - * faults[] array is split into two regions: faults_mem and faults_cpu. - * * Faults_cpu is used to decide whether memory should move * towards the CPU. As a consequence, these stats are weighted * more by CPU use than by memory faults. */ + unsigned long *faults_cpu; unsigned long faults[]; }; @@ -1217,8 +1259,8 @@ static inline unsigned long group_faults(struct task_struct *p, int nid) static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) { - return group->faults[task_faults_idx(NUMA_CPU, nid, 0)] + - group->faults[task_faults_idx(NUMA_CPU, nid, 1)]; + return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] + + group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)]; } static inline unsigned long group_faults_priv(struct numa_group *ng) @@ -1502,6 +1544,7 @@ struct task_numa_env { static unsigned long cpu_load(struct rq *rq); static unsigned long cpu_runnable(struct rq *rq); +static unsigned long cpu_util(int cpu); static inline long adjust_numa_imbalance(int imbalance, int dst_running, int dst_weight); @@ -1568,7 +1611,7 @@ static void update_numa_stats(struct task_numa_env *env, ns->load += cpu_load(rq); ns->runnable += cpu_runnable(rq); - ns->util += cpu_util_cfs(cpu); + ns->util += cpu_util(cpu); ns->nr_running += rq->cfs.h_nr_running; ns->compute_capacity += capacity_of(cpu); @@ -2073,7 +2116,7 @@ static void numa_migrate_preferred(struct task_struct *p) } /* - * Find out how many nodes the workload is actively running on. Do this by + * Find out how many nodes on the workload is actively running on. Do this by * tracking the nodes from which NUMA hinting faults are triggered. This can * be different from the set of nodes where the workload's memory is currently * located. @@ -2127,7 +2170,7 @@ static void update_task_scan_period(struct task_struct *p, /* * If there were no record hinting faults then either the task is - * completely idle or all activity is in areas that are not of interest + * completely idle or all activity is areas that are not of interest * to automatic numa balancing. Related to that, if there were failed * migration then it implies we are migrating too quickly or the local * node is overloaded. In either case, scan slower @@ -2384,7 +2427,7 @@ static void task_numa_placement(struct task_struct *p) * is at the beginning of the numa_faults array. 
*/ ng->faults[mem_idx] += diff; - ng->faults[cpu_idx] += f_diff; + ng->faults_cpu[mem_idx] += f_diff; ng->total_faults += diff; group_faults += ng->faults[mem_idx]; } @@ -2438,8 +2481,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, if (unlikely(!deref_curr_numa_group(p))) { unsigned int size = sizeof(struct numa_group) + - NR_NUMA_HINT_FAULT_STATS * - nr_node_ids * sizeof(unsigned long); + 4*nr_node_ids*sizeof(unsigned long); grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); if (!grp) @@ -2450,6 +2492,9 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, grp->max_faults_cpu = 0; spin_lock_init(&grp->lock); grp->gid = p->pid; + /* Second half of the array tracks nids where faults happen */ + grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES * + nr_node_ids; for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) grp->faults[i] = p->numa_faults[i]; @@ -2950,8 +2995,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) } #endif cfs_rq->nr_running++; - if (se_is_idle(se)) - cfs_rq->idle_nr_running++; } static void @@ -2965,8 +3008,6 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) } #endif cfs_rq->nr_running--; - if (se_is_idle(se)) - cfs_rq->idle_nr_running--; } /* @@ -3028,11 +3069,9 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { + u32 divider = get_pelt_divider(&se->avg); sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg); - sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum); - /* See update_cfs_rq_load_avg() */ - cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum, - cfs_rq->avg.load_avg * PELT_MIN_DIVIDER); + cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider; } #else static inline void @@ -3241,7 +3280,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags) * As is, the util number is not freq-invariant (we'd have to * implement arch_scale_freq_capacity() for that). * - * See cpu_util_cfs(). + * See cpu_util(). 
*/ cpufreq_update_util(rq, flags); } @@ -3453,11 +3492,11 @@ void set_task_rq_fair(struct sched_entity *se, static inline void update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg; - u32 new_sum, divider; + long delta = gcfs_rq->avg.util_avg - se->avg.util_avg; + u32 divider; /* Nothing to update */ - if (!delta_avg) + if (!delta) return; /* @@ -3466,30 +3505,23 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq */ divider = get_pelt_divider(&cfs_rq->avg); - /* Set new sched_entity's utilization */ se->avg.util_avg = gcfs_rq->avg.util_avg; - new_sum = se->avg.util_avg * divider; - delta_sum = (long)new_sum - (long)se->avg.util_sum; - se->avg.util_sum = new_sum; + se->avg.util_sum = se->avg.util_avg * divider; /* Update parent cfs_rq utilization */ - add_positive(&cfs_rq->avg.util_avg, delta_avg); - add_positive(&cfs_rq->avg.util_sum, delta_sum); - - /* See update_cfs_rq_load_avg() */ - cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum, - cfs_rq->avg.util_avg * PELT_MIN_DIVIDER); + add_positive(&cfs_rq->avg.util_avg, delta); + cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider; } static inline void update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg; - u32 new_sum, divider; + long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg; + u32 divider; /* Nothing to update */ - if (!delta_avg) + if (!delta) return; /* @@ -3500,25 +3532,19 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf /* Set new sched_entity's runnable */ se->avg.runnable_avg = gcfs_rq->avg.runnable_avg; - new_sum = se->avg.runnable_avg * divider; - delta_sum = (long)new_sum - (long)se->avg.runnable_sum; - se->avg.runnable_sum = new_sum; + se->avg.runnable_sum = se->avg.runnable_avg * divider; /* Update parent cfs_rq runnable */ - add_positive(&cfs_rq->avg.runnable_avg, delta_avg); - add_positive(&cfs_rq->avg.runnable_sum, delta_sum); - /* See update_cfs_rq_load_avg() */ - cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum, - cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER); + add_positive(&cfs_rq->avg.runnable_avg, delta); + cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider; } static inline void update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; + long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; unsigned long load_avg; u64 load_sum = 0; - s64 delta_sum; u32 divider; if (!runnable_sum) @@ -3545,7 +3571,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq * assuming all tasks are equally runnable. 
*/ if (scale_load_down(gcfs_rq->load.weight)) { - load_sum = div_u64(gcfs_rq->avg.load_sum, + load_sum = div_s64(gcfs_rq->avg.load_sum, scale_load_down(gcfs_rq->load.weight)); } @@ -3562,22 +3588,19 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT; runnable_sum = max(runnable_sum, running_sum); - load_sum = se_weight(se) * runnable_sum; - load_avg = div_u64(load_sum, divider); - - delta_avg = load_avg - se->avg.load_avg; - if (!delta_avg) - return; - - delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum; + load_sum = (s64)se_weight(se) * runnable_sum; + load_avg = div_s64(load_sum, divider); se->avg.load_sum = runnable_sum; + + delta = load_avg - se->avg.load_avg; + if (!delta) + return; + se->avg.load_avg = load_avg; - add_positive(&cfs_rq->avg.load_avg, delta_avg); - add_positive(&cfs_rq->avg.load_sum, delta_sum); - /* See update_cfs_rq_load_avg() */ - cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum, - cfs_rq->avg.load_avg * PELT_MIN_DIVIDER); + + add_positive(&cfs_rq->avg.load_avg, delta); + cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider; } static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) @@ -3668,7 +3691,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum * * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example. * - * Return: true if the load decayed or we removed load. + * Returns true if the load decayed or we removed load. * * Since both these conditions indicate a changed cfs_rq->avg.load we should * call update_tg_load_avg() when this function returns true. @@ -3693,9 +3716,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) r = removed_load; sub_positive(&sa->load_avg, r); - sub_positive(&sa->load_sum, r * divider); - /* See sa->util_sum below */ - sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER); + sa->load_sum = sa->load_avg * divider; r = removed_util; sub_positive(&sa->util_avg, r); @@ -3715,10 +3736,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) r = removed_runnable; sub_positive(&sa->runnable_avg, r); - sub_positive(&sa->runnable_sum, r * divider); - /* See sa->util_sum above */ - sa->runnable_sum = max_t(u32, sa->runnable_sum, - sa->runnable_avg * PELT_MIN_DIVIDER); + sa->runnable_sum = sa->runnable_avg * divider; /* * removed_runnable is the unweighted version of removed_load so we @@ -3805,18 +3823,17 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s */ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { + /* + * cfs_rq->avg.period_contrib can be used for both cfs_rq and se. + * See ___update_load_avg() for details. 
+ */ + u32 divider = get_pelt_divider(&cfs_rq->avg); + dequeue_load_avg(cfs_rq, se); sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); - sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); - /* See update_cfs_rq_load_avg() */ - cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum, - cfs_rq->avg.util_avg * PELT_MIN_DIVIDER); - + cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider; sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg); - sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum); - /* See update_cfs_rq_load_avg() */ - cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum, - cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER); + cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider; add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum); @@ -4103,8 +4120,7 @@ static inline void util_est_update(struct cfs_rq *cfs_rq, trace_sched_util_est_se_tp(&p->se); } -static inline int task_fits_capacity(struct task_struct *p, - unsigned long capacity) +static inline int task_fits_capacity(struct task_struct *p, long capacity) { return fits_capacity(uclamp_task_util(p), capacity); } @@ -4201,12 +4217,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) /* sleeps up to a single latency don't count. */ if (!initial) { - unsigned long thresh; - - if (se_is_idle(se)) - thresh = sysctl_sched_min_granularity; - else - thresh = sysctl_sched_latency; + unsigned long thresh = sysctl_sched_latency; /* * Halve their sleep time's effect, to allow @@ -4224,6 +4235,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) static void check_enqueue_throttle(struct cfs_rq *cfs_rq); +static inline void check_schedstat_required(void) +{ +#ifdef CONFIG_SCHEDSTATS + if (schedstat_enabled()) + return; + + /* Force schedstat enabled if a dependent tracepoint is active */ + if (trace_sched_stat_wait_enabled() || + trace_sched_stat_sleep_enabled() || + trace_sched_stat_iowait_enabled() || + trace_sched_stat_blocked_enabled() || + trace_sched_stat_runtime_enabled()) { + printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " + "stat_blocked and stat_runtime require the " + "kernel parameter schedstats=enable or " + "kernel.sched_schedstats=1\n"); + } +#endif +} + static inline bool cfs_bandwidth_used(void); /* @@ -4297,7 +4328,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) place_entity(cfs_rq, se, 0); check_schedstat_required(); - update_stats_enqueue_fair(cfs_rq, se, flags); + update_stats_enqueue(cfs_rq, se, flags); check_spread(cfs_rq, se); if (!curr) __enqueue_entity(cfs_rq, se); @@ -4381,7 +4412,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_load_avg(cfs_rq, se, UPDATE_TG); se_update_runnable(se); - update_stats_dequeue_fair(cfs_rq, se, flags); + update_stats_dequeue(cfs_rq, se, flags); clear_buddies(cfs_rq, se); @@ -4466,7 +4497,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) * a CPU. So account for the time it spent waiting on the * runqueue. 
*/ - update_stats_wait_end_fair(cfs_rq, se); + update_stats_wait_end(cfs_rq, se); __dequeue_entity(cfs_rq, se); update_load_avg(cfs_rq, se, UPDATE_TG); } @@ -4481,12 +4512,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) */ if (schedstat_enabled() && rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) { - struct sched_statistics *stats; - - stats = __schedstats_from_se(se); - __schedstat_set(stats->slice_max, - max((u64)stats->slice_max, - se->sum_exec_runtime - se->prev_sum_exec_runtime)); + schedstat_set(se->statistics.slice_max, + max((u64)schedstat_val(se->statistics.slice_max), + se->sum_exec_runtime - se->prev_sum_exec_runtime)); } se->prev_sum_exec_runtime = se->sum_exec_runtime; @@ -4568,7 +4596,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) check_spread(cfs_rq, prev); if (prev->on_rq) { - update_stats_wait_start_fair(cfs_rq, prev); + update_stats_wait_start(cfs_rq, prev); /* Put 'current' back into the tree. */ __enqueue_entity(cfs_rq, prev); /* in !on_rq case, update occurred at dequeue */ @@ -4669,20 +4697,11 @@ static inline u64 sched_cfs_bandwidth_slice(void) */ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) { - s64 runtime; - if (unlikely(cfs_b->quota == RUNTIME_INF)) return; cfs_b->runtime += cfs_b->quota; - runtime = cfs_b->runtime_snap - cfs_b->runtime; - if (runtime > 0) { - cfs_b->burst_time += runtime; - cfs_b->nr_burst++; - } - cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst); - cfs_b->runtime_snap = cfs_b->runtime; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -5543,9 +5562,11 @@ static inline void hrtick_update(struct rq *rq) #endif #ifdef CONFIG_SMP +static inline unsigned long cpu_util(int cpu); + static inline bool cpu_overutilized(int cpu) { - return !fits_capacity(cpu_util_cfs(cpu), capacity_of(cpu)); + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); } static inline void update_overutilized_status(struct rq *rq) @@ -5566,17 +5587,6 @@ static int sched_idle_rq(struct rq *rq) rq->nr_running); } -/* - * Returns true if cfs_rq only has SCHED_IDLE entities enqueued. Note the use - * of idle_nr_running, which does not consider idle descendants of normal - * entities. 
- */ -static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq) -{ - return cfs_rq->nr_running && - cfs_rq->nr_running == cfs_rq->idle_nr_running; -} - #ifdef CONFIG_SMP static int sched_idle_cpu(int cpu) { @@ -5787,7 +5797,6 @@ static struct { cpumask_var_t idle_cpus_mask; atomic_t nr_cpus; int has_blocked; /* Idle CPUS has blocked load */ - int needs_update; /* Newly idle CPUs need their next_balance collated */ unsigned long next_balance; /* in jiffy units */ unsigned long next_blocked; /* Next update of blocked load in jiffies */ } nohz ____cacheline_aligned; @@ -5998,12 +6007,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits) target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync); - schedstat_inc(p->stats.nr_wakeups_affine_attempts); + schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts); if (target == nr_cpumask_bits) return prev_cpu; schedstat_inc(sd->ttwu_move_affine); - schedstat_inc(p->stats.nr_wakeups_affine); + schedstat_inc(p->se.statistics.nr_wakeups_affine); return target; } @@ -6377,7 +6386,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) return best_cpu; } -static inline bool asym_fits_capacity(unsigned long task_util, int cpu) +static inline bool asym_fits_capacity(int task_util, int cpu) { if (static_branch_unlikely(&sched_asym_cpucapacity)) return fits_capacity(task_util, capacity_of(cpu)); @@ -6446,6 +6455,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && asym_fits_capacity(task_util, recent_used_cpu)) { + /* + * Replace recent_used_cpu with prev as it is a potential + * candidate for the next wake: + */ + p->recent_used_cpu = prev; return recent_used_cpu; } @@ -6490,6 +6504,58 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } +/** + * cpu_util - Estimates the amount of capacity of a CPU used by CFS tasks. + * @cpu: the CPU to get the utilization of + * + * The unit of the return value must be the one of capacity so we can compare + * the utilization with the capacity of the CPU that is available for CFS task + * (ie cpu_capacity). + * + * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the + * recent utilization of currently non-runnable tasks on a CPU. It represents + * the amount of utilization of a CPU in the range [0..capacity_orig] where + * capacity_orig is the cpu_capacity available at the highest frequency + * (arch_scale_freq_capacity()). + * The utilization of a CPU converges towards a sum equal to or less than the + * current capacity (capacity_curr <= capacity_orig) of the CPU because it is + * the running time on this CPU scaled by capacity_curr. + * + * The estimated utilization of a CPU is defined to be the maximum between its + * cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks + * currently RUNNABLE on that CPU. + * This allows to properly represent the expected utilization of a CPU which + * has just got a big task running since a long sleep period. At the same time + * however it preserves the benefits of the "blocked utilization" in + * describing the potential for other tasks waking up on the same CPU. 
+ * + * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even + * higher than capacity_orig because of unfortunate rounding in + * cfs.avg.util_avg or just after migrating tasks and new task wakeups until + * the average stabilizes with the new running time. We need to check that the + * utilization stays within the range of [0..capacity_orig] and cap it if + * necessary. Without utilization capping, a group could be seen as overloaded + * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of + * available capacity. We allow utilization to overshoot capacity_curr (but not + * capacity_orig) as it useful for predicting the capacity required after task + * migrations (scheduler-driven DVFS). + * + * Return: the (estimated) utilization for the specified CPU + */ +static inline unsigned long cpu_util(int cpu) +{ + struct cfs_rq *cfs_rq; + unsigned int util; + + cfs_rq = &cpu_rq(cpu)->cfs; + util = READ_ONCE(cfs_rq->avg.util_avg); + + if (sched_feat(UTIL_EST)) + util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued)); + + return min_t(unsigned long, util, capacity_orig_of(cpu)); +} + /* * cpu_util_without: compute cpu utilization without any contributions from *p * @cpu: the CPU which utilization is requested @@ -6510,7 +6576,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) /* Task has no contribution or is new */ if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time)) - return cpu_util_cfs(cpu); + return cpu_util(cpu); cfs_rq = &cpu_rq(cpu)->cfs; util = READ_ONCE(cfs_rq->avg.util_avg); @@ -6574,7 +6640,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) /* * Utilization (estimated) can exceed the CPU capacity, thus let's * clamp to the maximum CPU capacity to ensure consistency with - * cpu_util. + * the cpu_util call. */ return min_t(unsigned long, util, capacity_orig_of(cpu)); } @@ -6606,7 +6672,7 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu) * During wake-up, the task isn't enqueued yet and doesn't * appear in the cfs_rq->avg.util_est.enqueued of any rq, * so just add it (if needed) to "simulate" what will be - * cpu_util after the task has been enqueued. + * cpu_util() after the task has been enqueued. */ if (dst_cpu == cpu) util_est += _task_util_est(p); @@ -6897,11 +6963,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) break; } - /* - * Usually only true for WF_EXEC and WF_FORK, as sched_domains - * usually do not have SD_BALANCE_WAKE set. That means wakeup - * will usually go to the fast path. 
- */ if (tmp->flags & sd_flag) sd = tmp; else if (!want_affine) @@ -7757,7 +7818,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) { int cpu; - schedstat_inc(p->stats.nr_failed_migrations_affine); + schedstat_inc(p->se.statistics.nr_failed_migrations_affine); env->flags |= LBF_SOME_PINNED; @@ -7791,7 +7852,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) env->flags &= ~LBF_ALL_PINNED; if (task_running(env->src_rq, p)) { - schedstat_inc(p->stats.nr_failed_migrations_running); + schedstat_inc(p->se.statistics.nr_failed_migrations_running); return 0; } @@ -7813,12 +7874,12 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) env->sd->nr_balance_failed > env->sd->cache_nice_tries) { if (tsk_cache_hot == 1) { schedstat_inc(env->sd->lb_hot_gained[env->idle]); - schedstat_inc(p->stats.nr_forced_migrations); + schedstat_inc(p->se.statistics.nr_forced_migrations); } return 1; } - schedstat_inc(p->stats.nr_failed_migrations_hot); + schedstat_inc(p->se.statistics.nr_failed_migrations_hot); return 0; } @@ -8552,111 +8613,14 @@ group_type group_classify(unsigned int imbalance_pct, return group_has_spare; } -/** - * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks - * @dst_cpu: Destination CPU of the load balancing - * @sds: Load-balancing data with statistics of the local group - * @sgs: Load-balancing statistics of the candidate busiest group - * @sg: The candidate busiest group - * - * Check the state of the SMT siblings of both @sds::local and @sg and decide - * if @dst_cpu can pull tasks. - * - * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of - * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks - * only if @dst_cpu has higher priority. - * - * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more - * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority. - * Bigger imbalances in the number of busy CPUs will be dealt with in - * update_sd_pick_busiest(). - * - * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings - * of @dst_cpu are idle and @sg has lower priority. - * - * Return: true if @dst_cpu can pull tasks, false otherwise. - */ -static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds, - struct sg_lb_stats *sgs, - struct sched_group *sg) -{ -#ifdef CONFIG_SCHED_SMT - bool local_is_smt, sg_is_smt; - int sg_busy_cpus; - - local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY; - sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY; - - sg_busy_cpus = sgs->group_weight - sgs->idle_cpus; - - if (!local_is_smt) { - /* - * If we are here, @dst_cpu is idle and does not have SMT - * siblings. Pull tasks if candidate group has two or more - * busy CPUs. - */ - if (sg_busy_cpus >= 2) /* implies sg_is_smt */ - return true; - - /* - * @dst_cpu does not have SMT siblings. @sg may have SMT - * siblings and only one is busy. In such case, @dst_cpu - * can help if it has higher priority and is idle (i.e., - * it has no running tasks). - */ - return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu); - } - - /* @dst_cpu has SMT siblings. */ - - if (sg_is_smt) { - int local_busy_cpus = sds->local->group_weight - - sds->local_stat.idle_cpus; - int busy_cpus_delta = sg_busy_cpus - local_busy_cpus; - - if (busy_cpus_delta == 1) - return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu); - - return false; - } - - /* - * @sg does not have SMT siblings. 
Ensure that @sds::local does not end - * up with more than one busy SMT sibling and only pull tasks if there - * are not busy CPUs (i.e., no CPU has running tasks). - */ - if (!sds->local_stat.sum_nr_running) - return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu); - - return false; -#else - /* Always return false so that callers deal with non-SMT cases. */ - return false; -#endif -} - -static inline bool -sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs, - struct sched_group *group) -{ - /* Only do SMT checks if either local or candidate have SMT siblings */ - if ((sds->local->flags & SD_SHARE_CPUCAPACITY) || - (group->flags & SD_SHARE_CPUCAPACITY)) - return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group); - - return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu); -} - /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. - * @sds: Load-balancing data with statistics of the local group. * @group: sched_group whose statistics are to be updated. * @sgs: variable to hold the statistics for this group. * @sg_status: Holds flag indicating the status of the sched_group */ static inline void update_sg_lb_stats(struct lb_env *env, - struct sd_lb_stats *sds, struct sched_group *group, struct sg_lb_stats *sgs, int *sg_status) @@ -8665,13 +8629,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, memset(sgs, 0, sizeof(*sgs)); - local_group = group == sds->local; + local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group)); for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); sgs->group_load += cpu_load(rq); - sgs->group_util += cpu_util_cfs(i); + sgs->group_util += cpu_util(i); sgs->group_runnable += cpu_runnable(rq); sgs->sum_h_nr_running += rq->cfs.h_nr_running; @@ -8708,17 +8672,18 @@ static inline void update_sg_lb_stats(struct lb_env *env, } } + /* Check if dst CPU is idle and preferred to this group */ + if (env->sd->flags & SD_ASYM_PACKING && + env->idle != CPU_NOT_IDLE && + sgs->sum_h_nr_running && + sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu)) { + sgs->group_asym_packing = 1; + } + sgs->group_capacity = group->sgc->capacity; sgs->group_weight = group->group_weight; - /* Check if dst CPU is idle and preferred to this group */ - if (!local_group && env->sd->flags & SD_ASYM_PACKING && - env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running && - sched_asym(env, sds, sgs, group)) { - sgs->group_asym_packing = 1; - } - sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs); /* Computing avg_load makes sense only when group is overloaded */ @@ -9227,7 +9192,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd update_group_capacity(env->sd, env->dst_cpu); } - update_sg_lb_stats(env, sds, sg, sgs, &sg_status); + update_sg_lb_stats(env, sg, sgs, &sg_status); if (local_group) goto next_group; @@ -9458,11 +9423,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /** * find_busiest_group - Returns the busiest group within the sched_domain * if there is an imbalance. - * @env: The load balancing environment. * * Also calculates the amount of runnable load which should be moved * to restore balance. * + * @env: The load balancing environment. + * * Return: - The busiest group if imbalance exists. 
*/ static struct sched_group *find_busiest_group(struct lb_env *env) @@ -9649,12 +9615,6 @@ static struct rq *find_busiest_queue(struct lb_env *env, nr_running == 1) continue; - /* Make sure we only pull tasks from a CPU of lower priority */ - if ((env->sd->flags & SD_ASYM_PACKING) && - sched_asym_prefer(i, env->dst_cpu) && - nr_running == 1) - continue; - switch (env->migration_type) { case migrate_load: /* @@ -9688,7 +9648,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, break; case migrate_util: - util = cpu_util_cfs(i); + util = cpu_util(cpu_of(rq)); /* * Don't try to pull utilization from a CPU with one @@ -10228,30 +10188,6 @@ void update_max_interval(void) max_load_balance_interval = HZ*num_online_cpus()/10; } -static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost) -{ - if (cost > sd->max_newidle_lb_cost) { - /* - * Track max cost of a domain to make sure to not delay the - * next wakeup on the CPU. - */ - sd->max_newidle_lb_cost = cost; - sd->last_decay_max_lb_cost = jiffies; - } else if (time_after(jiffies, sd->last_decay_max_lb_cost + HZ)) { - /* - * Decay the newidle max times by ~1% per second to ensure that - * it is not outdated and the current max cost is actually - * shorter. - */ - sd->max_newidle_lb_cost = (sd->max_newidle_lb_cost * 253) / 256; - sd->last_decay_max_lb_cost = jiffies; - - return true; - } - - return false; -} - /* * It checks each scheduling domain to see if it is due to be balanced, * and initiates a balancing operation if so. @@ -10275,9 +10211,14 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) for_each_domain(cpu, sd) { /* * Decay the newidle max times here because this is a regular - * visit to all the domains. + * visit to all the domains. Decay ~1% per second. */ - need_decay = update_newidle_cost(sd, 0); + if (time_after(jiffies, sd->next_decay_max_lb_cost)) { + sd->max_newidle_lb_cost = + (sd->max_newidle_lb_cost * 253) / 256; + sd->next_decay_max_lb_cost = jiffies + HZ; + need_decay = 1; + } max_cost += sd->max_newidle_lb_cost; /* @@ -10446,7 +10387,7 @@ static void nohz_balancer_kick(struct rq *rq) goto out; if (rq->nr_running >= 2) { - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + flags = NOHZ_KICK_MASK; goto out; } @@ -10460,7 +10401,7 @@ static void nohz_balancer_kick(struct rq *rq) * on. */ if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) { - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + flags = NOHZ_KICK_MASK; goto unlock; } } @@ -10474,7 +10415,7 @@ static void nohz_balancer_kick(struct rq *rq) */ for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) { if (sched_asym_prefer(i, cpu)) { - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + flags = NOHZ_KICK_MASK; goto unlock; } } @@ -10487,7 +10428,7 @@ static void nohz_balancer_kick(struct rq *rq) * to run the misfit task on. */ if (check_misfit_status(rq, sd)) { - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + flags = NOHZ_KICK_MASK; goto unlock; } @@ -10514,16 +10455,13 @@ static void nohz_balancer_kick(struct rq *rq) */ nr_busy = atomic_read(&sds->nr_busy_cpus); if (nr_busy > 1) { - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + flags = NOHZ_KICK_MASK; goto unlock; } } unlock: rcu_read_unlock(); out: - if (READ_ONCE(nohz.needs_update)) - flags |= NOHZ_NEXT_KICK; - if (flags) kick_ilb(flags); } @@ -10620,13 +10558,12 @@ void nohz_balance_enter_idle(int cpu) /* * Ensures that if nohz_idle_balance() fails to observe our * @idle_cpus_mask store, it must observe the @has_blocked - * and @needs_update stores. + * store. 
*/ smp_mb__after_atomic(); set_cpu_sd_state_idle(cpu); - WRITE_ONCE(nohz.needs_update, 1); out: /* * Each time a cpu enter idle, we assume that it has blocked load and @@ -10675,17 +10612,12 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags, /* * We assume there will be no idle load after this update and clear * the has_blocked flag. If a cpu enters idle in the mean time, it will - * set the has_blocked flag and trigger another update of idle load. + * set the has_blocked flag and trig another update of idle load. * Because a cpu that becomes idle, is added to idle_cpus_mask before * setting the flag, we are sure to not clear the state and not * check the load of an idle cpu. - * - * Same applies to idle_cpus_mask vs needs_update. */ - if (flags & NOHZ_STATS_KICK) - WRITE_ONCE(nohz.has_blocked, 0); - if (flags & NOHZ_NEXT_KICK) - WRITE_ONCE(nohz.needs_update, 0); + WRITE_ONCE(nohz.has_blocked, 0); /* * Ensures that if we miss the CPU, we must see the has_blocked @@ -10707,17 +10639,13 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags, * balancing owner will pick it up. */ if (need_resched()) { - if (flags & NOHZ_STATS_KICK) - has_blocked_load = true; - if (flags & NOHZ_NEXT_KICK) - WRITE_ONCE(nohz.needs_update, 1); + has_blocked_load = true; goto abort; } rq = cpu_rq(balance_cpu); - if (flags & NOHZ_STATS_KICK) - has_blocked_load |= update_nohz_stats(rq); + has_blocked_load |= update_nohz_stats(rq); /* * If time for next balance is due, @@ -10748,9 +10676,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags, if (likely(update_next_balance)) nohz.next_balance = next_balance; - if (flags & NOHZ_STATS_KICK) - WRITE_ONCE(nohz.next_blocked, - now + msecs_to_jiffies(LOAD_AVG_PERIOD)); + WRITE_ONCE(nohz.next_blocked, + now + msecs_to_jiffies(LOAD_AVG_PERIOD)); abort: /* There is still blocked load, enable periodic update */ @@ -10848,9 +10775,9 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { unsigned long next_balance = jiffies + HZ; int this_cpu = this_rq->cpu; - u64 t0, t1, curr_cost = 0; struct sched_domain *sd; int pulled_task = 0; + u64 curr_cost = 0; update_misfit_status(NULL, this_rq); @@ -10881,49 +10808,47 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) */ rq_unpin_lock(this_rq, rf); - rcu_read_lock(); - sd = rcu_dereference_check_sched_domain(this_rq->sd); - - if (!READ_ONCE(this_rq->rd->overload) || - (sd && this_rq->avg_idle < sd->max_newidle_lb_cost)) { + if (this_rq->avg_idle < sysctl_sched_migration_cost || + !READ_ONCE(this_rq->rd->overload)) { + rcu_read_lock(); + sd = rcu_dereference_check_sched_domain(this_rq->sd); if (sd) update_next_balance(sd, &next_balance); rcu_read_unlock(); goto out; } - rcu_read_unlock(); raw_spin_rq_unlock(this_rq); - t0 = sched_clock_cpu(this_cpu); update_blocked_averages(this_cpu); - rcu_read_lock(); for_each_domain(this_cpu, sd) { int continue_balancing = 1; - u64 domain_cost; + u64 t0, domain_cost; - update_next_balance(sd, &next_balance); - - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) + if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) { + update_next_balance(sd, &next_balance); break; + } if (sd->flags & SD_BALANCE_NEWIDLE) { + t0 = sched_clock_cpu(this_cpu); pulled_task = load_balance(this_cpu, this_rq, sd, CPU_NEWLY_IDLE, &continue_balancing); - t1 = sched_clock_cpu(this_cpu); - domain_cost = t1 - t0; - update_newidle_cost(sd, domain_cost); + domain_cost = sched_clock_cpu(this_cpu) - t0; + if 
(domain_cost > sd->max_newidle_lb_cost) + sd->max_newidle_lb_cost = domain_cost; curr_cost += domain_cost; - t0 = t1; } + update_next_balance(sd, &next_balance); + /* * Stop searching for tasks to pull if there are * now runnable tasks on this rq. @@ -11057,7 +10982,7 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr) * MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check * if we need to give up the CPU. */ - if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 && + if (rq->core->core_forceidle && rq->cfs.nr_running == 1 && __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE)) resched_curr(rq); } @@ -11479,7 +11404,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) if (!cfs_rq) goto err; - se = kzalloc_node(sizeof(struct sched_entity_stats), + se = kzalloc_node(sizeof(struct sched_entity), GFP_KERNEL, cpu_to_node(i)); if (!se) goto err_free_rq; @@ -11647,7 +11572,7 @@ int sched_group_set_idle(struct task_group *tg, long idle) for_each_possible_cpu(i) { struct rq *rq = cpu_rq(i); struct sched_entity *se = tg->se[i]; - struct cfs_rq *parent_cfs_rq, *grp_cfs_rq = tg->cfs_rq[i]; + struct cfs_rq *grp_cfs_rq = tg->cfs_rq[i]; bool was_idle = cfs_rq_is_idle(grp_cfs_rq); long idle_task_delta; struct rq_flags rf; @@ -11658,14 +11583,6 @@ int sched_group_set_idle(struct task_group *tg, long idle) if (WARN_ON_ONCE(was_idle == cfs_rq_is_idle(grp_cfs_rq))) goto next_cpu; - if (se->on_rq) { - parent_cfs_rq = cfs_rq_of(se); - if (cfs_rq_is_idle(grp_cfs_rq)) - parent_cfs_rq->idle_nr_running++; - else - parent_cfs_rq->idle_nr_running--; - } - idle_task_delta = grp_cfs_rq->h_nr_running - grp_cfs_rq->idle_h_nr_running; if (!cfs_rq_is_idle(grp_cfs_rq)) diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 1cf435bbcd..7f8dace096 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -46,16 +46,11 @@ SCHED_FEAT(DOUBLE_TICK, false) */ SCHED_FEAT(NONTASK_CAPACITY, true) -#ifdef CONFIG_PREEMPT_RT -SCHED_FEAT(TTWU_QUEUE, false) -#else - /* * Queue remote wakeups on the target CPU and process them * using the scheduler IPI. Reduces rq->lock contention/bounces. */ SCHED_FEAT(TTWU_QUEUE, true) -#endif /* * When doing wakeups, attempt to limit superfluous scans of the LLC domain. diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index e143581788..422f3b0445 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1,4 +1,3 @@ -// SPDX-License-Identifier: GPL-2.0 /* * Pressure stall information for CPU, memory and IO * @@ -842,6 +841,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, /* * When switching between tasks that have an identical * runtime state, the cgroup that contains both tasks + * runtime state, the cgroup that contains both tasks * we reach the first common ancestor. Iterate @next's * ancestors only until we encounter @prev's ONCPU. 
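The newidle_balance() hunk above goes back to timing each load_balance() call directly and widening the per-domain maximum. A hedged, stand-alone sketch of that measure-and-track pattern, using clock_gettime() where the kernel uses sched_clock_cpu(); the balance callback is a stand-in for the real work:

    #include <stdint.h>
    #include <time.h>

    static uint64_t now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    static uint64_t max_newidle_cost; /* stand-in for sd->max_newidle_lb_cost */

    /* Mirror of the reverted-to logic: sample t0, do the work, and keep
     * the worst-observed cost so later passes can be cut short. */
    static uint64_t timed_balance_pass(void (*balance)(void))
    {
        uint64_t t0 = now_ns(), cost;

        balance();
        cost = now_ns() - t0;
        if (cost > max_newidle_cost)
            max_newidle_cost = cost;
        return cost;
    }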
*/ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7b4f4fbbb4..54f9bb3f15 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1020,10 +1020,8 @@ static void update_curr_rt(struct rq *rq) if (unlikely((s64)delta_exec <= 0)) return; - schedstat_set(curr->stats.exec_max, - max(curr->stats.exec_max, delta_exec)); - - trace_sched_stat_runtime(curr, delta_exec, 0); + schedstat_set(curr->se.statistics.exec_max, + max(curr->se.statistics.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); @@ -1288,112 +1286,6 @@ static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_arr rt_se->on_list = 0; } -static inline struct sched_statistics * -__schedstats_from_rt_se(struct sched_rt_entity *rt_se) -{ -#ifdef CONFIG_RT_GROUP_SCHED - /* schedstats is not supported for rt group. */ - if (!rt_entity_is_task(rt_se)) - return NULL; -#endif - - return &rt_task_of(rt_se)->stats; -} - -static inline void -update_stats_wait_start_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) -{ - struct sched_statistics *stats; - struct task_struct *p = NULL; - - if (!schedstat_enabled()) - return; - - if (rt_entity_is_task(rt_se)) - p = rt_task_of(rt_se); - - stats = __schedstats_from_rt_se(rt_se); - if (!stats) - return; - - __update_stats_wait_start(rq_of_rt_rq(rt_rq), p, stats); -} - -static inline void -update_stats_enqueue_sleeper_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) -{ - struct sched_statistics *stats; - struct task_struct *p = NULL; - - if (!schedstat_enabled()) - return; - - if (rt_entity_is_task(rt_se)) - p = rt_task_of(rt_se); - - stats = __schedstats_from_rt_se(rt_se); - if (!stats) - return; - - __update_stats_enqueue_sleeper(rq_of_rt_rq(rt_rq), p, stats); -} - -static inline void -update_stats_enqueue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, - int flags) -{ - if (!schedstat_enabled()) - return; - - if (flags & ENQUEUE_WAKEUP) - update_stats_enqueue_sleeper_rt(rt_rq, rt_se); -} - -static inline void -update_stats_wait_end_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) -{ - struct sched_statistics *stats; - struct task_struct *p = NULL; - - if (!schedstat_enabled()) - return; - - if (rt_entity_is_task(rt_se)) - p = rt_task_of(rt_se); - - stats = __schedstats_from_rt_se(rt_se); - if (!stats) - return; - - __update_stats_wait_end(rq_of_rt_rq(rt_rq), p, stats); -} - -static inline void -update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, - int flags) -{ - struct task_struct *p = NULL; - - if (!schedstat_enabled()) - return; - - if (rt_entity_is_task(rt_se)) - p = rt_task_of(rt_se); - - if ((flags & DEQUEUE_SLEEP) && p) { - unsigned int state; - - state = READ_ONCE(p->__state); - if (state & TASK_INTERRUPTIBLE) - __schedstat_set(p->stats.sleep_start, - rq_clock(rq_of_rt_rq(rt_rq))); - - if (state & TASK_UNINTERRUPTIBLE) - __schedstat_set(p->stats.block_start, - rq_clock(rq_of_rt_rq(rt_rq))); - } -} - static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); @@ -1467,8 +1359,6 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) { struct rq *rq = rq_of_rt_se(rt_se); - update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags); - dequeue_rt_stack(rt_se, flags); for_each_sched_rt_entity(rt_se) __enqueue_rt_entity(rt_se, flags); @@ -1479,8 +1369,6 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) { struct rq *rq = rq_of_rt_se(rt_se); 
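Every helper deleted from kernel/sched/rt.c in this hunk follows the same defensive shape: return immediately while schedstats is disabled, and skip group (non-task) entities, which carry no statistics. A condensed, stand-alone sketch of that shape with simplified types; schedstat_enabled() here stands in for the kernel's static-key test:

    #include <stddef.h>

    struct sched_statistics { unsigned long long wait_start; };
    struct rt_entity { int is_task; struct sched_statistics stats; };

    static int schedstat_enabled(void) { return 0; } /* static key in the kernel */

    static struct sched_statistics *stats_from_rt_se(struct rt_entity *rt_se)
    {
        if (!rt_se->is_task) /* schedstats is not supported for rt groups */
            return NULL;
        return &rt_se->stats;
    }

    static void update_stats_wait_start(struct rt_entity *rt_se,
                                        unsigned long long now)
    {
        struct sched_statistics *stats;

        if (!schedstat_enabled())
            return;
        stats = stats_from_rt_se(rt_se);
        if (!stats)
            return;
        stats->wait_start = now;
    }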
- update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags); - dequeue_rt_stack(rt_se, flags); for_each_sched_rt_entity(rt_se) { @@ -1503,9 +1391,6 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (flags & ENQUEUE_WAKEUP) rt_se->timeout = 0; - check_schedstat_required(); - update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se); - enqueue_rt_entity(rt_se, flags); if (!task_current(rq, p) && p->nr_cpus_allowed > 1) @@ -1706,12 +1591,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first) { - struct sched_rt_entity *rt_se = &p->rt; - struct rt_rq *rt_rq = &rq->rt; - p->se.exec_start = rq_clock_task(rq); - if (on_rt_rq(&p->rt)) - update_stats_wait_end_rt(rt_rq, rt_se); /* The running task is never eligible for pushing */ dequeue_pushable_task(rq, p); @@ -1785,12 +1665,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { - struct sched_rt_entity *rt_se = &p->rt; - struct rt_rq *rt_rq = &rq->rt; - - if (on_rt_rq(&p->rt)) - update_stats_wait_start_rt(rt_rq, rt_se); - update_curr_rt(rq); update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index de53be9057..4f43282693 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -368,7 +369,6 @@ struct cfs_bandwidth { u64 quota; u64 runtime; u64 burst; - u64 runtime_snap; s64 hierarchical_quota; u8 idle; @@ -381,9 +381,7 @@ struct cfs_bandwidth { /* Statistics: */ int nr_periods; int nr_throttled; - int nr_burst; u64 throttled_time; - u64 burst_time; #endif }; @@ -533,7 +531,6 @@ struct cfs_rq { struct load_weight load; unsigned int nr_running; unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */ - unsigned int idle_nr_running; /* SCHED_IDLE */ unsigned int idle_h_nr_running; /* SCHED_IDLE */ u64 exec_clock; @@ -1111,10 +1108,8 @@ struct rq { unsigned int core_task_seq; unsigned int core_pick_seq; unsigned long core_cookie; - unsigned int core_forceidle_count; + unsigned char core_forceidle; unsigned int core_forceidle_seq; - unsigned int core_forceidle_occupation; - u64 core_forceidle_start; #endif }; @@ -1255,11 +1250,16 @@ static inline bool sched_core_enqueued(struct task_struct *p) } extern void sched_core_enqueue(struct rq *rq, struct task_struct *p); -extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags); +extern void sched_core_dequeue(struct rq *rq, struct task_struct *p); extern void sched_core_get(void); extern void sched_core_put(void); +extern unsigned long sched_core_alloc_cookie(void); +extern void sched_core_put_cookie(unsigned long cookie); +extern unsigned long sched_core_get_cookie(unsigned long cookie); +extern unsigned long sched_core_update_cookie(struct task_struct *p, unsigned long cookie); + #else /* !CONFIG_SCHED_CORE */ static inline bool sched_core_enabled(struct rq *rq) @@ -1423,6 +1423,11 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) extern void update_rq_clock(struct rq *rq); +static inline u64 __rq_clock_broken(struct rq *rq) +{ + return READ_ONCE(rq->clock); +} + /* * rq::clock_update_flags bits * @@ -1617,6 +1622,14 @@ rq_lock(struct rq *rq, struct rq_flags *rf) rq_pin_lock(rq, rf); } +static inline void +rq_relock(struct rq *rq, struct rq_flags *rf) + __acquires(rq->lock) +{ + raw_spin_rq_lock(rq); + rq_repin_lock(rq, 
rf); +} + static inline void rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) __releases(rq->lock) @@ -1797,7 +1810,6 @@ struct sched_group { unsigned int group_weight; struct sched_group_capacity *sgc; int asym_prefer_cpu; /* CPU of highest priority in group */ - int flags; /* * The CPUs this group covers. @@ -1856,32 +1868,6 @@ static inline void flush_smp_call_function_from_idle(void) { } #include "stats.h" #include "autogroup.h" -#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS) - -extern void __sched_core_account_forceidle(struct rq *rq); - -static inline void sched_core_account_forceidle(struct rq *rq) -{ - if (schedstat_enabled()) - __sched_core_account_forceidle(rq); -} - -extern void __sched_core_tick(struct rq *rq); - -static inline void sched_core_tick(struct rq *rq) -{ - if (sched_core_enabled(rq) && schedstat_enabled()) - __sched_core_tick(rq); -} - -#else - -static inline void sched_core_account_forceidle(struct rq *rq) {} - -static inline void sched_core_tick(struct rq *rq) {} - -#endif /* CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS */ - #ifdef CONFIG_CGROUP_SCHED /* @@ -1941,7 +1927,11 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) * per-task data have been completed by this moment. */ smp_wmb(); +#ifdef CONFIG_THREAD_INFO_IN_TASK + WRITE_ONCE(p->cpu, cpu); +#else WRITE_ONCE(task_thread_info(p)->cpu, cpu); +#endif p->wake_cpu = cpu; #endif } @@ -2413,7 +2403,6 @@ extern const_debug unsigned int sysctl_sched_migration_cost; #ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; -extern unsigned int sysctl_sched_idle_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern int sysctl_resched_latency_warn_ms; extern int sysctl_resched_latency_warn_once; @@ -2721,18 +2710,12 @@ extern void cfs_bandwidth_usage_dec(void); #define NOHZ_BALANCE_KICK_BIT 0 #define NOHZ_STATS_KICK_BIT 1 #define NOHZ_NEWILB_KICK_BIT 2 -#define NOHZ_NEXT_KICK_BIT 3 -/* Run rebalance_domains() */ #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) -/* Update blocked load */ #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) -/* Update blocked load when entering idle */ #define NOHZ_NEWILB_KICK BIT(NOHZ_NEWILB_KICK_BIT) -/* Update nohz.next_balance */ -#define NOHZ_NEXT_KICK BIT(NOHZ_NEXT_KICK_BIT) -#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK) +#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) @@ -2966,52 +2949,16 @@ static inline unsigned long cpu_util_dl(struct rq *rq) return READ_ONCE(rq->avg_dl.util_avg); } -/** - * cpu_util_cfs() - Estimates the amount of CPU capacity used by CFS tasks. - * @cpu: the CPU to get the utilization for. - * - * The unit of the return value must be the same as the one of CPU capacity - * so that CPU utilization can be compared with CPU capacity. - * - * CPU utilization is the sum of running time of runnable tasks plus the - * recent utilization of currently non-runnable tasks on that CPU. - * It represents the amount of CPU capacity currently used by CFS tasks in - * the range [0..max CPU capacity] with max CPU capacity being the CPU - * capacity at f_max. - * - * The estimated CPU utilization is defined as the maximum between CPU - * utilization and sum of the estimated utilization of the currently - * runnable tasks on that CPU. 
It preserves a utilization "snapshot" of - * previously-executed tasks, which helps better deduce how busy a CPU will - * be when a long-sleeping task wakes up. The contribution to CPU utilization - * of such a task would be significantly decayed at this point of time. - * - * CPU utilization can be higher than the current CPU capacity - * (f_curr/f_max * max CPU capacity) or even the max CPU capacity because - * of rounding errors as well as task migrations or wakeups of new tasks. - * CPU utilization has to be capped to fit into the [0..max CPU capacity] - * range. Otherwise a group of CPUs (CPU0 util = 121% + CPU1 util = 80%) - * could be seen as over-utilized even though CPU1 has 20% of spare CPU - * capacity. CPU utilization is allowed to overshoot current CPU capacity - * though since this is useful for predicting the CPU capacity required - * after task migrations (scheduler-driven DVFS). - * - * Return: (Estimated) utilization for the specified CPU. - */ -static inline unsigned long cpu_util_cfs(int cpu) +static inline unsigned long cpu_util_cfs(struct rq *rq) { - struct cfs_rq *cfs_rq; - unsigned long util; - - cfs_rq = &cpu_rq(cpu)->cfs; - util = READ_ONCE(cfs_rq->avg.util_avg); + unsigned long util = READ_ONCE(rq->cfs.avg.util_avg); if (sched_feat(UTIL_EST)) { util = max_t(unsigned long, util, - READ_ONCE(cfs_rq->avg.util_est.enqueued)); + READ_ONCE(rq->cfs.avg.util_est.enqueued)); } - return min(util, capacity_orig_of(cpu)); + return util; } static inline unsigned long cpu_util_rt(struct rq *rq) diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 07dde2928c..3f93fc3b56 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -4,110 +4,6 @@ */ #include "sched.h" -void __update_stats_wait_start(struct rq *rq, struct task_struct *p, - struct sched_statistics *stats) -{ - u64 wait_start, prev_wait_start; - - wait_start = rq_clock(rq); - prev_wait_start = schedstat_val(stats->wait_start); - - if (p && likely(wait_start > prev_wait_start)) - wait_start -= prev_wait_start; - - __schedstat_set(stats->wait_start, wait_start); -} - -void __update_stats_wait_end(struct rq *rq, struct task_struct *p, - struct sched_statistics *stats) -{ - u64 delta = rq_clock(rq) - schedstat_val(stats->wait_start); - - if (p) { - if (task_on_rq_migrating(p)) { - /* - * Preserve migrating task's wait time so wait_start - * time stamp can be adjusted to accumulate wait time - * prior to migration. 
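Stripped of its kernel-doc, the cpu_util_cfs() body removed above reduces to two clamps: take the larger of the PELT running average and the util_est estimate, then cap the result at the CPU's original capacity. A sketch with plain integers; the three parameters stand in for rq->cfs.avg.util_avg, rq->cfs.avg.util_est.enqueued and capacity_orig_of():

    static unsigned long max_ul(unsigned long a, unsigned long b)
    {
        return a > b ? a : b;
    }

    static unsigned long min_ul(unsigned long a, unsigned long b)
    {
        return a < b ? a : b;
    }

    /* max(util_avg, util_est), capped at original capacity, as in the
     * cpu_util_cfs() variant removed by this hunk. */
    static unsigned long cpu_util_cfs_sketch(unsigned long util_avg,
                                             unsigned long util_est_enqueued,
                                             unsigned long capacity_orig)
    {
        return min_ul(max_ul(util_avg, util_est_enqueued), capacity_orig);
    }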
- */ - __schedstat_set(stats->wait_start, delta); - - return; - } - - trace_sched_stat_wait(p, delta); - } - - __schedstat_set(stats->wait_max, - max(schedstat_val(stats->wait_max), delta)); - __schedstat_inc(stats->wait_count); - __schedstat_add(stats->wait_sum, delta); - __schedstat_set(stats->wait_start, 0); -} - -void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p, - struct sched_statistics *stats) -{ - u64 sleep_start, block_start; - - sleep_start = schedstat_val(stats->sleep_start); - block_start = schedstat_val(stats->block_start); - - if (sleep_start) { - u64 delta = rq_clock(rq) - sleep_start; - - if ((s64)delta < 0) - delta = 0; - - if (unlikely(delta > schedstat_val(stats->sleep_max))) - __schedstat_set(stats->sleep_max, delta); - - __schedstat_set(stats->sleep_start, 0); - __schedstat_add(stats->sum_sleep_runtime, delta); - - if (p) { - account_scheduler_latency(p, delta >> 10, 1); - trace_sched_stat_sleep(p, delta); - } - } - - if (block_start) { - u64 delta = rq_clock(rq) - block_start; - - if ((s64)delta < 0) - delta = 0; - - if (unlikely(delta > schedstat_val(stats->block_max))) - __schedstat_set(stats->block_max, delta); - - __schedstat_set(stats->block_start, 0); - __schedstat_add(stats->sum_sleep_runtime, delta); - __schedstat_add(stats->sum_block_runtime, delta); - - if (p) { - if (p->in_iowait) { - __schedstat_add(stats->iowait_sum, delta); - __schedstat_inc(stats->iowait_count); - trace_sched_stat_iowait(p, delta); - } - - trace_sched_stat_blocked(p, delta); - - /* - * Blocking time is in units of nanosecs, so shift by - * 20 to get a milliseconds-range estimation of the - * amount of time that the task spent sleeping: - */ - if (unlikely(prof_on == SLEEP_PROFILING)) { - profile_hits(SLEEP_PROFILING, - (void *)get_wchan(p), - delta >> 20); - } - account_scheduler_latency(p, delta >> 10, 0); - } - } -} - /* * Current schedstat API version. * diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 3a3c826dd8..606a3982d1 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -2,8 +2,6 @@ #ifdef CONFIG_SCHEDSTATS -extern struct static_key_false sched_schedstats; - /* * Expects runqueue lock to be held for atomicity of update */ @@ -42,31 +40,7 @@ rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) #define schedstat_val(var) (var) #define schedstat_val_or_zero(var) ((schedstat_enabled()) ? 
(var) : 0) -void __update_stats_wait_start(struct rq *rq, struct task_struct *p, - struct sched_statistics *stats); - -void __update_stats_wait_end(struct rq *rq, struct task_struct *p, - struct sched_statistics *stats); -void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p, - struct sched_statistics *stats); - -static inline void -check_schedstat_required(void) -{ - if (schedstat_enabled()) - return; - - /* Force schedstat enabled if a dependent tracepoint is active */ - if (trace_sched_stat_wait_enabled() || - trace_sched_stat_sleep_enabled() || - trace_sched_stat_iowait_enabled() || - trace_sched_stat_blocked_enabled() || - trace_sched_stat_runtime_enabled()) - printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n"); -} - #else /* !CONFIG_SCHEDSTATS: */ - static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { } static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { } static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { } @@ -79,31 +53,8 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt # define schedstat_set(var, val) do { } while (0) # define schedstat_val(var) 0 # define schedstat_val_or_zero(var) 0 - -# define __update_stats_wait_start(rq, p, stats) do { } while (0) -# define __update_stats_wait_end(rq, p, stats) do { } while (0) -# define __update_stats_enqueue_sleeper(rq, p, stats) do { } while (0) -# define check_schedstat_required() do { } while (0) - #endif /* CONFIG_SCHEDSTATS */ -#ifdef CONFIG_FAIR_GROUP_SCHED -struct sched_entity_stats { - struct sched_entity se; - struct sched_statistics stats; -} __no_randomize_layout; -#endif - -static inline struct sched_statistics * -__schedstats_from_se(struct sched_entity *se) -{ -#ifdef CONFIG_FAIR_GROUP_SCHED - if (!entity_is_task(se)) - return &container_of(se, struct sched_entity_stats, se)->stats; -#endif - return &task_of(se)->stats; -} - #ifdef CONFIG_PSI /* * PSI tracks state that persists across sleeps, such as iowaits and diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 0b165a25f2..f988ebe3fe 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -78,8 +78,8 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) if (unlikely((s64)delta_exec < 0)) delta_exec = 0; - schedstat_set(curr->stats.exec_max, - max(curr->stats.exec_max, delta_exec)); + schedstat_set(curr->se.statistics.exec_max, + max(curr->se.statistics.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index d201a7052a..4e8698e62f 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -526,7 +526,7 @@ static int init_rootdomain(struct root_domain *rd) #ifdef HAVE_RT_PUSH_IPI rd->rto_cpu = -1; raw_spin_lock_init(&rd->rto_lock); - rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func); + init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); #endif rd->visit_gen = 0; @@ -688,6 +688,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) { struct rq *rq = cpu_rq(cpu); struct sched_domain *tmp; + int numa_distance = 0; /* Remove the sched domains which do not contribute to scheduling. 
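The __schedstats_from_se() helper removed from stats.h above relies on a standard kernel idiom: co-allocate a wrapper object and recover it from a pointer to an embedded member with container_of(). A stand-alone sketch of the idiom (the macro here is a simplified version of the kernel's, without the typeof check):

    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct sched_entity { int weight; };
    struct sched_statistics { unsigned long long wait_sum; };

    /* Mirrors the deleted sched_entity_stats: group entities are
     * allocated as this wrapper, so their statistics live right
     * behind the embedded sched_entity. */
    struct sched_entity_stats {
        struct sched_entity se;
        struct sched_statistics stats;
    };

    static struct sched_statistics *stats_from_se(struct sched_entity *se)
    {
        return &container_of(se, struct sched_entity_stats, se)->stats;
    }

This is also why the alloc_fair_sched_group() hunk earlier in this patch shrinks the allocation back from sizeof(struct sched_entity_stats) to sizeof(struct sched_entity): without the wrapper, only the bare entity is needed.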
*/ for (tmp = sd; tmp; ) { @@ -715,22 +716,13 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) tmp = sd; sd = sd->parent; destroy_sched_domain(tmp); - if (sd) { - struct sched_group *sg = sd->groups; - - /* - * sched groups hold the flags of the child sched - * domain for convenience. Clear such flags since - * the child is being destroyed. - */ - do { - sg->flags = 0; - } while (sg != sd->groups); - + if (sd) sd->child = NULL; - } } + for (tmp = sd; tmp; tmp = tmp->parent) + numa_distance += !!(tmp->flags & SD_NUMA); + sched_domain_debug(sd, cpu); rq_attach_root(rq, rd); @@ -924,12 +916,10 @@ build_group_from_child_sched_domain(struct sched_domain *sd, int cpu) return NULL; sg_span = sched_group_span(sg); - if (sd->child) { + if (sd->child) cpumask_copy(sg_span, sched_domain_span(sd->child)); - sg->flags = sd->child->flags; - } else { + else cpumask_copy(sg_span, sched_domain_span(sd)); - } atomic_inc(&sg->ref); return sg; @@ -1179,7 +1169,6 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd) if (child) { cpumask_copy(sched_group_span(sg), sched_domain_span(child)); cpumask_copy(group_balance_mask(sg), sched_group_span(sg)); - sg->flags = child->flags; } else { cpumask_set_cpu(cpu, sched_group_span(sg)); cpumask_set_cpu(cpu, group_balance_mask(sg)); @@ -1492,6 +1481,7 @@ static int sched_domains_curr_level; int sched_max_numa_distance; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; +int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; static unsigned long __read_mostly *sched_numa_onlined_nodes; #endif @@ -1567,7 +1557,7 @@ sd_init(struct sched_domain_topology_level *tl, .last_balance = jiffies, .balance_interval = sd_weight, .max_newidle_lb_cost = 0, - .last_decay_max_lb_cost = jiffies, + .next_decay_max_lb_cost = jiffies, .child = child, #ifdef CONFIG_SCHED_DEBUG .name = tl->name, @@ -1637,11 +1627,6 @@ static struct sched_domain_topology_level default_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, #endif - -#ifdef CONFIG_SCHED_CLUSTER - { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) }, -#endif - #ifdef CONFIG_SCHED_MC { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, #endif diff --git a/kernel/signal.c b/kernel/signal.c index 9b04631acd..6e3dbb3d12 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -626,8 +626,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, * * All callers have to hold the siglock. 
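The cpu_attach_domain() hunk above reintroduces `numa_distance += !!(tmp->flags & SD_NUMA)`; the double negation turns any non-zero mask test into exactly 1, so the counter advances one level at a time. A minimal demonstration (the SD_NUMA value here is illustrative only; the real flag is generated from the kernel's sd_flags definitions):

    #include <assert.h>

    #define SD_NUMA 0x4000 /* illustrative bit value, not the kernel's */

    int main(void)
    {
        int levels = 0;

        levels += !!(0x4001UL & SD_NUMA); /* adds 1, not 0x4000 */
        levels += !!(0x0001UL & SD_NUMA); /* adds 0 */
        assert(levels == 1);
        return 0;
    }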
*/ -int dequeue_signal(struct task_struct *tsk, sigset_t *mask, - kernel_siginfo_t *info, enum pid_type *type) +int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info) { bool resched_timer = false; int signr; @@ -635,10 +634,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ - *type = PIDTYPE_PID; signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); if (!signr) { - *type = PIDTYPE_TGID; signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info, &resched_timer); #ifdef CONFIG_POSIX_TIMERS @@ -906,8 +903,8 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) struct task_struct *t; sigset_t flush; - if (signal->flags & SIGNAL_GROUP_EXIT) { - if (signal->core_state) + if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { + if (!(signal->flags & SIGNAL_GROUP_EXIT)) return sig == SIGKILL; /* * The process is in the middle of dying, nothing to do. @@ -1032,7 +1029,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type) * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) && - (signal->core_state || !(signal->flags & SIGNAL_GROUP_EXIT)) && + !(signal->flags & SIGNAL_GROUP_EXIT) && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || !p->ptrace)) { /* @@ -1824,7 +1821,6 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data) * force_sig_seccomp - signals the task to allow in-process syscall emulation * @syscall: syscall number to send to userland * @reason: filter-supplied reason code to send to userland (via si_errno) - * @force_coredump: true to trigger a coredump * * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info. */ @@ -2181,6 +2177,31 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, spin_unlock_irqrestore(&sighand->siglock, flags); } +static inline bool may_ptrace_stop(void) +{ + if (!likely(current->ptrace)) + return false; + /* + * Are we in the middle of do_coredump? + * If so and our tracer is also part of the coredump stopping + * is a deadlock situation, and pointless because our tracer + * is dead so don't allow us to stop. + * If SIGKILL was already sent before the caller unlocked + * ->siglock we must see ->core_state != NULL. Otherwise it + * is safe to enter schedule(). + * + * This is almost outdated, a task with the pending SIGKILL can't + * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported + * after SIGKILL was already dequeued. + */ + if (unlikely(current->mm->core_state) && + unlikely(current->mm == current->parent->mm)) + return false; + + return true; +} + + /* * This must be called with current->sighand->siglock held. * @@ -2198,7 +2219,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t { bool gstop_done = false; - if (arch_ptrace_stop_needed()) { + if (arch_ptrace_stop_needed(exit_code, info)) { /* * The arch code has something special to do before a * ptrace stop. This is allowed to block, e.g. for faults @@ -2208,7 +2229,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t * any signal bookkeeping like checking group_stop_count. 
*/ spin_unlock_irq(&current->sighand->siglock); - arch_ptrace_stop(); + arch_ptrace_stop(exit_code, info); spin_lock_irq(&current->sighand->siglock); } @@ -2261,7 +2282,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t spin_unlock_irq(&current->sighand->siglock); read_lock(&tasklist_lock); - if (likely(current->ptrace)) { + if (may_ptrace_stop()) { /* * Notify parents of the stop. * @@ -2388,8 +2409,7 @@ static bool do_signal_stop(int signr) WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK); if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) || - unlikely(sig->flags & SIGNAL_GROUP_EXIT) || - unlikely(sig->group_exec_task)) + unlikely(signal_group_exit(sig))) return false; /* * There is no group stop already in progress. We must @@ -2550,7 +2570,7 @@ static void do_freezer_trap(void) freezable_schedule(); } -static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) +static int ptrace_signal(int signr, kernel_siginfo_t *info) { /* * We do not check sig_kernel_stop(signr) but set this marker @@ -2590,9 +2610,8 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info) } /* If the (new) signal is now blocked, requeue it. */ - if (sigismember(&current->blocked, signr) || - fatal_signal_pending(current)) { - send_signal(signr, info, current, type); + if (sigismember(&current->blocked, signr)) { + send_signal(signr, info, current, PIDTYPE_PID); signr = 0; } @@ -2693,11 +2712,9 @@ bool get_signal(struct ksignal *ksig) for (;;) { struct k_sigaction *ka; - enum pid_type type; /* Has this task already been marked for death? */ - if ((signal->flags & SIGNAL_GROUP_EXIT) || - signal->group_exec_task) { + if (signal_group_exit(signal)) { ksig->info.si_signo = signr = SIGKILL; sigdelset(&current->pending.signal, SIGKILL); trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, @@ -2737,18 +2754,16 @@ bool get_signal(struct ksignal *ksig) * so that the instruction pointer in the signal stack * frame points to the faulting instruction. */ - type = PIDTYPE_PID; signr = dequeue_synchronous_signal(&ksig->info); if (!signr) - signr = dequeue_signal(current, &current->blocked, - &ksig->info, &type); + signr = dequeue_signal(current, &current->blocked, &ksig->info); if (!signr) break; /* will return 0 */ if (unlikely(current->ptrace) && (signr != SIGKILL) && !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) { - signr = ptrace_signal(signr, &ksig->info, type); + signr = ptrace_signal(signr, &ksig->info); if (!signr) continue; } @@ -2874,13 +2889,13 @@ bool get_signal(struct ksignal *ksig) } /** - * signal_delivered - called after signal delivery to update blocked signals + * signal_delivered - * @ksig: kernel signal struct * @stepping: nonzero if debugger single-step or block-step in use * * This function should be called when a signal has successfully been * delivered. It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask - * is always blocked), and the signal itself is blocked unless %SA_NODEFER + * is always blocked, and the signal itself is blocked unless %SA_NODEFER * is set in @ksig->ka.sa.sa_flags. Tracing is notified. 
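Both ptrace_signal() variants in the hunk above end with the same requeue-if-blocked step, and get_signal() keeps dequeueing while the tracer swallows signals. The control flow, as a hedged stand-alone sketch; the two callbacks abstract the pending-queue dequeue and the whole ptrace filter:

    /* Condensed shape of get_signal()'s dequeue loop above: fetch the
     * next pending signal; a traced signal may be rewritten by the
     * tracer or requeued because it is now blocked (reported as 0),
     * in which case the loop simply tries again. */
    static int deliver_next(int (*dequeue)(void), int (*ptrace_filter)(int))
    {
        for (;;) {
            int signr = dequeue();

            if (!signr)
                return 0; /* nothing deliverable */
            signr = ptrace_filter(signr);
            if (signr)
                return signr;
        }
    }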
*/ static void signal_delivered(struct ksignal *ksig, int stepping) @@ -2953,7 +2968,7 @@ void exit_signals(struct task_struct *tsk) */ cgroup_threadgroup_change_begin(tsk); - if (thread_group_empty(tsk) || (tsk->signal->flags & SIGNAL_GROUP_EXIT)) { + if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; cgroup_threadgroup_change_end(tsk); return; } @@ -3573,7 +3588,6 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, ktime_t *to = NULL, timeout = KTIME_MAX; struct task_struct *tsk = current; sigset_t mask = *which; - enum pid_type type; int sig, ret = 0; if (ts) { @@ -3590,7 +3604,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, signotset(&mask); spin_lock_irq(&tsk->sighand->siglock); - sig = dequeue_signal(tsk, &mask, info, &type); + sig = dequeue_signal(tsk, &mask, info); if (!sig && timeout) { /* * None ready, temporarily unblock those we're interested @@ -3609,7 +3623,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, &tsk->real_blocked); sigemptyset(&tsk->real_blocked); - sig = dequeue_signal(tsk, &mask, info, &type); + sig = dequeue_signal(tsk, &mask, info); } spin_unlock_irq(&tsk->sighand->siglock); @@ -4151,29 +4165,11 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) return 0; } -#ifdef CONFIG_DYNAMIC_SIGFRAME -static inline void sigaltstack_lock(void) - __acquires(&current->sighand->siglock) -{ - spin_lock_irq(&current->sighand->siglock); -} - -static inline void sigaltstack_unlock(void) - __releases(&current->sighand->siglock) -{ - spin_unlock_irq(&current->sighand->siglock); -} -#else -static inline void sigaltstack_lock(void) { } -static inline void sigaltstack_unlock(void) { } -#endif - static int do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, size_t min_ss_size) { struct task_struct *t = current; - int ret = 0; if (oss) { memset(oss, 0, sizeof(stack_t)); @@ -4197,33 +4193,19 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ss_mode != 0)) return -EINVAL; - /* - * Return before taking any locks if no actual - * sigaltstack changes were requested. 
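do_sigtimedwait() above backs the sigtimedwait(2) family of syscalls; the temporary unblock-and-sleep dance is invisible from user space. A minimal, hedged consumer showing the contract (block the signal first so it queues, then wait for it with a timeout):

    #include <signal.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        sigset_t set;
        siginfo_t info;
        struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };

        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);
        /* Block SIGUSR1 so it stays queued instead of being delivered. */
        sigprocmask(SIG_BLOCK, &set, NULL);

        int sig = sigtimedwait(&set, &info, &ts);
        if (sig == SIGUSR1)
            printf("got SIGUSR1 from pid %d\n", (int)info.si_pid);
        else
            perror("sigtimedwait"); /* EAGAIN after the 2 s timeout */
        return 0;
    }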
- */ - if (t->sas_ss_sp == (unsigned long)ss_sp && - t->sas_ss_size == ss_size && - t->sas_ss_flags == ss_flags) - return 0; - - sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; ss_sp = NULL; } else { if (unlikely(ss_size < min_ss_size)) - ret = -ENOMEM; - if (!sigaltstack_size_valid(ss_size)) - ret = -ENOMEM; + return -ENOMEM; } - if (!ret) { - t->sas_ss_sp = (unsigned long) ss_sp; - t->sas_ss_size = ss_size; - t->sas_ss_flags = ss_flags; - } - sigaltstack_unlock(); + + t->sas_ss_sp = (unsigned long) ss_sp; + t->sas_ss_size = ss_size; + t->sas_ss_flags = ss_flags; } - return ret; + return 0; } SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) diff --git a/kernel/smp.c b/kernel/smp.c index 01a7c1706a..f43ede0ab1 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -1170,12 +1170,14 @@ void wake_up_all_idle_cpus(void) { int cpu; - for_each_possible_cpu(cpu) { - preempt_disable(); - if (cpu != smp_processor_id() && cpu_online(cpu)) - wake_up_if_idle(cpu); - preempt_enable(); + preempt_disable(); + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + + wake_up_if_idle(cpu); } + preempt_enable(); } EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus); diff --git a/kernel/softirq.c b/kernel/softirq.c index 41f470929e..322b65d456 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -595,8 +595,7 @@ void irq_enter_rcu(void) { __irq_enter_raw(); - if (tick_nohz_full_cpu(smp_processor_id()) || - (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET))) + if (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)) tick_irq_enter(); account_hardirq_enter(current); diff --git a/kernel/stackleak.c b/kernel/stackleak.c index ddb5a7f48d..dd07239ddf 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -16,13 +16,11 @@ #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE #include #include -#include static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); -#ifdef CONFIG_SYSCTL -static int stack_erasing_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +int stack_erasing_sysctl(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; int state = !static_branch_unlikely(&stack_erasing_bypass); @@ -44,26 +42,6 @@ static int stack_erasing_sysctl(struct ctl_table *table, int write, state ? 
"enabled" : "disabled"); return ret; } -static struct ctl_table stackleak_sysctls[] = { - { - .procname = "stack_erasing", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = stack_erasing_sysctl, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - {} -}; - -static int __init stackleak_sysctls_init(void) -{ - register_sysctl_init("kernel", stackleak_sysctls); - return 0; -} -late_initcall(stackleak_sysctls_init); -#endif /* CONFIG_SYSCTL */ #define skip_erasing() static_branch_unlikely(&stack_erasing_bypass) #else diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 9c62525702..9f8117c7cf 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -13,7 +13,6 @@ #include #include #include -#include /** * stack_trace_print - Print the entries in the stack trace @@ -374,32 +373,3 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) #endif /* CONFIG_USER_STACKTRACE_SUPPORT */ #endif /* !CONFIG_ARCH_STACKWALK */ - -static inline bool in_irqentry_text(unsigned long ptr) -{ - return (ptr >= (unsigned long)&__irqentry_text_start && - ptr < (unsigned long)&__irqentry_text_end) || - (ptr >= (unsigned long)&__softirqentry_text_start && - ptr < (unsigned long)&__softirqentry_text_end); -} - -/** - * filter_irq_stacks - Find first IRQ stack entry in trace - * @entries: Pointer to stack trace array - * @nr_entries: Number of entries in the storage array - * - * Return: Number of trace entries until IRQ stack starts. - */ -unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries) -{ - unsigned int i; - - for (i = 0; i < nr_entries; i++) { - if (in_irqentry_text(entries[i])) { - /* Include the irqentry function into the stack. */ - return i + 1; - } - } - return nr_entries; -} -EXPORT_SYMBOL_GPL(filter_irq_stacks); diff --git a/kernel/sys.c b/kernel/sys.c index 97dc9e5d6b..3e4e8930fa 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -220,6 +220,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) niceval = MAX_NICE; rcu_read_lock(); + read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: if (who) @@ -234,11 +235,9 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) pgrp = find_vpid(who); else pgrp = task_pgrp(current); - read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { error = set_one_prio(p, niceval, error); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); - read_unlock(&tasklist_lock); break; case PRIO_USER: uid = make_kuid(cred->user_ns, who); @@ -250,15 +249,16 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) if (!user) goto out_unlock; /* No processes for this user */ } - for_each_process_thread(g, p) { + do_each_thread(g, p) { if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) error = set_one_prio(p, niceval, error); - } + } while_each_thread(g, p); if (!uid_eq(uid, cred->uid)) free_uid(user); /* For find_user() */ break; } out_unlock: + read_unlock(&tasklist_lock); rcu_read_unlock(); out: return error; @@ -283,6 +283,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) return -EINVAL; rcu_read_lock(); + read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: if (who) @@ -300,13 +301,11 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) pgrp = find_vpid(who); else pgrp = task_pgrp(current); - read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { niceval = nice_to_rlimit(task_nice(p)); if (niceval > retval) retval = niceval; } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); - read_unlock(&tasklist_lock); break; 
case PRIO_USER: uid = make_kuid(cred->user_ns, who); @@ -318,18 +317,19 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) if (!user) goto out_unlock; /* No processes for this user */ } - for_each_process_thread(g, p) { + do_each_thread(g, p) { if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) { niceval = nice_to_rlimit(task_nice(p)); if (niceval > retval) retval = niceval; } - } + } while_each_thread(g, p); if (!uid_eq(uid, cred->uid)) free_uid(user); /* for find_user() */ break; } out_unlock: + read_unlock(&tasklist_lock); rcu_read_unlock(); return retval; @@ -2269,66 +2269,6 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) -#ifdef CONFIG_ANON_VMA_NAME - -#define ANON_VMA_NAME_MAX_LEN 80 -#define ANON_VMA_NAME_INVALID_CHARS "\\`$[]" - -static inline bool is_valid_name_char(char ch) -{ - /* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */ - return ch > 0x1f && ch < 0x7f && - !strchr(ANON_VMA_NAME_INVALID_CHARS, ch); -} - -static int prctl_set_vma(unsigned long opt, unsigned long addr, - unsigned long size, unsigned long arg) -{ - struct mm_struct *mm = current->mm; - const char __user *uname; - char *name, *pch; - int error; - - switch (opt) { - case PR_SET_VMA_ANON_NAME: - uname = (const char __user *)arg; - if (uname) { - name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN); - - if (IS_ERR(name)) - return PTR_ERR(name); - - for (pch = name; *pch != '\0'; pch++) { - if (!is_valid_name_char(*pch)) { - kfree(name); - return -EINVAL; - } - } - } else { - /* Reset the name */ - name = NULL; - } - - mmap_write_lock(mm); - error = madvise_set_anon_name(mm, addr, size, name); - mmap_write_unlock(mm); - kfree(name); - break; - default: - error = -EINVAL; - } - - return error; -} - -#else /* CONFIG_ANON_VMA_NAME */ -static int prctl_set_vma(unsigned long opt, unsigned long start, - unsigned long size, unsigned long arg) -{ - return -EINVAL; -} -#endif /* CONFIG_ANON_VMA_NAME */ - SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2598,9 +2538,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = sched_core_share_pid(arg2, arg3, arg4, arg5); break; #endif - case PR_SET_VMA: - error = prctl_set_vma(arg2, arg3, arg4, arg5); - break; default: error = -EINVAL; break; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a492f15962..f43d89d928 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -143,14 +143,13 @@ COND_SYSCALL(capset); /* __ARCH_WANT_SYS_CLONE3 */ COND_SYSCALL(clone3); -/* kernel/futex/syscalls.c */ +/* kernel/futex.c */ COND_SYSCALL(futex); COND_SYSCALL(futex_time32); COND_SYSCALL(set_robust_list); COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); -COND_SYSCALL(futex_waitv); /* kernel/hrtimer.c */ @@ -297,7 +296,6 @@ COND_SYSCALL(get_mempolicy); COND_SYSCALL(set_mempolicy); COND_SYSCALL(migrate_pages); COND_SYSCALL(move_pages); -COND_SYSCALL(set_mempolicy_home_node); COND_SYSCALL(perf_event_open); COND_SYSCALL(accept4); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5ae443b288..0586047f73 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -56,15 +57,19 @@ #include 
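The getpriority() paths above report nice values through nice_to_rlimit(), which maps the kernel's [-20, 19] nice range onto [1, 40] so that a larger result always means a higher priority and 0 stays free as an error indicator. A sketch of the mapping as I understand it from include/linux/sched/prio.h; conveniently, the transform is its own inverse:

    #define MAX_NICE 19
    #define MIN_NICE -20

    /* nice -20 -> 40 (highest), nice 19 -> 1 (lowest), 0 reserved. */
    static long nice_to_rlimit(long nice)
    {
        return MAX_NICE - nice + 1;
    }

    /* Inverse mapping, used when a priority comes back from user space. */
    static long rlimit_to_nice(long prio)
    {
        return MAX_NICE - prio + 1;
    }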
#include #include +#include +#include #include #include #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -91,21 +96,64 @@ #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) #include #endif +#ifdef CONFIG_CHR_DEV_SG +#include +#endif +#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE +#include +#endif +#ifdef CONFIG_LOCKUP_DETECTOR +#include +#endif #if defined(CONFIG_SYSCTL) /* Constants used for minimum and maximum */ +#ifdef CONFIG_LOCKUP_DETECTOR +static int sixty = 60; +#endif +static int __maybe_unused neg_one = -1; +static int __maybe_unused two = 2; +static int __maybe_unused four = 4; +static unsigned long zero_ul; +static unsigned long one_ul = 1; +static unsigned long long_max = LONG_MAX; +static int one_hundred = 100; +static int two_hundred = 200; +static int one_thousand = 1000; +#ifdef CONFIG_PRINTK +static int ten_thousand = 10000; +#endif #ifdef CONFIG_PERF_EVENTS -static const int six_hundred_forty_kb = 640 * 1024; +static int six_hundred_forty_kb = 640 * 1024; #endif /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ -static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; +static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; -static const int ngroups_max = NGROUPS_MAX; +/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ +static int maxolduid = 65535; +static int minolduid; + +static int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; +/* + * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs + * and hung_task_check_interval_secs + */ +#ifdef CONFIG_DETECT_HUNG_TASK +static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); +#endif + +#ifdef CONFIG_INOTIFY_USER +#include +#endif +#ifdef CONFIG_FANOTIFY +#include +#endif + #ifdef CONFIG_PROC_SYSCTL /** @@ -142,8 +190,8 @@ int sysctl_legacy_va_layout; #endif #ifdef CONFIG_COMPACTION -/* min_extfrag_threshold is SYSCTL_ZERO */; -static const int max_extfrag_threshold = 1000; +static int min_extfrag_threshold; +static int max_extfrag_threshold = 1000; #endif #endif /* CONFIG_SYSCTL */ @@ -180,6 +228,10 @@ static int bpf_stats_handler(struct ctl_table *table, int write, return ret; } +void __weak unpriv_ebpf_notify(int new_state) +{ +} + static int bpf_unpriv_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -197,6 +249,9 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write, return -EPERM; *(int *)table->data = unpriv_enable; } + + unpriv_ebpf_notify(unpriv_enable); + return ret; } #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ @@ -754,12 +809,12 @@ static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data); } -int do_proc_douintvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos, - int (*conv)(unsigned long *lvalp, - unsigned int *valp, - int write, void *data), - void *data) +static int do_proc_douintvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos, + int (*conv)(unsigned long *lvalp, + unsigned int *valp, + int write, void *data), + void *data) { return __do_proc_douintvec(table->data, table, write, buffer, lenp, ppos, conv, data); @@ -888,6 +943,17 @@ static int proc_taint(struct ctl_table *table, int write, return err; } +#ifdef CONFIG_PRINTK +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t 
*ppos) +{ + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); +} +#endif + /** * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure * @min: pointer to minimum allowable value @@ -1083,6 +1149,67 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); +static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, + unsigned int *valp, + int write, void *data) +{ + if (write) { + unsigned int val; + + val = round_pipe_size(*lvalp); + if (val == 0) + return -EINVAL; + + *valp = val; + } else { + unsigned int val = *valp; + *lvalp = (unsigned long) val; + } + + return 0; +} + +static int proc_dopipe_max_size(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + return do_proc_douintvec(table, write, buffer, lenp, ppos, + do_proc_dopipe_max_size_conv, NULL); +} + +static void validate_coredump_safety(void) +{ +#ifdef CONFIG_COREDUMP + if (suid_dumpable == SUID_DUMP_ROOT && + core_pattern[0] != '/' && core_pattern[0] != '|') { + printk(KERN_WARNING +"Unsafe core_pattern used with fs.suid_dumpable=2.\n" +"Pipe handler or fully qualified core dump path required.\n" +"Set kernel.core_pattern before fs.suid_dumpable.\n" + ); + } +#endif +} + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + +#ifdef CONFIG_COREDUMP +static int proc_dostring_coredump(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dostring(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} +#endif + #ifdef CONFIG_MAGIC_SYSRQ static int sysrq_sysctl_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -1145,11 +1272,10 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = proc_get_long(&p, &left, &val, &neg, proc_wspace_sep, sizeof(proc_wspace_sep), NULL); - if (err || neg) { - err = -EINVAL; + if (err) break; - } - + if (neg) + continue; val = convmul * val / convdiv; if ((min && val < *min) || (max && val > *max)) { err = -EINVAL; @@ -1807,6 +1933,29 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_COREDUMP + { + .procname = "core_uses_pid", + .data = &core_uses_pid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "core_pattern", + .data = core_pattern, + .maxlen = CORENAME_MAX_SIZE, + .mode = 0644, + .proc_handler = proc_dostring_coredump, + }, + { + .procname = "core_pipe_limit", + .data = &core_pipe_limit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -1820,7 +1969,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_NEG_ONE, + .extra1 = &neg_one, .extra2 = SYSCTL_ONE, }, #endif @@ -1987,6 +2136,15 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dostring, }, #endif +#ifdef CONFIG_CHR_DEV_SG + { + .procname = "sg-big-buff", + .data = &sg_big_buff, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, +#endif #ifdef 
CONFIG_BSD_PROCESS_ACCT { .procname = "acct", @@ -2021,19 +2179,31 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sysctl_max_threads, }, + { + .procname = "random", + .mode = 0555, + .child = random_table, + }, { .procname = "usermodehelper", .mode = 0555, .child = usermodehelper_table, }, +#ifdef CONFIG_FW_LOADER_USER_HELPER + { + .procname = "firmware_config", + .mode = 0555, + .child = firmware_config_table, + }, +#endif { .procname = "overflowuid", .data = &overflowuid, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_MAXOLDUID, + .extra1 = &minolduid, + .extra2 = &maxolduid, }, { .procname = "overflowgid", @@ -2041,8 +2211,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_MAXOLDUID, + .extra1 = &minolduid, + .extra2 = &maxolduid, }, #ifdef CONFIG_S390 { @@ -2087,9 +2257,66 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, +#if defined CONFIG_PRINTK + { + .procname = "printk", + .data = &console_loglevel, + .maxlen = 4*sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "printk_ratelimit", + .data = &printk_ratelimit_state.interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "printk_ratelimit_burst", + .data = &printk_ratelimit_state.burst, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "printk_delay", + .data = &printk_delay_msec, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &ten_thousand, + }, + { + .procname = "printk_devkmsg", + .data = devkmsg_log_str, + .maxlen = DEVKMSG_STR_MAX_SIZE, + .mode = 0644, + .proc_handler = devkmsg_sysctl_set_loglvl, + }, + { + .procname = "dmesg_restrict", + .data = &dmesg_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "kptr_restrict", + .data = &kptr_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, + }, +#endif { .procname = "ngroups_max", - .data = (void *)&ngroups_max, + .data = &ngroups_max, .maxlen = sizeof (int), .mode = 0444, .proc_handler = proc_dointvec, @@ -2101,6 +2328,96 @@ static struct ctl_table kern_table[] = { .mode = 0444, .proc_handler = proc_dointvec, }, +#if defined(CONFIG_LOCKUP_DETECTOR) + { + .procname = "watchdog", + .data = &watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "watchdog_thresh", + .data = &watchdog_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_watchdog_thresh, + .extra1 = SYSCTL_ZERO, + .extra2 = &sixty, + }, + { + .procname = "nmi_watchdog", + .data = &nmi_watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = NMI_WATCHDOG_SYSCTL_PERM, + .proc_handler = proc_nmi_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "watchdog_cpumask", + .data = &watchdog_cpumask_bits, + .maxlen = NR_CPUS, + .mode = 0644, + .proc_handler = proc_watchdog_cpumask, + }, +#ifdef CONFIG_SOFTLOCKUP_DETECTOR + { + .procname = "soft_watchdog", + .data = 
&soft_watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_soft_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#ifdef CONFIG_SMP + { + .procname = "softlockup_all_cpu_backtrace", + .data = &sysctl_softlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ +#endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR + { + .procname = "hardlockup_panic", + .data = &hardlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#ifdef CONFIG_SMP + { + .procname = "hardlockup_all_cpu_backtrace", + .data = &sysctl_hardlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ +#endif +#endif + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) { .procname = "unknown_nmi_panic", @@ -2203,6 +2520,60 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_DETECT_HUNG_TASK +#ifdef CONFIG_SMP + { + .procname = "hung_task_all_cpu_backtrace", + .data = &sysctl_hung_task_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ + { + .procname = "hung_task_panic", + .data = &sysctl_hung_task_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "hung_task_check_count", + .data = &sysctl_hung_task_check_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, + { + .procname = "hung_task_timeout_secs", + .data = &sysctl_hung_task_timeout_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, + }, + { + .procname = "hung_task_check_interval_secs", + .data = &sysctl_hung_task_check_interval_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, + }, + { + .procname = "hung_task_warnings", + .data = &sysctl_hung_task_warnings, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &neg_one, + }, +#endif #ifdef CONFIG_RT_MUTEXES { .procname = "max_lock_depth", @@ -2262,7 +2633,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_cpu_time_max_percent_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, + .extra2 = &one_hundred, }, { .procname = "perf_event_max_stack", @@ -2271,7 +2642,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_event_max_stack_handler, .extra1 = SYSCTL_ZERO, - .extra2 = (void *)&six_hundred_forty_kb, + .extra2 = &six_hundred_forty_kb, }, { .procname = "perf_event_max_contexts_per_stack", @@ -2280,7 +2651,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_event_max_stack_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_THOUSAND, + .extra2 = &one_thousand, }, #endif { 
@@ -2311,7 +2682,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = bpf_unpriv_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, + .extra2 = &two, }, { .procname = "bpf_stats_enabled", @@ -2342,6 +2713,17 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, +#endif +#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE + { + .procname = "stack_erasing", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = stack_erasing_sysctl, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif { } }; @@ -2354,7 +2736,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = overcommit_policy_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, + .extra2 = &two, }, { .procname = "panic_on_oom", @@ -2363,7 +2745,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, + .extra2 = &two, }, { .procname = "oom_kill_allocating_task", @@ -2408,7 +2790,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_background_ratio_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, + .extra2 = &one_hundred, }, { .procname = "dirty_background_bytes", @@ -2416,7 +2798,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(dirty_background_bytes), .mode = 0644, .proc_handler = dirty_background_bytes_handler, - .extra1 = SYSCTL_LONG_ONE, + .extra1 = &one_ul, }, { .procname = "dirty_ratio", @@ -2425,7 +2807,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_ratio_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, + .extra2 = &one_hundred, }, { .procname = "dirty_bytes", @@ -2433,7 +2815,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(vm_dirty_bytes), .mode = 0644, .proc_handler = dirty_bytes_handler, - .extra1 = (void *)&dirty_bytes_min, + .extra1 = &dirty_bytes_min, }, { .procname = "dirty_writeback_centisecs", @@ -2465,7 +2847,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO_HUNDRED, + .extra2 = &two_hundred, }, #ifdef CONFIG_HUGETLB_PAGE { @@ -2522,7 +2904,7 @@ static struct ctl_table vm_table[] = { .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_FOUR, + .extra2 = &four, }, #ifdef CONFIG_COMPACTION { @@ -2539,7 +2921,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = compaction_proactiveness_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, + .extra2 = &one_hundred, }, { .procname = "extfrag_threshold", @@ -2547,8 +2929,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = (void *)&max_extfrag_threshold, + .extra1 = &min_extfrag_threshold, + .extra2 = &max_extfrag_threshold, }, { .procname = "compact_unevictable_allowed", @@ -2584,7 +2966,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = watermark_scale_factor_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_THREE_THOUSAND, + .extra2 = &one_thousand, }, { .procname = "percpu_pagelist_high_fraction", @@ -2663,7 +3045,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, + .extra2 = &one_hundred, }, { .procname = "min_slab_ratio", @@ -2672,7 +3054,7 @@ 
static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_slab_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, + .extra2 = &one_hundred, }, #endif #ifdef CONFIG_SMP @@ -2806,6 +3188,221 @@ static struct ctl_table vm_table[] = { { } }; +static struct ctl_table fs_table[] = { + { + .procname = "inode-nr", + .data = &inodes_stat, + .maxlen = 2*sizeof(long), + .mode = 0444, + .proc_handler = proc_nr_inodes, + }, + { + .procname = "inode-state", + .data = &inodes_stat, + .maxlen = 7*sizeof(long), + .mode = 0444, + .proc_handler = proc_nr_inodes, + }, + { + .procname = "file-nr", + .data = &files_stat, + .maxlen = sizeof(files_stat), + .mode = 0444, + .proc_handler = proc_nr_files, + }, + { + .procname = "file-max", + .data = &files_stat.max_files, + .maxlen = sizeof(files_stat.max_files), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &zero_ul, + .extra2 = &long_max, + }, + { + .procname = "nr_open", + .data = &sysctl_nr_open, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &sysctl_nr_open_min, + .extra2 = &sysctl_nr_open_max, + }, + { + .procname = "dentry-state", + .data = &dentry_stat, + .maxlen = 6*sizeof(long), + .mode = 0444, + .proc_handler = proc_nr_dentry, + }, + { + .procname = "overflowuid", + .data = &fs_overflowuid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &minolduid, + .extra2 = &maxolduid, + }, + { + .procname = "overflowgid", + .data = &fs_overflowgid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &minolduid, + .extra2 = &maxolduid, + }, +#ifdef CONFIG_FILE_LOCKING + { + .procname = "leases-enable", + .data = &leases_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_DNOTIFY + { + .procname = "dir-notify-enable", + .data = &dir_notify_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_MMU +#ifdef CONFIG_FILE_LOCKING + { + .procname = "lease-break-time", + .data = &lease_break_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_AIO + { + .procname = "aio-nr", + .data = &aio_nr, + .maxlen = sizeof(aio_nr), + .mode = 0444, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "aio-max-nr", + .data = &aio_max_nr, + .maxlen = sizeof(aio_max_nr), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, +#endif /* CONFIG_AIO */ +#ifdef CONFIG_INOTIFY_USER + { + .procname = "inotify", + .mode = 0555, + .child = inotify_table, + }, +#endif +#ifdef CONFIG_FANOTIFY + { + .procname = "fanotify", + .mode = 0555, + .child = fanotify_table, + }, +#endif +#ifdef CONFIG_EPOLL + { + .procname = "epoll", + .mode = 0555, + .child = epoll_table, + }, +#endif +#endif + { + .procname = "protected_symlinks", + .data = &sysctl_protected_symlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "protected_hardlinks", + .data = &sysctl_protected_hardlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "protected_fifos", + .data = &sysctl_protected_fifos, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, + }, + { + 
.procname = "protected_regular", + .data = &sysctl_protected_regular, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, + }, + { + .procname = "suid_dumpable", + .data = &suid_dumpable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_coredump, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, + }, +#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) + { + .procname = "binfmt_misc", + .mode = 0555, + .child = sysctl_mount_point, + }, +#endif + { + .procname = "pipe-max-size", + .data = &pipe_max_size, + .maxlen = sizeof(pipe_max_size), + .mode = 0644, + .proc_handler = proc_dopipe_max_size, + }, + { + .procname = "pipe-user-pages-hard", + .data = &pipe_user_pages_hard, + .maxlen = sizeof(pipe_user_pages_hard), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "pipe-user-pages-soft", + .data = &pipe_user_pages_soft, + .maxlen = sizeof(pipe_user_pages_soft), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "mount-max", + .data = &sysctl_mount_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, + { } +}; + static struct ctl_table debug_table[] = { #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE { @@ -2815,6 +3412,17 @@ static struct ctl_table debug_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#endif +#if defined(CONFIG_OPTPROBES) + { + .procname = "kprobes-optimization", + .data = &sysctl_kprobes_optimization, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_kprobes_optimization_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif { } }; @@ -2823,18 +3431,41 @@ static struct ctl_table dev_table[] = { { } }; -DECLARE_SYSCTL_BASE(kernel, kern_table); -DECLARE_SYSCTL_BASE(vm, vm_table); -DECLARE_SYSCTL_BASE(debug, debug_table); -DECLARE_SYSCTL_BASE(dev, dev_table); +static struct ctl_table sysctl_base_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = kern_table, + }, + { + .procname = "vm", + .mode = 0555, + .child = vm_table, + }, + { + .procname = "fs", + .mode = 0555, + .child = fs_table, + }, + { + .procname = "debug", + .mode = 0555, + .child = debug_table, + }, + { + .procname = "dev", + .mode = 0555, + .child = dev_table, + }, + { } +}; -int __init sysctl_init_bases(void) +int __init sysctl_init(void) { - register_sysctl_base(kernel); - register_sysctl_base(vm); - register_sysctl_base(debug); - register_sysctl_base(dev); + struct ctl_table_header *hdr; + hdr = register_sysctl_table(sysctl_base_table); + kmemleak_not_leak(hdr); return 0; } #endif /* CONFIG_SYSCTL */ diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index 0dbab6d1ac..76c997fdbc 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c @@ -1,17 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * test_kprobes.c - simple sanity test for *probes * * Copyright IBM Corp. 2008 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
*/ #define pr_fmt(fmt) "Kprobe smoke test: " fmt @@ -22,7 +13,7 @@ #define div_factor 3 -static u32 rand1, preh_val, posth_val, jph_val; +static u32 rand1, preh_val, posth_val; static int errors, handler_errors, num_tests; static u32 (*target)(u32 value); static u32 (*target2)(u32 value); @@ -34,6 +25,10 @@ static noinline u32 kprobe_target(u32 value) static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs) { + if (preemptible()) { + handler_errors++; + pr_err("pre-handler is preemptible\n"); + } preh_val = (rand1 / div_factor); return 0; } @@ -41,6 +36,10 @@ static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs) static void kp_post_handler(struct kprobe *p, struct pt_regs *regs, unsigned long flags) { + if (preemptible()) { + handler_errors++; + pr_err("post-handler is preemptible\n"); + } if (preh_val != (rand1 / div_factor)) { handler_errors++; pr_err("incorrect value in post_handler\n"); @@ -154,84 +153,15 @@ static int test_kprobes(void) } -static u32 j_kprobe_target(u32 value) -{ - if (value != rand1) { - handler_errors++; - pr_err("incorrect value in jprobe handler\n"); - } - - jph_val = rand1; - jprobe_return(); - return 0; -} - -static struct jprobe jp = { - .entry = j_kprobe_target, - .kp.symbol_name = "kprobe_target" -}; - -static int test_jprobe(void) -{ - int ret; - - ret = register_jprobe(&jp); - if (ret < 0) { - pr_err("register_jprobe returned %d\n", ret); - return ret; - } - - ret = target(rand1); - unregister_jprobe(&jp); - if (jph_val == 0) { - pr_err("jprobe handler not called\n"); - handler_errors++; - } - - return 0; -} - -static struct jprobe jp2 = { - .entry = j_kprobe_target, - .kp.symbol_name = "kprobe_target2" -}; - -static int test_jprobes(void) -{ - int ret; - struct jprobe *jps[2] = {&jp, &jp2}; - - /* addr and flags should be cleard for reusing kprobe. 
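The test additions assert that kprobe handlers never run preemptible. The shape of such a check outside the test, as an illustrative sketch (the probed symbol and demo_* names are placeholders, not from the patch):

```c
#include <linux/kprobes.h>
#include <linux/preempt.h>
#include <linux/printk.h>

static int demo_pre(struct kprobe *p, struct pt_regs *regs)
{
	/* Handlers are entered with preemption disabled; preemptible()
	 * being true here would indicate an arch breakpoint-path bug. */
	if (preemptible())
		pr_err("kprobe pre-handler unexpectedly preemptible\n");
	return 0;	/* 0: continue with normal single-step handling */
}

static struct kprobe demo_kp = {
	.symbol_name	= "kernel_clone",	/* placeholder probe target */
	.pre_handler	= demo_pre,
};

/* Arm with register_kprobe(&demo_kp); tear down with unregister_kprobe(). */
```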
*/ - jp.kp.addr = NULL; - jp.kp.flags = 0; - ret = register_jprobes(jps, 2); - if (ret < 0) { - pr_err("register_jprobes returned %d\n", ret); - return ret; - } - - jph_val = 0; - ret = target(rand1); - if (jph_val == 0) { - pr_err("jprobe handler not called\n"); - handler_errors++; - } - - jph_val = 0; - ret = target2(rand1); - if (jph_val == 0) { - pr_err("jprobe handler2 not called\n"); - handler_errors++; - } - unregister_jprobes(jps, 2); - - return 0; -} #ifdef CONFIG_KRETPROBES static u32 krph_val; static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { + if (preemptible()) { + handler_errors++; + pr_err("kretprobe entry handler is preemptible\n"); + } krph_val = (rand1 / div_factor); return 0; } @@ -240,6 +170,10 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long ret = regs_return_value(regs); + if (preemptible()) { + handler_errors++; + pr_err("kretprobe return handler is preemptible\n"); + } if (ret != (rand1 / div_factor)) { handler_errors++; pr_err("incorrect value in kretprobe handler\n"); @@ -356,16 +290,6 @@ int init_test_probes(void) if (ret < 0) errors++; - num_tests++; - ret = test_jprobe(); - if (ret < 0) - errors++; - - num_tests++; - ret = test_jprobes(); - if (ret < 0) - errors++; - #ifdef CONFIG_KRETPROBES num_tests++; ret = test_kretprobe(); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1cf73807b4..bcad1a1e5d 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -199,7 +199,7 @@ void clocksource_mark_unstable(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } -ulong max_cswd_read_retries = 2; +ulong max_cswd_read_retries = 3; module_param(max_cswd_read_retries, ulong, 0644); EXPORT_SYMBOL_GPL(max_cswd_read_retries); static int verify_n_cpus = 8; @@ -285,7 +285,7 @@ static void clocksource_verify_choose_cpus(void) return; /* Make sure to select at least one CPU other than the current CPU. */ - cpu = cpumask_first(cpu_online_mask); + cpu = cpumask_next(-1, cpu_online_mask); if (cpu == smp_processor_id()) cpu = cpumask_next(cpu, cpu_online_mask); if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) @@ -307,7 +307,7 @@ static void clocksource_verify_choose_cpus(void) cpu = prandom_u32() % nr_cpu_ids; cpu = cpumask_next(cpu - 1, cpu_online_mask); if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); + cpu = cpumask_next(-1, cpu_online_mask); if (!WARN_ON_ONCE(cpu >= nr_cpu_ids)) cpumask_set_cpu(cpu, &cpus_chosen); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 17a283ce2b..6bffe5af8c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1375,13 +1375,6 @@ static inline void tick_nohz_irq_enter(void) now = ktime_get(); if (ts->idle_active) tick_nohz_stop_idle(ts, now); - /* - * If all CPUs are idle. We may need to update a stale jiffies value. - * Note nohz_full is a special case: a timekeeper is guaranteed to stay - * alive but it might be busy looping with interrupts disabled in some - * rare case (typically stop machine). So we must make sure we have a - * last resort. 
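The clocksource hunks replace cpumask_first() with the equivalent cpumask_next(-1, ...) spelling; both yield the lowest set CPU. The selection logic in isolation (a sketch; assumes preemption is disabled so smp_processor_id() is stable):

```c
#include <linux/cpumask.h>
#include <linux/smp.h>

/* Pick the first online CPU that is not the current one, mirroring the
 * clocksource_verify_choose_cpus() fallback. Returns -1 if none exists. */
static int demo_pick_other_cpu(void)
{
	int cpu = cpumask_next(-1, cpu_online_mask);	/* == cpumask_first() */

	if (cpu == smp_processor_id())
		cpu = cpumask_next(cpu, cpu_online_mask);
	return cpu < nr_cpu_ids ? cpu : -1;
}
```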
- */ if (ts->tick_stopped) tick_nohz_update_jiffies(now); } diff --git a/kernel/torture.c b/kernel/torture.c index ef27a6c824..bb8f411c97 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -570,7 +570,7 @@ int torture_shuffle_init(long shuffint) shuffle_idle_cpu = -1; if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { - TOROUT_ERRSTRING("Failed to alloc mask"); + VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask"); return -ENOMEM; } @@ -934,7 +934,7 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, *tp = kthread_run(fn, arg, "%s", s); if (IS_ERR(*tp)) { ret = PTR_ERR(*tp); - TOROUT_ERRSTRING(f); + VERBOSE_TOROUT_ERRSTRING(f); *tp = NULL; } torture_shuffle_task_register(*tp); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a5eb5e7fd6..420ff4bc67 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -70,19 +70,6 @@ config HAVE_C_RECORDMCOUNT help C version of recordmcount available? -config HAVE_BUILDTIME_MCOUNT_SORT - bool - help - An architecture selects this if it sorts the mcount_loc section - at build time. - -config BUILDTIME_MCOUNT_SORT - bool - default y - depends on HAVE_BUILDTIME_MCOUNT_SORT && DYNAMIC_FTRACE - help - Sort the mcount_loc section at build time. - config TRACER_MAX_TRACE bool @@ -928,20 +915,6 @@ config EVENT_TRACE_TEST_SYSCALLS TBD - enable a way to actually call the syscalls as we test their events -config FTRACE_SORT_STARTUP_TEST - bool "Verify compile time sorting of ftrace functions" - depends on DYNAMIC_FTRACE - depends on BUILDTIME_MCOUNT_SORT - help - Sorting of the mcount_loc sections that is used to find the - where the ftrace knows where to patch functions for tracing - and other callbacks is done at compile time. But if the sort - is not done correctly, it will cause non-deterministic failures. - When this is set, the sorted sections will be verified that they - are in deed sorted and will warn if they are not. 
- - If unsure, say N - config RING_BUFFER_STARTUP_TEST bool "Ring buffer startup self test" depends on RING_BUFFER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index bedc5cacee..6de5d4d631 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -47,7 +47,6 @@ obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o -obj-$(CONFIG_TRACING) += pid_list.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index af68a67179..c42ff77eb6 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -34,7 +34,7 @@ static struct trace_array *blk_tr; static bool blk_tracer_enabled __read_mostly; static LIST_HEAD(running_trace_list); -static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(running_trace_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock); /* Select an alternative, minimalistic output than the original one */ #define TRACE_BLK_OPT_CLASSIC 0x1 @@ -121,12 +121,12 @@ static void trace_note_tsk(struct task_struct *tsk) struct blk_trace *bt; tsk->btrace_seq = blktrace_seq; - raw_spin_lock_irqsave(&running_trace_lock, flags); + spin_lock_irqsave(&running_trace_lock, flags); list_for_each_entry(bt, &running_trace_list, running_list) { trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm), 0); } - raw_spin_unlock_irqrestore(&running_trace_lock, flags); + spin_unlock_irqrestore(&running_trace_lock, flags); } static void trace_note_time(struct blk_trace *bt) @@ -310,10 +310,20 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, local_irq_restore(flags); } -static void blk_trace_free(struct blk_trace *bt) +static void blk_trace_free(struct request_queue *q, struct blk_trace *bt) { relay_close(bt->rchan); - debugfs_remove(bt->dir); + + /* + * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created + * under 'q->debugfs_dir', thus lookup and remove them. 
+ */ + if (!bt->dir) { + debugfs_remove(debugfs_lookup("dropped", q->debugfs_dir)); + debugfs_remove(debugfs_lookup("msg", q->debugfs_dir)); + } else { + debugfs_remove(bt->dir); + } free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); @@ -335,10 +345,10 @@ static void put_probe_ref(void) mutex_unlock(&blk_probe_mutex); } -static void blk_trace_cleanup(struct blk_trace *bt) +static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt) { synchronize_rcu(); - blk_trace_free(bt); + blk_trace_free(q, bt); put_probe_ref(); } @@ -352,7 +362,7 @@ static int __blk_trace_remove(struct request_queue *q) return -EINVAL; if (bt->trace_state != Blktrace_running) - blk_trace_cleanup(bt); + blk_trace_cleanup(q, bt); return 0; } @@ -572,7 +582,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = 0; err: if (ret) - blk_trace_free(bt); + blk_trace_free(q, bt); return ret; } @@ -666,9 +676,9 @@ static int __blk_trace_startstop(struct request_queue *q, int start) blktrace_seq++; smp_mb(); bt->trace_state = Blktrace_running; - raw_spin_lock_irq(&running_trace_lock); + spin_lock_irq(&running_trace_lock); list_add(&bt->running_list, &running_trace_list); - raw_spin_unlock_irq(&running_trace_lock); + spin_unlock_irq(&running_trace_lock); trace_note_time(bt); ret = 0; @@ -676,9 +686,9 @@ static int __blk_trace_startstop(struct request_queue *q, int start) } else { if (bt->trace_state == Blktrace_running) { bt->trace_state = Blktrace_stopped; - raw_spin_lock_irq(&running_trace_lock); + spin_lock_irq(&running_trace_lock); list_del_init(&bt->running_list); - raw_spin_unlock_irq(&running_trace_lock); + spin_unlock_irq(&running_trace_lock); relay_flush(bt->rchan); ret = 0; } @@ -816,7 +826,7 @@ blk_trace_request_get_cgid(struct request *rq) * Records an action against a request. Will log the bio offset + size. 
* **/ -static void blk_add_trace_rq(struct request *rq, blk_status_t error, +static void blk_add_trace_rq(struct request *rq, int error, unsigned int nr_bytes, u32 what, u64 cgid) { struct blk_trace *bt; @@ -834,8 +844,7 @@ static void blk_add_trace_rq(struct request *rq, blk_status_t error, what |= BLK_TC_ACT(BLK_TC_FS); __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), - rq->cmd_flags, what, blk_status_to_errno(error), 0, - NULL, cgid); + rq->cmd_flags, what, error, 0, NULL, cgid); rcu_read_unlock(); } @@ -864,7 +873,7 @@ static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) } static void blk_add_trace_rq_complete(void *ignore, struct request *rq, - blk_status_t error, unsigned int nr_bytes) + int error, unsigned int nr_bytes) { blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, blk_trace_request_get_cgid(rq)); @@ -1045,7 +1054,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, } r.device_from = cpu_to_be32(dev); - r.device_to = cpu_to_be32(disk_devt(rq->q->disk)); + r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), @@ -1608,15 +1617,15 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt->trace_state == Blktrace_running) { bt->trace_state = Blktrace_stopped; - raw_spin_lock_irq(&running_trace_lock); + spin_lock_irq(&running_trace_lock); list_del_init(&bt->running_list); - raw_spin_unlock_irq(&running_trace_lock); + spin_unlock_irq(&running_trace_lock); relay_flush(bt->rchan); } put_probe_ref(); synchronize_rcu(); - blk_trace_free(bt); + blk_trace_free(q, bt); return 0; } @@ -1647,7 +1656,7 @@ static int blk_trace_setup_queue(struct request_queue *q, return 0; free_bt: - blk_trace_free(bt); + blk_trace_free(q, bt); return ret; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 21aa306442..5a18b861fc 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -345,7 +345,7 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, }; @@ -394,11 +394,11 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .func = bpf_trace_printk, .gpl_only = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE, }; -static void __set_printk_clr_event(void) +const struct bpf_func_proto *bpf_get_trace_printk_proto(void) { /* * This program might be calling bpf_trace_printk, @@ -410,57 +410,11 @@ static void __set_printk_clr_event(void) */ if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) pr_warn_ratelimited("could not enable bpf_trace_printk events"); -} -const struct bpf_func_proto *bpf_get_trace_printk_proto(void) -{ - __set_printk_clr_event(); return &bpf_trace_printk_proto; } -BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, - u32, data_len) -{ - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; - int ret, num_args; - u32 *bin_args; - - if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) - return -EINVAL; - num_args = data_len / 8; - - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); - if (ret < 0) - return ret; - - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); - - 
trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); - - bpf_bprintf_cleanup(); - - return ret; -} - -static const struct bpf_func_proto bpf_trace_vprintk_proto = { - .func = bpf_trace_vprintk, - .gpl_only = true, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg2_type = ARG_CONST_SIZE, - .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, - .arg4_type = ARG_CONST_SIZE_OR_ZERO, -}; - -const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) -{ - __set_printk_clr_event(); - return &bpf_trace_vprintk_proto; -} +#define MAX_SEQ_PRINTF_VARARGS 12 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, const void *, data, u32, data_len) @@ -468,7 +422,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, int err, num_args; u32 *bin_args; - if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || + if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 || (data_len && !data)) return -EINVAL; num_args = data_len / 8; @@ -492,9 +446,9 @@ static const struct bpf_func_proto bpf_seq_printf_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &btf_seq_file_ids[0], - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, - .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM_OR_NULL, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -509,7 +463,7 @@ static const struct bpf_func_proto bpf_seq_write_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &btf_seq_file_ids[0], - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -533,7 +487,7 @@ static const struct bpf_func_proto bpf_seq_printf_btf_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &btf_seq_file_ids[0], - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; @@ -694,7 +648,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -764,7 +718,7 @@ const struct bpf_func_proto bpf_get_current_task_btf_proto = { .func = bpf_get_current_task_btf, .gpl_only = true, .ret_type = RET_PTR_TO_BTF_ID, - .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .ret_btf_id = &btf_task_struct_ids[0], }; BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task) @@ -779,7 +733,7 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = { .func = bpf_task_pt_regs, .gpl_only = true, .arg1_type = ARG_PTR_TO_BTF_ID, - .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg1_btf_id = &btf_task_struct_ids[0], .ret_type = RET_PTR_TO_BTF_ID, .ret_btf_id = &bpf_task_pt_regs_ids[0], }; @@ -1004,7 +958,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE, - .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_MEM, .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; @@ -1012,7 +966,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) { /* This helper call is inlined by verifier. 
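bpf_seq_printf() (and the removed bpf_trace_vprintk()) gate the packed vararg buffer with the same three conditions: 8-byte granularity, a per-helper cap, and non-NULL when non-empty. Isolated for clarity (DEMO_* names are stand-ins):

```c
#include <linux/errno.h>
#include <linux/types.h>

#define DEMO_MAX_VARARGS 12	/* MAX_SEQ_PRINTF_VARARGS in this hunk */

/* Each formatted argument occupies one u64 slot in @data; returns the
 * argument count on success. */
static int demo_check_vararg_buf(const void *data, u32 data_len)
{
	if (data_len & 7 || data_len > DEMO_MAX_VARARGS * 8 ||
	    (data_len && !data))
		return -EINVAL;
	return data_len / 8;
}
```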
*/ - return ((u64 *)ctx)[-2]; + return ((u64 *)ctx)[-1]; } static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { @@ -1063,81 +1017,6 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { .arg1_type = ARG_PTR_TO_CTX, }; -BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) -{ -#ifndef CONFIG_X86 - return -ENOENT; -#else - static const u32 br_entry_size = sizeof(struct perf_branch_entry); - u32 entry_cnt = size / br_entry_size; - - entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt); - - if (unlikely(flags)) - return -EINVAL; - - if (!entry_cnt) - return -ENOENT; - - return entry_cnt * br_entry_size; -#endif -} - -static const struct bpf_func_proto bpf_get_branch_snapshot_proto = { - .func = bpf_get_branch_snapshot, - .gpl_only = true, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_UNINIT_MEM, - .arg2_type = ARG_CONST_SIZE_OR_ZERO, -}; - -BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value) -{ - /* This helper call is inlined by verifier. */ - u64 nr_args = ((u64 *)ctx)[-1]; - - if ((u64) n >= nr_args) - return -EINVAL; - *value = ((u64 *)ctx)[n]; - return 0; -} - -static const struct bpf_func_proto bpf_get_func_arg_proto = { - .func = get_func_arg, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_LONG, -}; - -BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) -{ - /* This helper call is inlined by verifier. */ - u64 nr_args = ((u64 *)ctx)[-1]; - - *value = ((u64 *)ctx)[nr_args]; - return 0; -} - -static const struct bpf_func_proto bpf_get_func_ret_proto = { - .func = get_func_ret, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_LONG, -}; - -BPF_CALL_1(get_func_arg_cnt, void *, ctx) -{ - /* This helper call is inlined by verifier. 
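The ctx[-2] → ctx[-1] change and the removed get_func_arg()/get_func_ret() helpers all read trampoline metadata stored at negative offsets from the BPF tracing context. A sketch of that layout as the removed helpers describe it (the offsets differ between the two kernel versions this diff spans):

```c
#include <linux/errno.h>
#include <linux/types.h>

/* Tracing ctx built by the trampoline: args at ctx[0..n-1], the argument
 * count at ctx[-1] (newer layout), return value at ctx[nr_args]. */
static int demo_get_func_arg(void *ctx, u32 n, u64 *value)
{
	u64 nr_args = ((u64 *)ctx)[-1];

	if ((u64)n >= nr_args)
		return -EINVAL;
	*value = ((u64 *)ctx)[n];
	return 0;
}
```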
*/ - return ((u64 *)ctx)[-1]; -} - -static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { - .func = get_func_arg_cnt, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, -}; - static const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1251,12 +1130,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_snprintf_proto; case BPF_FUNC_get_func_ip: return &bpf_get_func_ip_proto_tracing; - case BPF_FUNC_get_branch_snapshot: - return &bpf_get_branch_snapshot_proto; - case BPF_FUNC_find_vma: - return &bpf_find_vma_proto; - case BPF_FUNC_trace_vprintk: - return bpf_get_trace_vprintk_proto(); default: return bpf_base_func_proto(func_id); } @@ -1334,7 +1207,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -1556,7 +1429,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -1610,7 +1483,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; @@ -1651,8 +1524,6 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_tcp_request_sock_proto; case BPF_FUNC_skc_to_udp6_sock: return &bpf_skc_to_udp6_sock_proto; - case BPF_FUNC_skc_to_unix_sock: - return &bpf_skc_to_unix_sock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: @@ -1676,12 +1547,6 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) NULL; case BPF_FUNC_d_path: return &bpf_d_path_proto; - case BPF_FUNC_get_func_arg: - return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL; - case BPF_FUNC_get_func_ret: - return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; - case BPF_FUNC_get_func_arg_cnt: - return bpf_prog_has_trampoline(prog) ? 
&bpf_get_func_arg_cnt_proto : NULL; default: fn = raw_tp_prog_func_proto(func_id, prog); if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) @@ -1695,7 +1560,13 @@ static bool raw_tp_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - return bpf_tracing_ctx_access(off, size, type); + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return true; } static bool tracing_prog_is_valid_access(int off, int size, @@ -1703,7 +1574,13 @@ static bool tracing_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - return bpf_tracing_btf_ctx_access(off, size, type, prog, info); + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return btf_ctx_access(off, size, type, prog, info); } int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog, diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 22061d38fc..b8a0d1d564 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -115,7 +115,6 @@ int function_graph_enter(unsigned long ret, unsigned long func, { struct ftrace_graph_ent trace; -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS /* * Skip graph tracing if the return location is served by direct trampoline, * since call sequence and return addresses are unpredictable anyway. @@ -125,7 +124,6 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (ftrace_direct_func_count && ftrace_find_rec_direct(ret - MCOUNT_INSN_SIZE)) return -EBUSY; -#endif trace.func = func; trace.depth = ++current->curr_ret_depth; @@ -335,10 +333,10 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ static struct ftrace_ops graph_ops = { - .func = ftrace_graph_func, + .func = ftrace_stub, .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID | - FTRACE_OPS_GRAPH_STUB, + FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, /* trampoline_size is only needed for dynamically allocated tramps */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a4b462b6f9..c672040142 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -119,9 +119,14 @@ struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; struct ftrace_ops global_ops; -/* Defined by vmlinux.lds.h see the commment above arch_ftrace_ops_list_func for details */ -void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs); +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#else +/* See comment below, where ftrace_ops_list_func is defined */ +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); +#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +#endif static inline void ftrace_ops_init(struct ftrace_ops *ops) { @@ -318,7 +323,7 @@ int __register_ftrace_function(struct ftrace_ops *ops) if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT)) return -EBUSY; - if (!is_kernel_core_data((unsigned long)ops)) + if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; 
add_ftrace_ops(&ftrace_ops_list, ops); @@ -576,7 +581,7 @@ static void ftrace_profile_reset(struct ftrace_profile_stat *stat) FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); } -static int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) +int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; int functions; @@ -2390,39 +2395,6 @@ unsigned long ftrace_find_rec_direct(unsigned long ip) return entry->direct; } -static struct ftrace_func_entry* -ftrace_add_rec_direct(unsigned long ip, unsigned long addr, - struct ftrace_hash **free_hash) -{ - struct ftrace_func_entry *entry; - - if (ftrace_hash_empty(direct_functions) || - direct_functions->count > 2 * (1 << direct_functions->size_bits)) { - struct ftrace_hash *new_hash; - int size = ftrace_hash_empty(direct_functions) ? 0 : - direct_functions->count + 1; - - if (size < 32) - size = 32; - - new_hash = dup_hash(direct_functions, size); - if (!new_hash) - return NULL; - - *free_hash = direct_functions; - direct_functions = new_hash; - } - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return NULL; - - entry->ip = ip; - entry->direct = addr; - __add_hash_entry(direct_functions, entry); - return entry; -} - static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { @@ -5139,16 +5111,39 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) } ret = -ENOMEM; + if (ftrace_hash_empty(direct_functions) || + direct_functions->count > 2 * (1 << direct_functions->size_bits)) { + struct ftrace_hash *new_hash; + int size = ftrace_hash_empty(direct_functions) ? 0 : + direct_functions->count + 1; + + if (size < 32) + size = 32; + + new_hash = dup_hash(direct_functions, size); + if (!new_hash) + goto out_unlock; + + free_hash = direct_functions; + direct_functions = new_hash; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out_unlock; + direct = ftrace_find_direct_func(addr); if (!direct) { direct = ftrace_alloc_direct_func(addr); - if (!direct) + if (!direct) { + kfree(entry); goto out_unlock; + } } - entry = ftrace_add_rec_direct(ip, addr, &free_hash); - if (!entry) - goto out_unlock; + entry->ip = ip; + entry->direct = addr; + __add_hash_entry(direct_functions, entry); ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); if (ret) @@ -5217,7 +5212,6 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) { struct ftrace_direct_func *direct; struct ftrace_func_entry *entry; - struct ftrace_hash *hash; int ret = -ENODEV; mutex_lock(&direct_mutex); @@ -5226,8 +5220,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) if (!entry) goto out_unlock; - hash = direct_ops.func_hash->filter_hash; - if (hash->count == 1) + if (direct_functions->count == 1) unregister_ftrace_function(&direct_ops); ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0); @@ -5403,221 +5396,6 @@ int modify_ftrace_direct(unsigned long ip, return ret; } EXPORT_SYMBOL_GPL(modify_ftrace_direct); - -#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \ - FTRACE_OPS_FL_SAVE_REGS) - -static int check_direct_multi(struct ftrace_ops *ops) -{ - if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) - return -EINVAL; - if ((ops->flags & MULTI_FLAGS) != MULTI_FLAGS) - return -EINVAL; - return 0; -} - -static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long addr) -{ - struct ftrace_func_entry *entry, *del; - int size, i; - - size = 1 << hash->size_bits; - for (i = 0; i < 
size; i++) { - hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - del = __ftrace_lookup_ip(direct_functions, entry->ip); - if (del && del->direct == addr) { - remove_hash_entry(direct_functions, del); - kfree(del); - } - } - } -} - -/** - * register_ftrace_direct_multi - Call a custom trampoline directly - * for multiple functions registered in @ops - * @ops: The address of the struct ftrace_ops object - * @addr: The address of the trampoline to call at @ops functions - * - * This is used to connect a direct calls to @addr from the nop locations - * of the functions registered in @ops (with by ftrace_set_filter_ip - * function). - * - * The location that it calls (@addr) must be able to handle a direct call, - * and save the parameters of the function being traced, and restore them - * (or inject new ones if needed), before returning. - * - * Returns: - * 0 on success - * -EINVAL - The @ops object was already registered with this call or - * when there are no functions in @ops object. - * -EBUSY - Another direct function is already attached (there can be only one) - * -ENODEV - @ip does not point to a ftrace nop location (or not supported) - * -ENOMEM - There was an allocation failure. - */ -int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) -{ - struct ftrace_hash *hash, *free_hash = NULL; - struct ftrace_func_entry *entry, *new; - int err = -EBUSY, size, i; - - if (ops->func || ops->trampoline) - return -EINVAL; - if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) - return -EINVAL; - if (ops->flags & FTRACE_OPS_FL_ENABLED) - return -EINVAL; - - hash = ops->func_hash->filter_hash; - if (ftrace_hash_empty(hash)) - return -EINVAL; - - mutex_lock(&direct_mutex); - - /* Make sure requested entries are not already registered.. */ - size = 1 << hash->size_bits; - for (i = 0; i < size; i++) { - hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - if (ftrace_find_rec_direct(entry->ip)) - goto out_unlock; - } - } - - /* ... and insert them to direct_functions hash. */ - err = -ENOMEM; - for (i = 0; i < size; i++) { - hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - new = ftrace_add_rec_direct(entry->ip, addr, &free_hash); - if (!new) - goto out_remove; - entry->direct = addr; - } - } - - ops->func = call_direct_funcs; - ops->flags = MULTI_FLAGS; - ops->trampoline = FTRACE_REGS_ADDR; - - err = register_ftrace_function(ops); - - out_remove: - if (err) - remove_direct_functions_hash(hash, addr); - - out_unlock: - mutex_unlock(&direct_mutex); - - if (free_hash) { - synchronize_rcu_tasks(); - free_ftrace_hash(free_hash); - } - return err; -} -EXPORT_SYMBOL_GPL(register_ftrace_direct_multi); - -/** - * unregister_ftrace_direct_multi - Remove calls to custom trampoline - * previously registered by register_ftrace_direct_multi for @ops object. - * @ops: The address of the struct ftrace_ops object - * - * This is used to remove a direct calls to @addr from the nop locations - * of the functions registered in @ops (with by ftrace_set_filter_ip - * function). - * - * Returns: - * 0 on success - * -EINVAL - The @ops object was not properly registered. 
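The removed kernel-doc implies a two-step call sequence for the multi API. As a usage illustration of what this revert deletes (my reading of the doc; my_tramp and the filtered address are placeholders):

```c
#include <linux/ftrace.h>

static struct ftrace_ops demo_direct_ops;

/* An arch-specific trampoline that saves/restores the traced function's
 * registers; purely a placeholder here. */
extern void my_tramp(void);

static int demo_attach(unsigned long ip)
{
	int err;

	/* Populate the ops' filter hash first; this also initializes the
	 * ops, which register_ftrace_direct_multi() insists on. */
	err = ftrace_set_filter_ip(&demo_direct_ops, ip, 0, 0);
	if (err)
		return err;
	/* Then attach every filtered location to the trampoline at once. */
	return register_ftrace_direct_multi(&demo_direct_ops,
					    (unsigned long)my_tramp);
}
```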
- */ -int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) -{ - struct ftrace_hash *hash = ops->func_hash->filter_hash; - int err; - - if (check_direct_multi(ops)) - return -EINVAL; - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return -EINVAL; - - mutex_lock(&direct_mutex); - err = unregister_ftrace_function(ops); - remove_direct_functions_hash(hash, addr); - mutex_unlock(&direct_mutex); - - /* cleanup for possible another register call */ - ops->func = NULL; - ops->trampoline = 0; - return err; -} -EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); - -/** - * modify_ftrace_direct_multi - Modify an existing direct 'multi' call - * to call something else - * @ops: The address of the struct ftrace_ops object - * @addr: The address of the new trampoline to call at @ops functions - * - * This is used to unregister currently registered direct caller and - * register new one @addr on functions registered in @ops object. - * - * Note there's window between ftrace_shutdown and ftrace_startup calls - * where there will be no callbacks called. - * - * Returns: zero on success. Non zero on error, which includes: - * -EINVAL - The @ops object was not properly registered. - */ -int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) -{ - struct ftrace_hash *hash; - struct ftrace_func_entry *entry, *iter; - static struct ftrace_ops tmp_ops = { - .func = ftrace_stub, - .flags = FTRACE_OPS_FL_STUB, - }; - int i, size; - int err; - - if (check_direct_multi(ops)) - return -EINVAL; - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return -EINVAL; - - mutex_lock(&direct_mutex); - - /* Enable the tmp_ops to have the same functions as the direct ops */ - ftrace_ops_init(&tmp_ops); - tmp_ops.func_hash = ops->func_hash; - - err = register_ftrace_function(&tmp_ops); - if (err) - goto out_direct; - - /* - * Now the ftrace_ops_list_func() is called to do the direct callers. - * We can safely change the direct functions attached to each entry. 
- */ - mutex_lock(&ftrace_lock); - - hash = ops->func_hash->filter_hash; - size = 1 << hash->size_bits; - for (i = 0; i < size; i++) { - hlist_for_each_entry(iter, &hash->buckets[i], hlist) { - entry = __ftrace_lookup_ip(direct_functions, iter->ip); - if (!entry) - continue; - entry->direct = addr; - } - } - - mutex_unlock(&ftrace_lock); - - /* Removing the tmp_ops will add the updated direct callers to the functions */ - unregister_ftrace_function(&tmp_ops); - - out_direct: - mutex_unlock(&direct_mutex); - return err; -} -EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi); #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /** @@ -6394,27 +6172,6 @@ static int ftrace_cmp_ips(const void *a, const void *b) return 0; } -#ifdef CONFIG_FTRACE_SORT_STARTUP_TEST -static void test_is_sorted(unsigned long *start, unsigned long count) -{ - int i; - - for (i = 1; i < count; i++) { - if (WARN(start[i - 1] > start[i], - "[%d] %pS at %lx is not sorted with %pS at %lx\n", i, - (void *)start[i - 1], start[i - 1], - (void *)start[i], start[i])) - break; - } - if (i == count) - pr_info("ftrace section at %px sorted properly\n", start); -} -#else -static void test_is_sorted(unsigned long *start, unsigned long count) -{ -} -#endif - static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) @@ -6433,17 +6190,8 @@ static int ftrace_process_locs(struct module *mod, if (!count) return 0; - /* - * Sorting mcount in vmlinux at build time depend on - * CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in - * modules can not be sorted at build time. - */ - if (!IS_ENABLED(CONFIG_BUILDTIME_MCOUNT_SORT) || mod) { - sort(start, count, sizeof(*start), - ftrace_cmp_ips, NULL); - } else { - test_is_sorted(start, count); - } + sort(start, count, sizeof(*start), + ftrace_cmp_ips, NULL); start_pg = ftrace_allocate_pages(count); if (!start_pg) @@ -7099,11 +6847,6 @@ void __init ftrace_free_init_mem(void) ftrace_free_mem(NULL, start, end); } -int __init __weak ftrace_dyn_arch_init(void) -{ - return 0; -} - void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; @@ -7191,6 +6934,7 @@ static int __init ftrace_nodyn_init(void) core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } +static inline void ftrace_startup_enable(int command) { } static inline void ftrace_startup_all(int command) { } # define ftrace_startup_sysctl() do { } while (0) @@ -7234,15 +6978,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - /* - * The ftrace_test_and_set_recursion() will disable preemption, - * which is required since some of the ops may be dynamically - * allocated, they must be freed after a synchronize_rcu(). - */ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); if (bit < 0) return; + /* + * Some of the ops may be dynamically allocated, + * they must be freed after a synchronize_rcu(). + */ + preempt_disable_notrace(); + do_for_each_ftrace_op(op, ftrace_ops_list) { /* Stub functions don't need to be called nor tested */ if (op->flags & FTRACE_OPS_FL_STUB) @@ -7266,6 +7011,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, } } while_for_each_ftrace_op(op); out: + preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -7281,23 +7027,21 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. 
* An architecture can pass partial regs with ftrace_ops and still * set the ARCH_SUPPORTS_FTRACE_OPS. - * - * In vmlinux.lds.h, ftrace_ops_list_func() is defined to be - * arch_ftrace_ops_list_func. */ #if ARCH_SUPPORTS_FTRACE_OPS -void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs) +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } +NOKPROBE_SYMBOL(ftrace_ops_list_func); #else -void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } +NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif -NOKPROBE_SYMBOL(arch_ftrace_ops_list_func); /* * If there's only one function registered but it does not support @@ -7313,9 +7057,12 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; + preempt_disable_notrace(); + if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) op->func(ip, parent_ip, op, fregs); + preempt_enable_notrace(); trace_clear_recursion(bit); } NOKPROBE_SYMBOL(ftrace_ops_assist_func); @@ -7438,10 +7185,10 @@ static void clear_ftrace_pids(struct trace_array *tr, int type) synchronize_rcu(); if ((type & TRACE_PIDS) && pid_list) - trace_pid_list_free(pid_list); + trace_free_pid_list(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) - trace_pid_list_free(no_pid_list); + trace_free_pid_list(no_pid_list); } void ftrace_clear_pids(struct trace_array *tr) @@ -7682,7 +7429,7 @@ pid_write(struct file *filp, const char __user *ubuf, if (filtered_pids) { synchronize_rcu(); - trace_pid_list_free(filtered_pids); + trace_free_pid_list(filtered_pids); } else if (pid_list && !other_pids) { /* Register a probe to set whether to ignore the tracing of a task */ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 05dfc7a12d..46ae72095c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3167,9 +3167,14 @@ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { unsigned int val = cpu_buffer->current_context; - int bit = interrupt_context_level(); + unsigned long pc = preempt_count(); + int bit; - bit = RB_CTX_NORMAL - bit; + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) + bit = RB_CTX_NORMAL; + else + bit = pc & NMI_MASK ? RB_CTX_NMI : + pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { /* @@ -5898,13 +5903,16 @@ static __init int test_ringbuffer(void) rb_data[cpu].buffer = buffer; rb_data[cpu].cpu = cpu; rb_data[cpu].cnt = cpu; - rb_threads[cpu] = kthread_run_on_cpu(rb_test, &rb_data[cpu], - cpu, "rbtester/%u"); + rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], + "rbtester/%d", cpu); if (WARN_ON(IS_ERR(rb_threads[cpu]))) { pr_cont("FAILED\n"); ret = PTR_ERR(rb_threads[cpu]); goto out_free; } + + kthread_bind(rb_threads[cpu], cpu); + wake_up_process(rb_threads[cpu]); } /* Now create the rb hammer! 
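The ring-buffer hunk reverts to deriving the recursion context straight from preempt_count() rather than the newer interrupt_context_level() helper. The restored decision ladder, extracted (note it tests SOFTIRQ_OFFSET, i.e. serving-softirq, not the whole softirq count):

```c
#include <linux/preempt.h>

enum { DEMO_CTX_NMI, DEMO_CTX_IRQ, DEMO_CTX_SOFTIRQ, DEMO_CTX_NORMAL };

/* NMI outranks hardirq, which outranks softirq; anything else is normal. */
static int demo_context_bit(void)
{
	unsigned long pc = preempt_count();

	if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
		return DEMO_CTX_NORMAL;
	return pc & NMI_MASK ? DEMO_CTX_NMI :
	       pc & HARDIRQ_MASK ? DEMO_CTX_IRQ : DEMO_CTX_SOFTIRQ;
}
```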
*/ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3050892d18..86fb77c2ac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -235,7 +235,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; static int __init set_trace_boot_options(char *str) { strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); - return 0; + return 1; } __setup("trace_options=", set_trace_boot_options); @@ -246,7 +246,7 @@ static int __init set_trace_boot_clock(char *str) { strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); trace_boot_clock = trace_boot_clock_buf; - return 0; + return 1; } __setup("trace_clock=", set_trace_boot_clock); @@ -516,6 +516,12 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec, return 0; } +void trace_free_pid_list(struct trace_pid_list *pid_list) +{ + vfree(pid_list->pids); + kfree(pid_list); +} + /** * trace_find_filtered_pid - check if a pid exists in a filtered_pid list * @filtered_pids: The list of pids to check @@ -526,7 +532,14 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec, bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) { - return trace_pid_list_is_set(filtered_pids, search_pid); + /* + * If pid_max changed after filtered_pids was created, we + * by default ignore all pids greater than the previous pid_max. + */ + if (search_pid >= filtered_pids->pid_max) + return false; + + return test_bit(search_pid, filtered_pids->pids); } /** @@ -583,11 +596,15 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, return; } + /* Sorry, but we don't support pid_max changing after setting */ + if (task->pid >= pid_list->pid_max) + return; + /* "self" is set for forks, and NULL for exits */ if (self) - trace_pid_list_set(pid_list, task->pid); + set_bit(task->pid, pid_list->pids); else - trace_pid_list_clear(pid_list, task->pid); + clear_bit(task->pid, pid_list->pids); } /** @@ -604,19 +621,18 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, */ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) { - long pid = (unsigned long)v; - unsigned int next; + unsigned long pid = (unsigned long)v; (*pos)++; /* pid already is +1 of the actual previous bit */ - if (trace_pid_list_next(pid_list, pid, &next) < 0) - return NULL; - - pid = next; + pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid); /* Return pid + 1 to allow zero to be represented */ - return (void *)(pid + 1); + if (pid < pid_list->pid_max) + return (void *)(pid + 1); + + return NULL; } /** @@ -633,14 +649,12 @@ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) { unsigned long pid; - unsigned int first; loff_t l = 0; - if (trace_pid_list_first(pid_list, &first) < 0) + pid = find_first_bit(pid_list->pids, pid_list->pid_max); + if (pid >= pid_list->pid_max) return NULL; - pid = first; - /* Return pid + 1 so that zero can be the exit value */ for (pid++; pid && l < *pos; pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) @@ -676,7 +690,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, unsigned long val; int nr_pids = 0; ssize_t read = 0; - ssize_t ret; + ssize_t ret = 0; loff_t pos; pid_t pid; @@ -689,23 +703,34 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * the user. If the operation fails, then the current list is * not modified. 
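The reverted trace_pid_list is just a pid_max-sized bitmap behind a small struct (the trace.h hunk near the end of this diff restores the definition). Its core operations, sketched with demo_* names:

```c
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

struct demo_pid_list {
	int pid_max;
	unsigned long *pids;	/* one bit per possible pid */
};

static struct demo_pid_list *demo_pid_list_alloc(int pid_max)
{
	struct demo_pid_list *pl = kmalloc(sizeof(*pl), GFP_KERNEL);

	if (!pl)
		return NULL;
	pl->pid_max = pid_max;
	pl->pids = vzalloc((pid_max + 7) >> 3);	/* one bit per pid, rounded up */
	if (!pl->pids) {
		kfree(pl);
		return NULL;
	}
	return pl;
}

/* Pids at or beyond pid_max were never recorded, so treat them as unset. */
static bool demo_pid_is_set(struct demo_pid_list *pl, pid_t pid)
{
	return pid < pl->pid_max && test_bit(pid, pl->pids);
}
```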
*/ - pid_list = trace_pid_list_alloc(); + pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); if (!pid_list) { trace_parser_put(&parser); return -ENOMEM; } + pid_list->pid_max = READ_ONCE(pid_max); + + /* Only truncating will shrink pid_max */ + if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max) + pid_list->pid_max = filtered_pids->pid_max; + + pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); + if (!pid_list->pids) { + trace_parser_put(&parser); + kfree(pid_list); + return -ENOMEM; + } + if (filtered_pids) { /* copy the current bits to the new max */ - ret = trace_pid_list_first(filtered_pids, &pid); - while (!ret) { - trace_pid_list_set(pid_list, pid); - ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); + for_each_set_bit(pid, filtered_pids->pids, + filtered_pids->pid_max) { + set_bit(pid, pid_list->pids); nr_pids++; } } - ret = 0; while (cnt > 0) { pos = 0; @@ -721,13 +746,12 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; + if (val >= pid_list->pid_max) + break; pid = (pid_t)val; - if (trace_pid_list_set(pid_list, pid) < 0) { - ret = -1; - break; - } + set_bit(pid, pid_list->pids); nr_pids++; trace_parser_clear(&parser); @@ -736,13 +760,13 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, trace_parser_put(&parser); if (ret < 0) { - trace_pid_list_free(pid_list); + trace_free_pid_list(pid_list); return ret; } if (!nr_pids) { /* Cleared the list of pids */ - trace_pid_list_free(pid_list); + trace_free_pid_list(pid_list); read = ret; pid_list = NULL; } @@ -984,8 +1008,6 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev ring_buffer_write(buffer, event->array[0], &event->array[1]); /* Release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); - /* ring_buffer_unlock_commit() enables preemption */ - preempt_enable_notrace(); } else ring_buffer_unlock_commit(buffer, event); } @@ -1474,12 +1496,10 @@ static int __init set_buf_size(char *str) if (!str) return 0; buf_size = memparse(str, &str); - /* - * nr_entries can not be zero and the startup - * tests require some buffer space. Therefore - * ensure we have at least 4096 bytes of buffer. - */ - trace_buf_size = max(4096UL, buf_size); + /* nr_entries can not be zero */ + if (buf_size == 0) + return 0; + trace_buf_size = buf_size; return 1; } __setup("trace_buf_size=", set_buf_size); @@ -2609,8 +2629,6 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) trace_flags |= TRACE_FLAG_HARDIRQ; if (in_serving_softirq()) trace_flags |= TRACE_FLAG_SOFTIRQ; - if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) - trace_flags |= TRACE_FLAG_BH_OFF; if (tif_need_resched()) trace_flags |= TRACE_FLAG_NEED_RESCHED; @@ -2755,8 +2773,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, *current_rb = tr->array_buffer.buffer; if (!tr->no_filter_buffering_ref && - (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { - preempt_disable_notrace(); + (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && + (entry = this_cpu_read(trace_buffered_event))) { /* * Filtering is on, so try to use the per cpu buffer first. * This buffer will simulate a ring_buffer_event, @@ -2774,38 +2792,33 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, * is still quicker than no copy on match, but having * to discard out of the ring buffer on a failed match. 
*/ - if ((entry = __this_cpu_read(trace_buffered_event))) { - int max_len = PAGE_SIZE - struct_size(entry, array, 1); + int max_len = PAGE_SIZE - struct_size(entry, array, 1); - val = this_cpu_inc_return(trace_buffered_event_cnt); + val = this_cpu_inc_return(trace_buffered_event_cnt); - /* - * Preemption is disabled, but interrupts and NMIs - * can still come in now. If that happens after - * the above increment, then it will have to go - * back to the old method of allocating the event - * on the ring buffer, and if the filter fails, it - * will have to call ring_buffer_discard_commit() - * to remove it. - * - * Need to also check the unlikely case that the - * length is bigger than the temp buffer size. - * If that happens, then the reserve is pretty much - * guaranteed to fail, as the ring buffer currently - * only allows events less than a page. But that may - * change in the future, so let the ring buffer reserve - * handle the failure in that case. - */ - if (val == 1 && likely(len <= max_len)) { - trace_event_setup(entry, type, trace_ctx); - entry->array[0] = len; - /* Return with preemption disabled */ - return entry; - } - this_cpu_dec(trace_buffered_event_cnt); + /* + * Preemption is disabled, but interrupts and NMIs + * can still come in now. If that happens after + * the above increment, then it will have to go + * back to the old method of allocating the event + * on the ring buffer, and if the filter fails, it + * will have to call ring_buffer_discard_commit() + * to remove it. + * + * Need to also check the unlikely case that the + * length is bigger than the temp buffer size. + * If that happens, then the reserve is pretty much + * guaranteed to fail, as the ring buffer currently + * only allows events less than a page. But that may + * change in the future, so let the ring buffer reserve + * handle the failure in that case. 
+ */ + if (val == 1 && likely(len <= max_len)) { + trace_event_setup(entry, type, trace_ctx); + entry->array[0] = len; + return entry; } - /* __trace_buffer_lock_reserve() disables preemption */ - preempt_enable_notrace(); + this_cpu_dec(trace_buffered_event_cnt); } entry = __trace_buffer_lock_reserve(*current_rb, type, len, @@ -4198,7 +4211,7 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off/BH-disabled\n" + "# / _-----=> irqs-off \n" "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" @@ -4239,7 +4252,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); @@ -4849,12 +4862,6 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) return 0; } -static int tracing_mark_open(struct inode *inode, struct file *filp) -{ - stream_open(inode, filp); - return tracing_open_generic_tr(inode, filp); -} - static int tracing_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -5638,7 +5645,6 @@ static const char readme_msg[] = #ifdef CONFIG_HIST_TRIGGERS " hist trigger\t- If set, event hits are aggregated into a hash table\n" "\t Format: hist:keys=\n" - "\t [:=[,=...]]\n" "\t [:values=]\n" "\t [:sort=]\n" "\t [:size=#entries]\n" @@ -5650,16 +5656,6 @@ static const char readme_msg[] = "\t common_timestamp - to record current timestamp\n" "\t common_cpu - to record the CPU the event happened on\n" "\n" - "\t A hist trigger variable can be:\n" - "\t - a reference to a field e.g. x=current_timestamp,\n" - "\t - a reference to another variable e.g. y=$x,\n" - "\t - a numeric literal: e.g. ms_per_sec=1000,\n" - "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" - "\n" - "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" - "\t multiplication(*) and division(/) operators. An operand can be either a\n" - "\t variable reference, field or numeric literal.\n" - "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. 
Keys and values\n" @@ -6739,9 +6735,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cnt = PAGE_SIZE - 1; /* reset all but tr, trace, and overruns */ - trace_iterator_reset(iter); + memset(&iter->seq, 0, + sizeof(struct trace_iterator) - + offsetof(struct trace_iterator, seq)); cpumask_clear(iter->started); trace_seq_init(&iter->seq); + iter->pos = -1; trace_event_read_lock(); trace_access_lock(iter->cpu_file); @@ -7130,6 +7129,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tt) event_triggers_post_call(tr->trace_marker_file, tt); + if (written > 0) + *fpos += written; + return written; } @@ -7188,6 +7190,9 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, __buffer_unlock_commit(buffer, event); + if (written > 0) + *fpos += written; + return written; } @@ -7587,14 +7592,16 @@ static const struct file_operations tracing_free_buffer_fops = { }; static const struct file_operations tracing_mark_fops = { - .open = tracing_mark_open, + .open = tracing_open_generic_tr, .write = tracing_mark_write, + .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_mark_raw_fops = { - .open = tracing_mark_open, + .open = tracing_open_generic_tr, .write = tracing_mark_raw_write, + .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5b09c31e0..421374c304 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,8 +22,6 @@ #include #include -#include "pid_list.h" - #ifdef CONFIG_FTRACE_SYSCALLS #include /* For NR_SYSCALLS */ #include /* some archs define it here */ @@ -83,9 +81,6 @@ enum trace_type { #undef __dynamic_array #define __dynamic_array(type, item) type item[]; -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item) type item[]; - #undef F_STRUCT #define F_STRUCT(args...) args @@ -136,6 +131,7 @@ struct kprobe_trace_entry_head { struct eprobe_trace_entry_head { struct trace_entry ent; + unsigned int type; }; struct kretprobe_trace_entry_head { @@ -195,14 +191,10 @@ struct trace_options { struct trace_option_dentry *topts; }; -struct trace_pid_list *trace_pid_list_alloc(void); -void trace_pid_list_free(struct trace_pid_list *pid_list); -bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid); -int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid); -int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid); -int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid); -int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid, - unsigned int *next); +struct trace_pid_list { + int pid_max; + unsigned long *pids; +}; enum { TRACE_PIDS = BIT(0), @@ -892,7 +884,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) * is set, and called by an interrupt handler, we still * want to trace it. 
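The trace_pid_list being restored here is a flat bitmap: (pid_max + 7) >> 3 bytes allocated with vzalloc(), one bit per PID, populated with set_bit() as seen in the trace_pid_write() hunk earlier. A short sketch of the corresponding membership test (a hypothetical helper, assuming the kernel bitmap API from <linux/bitops.h>):

static bool pid_is_filtered(const struct trace_pid_list *list, pid_t pid)
{
	/* PIDs beyond the bitmap can never have been set. */
	if (!list || pid >= list->pid_max)
		return false;
	return test_bit(pid, list->pids);
}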
*/ - if (in_hardirq()) + if (in_irq()) trace_recursion_set(TRACE_IRQ_BIT); else trace_recursion_clear(TRACE_IRQ_BIT); @@ -1336,12 +1328,10 @@ __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { - /* Simply release the temp buffer and enable preemption */ + /* Simply release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); - preempt_enable_notrace(); return; } - /* ring_buffer_discard_commit() enables preemption */ ring_buffer_discard_commit(buffer, event); } @@ -1469,7 +1459,6 @@ struct filter_pred { static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || - field->filter_type == FILTER_RDYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || field->filter_type == FILTER_COMM; @@ -1577,13 +1566,15 @@ extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); -extern int event_enable_trigger_parse(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param); +extern int event_enable_trigger_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param); extern int event_enable_register_trigger(char *glob, + struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); extern void event_enable_unregister_trigger(char *glob, + struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); @@ -1609,30 +1600,6 @@ get_named_trigger_data(struct event_trigger_data *data); extern int register_event_command(struct event_command *cmd); extern int unregister_event_command(struct event_command *cmd); extern int register_trigger_hist_enable_disable_cmds(void); -extern bool event_trigger_check_remove(const char *glob); -extern bool event_trigger_empty_param(const char *param); -extern int event_trigger_separate_filter(char *param_and_filter, char **param, - char **filter, bool param_required); -extern struct event_trigger_data * -event_trigger_alloc(struct event_command *cmd_ops, - char *cmd, - char *param, - void *private_data); -extern int event_trigger_parse_num(char *trigger, - struct event_trigger_data *trigger_data); -extern int event_trigger_set_filter(struct event_command *cmd_ops, - struct trace_event_file *file, - char *param, - struct event_trigger_data *trigger_data); -extern void event_trigger_reset_filter(struct event_command *cmd_ops, - struct event_trigger_data *trigger_data); -extern int event_trigger_register(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, - char *cmd, - char *trigger, - struct event_trigger_data *trigger_data, - int *n_registered); /** * struct event_trigger_ops - callbacks for trace event triggers @@ -1640,20 +1607,10 @@ extern int event_trigger_register(struct event_command *cmd_ops, * The methods in this structure provide per-event trigger hooks for * various trigger operations. * - * The @init and @free methods are used during trigger setup and - * teardown, typically called from an event_command's @parse() - * function implementation. - * - * The @print method is used to print the trigger spec. 
- * - * The @trigger method is the function that actually implements the - * trigger and is called in the context of the triggering event - * whenever that event occurs. - * * All the methods below, except for @init() and @free(), must be * implemented. * - * @trigger: The trigger 'probe' function called when the triggering + * @func: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that * registered the trigger (see struct event_command) along with @@ -1682,10 +1639,9 @@ extern int event_trigger_register(struct event_command *cmd_ops, * (see trace_event_triggers.c). */ struct event_trigger_ops { - void (*trigger)(struct event_trigger_data *data, - struct trace_buffer *buffer, - void *rec, - struct ring_buffer_event *rbe); + void (*func)(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, + struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, @@ -1734,7 +1690,7 @@ struct event_trigger_ops { * All the methods below, except for @set_filter() and @unreg_all(), * must be implemented. * - * @parse: The callback function responsible for parsing and + * @func: The callback function responsible for parsing and * registering the trigger written to the 'trigger' file by the * user. It allocates the trigger instance and registers it with * the appropriate trace event. It makes use of the other @@ -1769,24 +1725,21 @@ struct event_trigger_ops { * * @get_trigger_ops: The callback function invoked to retrieve the * event_trigger_ops implementation associated with the command. - * This callback function allows a single event_command to - * support multiple trigger implementations via different sets of - * event_trigger_ops, depending on the value of the @param - * string. */ struct event_command { struct list_head list; char *name; enum event_trigger_type trigger_type; int flags; - int (*parse)(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, - char *param_and_filter); + int (*func)(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *params); int (*reg)(char *glob, + struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg)(char *glob, + struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg_all)(struct trace_event_file *file); @@ -1967,7 +1920,14 @@ extern struct trace_iterator *tracepoint_print_iter; */ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) { - memset_startat(iter, 0, seq); + const size_t offset = offsetof(struct trace_iterator, seq); + + /* + * Keep gcc from complaining about overwriting more than just one + * member in the structure. 
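The open-coded reset above (and the matching one in the tracing_read_pipe() hunk) is the zero-from-member idiom that memset_startat() wraps in newer kernels: members before the chosen one are preserved, and everything from it to the end of the structure is cleared in a single memset(). A self-contained illustration with a hypothetical struct:

#include <stddef.h>
#include <string.h>

struct conn_stats {
	int fd;		/* preserved across resets */
	void *priv;	/* preserved across resets */
	long bytes;	/* zeroed from here onward */
	int errors;
	int retries;
};

static void conn_stats_reset(struct conn_stats *c)
{
	const size_t off = offsetof(struct conn_stats, bytes);

	/* Zero 'bytes' and every member laid out after it. */
	memset((char *)c + off, 0, sizeof(*c) - off);
}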
+ */ + memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset); + iter->pos = -1; } diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 541aa13581..928867f527 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -242,6 +242,7 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i) static int eprobe_event_define_fields(struct trace_event_call *event_call) { + int ret; struct eprobe_trace_entry_head field; struct trace_probe *tp; @@ -249,6 +250,8 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call) if (WARN_ON_ONCE(!tp)) return -ENOENT; + DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0); + return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } @@ -267,9 +270,7 @@ print_eprobe_event(struct trace_iterator *iter, int flags, struct trace_event_call *pevent; struct trace_event *probed_event; struct trace_seq *s = &iter->seq; - struct trace_eprobe *ep; struct trace_probe *tp; - unsigned int type; field = (struct eprobe_trace_entry_head *)iter->ent; tp = trace_probe_primary_from_call( @@ -277,18 +278,15 @@ print_eprobe_event(struct trace_iterator *iter, int flags, if (WARN_ON_ONCE(!tp)) goto out; - ep = container_of(tp, struct trace_eprobe, tp); - type = ep->event->event.type; - trace_seq_printf(s, "%s: (", trace_probe_name(tp)); - probed_event = ftrace_find_event(type); + probed_event = ftrace_find_event(field->type); if (probed_event) { pevent = container_of(probed_event, struct trace_event_call, event); trace_seq_printf(s, "%s.%s", pevent->class->system, trace_event_name(pevent)); } else { - trace_seq_printf(s, "%u", type); + trace_seq_printf(s, "%u", field->type); } trace_seq_putc(s, ')'); @@ -491,15 +489,25 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec) if (trace_trigger_soft_disabled(edata->file)) return; + fbuffer.trace_ctx = tracing_gen_ctx(); + fbuffer.trace_file = edata->file; + dsize = get_eprobe_size(&edata->ep->tp, rec); + fbuffer.regs = NULL; - entry = trace_event_buffer_reserve(&fbuffer, edata->file, - sizeof(*entry) + edata->ep->tp.size + dsize); - - if (!entry) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file, + call->event.type, + sizeof(*entry) + edata->ep->tp.size + dsize, + fbuffer.trace_ctx); + if (!fbuffer.event) return; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); + if (edata->ep->event) + entry->type = edata->ep->event->event.type; + else + entry->type = 0; store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); @@ -541,29 +549,29 @@ static void eprobe_trigger_func(struct event_trigger_data *data, } static struct event_trigger_ops eprobe_trigger_ops = { - .trigger = eprobe_trigger_func, + .func = eprobe_trigger_func, .print = eprobe_trigger_print, .init = eprobe_trigger_init, .free = eprobe_trigger_free, }; -static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +static int eprobe_trigger_cmd_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { return -1; } -static int eprobe_trigger_reg_func(char *glob, - struct event_trigger_data *data, - struct trace_event_file *file) +static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct trace_event_file *file) { return -1; } -static void eprobe_trigger_unreg_func(char *glob, - struct 
event_trigger_data *data, - struct trace_event_file *file) +static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct trace_event_file *file) { } @@ -578,7 +586,7 @@ static struct event_command event_trigger_cmd = { .name = "eprobe", .trigger_type = ETT_EVENT_EPROBE, .flags = EVENT_CMD_FL_NEEDS_REC, - .parse = eprobe_trigger_cmd_parse, + .func = eprobe_trigger_cmd_func, .reg = eprobe_trigger_reg_func, .unreg = eprobe_trigger_unreg_func, .unreg_all = NULL, diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a114549720..fba8cb77a7 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -400,8 +400,7 @@ void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "perf buffer not large enough, wanted %d, have %d", - size, PERF_MAX_TRACE_SIZE)) + "perf buffer not large enough")) return NULL; *rctxp = rctx = perf_swevent_get_recursion_context(); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3147614c18..44d031ffe5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -885,10 +885,10 @@ static void __ftrace_clear_event_pids(struct trace_array *tr, int type) tracepoint_synchronize_unregister(); if ((type & TRACE_PIDS) && pid_list) - trace_pid_list_free(pid_list); + trace_free_pid_list(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) - trace_pid_list_free(no_pid_list); + trace_free_pid_list(no_pid_list); } static void ftrace_clear_event_pids(struct trace_array *tr, int type) @@ -1967,7 +1967,7 @@ event_pid_write(struct file *filp, const char __user *ubuf, if (filtered_pids) { tracepoint_synchronize_unregister(); - trace_pid_list_free(filtered_pids); + trace_free_pid_list(filtered_pids); } else if (pid_list && !other_pids) { register_pid_events(tr); } @@ -2681,7 +2681,6 @@ trace_create_new_event(struct trace_event_call *call, struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; struct trace_event_file *file; - unsigned int first; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) @@ -2692,8 +2691,7 @@ trace_create_new_event(struct trace_event_call *call, no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); - if (!trace_pid_list_first(pid_list, &first) || - !trace_pid_list_first(no_pid_list, &first)) + if (pid_list || no_pid_list) file->flags |= EVENT_FILE_FL_PID_FILTER; file->event_call = call; @@ -3461,8 +3459,10 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events, tr, &ftrace_tr_enable_fops); - if (!entry) + if (!entry) { + pr_warn("Could not create tracefs 'enable' entry\n"); return -ENOMEM; + } /* There are not as crucial, just warn if they are not created */ @@ -3478,13 +3478,17 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n"); /* ring buffer internal formats */ - trace_create_file("header_page", TRACE_MODE_READ, d_events, + entry = trace_create_file("header_page", TRACE_MODE_READ, d_events, ring_buffer_print_page_header, &ftrace_show_header_fops); + if (!entry) + pr_warn("Could not create tracefs 'header_page' entry\n"); - trace_create_file("header_event", TRACE_MODE_READ, d_events, + entry = trace_create_file("header_event", TRACE_MODE_READ, 
d_events, ring_buffer_print_entry_header, &ftrace_show_header_fops); + if (!entry) + pr_warn("Could not create tracefs 'header_event' entry\n"); tr->event_dir = d_events; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index b458a9afa2..06d6318ee5 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -777,29 +777,6 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event) return match; } -/* - * Filter predicate for relative dynamic sized arrays of characters. - * These are implemented through a list of strings at the end - * of the entry as same as dynamic string. - * The difference is that the relative one records the location offset - * from the field itself, not the event entry. - */ -static int filter_pred_strrelloc(struct filter_pred *pred, void *event) -{ - u32 *item = (u32 *)(event + pred->offset); - u32 str_item = *item; - int str_loc = str_item & 0xffff; - int str_len = str_item >> 16; - char *addr = (char *)(&item[1]) + str_loc; - int cmp, match; - - cmp = pred->regex.match(addr, &pred->regex, str_len); - - match = cmp ^ pred->not; - - return match; -} - /* Filter predicate for CPUs. */ static int filter_pred_cpu(struct filter_pred *pred, void *event) { @@ -850,7 +827,7 @@ static int filter_pred_none(struct filter_pred *pred, void *event) * * Note: * - @str might not be NULL-terminated if it's of type DYN_STRING - * RDYN_STRING, or STATIC_STRING, unless @len is zero. + * or STATIC_STRING, unless @len is zero. */ static int regex_match_full(char *str, struct regex *r, int len) @@ -1177,9 +1154,6 @@ int filter_assign_type(const char *type) if (strstr(type, "__data_loc") && strstr(type, "char")) return FILTER_DYN_STRING; - if (strstr(type, "__rel_loc") && strstr(type, "char")) - return FILTER_RDYN_STRING; - if (strchr(type, '[') && strstr(type, "char")) return FILTER_STATIC_STRING; @@ -1422,10 +1396,8 @@ static int parse_pred(const char *str, void *data, pred->fn = filter_pred_string; pred->regex.field_len = field->size; - } else if (field->filter_type == FILTER_DYN_STRING) { + } else if (field->filter_type == FILTER_DYN_STRING) pred->fn = filter_pred_strloc; - } else if (field->filter_type == FILTER_RDYN_STRING) - pred->fn = filter_pred_strrelloc; else { if (!ustring_per_cpu) { diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ada87bfb5b..ea168d42c8 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -66,10 +66,7 @@ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ - C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \ - C(EXPECT_NUMBER, "Expecting numeric literal"), \ - C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \ - C(DIVISION_BY_ZERO, "Division by zero"), + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), #undef C #define C(a, b) HIST_ERR_##a @@ -92,16 +89,12 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field, #define HIST_FIELD_OPERANDS_MAX 2 #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) #define HIST_ACTIONS_MAX 8 -#define HIST_CONST_DIGITS_MAX 21 -#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */ enum field_op_id { FIELD_OP_NONE, FIELD_OP_PLUS, FIELD_OP_MINUS, FIELD_OP_UNARY_MINUS, - FIELD_OP_DIV, - FIELD_OP_MULT, }; /* @@ -159,11 +152,6 @@ struct hist_field { bool 
read_once; unsigned int var_str_idx; - - /* Numeric literals are represented as u64 */ - u64 constant; - /* Used to optimize division by constants */ - u64 div_multiplier; }; static u64 hist_field_none(struct hist_field *field, @@ -175,15 +163,6 @@ static u64 hist_field_none(struct hist_field *field, return 0; } -static u64 hist_field_const(struct hist_field *field, - struct tracing_map_elt *elt, - struct trace_buffer *buffer, - struct ring_buffer_event *rbe, - void *event) -{ - return field->constant; -} - static u64 hist_field_counter(struct hist_field *field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -217,20 +196,6 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, return (u64)(unsigned long)addr; } -static u64 hist_field_reldynstring(struct hist_field *hist_field, - struct tracing_map_elt *elt, - struct trace_buffer *buffer, - struct ring_buffer_event *rbe, - void *event) -{ - u32 *item = event + hist_field->field->offset; - u32 str_item = *item; - int str_loc = str_item & 0xffff; - char *addr = (char *)&item[1] + str_loc; - - return (u64)(unsigned long)addr; -} - static u64 hist_field_pstring(struct hist_field *hist_field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -306,106 +271,6 @@ static u64 hist_field_minus(struct hist_field *hist_field, return val1 - val2; } -static u64 hist_field_div(struct hist_field *hist_field, - struct tracing_map_elt *elt, - struct trace_buffer *buffer, - struct ring_buffer_event *rbe, - void *event) -{ - struct hist_field *operand1 = hist_field->operands[0]; - struct hist_field *operand2 = hist_field->operands[1]; - - u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); - u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event); - - /* Return -1 for the undefined case */ - if (!val2) - return -1; - - /* Use shift if the divisor is a power of 2 */ - if (!(val2 & (val2 - 1))) - return val1 >> __ffs64(val2); - - return div64_u64(val1, val2); -} - -static u64 div_by_power_of_two(struct hist_field *hist_field, - struct tracing_map_elt *elt, - struct trace_buffer *buffer, - struct ring_buffer_event *rbe, - void *event) -{ - struct hist_field *operand1 = hist_field->operands[0]; - struct hist_field *operand2 = hist_field->operands[1]; - - u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); - - return val1 >> __ffs64(operand2->constant); -} - -static u64 div_by_not_power_of_two(struct hist_field *hist_field, - struct tracing_map_elt *elt, - struct trace_buffer *buffer, - struct ring_buffer_event *rbe, - void *event) -{ - struct hist_field *operand1 = hist_field->operands[0]; - struct hist_field *operand2 = hist_field->operands[1]; - - u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); - - return div64_u64(val1, operand2->constant); -} - -static u64 div_by_mult_and_shift(struct hist_field *hist_field, - struct tracing_map_elt *elt, - struct trace_buffer *buffer, - struct ring_buffer_event *rbe, - void *event) -{ - struct hist_field *operand1 = hist_field->operands[0]; - struct hist_field *operand2 = hist_field->operands[1]; - - u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); - - /* - * If the divisor is a constant, do a multiplication and shift instead. - * - * Choose Z = some power of 2. 
If Y <= Z, then: - * X / Y = (X * (Z / Y)) / Z - * - * (Z / Y) is a constant (mult) which is calculated at parse time, so: - * X / Y = (X * mult) / Z - * - * The division by Z can be replaced by a shift since Z is a power of 2: - * X / Y = (X * mult) >> HIST_DIV_SHIFT - * - * As long, as X < Z the results will not be off by more than 1. - */ - if (val1 < (1 << HIST_DIV_SHIFT)) { - u64 mult = operand2->div_multiplier; - - return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT; - } - - return div64_u64(val1, operand2->constant); -} - -static u64 hist_field_mult(struct hist_field *hist_field, - struct tracing_map_elt *elt, - struct trace_buffer *buffer, - struct ring_buffer_event *rbe, - void *event) -{ - struct hist_field *operand1 = hist_field->operands[0]; - struct hist_field *operand2 = hist_field->operands[1]; - - u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); - u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event); - - return val1 * val2; -} - static u64 hist_field_unary_minus(struct hist_field *hist_field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -476,7 +341,6 @@ enum hist_field_flags { HIST_FIELD_FL_CPU = 1 << 15, HIST_FIELD_FL_ALIAS = 1 << 16, HIST_FIELD_FL_BUCKET = 1 << 17, - HIST_FIELD_FL_CONST = 1 << 18, }; struct var_defs { @@ -653,25 +517,6 @@ struct snapshot_context { void *key; }; -/* - * Returns the specific division function to use if the divisor - * is constant. This avoids extra branches when the trigger is hit. - */ -static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor) -{ - u64 div = divisor->constant; - - if (!(div & (div - 1))) - return div_by_power_of_two; - - /* If the divisor is too large, do a regular division */ - if (div > (1 << HIST_DIV_SHIFT)) - return div_by_not_power_of_two; - - divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div); - return div_by_mult_and_shift; -} - static void track_data_free(struct track_data *track_data) { struct hist_elt_data *elt_data; @@ -1671,12 +1516,6 @@ static void expr_field_str(struct hist_field *field, char *expr) { if (field->flags & HIST_FIELD_FL_VAR_REF) strcat(expr, "$"); - else if (field->flags & HIST_FIELD_FL_CONST) { - char str[HIST_CONST_DIGITS_MAX]; - - snprintf(str, HIST_CONST_DIGITS_MAX, "%llu", field->constant); - strcat(expr, str); - } strcat(expr, hist_field_name(field, 0)); @@ -1732,12 +1571,6 @@ static char *expr_str(struct hist_field *field, unsigned int level) case FIELD_OP_PLUS: strcat(expr, "+"); break; - case FIELD_OP_DIV: - strcat(expr, "/"); - break; - case FIELD_OP_MULT: - strcat(expr, "*"); - break; default: kfree(expr); return NULL; @@ -1748,92 +1581,34 @@ static char *expr_str(struct hist_field *field, unsigned int level) return expr; } -/* - * If field_op != FIELD_OP_NONE, *sep points to the root operator - * of the expression tree to be evaluated. - */ -static int contains_operator(char *str, char **sep) +static int contains_operator(char *str) { enum field_op_id field_op = FIELD_OP_NONE; - char *minus_op, *plus_op, *div_op, *mult_op; + char *op; + op = strpbrk(str, "+-"); + if (!op) + return FIELD_OP_NONE; - /* - * Report the last occurrence of the operators first, so that the - * expression is evaluated left to right. This is important since - * subtraction and division are not associative. 
- * - * e.g - * 64/8/4/2 is 1, i.e 64/8/4/2 = ((64/8)/4)/2 - * 14-7-5-2 is 0, i.e 14-7-5-2 = ((14-7)-5)-2 - */ - - /* - * First, find lower precedence addition and subtraction - * since the expression will be evaluated recursively. - */ - minus_op = strrchr(str, '-'); - if (minus_op) { + switch (*op) { + case '-': /* - * Unary minus is not supported in sub-expressions. If - * present, it is always the next root operator. + * Unfortunately, the modifier ".sym-offset" + * can confuse things. */ - if (minus_op == str) { + if (op - str >= 4 && !strncmp(op - 4, ".sym-offset", 11)) + return FIELD_OP_NONE; + + if (*str == '-') field_op = FIELD_OP_UNARY_MINUS; - goto out; - } - - field_op = FIELD_OP_MINUS; - } - - plus_op = strrchr(str, '+'); - if (plus_op || minus_op) { - /* - * For operators of the same precedence use to rightmost as the - * root, so that the expression is evaluated left to right. - */ - if (plus_op > minus_op) - field_op = FIELD_OP_PLUS; - goto out; - } - - /* - * Multiplication and division have higher precedence than addition and - * subtraction. - */ - div_op = strrchr(str, '/'); - if (div_op) - field_op = FIELD_OP_DIV; - - mult_op = strrchr(str, '*'); - /* - * For operators of the same precedence use to rightmost as the - * root, so that the expression is evaluated left to right. - */ - if (mult_op > div_op) - field_op = FIELD_OP_MULT; - -out: - if (sep) { - switch (field_op) { - case FIELD_OP_UNARY_MINUS: - case FIELD_OP_MINUS: - *sep = minus_op; - break; - case FIELD_OP_PLUS: - *sep = plus_op; - break; - case FIELD_OP_DIV: - *sep = div_op; - break; - case FIELD_OP_MULT: - *sep = mult_op; - break; - case FIELD_OP_NONE: - default: - *sep = NULL; - break; - } + else + field_op = FIELD_OP_MINUS; + break; + case '+': + field_op = FIELD_OP_PLUS; + break; + default: + break; } return field_op; @@ -1914,15 +1689,6 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, goto out; } - if (flags & HIST_FIELD_FL_CONST) { - hist_field->fn = hist_field_const; - hist_field->size = sizeof(u64); - hist_field->type = kstrdup("u64", GFP_KERNEL); - if (!hist_field->type) - goto free; - goto out; - } - if (flags & HIST_FIELD_FL_STACKTRACE) { hist_field->fn = hist_field_none; goto out; @@ -1970,10 +1736,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (field->filter_type == FILTER_STATIC_STRING) { hist_field->fn = hist_field_string; hist_field->size = field->size; - } else if (field->filter_type == FILTER_DYN_STRING) { + } else if (field->filter_type == FILTER_DYN_STRING) hist_field->fn = hist_field_dynstring; - } else if (field->filter_type == FILTER_RDYN_STRING) - hist_field->fn = hist_field_reldynstring; else hist_field->fn = hist_field_pstring; } else { @@ -2162,7 +1926,7 @@ static char *field_name_from_var(struct hist_trigger_data *hist_data, if (strcmp(var_name, name) == 0) { field = hist_data->attrs->var_defs.expr[i]; - if (contains_operator(field, NULL) || is_var_ref(field)) + if (contains_operator(field) || is_var_ref(field)) continue; return field; } @@ -2239,11 +2003,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, *flags |= HIST_FIELD_FL_HEX; else if (strcmp(modifier, "sym") == 0) *flags |= HIST_FIELD_FL_SYM; - /* - * 'sym-offset' occurrences in the trigger string are modified - * to 'symXoffset' to simplify arithmetic expression parsing. 
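The reciprocal-multiply derivation removed a few hunks up (X / Y = (X * (Z / Y)) / Z with Z = 2^HIST_DIV_SHIFT) is easy to sanity-check in isolation. A standalone sketch, with the shift and divisor chosen purely for illustration:

#include <stdint.h>

#define DIV_SHIFT 20			/* Z = 1 << 20 */

/* mult is precomputed once per constant divisor: (1 << DIV_SHIFT) / y */
static uint64_t fast_div(uint64_t x, uint64_t y, uint64_t mult)
{
	if (x < (1ULL << DIV_SHIFT))
		return (x * mult + ((1ULL << DIV_SHIFT) - 1)) >> DIV_SHIFT;
	return x / y;			/* out of range: use a real division */
}

/* For y = 1000: mult = (1 << 20) / 1000 = 1048, and
 * fast_div(500000, 1000, 1048) == 500, matching 500000 / 1000 exactly. */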
- */ - else if (strcmp(modifier, "symXoffset") == 0) + else if (strcmp(modifier, "sym-offset") == 0) *flags |= HIST_FIELD_FL_SYM_OFFSET; else if ((strcmp(modifier, "execname") == 0) && (strcmp(field_name, "common_pid") == 0)) @@ -2289,9 +2049,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, /* * For backward compatibility, if field_name * was "cpu", then we treat this the same as - * common_cpu. + * common_cpu. This also works for "CPU". */ - if (strcmp(field_name, "cpu") == 0) { + if (field && field->filter_type == FILTER_CPU) { *flags |= HIST_FIELD_FL_CPU; } else { hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, @@ -2331,29 +2091,6 @@ static struct hist_field *create_alias(struct hist_trigger_data *hist_data, return alias; } -static struct hist_field *parse_const(struct hist_trigger_data *hist_data, - char *str, char *var_name, - unsigned long *flags) -{ - struct trace_array *tr = hist_data->event_file->tr; - struct hist_field *field = NULL; - u64 constant; - - if (kstrtoull(str, 0, &constant)) { - hist_err(tr, HIST_ERR_EXPECT_NUMBER, errpos(str)); - return NULL; - } - - *flags |= HIST_FIELD_FL_CONST; - field = create_hist_field(hist_data, NULL, *flags, var_name); - if (!field) - return NULL; - - field->constant = constant; - - return field; -} - static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long *flags, char *var_name) @@ -2364,15 +2101,6 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, unsigned long buckets = 0; int ret = 0; - if (isdigit(str[0])) { - hist_field = parse_const(hist_data, str, var_name, flags); - if (!hist_field) { - ret = -EINVAL; - goto out; - } - return hist_field; - } - s = strchr(str, '.'); if (s) { s = strchr(++s, '.'); @@ -2429,24 +2157,21 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int *n_subexprs); + char *var_name, unsigned int level); static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int *n_subexprs) + char *var_name, unsigned int level) { struct hist_field *operand1, *expr = NULL; unsigned long operand_flags; int ret = 0; char *s; - /* Unary minus operator, increment n_subexprs */ - ++*n_subexprs; - /* we support only -(xxx) i.e. 
explicit parens required */ - if (*n_subexprs > 3) { + if (level > 3) { hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; @@ -2463,16 +2188,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, } s = strrchr(str, ')'); - if (s) { - /* unary minus not supported in sub-expressions */ - if (*(s+1) != '\0') { - hist_err(file->tr, HIST_ERR_UNARY_MINUS_SUBEXPR, - errpos(str)); - ret = -EINVAL; - goto free; - } + if (s) *s = '\0'; - } else { ret = -EINVAL; /* no closing ')' */ goto free; @@ -2486,7 +2203,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, } operand_flags = 0; - operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs); + operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); if (IS_ERR(operand1)) { ret = PTR_ERR(operand1); goto free; @@ -2519,15 +2236,9 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, return ERR_PTR(ret); } -/* - * If the operands are var refs, return pointers the - * variable(s) referenced in var1 and var2, else NULL. - */ static int check_expr_operands(struct trace_array *tr, struct hist_field *operand1, - struct hist_field *operand2, - struct hist_field **var1, - struct hist_field **var2) + struct hist_field *operand2) { unsigned long operand1_flags = operand1->flags; unsigned long operand2_flags = operand2->flags; @@ -2540,7 +2251,6 @@ static int check_expr_operands(struct trace_array *tr, if (!var) return -EINVAL; operand1_flags = var->flags; - *var1 = var; } if ((operand2_flags & HIST_FIELD_FL_VAR_REF) || @@ -2551,7 +2261,6 @@ static int check_expr_operands(struct trace_array *tr, if (!var) return -EINVAL; operand2_flags = var->flags; - *var2 = var; } if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != @@ -2566,102 +2275,74 @@ static int check_expr_operands(struct trace_array *tr, static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int *n_subexprs) + char *var_name, unsigned int level) { struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL; - struct hist_field *var1 = NULL, *var2 = NULL; - unsigned long operand_flags, operand2_flags; + unsigned long operand_flags; int field_op, ret = -EINVAL; char *sep, *operand1_str; - hist_field_fn_t op_fn; - bool combine_consts; - if (*n_subexprs > 3) { + if (level > 3) { hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } - field_op = contains_operator(str, &sep); + field_op = contains_operator(str); if (field_op == FIELD_OP_NONE) return parse_atom(hist_data, file, str, &flags, var_name); if (field_op == FIELD_OP_UNARY_MINUS) - return parse_unary(hist_data, file, str, flags, var_name, n_subexprs); + return parse_unary(hist_data, file, str, flags, var_name, ++level); - /* Binary operator found, increment n_subexprs */ - ++*n_subexprs; + switch (field_op) { + case FIELD_OP_MINUS: + sep = "-"; + break; + case FIELD_OP_PLUS: + sep = "+"; + break; + default: + goto free; + } - /* Split the expression string at the root operator */ - if (!sep) - return ERR_PTR(-EINVAL); - - *sep = '\0'; - operand1_str = str; - str = sep+1; - - /* Binary operator requires both operands */ - if (*operand1_str == '\0' || *str == '\0') - return ERR_PTR(-EINVAL); + operand1_str = strsep(&str, sep); + if (!operand1_str || !str) + goto free; operand_flags = 0; - - /* LHS of string is an expression e.g. 
a+b in a+b+c */ - operand1 = parse_expr(hist_data, file, operand1_str, operand_flags, NULL, n_subexprs); - if (IS_ERR(operand1)) - return ERR_CAST(operand1); - + operand1 = parse_atom(hist_data, file, operand1_str, + &operand_flags, NULL); + if (IS_ERR(operand1)) { + ret = PTR_ERR(operand1); + operand1 = NULL; + goto free; + } if (operand1->flags & HIST_FIELD_FL_STRING) { hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str)); ret = -EINVAL; - goto free_op1; + goto free; } - /* RHS of string is another expression e.g. c in a+b+c */ + /* rest of string could be another expression e.g. b+c in a+b+c */ operand_flags = 0; - operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs); + operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); if (IS_ERR(operand2)) { ret = PTR_ERR(operand2); - goto free_op1; + operand2 = NULL; + goto free; } if (operand2->flags & HIST_FIELD_FL_STRING) { hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); ret = -EINVAL; - goto free_operands; + goto free; } - switch (field_op) { - case FIELD_OP_MINUS: - op_fn = hist_field_minus; - break; - case FIELD_OP_PLUS: - op_fn = hist_field_plus; - break; - case FIELD_OP_DIV: - op_fn = hist_field_div; - break; - case FIELD_OP_MULT: - op_fn = hist_field_mult; - break; - default: - ret = -EINVAL; - goto free_operands; - } - - ret = check_expr_operands(file->tr, operand1, operand2, &var1, &var2); + ret = check_expr_operands(file->tr, operand1, operand2); if (ret) - goto free_operands; + goto free; - operand_flags = var1 ? var1->flags : operand1->flags; - operand2_flags = var2 ? var2->flags : operand2->flags; - - /* - * If both operands are constant, the expression can be - * collapsed to a single constant. - */ - combine_consts = operand_flags & operand2_flags & HIST_FIELD_FL_CONST; - - flags |= combine_consts ? HIST_FIELD_FL_CONST : HIST_FIELD_FL_EXPR; + flags |= HIST_FIELD_FL_EXPR; flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2669,80 +2350,45 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr = create_hist_field(hist_data, NULL, flags, var_name); if (!expr) { ret = -ENOMEM; - goto free_operands; + goto free; } operand1->read_once = true; operand2->read_once = true; - /* The operands are now owned and free'd by 'expr' */ expr->operands[0] = operand1; expr->operands[1] = operand2; - if (field_op == FIELD_OP_DIV && - operand2_flags & HIST_FIELD_FL_CONST) { - u64 divisor = var2 ? 
var2->constant : operand2->constant; + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + expr->is_signed = operand1->is_signed; - if (!divisor) { - hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str)); - ret = -EDOM; - goto free_expr; - } - - /* - * Copy the divisor here so we don't have to look it up - * later if this is a var ref - */ - operand2->constant = divisor; - op_fn = hist_field_get_div_fn(operand2); + expr->operator = field_op; + expr->name = expr_str(expr, 0); + expr->type = kstrdup_const(operand1->type, GFP_KERNEL); + if (!expr->type) { + ret = -ENOMEM; + goto free; } - if (combine_consts) { - if (var1) - expr->operands[0] = var1; - if (var2) - expr->operands[1] = var2; - - expr->constant = op_fn(expr, NULL, NULL, NULL, NULL); - - expr->operands[0] = NULL; - expr->operands[1] = NULL; - - /* - * var refs won't be destroyed immediately - * See: destroy_hist_field() - */ - destroy_hist_field(operand2, 0); - destroy_hist_field(operand1, 0); - - expr->name = expr_str(expr, 0); - } else { - expr->fn = op_fn; - - /* The operand sizes should be the same, so just pick one */ - expr->size = operand1->size; - expr->is_signed = operand1->is_signed; - - expr->operator = field_op; - expr->type = kstrdup_const(operand1->type, GFP_KERNEL); - if (!expr->type) { - ret = -ENOMEM; - goto free_expr; - } - - expr->name = expr_str(expr, 0); + switch (field_op) { + case FIELD_OP_MINUS: + expr->fn = hist_field_minus; + break; + case FIELD_OP_PLUS: + expr->fn = hist_field_plus; + break; + default: + ret = -EINVAL; + goto free; } return expr; - -free_operands: - destroy_hist_field(operand2, 0); -free_op1: + free: destroy_hist_field(operand1, 0); - return ERR_PTR(ret); - -free_expr: + destroy_hist_field(operand2, 0); destroy_hist_field(expr, 0); + return ERR_PTR(ret); } @@ -2764,9 +2410,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data, } static struct event_command trigger_hist_cmd; -static int event_hist_trigger_parse(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param); +static int event_hist_trigger_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param); static bool compatible_keys(struct hist_trigger_data *target_hist_data, struct hist_trigger_data *hist_data, @@ -2969,8 +2615,8 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, var_hist->hist_data = hist_data; /* Create the new histogram with our variable */ - ret = event_hist_trigger_parse(&trigger_hist_cmd, file, - "", "hist", cmd); + ret = event_hist_trigger_func(&trigger_hist_cmd, file, + "", "hist", cmd); if (ret) { kfree(cmd); kfree(var_hist->cmd); @@ -4112,9 +3758,9 @@ static int __create_val_field(struct hist_trigger_data *hist_data, unsigned long flags) { struct hist_field *hist_field; - int ret = 0, n_subexprs = 0; + int ret = 0; - hist_field = parse_expr(hist_data, file, field_str, flags, var_name, &n_subexprs); + hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); if (IS_ERR(hist_field)) { ret = PTR_ERR(hist_field); goto out; @@ -4255,7 +3901,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, struct hist_field *hist_field = NULL; unsigned long flags = 0; unsigned int key_size; - int ret = 0, n_subexprs = 0; + int ret = 0; if (WARN_ON(key_idx >= HIST_FIELDS_MAX)) return -EINVAL; @@ -4268,7 +3914,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, hist_field = create_hist_field(hist_data, 
NULL, flags, NULL); } else { hist_field = parse_expr(hist_data, file, field_str, flags, - NULL, &n_subexprs); + NULL, 0); if (IS_ERR(hist_field)) { ret = PTR_ERR(hist_field); goto out; @@ -4832,7 +4478,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) cmp_fn = tracing_map_cmp_none; - else if (!field) + else if (!field || hist_field->flags & HIST_FIELD_FL_CPU) cmp_fn = tracing_map_cmp_num(hist_field->size, hist_field->is_signed); else if (is_string_field(field)) @@ -4981,8 +4627,7 @@ static inline void add_to_key(char *compound_key, void *key, struct ftrace_event_field *field; field = key_field->field; - if (field->filter_type == FILTER_DYN_STRING || - field->filter_type == FILTER_RDYN_STRING) + if (field->filter_type == FILTER_DYN_STRING) size = *(u32 *)(rec + field->offset) >> 16; else if (field->filter_type == FILTER_STATIC_STRING) size = field->size; @@ -5071,6 +4716,7 @@ static void hist_trigger_stacktrace_print(struct seq_file *m, unsigned long *stacktrace_entries, unsigned int max_entries) { + char str[KSYM_SYMBOL_LEN]; unsigned int spaces = 8; unsigned int i; @@ -5079,7 +4725,8 @@ static void hist_trigger_stacktrace_print(struct seq_file *m, return; seq_printf(m, "%*c", 1 + spaces, ' '); - seq_printf(m, "%pS\n", (void*)stacktrace_entries[i]); + sprint_symbol(str, stacktrace_entries[i]); + seq_printf(m, "%s\n", str); } } @@ -5089,6 +4736,7 @@ static void hist_trigger_print_key(struct seq_file *m, struct tracing_map_elt *elt) { struct hist_field *key_field; + char str[KSYM_SYMBOL_LEN]; bool multiline = false; const char *field_name; unsigned int i; @@ -5109,12 +4757,14 @@ static void hist_trigger_print_key(struct seq_file *m, seq_printf(m, "%s: %llx", field_name, uval); } else if (key_field->flags & HIST_FIELD_FL_SYM) { uval = *(u64 *)(key + key_field->offset); - seq_printf(m, "%s: [%llx] %-45ps", field_name, - uval, (void *)(uintptr_t)uval); + sprint_symbol_no_offset(str, uval); + seq_printf(m, "%s: [%llx] %-45s", field_name, + uval, str); } else if (key_field->flags & HIST_FIELD_FL_SYM_OFFSET) { uval = *(u64 *)(key + key_field->offset); - seq_printf(m, "%s: [%llx] %-55pS", field_name, - uval, (void *)(uintptr_t)uval); + sprint_symbol(str, uval); + seq_printf(m, "%s: [%llx] %-55s", field_name, + uval, str); } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { struct hist_elt_data *elt_data = elt->private_data; char *comm; @@ -5310,8 +4960,6 @@ static void hist_field_debug_show_flags(struct seq_file *m, if (flags & HIST_FIELD_FL_ALIAS) seq_puts(m, " HIST_FIELD_FL_ALIAS\n"); - else if (flags & HIST_FIELD_FL_CONST) - seq_puts(m, " HIST_FIELD_FL_CONST\n"); } static int hist_field_debug_show(struct seq_file *m, @@ -5333,9 +4981,6 @@ static int hist_field_debug_show(struct seq_file *m, field->var.idx); } - if (field->flags & HIST_FIELD_FL_CONST) - seq_printf(m, " constant: %llu\n", field->constant); - if (field->flags & HIST_FIELD_FL_ALIAS) seq_printf(m, " var_ref_idx (into hist_data->var_refs[]): %u\n", field->var_ref_idx); @@ -5578,8 +5223,6 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) if (hist_field->flags & HIST_FIELD_FL_CPU) seq_puts(m, "common_cpu"); - else if (hist_field->flags & HIST_FIELD_FL_CONST) - seq_printf(m, "%llu", hist_field->constant); else if (field_name) { if (hist_field->flags & HIST_FIELD_FL_VAR_REF || hist_field->flags & HIST_FIELD_FL_ALIAS) @@ -5733,8 +5376,8 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data) for (i = 0; 
i < hist_data->n_field_var_hists; i++) { file = hist_data->field_var_hists[i]->hist_data->event_file; cmd = hist_data->field_var_hists[i]->cmd; - ret = event_hist_trigger_parse(&trigger_hist_cmd, file, - "!hist", "hist", cmd); + ret = event_hist_trigger_func(&trigger_hist_cmd, file, + "!hist", "hist", cmd); WARN_ON_ONCE(ret < 0); } } @@ -5763,7 +5406,7 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_hist_trigger_ops = { - .trigger = event_hist_trigger, + .func = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_init, .free = event_hist_trigger_free, @@ -5797,7 +5440,7 @@ static void event_hist_trigger_named_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_hist_trigger_named_ops = { - .trigger = event_hist_trigger, + .func = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_named_init, .free = event_hist_trigger_named_free, @@ -5914,7 +5557,7 @@ static bool hist_trigger_match(struct event_trigger_data *data, return true; } -static int hist_register_trigger(char *glob, +static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { @@ -6066,7 +5709,7 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data, return false; } -static void hist_unregister_trigger(char *glob, +static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { @@ -6150,9 +5793,9 @@ static void hist_unreg_all(struct trace_event_file *file) } } -static int event_hist_trigger_parse(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +static int event_hist_trigger_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT; struct event_trigger_data *trigger_data; @@ -6162,14 +5805,12 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, struct synth_event *se; const char *se_name; bool remove = false; - char *trigger, *p, *start; + char *trigger, *p; int ret = 0; lockdep_assert_held(&event_mutex); - WARN_ON(!glob); - - if (strlen(glob)) { + if (glob && strlen(glob)) { hist_err_clear(); last_cmd_set(file, param); } @@ -6202,7 +5843,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, continue; } break; - } while (1); + } while (p); if (!p) param = NULL; @@ -6212,16 +5853,6 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, trigger = strstrip(trigger); } - /* - * To simplify arithmetic expression parsing, replace occurrences of - * '.sym-offset' modifier with '.symXoffset' - */ - start = strstr(trigger, ".sym-offset"); - while (start) { - *(start + 4) = 'X'; - start = strstr(start + 11, ".sym-offset"); - } - attrs = parse_hist_trigger_attrs(file->tr, trigger); if (IS_ERR(attrs)) return PTR_ERR(attrs); @@ -6268,7 +5899,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, goto out_free; } - cmd_ops->unreg(glob+1, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); se_name = trace_event_name(file->event_call); se = find_synth_event(se_name); if (se) @@ -6277,7 +5908,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, goto out_free; } - ret = cmd_ops->reg(glob, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_ops, 
trigger_data, file); /* * The above returns on success the # of triggers registered, * but if it didn't register any it returns zero. Consider no @@ -6320,7 +5951,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, return ret; out_unreg: - cmd_ops->unreg(glob+1, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); out_free: if (cmd_ops->set_filter) cmd_ops->set_filter(NULL, trigger_data, NULL); @@ -6337,7 +5968,7 @@ static struct event_command trigger_hist_cmd = { .name = "hist", .trigger_type = ETT_EVENT_HIST, .flags = EVENT_CMD_FL_NEEDS_REC, - .parse = event_hist_trigger_parse, + .func = event_hist_trigger_func, .reg = hist_register_trigger, .unreg = hist_unregister_trigger, .unreg_all = hist_unreg_all, @@ -6389,28 +6020,28 @@ hist_enable_count_trigger(struct event_trigger_data *data, } static struct event_trigger_ops hist_enable_trigger_ops = { - .trigger = hist_enable_trigger, + .func = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_enable_count_trigger_ops = { - .trigger = hist_enable_count_trigger, + .func = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_disable_trigger_ops = { - .trigger = hist_enable_trigger, + .func = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_disable_count_trigger_ops = { - .trigger = hist_enable_count_trigger, + .func = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, @@ -6452,7 +6083,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file) static struct event_command trigger_hist_enable_cmd = { .name = ENABLE_HIST_STR, .trigger_type = ETT_HIST_ENABLE, - .parse = event_enable_trigger_parse, + .func = event_enable_trigger_func, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .unreg_all = hist_enable_unreg_all, @@ -6463,7 +6094,7 @@ static struct event_command trigger_hist_enable_cmd = { static struct event_command trigger_hist_disable_cmd = { .name = DISABLE_HIST_STR, .trigger_type = ETT_HIST_ENABLE, - .parse = event_enable_trigger_parse, + .func = event_enable_trigger_func, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .unreg_all = hist_enable_unreg_all, diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index d6b4935a78..c188045c5f 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -168,14 +168,10 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) continue; if (field->filter_type == FILTER_STATIC_STRING) continue; - if (field->filter_type == FILTER_DYN_STRING || - field->filter_type == FILTER_RDYN_STRING) { + if (field->filter_type == FILTER_DYN_STRING) { u32 *str_item; int str_loc = entry_size & 0xffff; - if (field->filter_type == FILTER_RDYN_STRING) - str_loc -= field->offset + field->size; - str_item = (u32 *)(entry + field->offset); *str_item = str_loc; /* string length is 0. 
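For reference, the str_loc/str_item arithmetic in this hunk follows the __data_loc convention: a dynamic string field occupies a fixed u32 descriptor in the record, with the string length in the upper 16 bits and the offset of the string payload in the lower 16. A small sketch of packing and unpacking such a descriptor (plain C, hypothetical names):

#include <stdint.h>

static inline uint32_t data_loc_pack(uint16_t len, uint16_t offset)
{
	return ((uint32_t)len << 16) | offset;
}

static inline void data_loc_unpack(uint32_t loc, uint16_t *len, uint16_t *offset)
{
	*len = loc >> 16;
	*offset = loc & 0xffff;
}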
*/ } else { @@ -218,8 +214,7 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) if (field->filter_type == FILTER_STATIC_STRING) { strlcpy(entry + field->offset, addr, field->size); - } else if (field->filter_type == FILTER_DYN_STRING || - field->filter_type == FILTER_RDYN_STRING) { + } else if (field->filter_type == FILTER_DYN_STRING) { int str_len = strlen(addr) + 1; int str_loc = entry_size & 0xffff; u32 *str_item; @@ -234,8 +229,6 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) strlcpy(entry + (entry_size - str_len), addr, str_len); str_item = (u32 *)(entry + field->offset); - if (field->filter_type == FILTER_RDYN_STRING) - str_loc -= field->offset + field->size; *str_item = (str_len << 16) | str_loc; } else { char **paddr; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 154db74dad..8c26092db8 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1237,8 +1237,9 @@ static int __create_synth_event(const char *name, const char *raw_fields) argv + consumed, &consumed, &field_version); if (IS_ERR(field)) { + argv_free(argv); ret = PTR_ERR(field); - goto err_free_arg; + goto err; } /* @@ -1261,19 +1262,18 @@ static int __create_synth_event(const char *name, const char *raw_fields) if (cmd_version > 1 && n_fields_this_loop >= 1) { synth_err(SYNTH_ERR_INVALID_CMD, errpos(field_str)); ret = -EINVAL; - goto err_free_arg; + goto err; } fields[n_fields++] = field; if (n_fields == SYNTH_FIELDS_MAX) { synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); ret = -EINVAL; - goto err_free_arg; + goto err; } n_fields_this_loop++; } - argv_free(argv); if (consumed < argc) { synth_err(SYNTH_ERR_INVALID_CMD, 0); @@ -1281,6 +1281,7 @@ static int __create_synth_event(const char *name, const char *raw_fields) goto err; } + argv_free(argv); } if (n_fields == 0) { @@ -1306,8 +1307,6 @@ static int __create_synth_event(const char *name, const char *raw_fields) kfree(saved_fields); return ret; - err_free_arg: - argv_free(argv); err: for (i = 0; i < n_fields; i++) free_synth_field(fields[i]); @@ -1979,7 +1978,7 @@ EXPORT_SYMBOL_GPL(synth_event_add_next_val); /** * synth_event_add_val - Add a named field's value to an open synth trace * @field_name: The name of the synthetic event field value to set - * @val: The value to set the named field to + * @val: The value to set the next field to * @trace_state: A pointer to object tracking the piecewise trace state * * Set the value of the named field in an event that's been opened by diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 7eb9d04f1c..67c7979c40 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -68,7 +68,7 @@ event_triggers_call(struct trace_event_file *file, if (data->paused) continue; if (!rec) { - data->ops->trigger(data, buffer, rec, event); + data->ops->func(data, buffer, rec, event); continue; } filter = rcu_dereference_sched(data->filter); @@ -78,26 +78,12 @@ event_triggers_call(struct trace_event_file *file, tt |= data->cmd_ops->trigger_type; continue; } - data->ops->trigger(data, buffer, rec, event); + data->ops->func(data, buffer, rec, event); } return tt; } EXPORT_SYMBOL_GPL(event_triggers_call); -bool __trace_trigger_soft_disabled(struct trace_event_file *file) -{ - unsigned long eflags = file->flags; - - if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL, NULL); - if (eflags & EVENT_FILE_FL_SOFT_DISABLED) - 
return true; - if (eflags & EVENT_FILE_FL_PID_FILTER) - return trace_event_ignore_this_pid(file); - return false; -} -EXPORT_SYMBOL_GPL(__trace_trigger_soft_disabled); - /** * event_triggers_post_call - Call 'post_triggers' for a trace event * @file: The trace_event_file associated with the event @@ -120,7 +106,7 @@ event_triggers_post_call(struct trace_event_file *file, if (data->paused) continue; if (data->cmd_ops->trigger_type & tt) - data->ops->trigger(data, NULL, NULL, NULL); + data->ops->func(data, NULL, NULL, NULL); } } EXPORT_SYMBOL_GPL(event_triggers_post_call); @@ -259,7 +245,7 @@ int trigger_process_regex(struct trace_event_file *file, char *buff) mutex_lock(&trigger_cmd_mutex); list_for_each_entry(p, &trigger_commands, list) { if (strcmp(p->name, command) == 0) { - ret = p->parse(p, file, buff, command, next); + ret = p->func(p, file, buff, command, next); goto out_unlock; } } @@ -554,6 +540,7 @@ void update_cond_flag(struct trace_event_file *file) /** * register_trigger - Generic event_command @reg implementation * @glob: The raw string used to register the trigger + * @ops: The trigger ops associated with the trigger * @data: Trigger-specific data to associate with the trigger * @file: The trace_event_file associated with the event * @@ -564,7 +551,7 @@ void update_cond_flag(struct trace_event_file *file) * * Return: 0 on success, errno otherwise */ -static int register_trigger(char *glob, +static int register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { @@ -602,6 +589,7 @@ static int register_trigger(char *glob, /** * unregister_trigger - Generic event_command @unreg implementation * @glob: The raw string used to register the trigger + * @ops: The trigger ops associated with the trigger * @test: Trigger-specific data used to find the trigger to remove * @file: The trace_event_file associated with the event * @@ -610,7 +598,7 @@ static int register_trigger(char *glob, * Usually used directly as the @unreg method in event command * implementations. */ -static void unregister_trigger(char *glob, +static void unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file) { @@ -633,350 +621,8 @@ static void unregister_trigger(char *glob, data->ops->free(data->ops, data); } -/* - * Event trigger parsing helper functions. - * - * These functions help make it easier to write an event trigger - * parsing function i.e. the struct event_command.parse() callback - * function responsible for parsing and registering a trigger command - * written to the 'trigger' file. - * - * A trigger command (or just 'trigger' for short) takes the form: - * [trigger] [if filter] - * - * The struct event_command.parse() callback (and other struct - * event_command functions) refer to several components of a trigger - * command. Those same components are referenced by the event trigger - * parsing helper functions defined below. These components are: - * - * cmd - the trigger command name - * glob - the trigger command name optionally prefaced with '!' - * param_and_filter - text following cmd and ':' - * param - text following cmd and ':' and stripped of filter - * filter - the optional filter text following (and including) 'if' - * - * To illustrate the use of these components, here are some concrete - * examples.
For the following triggers: - * - * echo 'traceon:5 if pid == 0' > trigger - * - 'traceon' is both cmd and glob - * - '5 if pid == 0' is the param_and_filter - * - '5' is the param - * - 'if pid == 0' is the filter - * - * echo 'enable_event:sys:event:n' > trigger - * - 'enable_event' is both cmd and glob - * - 'sys:event:n' is the param_and_filter - * - 'sys:event:n' is the param - * - there is no filter - * - * echo 'hist:keys=pid if prio > 50' > trigger - * - 'hist' is both cmd and glob - * - 'keys=pid if prio > 50' is the param_and_filter - * - 'keys=pid' is the param - * - 'if prio > 50' is the filter - * - * echo '!enable_event:sys:event:n' > trigger - * - 'enable_event' the cmd - * - '!enable_event' is the glob - * - 'sys:event:n' is the param_and_filter - * - 'sys:event:n' is the param - * - there is no filter - * - * echo 'traceoff' > trigger - * - 'traceoff' is both cmd and glob - * - there is no param_and_filter - * - there is no param - * - there is no filter - * - * There are a few different categories of event trigger covered by - * these helpers: - * - * - triggers that don't require a parameter e.g. traceon - * - triggers that do require a parameter e.g. enable_event and hist - * - triggers that though they may not require a param may support an - * optional 'n' param (n = number of times the trigger should fire) - * e.g.: traceon:5 or enable_event:sys:event:n - * - triggers that do not support an 'n' param e.g. hist - * - * These functions can be used or ignored as necessary - it all - * depends on the complexity of the trigger, and the granularity of - * the functions supported reflects the fact that some implementations - * may need to customize certain aspects of their implementations and - * won't need certain functions. For instance, the hist trigger - * implementation doesn't use event_trigger_separate_filter() because - * it has special requirements for handling the filter. - */ - /** - * event_trigger_check_remove - check whether an event trigger specifies remove - * @glob: The trigger command string, with optional remove(!) operator - * - * The event trigger callback implementations pass in 'glob' as a - * parameter. This is the command name either with or without a - * remove(!) operator. This function simply parses the glob and - * determines whether the command corresponds to a trigger removal or - * a trigger addition. - * - * Return: true if this is a remove command, false otherwise - */ -bool event_trigger_check_remove(const char *glob) -{ - return (glob && glob[0] == '!') ? true : false; -} - -/** - * event_trigger_empty_param - check whether the param is empty - * @param: The trigger param string - * - * The event trigger callback implementations pass in 'param' as a - * parameter. This corresponds to the string following the command - * name minus the command name. This function can be called by a - * callback implementation for any command that requires a param; a - * callback that doesn't require a param can ignore it. 
- * - * Return: true if this is an empty param, false otherwise - */ -bool event_trigger_empty_param(const char *param) -{ - return !param; -} - -/** - * event_trigger_separate_filter - separate an event trigger from a filter - * @param: The param string containing trigger and possibly filter - * @trigger: outparam, will be filled with a pointer to the trigger - * @filter: outparam, will be filled with a pointer to the filter - * @param_required: Specifies whether or not the param string is required - * - * Given a param string of the form '[trigger] [if filter]', this - * function separates the filter from the trigger and returns the - * trigger in *trigger and the filter in *filter. Either the *trigger - * or the *filter may be set to NULL by this function - if not set to - * NULL, they will contain strings corresponding to the trigger and - * filter. - * - * There are two cases that need to be handled with respect to the - * passed-in param: either the param is required, or it is not - * required. If @param_required is set, and there's no param, it will - * return -EINVAL. If @param_required is not set and there's a param - * that starts with a number, that corresponds to the case of a - * trigger with :n (n = number of times the trigger should fire) and - * the parsing continues normally; otherwise the function just returns - * and assumes param just contains a filter and there's nothing else - * to do. - * - * Return: 0 on success, errno otherwise - */ -int event_trigger_separate_filter(char *param_and_filter, char **param, - char **filter, bool param_required) -{ - int ret = 0; - - *param = *filter = NULL; - - if (!param_and_filter) { - if (param_required) - ret = -EINVAL; - goto out; - } - - /* - * Here we check for an optional param. The only legal - * optional param is :n, and if that's the case, continue - * below. Otherwise we assume what's left is a filter and - * return it as the filter string for the caller to deal with. - */ - if (!param_required && param_and_filter && !isdigit(param_and_filter[0])) { - *filter = param_and_filter; - goto out; - } - - /* - * Separate the param from the filter (param [if filter]). - * Here we have either an optional :n param or a required - * param and an optional filter. - */ - *param = strsep(&param_and_filter, " \t"); - - /* - * Here we have a filter, though it may be empty. - */ - if (param_and_filter) { - *filter = skip_spaces(param_and_filter); - if (!**filter) - *filter = NULL; - } -out: - return ret; -} - -/** - * event_trigger_alloc - allocate and init event_trigger_data for a trigger - * @cmd_ops: The event_command operations for the trigger - * @cmd: The cmd string - * @param: The param string - * @private_data: User data to associate with the event trigger - * - * Allocate an event_trigger_data instance and initialize it. The - * @cmd_ops are used along with the @cmd and @param to get the - * trigger_ops to assign to the event_trigger_data. @private_data can - * also be passed in and associated with the event_trigger_data. - * - * Use event_trigger_free() to free an event_trigger_data object.
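To make the split concrete, here is a stand-alone user-space approximation of the parsing performed by event_trigger_separate_filter() above. It is a sketch, not the kernel function: skip_spaces() is re-implemented locally, the demo input is invented, and the -EINVAL path for a required-but-missing param is omitted.

#define _DEFAULT_SOURCE		/* for strsep() on glibc; an assumption */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* User-space stand-in for the kernel's skip_spaces(). */
static char *skip_spaces(char *s)
{
	while (isspace((unsigned char)*s))
		s++;
	return s;
}

/* Mirror of the split logic above; the required-param error path is omitted. */
static void separate_filter(char *param_and_filter, char **param,
			    char **filter, int param_required)
{
	*param = *filter = NULL;

	if (!param_and_filter)
		return;

	/* No required param and no leading digit: treat it all as filter. */
	if (!param_required && !isdigit((unsigned char)param_and_filter[0])) {
		*filter = param_and_filter;
		return;
	}

	*param = strsep(&param_and_filter, " \t");

	if (param_and_filter) {
		*filter = skip_spaces(param_and_filter);
		if (!**filter)
			*filter = NULL;
	}
}

int main(void)
{
	/* the param_and_filter of: echo 'traceon:5 if pid == 0' > trigger */
	char buf[] = "5 if pid == 0";
	char *param, *filter;

	separate_filter(buf, &param, &filter, 0);
	printf("param='%s' filter='%s'\n", param, filter);
	/* prints: param='5' filter='if pid == 0' */
	return 0;
}

Run against the 'traceon:5 if pid == 0' example from the comment above, it yields param='5' and filter='if pid == 0', matching the component breakdown given there.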
- * - * Return: The trigger_data object on success, NULL otherwise - */ -struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops, - char *cmd, - char *param, - void *private_data) -{ - struct event_trigger_data *trigger_data; - struct event_trigger_ops *trigger_ops; - - trigger_ops = cmd_ops->get_trigger_ops(cmd, param); - - trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); - if (!trigger_data) - return NULL; - - trigger_data->count = -1; - trigger_data->ops = trigger_ops; - trigger_data->cmd_ops = cmd_ops; - trigger_data->private_data = private_data; - - INIT_LIST_HEAD(&trigger_data->list); - INIT_LIST_HEAD(&trigger_data->named_list); - RCU_INIT_POINTER(trigger_data->filter, NULL); - - return trigger_data; -} - -/** - * event_trigger_parse_num - parse and return the number param for a trigger - * @param: The param string - * @trigger_data: The trigger_data for the trigger - * - * Parse the :n (n = number of times the trigger should fire) param - * and set the count variable in the trigger_data to the parsed count. - * - * Return: 0 on success, errno otherwise - */ -int event_trigger_parse_num(char *param, - struct event_trigger_data *trigger_data) -{ - char *number; - int ret = 0; - - if (param) { - number = strsep(&param, ":"); - - if (!strlen(number)) - return -EINVAL; - - /* - * We use the callback data field (which is a pointer) - * as our counter. - */ - ret = kstrtoul(number, 0, &trigger_data->count); - } - - return ret; -} - -/** - * event_trigger_set_filter - set an event trigger's filter - * @cmd_ops: The event_command operations for the trigger - * @file: The event file for the trigger's event - * @param: The string containing the filter - * @trigger_data: The trigger_data for the trigger - * - * Set the filter for the trigger. If the filter is NULL, just return - * without error. - * - * Return: 0 on success, errno otherwise - */ -int event_trigger_set_filter(struct event_command *cmd_ops, - struct trace_event_file *file, - char *param, - struct event_trigger_data *trigger_data) -{ - if (param && cmd_ops->set_filter) - return cmd_ops->set_filter(param, trigger_data, file); - - return 0; -} - -/** - * event_trigger_reset_filter - reset an event trigger's filter - * @cmd_ops: The event_command operations for the trigger - * @trigger_data: The trigger_data for the trigger - * - * Reset the filter for the trigger to no filter. - */ -void event_trigger_reset_filter(struct event_command *cmd_ops, - struct event_trigger_data *trigger_data) -{ - if (cmd_ops->set_filter) - cmd_ops->set_filter(NULL, trigger_data, NULL); -} - -/** - * event_trigger_register - register an event trigger - * @cmd_ops: The event_command operations for the trigger - * @file: The event file for the trigger's event - * @glob: The trigger command string, with optional remove(!) operator - * @cmd: The cmd string - * @param: The param string - * @trigger_data: The trigger_data for the trigger - * @n_registered: optional outparam, the number of triggers registered - * - * Register an event trigger. The @cmd_ops are used to call the - * cmd_ops->reg() function which actually does the registration. The - * cmd_ops->reg() function returns the number of triggers registered, - * which is assigned to n_registered, if n_registered is non-NULL.
- * - * Return: 0 on success, errno otherwise - */ -int event_trigger_register(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, - char *cmd, - char *param, - struct event_trigger_data *trigger_data, - int *n_registered) -{ - int ret; - - if (n_registered) - *n_registered = 0; - - ret = cmd_ops->reg(glob, trigger_data, file); - /* - * The above returns on success the # of functions enabled, - * but if it didn't find any functions it returns zero. - * Consider no functions a failure too. - */ - if (!ret) { - cmd_ops->unreg(glob, trigger_data, file); - ret = -ENOENT; - } else if (ret > 0) { - if (n_registered) - *n_registered = ret; - /* Just return zero, not the number of enabled functions */ - ret = 0; - } - - return ret; -} - -/* - * End event trigger parsing helper functions. - */ - -/** - * event_trigger_parse - Generic event_command @parse implementation + * event_trigger_callback - Generic event_command @func implementation * @cmd_ops: The command ops, used for trigger registration * @file: The trace_event_file associated with the event * @glob: The raw string used to register the trigger @@ -986,15 +632,15 @@ int event_trigger_register(struct event_command *cmd_ops, * Common implementation for event command parsing and trigger * instantiation. * - * Usually used directly as the @parse method in event command + * Usually used directly as the @func method in event command * implementations. * * Return: 0 on success, errno otherwise */ static int -event_trigger_parse(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +event_trigger_callback(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { struct event_trigger_data *trigger_data; struct event_trigger_ops *trigger_ops; @@ -1027,7 +673,7 @@ event_trigger_parse(struct event_command *cmd_ops, INIT_LIST_HEAD(&trigger_data->named_list); if (glob[0] == '!') { - cmd_ops->unreg(glob+1, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); kfree(trigger_data); ret = 0; goto out; @@ -1062,14 +708,14 @@ event_trigger_parse(struct event_command *cmd_ops, out_reg: /* Up the trigger_data count to make sure reg doesn't free it on failure */ event_trigger_init(trigger_ops, trigger_data); - ret = cmd_ops->reg(glob, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. * Consider no functions a failure too. 
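Chained together, the helpers deleted in this hunk were meant to be called in sequence from a struct event_command.parse() implementation. The sketch below is hypothetical, written against the removed helper API as documented above: my_cmd_parse() is an invented command, the error path is condensed to kfree() where real callbacks would pair allocation with event_trigger_free(), and a command requiring a param would first reject event_trigger_empty_param(param).

static int my_cmd_parse(struct event_command *cmd_ops,
			struct trace_event_file *file,
			char *glob, char *cmd, char *param_and_filter)
{
	struct event_trigger_data *trigger_data;
	char *param, *filter;
	bool remove;
	int ret;

	remove = event_trigger_check_remove(glob);

	ret = event_trigger_separate_filter(param_and_filter, &param,
					    &filter, false);
	if (ret)
		return ret;

	/* file doubles as private_data so callbacks can find their event */
	trigger_data = event_trigger_alloc(cmd_ops, cmd, param, file);
	if (!trigger_data)
		return -ENOMEM;

	if (remove) {
		/* a leading '!' unregisters instead of registering */
		cmd_ops->unreg(glob + 1, trigger_data, file);
		kfree(trigger_data);
		return 0;
	}

	ret = event_trigger_parse_num(param, trigger_data);	/* the :n count */
	if (ret)
		goto out_free;

	ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data);
	if (ret)
		goto out_free;

	ret = event_trigger_register(cmd_ops, file, glob, cmd, param,
				     trigger_data, NULL);
	if (ret)
		goto out_free;

	return 0;

 out_free:
	event_trigger_reset_filter(cmd_ops, trigger_data);
	kfree(trigger_data);
	return ret;
}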
*/ if (!ret) { - cmd_ops->unreg(glob, trigger_data, file); + cmd_ops->unreg(glob, trigger_ops, trigger_data, file); ret = -ENOENT; } else if (ret > 0) ret = 0; @@ -1417,28 +1063,28 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops traceon_trigger_ops = { - .trigger = traceon_trigger, + .func = traceon_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceon_count_trigger_ops = { - .trigger = traceon_count_trigger, + .func = traceon_count_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceoff_trigger_ops = { - .trigger = traceoff_trigger, + .func = traceoff_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceoff_count_trigger_ops = { - .trigger = traceoff_count_trigger, + .func = traceoff_count_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1463,7 +1109,7 @@ onoff_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_traceon_cmd = { .name = "traceon", .trigger_type = ETT_TRACE_ONOFF, - .parse = event_trigger_parse, + .func = event_trigger_callback, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = onoff_get_trigger_ops, @@ -1474,7 +1120,7 @@ static struct event_command trigger_traceoff_cmd = { .name = "traceoff", .trigger_type = ETT_TRACE_ONOFF, .flags = EVENT_CMD_FL_POST_TRIGGER, - .parse = event_trigger_parse, + .func = event_trigger_callback, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = onoff_get_trigger_ops, @@ -1510,14 +1156,14 @@ snapshot_count_trigger(struct event_trigger_data *data, } static int -register_snapshot_trigger(char *glob, +register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { if (tracing_alloc_snapshot_instance(file->tr) != 0) return 0; - return register_trigger(glob, data, file); + return register_trigger(glob, ops, data, file); } static int @@ -1529,14 +1175,14 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops snapshot_trigger_ops = { - .trigger = snapshot_trigger, + .func = snapshot_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops snapshot_count_trigger_ops = { - .trigger = snapshot_count_trigger, + .func = snapshot_count_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1551,7 +1197,7 @@ snapshot_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_snapshot_cmd = { .name = "snapshot", .trigger_type = ETT_SNAPSHOT, - .parse = event_trigger_parse, + .func = event_trigger_callback, .reg = register_snapshot_trigger, .unreg = unregister_trigger, .get_trigger_ops = snapshot_get_trigger_ops, @@ -1625,14 +1271,14 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops stacktrace_trigger_ops = { - .trigger = stacktrace_trigger, + .func = stacktrace_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops stacktrace_count_trigger_ops = { - .trigger = stacktrace_count_trigger, + .func = stacktrace_count_trigger, 
.print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1648,7 +1294,7 @@ static struct event_command trigger_stacktrace_cmd = { .name = "stacktrace", .trigger_type = ETT_STACKTRACE, .flags = EVENT_CMD_FL_POST_TRIGGER, - .parse = event_trigger_parse, + .func = event_trigger_callback, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = stacktrace_get_trigger_ops, @@ -1752,36 +1398,36 @@ void event_enable_trigger_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_enable_trigger_ops = { - .trigger = event_enable_trigger, + .func = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_enable_count_trigger_ops = { - .trigger = event_enable_count_trigger, + .func = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_disable_trigger_ops = { - .trigger = event_enable_trigger, + .func = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_disable_count_trigger_ops = { - .trigger = event_enable_count_trigger, + .func = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -int event_enable_trigger_parse(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +int event_enable_trigger_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { struct trace_event_file *event_enable_file; struct enable_trigger_data *enable_data; @@ -1854,7 +1500,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops, trigger_data->private_data = enable_data; if (glob[0] == '!') { - cmd_ops->unreg(glob+1, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); kfree(trigger_data); kfree(enable_data); ret = 0; @@ -1901,7 +1547,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops, ret = trace_event_enable_disable(event_enable_file, 1, 1); if (ret < 0) goto out_put; - ret = cmd_ops->reg(glob, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. 
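The *_count_trigger variants referenced by these ops tables all gate on the ':n' count before delegating to their unconditional counterpart. Below is a representative sketch using the .func signature this patch restores; my_trigger() and my_trigger_print() are hypothetical placeholders rather than functions from this file, and the decrement follows the ':n' convention described earlier.

static void
my_count_trigger(struct event_trigger_data *data,
		 struct trace_buffer *buffer, void *rec,
		 struct ring_buffer_event *event)
{
	if (!data->count)
		return;			/* count exhausted: stay quiet */

	if (data->count != -1)
		(data->count)--;	/* -1 means fire without limit */

	my_trigger(data, buffer, rec, event);
}

static struct event_trigger_ops my_count_trigger_ops = {
	.func	= my_count_trigger,
	.print	= my_trigger_print,
	.init	= event_trigger_init,
	.free	= event_trigger_free,
};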
@@ -1931,6 +1577,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops, } int event_enable_register_trigger(char *glob, + struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { @@ -1972,6 +1619,7 @@ int event_enable_register_trigger(char *glob, } void event_enable_unregister_trigger(char *glob, + struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file) { @@ -2025,7 +1673,7 @@ event_enable_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_enable_cmd = { .name = ENABLE_EVENT_STR, .trigger_type = ETT_EVENT_ENABLE, - .parse = event_enable_trigger_parse, + .func = event_enable_trigger_func, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .get_trigger_ops = event_enable_get_trigger_ops, @@ -2035,7 +1683,7 @@ static struct event_command trigger_enable_cmd = { static struct event_command trigger_disable_cmd = { .name = DISABLE_EVENT_STR, .trigger_type = ETT_EVENT_ENABLE, - .parse = event_enable_trigger_parse, + .func = event_enable_trigger_func, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .get_trigger_ops = event_enable_get_trigger_ops, diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 9f1bfbe105..1f0e63f5d1 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -186,6 +186,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, return; trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); @@ -193,6 +194,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, trace_function(tr, ip, parent_ip, trace_ctx); ftrace_test_recursion_unlock(bit); + preempt_enable_notrace(); } #ifdef CONFIG_UNWINDER_ORC @@ -296,6 +298,8 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; + preempt_disable_notrace(); + cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); if (atomic_read(&data->disabled)) @@ -320,6 +324,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, out: ftrace_test_recursion_unlock(bit); + preempt_enable_notrace(); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 203204cadf..6b5ff3ba42 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -120,7 +120,7 @@ static inline int ftrace_graph_ignore_irqs(void) if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT)) return 0; - return in_hardirq(); + return in_irq(); } int trace_graph_entry(struct ftrace_graph_ent *trace) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index d440ddd5fd..d0a730d99a 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -79,8 +79,8 @@ struct hwlat_kthread_data { int nmi_cpu; }; -static struct hwlat_kthread_data hwlat_single_cpu_data; -static DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data); +struct hwlat_kthread_data hwlat_single_cpu_data; +DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data); /* Tells NMIs to call back to the hwlat tracer to record timestamps */ bool trace_hwlat_callback_enabled; @@ -491,14 +491,18 @@ static void stop_per_cpu_kthreads(void) static int start_cpu_kthread(unsigned int cpu) { struct task_struct *kthread; + char comm[24]; - kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, 
"hwlatd/%u"); + snprintf(comm, 24, "hwlatd/%d", cpu); + + kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm); if (IS_ERR(kthread)) { pr_err(BANNER "could not start sampling thread\n"); return -ENOMEM; } per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread; + wake_up_process(kthread); return 0; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 508f14af4f..39ee607255 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -7,7 +7,6 @@ */ #define pr_fmt(fmt) "trace_kprobe: " fmt -#include #include #include #include @@ -32,7 +31,7 @@ static int __init set_kprobe_boot_events(char *str) strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE); disable_tracing_selftest("running kprobe events"); - return 0; + return 1; } __setup("kprobe_event=", set_kprobe_boot_events); @@ -98,7 +97,7 @@ static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk) { - return kprobe_gone(&tk->rp.kp); + return !!(kprobe_gone(&tk->rp.kp)); } static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, @@ -328,9 +327,11 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) static void __disable_trace_kprobe(struct trace_probe *tp) { + struct trace_probe *pos; struct trace_kprobe *tk; - list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) { + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); if (!trace_kprobe_is_registered(tk)) continue; if (trace_kprobe_is_return(tk)) @@ -347,7 +348,7 @@ static void __disable_trace_kprobe(struct trace_probe *tp) static int enable_trace_kprobe(struct trace_event_call *call, struct trace_event_file *file) { - struct trace_probe *tp; + struct trace_probe *pos, *tp; struct trace_kprobe *tk; bool enabled; int ret = 0; @@ -368,7 +369,8 @@ static int enable_trace_kprobe(struct trace_event_call *call, if (enabled) return 0; - list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) { + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); if (trace_kprobe_has_gone(tk)) continue; ret = __enable_trace_kprobe(tk); @@ -557,9 +559,11 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, struct trace_kprobe *comp) { struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; int i; - list_for_each_entry(orig, &tpe->probes, tp.list) { + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_kprobe, tp); if (strcmp(trace_kprobe_symbol(orig), trace_kprobe_symbol(comp)) || trace_kprobe_offset(orig) != trace_kprobe_offset(comp)) @@ -1382,11 +1386,17 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; + fbuffer.trace_ctx = tracing_gen_ctx(); + fbuffer.trace_file = trace_file; + dsize = __get_data_size(&tk->tp, regs); - entry = trace_event_buffer_reserve(&fbuffer, trace_file, - sizeof(*entry) + tk->tp.size + dsize); - if (!entry) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.trace_ctx); + if (!fbuffer.event) return; fbuffer.regs = regs; @@ -1423,11 +1433,16 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tk->tp, regs); + fbuffer.trace_ctx = 
tracing_gen_ctx(); + fbuffer.trace_file = trace_file; - entry = trace_event_buffer_reserve(&fbuffer, trace_file, - sizeof(*entry) + tk->tp.size + dsize); - if (!entry) + dsize = __get_data_size(&tk->tp, regs); + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.trace_ctx); + if (!fbuffer.event) return; fbuffer.regs = regs; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index cfddb30e65..65a5186499 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -38,6 +38,8 @@ #define CREATE_TRACE_POINTS #include +static struct trace_array *osnoise_trace; + /* * Default values. */ @@ -48,99 +50,6 @@ #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ -/* - * trace_array of the enabled osnoise/timerlat instances. - */ -struct osnoise_instance { - struct list_head list; - struct trace_array *tr; -}; - -static struct list_head osnoise_instances; - -static bool osnoise_has_registered_instances(void) -{ - return !!list_first_or_null_rcu(&osnoise_instances, - struct osnoise_instance, - list); -} - -/* - * osnoise_instance_registered - check if a tr is already registered - */ -static int osnoise_instance_registered(struct trace_array *tr) -{ - struct osnoise_instance *inst; - int found = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(inst, &osnoise_instances, list) { - if (inst->tr == tr) - found = 1; - } - rcu_read_unlock(); - - return found; -} - -/* - * osnoise_register_instance - register a new trace instance - * - * Register a trace_array *tr in the list of instances running - * osnoise/timerlat tracers. - */ -static int osnoise_register_instance(struct trace_array *tr) -{ - struct osnoise_instance *inst; - - /* - * register/unregister serialization is provided by trace's - * trace_types_lock. - */ - lockdep_assert_held(&trace_types_lock); - - inst = kmalloc(sizeof(*inst), GFP_KERNEL); - if (!inst) - return -ENOMEM; - - INIT_LIST_HEAD_RCU(&inst->list); - inst->tr = tr; - list_add_tail_rcu(&inst->list, &osnoise_instances); - - return 0; -} - -/* - * osnoise_unregister_instance - unregister a registered trace instance - * - * Remove the trace_array *tr from the list of instances running - * osnoise/timerlat tracers. - */ -static void osnoise_unregister_instance(struct trace_array *tr) -{ - struct osnoise_instance *inst; - int found = 0; - - /* - * register/unregister serialization is provided by trace's - * trace_types_lock. - */ - lockdep_assert_held(&trace_types_lock); - - list_for_each_entry_rcu(inst, &osnoise_instances, list) { - if (inst->tr == tr) { - list_del_rcu(&inst->list); - found = 1; - break; - } - } - - if (!found) - return; - - kvfree_rcu(inst); -} - /* * NMI runtime info. */ @@ -339,56 +248,10 @@ static struct osnoise_data { #endif }; -#ifdef CONFIG_TIMERLAT_TRACER -static inline bool timerlat_enabled(void) -{ - return osnoise_data.timerlat_tracer; -} - -static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) -{ - struct timerlat_variables *tlat_var = this_cpu_tmr_var(); - /* - * If the timerlat is enabled, but the irq handler did - * not run yet enabling timerlat_tracer, do not trace. 
- */ - if (!tlat_var->tracing_thread) { - osn_var->softirq.arrival_time = 0; - osn_var->softirq.delta_start = 0; - return 0; - } - return 1; -} - -static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) -{ - struct timerlat_variables *tlat_var = this_cpu_tmr_var(); - /* - * If the timerlat is enabled, but the irq handler did - * not run yet enabling timerlat_tracer, do not trace. - */ - if (!tlat_var->tracing_thread) { - osn_var->thread.delta_start = 0; - osn_var->thread.arrival_time = 0; - return 0; - } - return 1; -} -#else /* CONFIG_TIMERLAT_TRACER */ -static inline bool timerlat_enabled(void) -{ - return false; -} - -static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) -{ - return 1; -} -static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) -{ - return 1; -} -#endif +/* + * Boolean variable used to inform that the tracer is currently sampling. + */ +static bool osnoise_busy; #ifdef CONFIG_PREEMPT_RT /* @@ -431,19 +294,19 @@ static void print_osnoise_headers(struct seq_file *s) seq_puts(s, "# _-----=> irqs-off\n"); seq_puts(s, "# / _----=> need-resched\n"); seq_puts(s, "# | / _---=> hardirq/softirq\n"); - seq_puts(s, "# || / _--=> preempt-depth\n"); - seq_puts(s, "# ||| / _-=> migrate-disable "); - seq_puts(s, " MAX\n"); - seq_puts(s, "# |||| / delay "); + seq_puts(s, "# || / _--=> preempt-depth "); + seq_puts(s, " MAX\n"); + + seq_puts(s, "# || / "); seq_puts(s, " SINGLE Interference counters:\n"); - seq_puts(s, "# ||||| RUNTIME "); + seq_puts(s, "# |||| RUNTIME "); seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); - seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); + seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP IN US "); seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); - seq_puts(s, "# | | | ||||| | | "); + seq_puts(s, "# | | | |||| | | "); seq_puts(s, " | | | | | | | |\n"); } #endif /* CONFIG_PREEMPT_RT */ @@ -452,24 +315,19 @@ static void print_osnoise_headers(struct seq_file *s) * osnoise_taint - report an osnoise error. */ #define osnoise_taint(msg) ({ \ - struct osnoise_instance *inst; \ - struct trace_buffer *buffer; \ + struct trace_array *tr = osnoise_trace; \ \ - rcu_read_lock(); \ - list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ - buffer = inst->tr->array_buffer.buffer; \ - trace_array_printk_buf(buffer, _THIS_IP_, msg); \ - } \ - rcu_read_unlock(); \ + trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg); \ osnoise_data.tainted = true; \ }) /* * Record an osnoise_sample into the tracer buffer. */ -static void -__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) +static void trace_osnoise_sample(struct osnoise_sample *sample) { + struct trace_array *tr = osnoise_trace; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct trace_event_call *call = &event_osnoise; struct ring_buffer_event *event; struct osnoise_entry *entry; @@ -492,22 +350,6 @@ __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffe trace_buffer_unlock_commit_nostack(buffer, event); } -/* - * Record an osnoise_sample on all osnoise instances. 
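Each sample-recording helper around this hunk follows the same reserve, fill, commit sequence on the ring buffer. Condensed into one hypothetical helper below; TRACE_OSNOISE and the entry->noise field are assumptions about the osnoise entry type, which the hunk elides.

static void record_sample_sketch(struct trace_buffer *buffer, u64 noise)
{
	struct trace_event_call *call = &event_osnoise;
	struct ring_buffer_event *event;
	struct osnoise_entry *entry;

	/* Reserve space; NULL means the buffer is full or disabled. */
	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE,
					  sizeof(*entry), tracing_gen_ctx());
	if (!event)
		return;

	/* Fill the payload (abbreviated). */
	entry = ring_buffer_event_data(event);
	entry->noise = noise;

	/* Commit unless an event filter discards it. */
	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}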
- */ -static void trace_osnoise_sample(struct osnoise_sample *sample) -{ - struct osnoise_instance *inst; - struct trace_buffer *buffer; - - rcu_read_lock(); - list_for_each_entry_rcu(inst, &osnoise_instances, list) { - buffer = inst->tr->array_buffer.buffer; - __trace_osnoise_sample(sample, buffer); - } - rcu_read_unlock(); -} - #ifdef CONFIG_TIMERLAT_TRACER /* * Print the timerlat header info. @@ -536,20 +378,23 @@ static void print_timerlat_headers(struct seq_file *s) seq_puts(s, "# / _----=> need-resched\n"); seq_puts(s, "# | / _---=> hardirq/softirq\n"); seq_puts(s, "# || / _--=> preempt-depth\n"); - seq_puts(s, "# ||| / _-=> migrate-disable\n"); - seq_puts(s, "# |||| / delay\n"); - seq_puts(s, "# ||||| ACTIVATION\n"); - seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); - seq_puts(s, " CONTEXT LATENCY\n"); - seq_puts(s, "# | | | ||||| | | "); + seq_puts(s, "# || /\n"); + seq_puts(s, "# |||| ACTIVATION\n"); + seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP ID "); + seq_puts(s, " CONTEXT LATENCY\n"); + seq_puts(s, "# | | | |||| | | "); seq_puts(s, " | |\n"); } #endif /* CONFIG_PREEMPT_RT */ -static void -__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) +/* + * Record an timerlat_sample into the tracer buffer. + */ +static void trace_timerlat_sample(struct timerlat_sample *sample) { + struct trace_array *tr = osnoise_trace; struct trace_event_call *call = &event_osnoise; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct timerlat_entry *entry; @@ -566,22 +411,6 @@ __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buf trace_buffer_unlock_commit_nostack(buffer, event); } -/* - * Record an timerlat_sample into the tracer buffer. - */ -static void trace_timerlat_sample(struct timerlat_sample *sample) -{ - struct osnoise_instance *inst; - struct trace_buffer *buffer; - - rcu_read_lock(); - list_for_each_entry_rcu(inst, &osnoise_instances, list) { - buffer = inst->tr->array_buffer.buffer; - __trace_timerlat_sample(sample, buffer); - } - rcu_read_unlock(); -} - #ifdef CONFIG_STACKTRACE #define MAX_CALLS 256 @@ -621,18 +450,29 @@ static void timerlat_save_stack(int skip) return; } - -static void -__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) +/* + * timerlat_dump_stack - dump a stack trace previously saved + * + * Dump a saved stack trace into the trace buffer. + */ +static void timerlat_dump_stack(void) { struct trace_event_call *call = &event_osnoise; + struct trace_array *tr = osnoise_trace; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; + struct trace_stack *fstack; struct stack_entry *entry; + unsigned int size; + + preempt_disable_notrace(); + fstack = this_cpu_ptr(&trace_stack); + size = fstack->stack_size; event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, tracing_gen_ctx()); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); @@ -641,39 +481,12 @@ __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, u if (!call_filter_check_discard(call, entry, buffer, event)) trace_buffer_unlock_commit_nostack(buffer, event); -} -/* - * timerlat_dump_stack - dump a stack trace previously saved - */ -static void timerlat_dump_stack(u64 latency) -{ - struct osnoise_instance *inst; - struct trace_buffer *buffer; - struct trace_stack *fstack; - unsigned int size; - - /* - * trace only if latency > print_stack config, if enabled. 
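The save/dump split shown here keeps the hot path cheap: the stack is snapshotted into a per-CPU scratch buffer first and only copied into the ring buffer when wanted. A miniature sketch of that per-CPU pattern, reusing the names above with the ring-buffer copy elided:

static void save_then_maybe_dump(u64 latency_us)
{
	struct trace_stack *fstack;
	unsigned int size;

	timerlat_save_stack(0);		/* cheap: snapshot into this CPU's buffer */

	/* Only dump when the configured print_stack threshold is crossed. */
	if (!osnoise_data.print_stack ||
	    osnoise_data.print_stack > latency_us)
		return;

	preempt_disable_notrace();	/* stay on this CPU while reading back */
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;
	/* ... reserve sizeof(struct stack_entry) + size and copy fstack ... */
	preempt_enable_notrace();
}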
- */ - if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) - return; - - preempt_disable_notrace(); - fstack = this_cpu_ptr(&trace_stack); - size = fstack->stack_size; - - rcu_read_lock(); - list_for_each_entry_rcu(inst, &osnoise_instances, list) { - buffer = inst->tr->array_buffer.buffer; - __timerlat_dump_stack(buffer, fstack, size); - - } - rcu_read_unlock(); +out: preempt_enable_notrace(); } -#else /* CONFIG_STACKTRACE */ -#define timerlat_dump_stack(u64 latency) do {} while (0) +#else +#define timerlat_dump_stack() do {} while (0) #define timerlat_save_stack(a) do {} while (0) #endif /* CONFIG_STACKTRACE */ #endif /* CONFIG_TIMERLAT_TRACER */ @@ -1053,9 +866,21 @@ static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) if (!osn_var->sampling) return; - if (unlikely(timerlat_enabled())) - if (!timerlat_softirq_exit(osn_var)) +#ifdef CONFIG_TIMERLAT_TRACER + /* + * If the timerlat is enabled, but the irq handler did + * not run yet enabling timerlat_tracer, do not trace. + */ + if (unlikely(osnoise_data.timerlat_tracer)) { + struct timerlat_variables *tlat_var; + tlat_var = this_cpu_tmr_var(); + if (!tlat_var->tracing_thread) { + osn_var->softirq.arrival_time = 0; + osn_var->softirq.delta_start = 0; return; + } + } +#endif duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); @@ -1149,9 +974,17 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) if (!osn_var->sampling) return; - if (unlikely(timerlat_enabled())) - if (!timerlat_thread_exit(osn_var)) +#ifdef CONFIG_TIMERLAT_TRACER + if (osnoise_data.timerlat_tracer) { + struct timerlat_variables *tlat_var; + tlat_var = this_cpu_tmr_var(); + if (!tlat_var->tracing_thread) { + osn_var->thread.delta_start = 0; + osn_var->thread.arrival_time = 0; return; + } + } +#endif duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); @@ -1244,37 +1077,12 @@ diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample * */ static __always_inline void osnoise_stop_tracing(void) { - struct osnoise_instance *inst; - struct trace_array *tr; + struct trace_array *tr = osnoise_trace; - rcu_read_lock(); - list_for_each_entry_rcu(inst, &osnoise_instances, list) { - tr = inst->tr; - trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, - "stop tracing hit on cpu %d\n", smp_processor_id()); + trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, + "stop tracing hit on cpu %d\n", smp_processor_id()); - tracer_tracing_off(tr); - } - rcu_read_unlock(); -} - -/* - * notify_new_max_latency - Notify a new max latency via fsnotify interface. 
- */ -static void notify_new_max_latency(u64 latency) -{ - struct osnoise_instance *inst; - struct trace_array *tr; - - rcu_read_lock(); - list_for_each_entry_rcu(inst, &osnoise_instances, list) { - tr = inst->tr; - if (tr->max_latency < latency) { - tr->max_latency = latency; - latency_fsnotify(tr); - } - } - rcu_read_unlock(); + tracer_tracing_off(tr); } /* @@ -1288,6 +1096,7 @@ static void notify_new_max_latency(u64 latency) static int run_osnoise(void) { struct osnoise_variables *osn_var = this_cpu_osn_var(); + struct trace_array *tr = osnoise_trace; u64 start, sample, last_sample; u64 last_int_count, int_count; s64 noise = 0, max_noise = 0; @@ -1422,7 +1231,11 @@ static int run_osnoise(void) trace_osnoise_sample(&s); - notify_new_max_latency(max_noise); + /* Keep a running maximum ever recorded osnoise "latency" */ + if (max_noise > tr->max_latency) { + tr->max_latency = max_noise; + latency_fsnotify(tr); + } if (osnoise_data.stop_tracing_total) if (s.noise > osnoise_data.stop_tracing_total) @@ -1436,37 +1249,6 @@ static int run_osnoise(void) static struct cpumask osnoise_cpumask; static struct cpumask save_cpumask; -/* - * osnoise_sleep - sleep until the next period - */ -static void osnoise_sleep(void) -{ - u64 interval; - ktime_t wake_time; - - mutex_lock(&interface_lock); - interval = osnoise_data.sample_period - osnoise_data.sample_runtime; - mutex_unlock(&interface_lock); - - /* - * differently from hwlat_detector, the osnoise tracer can run - * without a pause because preemption is on. - */ - if (!interval) { - /* Let synchronize_rcu_tasks() make progress */ - cond_resched_tasks_rcu_qs(); - return; - } - - wake_time = ktime_add_us(ktime_get(), interval); - __set_current_state(TASK_INTERRUPTIBLE); - - while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) { - if (kthread_should_stop()) - break; - } -} - /* * osnoise_main - The osnoise detection kernel thread * @@ -1475,10 +1257,30 @@ static void osnoise_sleep(void) */ static int osnoise_main(void *data) { + u64 interval; while (!kthread_should_stop()) { + run_osnoise(); - osnoise_sleep(); + + mutex_lock(&interface_lock); + interval = osnoise_data.sample_period - osnoise_data.sample_runtime; + mutex_unlock(&interface_lock); + + do_div(interval, USEC_PER_MSEC); + + /* + * differently from hwlat_detector, the osnoise tracer can run + * without a pause because preemption is on. + */ + if (interval < 1) { + /* Let synchronize_rcu_tasks() make progress */ + cond_resched_tasks_rcu_qs(); + continue; + } + + if (msleep_interruptible(interval)) + break; } return 0; @@ -1491,6 +1293,7 @@ static int osnoise_main(void *data) static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) { struct osnoise_variables *osn_var = this_cpu_osn_var(); + struct trace_array *tr = osnoise_trace; struct timerlat_variables *tlat; struct timerlat_sample s; u64 now; @@ -1529,11 +1332,9 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) * running, the thread needs to receive the softirq delta_start. The * reason being is that the softirq will be the last to be unfolded, * resetting the thread delay to zero. - * - * The PREEMPT_RT is a special case, though. As softirqs run as threads - * on RT, moving the thread is enough.
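Because the restored osnoise_main() converts the microsecond gap into milliseconds before sleeping, any gap shorter than 1ms falls through to the cond_resched path. A tiny stand-alone check of that boundary; plain division stands in for do_div(), and the one-second period/runtime defaults are an assumption about values set elsewhere in this file.

#include <stdio.h>

int main(void)
{
	/* (sample_period, sample_runtime) pairs in microseconds. */
	unsigned long long cases[][2] = {
		{ 1000000, 1000000 },	/* assumed default: gap 0us  -> no sleep */
		{ 1000000,  999500 },	/* gap 500us                 -> no sleep */
		{ 1000000,  990000 },	/* gap 10000us = 10ms        -> msleep  */
	};

	for (int i = 0; i < 3; i++) {
		unsigned long long interval = cases[i][0] - cases[i][1];

		interval /= 1000;	/* do_div(interval, USEC_PER_MSEC) */
		printf("gap -> %llums: %s\n", interval,
		       interval < 1 ? "cond_resched" : "msleep_interruptible");
	}
	return 0;
}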
*/ - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { +#ifndef CONFIG_PREEMPT_RT + if (osn_var->softirq.delta_start) { copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->softirq.delta_start); @@ -1543,6 +1344,13 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start); } +#else /* CONFIG_PREEMPT_RT */ + /* + * The softirqs run as threads on RT, so there is no need + * to keep track of them. + */ + copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start); +#endif /* CONFIG_PREEMPT_RT */ /* * Compute the current time with the expected time. @@ -1556,7 +1364,11 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) trace_timerlat_sample(&s); - notify_new_max_latency(diff); + /* Keep a running maximum ever recorded os noise "latency" */ + if (diff > tr->max_latency) { + tr->max_latency = diff; + latency_fsnotify(tr); + } if (osnoise_data.stop_tracing) if (time_to_us(diff) >= osnoise_data.stop_tracing) @@ -1644,7 +1456,11 @@ static int timerlat_main(void *data) trace_timerlat_sample(&s); - timerlat_dump_stack(time_to_us(diff)); +#ifdef CONFIG_STACKTRACE + if (osnoise_data.print_stack) + if (osnoise_data.print_stack <= time_to_us(diff)) + timerlat_dump_stack(); +#endif /* CONFIG_STACKTRACE */ tlat->tracing_thread = false; if (osnoise_data.stop_tracing_total) @@ -1657,11 +1473,6 @@ static int timerlat_main(void *data) hrtimer_cancel(&tlat->timer); return 0; } -#else /* CONFIG_TIMERLAT_TRACER */ -static int timerlat_main(void *data) -{ - return 0; -} #endif /* CONFIG_TIMERLAT_TRACER */ /* @@ -1704,14 +1515,17 @@ static int start_kthread(unsigned int cpu) void *main = osnoise_main; char comm[24]; - if (timerlat_enabled()) { +#ifdef CONFIG_TIMERLAT_TRACER + if (osnoise_data.timerlat_tracer) { snprintf(comm, 24, "timerlat/%d", cpu); main = timerlat_main; } else { snprintf(comm, 24, "osnoise/%d", cpu); } - - kthread = kthread_run_on_cpu(main, NULL, cpu, comm); +#else + snprintf(comm, 24, "osnoise/%d", cpu); +#endif + kthread = kthread_create_on_cpu(main, NULL, cpu, comm); if (IS_ERR(kthread)) { pr_err(BANNER "could not start sampling thread\n"); @@ -1720,6 +1534,7 @@ static int start_kthread(unsigned int cpu) } per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; + wake_up_process(kthread); return 0; } @@ -1730,7 +1545,7 @@ static int start_kthread(unsigned int cpu) * This starts the kernel thread that will look for osnoise on many * cpus. */ -static int start_per_cpu_kthreads(void) +static int start_per_cpu_kthreads(struct trace_array *tr) { struct cpumask *current_mask = &save_cpumask; int retval = 0; @@ -1738,9 +1553,13 @@ static int start_per_cpu_kthreads(void) cpus_read_lock(); /* - * Run only on online CPUs in which osnoise is allowed to run. + * Run only on CPUs in which trace and osnoise are allowed to run. */ - cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); + cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask); + /* + * And the CPU is online.
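Like the hwlat change earlier in this patch, this hunk trades kthread_run_on_cpu() for the two-step form: kthread_create_on_cpu() returns a thread that stays asleep until it is explicitly woken, hence the added wake_up_process(). The restored pattern reduced to its core, with error handling trimmed (a sketch, not the full function above):

static int start_sampling_thread(unsigned int cpu)
{
	struct task_struct *kthread;
	char comm[24];

	snprintf(comm, 24, "osnoise/%d", cpu);

	/* Created sleeping, affined to 'cpu', named with the CPU suffix. */
	kthread = kthread_create_on_cpu(osnoise_main, NULL, cpu, comm);
	if (IS_ERR(kthread))
		return -ENOMEM;

	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
	wake_up_process(kthread);	/* now it actually starts running */

	return 0;
}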
+ */ + cpumask_and(current_mask, cpu_online_mask, current_mask); for_each_possible_cpu(cpu) per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; @@ -1761,11 +1580,13 @@ static int start_per_cpu_kthreads(void) #ifdef CONFIG_HOTPLUG_CPU static void osnoise_hotplug_workfn(struct work_struct *dummy) { + struct trace_array *tr = osnoise_trace; unsigned int cpu = smp_processor_id(); + mutex_lock(&trace_types_lock); - if (!osnoise_has_registered_instances()) + if (!osnoise_busy) goto out_unlock_trace; mutex_lock(&interface_lock); @@ -1774,6 +1595,9 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy) if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) goto out_unlock; + if (!cpumask_test_cpu(cpu, tr->tracing_cpumask)) + goto out_unlock; + start_kthread(cpu); out_unlock: @@ -1862,6 +1686,9 @@ osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, return count; } +static void osnoise_tracer_start(struct trace_array *tr); +static void osnoise_tracer_stop(struct trace_array *tr); + /* * osnoise_cpus_write - Write function for "cpus" entry * @filp: The active open file structure @@ -1873,15 +1700,19 @@ osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, * interface to the osnoise trace. By default, it lists all CPUs, * in this way, allowing osnoise threads to run on any online CPU * of the system. It serves to restrict the execution of osnoise to the - * set of CPUs writing via this interface. Why not use "tracing_cpumask"? - * Because the user might be interested in tracing what is running on - * other CPUs. For instance, one might run osnoise in one HT CPU - * while observing what is running on the sibling HT CPU. + * set of CPUs writing via this interface. Note that osnoise also + * respects the "tracing_cpumask." Hence, osnoise threads will run only + * on the set of CPUs allowed here AND on "tracing_cpumask." Why not + * have just "tracing_cpumask?" Because the user might be interested + * in tracing what is running on other CPUs. For instance, one might + * run osnoise in one HT CPU while observing what is running on the + * sibling HT CPU. */ static ssize_t osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { + struct trace_array *tr = osnoise_trace; cpumask_var_t osnoise_cpumask_new; int running, err; char buf[256]; @@ -1900,12 +1731,13 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, goto err_free; /* - * trace_types_lock is taken to avoid concurrency on start/stop. + * trace_types_lock is taken to avoid concurrency on start/stop + * and osnoise_busy. 
*/ mutex_lock(&trace_types_lock); - running = osnoise_has_registered_instances(); + running = osnoise_busy; if (running) - stop_per_cpu_kthreads(); + osnoise_tracer_stop(tr); mutex_lock(&interface_lock); /* @@ -1919,7 +1751,7 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, mutex_unlock(&interface_lock); if (running) - start_per_cpu_kthreads(); + osnoise_tracer_start(tr); mutex_unlock(&trace_types_lock); free_cpumask_var(osnoise_cpumask_new); @@ -2003,47 +1835,6 @@ static const struct file_operations cpus_fops = { .llseek = generic_file_llseek, }; -#ifdef CONFIG_TIMERLAT_TRACER -#ifdef CONFIG_STACKTRACE -static int init_timerlat_stack_tracefs(struct dentry *top_dir) -{ - struct dentry *tmp; - - tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, - &osnoise_print_stack, &trace_min_max_fops); - if (!tmp) - return -ENOMEM; - - return 0; -} -#else /* CONFIG_STACKTRACE */ -static int init_timerlat_stack_tracefs(struct dentry *top_dir) -{ - return 0; -} -#endif /* CONFIG_STACKTRACE */ - -/* - * init_timerlat_tracefs - A function to initialize the timerlat interface files - */ -static int init_timerlat_tracefs(struct dentry *top_dir) -{ - struct dentry *tmp; - - tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, - &timerlat_period, &trace_min_max_fops); - if (!tmp) - return -ENOMEM; - - return init_timerlat_stack_tracefs(top_dir); -} -#else /* CONFIG_TIMERLAT_TRACER */ -static int init_timerlat_tracefs(struct dentry *top_dir) -{ - return 0; -} -#endif /* CONFIG_TIMERLAT_TRACER */ - /* * init_tracefs - A function to initialize the tracefs interface files * @@ -2088,10 +1879,19 @@ static int init_tracefs(void) tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); if (!tmp) goto err; - - ret = init_timerlat_tracefs(top_dir); - if (ret) +#ifdef CONFIG_TIMERLAT_TRACER +#ifdef CONFIG_STACKTRACE + tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, + &osnoise_print_stack, &trace_min_max_fops); + if (!tmp) goto err; +#endif + + tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, + &timerlat_period, &trace_min_max_fops); + if (!tmp) + goto err; +#endif return 0; @@ -2139,37 +1939,22 @@ static void osnoise_unhook_events(void) unhook_irq_events(); } -/* - * osnoise_workload_start - start the workload and hook to events - */ -static int osnoise_workload_start(void) +static int __osnoise_tracer_start(struct trace_array *tr) { int retval; - /* - * Instances need to be registered after calling workload - * start. Hence, if there is already an instance, the - * workload was already registered. Otherwise, this - * code is on the way to register the first instance, - * and the workload will start. - */ - if (osnoise_has_registered_instances()) - return 0; - osn_var_reset_all(); retval = osnoise_hook_events(); if (retval) return retval; - /* - * Make sure that ftrace_nmi_enter/exit() see reset values - * before enabling trace_osnoise_callback_enabled. + * Make sure NMIs see reset values. */ barrier(); trace_osnoise_callback_enabled = true; - retval = start_per_cpu_kthreads(); + retval = start_per_cpu_kthreads(tr); if (retval) { trace_osnoise_callback_enabled = false; /* @@ -2182,72 +1967,51 @@ static int osnoise_workload_start(void) return retval; } + osnoise_busy = true; + return 0; } -/* - * osnoise_workload_stop - stop the workload and unhook the events - */ -static void osnoise_workload_stop(void) -{ - /* - * Instances need to be unregistered before calling - * stop.
Hence, if there is a registered instance, more - * than one instance is running, and the workload will not - * yet stop. Otherwise, this code is on the way to disable - * the last instance, and the workload can stop. - */ - if (osnoise_has_registered_instances()) - return; - - trace_osnoise_callback_enabled = false; - /* - * Make sure that ftrace_nmi_enter/exit() see - * trace_osnoise_callback_enabled as false before continuing. - */ - barrier(); - - stop_per_cpu_kthreads(); - - osnoise_unhook_events(); -} - static void osnoise_tracer_start(struct trace_array *tr) { int retval; - /* - * If the instance is already registered, there is no need to - * register it again. - */ - if (osnoise_instance_registered(tr)) + if (osnoise_busy) return; - retval = osnoise_workload_start(); + retval = __osnoise_tracer_start(tr); if (retval) pr_err(BANNER "Error starting osnoise tracer\n"); - osnoise_register_instance(tr); } static void osnoise_tracer_stop(struct trace_array *tr) { - osnoise_unregister_instance(tr); - osnoise_workload_stop(); + if (!osnoise_busy) + return; + + trace_osnoise_callback_enabled = false; + barrier(); + + stop_per_cpu_kthreads(); + + osnoise_unhook_events(); + + osnoise_busy = false; } static int osnoise_tracer_init(struct trace_array *tr) { - /* - * Only allow osnoise tracer if timerlat tracer is not running - * already. - */ - if (timerlat_enabled()) + + /* Only allow one instance to enable this */ + if (osnoise_busy) return -EBUSY; + osnoise_trace = tr; tr->max_latency = 0; osnoise_tracer_start(tr); + return 0; } @@ -2271,55 +2035,45 @@ static void timerlat_tracer_start(struct trace_array *tr) { int retval; - /* - * If the instance is already registered, there is no need to - * register it again. - */ - if (osnoise_instance_registered(tr)) + if (osnoise_busy) return; - retval = osnoise_workload_start(); - if (retval) - pr_err(BANNER "Error starting timerlat tracer\n"); + osnoise_data.timerlat_tracer = 1; - osnoise_register_instance(tr); + retval = __osnoise_tracer_start(tr); + if (retval) + goto out_err; return; +out_err: + pr_err(BANNER "Error starting timerlat tracer\n"); } static void timerlat_tracer_stop(struct trace_array *tr) { int cpu; - osnoise_unregister_instance(tr); + if (!osnoise_busy) + return; - /* - * Instruct the threads to stop only if this is the last instance. - */ - if (!osnoise_has_registered_instances()) { - for_each_online_cpu(cpu) - per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; - } + for_each_online_cpu(cpu) + per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; - osnoise_workload_stop(); + osnoise_tracer_stop(tr); + + osnoise_data.timerlat_tracer = 0; } static int timerlat_tracer_init(struct trace_array *tr) { - /* - * Only allow timerlat tracer if osnoise tracer is not running already. - */ - if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) + /* Only allow one instance to enable this */ + if (osnoise_busy) return -EBUSY; - /* - * If this is the first instance, set timerlat_tracer to block - * osnoise tracer start. - */ - if (!osnoise_has_registered_instances()) - osnoise_data.timerlat_tracer = 1; + osnoise_trace = tr; tr->max_latency = 0; + timerlat_tracer_start(tr); return 0; @@ -2328,13 +2082,6 @@ static int timerlat_tracer_init(struct trace_array *tr) static void timerlat_tracer_reset(struct trace_array *tr) { timerlat_tracer_stop(tr); - - /* - * If this is the last instance, reset timerlat_tracer allowing - * osnoise to be started. 
- */ - if (!osnoise_has_registered_instances()) - osnoise_data.timerlat_tracer = 0; } static struct tracer timerlat_tracer __read_mostly = { @@ -2346,16 +2093,6 @@ static struct tracer timerlat_tracer __read_mostly = { .print_header = print_timerlat_headers, .allow_instances = true, }; - -__init static int init_timerlat_tracer(void) -{ - return register_tracer(&timerlat_tracer); -} -#else /* CONFIG_TIMERLAT_TRACER */ -__init static int init_timerlat_tracer(void) -{ - return 0; -} #endif /* CONFIG_TIMERLAT_TRACER */ __init static int init_osnoise_tracer(void) @@ -2372,16 +2109,15 @@ __init static int init_osnoise_tracer(void) return ret; } - ret = init_timerlat_tracer(); +#ifdef CONFIG_TIMERLAT_TRACER + ret = register_tracer(&timerlat_tracer); if (ret) { - pr_err(BANNER "Error registering timerlat!\n"); + pr_err(BANNER "Error registering timerlat\n"); return ret; } - +#endif osnoise_init_hotplug_support(); - INIT_LIST_HEAD_RCU(&osnoise_instances); - init_tracefs(); return 0; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 8aa493d25c..c2ca40e859 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -347,12 +346,22 @@ int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) } EXPORT_SYMBOL_GPL(trace_output_call); -static inline const char *kretprobed(const char *name, unsigned long addr) +#ifdef CONFIG_KRETPROBES +static inline const char *kretprobed(const char *name) { - if (is_kretprobe_trampoline(addr)) + static const char tramp_name[] = "kretprobe_trampoline"; + int size = sizeof(tramp_name); + + if (strncmp(tramp_name, name, size) == 0) return "[unknown/kretprobe'd]"; return name; } +#else +static inline const char *kretprobed(const char *name) +{ + return name; +} +#endif /* CONFIG_KRETPROBES */ void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) @@ -365,7 +374,7 @@ trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) sprint_symbol(str, address); else kallsyms_lookup(address, NULL, NULL, NULL, str); - name = kretprobed(str, address); + name = kretprobed(str); if (name && strlen(name)) { trace_seq_puts(s, name); @@ -445,18 +454,14 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) char irqs_off; int hardirq; int softirq; - int bh_off; int nmi; nmi = entry->flags & TRACE_FLAG_NMI; hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - bh_off = entry->flags & TRACE_FLAG_BH_OFF; irqs_off = - (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' : (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - bh_off ? 'b' : (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 
'X' : '.'; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 80863c6508..bb4605b60d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -871,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, switch (ptype) { case PROBE_PRINT_NORMAL: fmt = "(%lx)"; - arg = ", REC->" FIELD_STRING_IP; + arg = "REC->" FIELD_STRING_IP; break; case PROBE_PRINT_RETURN: fmt = "(%lx <- %lx)"; - arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; + arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; break; case PROBE_PRINT_EVENT: - fmt = ""; - arg = ""; + fmt = "(%u)"; + arg = "REC->" FIELD_STRING_TYPE; break; default: WARN_ON_ONCE(1); @@ -903,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, parg->type->fmt); } - pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg); + pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); for (i = 0; i < tp->nr_args; i++) { parg = tp->args + i; @@ -1140,7 +1140,8 @@ int trace_probe_remove_file(struct trace_probe *tp, return -ENOENT; list_del_rcu(&link->list); - kvfree_rcu(link); + synchronize_rcu(); + kfree(link); if (list_empty(&tp->event->files)) trace_probe_clear_flag(tp, TP_FLAG_TRACE); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 92cc149af0..99e7a5df02 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -38,6 +38,7 @@ #define FIELD_STRING_IP "__probe_ip" #define FIELD_STRING_RETIP "__probe_ret_ip" #define FIELD_STRING_FUNC "__probe_func" +#define FIELD_STRING_TYPE "__probe_type" #undef DEFINE_FIELD #define DEFINE_FIELD(type, item, name, is_signed) \ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index abcadbe933..adf7ef1940 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -287,40 +287,6 @@ static int trace_selftest_ops(struct trace_array *tr, int cnt) if (trace_selftest_test_probe3_cnt != 4) goto out_free; - /* Remove trace function from probe 3 */ - func1_name = "!" __stringify(DYN_FTRACE_TEST_NAME); - len1 = strlen(func1_name); - - ftrace_set_filter(&test_probe3, func1_name, len1, 0); - - DYN_FTRACE_TEST_NAME(); - - print_counts(); - - if (trace_selftest_test_probe1_cnt != 3) - goto out_free; - if (trace_selftest_test_probe2_cnt != 2) - goto out_free; - if (trace_selftest_test_probe3_cnt != 4) - goto out_free; - if (cnt > 1) { - if (trace_selftest_test_global_cnt == 0) - goto out_free; - } - if (trace_selftest_test_dyn_cnt == 0) - goto out_free; - - DYN_FTRACE_TEST_NAME2(); - - print_counts(); - - if (trace_selftest_test_probe1_cnt != 3) - goto out_free; - if (trace_selftest_test_probe2_cnt != 3) - goto out_free; - if (trace_selftest_test_probe3_cnt != 5) - goto out_free; - ret = 0; out_free: unregister_ftrace_function(dyn_ops); @@ -784,10 +750,6 @@ static struct fgraph_ops fgraph_ops __initdata = { .retfunc = &trace_graph_return, }; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS -noinline __noclone static void trace_direct_tramp(void) { } -#endif - /* * Pretty much the same as for the function tracer from which the selftest * has been borrowed. 
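For reference, the trace_probe_remove_file() hunk above swaps kvfree_rcu() for an explicit synchronize_rcu() plus kfree() pair. A minimal sketch of that deferred-free pattern (the example_* names are illustrative, not from this patch):

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_link {
        struct list_head list;
};

static void example_remove_link(struct example_link *link)
{
        /* Unlink the entry; concurrent RCU readers may still see it. */
        list_del_rcu(&link->list);
        /* Wait for all pre-existing read-side critical sections to end. */
        synchronize_rcu();
        /* No reader can hold a reference now, so freeing is safe. */
        kfree(link);
}

kvfree_rcu() gives the same lifetime guarantee without blocking the writer; the open-coded form is simply the sleeping equivalent.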
@@ -798,7 +760,6 @@ trace_selftest_startup_function_graph(struct tracer *trace, { int ret; unsigned long count; - char *func_name __maybe_unused; #ifdef CONFIG_DYNAMIC_FTRACE if (ftrace_filter_param) { @@ -847,57 +808,8 @@ trace_selftest_startup_function_graph(struct tracer *trace, goto out; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - tracing_reset_online_cpus(&tr->array_buffer); - set_graph_array(tr); - - /* - * Some archs *cough*PowerPC*cough* add characters to the - * start of the function names. We simply put a '*' to - * accommodate them. - */ - func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); - ftrace_set_global_filter(func_name, strlen(func_name), 1); - - /* - * Register direct function together with graph tracer - * and make sure we get graph trace. - */ - ret = register_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, - (unsigned long) trace_direct_tramp); - if (ret) - goto out; - - ret = register_ftrace_graph(&fgraph_ops); - if (ret) { - warn_failed_init_tracer(trace, ret); - goto out; - } - - DYN_FTRACE_TEST_NAME(); - - count = 0; - - tracing_stop(); - /* check the trace buffer */ - ret = trace_test_buffer(&tr->array_buffer, &count); - - unregister_ftrace_graph(&fgraph_ops); - - ret = unregister_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, - (unsigned long) trace_direct_tramp); - if (ret) - goto out; - - tracing_start(); - - if (!ret && !count) { - ret = -1; - goto out; - } -#endif - /* Don't test dynamic tracing, the function tracer already did */ + out: /* Stop it if we failed */ if (ret) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9711589273..78ec1c16cc 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -7,7 +7,6 @@ */ #define pr_fmt(fmt) "trace_uprobe: " fmt -#include #include #include #include @@ -410,10 +409,12 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, struct trace_uprobe *comp) { struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; struct inode *comp_inode = d_real_inode(comp->path.dentry); int i; - list_for_each_entry(orig, &tpe->probes, tp.list) { + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_uprobe, tp); if (comp_inode != d_real_inode(orig->path.dentry) || comp->offset != orig->offset) continue; @@ -948,7 +949,8 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; - struct trace_event_buffer fbuffer; + struct trace_buffer *buffer; + struct ring_buffer_event *event; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); @@ -963,10 +965,12 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; - entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); - if (!entry) + event = trace_event_buffer_lock_reserve(&buffer, trace_file, + call->event.type, size, 0); + if (!event) return; + entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { entry->vaddr[0] = func; entry->vaddr[1] = instruction_pointer(regs); @@ -978,7 +982,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - trace_event_buffer_commit(&fbuffer); + event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); } /* uprobe handler */ @@ -1071,12 +1075,14 @@ static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) static void 
__probe_event_disable(struct trace_probe *tp) { + struct trace_probe *pos; struct trace_uprobe *tu; tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); - list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); if (!tu->inode) continue; @@ -1088,7 +1094,7 @@ static void __probe_event_disable(struct trace_probe *tp) static int probe_event_enable(struct trace_event_call *call, struct trace_event_file *file, filter_func_t filter) { - struct trace_probe *tp; + struct trace_probe *pos, *tp; struct trace_uprobe *tu; bool enabled; int ret; @@ -1123,7 +1129,8 @@ static int probe_event_enable(struct trace_event_call *call, if (ret) goto err_flags; - list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); ret = trace_uprobe_enable(tu, filter); if (ret) { __probe_event_disable(tp); @@ -1268,7 +1275,7 @@ static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter, static int uprobe_perf_close(struct trace_event_call *call, struct perf_event *event) { - struct trace_probe *tp; + struct trace_probe *pos, *tp; struct trace_uprobe *tu; int ret = 0; @@ -1280,7 +1287,8 @@ static int uprobe_perf_close(struct trace_event_call *call, if (trace_uprobe_filter_remove(tu->tp.event->filter, event)) return 0; - list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); if (ret) break; @@ -1292,7 +1300,7 @@ static int uprobe_perf_close(struct trace_event_call *call, static int uprobe_perf_open(struct trace_event_call *call, struct perf_event *event) { - struct trace_probe *tp; + struct trace_probe *pos, *tp; struct trace_uprobe *tu; int err = 0; @@ -1304,7 +1312,8 @@ static int uprobe_perf_open(struct trace_event_call *call, if (trace_uprobe_filter_add(tu->tp.event->filter, event)) return 0; - list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); if (err) { uprobe_perf_close(call, event); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 1d261fbe36..fd2f7a052f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -136,7 +136,7 @@ static void __acct_update_integrals(struct task_struct *tsk, * the rest of the math is done in xacct_add_tsk. */ tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; - tsk->acct_vm_mem1 += delta * READ_ONCE(tsk->mm->total_vm) >> 10; + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10; } /** diff --git a/kernel/ucount.c b/kernel/ucount.c index 06ea04d446..a1d6726150 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -150,15 +150,9 @@ static void hlist_add_ucounts(struct ucounts *ucounts) spin_unlock_irq(&ucounts_lock); } -static inline bool get_ucounts_or_wrap(struct ucounts *ucounts) -{ - /* Returns true on a successful get, false if the count wraps. 
*/ - return !atomic_add_negative(1, &ucounts->count); -} - struct ucounts *get_ucounts(struct ucounts *ucounts) { - if (!get_ucounts_or_wrap(ucounts)) { + if (ucounts && atomic_add_negative(1, &ucounts->count)) { put_ucounts(ucounts); ucounts = NULL; } @@ -169,7 +163,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; - bool wrapped; + long overflow; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -195,9 +189,9 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } } - wrapped = !get_ucounts_or_wrap(ucounts); + overflow = atomic_add_negative(1, &ucounts->count); spin_unlock_irq(&ucounts_lock); - if (wrapped) { + if (overflow) { put_ucounts(ucounts); return NULL; } @@ -285,7 +279,7 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) struct ucounts *iter; long new = -1; /* Silence compiler warning */ for (iter = ucounts; iter; iter = iter->ns->ucounts) { - long dec = atomic_long_sub_return(v, &iter->ucount[type]); + long dec = atomic_long_add_return(-v, &iter->ucount[type]); WARN_ON_ONCE(dec < 0); if (iter == ucounts) new = dec; @@ -298,7 +292,7 @@ static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, { struct ucounts *iter, *next; for (iter = ucounts; iter != last; iter = next) { - long dec = atomic_long_sub_return(1, &iter->ucount[type]); + long dec = atomic_long_add_return(-1, &iter->ucount[type]); WARN_ON_ONCE(dec < 0); next = iter->ns->ucounts; if (dec == 0) @@ -336,7 +330,7 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) } return ret; dec_unwind: - dec = atomic_long_sub_return(1, &iter->ucount[type]); + dec = atomic_long_add_return(-1, &iter->ucount[type]); WARN_ON_ONCE(dec < 0); unwind: do_dec_rlimit_put_ucounts(ucounts, iter, type); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6b2e3ca7ee..5481ba44a8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -58,6 +58,18 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) cred->user_ns = user_ns; } +static unsigned long enforced_nproc_rlimit(void) +{ + unsigned long limit = RLIM_INFINITY; + + /* Is RLIMIT_NPROC currently enforced? 
*/ + if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) || + (current_user_ns() != &init_user_ns)) + limit = rlimit(RLIMIT_NPROC); + + return limit; +} + /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the @@ -122,7 +134,7 @@ int create_user_ns(struct cred *new) for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) { ns->ucount_max[i] = INT_MAX; } - set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)); + set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit()); set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE)); set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING)); set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK)); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 99afb88d2e..ad912511a0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -740,106 +740,6 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, mutex_unlock(&watchdog_mutex); return err; } - -static const int sixty = 60; - -static struct ctl_table watchdog_sysctls[] = { - { - .procname = "watchdog", - .data = &watchdog_user_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "watchdog_thresh", - .data = &watchdog_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_watchdog_thresh, - .extra1 = SYSCTL_ZERO, - .extra2 = (void *)&sixty, - }, - { - .procname = "nmi_watchdog", - .data = &nmi_watchdog_user_enabled, - .maxlen = sizeof(int), - .mode = NMI_WATCHDOG_SYSCTL_PERM, - .proc_handler = proc_nmi_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "watchdog_cpumask", - .data = &watchdog_cpumask_bits, - .maxlen = NR_CPUS, - .mode = 0644, - .proc_handler = proc_watchdog_cpumask, - }, -#ifdef CONFIG_SOFTLOCKUP_DETECTOR - { - .procname = "soft_watchdog", - .data = &soft_watchdog_user_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_soft_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "softlockup_panic", - .data = &softlockup_panic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#ifdef CONFIG_SMP - { - .procname = "softlockup_all_cpu_backtrace", - .data = &sysctl_softlockup_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ -#endif -#ifdef CONFIG_HARDLOCKUP_DETECTOR - { - .procname = "hardlockup_panic", - .data = &hardlockup_panic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#ifdef CONFIG_SMP - { - .procname = "hardlockup_all_cpu_backtrace", - .data = &sysctl_hardlockup_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ -#endif - {} -}; - -static void __init watchdog_sysctl_init(void) -{ - register_sysctl_init("kernel", watchdog_sysctls); -} -#else -#define watchdog_sysctl_init() do { } while (0) #endif /* CONFIG_SYSCTL */ void __init lockup_detector_init(void) @@ -853,5 +753,4 @@ void __init lockup_detector_init(void) if (!watchdog_nmi_probe()) nmi_watchdog_available = true; 
lockup_detector_setup(); - watchdog_sysctl_init(); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 33f1106b4f..3f4d276685 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -154,9 +154,6 @@ struct worker_pool { unsigned long watchdog_ts; /* L: watchdog timestamp */ - /* The current concurrency level. */ - atomic_t nr_running; - struct list_head worklist; /* L: list of pending works */ int nr_workers; /* L: total number of workers */ @@ -180,12 +177,19 @@ struct worker_pool { struct hlist_node hash_node; /* PL: unbound_pool_hash node */ int refcnt; /* PL: refcnt for unbound pools */ + /* + * The current concurrency level. As it's likely to be accessed + * from other CPUs during try_to_wake_up(), put it in a separate + * cacheline. + */ + atomic_t nr_running ____cacheline_aligned_in_smp; + /* * Destruction of pool is RCU protected to allow dereferences * from get_work_pool(). */ struct rcu_head rcu; -}; +} ____cacheline_aligned_in_smp; /* * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS @@ -371,7 +375,6 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); static int worker_thread(void *__worker); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); static void show_pwq(struct pool_workqueue *pwq); -static void show_one_worker_pool(struct worker_pool *pool); #define CREATE_TRACE_POINTS #include @@ -883,7 +886,8 @@ void wq_worker_running(struct task_struct *task) * @task: task going to sleep * * This function is called from schedule() when a busy worker is - * going to sleep. + * going to sleep. Preemption needs to be disabled to protect ->sleeping + * assignment. */ void wq_worker_sleeping(struct task_struct *task) { @@ -907,16 +911,6 @@ void wq_worker_sleeping(struct task_struct *task) worker->sleeping = 1; raw_spin_lock_irq(&pool->lock); - /* - * Recheck in case unbind_workers() preempted us. We don't - * want to decrement nr_running after the worker is unbound - * and nr_running has been reset. - */ - if (worker->flags & WORKER_NOT_RUNNING) { - raw_spin_unlock_irq(&pool->lock); - return; - } - /* * The counterpart of the following dec_and_test, implied mb, * worklist not empty test sequence is in insert_work(). @@ -1365,7 +1359,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, struct worker_pool *pool = pwq->pool; /* record the work call stack in order to print it in KASAN reports */ - kasan_record_aux_stack_noalloc(work); + kasan_record_aux_stack(work); /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); @@ -1545,8 +1539,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * @work: work to queue * * We queue the work to a specific CPU, the caller must ensure it - * can't go away. Callers that fail to ensure that the specified - * CPU cannot go away will execute on a randomly chosen CPU. + * can't go away. * * Return: %false if @work was already on a queue, %true otherwise. */ @@ -1826,8 +1819,14 @@ static void worker_enter_idle(struct worker *worker) if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); - /* Sanity check nr_running. */ - WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && + /* + * Sanity check nr_running. Because unbind_workers() releases + * pool->lock between setting %WORKER_UNBOUND and zapping + * nr_running, the warning may trigger spuriously. Check iff + * unbind is not in progress. 
+ */ + WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && + pool->nr_workers == pool->nr_idle && atomic_read(&pool->nr_running)); } @@ -4457,7 +4456,7 @@ void destroy_workqueue(struct workqueue_struct *wq) raw_spin_unlock_irq(&pwq->pool->lock); mutex_unlock(&wq->mutex); mutex_unlock(&wq_pool_mutex); - show_one_workqueue(wq); + show_workqueue_state(); return; } raw_spin_unlock_irq(&pwq->pool->lock); @@ -4807,115 +4806,96 @@ static void show_pwq(struct pool_workqueue *pwq) } /** - * show_one_workqueue - dump state of specified workqueue - * @wq: workqueue whose state will be printed - */ -void show_one_workqueue(struct workqueue_struct *wq) -{ - struct pool_workqueue *pwq; - bool idle = true; - unsigned long flags; - - for_each_pwq(pwq, wq) { - if (pwq->nr_active || !list_empty(&pwq->inactive_works)) { - idle = false; - break; - } - } - if (idle) /* Nothing to print for idle workqueue */ - return; - - pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); - - for_each_pwq(pwq, wq) { - raw_spin_lock_irqsave(&pwq->pool->lock, flags); - if (pwq->nr_active || !list_empty(&pwq->inactive_works)) { - /* - * Defer printing to avoid deadlocks in console - * drivers that queue work while holding locks - * also taken in their write paths. - */ - printk_deferred_enter(); - show_pwq(pwq); - printk_deferred_exit(); - } - raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); - /* - * We could be printing a lot from atomic context, e.g. - * sysrq-t -> show_all_workqueues(). Avoid triggering - * hard lockup. - */ - touch_nmi_watchdog(); - } - -} - -/** - * show_one_worker_pool - dump state of specified worker pool - * @pool: worker pool whose state will be printed - */ -static void show_one_worker_pool(struct worker_pool *pool) -{ - struct worker *worker; - bool first = true; - unsigned long flags; - - raw_spin_lock_irqsave(&pool->lock, flags); - if (pool->nr_workers == pool->nr_idle) - goto next_pool; - /* - * Defer printing to avoid deadlocks in console drivers that - * queue work while holding locks also taken in their write - * paths. - */ - printk_deferred_enter(); - pr_info("pool %d:", pool->id); - pr_cont_pool_info(pool); - pr_cont(" hung=%us workers=%d", - jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000, - pool->nr_workers); - if (pool->manager) - pr_cont(" manager: %d", - task_pid_nr(pool->manager->task)); - list_for_each_entry(worker, &pool->idle_list, entry) { - pr_cont(" %s%d", first ? "idle: " : "", - task_pid_nr(worker->task)); - first = false; - } - pr_cont("\n"); - printk_deferred_exit(); -next_pool: - raw_spin_unlock_irqrestore(&pool->lock, flags); - /* - * We could be printing a lot from atomic context, e.g. - * sysrq-t -> show_all_workqueues(). Avoid triggering - * hard lockup. - */ - touch_nmi_watchdog(); - -} - -/** - * show_all_workqueues - dump workqueue state + * show_workqueue_state - dump workqueue state * * Called from a sysrq handler or try_to_freeze_tasks() and prints out * all busy workqueues and pools. 
*/ -void show_all_workqueues(void) +void show_workqueue_state(void) { struct workqueue_struct *wq; struct worker_pool *pool; + unsigned long flags; int pi; rcu_read_lock(); pr_info("Showing busy workqueues and worker pools:\n"); - list_for_each_entry_rcu(wq, &workqueues, list) - show_one_workqueue(wq); + list_for_each_entry_rcu(wq, &workqueues, list) { + struct pool_workqueue *pwq; + bool idle = true; - for_each_pool(pool, pi) - show_one_worker_pool(pool); + for_each_pwq(pwq, wq) { + if (pwq->nr_active || !list_empty(&pwq->inactive_works)) { + idle = false; + break; + } + } + if (idle) + continue; + + pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); + + for_each_pwq(pwq, wq) { + raw_spin_lock_irqsave(&pwq->pool->lock, flags); + if (pwq->nr_active || !list_empty(&pwq->inactive_works)) { + /* + * Defer printing to avoid deadlocks in console + * drivers that queue work while holding locks + * also taken in their write paths. + */ + printk_deferred_enter(); + show_pwq(pwq); + printk_deferred_exit(); + } + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); + } + } + + for_each_pool(pool, pi) { + struct worker *worker; + bool first = true; + + raw_spin_lock_irqsave(&pool->lock, flags); + if (pool->nr_workers == pool->nr_idle) + goto next_pool; + /* + * Defer printing to avoid deadlocks in console drivers that + * queue work while holding locks also taken in their write + * paths. + */ + printk_deferred_enter(); + pr_info("pool %d:", pool->id); + pr_cont_pool_info(pool); + pr_cont(" hung=%us workers=%d", + jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000, + pool->nr_workers); + if (pool->manager) + pr_cont(" manager: %d", + task_pid_nr(pool->manager->task)); + list_for_each_entry(worker, &pool->idle_list, entry) { + pr_cont(" %s%d", first ? "idle: " : "", + task_pid_nr(worker->task)); + first = false; + } + pr_cont("\n"); + printk_deferred_exit(); + next_pool: + raw_spin_unlock_irqrestore(&pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); + } rcu_read_unlock(); } @@ -4988,33 +4968,15 @@ static void unbind_workers(int cpu) /* * We've blocked all attach/detach operations. Make all workers * unbound and set DISASSOCIATED. Before this, all workers - * must be on the cpu. After this, they may become diasporas. - * And the preemption disabled section in their sched callbacks - * are guaranteed to see WORKER_UNBOUND since the code here - * is on the same cpu. + * except for the ones which are still executing works from + * before the last CPU down must be on the cpu. After + * this, they may become diasporas. */ for_each_pool_worker(worker, pool) worker->flags |= WORKER_UNBOUND; pool->flags |= POOL_DISASSOCIATED; - /* - * The handling of nr_running in sched callbacks are disabled - * now. Zap nr_running. After this, nr_running stays zero and - * need_more_worker() and keep_working() are always true as - * long as the worklist is not empty. This pool now behaves as - * an unbound (in terms of concurrency management) pool which - * are served by workers tied to the pool. - */ - atomic_set(&pool->nr_running, 0); - - /* - * With concurrency management just turned off, a busy - * worker blocking could lead to lengthy stalls. Kick off - * unbound chain execution of currently pending work items. 
- */ - wake_up_worker(pool); - raw_spin_unlock_irq(&pool->lock); for_each_pool_worker(worker, pool) { @@ -5023,6 +4985,33 @@ static void unbind_workers(int cpu) } mutex_unlock(&wq_pool_attach_mutex); + + /* + * Call schedule() so that we cross rq->lock and thus can + * guarantee sched callbacks see the %WORKER_UNBOUND flag. + * This is necessary as scheduler callbacks may be invoked + * from other cpus. + */ + schedule(); + + /* + * Sched callbacks are disabled now. Zap nr_running. + * After this, nr_running stays zero and need_more_worker() + * and keep_working() are always true as long as the + * worklist is not empty. This pool now behaves as an + * unbound (in terms of concurrency management) pool which + * are served by workers tied to the pool. + */ + atomic_set(&pool->nr_running, 0); + + /* + * With concurrency management just turned off, a busy + * worker blocking could lead to lengthy stalls. Kick off + * unbound chain execution of currently pending work items. + */ + raw_spin_lock_irq(&pool->lock); + wake_up_worker(pool); + raw_spin_unlock_irq(&pool->lock); } } @@ -5058,6 +5047,17 @@ static void rebind_workers(struct worker_pool *pool) for_each_pool_worker(worker, pool) { unsigned int worker_flags = worker->flags; + /* + * A bound idle worker should actually be on the runqueue + * of the associated CPU for local wake-ups targeting it to + * work. Kick all idle workers so that they migrate to the + * associated CPU. Doing this in the same loop as + * replacing UNBOUND with REBOUND is safe as no worker will + * be bound before @pool->lock is released. + */ + if (worker_flags & WORKER_IDLE) + wake_up_process(worker->task); + /* * We want to clear UNBOUND but can't directly call * worker_clr_flags() or adjust nr_running. Atomically @@ -5885,7 +5885,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) rcu_read_unlock(); if (lockup_detected) - show_all_workqueues(); + show_workqueue_state(); wq_watchdog_reset_touched(); mod_timer(&wq_watchdog_timer, jiffies + thresh); diff --git a/lib/.gitignore b/lib/.gitignore index e5e217b830..5e7fa54c45 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -4,5 +4,3 @@ /gen_crc32table /gen_crc64table /oid_registry_data.c -/test_fortify.log -/test_fortify/*.log diff --git a/lib/Kconfig b/lib/Kconfig index c80fde816a..5e7165e6a3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -65,6 +65,9 @@ config GENERIC_STRNLEN_USER config GENERIC_NET_UTILS bool +config GENERIC_FIND_FIRST_BIT + bool + source "lib/math/Kconfig" config NO_GENERIC_PCI_IOPORT_MAP @@ -119,8 +122,6 @@ config INDIRECT_IOMEM_FALLBACK mmio accesses when the IO memory address is not a registered emulated region. -source "lib/crypto/Kconfig" - config CRC_CCITT tristate "CRC-CCITT functions" help @@ -670,10 +671,6 @@ config STACKDEPOT bool select STACKTRACE -config STACKDEPOT_ALWAYS_INIT - bool - select STACKDEPOT - config STACK_HASH_ORDER int "stack depot hash size (12 => 4KB, 20 => 1024KB)" range 12 20 @@ -683,11 +680,6 @@ config STACK_HASH_ORDER Select the hash size as a power of 2 for the stackdepot hash table. Choose a lower value to reduce the memory impact. 
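The unbind_workers() hunk restored above relies on schedule() for visibility instead of rechecking under pool->lock. A simplified sketch of that ordering, assuming the surrounding workqueue.c types (illustrative only, not the full function):

static void example_unbind(struct worker_pool *pool)
{
        raw_spin_lock_irq(&pool->lock);
        pool->flags |= POOL_DISASSOCIATED;      /* publish under pool->lock */
        raw_spin_unlock_irq(&pool->lock);

        /*
         * Passing through rq->lock in schedule() guarantees that sched
         * callbacks on any CPU observe the flag set above before the
         * concurrency counter is zapped.
         */
        schedule();

        atomic_set(&pool->nr_running, 0);
}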
-config REF_TRACKER - bool - depends on STACKTRACE_SUPPORT - select STACKDEPOT - config SBITMAP bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 14b89aa37c..2a9b6dcdac 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -316,7 +316,6 @@ config DEBUG_INFO_BTF bool "Generate BTF typeinfo" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST - depends on BPF_SYSCALL help Generate deduplicated BTF type information from DWARF debug info. Turning this on expects presence of pahole tool, which will convert @@ -347,9 +346,8 @@ config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 2048 if PARISC - default 1536 if (!64BIT && XTENSA) - default 1024 if !64BIT + default 1536 if (!64BIT && (PARISC || XTENSA)) + default 1024 if (!64BIT && !PARISC) default 2048 if 64BIT help Tell gcc to warn at build time for stack frames larger than this. @@ -460,7 +458,7 @@ config STACK_VALIDATION config VMLINUX_VALIDATION bool - depends on STACK_VALIDATION && DEBUG_ENTRY + depends on STACK_VALIDATION && DEBUG_ENTRY && !PARAVIRT default y config VMLINUX_MAP @@ -599,11 +597,6 @@ config DEBUG_MISC Say Y here if you need to enable miscellaneous debug code that should be under a more specific debug option but isn't. -menu "Networking Debugging" - -source "net/Kconfig.debug" - -endmenu # "Networking Debugging" menu "Memory Debugging" @@ -884,7 +877,7 @@ config DEBUG_MEMORY_INIT config MEMORY_NOTIFIER_ERROR_INJECT tristate "Memory hotplug notifier error injection module" - depends on MEMORY_HOTPLUG && NOTIFIER_ERROR_INJECTION + depends on MEMORY_HOTPLUG_SPARSE && NOTIFIER_ERROR_INJECTION help This option provides the ability to inject artificial errors to memory hotplug notifier chain callbacks. It is controlled through @@ -1984,8 +1977,6 @@ config KCOV bool "Code coverage for fuzzing" depends on ARCH_HAS_KCOV depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS - depends on !ARCH_WANTS_NO_INSTR || STACK_VALIDATION || \ - GCC_VERSION >= 120000 || CLANG_VERSION >= 130000 select DEBUG_FS select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC help @@ -2089,10 +2080,9 @@ config TEST_DIV64 If unsure, say N. config KPROBES_SANITY_TEST - tristate "Kprobes sanity tests" + bool "Kprobes sanity tests" depends on DEBUG_KERNEL depends on KPROBES - depends on KUNIT help This option provides for testing basic kprobes functionality on boot. Samples of kprobe and kretprobe are inserted and @@ -2114,16 +2104,6 @@ config BACKTRACE_SELF_TEST Say N if you are unsure. -config TEST_REF_TRACKER - tristate "Self test for reference tracker" - depends on DEBUG_KERNEL && STACKTRACE_SUPPORT - select REF_TRACKER - help - This option provides a kernel module performing tests - using reference tracker infrastructure. - - Say N if you are unsure. - config RBTREE_TEST tristate "Red-Black tree test" depends on DEBUG_KERNEL @@ -2224,11 +2204,12 @@ config TEST_RHASHTABLE If unsure, say N. -config TEST_SIPHASH - tristate "Perform selftest on siphash functions" +config TEST_HASH + tristate "Perform selftest on hash functions" help - Enable this option to test the kernel's siphash (<linux/siphash.h>) hash - functions on boot (or module load). + Enable this option to test the kernel's integer (<linux/hash.h>), + string (<linux/stringhash.h>), and siphash (<linux/siphash.h>) + hash functions on boot (or module load). This is intended to help people writing architecture-specific optimized versions. If unsure, say N. @@ -2372,25 +2353,6 @@ config BITFIELD_KUNIT If unsure, say N. 
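The TEST_HASH option above exercises, among other things, the siphash interface. A hedged usage sketch (the helper name is hypothetical, and the fixed key is for illustration only; real callers should fill the key with get_random_bytes()):

#include <linux/siphash.h>

static u64 example_hash_cookie(u64 cookie)
{
        /* A compile-time constant key, purely for illustration. */
        static const siphash_key_t key = {
                .key = { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL },
        };

        return siphash_1u64(cookie, &key);
}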
-config HASH_KUNIT_TEST - tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - Enable this option to test the kernel's string (<linux/stringhash.h>), and - integer (<linux/hash.h>) hash functions on boot. - - KUnit tests run during boot and output the results to the debug log - in TAP format (https://testanything.org/). Only useful for kernel devs - running the KUnit test harness, and not intended for inclusion into a - production build. - - For more information on KUnit and unit tests in general please refer - to the KUnit documentation in Documentation/dev-tools/kunit/. - - This is intended to help people writing architecture-specific - optimized versions. If unsure, say N. - config RESOURCE_KUNIT_TEST tristate "KUnit test for resource API" depends on KUNIT @@ -2490,17 +2452,6 @@ config RATIONAL_KUNIT_TEST If unsure, say N. -config MEMCPY_KUNIT_TEST - tristate "Test memcpy(), memmove(), and memset() functions at runtime" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - Builds unit tests for memcpy(), memmove(), and memset() functions. - For more information on KUnit and unit tests in general please refer - to the KUnit documentation in Documentation/dev-tools/kunit/. - - If unsure, say N. - config TEST_UDELAY tristate "udelay test driver" help @@ -2522,7 +2473,6 @@ config TEST_KMOD depends on m depends on NETDEVICES && NET_CORE && INET # for TUN depends on BLOCK - depends on PAGE_SIZE_LESS_THAN_256KB # for BTRFS select TEST_LKM select XFS_FS select TUN diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 879757b6dd..cdc842d090 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -38,7 +38,7 @@ menuconfig KASAN CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ HAVE_ARCH_KASAN_HW_TAGS depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) - select STACKDEPOT_ALWAYS_INIT + select STACKDEPOT help Enables KASAN (KernelAddressSANitizer) - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 63b70b8c55..e0a93ffdef 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -191,26 +191,6 @@ config KCSAN_STRICT closely aligns with the rules defined by the Linux-kernel memory consistency model (LKMM). -config KCSAN_WEAK_MEMORY - bool "Enable weak memory modeling to detect missing memory barriers" - default y - depends on KCSAN_STRICT - # We can either let objtool nop __tsan_func_{entry,exit}() and builtin - # atomics instrumentation in .noinstr.text, or use a compiler that can - # implement __no_kcsan to really remove all instrumentation. - depends on STACK_VALIDATION || CC_IS_GCC || CLANG_VERSION >= 140000 - help - Enable support for modeling a subset of weak memory, which allows - detecting a subset of data races due to missing memory barriers. - - Depends on KCSAN_STRICT, because the options strengthening certain - plain accesses by default (depending on !KCSAN_STRICT) reduce the - ability to detect any data races involving reordered accesses, in - particular reordered writes. - - Weak memory modeling relies on additional instrumentation and may - affect performance. 
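The removed KCSAN_WEAK_MEMORY option targeted bugs of the kind sketched below: if the release/acquire pair were plain accesses, the reader could observe the flag before the payload, and weak memory modeling would report the missing barrier (the example_* names are illustrative):

static int example_data;
static int example_ready;

static void example_writer(void)
{
        example_data = 42;
        /* Orders the payload write before the flag publication. */
        smp_store_release(&example_ready, 1);
}

static int example_reader(void)
{
        /* Pairs with the release above; the payload is visible here. */
        if (smp_load_acquire(&example_ready))
                return example_data;
        return -1;
}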
- config KCSAN_REPORT_VALUE_CHANGE_ONLY bool "Only report races where watcher observed a data value change" default y diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 236c5cefc4..e5372a1351 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -112,6 +112,19 @@ config UBSAN_UNREACHABLE This option enables -fsanitize=unreachable which checks for control flow reaching an expected-to-be-unreachable position. +config UBSAN_OBJECT_SIZE + bool "Perform checking for accesses beyond the end of objects" + default UBSAN + # gcc hugely expands stack usage with -fsanitize=object-size + # https://lore.kernel.org/lkml/CAHk-=wjPasyJrDuwDnpHJS2TuQfExwe=px-SzLeN8GFMAQJPmQ@mail.gmail.com/ + depends on !CC_IS_GCC + depends on $(cc-option,-fsanitize=object-size) + help + This option enables -fsanitize=object-size which checks for accesses + beyond the end of objects where the optimizer can determine both the + object being operated on and its size, usually seen with bad downcasts, + or access to struct members from NULL pointers. + config UBSAN_BOOL bool "Perform checking for non-boolean values used as boolean" default UBSAN diff --git a/lib/Makefile b/lib/Makefile index 300f569c62..a841be5244 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -61,8 +61,7 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o -obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o -obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o +obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_IDA) += test_ida.o obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o CFLAGS_test_kasan.o += -fno-builtin @@ -101,8 +100,7 @@ obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o -obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o -obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o + # # CFLAGS for compiling floating point code inside the kernel. 
x86/Makefile turns # off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS @@ -271,8 +269,6 @@ obj-$(CONFIG_STACKDEPOT) += stackdepot.o KASAN_SANITIZE_stackdepot.o := n KCOV_INSTRUMENT_stackdepot.o := n -obj-$(CONFIG_REF_TRACKER) += ref_tracker.o - libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o fdt_addresses.o $(foreach file, $(libfdt_files), \ @@ -362,39 +358,5 @@ obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o obj-$(CONFIG_BITS_TEST) += test_bits.o obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o -obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o - -# FORTIFY_SOURCE compile-time behavior tests -TEST_FORTIFY_SRCS = $(wildcard $(srctree)/$(src)/test_fortify/*-*.c) -TEST_FORTIFY_LOGS = $(patsubst $(srctree)/$(src)/%.c, %.log, $(TEST_FORTIFY_SRCS)) -TEST_FORTIFY_LOG = test_fortify.log - -quiet_cmd_test_fortify = TEST $@ - cmd_test_fortify = $(CONFIG_SHELL) $(srctree)/scripts/test_fortify.sh \ - $< $@ "$(NM)" $(CC) $(c_flags) \ - $(call cc-disable-warning,fortify-source) - -targets += $(TEST_FORTIFY_LOGS) -clean-files += $(TEST_FORTIFY_LOGS) -clean-files += $(addsuffix .o, $(TEST_FORTIFY_LOGS)) -$(obj)/test_fortify/%.log: $(src)/test_fortify/%.c \ - $(src)/test_fortify/test_fortify.h \ - $(srctree)/include/linux/fortify-string.h \ - $(srctree)/scripts/test_fortify.sh \ - FORCE - $(call if_changed,test_fortify) - -quiet_cmd_gen_fortify_log = GEN $@ - cmd_gen_fortify_log = cat /dev/null > $@ || true - -targets += $(TEST_FORTIFY_LOG) -clean-files += $(TEST_FORTIFY_LOG) -$(obj)/$(TEST_FORTIFY_LOG): $(addprefix $(obj)/, $(TEST_FORTIFY_LOGS)) FORCE - $(call if_changed,gen_fortify_log) - -# Fake dependency to trigger the fortify tests. 
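The fortify test machinery deleted above compiled small known-bad translation units and asserted that they fail to build. A minimal sketch of such a test case (hypothetical function name; with CONFIG_FORTIFY_SOURCE=y the copy below is rejected at compile time because the overflow is provable from the constant sizes):

#include <linux/string.h>

static void example_fortify_overflow(void)
{
        char dst[8];
        const char src[16] = "0123456789abcde";

        /*
         * Both sizes are compile-time constants, so FORTIFY_SOURCE turns
         * this into a build error instead of a runtime overflow.
         */
        memcpy(dst, src, sizeof(src));
}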
-ifeq ($(CONFIG_FORTIFY_SOURCE),y) -$(obj)/string.o: $(obj)/$(TEST_FORTIFY_LOG) -endif diff --git a/lib/asn1_encoder.c b/lib/asn1_encoder.c index 0fd3c454a4..27bbe89171 100644 --- a/lib/asn1_encoder.c +++ b/lib/asn1_encoder.c @@ -164,6 +164,8 @@ asn1_encode_oid(unsigned char *data, const unsigned char *end_data, data_len -= 3; + ret = 0; + for (i = 2; i < oid_len; i++) { ret = asn1_encode_oid_digit(&d, &data_len, oid[i]); if (ret < 0) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 079c72e264..04c98799c3 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -741,7 +741,8 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; - new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), GFP_KERNEL); + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); if (!new_s0) return false; edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); @@ -848,8 +849,8 @@ static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; - new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), - GFP_KERNEL); + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); if (!new_s0) return false; edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); @@ -863,7 +864,7 @@ static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, new_n0->parent_slot = 0; memcpy(new_s0->index_key, shortcut->index_key, - flex_array_size(new_s0, index_key, keylen)); + keylen * sizeof(unsigned long)); blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); @@ -898,8 +899,8 @@ static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; - new_s1 = kzalloc(struct_size(new_s1, index_key, keylen), - GFP_KERNEL); + new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); if (!new_s1) return false; edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); @@ -912,7 +913,7 @@ static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); memcpy(new_s1->index_key, shortcut->index_key, - flex_array_size(new_s1, index_key, keylen)); + keylen * sizeof(unsigned long)); edit->set[1].ptr = &side->back_pointer; edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); @@ -1489,12 +1490,13 @@ int assoc_array_gc(struct assoc_array *array, shortcut = assoc_array_ptr_to_shortcut(cursor); keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; - new_s = kmalloc(struct_size(new_s, index_key, keylen), - GFP_KERNEL); + new_s = kmalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); if (!new_s) goto enomem; pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); - memcpy(new_s, shortcut, struct_size(new_s, index_key, keylen)); + memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long))); new_s->back_pointer = new_parent; new_s->parent_slot = shortcut->parent_slot; *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); diff --git a/lib/atomic64.c b/lib/atomic64.c index 
caf895789a..3df6539941 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -118,6 +118,7 @@ ATOMIC64_OPS(sub, -=) #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, c_op) \ ATOMIC64_OP(op, c_op) \ + ATOMIC64_OP_RETURN(op, c_op) \ ATOMIC64_FETCH_OP(op, c_op) ATOMIC64_OPS(and, &=) @@ -126,6 +127,7 @@ ATOMIC64_OPS(xor, ^=) #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP s64 generic_atomic64_dec_if_positive(atomic64_t *v) diff --git a/lib/audit.c b/lib/audit.c index 738bda22dd..5004bff928 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -45,27 +45,23 @@ int audit_classify_syscall(int abi, unsigned syscall) switch(syscall) { #ifdef __NR_open case __NR_open: - return AUDITSC_OPEN; + return 2; #endif #ifdef __NR_openat case __NR_openat: - return AUDITSC_OPENAT; + return 3; #endif #ifdef __NR_socketcall case __NR_socketcall: - return AUDITSC_SOCKETCALL; + return 4; #endif #ifdef __NR_execveat case __NR_execveat: #endif case __NR_execve: - return AUDITSC_EXECVE; -#ifdef __NR_openat2 - case __NR_openat2: - return AUDITSC_OPENAT2; -#endif + return 5; default: - return AUDITSC_NATIVE; + return 0; } } diff --git a/lib/bitmap.c b/lib/bitmap.c index 9264088834..663dd81967 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1398,19 +1398,6 @@ unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags) } EXPORT_SYMBOL(bitmap_zalloc); -unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node) -{ - return kmalloc_array_node(BITS_TO_LONGS(nbits), sizeof(unsigned long), - flags, node); -} -EXPORT_SYMBOL(bitmap_alloc_node); - -unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node) -{ - return bitmap_alloc_node(nbits, flags | __GFP_ZERO, node); -} -EXPORT_SYMBOL(bitmap_zalloc_node); - void bitmap_free(const unsigned long *bitmap) { kfree(bitmap); diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 74f3201ab8..5ae248b293 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -4,24 +4,16 @@ * Masami Hiramatsu */ -#ifdef __KERNEL__ +#define pr_fmt(fmt) "bootconfig: " fmt + #include #include #include #include #include #include +#include #include -#else /* !__KERNEL__ */ -/* - * NOTE: This is only for tools/bootconfig, because tools/bootconfig will - * run the parser sanity test. - * This does NOT mean lib/bootconfig.c is available in the user space. - * However, if you change this file, please make sure the tools/bootconfig - * has no issue on building and running. - */ -#include -#endif /* * Extra Boot Config (XBC) is given as tree-structured ascii text of @@ -42,50 +34,6 @@ static int xbc_err_pos __initdata; static int open_brace[XBC_DEPTH_MAX] __initdata; static int brace_index __initdata; -#ifdef __KERNEL__ -static inline void * __init xbc_alloc_mem(size_t size) -{ - return memblock_alloc(size, SMP_CACHE_BYTES); -} - -static inline void __init xbc_free_mem(void *addr, size_t size) -{ - memblock_free(addr, size); -} - -#else /* !__KERNEL__ */ - -static inline void *xbc_alloc_mem(size_t size) -{ - return malloc(size); -} - -static inline void xbc_free_mem(void *addr, size_t size) -{ - free(addr); -} -#endif -/** - * xbc_get_info() - Get the information of loaded boot config - * @node_size: A pointer to store the number of nodes. - * @data_size: A pointer to store the size of bootconfig data. - * - * Get the number of used nodes in @node_size if it is not NULL, - * and the size of bootconfig data in @data_size if it is not NULL. - * Return 0 if the boot config is initialized, or return -ENODEV. 
- */ -int __init xbc_get_info(int *node_size, size_t *data_size) -{ - if (!xbc_data) - return -ENODEV; - - if (node_size) - *node_size = xbc_node_num; - if (data_size) - *data_size = xbc_data_size; - return 0; -} - static int __init xbc_parse_error(const char *msg, const char *p) { xbc_err_msg = msg; @@ -278,7 +226,7 @@ int __init xbc_node_compose_key_after(struct xbc_node *root, struct xbc_node *node, char *buf, size_t size) { - uint16_t keys[XBC_DEPTH_MAX]; + u16 keys[XBC_DEPTH_MAX]; int depth = 0, ret = 0, total = 0; if (!node || node == root) @@ -393,21 +341,21 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root, /* XBC parse and tree build */ -static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag) +static int __init xbc_init_node(struct xbc_node *node, char *data, u32 flag) { unsigned long offset = data - xbc_data; if (WARN_ON(offset >= XBC_DATA_MAX)) return -EINVAL; - node->data = (uint16_t)offset | flag; + node->data = (u16)offset | flag; node->child = 0; node->next = 0; return 0; } -static struct xbc_node * __init xbc_add_node(char *data, uint32_t flag) +static struct xbc_node * __init xbc_add_node(char *data, u32 flag) { struct xbc_node *node; @@ -437,7 +385,7 @@ static inline __init struct xbc_node *xbc_last_child(struct xbc_node *node) return node; } -static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, bool head) +static struct xbc_node * __init __xbc_add_sibling(char *data, u32 flag, bool head) { struct xbc_node *sib, *node = xbc_add_node(data, flag); @@ -464,17 +412,17 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, boo return node; } -static inline struct xbc_node * __init xbc_add_sibling(char *data, uint32_t flag) +static inline struct xbc_node * __init xbc_add_sibling(char *data, u32 flag) { return __xbc_add_sibling(data, flag, false); } -static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint32_t flag) +static inline struct xbc_node * __init xbc_add_head_sibling(char *data, u32 flag) { return __xbc_add_sibling(data, flag, true); } -static inline __init struct xbc_node *xbc_add_child(char *data, uint32_t flag) +static inline __init struct xbc_node *xbc_add_child(char *data, u32 flag) { struct xbc_node *node = xbc_add_sibling(data, flag); @@ -832,14 +780,72 @@ static int __init xbc_verify_tree(void) return 0; } -/* Need to setup xbc_data and xbc_nodes before call this. */ -static int __init xbc_parse_tree(void) +/** + * xbc_destroy_all() - Clean up all parsed bootconfig + * + * This clears all data structures of parsed bootconfig on memory. + * If you need to reuse xbc_init() with new boot config, you can + * use this. + */ +void __init xbc_destroy_all(void) +{ + xbc_data = NULL; + xbc_data_size = 0; + xbc_node_num = 0; + memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); + xbc_nodes = NULL; + brace_index = 0; +} + +/** + * xbc_init() - Parse given XBC file and build XBC internal tree + * @buf: boot config text + * @emsg: A pointer of const char * to store the error message + * @epos: A pointer of int to store the error position + * + * This parses the boot config text in @buf. @buf must be a + * null terminated string and smaller than XBC_DATA_MAX. + * Return the number of stored nodes (>0) if succeeded, or -errno + * if there is any error. + * In error cases, @emsg will be updated with an error message and + * @epos will be updated with the error position which is the byte offset + * of @buf. 
If the error is not a parser error, @epos will be -1. + */ +int __init xbc_init(char *buf, const char **emsg, int *epos) { char *p, *q; - int ret = 0, c; + int ret, c; + if (epos) + *epos = -1; + + if (xbc_data) { + if (emsg) + *emsg = "Bootconfig is already initialized"; + return -EBUSY; + } + + ret = strlen(buf); + if (ret > XBC_DATA_MAX - 1 || ret == 0) { + if (emsg) + *emsg = ret ? "Config data is too big" : + "Config data is empty"; + return -ERANGE; + } + + xbc_nodes = memblock_alloc(sizeof(struct xbc_node) * XBC_NODE_MAX, + SMP_CACHE_BYTES); + if (!xbc_nodes) { + if (emsg) + *emsg = "Failed to allocate bootconfig nodes"; + return -ENOMEM; + } + memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); + xbc_data = buf; + xbc_data_size = ret + 1; last_parent = NULL; - p = xbc_data; + + p = buf; do { q = strpbrk(p, "{}=+;:\n#"); if (!q) { @@ -881,81 +887,6 @@ static int __init xbc_parse_tree(void) } } while (!ret); - return ret; -} - -/** - * xbc_exit() - Clean up all parsed bootconfig - * - * This clears all data structures of parsed bootconfig on memory. - * If you need to reuse xbc_init() with new boot config, you can - * use this. - */ -void __init xbc_exit(void) -{ - xbc_free_mem(xbc_data, xbc_data_size); - xbc_data = NULL; - xbc_data_size = 0; - xbc_node_num = 0; - xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); - xbc_nodes = NULL; - brace_index = 0; -} - -/** - * xbc_init() - Parse given XBC file and build XBC internal tree - * @data: The boot config text original data - * @size: The size of @data - * @emsg: A pointer of const char * to store the error message - * @epos: A pointer of int to store the error position - * - * This parses the boot config text in @data. @size must be smaller - * than XBC_DATA_MAX. - * Return the number of stored nodes (>0) if succeeded, or -errno - * if there is any error. - * In error cases, @emsg will be updated with an error message and - * @epos will be updated with the error position which is the byte offset - * of @buf. If the error is not a parser error, @epos will be -1. - */ -int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) -{ - int ret; - - if (epos) - *epos = -1; - - if (xbc_data) { - if (emsg) - *emsg = "Bootconfig is already initialized"; - return -EBUSY; - } - if (size > XBC_DATA_MAX || size == 0) { - if (emsg) - *emsg = size ? "Config data is too big" : - "Config data is empty"; - return -ERANGE; - } - - xbc_data = xbc_alloc_mem(size + 1); - if (!xbc_data) { - if (emsg) - *emsg = "Failed to allocate bootconfig data"; - return -ENOMEM; - } - memcpy(xbc_data, data, size); - xbc_data[size] = '\0'; - xbc_data_size = size + 1; - - xbc_nodes = xbc_alloc_mem(sizeof(struct xbc_node) * XBC_NODE_MAX); - if (!xbc_nodes) { - if (emsg) - *emsg = "Failed to allocate bootconfig nodes"; - xbc_exit(); - return -ENOMEM; - } - memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); - - ret = xbc_parse_tree(); if (!ret) ret = xbc_verify_tree(); @@ -964,9 +895,27 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) *epos = xbc_err_pos; if (emsg) *emsg = xbc_err_msg; - xbc_exit(); + xbc_destroy_all(); } else ret = xbc_node_num; return ret; } + +/** + * xbc_debug_dump() - Dump current XBC node list + * + * Dump the current XBC node list on printk buffer for debug. 
+ */ +void __init xbc_debug_dump(void) +{ + int i; + + for (i = 0; i < xbc_node_num; i++) { + pr_debug("[%d] %s (%s) .next=%d, .child=%d .parent=%d\n", i, + xbc_node_get_data(xbc_nodes + i), + xbc_node_is_value(xbc_nodes + i) ? "value" : "key", + xbc_nodes[i].next, xbc_nodes[i].child, + xbc_nodes[i].parent); + } +} diff --git a/lib/compat_audit.c b/lib/compat_audit.c index 3d6b8996f0..77eabad69b 100644 --- a/lib/compat_audit.c +++ b/lib/compat_audit.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include unsigned compat_dir_class[] = { @@ -34,23 +33,19 @@ int audit_classify_compat_syscall(int abi, unsigned syscall) switch (syscall) { #ifdef __NR_open case __NR_open: - return AUDITSC_OPEN; + return 2; #endif #ifdef __NR_openat case __NR_openat: - return AUDITSC_OPENAT; + return 3; #endif #ifdef __NR_socketcall case __NR_socketcall: - return AUDITSC_SOCKETCALL; + return 4; #endif case __NR_execve: - return AUDITSC_EXECVE; -#ifdef __NR_openat2 - case __NR_openat2: - return AUDITSC_OPENAT2; -#endif + return 5; default: - return AUDITSC_COMPAT; + return 1; } } diff --git a/lib/cpumask.c b/lib/cpumask.c index a971a82d2f..c3c76b8333 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { - memblock_free(mask, cpumask_size()); + memblock_free_early(__pa(mask), cpumask_size()); } #endif diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index e8e525650c..545ccbddf6 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Crypto library routines" +comment "Crypto library routines" config CRYPTO_LIB_AES tristate @@ -9,14 +9,14 @@ config CRYPTO_LIB_ARC4 tristate config CRYPTO_ARCH_HAVE_LIB_BLAKE2S - bool + tristate help Declares whether the architecture provides an arch-specific accelerated implementation of the Blake2s library interface, either builtin or as a module. config CRYPTO_LIB_BLAKE2S_GENERIC - def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S + tristate help This symbol can be depended upon by arch implementations of the Blake2s library interface that require the generic code as a @@ -24,6 +24,15 @@ config CRYPTO_LIB_BLAKE2S_GENERIC implementation is enabled, this implementation serves the users of CRYPTO_LIB_BLAKE2S. +config CRYPTO_LIB_BLAKE2S + tristate "BLAKE2s hash function library" + depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S + select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n + help + Enable the Blake2s library interface. This interface may be fulfilled + by either the generic implementation or an arch-specific one, if one + is available and enabled. 
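For context, a hedged usage sketch of the BLAKE2s library interface that CRYPTO_LIB_BLAKE2S exposes, a one-shot unkeyed hash into a 32-byte digest (the helper name is illustrative):

#include <crypto/blake2s.h>

static void example_blake2s_digest(const u8 *in, size_t inlen,
                                   u8 out[BLAKE2S_HASH_SIZE])
{
        /* out = BLAKE2s-256(in); pass a key and keylen for keyed hashing. */
        blake2s(out, in, NULL, BLAKE2S_HASH_SIZE, inlen, 0);
}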
+ config CRYPTO_ARCH_HAVE_LIB_CHACHA tristate help @@ -33,7 +42,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select XOR_BLOCKS + select CRYPTO_ALGAPI help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a @@ -43,7 +52,6 @@ config CRYPTO_LIB_CHACHA_GENERIC config CRYPTO_LIB_CHACHA tristate "ChaCha library interface" - depends on CRYPTO depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n help @@ -115,15 +123,11 @@ config CRYPTO_LIB_CHACHA20POLY1305 tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 - depends on CRYPTO select CRYPTO_LIB_CHACHA select CRYPTO_LIB_POLY1305 - select CRYPTO_ALGAPI config CRYPTO_LIB_SHA256 tristate config CRYPTO_LIB_SM4 tristate - -endmenu diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index ed43a41f2d..73205ed269 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -10,10 +10,11 @@ libaes-y := aes.o obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o libarc4-y := arc4.o -# blake2s is used by the /dev/random driver which is always builtin -obj-y += libblake2s.o -libblake2s-y := blake2s.o -libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o +obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o +libblake2s-generic-y += blake2s-generic.o + +obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o +libblake2s-y += blake2s.o obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c index 75ccb3e633..04ff8df245 100644 --- a/lib/crypto/blake2s-generic.c +++ b/lib/crypto/blake2s-generic.c @@ -37,11 +37,7 @@ static inline void blake2s_increment_counter(struct blake2s_state *state, state->t[1] += (state->t[0] < inc); } -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) - __weak __alias(blake2s_compress_generic); - -void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, +void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, size_t nblocks, const u32 inc) { u32 m[16]; diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 409e4b7287..5d9ea53be9 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -15,6 +15,7 @@ * #include * * #include + * #include * * #define BLAKE2S_TESTVEC_COUNT 256 * @@ -57,6 +58,16 @@ * } * printf("};\n\n"); * + * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); + * + * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL); + * print_vec(hash, BLAKE2S_OUTBYTES); + * + * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL); + * print_vec(hash, BLAKE2S_OUTBYTES); + * + * printf("};\n"); + * * return 0; *} */ @@ -543,6 +554,15 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, }; +static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { + { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70, + 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79, + 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, }, + { 
0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9, + 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f, + 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, }, +}; + bool __init blake2s_selftest(void) { u8 key[BLAKE2S_KEY_SIZE]; @@ -587,5 +607,16 @@ bool __init blake2s_selftest(void) } } + if (success) { + blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key)); + success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE); + + blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf)); + success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE); + + if (!success) + pr_err("blake2s256_hmac self-test: FAIL\n"); + } + return success; } diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index c71c09621c..4055aa593e 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,20 +16,63 @@ #include #include +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) +# define blake2s_compress blake2s_compress_arch +#else +# define blake2s_compress blake2s_compress_generic +#endif + void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { - __blake2s_update(state, in, inlen, false); + __blake2s_update(state, in, inlen, blake2s_compress); } EXPORT_SYMBOL(blake2s_update); void blake2s_final(struct blake2s_state *state, u8 *out) { WARN_ON(IS_ENABLED(DEBUG) && !out); - __blake2s_final(state, out, false); + __blake2s_final(state, out, blake2s_compress); memzero_explicit(state, sizeof(*state)); } EXPORT_SYMBOL(blake2s_final); +void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, + const size_t keylen) +{ + struct blake2s_state state; + u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; + u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); + int i; + + if (keylen > BLAKE2S_BLOCK_SIZE) { + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, key, keylen); + blake2s_final(&state, x_key); + } else + memcpy(x_key, key, keylen); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, in, inlen); + blake2s_final(&state, i_hash); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x5c ^ 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); + blake2s_final(&state, i_hash); + + memcpy(out, i_hash, BLAKE2S_HASH_SIZE); + memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); + memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); +} +EXPORT_SYMBOL(blake2s256_hmac); + static int __init blake2s_mod_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c index a512b99ae1..6b629ab31c 100644 --- a/lib/decompress_unzstd.c +++ b/lib/decompress_unzstd.c @@ -68,7 +68,11 @@ #ifdef STATIC # define UNZSTD_PREBOOT # include "xxhash.c" -# include "zstd/decompress_sources.h" +# include "zstd/entropy_common.c" +# include "zstd/fse_decompress.c" +# include "zstd/huf_decompress.c" +# include "zstd/zstd_common.c" +# include "zstd/decompress.c" #endif #include @@ -87,15 +91,11 @@ static int INIT handle_zstd_error(size_t ret, void (*error)(char *x)) { - const zstd_error_code err = zstd_get_error_code(ret); + const int err = ZSTD_getErrorCode(ret); - if (!zstd_is_error(ret)) + if (!ZSTD_isError(ret)) return 0; - /* - * zstd_get_error_name() cannot be used because error takes a char * - * not a 
const char * - */ switch (err) { case ZSTD_error_memory_allocation: error("ZSTD decompressor ran out of memory"); @@ -124,28 +124,28 @@ static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf, long out_len, long *in_pos, void (*error)(char *x)) { - const size_t wksp_size = zstd_dctx_workspace_bound(); + const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); void *wksp = large_malloc(wksp_size); - zstd_dctx *dctx = zstd_init_dctx(wksp, wksp_size); + ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size); int err; size_t ret; if (dctx == NULL) { - error("Out of memory while allocating zstd_dctx"); + error("Out of memory while allocating ZSTD_DCtx"); err = -1; goto out; } /* * Find out how large the frame actually is, there may be junk at - * the end of the frame that zstd_decompress_dctx() can't handle. + * the end of the frame that ZSTD_decompressDCtx() can't handle. */ - ret = zstd_find_frame_compressed_size(in_buf, in_len); + ret = ZSTD_findFrameCompressedSize(in_buf, in_len); err = handle_zstd_error(ret, error); if (err) goto out; in_len = (long)ret; - ret = zstd_decompress_dctx(dctx, out_buf, out_len, in_buf, in_len); + ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len); err = handle_zstd_error(ret, error); if (err) goto out; @@ -167,14 +167,14 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, long *in_pos, void (*error)(char *x)) { - zstd_in_buffer in; - zstd_out_buffer out; - zstd_frame_header header; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + ZSTD_frameParams params; void *in_allocated = NULL; void *out_allocated = NULL; void *wksp = NULL; size_t wksp_size; - zstd_dstream *dstream; + ZSTD_DStream *dstream; int err; size_t ret; @@ -238,13 +238,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, out.size = out_len; /* - * We need to know the window size to allocate the zstd_dstream. + * We need to know the window size to allocate the ZSTD_DStream. * Since we are streaming, we need to allocate a buffer for the sliding * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX * (8 MB), so it is important to use the actual value so as not to * waste memory when it is smaller. */ - ret = zstd_get_frame_header(&header, in.src, in.size); + ret = ZSTD_getFrameParams(¶ms, in.src, in.size); err = handle_zstd_error(ret, error); if (err) goto out; @@ -253,19 +253,19 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, err = -1; goto out; } - if (header.windowSize > ZSTD_WINDOWSIZE_MAX) { + if (params.windowSize > ZSTD_WINDOWSIZE_MAX) { error("ZSTD-compressed data has too large a window size"); err = -1; goto out; } /* - * Allocate the zstd_dstream now that we know how much memory is + * Allocate the ZSTD_DStream now that we know how much memory is * required. */ - wksp_size = zstd_dstream_workspace_bound(header.windowSize); + wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize); wksp = large_malloc(wksp_size); - dstream = zstd_init_dstream(header.windowSize, wksp, wksp_size); + dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size); if (dstream == NULL) { error("Out of memory while allocating ZSTD_DStream"); err = -1; @@ -298,7 +298,7 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, in.size = in_len; } /* Returns zero when the frame is complete. 
*/ - ret = zstd_decompress_stream(dstream, &out, &in); + ret = ZSTD_decompressStream(dstream, &out, &in); err = handle_zstd_error(ret, error); if (err) goto out; diff --git a/lib/devres.c b/lib/devres.c index 14664bbb48..b0e1c6702c 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -528,85 +528,3 @@ void pcim_iounmap_regions(struct pci_dev *pdev, int mask) } EXPORT_SYMBOL(pcim_iounmap_regions); #endif /* CONFIG_PCI */ - -static void devm_arch_phys_ac_add_release(struct device *dev, void *res) -{ - arch_phys_wc_del(*((int *)res)); -} - -/** - * devm_arch_phys_wc_add - Managed arch_phys_wc_add() - * @dev: Managed device - * @base: Memory base address - * @size: Size of memory range - * - * Adds a WC MTRR using arch_phys_wc_add() and sets up a release callback. - * See arch_phys_wc_add() for more information. - */ -int devm_arch_phys_wc_add(struct device *dev, unsigned long base, unsigned long size) -{ - int *mtrr; - int ret; - - mtrr = devres_alloc(devm_arch_phys_ac_add_release, sizeof(*mtrr), GFP_KERNEL); - if (!mtrr) - return -ENOMEM; - - ret = arch_phys_wc_add(base, size); - if (ret < 0) { - devres_free(mtrr); - return ret; - } - - *mtrr = ret; - devres_add(dev, mtrr); - - return ret; -} -EXPORT_SYMBOL(devm_arch_phys_wc_add); - -struct arch_io_reserve_memtype_wc_devres { - resource_size_t start; - resource_size_t size; -}; - -static void devm_arch_io_free_memtype_wc_release(struct device *dev, void *res) -{ - const struct arch_io_reserve_memtype_wc_devres *this = res; - - arch_io_free_memtype_wc(this->start, this->size); -} - -/** - * devm_arch_io_reserve_memtype_wc - Managed arch_io_reserve_memtype_wc() - * @dev: Managed device - * @start: Memory base address - * @size: Size of memory range - * - * Reserves a memory range with WC caching using arch_io_reserve_memtype_wc() - * and sets up a release callback See arch_io_reserve_memtype_wc() for more - * information. - */ -int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, - resource_size_t size) -{ - struct arch_io_reserve_memtype_wc_devres *dr; - int ret; - - dr = devres_alloc(devm_arch_io_free_memtype_wc_release, sizeof(*dr), GFP_KERNEL); - if (!dr) - return -ENOMEM; - - ret = arch_io_reserve_memtype_wc(start, size); - if (ret < 0) { - devres_free(dr); - return ret; - } - - dr->start = start; - dr->size = size; - devres_add(dev, dr); - - return ret; -} -EXPORT_SYMBOL(devm_arch_io_reserve_memtype_wc); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index dd7f56af9a..84c16309cc 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -71,8 +71,6 @@ static DEFINE_MUTEX(ddebug_lock); static LIST_HEAD(ddebug_tables); static int verbose; module_param(verbose, int, 0644); -MODULE_PARM_DESC(verbose, " dynamic_debug/control processing " - "( 0 = off (default), 1 = module add/rm, 2 = >control summary, 3 = parsing, 4 = per-site changes)"); /* Return the path relative to source root */ static inline const char *trim_prefix(const char *path) @@ -120,8 +118,6 @@ do { \ #define vpr_info(fmt, ...) vnpr_info(1, fmt, ##__VA_ARGS__) #define v2pr_info(fmt, ...) vnpr_info(2, fmt, ##__VA_ARGS__) -#define v3pr_info(fmt, ...) vnpr_info(3, fmt, ##__VA_ARGS__) -#define v4pr_info(fmt, ...) 
vnpr_info(4, fmt, ##__VA_ARGS__) static void vpr_info_dq(const struct ddebug_query *query, const char *msg) { @@ -134,7 +130,7 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg) fmtlen--; } - v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u\n", + vpr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u\n", msg, query->function ?: "", query->filename ?: "", @@ -217,7 +213,7 @@ static int ddebug_change(const struct ddebug_query *query, static_branch_enable(&dp->key.dd_key_true); #endif dp->flags = newflags; - v4pr_info("changed %s:%d [%s]%s =%s\n", + v2pr_info("changed %s:%d [%s]%s =%s\n", trim_prefix(dp->filename), dp->lineno, dt->mod_name, dp->function, ddebug_describe_flags(dp->flags, &fbuf)); @@ -277,7 +273,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) buf = end; } - if (verbose >= 3) { + if (verbose) { int i; pr_info("split into words:"); for (i = 0; i < nwords; i++) @@ -337,7 +333,7 @@ static int parse_linerange(struct ddebug_query *query, const char *first) } else { query->last_lineno = query->first_lineno; } - v3pr_info("parsed line %d-%d\n", query->first_lineno, + vpr_info("parsed line %d-%d\n", query->first_lineno, query->last_lineno); return 0; } @@ -451,7 +447,7 @@ static int ddebug_parse_flags(const char *str, struct flag_settings *modifiers) pr_err("bad flag-op %c, at start of %s\n", *str, str); return -EINVAL; } - v3pr_info("op='%c'\n", op); + vpr_info("op='%c'\n", op); for (; *str ; ++str) { for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { @@ -465,7 +461,7 @@ static int ddebug_parse_flags(const char *str, struct flag_settings *modifiers) return -EINVAL; } } - v3pr_info("flags=0x%x\n", modifiers->flags); + vpr_info("flags=0x%x\n", modifiers->flags); /* calculate final flags, mask based upon op */ switch (op) { @@ -481,7 +477,7 @@ static int ddebug_parse_flags(const char *str, struct flag_settings *modifiers) modifiers->flags = 0; break; } - v3pr_info("*flagsp=0x%x *maskp=0x%x\n", modifiers->flags, modifiers->mask); + vpr_info("*flagsp=0x%x *maskp=0x%x\n", modifiers->flags, modifiers->mask); return 0; } @@ -533,7 +529,7 @@ static int ddebug_exec_queries(char *query, const char *modname) if (!query || !*query || *query == '#') continue; - vpr_info("query %d: \"%s\" mod:%s\n", i, query, modname ?: "*"); + vpr_info("query %d: \"%s\"\n", i, query); rc = ddebug_exec_query(query, modname); if (rc < 0) { @@ -544,9 +540,8 @@ static int ddebug_exec_queries(char *query, const char *modname) } i++; } - if (i) - v2pr_info("processed %d queries, with %d matches, %d errs\n", - i, nfound, errs); + vpr_info("processed %d queries, with %d matches, %d errs\n", + i, nfound, errs); if (exitcode) return exitcode; @@ -751,6 +746,21 @@ EXPORT_SYMBOL(__dynamic_ibdev_dbg); #endif +#define DDEBUG_STRING_SIZE 1024 +static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE]; + +static __init int ddebug_setup_query(char *str) +{ + if (strlen(str) >= DDEBUG_STRING_SIZE) { + pr_warn("ddebug boot param string too large\n"); + return 0; + } + strlcpy(ddebug_setup_string, str, DDEBUG_STRING_SIZE); + return 1; +} + +__setup("ddebug_query=", ddebug_setup_query); + /* * Install a noop handler to make dyndbg look like a normal kernel cli param. 
* This avoids warnings about dyndbg being an unknown cli param when supplied @@ -783,7 +793,7 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, tmpbuf = memdup_user_nul(ubuf, len); if (IS_ERR(tmpbuf)) return PTR_ERR(tmpbuf); - v2pr_info("read %zu bytes from userspace\n", len); + vpr_info("read %d bytes from userspace\n", (int)len); ret = ddebug_exec_queries(tmpbuf, NULL); kfree(tmpbuf); @@ -971,7 +981,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, list_add(&dt->link, &ddebug_tables); mutex_unlock(&ddebug_lock); - vpr_info("%3u debug prints in module %s\n", n, dt->mod_name); + v2pr_info("%3u debug prints in module %s\n", n, dt->mod_name); return 0; } @@ -1030,6 +1040,8 @@ int ddebug_remove_module(const char *mod_name) struct ddebug_table *dt, *nextdt; int ret = -ENOENT; + v2pr_info("removing module \"%s\"\n", mod_name); + mutex_lock(&ddebug_lock); list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { if (dt->mod_name == mod_name) { @@ -1039,8 +1051,6 @@ int ddebug_remove_module(const char *mod_name) } } mutex_unlock(&ddebug_lock); - if (!ret) - v2pr_info("removed module \"%s\"\n", mod_name); return ret; } @@ -1123,6 +1133,16 @@ static int __init dynamic_debug_init(void) entries, modct, (int)((modct * sizeof(struct ddebug_table)) >> 10), (int)((entries * sizeof(struct _ddebug)) >> 10)); + /* apply ddebug_query boot param, dont unload tables on err */ + if (ddebug_setup_string[0] != '\0') { + pr_warn("ddebug_query param name is deprecated, change it to dyndbg\n"); + ret = ddebug_exec_queries(ddebug_setup_string, NULL); + if (ret < 0) + pr_warn("Invalid ddebug boot param %s\n", + ddebug_setup_string); + else + pr_info("%d changes by ddebug_query\n", ret); + } /* now that ddebug tables are loaded, process all boot args * again to find and activate queries given in dyndbg params. * While this has already been done for known boot params, it diff --git a/lib/error-inject.c b/lib/error-inject.c index 2ff5ef689d..c73651b15b 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -8,7 +8,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/slab.h> -#include <asm/sections.h> /* Whitelist of symbols that can be overridden for error injection. */ static LIST_HEAD(error_injection_list); @@ -65,7 +64,7 @@ static void populate_error_injection_list(struct error_injection_entry *start, mutex_lock(&ei_mutex); for (iter = start; iter < end; iter++) { - entry = (unsigned long)dereference_symbol_descriptor((void *)iter->addr); + entry = arch_deref_entry_point((void *)iter->addr); if (!kernel_text_address(entry) || !kallsyms_lookup_size_offset(entry, &size, &offset)) { diff --git a/lib/find_bit.c b/lib/find_bit.c index 1b8e4b2a9c..0f8e2e369b 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -89,27 +89,6 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) EXPORT_SYMBOL(_find_first_bit); #endif -#ifndef find_first_and_bit -/* - * Find the first set bit in two memory regions. - */ -unsigned long _find_first_and_bit(const unsigned long *addr1, - const unsigned long *addr2, - unsigned long size) -{ - unsigned long idx, val; - - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - val = addr1[idx] & addr2[idx]; - if (val) - return min(idx * BITS_PER_LONG + __ffs(val), size); - } - - return size; -} -EXPORT_SYMBOL(_find_first_and_bit); -#endif - #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region.
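For reference, the _find_first_and_bit() helper removed just above returned the lowest bit index set in both bitmaps, or size if they share no set bit. A caller-side sketch of the same result using only the primitives that remain after this revert; the fixed-size temporary (and its 128-bit bound) is an assumption for illustration:

#include <linux/bitmap.h>

/* Equivalent of the removed find_first_and_bit() for small bitmaps. */
static unsigned long first_common_bit(const unsigned long *addr1,
				      const unsigned long *addr2,
				      unsigned long size)
{
	DECLARE_BITMAP(tmp, 128);		/* assumes size <= 128 bits */

	bitmap_and(tmp, addr1, addr2, size);	/* tmp = addr1 & addr2 */
	return find_first_bit(tmp, size);	/* == size when no common bit */
}

The removed helper computed this in a single pass with no temporary bitmap, which is why it existed as a dedicated primitive.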
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index db904b57d4..5637c5711d 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -49,25 +49,6 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) return 0; } -static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, unsigned long len) -{ - static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata; - unsigned long i, cnt; - ktime_t time; - - bitmap_copy(cp, bitmap, BITMAP_LEN); - - time = ktime_get(); - for (cnt = i = 0; i < len; cnt++) { - i = find_first_and_bit(cp, bitmap2, len); - __clear_bit(i, cp); - } - time = ktime_get() - time; - pr_err("find_first_and_bit: %18llu ns, %6ld iterations\n", time, cnt); - - return 0; -} - static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; @@ -148,7 +129,6 @@ static int __init find_bit_test(void) * traverse only part of bitmap to avoid soft lockup. */ test_find_first_bit(bitmap, BITMAP_LEN / 10); - test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); @@ -165,7 +145,6 @@ static int __init find_bit_test(void) test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); - test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); /* diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 53e7eb1dd7..4515439375 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -217,12 +217,11 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, } /* Event of type pl happened */ -void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, - long nr) +void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) { fprop_reflect_period_percpu(p, pl); - percpu_counter_add_batch(&pl->events, nr, PROP_BATCH); - percpu_counter_add(&p->events, nr); + percpu_counter_add_batch(&pl->events, 1, PROP_BATCH); + percpu_counter_add(&p->events, 1); } void fprop_fraction_percpu(struct fprop_global *p, @@ -254,29 +253,20 @@ void fprop_fraction_percpu(struct fprop_global *p, } /* - * Like __fprop_add_percpu() except that event is counted only if the given + * Like __fprop_inc_percpu() except that event is counted only if the given * type has fraction smaller than @max_frac/FPROP_FRAC_BASE */ -void __fprop_add_percpu_max(struct fprop_global *p, - struct fprop_local_percpu *pl, int max_frac, long nr) +void __fprop_inc_percpu_max(struct fprop_global *p, + struct fprop_local_percpu *pl, int max_frac) { if (unlikely(max_frac < FPROP_FRAC_BASE)) { unsigned long numerator, denominator; - s64 tmp; fprop_fraction_percpu(p, pl, &numerator, &denominator); - /* Adding 'nr' to fraction exceeds max_frac/FPROP_FRAC_BASE? */ - tmp = (u64)denominator * max_frac - - ((u64)numerator << FPROP_FRAC_SHIFT); - if (tmp < 0) { - /* Maximum fraction already exceeded? 
*/ + if (numerator > + (((u64)denominator) * max_frac) >> FPROP_FRAC_SHIFT) return; - } else if (tmp < nr * (FPROP_FRAC_BASE - max_frac)) { - /* Add just enough for the fraction to saturate */ - nr = div_u64(tmp + FPROP_FRAC_BASE - max_frac - 1, - FPROP_FRAC_BASE - max_frac); - } } - __fprop_add_percpu(p, pl, nr); + __fprop_inc_percpu(p, pl); } diff --git a/lib/genalloc.c b/lib/genalloc.c index 00fc50d0a6..9a57257988 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -251,7 +251,7 @@ void gen_pool_destroy(struct gen_pool *pool) list_del(&chunk->next_chunk); end_bit = chunk_size(chunk) >> order; - bit = find_first_bit(chunk->bits, end_bit); + bit = find_next_bit(chunk->bits, end_bit, 0); BUG_ON(bit < end_bit); vfree(chunk); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 6dd5330f7a..c5b2f0f4b8 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -69,40 +69,42 @@ #define iterate_xarray(i, n, base, len, __off, STEP) { \ __label__ __out; \ size_t __off = 0; \ - struct folio *folio; \ + struct page *head = NULL; \ loff_t start = i->xarray_start + i->iov_offset; \ + unsigned offset = start % PAGE_SIZE; \ pgoff_t index = start / PAGE_SIZE; \ + int j; \ + \ XA_STATE(xas, i->xarray, index); \ \ - len = PAGE_SIZE - offset_in_page(start); \ rcu_read_lock(); \ - xas_for_each(&xas, folio, ULONG_MAX) { \ + xas_for_each(&xas, head, ULONG_MAX) { \ unsigned left; \ - size_t offset; \ - if (xas_retry(&xas, folio)) \ + if (xas_retry(&xas, head)) \ continue; \ - if (WARN_ON(xa_is_value(folio))) \ + if (WARN_ON(xa_is_value(head))) \ break; \ - if (WARN_ON(folio_test_hugetlb(folio))) \ + if (WARN_ON(PageHuge(head))) \ break; \ - offset = offset_in_folio(folio, start + __off); \ - while (offset < folio_size(folio)) { \ - base = kmap_local_folio(folio, offset); \ + for (j = (head->index < index) ? index - head->index : 0; \ + j < thp_nr_pages(head); j++) { \ + void *kaddr = kmap_local_page(head + j); \ + base = kaddr + offset; \ + len = PAGE_SIZE - offset; \ len = min(n, len); \ left = (STEP); \ - kunmap_local(base); \ + kunmap_local(kaddr); \ len -= left; \ __off += len; \ n -= len; \ if (left || n == 0) \ goto __out; \ - offset += len; \ - len = PAGE_SIZE; \ + offset = 0; \ } \ } \ __out: \ rcu_read_unlock(); \ - i->iov_offset += __off; \ + i->iov_offset += __off; \ n = __off; \ } @@ -189,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b buf = iov->iov_base + skip; copy = min(bytes, iov->iov_len - skip); - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) { + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) { kaddr = kmap_atomic(page); from = kaddr + offset; @@ -273,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t buf = iov->iov_base + skip; copy = min(bytes, iov->iov_len - skip); - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) { + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) { kaddr = kmap_atomic(page); to = kaddr + offset; @@ -429,81 +431,35 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by } /* - * fault_in_iov_iter_readable - fault in iov iterator for reading - * @i: iterator - * @size: maximum length - * * Fault in one or more iovecs of the given iov_iter, to a maximum length of - * @size. For each iovec, fault in each page that constitutes the iovec. + * bytes. For each iovec, fault in each page that constitutes the iovec. 
* - * Returns the number of bytes not faulted in (like copy_to_user() and - * copy_from_user()). - * - * Always returns 0 for non-userspace iterators. + * Return 0 on success, or non-zero if the memory could not be accessed (i.e. + * because it is an invalid address). */ -size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) +int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes) { if (iter_is_iovec(i)) { - size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; - size -= count; - for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) { - size_t len = min(count, p->iov_len - skip); - size_t ret; + if (bytes > i->count) + bytes = i->count; + for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) { + size_t len = min(bytes, p->iov_len - skip); + int err; if (unlikely(!len)) continue; - ret = fault_in_readable(p->iov_base + skip, len); - count -= len - ret; - if (ret) - break; + err = fault_in_pages_readable(p->iov_base + skip, len); + if (unlikely(err)) + return err; + bytes -= len; } - return count + size; } return 0; } -EXPORT_SYMBOL(fault_in_iov_iter_readable); - -/* - * fault_in_iov_iter_writeable - fault in iov iterator for writing - * @i: iterator - * @size: maximum length - * - * Faults in the iterator using get_user_pages(), i.e., without triggering - * hardware page faults. This is primarily useful when we already know that - * some or all of the pages in @i aren't in memory. - * - * Returns the number of bytes not faulted in, like copy_to_user() and - * copy_from_user(). - * - * Always returns 0 for non-user-space iterators. - */ -size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) -{ - if (iter_is_iovec(i)) { - size_t count = min(size, iov_iter_count(i)); - const struct iovec *p; - size_t skip; - - size -= count; - for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) { - size_t len = min(count, p->iov_len - skip); - size_t ret; - - if (unlikely(!len)) - continue; - ret = fault_in_safe_writeable(p->iov_base + skip, len); - count -= len - ret; - if (ret) - break; - } - return count + size; - } - return 0; -} -EXPORT_SYMBOL(fault_in_iov_iter_writeable); +EXPORT_SYMBOL(iov_iter_fault_in_readable); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, @@ -512,7 +468,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_IOVEC, - .nofault = false, .data_source = direction, .iov = iov, .nr_segs = nr_segs, @@ -1528,17 +1483,13 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return 0; if (likely(iter_is_iovec(i))) { - unsigned int gup_flags = 0; unsigned long addr; - if (iov_iter_rw(i) != WRITE) - gup_flags |= FOLL_WRITE; - if (i->nofault) - gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize, maxpages); n = DIV_ROUND_UP(len, PAGE_SIZE); - res = get_user_pages_fast(addr, n, gup_flags, pages); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, + pages); if (unlikely(res <= 0)) return res; return (res == n ? 
len : res * PAGE_SIZE) - *start; @@ -1654,20 +1605,15 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return 0; if (likely(iter_is_iovec(i))) { - unsigned int gup_flags = 0; unsigned long addr; - if (iov_iter_rw(i) != WRITE) - gup_flags |= FOLL_WRITE; - if (i->nofault) - gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize, ~0U); n = DIV_ROUND_UP(len, PAGE_SIZE); p = get_pages_array(n); if (!p) return -ENOMEM; - res = get_user_pages_fast(addr, n, gup_flags, p); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); if (unlikely(res <= 0)) { kvfree(p); *pages = NULL; diff --git a/lib/kobject.c b/lib/kobject.c index 56fa037501..ea53b30cf4 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -65,7 +65,7 @@ void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) */ static int populate_dir(struct kobject *kobj) { - const struct kobj_type *t = get_ktype(kobj); + struct kobj_type *t = get_ktype(kobj); struct attribute *attr; int error = 0; int i; @@ -346,7 +346,7 @@ EXPORT_SYMBOL(kobject_set_name); * to kobject_put(), not by a call to kfree directly to ensure that all of * the memory is cleaned up properly. */ -void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) +void kobject_init(struct kobject *kobj, struct kobj_type *ktype) { char *err_str; @@ -461,7 +461,7 @@ EXPORT_SYMBOL(kobject_add); * same type of error handling after a call to kobject_add() and kobject * lifetime rules are the same here. */ -int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, +int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; @@ -679,7 +679,7 @@ EXPORT_SYMBOL(kobject_get_unless_zero); static void kobject_cleanup(struct kobject *kobj) { struct kobject *parent = kobj->parent; - const struct kobj_type *t = get_ktype(kobj); + struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; pr_debug("kobject: '%s' (%p): %s, parent %p\n", @@ -777,7 +777,7 @@ static struct kobj_type dynamic_kobj_ktype = { * call to kobject_put() and not kfree(), as kobject_init() has * already been called on this structure. */ -static struct kobject *kobject_create(void) +struct kobject *kobject_create(void) { struct kobject *kobj; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 7c44b7ae4c..c87d5b6a8a 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -501,7 +501,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } /* skip the event, if the filter returns zero. 
*/ if (uevent_ops && uevent_ops->filter) - if (!uevent_ops->filter(kobj)) { + if (!uevent_ops->filter(kset, kobj)) { pr_debug("kobject: '%s' (%p): %s: filter function " "caused the event to drop!\n", kobject_name(kobj), kobj, __func__); @@ -510,7 +510,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, /* originating subsystem */ if (uevent_ops && uevent_ops->name) - subsystem = uevent_ops->name(kobj); + subsystem = uevent_ops->name(kset, kobj); else subsystem = kobject_name(&kset->kobj); if (!subsystem) { @@ -554,7 +554,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, /* let the kset specific function add its stuff */ if (uevent_ops && uevent_ops->uevent) { - retval = uevent_ops->uevent(kobj, env); + retval = uevent_ops->uevent(kset, kobj, env); if (retval) { pr_debug("kobject: '%s' (%p): %s: uevent() returned " "%d\n", kobject_name(kobj), kobj, diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 886510d248..059b8b00dc 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -22,7 +22,6 @@ #include "kstrtox.h" -noinline const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) { if (*base == 0) { @@ -48,7 +47,6 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) * * Don't you dare use this function. */ -noinline unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p, size_t max_chars) { @@ -87,7 +85,6 @@ unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned lon return rv; } -noinline unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p) { return _parse_integer_limit(s, base, p, INT_MAX); @@ -128,7 +125,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoull(). Return code must be checked. */ -noinline int kstrtoull(const char *s, unsigned int base, unsigned long long *res) { if (s[0] == '+') @@ -152,7 +148,6 @@ EXPORT_SYMBOL(kstrtoull); * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoll(). Return code must be checked. */ -noinline int kstrtoll(const char *s, unsigned int base, long long *res) { unsigned long long tmp; @@ -224,7 +219,6 @@ EXPORT_SYMBOL(_kstrtol); * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoul(). Return code must be checked. */ -noinline int kstrtouint(const char *s, unsigned int base, unsigned int *res) { unsigned long long tmp; @@ -255,7 +249,6 @@ EXPORT_SYMBOL(kstrtouint); * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtol(). Return code must be checked. 
*/ -noinline int kstrtoint(const char *s, unsigned int base, int *res) { long long tmp; @@ -271,7 +264,6 @@ int kstrtoint(const char *s, unsigned int base, int *res) } EXPORT_SYMBOL(kstrtoint); -noinline int kstrtou16(const char *s, unsigned int base, u16 *res) { unsigned long long tmp; @@ -287,7 +279,6 @@ int kstrtou16(const char *s, unsigned int base, u16 *res) } EXPORT_SYMBOL(kstrtou16); -noinline int kstrtos16(const char *s, unsigned int base, s16 *res) { long long tmp; @@ -303,7 +294,6 @@ int kstrtos16(const char *s, unsigned int base, s16 *res) } EXPORT_SYMBOL(kstrtos16); -noinline int kstrtou8(const char *s, unsigned int base, u8 *res) { unsigned long long tmp; @@ -319,7 +309,6 @@ int kstrtou8(const char *s, unsigned int base, u8 *res) } EXPORT_SYMBOL(kstrtou8); -noinline int kstrtos8(const char *s, unsigned int base, s8 *res) { long long tmp; @@ -344,7 +333,6 @@ EXPORT_SYMBOL(kstrtos8); * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value * pointed to by res is updated upon finding a match. */ -noinline int kstrtobool(const char *s, bool *res) { if (!s) diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 22640c9ee8..acd1de436f 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -15,89 +15,23 @@ extern struct kunit_suite * const * const __kunit_suites_end[]; #if IS_BUILTIN(CONFIG_KUNIT) static char *filter_glob_param; -static char *action_param; - module_param_named(filter_glob, filter_glob_param, charp, 0); MODULE_PARM_DESC(filter_glob, - "Filter which KUnit test suites/tests run at boot-time, e.g. list* or list*.*del_test"); -module_param_named(action, action_param, charp, 0); -MODULE_PARM_DESC(action, - "Changes KUnit executor behavior, valid values are:\n" - ": run the tests like normal\n" - "'list' to list test names instead of running them.\n"); - -/* glob_match() needs NULL terminated strings, so we need a copy of filter_glob_param. */ -struct kunit_test_filter { - char *suite_glob; - char *test_glob; -}; - -/* Split "suite_glob.test_glob" into two. Assumes filter_glob is not empty. */ -static void kunit_parse_filter_glob(struct kunit_test_filter *parsed, - const char *filter_glob) -{ - const int len = strlen(filter_glob); - const char *period = strchr(filter_glob, '.'); - - if (!period) { - parsed->suite_glob = kzalloc(len + 1, GFP_KERNEL); - parsed->test_glob = NULL; - strcpy(parsed->suite_glob, filter_glob); - return; - } - - parsed->suite_glob = kzalloc(period - filter_glob + 1, GFP_KERNEL); - parsed->test_glob = kzalloc(len - (period - filter_glob) + 1, GFP_KERNEL); - - strncpy(parsed->suite_glob, filter_glob, period - filter_glob); - strncpy(parsed->test_glob, period + 1, len - (period - filter_glob)); -} - -/* Create a copy of suite with only tests that match test_glob. */ -static struct kunit_suite * -kunit_filter_tests(struct kunit_suite *const suite, const char *test_glob) -{ - int n = 0; - struct kunit_case *filtered, *test_case; - struct kunit_suite *copy; - - kunit_suite_for_each_test_case(suite, test_case) { - if (!test_glob || glob_match(test_glob, test_case->name)) - ++n; - } - - if (n == 0) - return NULL; - - /* Use memcpy to workaround copy->name being const. 
*/ - copy = kmalloc(sizeof(*copy), GFP_KERNEL); - memcpy(copy, suite, sizeof(*copy)); - - filtered = kcalloc(n + 1, sizeof(*filtered), GFP_KERNEL); - - n = 0; - kunit_suite_for_each_test_case(suite, test_case) { - if (!test_glob || glob_match(test_glob, test_case->name)) - filtered[n++] = *test_case; - } - - copy->test_cases = filtered; - return copy; -} + "Filter which KUnit test suites run at boot-time, e.g. list*"); static char *kunit_shutdown; core_param(kunit_shutdown, kunit_shutdown, charp, 0644); static struct kunit_suite * const * kunit_filter_subsuite(struct kunit_suite * const * const subsuite, - struct kunit_test_filter *filter) + const char *filter_glob) { int i, n = 0; - struct kunit_suite **filtered, *filtered_suite; + struct kunit_suite **filtered; n = 0; - for (i = 0; subsuite[i]; ++i) { - if (glob_match(filter->suite_glob, subsuite[i]->name)) + for (i = 0; subsuite[i] != NULL; ++i) { + if (glob_match(filter_glob, subsuite[i]->name)) ++n; } @@ -110,11 +44,8 @@ kunit_filter_subsuite(struct kunit_suite * const * const subsuite, n = 0; for (i = 0; subsuite[i] != NULL; ++i) { - if (!glob_match(filter->suite_glob, subsuite[i]->name)) - continue; - filtered_suite = kunit_filter_tests(subsuite[i], filter->test_glob); - if (filtered_suite) - filtered[n++] = filtered_suite; + if (glob_match(filter_glob, subsuite[i]->name)) + filtered[n++] = subsuite[i]; } filtered[n] = NULL; @@ -126,32 +57,12 @@ struct suite_set { struct kunit_suite * const * const *end; }; -static void kunit_free_subsuite(struct kunit_suite * const *subsuite) -{ - unsigned int i; - - for (i = 0; subsuite[i]; i++) - kfree(subsuite[i]); - - kfree(subsuite); -} - -static void kunit_free_suite_set(struct suite_set suite_set) -{ - struct kunit_suite * const * const *suites; - - for (suites = suite_set.start; suites < suite_set.end; suites++) - kunit_free_subsuite(*suites); - kfree(suite_set.start); -} - static struct suite_set kunit_filter_suites(const struct suite_set *suite_set, const char *filter_glob) { int i; struct kunit_suite * const **copy, * const *filtered_subsuite; struct suite_set filtered; - struct kunit_test_filter filter; const size_t max = suite_set->end - suite_set->start; @@ -162,17 +73,12 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set, return filtered; } - kunit_parse_filter_glob(&filter, filter_glob); - for (i = 0; i < max; ++i) { - filtered_subsuite = kunit_filter_subsuite(suite_set->start[i], &filter); + filtered_subsuite = kunit_filter_subsuite(suite_set->start[i], filter_glob); if (filtered_subsuite) *copy++ = filtered_subsuite; } filtered.end = copy; - - kfree(filter.suite_glob); - kfree(filter.test_glob); return filtered; } @@ -203,35 +109,9 @@ static void kunit_print_tap_header(struct suite_set *suite_set) pr_info("1..%d\n", num_of_suites); } -static void kunit_exec_run_tests(struct suite_set *suite_set) -{ - struct kunit_suite * const * const *suites; - - kunit_print_tap_header(suite_set); - - for (suites = suite_set->start; suites < suite_set->end; suites++) - __kunit_test_suites_init(*suites); -} - -static void kunit_exec_list_tests(struct suite_set *suite_set) -{ - unsigned int i; - struct kunit_suite * const * const *suites; - struct kunit_case *test_case; - - /* Hack: print a tap header so kunit.py can find the start of KUnit output. 
*/ - pr_info("TAP version 14\n"); - - for (suites = suite_set->start; suites < suite_set->end; suites++) - for (i = 0; (*suites)[i] != NULL; i++) { - kunit_suite_for_each_test_case((*suites)[i], test_case) { - pr_info("%s.%s\n", (*suites)[i]->name, test_case->name); - } - } -} - int kunit_run_all_tests(void) { + struct kunit_suite * const * const *suites; struct suite_set suite_set = { .start = __kunit_suites_start, .end = __kunit_suites_end, @@ -240,15 +120,15 @@ int kunit_run_all_tests(void) if (filter_glob_param) suite_set = kunit_filter_suites(&suite_set, filter_glob_param); - if (!action_param) - kunit_exec_run_tests(&suite_set); - else if (strcmp(action_param, "list") == 0) - kunit_exec_list_tests(&suite_set); - else - pr_err("kunit executor: unknown action '%s'\n", action_param); + kunit_print_tap_header(&suite_set); + + for (suites = suite_set.start; suites < suite_set.end; suites++) + __kunit_test_suites_init(*suites); if (filter_glob_param) { /* a copy was made of each array */ - kunit_free_suite_set(suite_set); + for (suites = suite_set.start; suites < suite_set.end; suites++) + kfree(*suites); + kfree(suite_set.start); } kunit_handle_shutdown(); diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c index 4ed57fd94e..e14a18af57 100644 --- a/lib/kunit/executor_test.c +++ b/lib/kunit/executor_test.c @@ -9,103 +9,38 @@ #include static void kfree_at_end(struct kunit *test, const void *to_free); -static void free_subsuite_at_end(struct kunit *test, - struct kunit_suite *const *to_free); static struct kunit_suite *alloc_fake_suite(struct kunit *test, - const char *suite_name, - struct kunit_case *test_cases); - -static void dummy_test(struct kunit *test) {} - -static struct kunit_case dummy_test_cases[] = { - /* .run_case is not important, just needs to be non-NULL */ - { .name = "test1", .run_case = dummy_test }, - { .name = "test2", .run_case = dummy_test }, - {}, -}; - -static void parse_filter_test(struct kunit *test) -{ - struct kunit_test_filter filter = {NULL, NULL}; - - kunit_parse_filter_glob(&filter, "suite"); - KUNIT_EXPECT_STREQ(test, filter.suite_glob, "suite"); - KUNIT_EXPECT_FALSE(test, filter.test_glob); - kfree(filter.suite_glob); - kfree(filter.test_glob); - - kunit_parse_filter_glob(&filter, "suite.test"); - KUNIT_EXPECT_STREQ(test, filter.suite_glob, "suite"); - KUNIT_EXPECT_STREQ(test, filter.test_glob, "test"); - kfree(filter.suite_glob); - kfree(filter.test_glob); -} + const char *suite_name); static void filter_subsuite_test(struct kunit *test) { struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; struct kunit_suite * const *filtered; - struct kunit_test_filter filter = { - .suite_glob = "suite2", - .test_glob = NULL, - }; - subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases); - subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases); + subsuite[0] = alloc_fake_suite(test, "suite1"); + subsuite[1] = alloc_fake_suite(test, "suite2"); /* Want: suite1, suite2, NULL -> suite2, NULL */ - filtered = kunit_filter_subsuite(subsuite, &filter); + filtered = kunit_filter_subsuite(subsuite, "suite2*"); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered); - free_subsuite_at_end(test, filtered); + kfree_at_end(test, filtered); - /* Validate we just have suite2 */ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]); KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->name, "suite2"); + KUNIT_EXPECT_FALSE(test, filtered[1]); } -static void filter_subsuite_test_glob_test(struct kunit *test) -{ - struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; - 
struct kunit_suite * const *filtered; - struct kunit_test_filter filter = { - .suite_glob = "suite2", - .test_glob = "test2", - }; - - subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases); - subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases); - - /* Want: suite1, suite2, NULL -> suite2 (just test1), NULL */ - filtered = kunit_filter_subsuite(subsuite, &filter); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered); - free_subsuite_at_end(test, filtered); - - /* Validate we just have suite2 */ - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]); - KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->name, "suite2"); - KUNIT_EXPECT_FALSE(test, filtered[1]); - - /* Now validate we just have test2 */ - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]->test_cases); - KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->test_cases[0].name, "test2"); - KUNIT_EXPECT_FALSE(test, filtered[0]->test_cases[1].name); -} - static void filter_subsuite_to_empty_test(struct kunit *test) { struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; struct kunit_suite * const *filtered; - struct kunit_test_filter filter = { - .suite_glob = "not_found", - .test_glob = NULL, - }; - subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases); - subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases); + subsuite[0] = alloc_fake_suite(test, "suite1"); + subsuite[1] = alloc_fake_suite(test, "suite2"); - filtered = kunit_filter_subsuite(subsuite, &filter); - free_subsuite_at_end(test, filtered); /* just in case */ + filtered = kunit_filter_subsuite(subsuite, "not_found"); + kfree_at_end(test, filtered); /* just in case */ KUNIT_EXPECT_FALSE_MSG(test, filtered, "should be NULL to indicate no match"); @@ -117,7 +52,7 @@ static void kfree_subsuites_at_end(struct kunit *test, struct suite_set *suite_s kfree_at_end(test, suite_set->start); for (suites = suite_set->start; suites < suite_set->end; suites++) - free_subsuite_at_end(test, *suites); + kfree_at_end(test, *suites); } static void filter_suites_test(struct kunit *test) @@ -139,8 +74,8 @@ static void filter_suites_test(struct kunit *test) struct suite_set filtered = {.start = NULL, .end = NULL}; /* Emulate two files, each having one suite */ - subsuites[0][0] = alloc_fake_suite(test, "suite0", dummy_test_cases); - subsuites[1][0] = alloc_fake_suite(test, "suite1", dummy_test_cases); + subsuites[0][0] = alloc_fake_suite(test, "suite0"); + subsuites[1][0] = alloc_fake_suite(test, "suite1"); /* Filter out suite1 */ filtered = kunit_filter_suites(&suite_set, "suite0"); @@ -149,14 +84,11 @@ static void filter_suites_test(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start[0]); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start[0][0]); KUNIT_EXPECT_STREQ(test, (const char *)filtered.start[0][0]->name, "suite0"); } static struct kunit_case executor_test_cases[] = { - KUNIT_CASE(parse_filter_test), KUNIT_CASE(filter_subsuite_test), - KUNIT_CASE(filter_subsuite_test_glob_test), KUNIT_CASE(filter_subsuite_to_empty_test), KUNIT_CASE(filter_suites_test), {} @@ -188,30 +120,14 @@ static void kfree_at_end(struct kunit *test, const void *to_free) (void *)to_free); } -static void free_subsuite_res_free(struct kunit_resource *res) -{ - kunit_free_subsuite(res->data); -} - -static void free_subsuite_at_end(struct kunit *test, - struct kunit_suite *const *to_free) -{ - if (IS_ERR_OR_NULL(to_free)) - return; - kunit_alloc_resource(test, NULL, free_subsuite_res_free, - GFP_KERNEL, (void 
*)to_free); -} - static struct kunit_suite *alloc_fake_suite(struct kunit *test, - const char *suite_name, - struct kunit_case *test_cases) + const char *suite_name) { struct kunit_suite *suite; /* We normally never expect to allocate suites, hence the non-const cast. */ suite = kunit_kzalloc(test, sizeof(*suite), GFP_KERNEL); strncpy((char *)suite->name, suite_name, sizeof(suite->name) - 1); - suite->test_cases = test_cases; return suite; } diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 555601d17f..d69efcbed6 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -415,15 +415,12 @@ static struct kunit_suite kunit_log_test_suite = { static void kunit_log_test(struct kunit *test) { - struct kunit_suite suite; - - suite.log = kunit_kzalloc(test, KUNIT_LOG_SIZE, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, suite.log); + struct kunit_suite *suite = &kunit_log_test_suite; kunit_log(KERN_INFO, test, "put this in log."); kunit_log(KERN_INFO, test, "this too."); - kunit_log(KERN_INFO, &suite, "add to suite log."); - kunit_log(KERN_INFO, &suite, "along with this."); + kunit_log(KERN_INFO, suite, "add to suite log."); + kunit_log(KERN_INFO, suite, "along with this."); #ifdef CONFIG_KUNIT_DEBUGFS KUNIT_EXPECT_NOT_ERR_OR_NULL(test, @@ -431,11 +428,12 @@ static void kunit_log_test(struct kunit *test) KUNIT_EXPECT_NOT_ERR_OR_NULL(test, strstr(test->log, "this too.")); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, - strstr(suite.log, "add to suite log.")); + strstr(suite->log, "add to suite log.")); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, - strstr(suite.log, "along with this.")); + strstr(suite->log, "along with this.")); #else KUNIT_EXPECT_PTR_EQ(test, test->log, (char *)NULL); + KUNIT_EXPECT_PTR_EQ(test, suite->log, (char *)NULL); #endif } diff --git a/lib/kunit/test.c b/lib/kunit/test.c index c7ed4aabec..9aef816e57 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -190,10 +190,10 @@ enum kunit_status kunit_suite_has_succeeded(struct kunit_suite *suite) } EXPORT_SYMBOL_GPL(kunit_suite_has_succeeded); -static size_t kunit_suite_counter = 1; - static void kunit_print_subtest_end(struct kunit_suite *suite) { + static size_t kunit_suite_counter = 1; + kunit_print_ok_not_ok((void *)suite, false, kunit_suite_has_succeeded(suite), kunit_suite_counter++, @@ -512,8 +512,6 @@ int kunit_run_tests(struct kunit_suite *suite) /* Get initial param. 
*/ param_desc[0] = '\0'; test.param_value = test_case->generate_params(NULL, param_desc); - kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT - "# Subtest: %s", test_case->name); while (test.param_value) { kunit_run_case_catch_errors(suite, test_case, &test); @@ -524,8 +522,9 @@ int kunit_run_tests(struct kunit_suite *suite) } kunit_log(KERN_INFO, &test, - KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT - "%s %d - %s", + KUNIT_SUBTEST_INDENT + "# %s: %s %d - %s", + test_case->name, kunit_status_to_ok_not_ok(test.status), test.param_index + 1, param_desc); @@ -586,8 +585,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites) for (i = 0; suites[i] != NULL; i++) kunit_exit_suite(suites[i]); - - kunit_suite_counter = 1; } EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index be38a2c5ec..0dd434e404 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -17,7 +17,7 @@ void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch) { try_catch->try_result = -EFAULT; - kthread_complete_and_exit(try_catch->try_completion, -EFAULT); + complete_and_exit(try_catch->try_completion, -EFAULT); } EXPORT_SYMBOL_GPL(kunit_try_catch_throw); @@ -27,7 +27,7 @@ static int kunit_generic_run_threadfn_adapter(void *data) try_catch->try(try_catch->context); - kthread_complete_and_exit(try_catch->try_completion, 0); + complete_and_exit(try_catch->try_completion, 0); } static unsigned long kunit_test_timeout(void) diff --git a/lib/list_debug.c b/lib/list_debug.c index 9daa3fb9d1..5d5424b51b 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -49,11 +49,11 @@ bool __list_del_entry_valid(struct list_head *entry) "list_del corruption, %px->prev is LIST_POISON2 (%px)\n", entry, LIST_POISON2) || CHECK_DATA_CORRUPTION(prev->next != entry, - "list_del corruption. prev->next should be %px, but was %px. (prev=%px)\n", - entry, prev->next, prev) || + "list_del corruption. prev->next should be %px, but was %px\n", + entry, prev->next) || CHECK_DATA_CORRUPTION(next->prev != entry, - "list_del corruption. next->prev should be %px, but was %px. (next=%px)\n", - entry, next->prev, next)) + "list_del corruption. next->prev should be %px, but was %px\n", + entry, next->prev)) return false; return true; diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 8d24279fad..161108e5d2 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -26,12 +26,6 @@ #include #include -#ifdef CONFIG_PREEMPT_RT -# define NON_RT(...) -#else -# define NON_RT(...) 
__VA_ARGS__ -#endif - /* * Change this to 1 if you want to see the failure printouts: */ @@ -145,7 +139,7 @@ static DEFINE_RT_MUTEX(rtmutex_Z2); #endif -static DEFINE_PER_CPU(local_lock_t, local_A); +static local_lock_t local_A = INIT_LOCAL_LOCK(local_A); /* * non-inlined runtime initializers, to let separate locks share @@ -264,7 +258,7 @@ static void init_shared_classes(void) #define WWAF(x) ww_acquire_fini(x) #define WWL(x, c) ww_mutex_lock(x, c) -#define WWT(x) ww_mutex_trylock(x, NULL) +#define WWT(x) ww_mutex_trylock(x) #define WWL1(x) ww_mutex_lock(x, NULL) #define WWU(x) ww_mutex_unlock(x) @@ -718,18 +712,12 @@ GENERATE_TESTCASE(ABCDBCDA_rtmutex); #undef E -#ifdef CONFIG_PREEMPT_RT -# define RT_PREPARE_DBL_UNLOCK() { migrate_disable(); rcu_read_lock(); } -#else -# define RT_PREPARE_DBL_UNLOCK() -#endif /* * Double unlock: */ #define E() \ \ LOCK(A); \ - RT_PREPARE_DBL_UNLOCK(); \ UNLOCK(A); \ UNLOCK(A); /* fail */ @@ -814,7 +802,6 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) #include "locking-selftest-wlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-spin-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin) @@ -823,12 +810,10 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) -#endif #undef E1 #undef E2 -#ifndef CONFIG_PREEMPT_RT /* * Enabling hardirqs with a softirq-safe lock held: */ @@ -861,8 +846,6 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) #undef E1 #undef E2 -#endif - /* * Enabling irqs with an irq-safe lock held: */ @@ -892,7 +875,6 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) #include "locking-selftest-wlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-spin-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin) @@ -901,7 +883,6 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) -#endif #undef E1 #undef E2 @@ -940,7 +921,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) #include "locking-selftest-wlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-spin-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin) @@ -949,7 +929,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) -#endif #undef E1 #undef E2 @@ -990,7 +969,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) #include "locking-selftest-wlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-spin-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin) @@ -999,7 +977,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) -#endif #undef E1 #undef E2 @@ -1054,7 +1031,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock) #include "locking-selftest-wlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-spin-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin) @@ -1063,7 +1039,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock) #include "locking-selftest-wlock-softirq.h" 
GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) -#endif #undef E1 #undef E2 @@ -1231,14 +1206,12 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_rlock) #include "locking-selftest-wlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-softirq.h" #include "locking-selftest-rlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock) #include "locking-selftest-wlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock) -#endif #undef E1 #undef E2 @@ -1279,14 +1252,12 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_rlock) #include "locking-selftest-wlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-softirq.h" #include "locking-selftest-rlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock) #include "locking-selftest-wlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock) -#endif #undef E1 #undef E2 @@ -1335,14 +1306,12 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_rlock) #include "locking-selftest-wlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock) -#ifndef CONFIG_PREEMPT_RT #include "locking-selftest-softirq.h" #include "locking-selftest-rlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock) #include "locking-selftest-wlock.h" GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock) -#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) @@ -1351,7 +1320,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock) # define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map) # define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map) # define I_WW(x) lockdep_reset_lock(&x.dep_map) -# define I_LOCAL_LOCK(x) lockdep_reset_lock(this_cpu_ptr(&local_##x.dep_map)) +# define I_LOCAL_LOCK(x) lockdep_reset_lock(&local_##x.dep_map) #ifdef CONFIG_RT_MUTEXES # define I_RTMUTEX(x) lockdep_reset_lock(&rtmutex_##x.dep_map) #endif @@ -1411,7 +1380,7 @@ static void reset_locks(void) init_shared_classes(); raw_spin_lock_init(&raw_lock_A); raw_spin_lock_init(&raw_lock_B); - local_lock_init(this_cpu_ptr(&local_A)); + local_lock_init(&local_A); ww_mutex_init(&o, &ww_lockdep); ww_mutex_init(&o2, &ww_lockdep); ww_mutex_init(&o3, &ww_lockdep); memset(&t, 0, sizeof(t)); memset(&t2, 0, sizeof(t2)); @@ -1429,13 +1398,7 @@ static int unexpected_testcase_failures; static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) { - int saved_preempt_count = preempt_count(); -#ifdef CONFIG_PREEMPT_RT -#ifdef CONFIG_SMP - int saved_mgd_count = current->migration_disabled; -#endif - int saved_rcu_count = current->rcu_read_lock_nesting; -#endif + unsigned long saved_preempt_count = preempt_count(); WARN_ON(irqs_disabled()); @@ -1469,18 +1432,6 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) * count, so restore it: */ preempt_count_set(saved_preempt_count); - -#ifdef CONFIG_PREEMPT_RT -#ifdef CONFIG_SMP - while (current->migration_disabled > saved_mgd_count) - migrate_enable(); -#endif - - while (current->rcu_read_lock_nesting > saved_rcu_count) - rcu_read_unlock(); - WARN_ON_ONCE(current->rcu_read_lock_nesting < saved_rcu_count); -#endif - #ifdef CONFIG_TRACE_IRQFLAGS if (softirq_count()) current->softirqs_enabled = 0; @@ -1548,7 +1499,7 @@ static inline void print_testname(const char *testname) #define DO_TESTCASE_2x2RW(desc, name, nr) \ 
DO_TESTCASE_2RW("hard-"desc, name##_hard, nr) \ - NON_RT(DO_TESTCASE_2RW("soft-"desc, name##_soft, nr)) \ + DO_TESTCASE_2RW("soft-"desc, name##_soft, nr) \ #define DO_TESTCASE_6x2x2RW(desc, name) \ DO_TESTCASE_2x2RW(desc, name, 123); \ @@ -1596,19 +1547,19 @@ static inline void print_testname(const char *testname) #define DO_TESTCASE_2I(desc, name, nr) \ DO_TESTCASE_1("hard-"desc, name##_hard, nr); \ - NON_RT(DO_TESTCASE_1("soft-"desc, name##_soft, nr)); + DO_TESTCASE_1("soft-"desc, name##_soft, nr); #define DO_TESTCASE_2IB(desc, name, nr) \ DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \ - NON_RT(DO_TESTCASE_1B("soft-"desc, name##_soft, nr)); + DO_TESTCASE_1B("soft-"desc, name##_soft, nr); #define DO_TESTCASE_6I(desc, name, nr) \ DO_TESTCASE_3("hard-"desc, name##_hard, nr); \ - NON_RT(DO_TESTCASE_3("soft-"desc, name##_soft, nr)); + DO_TESTCASE_3("soft-"desc, name##_soft, nr); #define DO_TESTCASE_6IRW(desc, name, nr) \ DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \ - NON_RT(DO_TESTCASE_3RW("soft-"desc, name##_soft, nr)); + DO_TESTCASE_3RW("soft-"desc, name##_soft, nr); #define DO_TESTCASE_2x3(desc, name) \ DO_TESTCASE_3(desc, name, 12); \ @@ -1700,22 +1651,6 @@ static void ww_test_fail_acquire(void) #endif } -#ifdef CONFIG_PREEMPT_RT -#define ww_mutex_base_lock(b) rt_mutex_lock(b) -#define ww_mutex_base_trylock(b) rt_mutex_trylock(b) -#define ww_mutex_base_lock_nest_lock(b, b2) rt_mutex_lock_nest_lock(b, b2) -#define ww_mutex_base_lock_interruptible(b) rt_mutex_lock_interruptible(b) -#define ww_mutex_base_lock_killable(b) rt_mutex_lock_killable(b) -#define ww_mutex_base_unlock(b) rt_mutex_unlock(b) -#else -#define ww_mutex_base_lock(b) mutex_lock(b) -#define ww_mutex_base_trylock(b) mutex_trylock(b) -#define ww_mutex_base_lock_nest_lock(b, b2) mutex_lock_nest_lock(b, b2) -#define ww_mutex_base_lock_interruptible(b) mutex_lock_interruptible(b) -#define ww_mutex_base_lock_killable(b) mutex_lock_killable(b) -#define ww_mutex_base_unlock(b) mutex_unlock(b) -#endif - static void ww_test_normal(void) { int ret; @@ -1730,50 +1665,50 @@ static void ww_test_normal(void) /* mutex_lock (and indirectly, mutex_lock_nested) */ o.ctx = (void *)~0UL; - ww_mutex_base_lock(&o.base); - ww_mutex_base_unlock(&o.base); + mutex_lock(&o.base); + mutex_unlock(&o.base); WARN_ON(o.ctx != (void *)~0UL); /* mutex_lock_interruptible (and *_nested) */ o.ctx = (void *)~0UL; - ret = ww_mutex_base_lock_interruptible(&o.base); + ret = mutex_lock_interruptible(&o.base); if (!ret) - ww_mutex_base_unlock(&o.base); + mutex_unlock(&o.base); else WARN_ON(1); WARN_ON(o.ctx != (void *)~0UL); /* mutex_lock_killable (and *_nested) */ o.ctx = (void *)~0UL; - ret = ww_mutex_base_lock_killable(&o.base); + ret = mutex_lock_killable(&o.base); if (!ret) - ww_mutex_base_unlock(&o.base); + mutex_unlock(&o.base); else WARN_ON(1); WARN_ON(o.ctx != (void *)~0UL); /* trylock, succeeding */ o.ctx = (void *)~0UL; - ret = ww_mutex_base_trylock(&o.base); + ret = mutex_trylock(&o.base); WARN_ON(!ret); if (ret) - ww_mutex_base_unlock(&o.base); + mutex_unlock(&o.base); else WARN_ON(1); WARN_ON(o.ctx != (void *)~0UL); /* trylock, failing */ o.ctx = (void *)~0UL; - ww_mutex_base_lock(&o.base); - ret = ww_mutex_base_trylock(&o.base); + mutex_lock(&o.base); + ret = mutex_trylock(&o.base); WARN_ON(ret); - ww_mutex_base_unlock(&o.base); + mutex_unlock(&o.base); WARN_ON(o.ctx != (void *)~0UL); /* nest_lock */ o.ctx = (void *)~0UL; - ww_mutex_base_lock_nest_lock(&o.base, &t); - ww_mutex_base_unlock(&o.base); + mutex_lock_nest_lock(&o.base, &t); + 
mutex_unlock(&o.base); WARN_ON(o.ctx != (void *)~0UL); } @@ -1786,7 +1721,7 @@ static void ww_test_two_contexts(void) static void ww_test_diff_class(void) { WWAI(&t); -#ifdef DEBUG_WW_MUTEXES +#ifdef CONFIG_DEBUG_MUTEXES t.ww_class = NULL; #endif WWL(&o, &t); @@ -1850,7 +1785,7 @@ static void ww_test_edeadlk_normal(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); o2.ctx = &t2; mutex_release(&o2.base.dep_map, _THIS_IP_); @@ -1866,7 +1801,7 @@ static void ww_test_edeadlk_normal(void) o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); - ww_mutex_base_unlock(&o2.base); + mutex_unlock(&o2.base); WWU(&o); WWL(&o2, &t); @@ -1876,7 +1811,7 @@ static void ww_test_edeadlk_normal_slow(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; @@ -1892,7 +1827,7 @@ static void ww_test_edeadlk_normal_slow(void) o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); - ww_mutex_base_unlock(&o2.base); + mutex_unlock(&o2.base); WWU(&o); ww_mutex_lock_slow(&o2, &t); @@ -1902,7 +1837,7 @@ static void ww_test_edeadlk_no_unlock(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); o2.ctx = &t2; mutex_release(&o2.base.dep_map, _THIS_IP_); @@ -1918,7 +1853,7 @@ static void ww_test_edeadlk_no_unlock(void) o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); - ww_mutex_base_unlock(&o2.base); + mutex_unlock(&o2.base); WWL(&o2, &t); } @@ -1927,7 +1862,7 @@ static void ww_test_edeadlk_no_unlock_slow(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; @@ -1943,7 +1878,7 @@ static void ww_test_edeadlk_no_unlock_slow(void) o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); - ww_mutex_base_unlock(&o2.base); + mutex_unlock(&o2.base); ww_mutex_lock_slow(&o2, &t); } @@ -1952,7 +1887,7 @@ static void ww_test_edeadlk_acquire_more(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; @@ -1973,7 +1908,7 @@ static void ww_test_edeadlk_acquire_more_slow(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; @@ -1994,11 +1929,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; - ww_mutex_base_lock(&o3.base); + mutex_lock(&o3.base); mutex_release(&o3.base.dep_map, _THIS_IP_); o3.ctx = &t2; @@ -2020,11 +1955,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; - ww_mutex_base_lock(&o3.base); + mutex_lock(&o3.base); mutex_release(&o3.base.dep_map, _THIS_IP_); o3.ctx = &t2; @@ -2045,7 +1980,7 @@ static void ww_test_edeadlk_acquire_wrong(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; @@ -2070,7 +2005,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void) { int ret; - ww_mutex_base_lock(&o2.base); + mutex_lock(&o2.base); mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; @@ -2711,8 +2646,8 @@ static void wait_context_tests(void) static void local_lock_2(void) { - local_lock(&local_A); /* IRQ-ON */ - local_unlock(&local_A); + local_lock_acquire(&local_A); /* IRQ-ON */ + 
local_lock_release(&local_A); HARDIRQ_ENTER(); spin_lock(&lock_A); /* IN-IRQ */ @@ -2721,18 +2656,18 @@ static void local_lock_2(void) HARDIRQ_DISABLE(); spin_lock(&lock_A); - local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle, false */ - local_unlock(&local_A); + local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle, false */ + local_lock_release(&local_A); spin_unlock(&lock_A); HARDIRQ_ENABLE(); } static void local_lock_3A(void) { - local_lock(&local_A); /* IRQ-ON */ + local_lock_acquire(&local_A); /* IRQ-ON */ spin_lock(&lock_B); /* IRQ-ON */ spin_unlock(&lock_B); - local_unlock(&local_A); + local_lock_release(&local_A); HARDIRQ_ENTER(); spin_lock(&lock_A); /* IN-IRQ */ @@ -2741,18 +2676,18 @@ static void local_lock_3A(void) HARDIRQ_DISABLE(); spin_lock(&lock_A); - local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ - local_unlock(&local_A); + local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ + local_lock_release(&local_A); spin_unlock(&lock_A); HARDIRQ_ENABLE(); } static void local_lock_3B(void) { - local_lock(&local_A); /* IRQ-ON */ + local_lock_acquire(&local_A); /* IRQ-ON */ spin_lock(&lock_B); /* IRQ-ON */ spin_unlock(&lock_B); - local_unlock(&local_A); + local_lock_release(&local_A); HARDIRQ_ENTER(); spin_lock(&lock_A); /* IN-IRQ */ @@ -2761,8 +2696,8 @@ static void local_lock_3B(void) HARDIRQ_DISABLE(); spin_lock(&lock_A); - local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ - local_unlock(&local_A); + local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ + local_lock_release(&local_A); spin_unlock(&lock_A); HARDIRQ_ENABLE(); @@ -2877,7 +2812,7 @@ void locking_selftest(void) printk("------------------------\n"); printk("| Locking API testsuite:\n"); printk("----------------------------------------------------------------------------\n"); - printk(" | spin |wlock |rlock |mutex | wsem | rsem |rtmutex\n"); + printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n"); printk(" --------------------------------------------------------------------------\n"); init_shared_classes(); @@ -2950,11 +2885,12 @@ void locking_selftest(void) DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1); printk(" --------------------------------------------------------------------------\n"); + /* * irq-context testcases: */ DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); - NON_RT(DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A)); + DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3); DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4); diff --git a/lib/logic_iomem.c b/lib/logic_iomem.c index 8c3365f26e..549b22d4bc 100644 --- a/lib/logic_iomem.c +++ b/lib/logic_iomem.c @@ -76,7 +76,7 @@ static void __iomem *real_ioremap(phys_addr_t offset, size_t size) return NULL; } -static void real_iounmap(volatile void __iomem *addr) +static void real_iounmap(void __iomem *addr) { WARN(1, "invalid iounmap for addr 0x%llx\n", (unsigned long long)(uintptr_t __force)addr); @@ -149,7 +149,7 @@ get_area(const volatile void __iomem *addr) return NULL; } -void iounmap(volatile void __iomem *addr) +void iounmap(void __iomem *addr) { struct logic_iomem_area *area = get_area(addr); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 330aa539b4..673bd206aa 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -36,8 +36,6 @@ */ #include - -#include 
#include /* memset, memcpy */ #define FORCE_INLINE __always_inline diff --git a/lib/objagg.c b/lib/objagg.c index 1e248629ed..5e1676ccda 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -781,6 +781,7 @@ static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) struct objagg_tmp_node *node; struct objagg_tmp_node *pnode; struct objagg_obj *objagg_obj; + size_t alloc_size; int i, j; graph = kzalloc(sizeof(*graph), GFP_KERNEL); @@ -792,7 +793,9 @@ static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) goto err_nodes_alloc; graph->nodes_count = nodes_count; - graph->edges = bitmap_zalloc(nodes_count * nodes_count, GFP_KERNEL); + alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) * + sizeof(unsigned long); + graph->edges = kzalloc(alloc_size, GFP_KERNEL); if (!graph->edges) goto err_edges_alloc; @@ -830,7 +833,7 @@ static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph) { - bitmap_free(graph->edges); + kfree(graph->edges); kfree(graph->nodes); kfree(graph); } diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 45e1761942..c770570bfe 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -14,8 +14,6 @@ hostprogs += mktables ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec $(call cc-option,-mabi=altivec) -# Enable -altivec_flags += -isystem $(shell $(CC) -print-file-name=include) ifdef CONFIG_CC_IS_CLANG # clang ppc port does not yet support -maltivec when -msoft-float is @@ -36,8 +34,6 @@ endif # ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) ifeq ($(CONFIG_KERNEL_MODE_NEON),y) NEON_FLAGS := -ffreestanding -# Enable -NEON_FLAGS += -isystem $(shell $(CC) -print-file-name=include) ifeq ($(ARCH),arm) NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon endif diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 39b74221f4..6d5e5000fd 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -145,13 +145,13 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) { - unsigned long perf, bestgenperf, j0, j1; + unsigned long perf, bestgenperf, bestxorperf, j0, j1; int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; - for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { - if (!best || (*algo)->priority >= best->priority) { + for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + if (!best || (*algo)->prefer >= best->prefer) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -180,48 +180,50 @@ static inline const struct raid6_calls *raid6_choose_gen( pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, (perf * HZ * (disks-2)) >> (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + + if (!(*algo)->xor_syndrome) + continue; + + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ((j1 = jiffies) == j0) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<xor_syndrome(disks, start, stop, + PAGE_SIZE, *dptrs); + perf++; + } + preempt_enable(); + + if (best == *algo) + bestxorperf = perf; + + pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, + (perf * HZ * (disks-2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); } } - if (!best) { - pr_err("raid6: Yikes! 
No algorithm found!\n"); - goto out; - } + if (best) { + if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", + best->name, + (bestgenperf * HZ * (disks-2)) >> + (20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2)); + if (best->xor_syndrome) + pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", + (bestxorperf * HZ * (disks-2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + } else + pr_info("raid6: skip pq benchmark and using algorithm %s\n", + best->name); + raid6_call = *best; + } else + pr_err("raid6: Yikes! No algorithm found!\n"); - raid6_call = *best; - - if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { - pr_info("raid6: skipped pq benchmark and selected %s\n", - best->name); - goto out; - } - - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", - best->name, - (bestgenperf * HZ * (disks - 2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); - - if (best->xor_syndrome) { - perf = 0; - - preempt_disable(); - j0 = jiffies; - while ((j1 = jiffies) == j0) - cpu_relax(); - while (time_before(jiffies, - j1 + (1 << RAID6_TIME_JIFFIES_LG2))) { - best->xor_syndrome(disks, start, stop, - PAGE_SIZE, *dptrs); - perf++; - } - preempt_enable(); - - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", - (perf * HZ * (disks - 2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); - } - -out: return best; } diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index 059024234d..f299476e1d 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c @@ -132,7 +132,7 @@ const struct raid6_calls raid6_avx2x1 = { raid6_avx21_xor_syndrome, raid6_have_avx2, "avx2x1", - .priority = 2 /* Prefer AVX2 over priority 1 (SSE2 and others) */ + 1 /* Has cache hints */ }; /* @@ -262,7 +262,7 @@ const struct raid6_calls raid6_avx2x2 = { raid6_avx22_xor_syndrome, raid6_have_avx2, "avx2x2", - .priority = 2 /* Prefer AVX2 over priority 1 (SSE2 and others) */ + 1 /* Has cache hints */ }; #ifdef CONFIG_X86_64 @@ -465,6 +465,6 @@ const struct raid6_calls raid6_avx2x4 = { raid6_avx24_xor_syndrome, raid6_have_avx2, "avx2x4", - .priority = 2 /* Prefer AVX2 over priority 1 (SSE2 and others) */ + 1 /* Has cache hints */ }; -#endif /* CONFIG_X86_64 */ +#endif diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c index 9c3e822e1a..bb684d144e 100644 --- a/lib/raid6/avx512.c +++ b/lib/raid6/avx512.c @@ -162,7 +162,7 @@ const struct raid6_calls raid6_avx512x1 = { raid6_avx5121_xor_syndrome, raid6_have_avx512, "avx512x1", - .priority = 2 /* Prefer AVX512 over priority 1 (SSE2 and others) */ + 1 /* Has cache hints */ }; /* @@ -319,7 +319,7 @@ const struct raid6_calls raid6_avx512x2 = { raid6_avx5122_xor_syndrome, raid6_have_avx512, "avx512x2", - .priority = 2 /* Prefer AVX512 over priority 1 (SSE2 and others) */ + 1 /* Has cache hints */ }; #ifdef CONFIG_X86_64 @@ -557,7 +557,7 @@ const struct raid6_calls raid6_avx512x4 = { raid6_avx5124_xor_syndrome, raid6_have_avx512, "avx512x4", - .priority = 2 /* Prefer AVX512 over priority 1 (SSE2 and others) */ + 1 /* Has cache hints */ }; #endif diff --git a/lib/random32.c b/lib/random32.c index a57a0e1881..4d0e05e471 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 09d293c30f..b25db9be93 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -457,9 +457,10 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -static inline void __sbitmap_queue_update_wake_batch(struct 
sbitmap_queue *sbq, - unsigned int wake_batch) +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) { + unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); int i; if (sbq->wake_batch != wake_batch) { @@ -475,30 +476,6 @@ static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, } } -static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, - unsigned int depth) -{ - unsigned int wake_batch; - - wake_batch = sbq_calc_wake_batch(sbq, depth); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); -} - -void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, - unsigned int users) -{ - unsigned int wake_batch; - unsigned int min_batch; - unsigned int depth = (sbq->sb.depth + users - 1) / users; - - min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1; - - wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, - min_batch, SBQ_WAKE_BATCH); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); -} -EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); - void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { sbitmap_queue_update_wake_batch(sbq, depth); @@ -512,57 +489,6 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq) } EXPORT_SYMBOL_GPL(__sbitmap_queue_get); -unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, - unsigned int *offset) -{ - struct sbitmap *sb = &sbq->sb; - unsigned int hint, depth; - unsigned long index, nr; - int i; - - if (unlikely(sb->round_robin)) - return 0; - - depth = READ_ONCE(sb->depth); - hint = update_alloc_hint_before_get(sb, depth); - - index = SB_NR_TO_INDEX(sb, hint); - - for (i = 0; i < sb->map_nr; i++) { - struct sbitmap_word *map = &sb->map[index]; - unsigned long get_mask; - - sbitmap_deferred_clear(map); - if (map->word == (1UL << (map->depth - 1)) - 1) - continue; - - nr = find_first_zero_bit(&map->word, map->depth); - if (nr + nr_tags <= map->depth) { - atomic_long_t *ptr = (atomic_long_t *) &map->word; - int map_tags = min_t(int, nr_tags, map->depth); - unsigned long val, ret; - - get_mask = ((1UL << map_tags) - 1) << nr; - do { - val = READ_ONCE(map->word); - ret = atomic_long_cmpxchg(ptr, val, get_mask | val); - } while (ret != val); - get_mask = (get_mask & ~ret) >> nr; - if (get_mask) { - *offset = nr + (index << sb->shift); - update_alloc_hint_after_get(sb, depth, hint, - *offset + map_tags - 1); - return get_mask; - } - } - /* Jump to next index. 
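The __sbitmap_queue_get_batch() implementation removed above claims several tags at once by installing a whole bit mask with one compare-and-swap on a bitmap word. A minimal userspace analog of that loop using C11 atomics (illustrative only, assuming 1 <= nr_tags < 64; the kernel version additionally handles allocation hints, deferred clears and wrap-around to the next word):

#include <stdatomic.h>
#include <stdio.h>

static unsigned long get_batch(_Atomic unsigned long *word, int nr_tags,
                               unsigned int *offset)
{
        unsigned long val = atomic_load(word);

        for (;;) {
                unsigned long mask;
                int nr;

                if (val == ~0UL)
                        return 0;               /* word is full */
                nr = __builtin_ctzl(~val);      /* first zero bit */
                if (nr + nr_tags > 64)
                        return 0;               /* batch does not fit */
                mask = ((1UL << nr_tags) - 1) << nr;
                if (atomic_compare_exchange_weak(word, &val, val | mask)) {
                        *offset = nr;
                        return (mask & ~val) >> nr; /* bits actually grabbed */
                }
                /* failed CAS reloaded val; retry against the fresh word */
        }
}

int main(void)
{
        _Atomic unsigned long word = 0xfUL;     /* tags 0-3 busy */
        unsigned int off;
        unsigned long got = get_batch(&word, 3, &off);

        printf("mask %#lx at offset %u\n", got, off); /* mask 0x7 at offset 4 */
        return 0;
}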
*/ - if (++index >= sb->map_nr) - index = 0; - } - - return 0; -} - int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth) { @@ -651,46 +577,6 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); -static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag) -{ - if (likely(!sb->round_robin && tag < sb->depth)) - data_race(*per_cpu_ptr(sb->alloc_hint, cpu) = tag); -} - -void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, - int *tags, int nr_tags) -{ - struct sbitmap *sb = &sbq->sb; - unsigned long *addr = NULL; - unsigned long mask = 0; - int i; - - smp_mb__before_atomic(); - for (i = 0; i < nr_tags; i++) { - const int tag = tags[i] - offset; - unsigned long *this_addr; - - /* since we're clearing a batch, skip the deferred map */ - this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word; - if (!addr) { - addr = this_addr; - } else if (addr != this_addr) { - atomic_long_andnot(mask, (atomic_long_t *) addr); - mask = 0; - addr = this_addr; - } - mask |= (1UL << SB_NR_TO_BIT(sb, tag)); - } - - if (mask) - atomic_long_andnot(mask, (atomic_long_t *) addr); - - smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq); - sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(), - tags[nr_tags - 1] - offset); -} - void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { @@ -715,7 +601,9 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, */ smp_mb__after_atomic(); sbitmap_queue_wake_up(sbq); - sbitmap_update_cpu_hint(&sbq->sb, cpu, nr); + + if (likely(!sbq->sb.round_robin && nr < sbq->sb.depth)) + *per_cpu_ptr(sbq->sb.alloc_hint, cpu) = nr; } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d5e82e4a57..abb3432ed7 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -828,7 +828,8 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) * stops @miter. * * Context: - * Don't care. + * Don't care if @miter is stopped, or not proceeded yet. + * Otherwise, preemption disabled if the SG_MITER_ATOMIC is set. * * Returns: * true if @miter contains the valid mapping. false if end of sg @@ -864,7 +865,8 @@ EXPORT_SYMBOL(sg_miter_skip); * @miter->addr and @miter->length point to the current mapping. * * Context: - * May sleep if !SG_MITER_ATOMIC. + * Preemption disabled if SG_MITER_ATOMIC. Preemption must stay disabled + * till @miter is stopped. May sleep if !SG_MITER_ATOMIC. * * Returns: * true if @miter contains the next mapping. false if end of sg @@ -904,7 +906,8 @@ EXPORT_SYMBOL(sg_miter_next); * need to be released during iteration. * * Context: - * Don't care otherwise. + * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care + * otherwise. 
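The sg_mapping_iter kernel-doc adjusted above is easiest to read against a typical loop. A sketch of the documented calling convention (sg_sum_bytes() is our name; sgl and nents come from the caller); with SG_MITER_ATOMIC the window between sg_miter_next() and sg_miter_stop() sits under kmap_atomic() and must not sleep:

#include <linux/scatterlist.h>

static size_t sg_sum_bytes(struct scatterlist *sgl, unsigned int nents)
{
        struct sg_mapping_iter miter;
        size_t sum = 0, i;

        sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
        while (sg_miter_next(&miter)) {         /* maps one page */
                const u8 *p = miter.addr;

                for (i = 0; i < miter.length; i++)
                        sum += p[i];
        }
        sg_miter_stop(&miter);                  /* drops the atomic kmap */
        return sum;
}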
*/ void sg_miter_stop(struct sg_mapping_iter *miter) { @@ -919,7 +922,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) flush_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { - WARN_ON_ONCE(!pagefault_disabled()); + WARN_ON_ONCE(preemptible()); kunmap_atomic(miter->addr); } else kunmap(miter->page); diff --git a/lib/sha1.c b/lib/sha1.c index 0494766fc5..9bd1935a14 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -56,8 +55,7 @@ #define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ __u32 TEMP = input(t); setW(t, TEMP); \ E += TEMP + rol32(A,5) + (fn) + (constant); \ - B = ror32(B, 2); \ - TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0) + B = ror32(B, 2); } while (0) #define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) #define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) @@ -86,7 +84,6 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) { __u32 A, B, C, D, E; - unsigned int i = 0; A = digest[0]; B = digest[1]; @@ -95,24 +92,94 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) E = digest[4]; /* Round 1 - iterations 0-16 take their input from 'data' */ - for (; i < 16; ++i) - T_0_15(i, A, B, C, D, E); + T_0_15( 0, A, B, C, D, E); + T_0_15( 1, E, A, B, C, D); + T_0_15( 2, D, E, A, B, C); + T_0_15( 3, C, D, E, A, B); + T_0_15( 4, B, C, D, E, A); + T_0_15( 5, A, B, C, D, E); + T_0_15( 6, E, A, B, C, D); + T_0_15( 7, D, E, A, B, C); + T_0_15( 8, C, D, E, A, B); + T_0_15( 9, B, C, D, E, A); + T_0_15(10, A, B, C, D, E); + T_0_15(11, E, A, B, C, D); + T_0_15(12, D, E, A, B, C); + T_0_15(13, C, D, E, A, B); + T_0_15(14, B, C, D, E, A); + T_0_15(15, A, B, C, D, E); /* Round 1 - tail. 
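The sha1.c hunk above removes the runtime TEMP/E/D/C/B/A shuffle from SHA_ROUND and instead unrolls all 80 rounds, rotating the five state words by renaming the macro arguments at each call site, so no data ever moves between registers. A standalone toy of the same trick; ROUND() here is a stand-in, not the real SHA-1 round function:

#include <stdint.h>
#include <stdio.h>

#define ROUND(A, B, C, D, E) do {               \
                E += (A ^ B) + C;               \
                B = (B << 2) | (B >> 30);       \
        } while (0)

int main(void)
{
        uint32_t a = 1, b = 2, c = 3, d = 4, e = 5;

        ROUND(a, b, c, d, e);   /* round 0 */
        ROUND(e, a, b, c, d);   /* round 1: names rotated, no data moved */
        ROUND(d, e, a, b, c);   /* round 2 */

        printf("%u %u %u %u %u\n", a, b, c, d, e);
        return 0;
}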
Input from 512-bit mixing array */ - for (; i < 20; ++i) - T_16_19(i, A, B, C, D, E); + T_16_19(16, E, A, B, C, D); + T_16_19(17, D, E, A, B, C); + T_16_19(18, C, D, E, A, B); + T_16_19(19, B, C, D, E, A); /* Round 2 */ - for (; i < 40; ++i) - T_20_39(i, A, B, C, D, E); + T_20_39(20, A, B, C, D, E); + T_20_39(21, E, A, B, C, D); + T_20_39(22, D, E, A, B, C); + T_20_39(23, C, D, E, A, B); + T_20_39(24, B, C, D, E, A); + T_20_39(25, A, B, C, D, E); + T_20_39(26, E, A, B, C, D); + T_20_39(27, D, E, A, B, C); + T_20_39(28, C, D, E, A, B); + T_20_39(29, B, C, D, E, A); + T_20_39(30, A, B, C, D, E); + T_20_39(31, E, A, B, C, D); + T_20_39(32, D, E, A, B, C); + T_20_39(33, C, D, E, A, B); + T_20_39(34, B, C, D, E, A); + T_20_39(35, A, B, C, D, E); + T_20_39(36, E, A, B, C, D); + T_20_39(37, D, E, A, B, C); + T_20_39(38, C, D, E, A, B); + T_20_39(39, B, C, D, E, A); /* Round 3 */ - for (; i < 60; ++i) - T_40_59(i, A, B, C, D, E); + T_40_59(40, A, B, C, D, E); + T_40_59(41, E, A, B, C, D); + T_40_59(42, D, E, A, B, C); + T_40_59(43, C, D, E, A, B); + T_40_59(44, B, C, D, E, A); + T_40_59(45, A, B, C, D, E); + T_40_59(46, E, A, B, C, D); + T_40_59(47, D, E, A, B, C); + T_40_59(48, C, D, E, A, B); + T_40_59(49, B, C, D, E, A); + T_40_59(50, A, B, C, D, E); + T_40_59(51, E, A, B, C, D); + T_40_59(52, D, E, A, B, C); + T_40_59(53, C, D, E, A, B); + T_40_59(54, B, C, D, E, A); + T_40_59(55, A, B, C, D, E); + T_40_59(56, E, A, B, C, D); + T_40_59(57, D, E, A, B, C); + T_40_59(58, C, D, E, A, B); + T_40_59(59, B, C, D, E, A); /* Round 4 */ - for (; i < 80; ++i) - T_60_79(i, A, B, C, D, E); + T_60_79(60, A, B, C, D, E); + T_60_79(61, E, A, B, C, D); + T_60_79(62, D, E, A, B, C); + T_60_79(63, C, D, E, A, B); + T_60_79(64, B, C, D, E, A); + T_60_79(65, A, B, C, D, E); + T_60_79(66, E, A, B, C, D); + T_60_79(67, D, E, A, B, C); + T_60_79(68, C, D, E, A, B); + T_60_79(69, B, C, D, E, A); + T_60_79(70, A, B, C, D, E); + T_60_79(71, E, A, B, C, D); + T_60_79(72, D, E, A, B, C); + T_60_79(73, C, D, E, A, B); + T_60_79(74, B, C, D, E, A); + T_60_79(75, A, B, C, D, E); + T_60_79(76, E, A, B, C, D); + T_60_79(77, D, E, A, B, C); + T_60_79(78, C, D, E, A, B); + T_60_79(79, B, C, D, E, A); digest[0] += A; digest[1] += B; diff --git a/lib/stackdepot.c b/lib/stackdepot.c index bf5ba9af05..0a2e417f83 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -20,10 +20,10 @@ */ #include +#include #include #include #include -#include #include #include #include @@ -102,8 +102,8 @@ static bool init_stack_slab(void **prealloc) } /* Allocation of a new stack in raw storage */ -static struct stack_record * -depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) +static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, + u32 hash, void **prealloc, gfp_t alloc_flags) { struct stack_record *stack; size_t required_size = struct_size(stack, entries, size); @@ -162,40 +162,18 @@ static int __init is_stack_depot_disabled(char *str) } early_param("stack_depot_disable", is_stack_depot_disabled); -/* - * __ref because of memblock_alloc(), which will not be actually called after - * the __init code is gone, because at that point slab_is_available() is true - */ -__ref int stack_depot_init(void) +int __init stack_depot_init(void) { - static DEFINE_MUTEX(stack_depot_init_mutex); - - mutex_lock(&stack_depot_init_mutex); - if (!stack_depot_disable && !stack_table) { + if (!stack_depot_disable) { size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); int i; - if (slab_is_available()) { - 
pr_info("Stack Depot allocating hash table with kvmalloc\n"); - stack_table = kvmalloc(size, GFP_KERNEL); - } else { - pr_info("Stack Depot allocating hash table with memblock_alloc\n"); - stack_table = memblock_alloc(size, SMP_CACHE_BYTES); - } - if (stack_table) { - for (i = 0; i < STACK_HASH_SIZE; i++) - stack_table[i] = NULL; - } else { - pr_err("Stack Depot hash table allocation failed, disabling\n"); - stack_depot_disable = true; - mutex_unlock(&stack_depot_init_mutex); - return -ENOMEM; - } + stack_table = memblock_alloc(size, size); + for (i = 0; i < STACK_HASH_SIZE; i++) + stack_table[i] = NULL; } - mutex_unlock(&stack_depot_init_mutex); return 0; } -EXPORT_SYMBOL_GPL(stack_depot_init); /* Calculate hash for a stack */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) @@ -236,49 +214,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } -/** - * stack_depot_snprint - print stack entries from a depot into a buffer - * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). - * @buf: Pointer to the print buffer - * - * @size: Size of the print buffer - * - * @spaces: Number of leading spaces to print - * - * Return: Number of bytes printed. - */ -int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, - int spaces) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(handle, &entries); - return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, - spaces) : 0; -} -EXPORT_SYMBOL_GPL(stack_depot_snprint); - -/** - * stack_depot_print - print stack entries from a depot - * - * @stack: Stack depot handle which was returned from - * stack_depot_save(). - * - */ -void stack_depot_print(depot_stack_handle_t stack) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - if (nr_entries > 0) - stack_trace_print(entries, nr_entries, 0); -} -EXPORT_SYMBOL_GPL(stack_depot_print); - /** * stack_depot_fetch - Fetch stack entries from a depot * @@ -297,9 +232,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, struct stack_record *stack; *entries = NULL; - if (!handle) - return 0; - if (parts.slabindex > depot_index) { WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", parts.slabindex, depot_index, handle); @@ -316,31 +248,17 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, EXPORT_SYMBOL_GPL(stack_depot_fetch); /** - * __stack_depot_save - Save a stack trace from an array + * stack_depot_save - Save a stack trace from an array * * @entries: Pointer to storage array * @nr_entries: Size of the storage array * @alloc_flags: Allocation gfp flags - * @can_alloc: Allocate stack slabs (increased chance of failure if false) * - * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, is allowed to replenish the stack slab pool in case no space is left - * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids - * any allocations and will fail if no space is left to store the stack trace. - * - * If the stack trace in @entries is from an interrupt, only the portion up to - * interrupt entry is saved. - * - * Context: Any context, but setting @can_alloc to %false is required if - * alloc_pages() cannot be used from the current context. Currently - * this is the case from contexts where neither %GFP_ATOMIC nor - * %GFP_NOWAIT can be used (NMI, raw_spin_lock). 
- * - * Return: The handle of the stack struct stored in depot, 0 on failure. + * Return: The handle of the stack struct stored in depot */ -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - gfp_t alloc_flags, bool can_alloc) +depot_stack_handle_t stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags) { struct stack_record *found = NULL, **bucket; depot_stack_handle_t retval = 0; @@ -349,16 +267,6 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned long flags; u32 hash; - /* - * If this stack trace is from an interrupt, including anything before - * interrupt entry usually leads to unbounded stackdepot growth. - * - * Because use of filter_irq_stacks() is a requirement to ensure - * stackdepot can efficiently deduplicate interrupt stacks, always - * filter_irq_stacks() to simplify all callers' use of stackdepot. - */ - nr_entries = filter_irq_stacks(entries, nr_entries); - if (unlikely(nr_entries == 0) || stack_depot_disable) goto fast_exit; @@ -383,7 +291,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * The smp_load_acquire() here pairs with smp_store_release() to * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). */ - if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) { + if (unlikely(!smp_load_acquire(&next_slab_inited))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -401,8 +309,9 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { - struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); - + struct stack_record *new = + depot_alloc_stack(entries, nr_entries, + hash, &prealloc, alloc_flags); if (new) { new->next = *bucket; /* @@ -431,24 +340,27 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, fast_exit: return retval; } -EXPORT_SYMBOL_GPL(__stack_depot_save); - -/** - * stack_depot_save - Save a stack trace from an array - * - * @entries: Pointer to storage array - * @nr_entries: Size of the storage array - * @alloc_flags: Allocation gfp flags - * - * Context: Contexts where allocations via alloc_pages() are allowed. - * See __stack_depot_save() for more details. - * - * Return: The handle of the stack struct stored in depot, 0 on failure. - */ -depot_stack_handle_t stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - gfp_t alloc_flags) -{ - return __stack_depot_save(entries, nr_entries, alloc_flags, true); -} EXPORT_SYMBOL_GPL(stack_depot_save); + +static inline int in_irqentry_text(unsigned long ptr) +{ + return (ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end) || + (ptr >= (unsigned long)&__softirqentry_text_start && + ptr < (unsigned long)&__softirqentry_text_end); +} + +unsigned int filter_irq_stacks(unsigned long *entries, + unsigned int nr_entries) +{ + unsigned int i; + + for (i = 0; i < nr_entries; i++) { + if (in_irqentry_text(entries[i])) { + /* Include the irqentry function into the stack. 
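filter_irq_stacks(), moved back into stackdepot above, truncates a trace at the first hard- or soft-irq entry function; everything below that point differs from interrupt to interrupt and would otherwise grow the depot without bound. In this older API the caller filters explicitly before saving (sketch; the local names are ours):

static depot_stack_handle_t save_filtered(void)
{
        unsigned long entries[32];
        unsigned int nr;

        nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        nr = filter_irq_stacks(entries, nr);    /* keep up to irqentry */
        return stack_depot_save(entries, nr, GFP_ATOMIC);
}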
*/ + return i + 1; + } + } + return nr_entries; +} +EXPORT_SYMBOL_GPL(filter_irq_stacks); diff --git a/lib/string.c b/lib/string.c index 485777c9da..b2de45a581 100644 --- a/lib/string.c +++ b/lib/string.c @@ -6,15 +6,20 @@ */ /* - * This file should be used only for "library" routines that may have - * alternative implementations on specific architectures (generally - * found in ), or get overloaded by FORTIFY_SOURCE. - * (Specifically, this file is built with __NO_FORTIFY.) + * stupid library routines.. The optimized versions should generally be found + * as inline code in * - * Other helper functions should live in string_helpers.c. + * These are buggy as well.. + * + * * Fri Jun 25 1999, Ingo Oeser + * - Added strsep() which will replace strtok() soon (because strsep() is + * reentrant and should be faster). Use only strsep() in new code, please. + * + * * Sat Feb 09 2002, Jason Thomas , + * Matthew Hawkins + * - Kissed strtok() goodbye */ -#define __NO_FORTIFY #include #include #include @@ -233,6 +238,40 @@ ssize_t strscpy(char *dest, const char *src, size_t count) EXPORT_SYMBOL(strscpy); #endif +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, zeros + * the tail of the destination buffer. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Returns: + * * The number of characters copied (not including the trailing %NUL) + * * -E2BIG if count is 0 or @src was truncated. + */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count) +{ + ssize_t written; + + written = strscpy(dest, src, count); + if (written < 0 || written == count - 1) + return written; + + memset(dest + written + 1, 0, count - written - 1); + + return written; +} +EXPORT_SYMBOL(strscpy_pad); + /** * stpcpy - copy a string from src to dest returning a pointer to the new end * of dest, including src's %NUL-terminator. May overrun dest. @@ -475,6 +514,46 @@ char *strnchr(const char *s, size_t count, int c) EXPORT_SYMBOL(strnchr); #endif +/** + * skip_spaces - Removes leading whitespace from @str. + * @str: The string to be stripped. + * + * Returns a pointer to the first non-whitespace character in @str. + */ +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} +EXPORT_SYMBOL(skip_spaces); + +/** + * strim - Removes leading and trailing whitespace from @s. + * @s: The string to be stripped. + * + * Note that the first trailing whitespace is replaced with a %NUL-terminator + * in the given string @s. Returns a pointer to the first non-whitespace + * character in @s. 
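strscpy_pad(), added back to lib/string.c above, differs from strscpy() only in zeroing the tail of the destination, which matters when the buffer is later copied to user space or compared as a whole. A userspace model of the documented semantics (scpy_pad() is our stand-in and -1 stands in for -E2BIG):

#include <stdio.h>
#include <string.h>

static long scpy_pad(char *dst, const char *src, size_t count)
{
        size_t len;

        if (count == 0)
                return -1;                      /* models -E2BIG */
        len = strnlen(src, count);
        if (len == count) {                     /* source was truncated */
                memcpy(dst, src, count - 1);
                dst[count - 1] = '\0';
                return -1;
        }
        memcpy(dst, src, len + 1);              /* string plus NUL */
        memset(dst + len + 1, 0, count - len - 1); /* zero the tail */
        return (long)len;
}

int main(void)
{
        char buf[8];

        printf("%ld '%s'\n", scpy_pad(buf, "hi", sizeof(buf)), buf);
        printf("%ld\n", scpy_pad(buf, "far too long", sizeof(buf)));
        return 0;
}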
+ */ +char *strim(char *s) +{ + size_t size; + char *end; + + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return skip_spaces(s); +} +EXPORT_SYMBOL(strim); + #ifndef __HAVE_ARCH_STRLEN /** * strlen - Find the length of a string @@ -609,6 +688,101 @@ char *strsep(char **s, const char *ct) EXPORT_SYMBOL(strsep); #endif +/** + * sysfs_streq - return true if strings are equal, modulo trailing newline + * @s1: one string + * @s2: another string + * + * This routine returns true iff two strings are equal, treating both + * NUL and newline-then-NUL as equivalent string terminations. It's + * geared for use with sysfs input strings, which generally terminate + * with newlines but are compared against values without newlines. + */ +bool sysfs_streq(const char *s1, const char *s2) +{ + while (*s1 && *s1 == *s2) { + s1++; + s2++; + } + + if (*s1 == *s2) + return true; + if (!*s1 && *s2 == '\n' && !s2[1]) + return true; + if (*s1 == '\n' && !s1[1] && !*s2) + return true; + return false; +} +EXPORT_SYMBOL(sysfs_streq); + +/** + * match_string - matches given string in an array + * @array: array of strings + * @n: number of strings in the array or -1 for NULL terminated arrays + * @string: string to match with + * + * This routine will look for a string in an array of strings up to the + * n-th element in the array or until the first NULL element. + * + * Historically the value of -1 for @n, was used to search in arrays that + * are NULL terminated. However, the function does not make a distinction + * when finishing the search: either @n elements have been compared OR + * the first NULL element was found. + * + * Return: + * index of a @string in the @array if matches, or %-EINVAL otherwise. + */ +int match_string(const char * const *array, size_t n, const char *string) +{ + int index; + const char *item; + + for (index = 0; index < n; index++) { + item = array[index]; + if (!item) + break; + if (!strcmp(item, string)) + return index; + } + + return -EINVAL; +} +EXPORT_SYMBOL(match_string); + +/** + * __sysfs_match_string - matches given string in an array + * @array: array of strings + * @n: number of strings in the array or -1 for NULL terminated arrays + * @str: string to match with + * + * Returns index of @str in the @array or -EINVAL, just like match_string(). + * Uses sysfs_streq instead of strcmp for matching. + * + * This routine will look for a string in an array of strings up to the + * n-th element in the array or until the first NULL element. + * + * Historically the value of -1 for @n, was used to search in arrays that + * are NULL terminated. However, the function does not make a distinction + * when finishing the search: either @n elements have been compared OR + * the first NULL element was found. + */ +int __sysfs_match_string(const char * const *array, size_t n, const char *str) +{ + const char *item; + int index; + + for (index = 0; index < n; index++) { + item = array[index]; + if (!item) + break; + if (sysfs_streq(item, str)) + return index; + } + + return -EINVAL; +} +EXPORT_SYMBOL(__sysfs_match_string); + #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value @@ -967,3 +1141,27 @@ void *memchr_inv(const void *start, int c, size_t bytes) return check_bytes8(start, value, bytes % 8); } EXPORT_SYMBOL(memchr_inv); + +/** + * strreplace - Replace all occurrences of character in string. + * @s: The string to operate on. + * @old: The character being replaced. 
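match_string() and __sysfs_match_string(), restored above, are the usual way to turn a string (for instance raw sysfs input) into a table index. A sketch of a store() handler built on them (the attribute, table and handler names are hypothetical):

static const char * const demo_modes[] = { "off", "auto", "on", NULL };

static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
                          const char *buf, size_t count)
{
        /* sysfs_streq() tolerates the trailing '\n' of "on\n" etc. */
        int idx = __sysfs_match_string(demo_modes, -1, buf);

        if (idx < 0)
                return idx;             /* -EINVAL: no match */
        /* ... apply demo_modes[idx] ... */
        return count;
}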
+ * @new: The character @old is replaced with. + * + * Returns pointer to the nul byte at the end of @s. + */ +char *strreplace(char *s, char old, char new) +{ + for (; *s; ++s) + if (*s == old) + *s = new; + return s; +} +EXPORT_SYMBOL(strreplace); + +void fortify_panic(const char *name) +{ + pr_emerg("detected buffer overflow in %s\n", name); + BUG(); +} +EXPORT_SYMBOL(fortify_panic); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 90f9f1b7af..2ddc10bd9a 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -675,39 +674,6 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp) } EXPORT_SYMBOL_GPL(kstrdup_quotable_file); -/** - * kasprintf_strarray - allocate and fill array of sequential strings - * @gfp: flags for the slab allocator - * @prefix: prefix to be used - * @n: amount of lines to be allocated and filled - * - * Allocates and fills @n strings using pattern "%s-%zu", where prefix - * is provided by caller. The caller is responsible to free them with - * kfree_strarray() after use. - * - * Returns array of strings or NULL when memory can't be allocated. - */ -char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n) -{ - char **names; - size_t i; - - names = kcalloc(n + 1, sizeof(char *), gfp); - if (!names) - return NULL; - - for (i = 0; i < n; i++) { - names[i] = kasprintf(gfp, "%s-%zu", prefix, i); - if (!names[i]) { - kfree_strarray(names, i); - return NULL; - } - } - - return names; -} -EXPORT_SYMBOL_GPL(kasprintf_strarray); - /** * kfree_strarray - free a number of dynamically allocated strings contained * in an array and the array itself @@ -731,222 +697,6 @@ void kfree_strarray(char **array, size_t n) } EXPORT_SYMBOL_GPL(kfree_strarray); -struct strarray { - char **array; - size_t n; -}; - -static void devm_kfree_strarray(struct device *dev, void *res) -{ - struct strarray *array = res; - - kfree_strarray(array->array, array->n); -} - -char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n) -{ - struct strarray *ptr; - - ptr = devres_alloc(devm_kfree_strarray, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return ERR_PTR(-ENOMEM); - - ptr->array = kasprintf_strarray(GFP_KERNEL, prefix, n); - if (!ptr->array) { - devres_free(ptr); - return ERR_PTR(-ENOMEM); - } - - return ptr->array; -} -EXPORT_SYMBOL_GPL(devm_kasprintf_strarray); - -/** - * strscpy_pad() - Copy a C-string into a sized buffer - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @count: Size of destination buffer - * - * Copy the string, or as much of it as fits, into the dest buffer. The - * behavior is undefined if the string buffers overlap. The destination - * buffer is always %NUL terminated, unless it's zero-sized. - * - * If the source string is shorter than the destination buffer, zeros - * the tail of the destination buffer. - * - * For full explanation of why you may want to consider using the - * 'strscpy' functions please see the function docstring for strscpy(). - * - * Returns: - * * The number of characters copied (not including the trailing %NUL) - * * -E2BIG if count is 0 or @src was truncated. 
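kasprintf_strarray() and devm_kasprintf_strarray(), removed from string_helpers.c above, build "prefix-0" .. "prefix-N" string arrays; the devres variant frees them automatically when the device goes away. A sketch of typical use in a probe path (demo_probe() is hypothetical):

static int demo_probe(struct device *dev)
{
        char **labels = devm_kasprintf_strarray(dev, "pin", 4);

        if (IS_ERR(labels))
                return PTR_ERR(labels);
        dev_info(dev, "first label: %s\n", labels[0]);  /* "pin-0" */
        return 0;       /* labels freed automatically on detach */
}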
- */ -ssize_t strscpy_pad(char *dest, const char *src, size_t count) -{ - ssize_t written; - - written = strscpy(dest, src, count); - if (written < 0 || written == count - 1) - return written; - - memset(dest + written + 1, 0, count - written - 1); - - return written; -} -EXPORT_SYMBOL(strscpy_pad); - -/** - * skip_spaces - Removes leading whitespace from @str. - * @str: The string to be stripped. - * - * Returns a pointer to the first non-whitespace character in @str. - */ -char *skip_spaces(const char *str) -{ - while (isspace(*str)) - ++str; - return (char *)str; -} -EXPORT_SYMBOL(skip_spaces); - -/** - * strim - Removes leading and trailing whitespace from @s. - * @s: The string to be stripped. - * - * Note that the first trailing whitespace is replaced with a %NUL-terminator - * in the given string @s. Returns a pointer to the first non-whitespace - * character in @s. - */ -char *strim(char *s) -{ - size_t size; - char *end; - - size = strlen(s); - if (!size) - return s; - - end = s + size - 1; - while (end >= s && isspace(*end)) - end--; - *(end + 1) = '\0'; - - return skip_spaces(s); -} -EXPORT_SYMBOL(strim); - -/** - * sysfs_streq - return true if strings are equal, modulo trailing newline - * @s1: one string - * @s2: another string - * - * This routine returns true iff two strings are equal, treating both - * NUL and newline-then-NUL as equivalent string terminations. It's - * geared for use with sysfs input strings, which generally terminate - * with newlines but are compared against values without newlines. - */ -bool sysfs_streq(const char *s1, const char *s2) -{ - while (*s1 && *s1 == *s2) { - s1++; - s2++; - } - - if (*s1 == *s2) - return true; - if (!*s1 && *s2 == '\n' && !s2[1]) - return true; - if (*s1 == '\n' && !s1[1] && !*s2) - return true; - return false; -} -EXPORT_SYMBOL(sysfs_streq); - -/** - * match_string - matches given string in an array - * @array: array of strings - * @n: number of strings in the array or -1 for NULL terminated arrays - * @string: string to match with - * - * This routine will look for a string in an array of strings up to the - * n-th element in the array or until the first NULL element. - * - * Historically the value of -1 for @n, was used to search in arrays that - * are NULL terminated. However, the function does not make a distinction - * when finishing the search: either @n elements have been compared OR - * the first NULL element was found. - * - * Return: - * index of a @string in the @array if matches, or %-EINVAL otherwise. - */ -int match_string(const char * const *array, size_t n, const char *string) -{ - int index; - const char *item; - - for (index = 0; index < n; index++) { - item = array[index]; - if (!item) - break; - if (!strcmp(item, string)) - return index; - } - - return -EINVAL; -} -EXPORT_SYMBOL(match_string); - -/** - * __sysfs_match_string - matches given string in an array - * @array: array of strings - * @n: number of strings in the array or -1 for NULL terminated arrays - * @str: string to match with - * - * Returns index of @str in the @array or -EINVAL, just like match_string(). - * Uses sysfs_streq instead of strcmp for matching. - * - * This routine will look for a string in an array of strings up to the - * n-th element in the array or until the first NULL element. - * - * Historically the value of -1 for @n, was used to search in arrays that - * are NULL terminated. 
However, the function does not make a distinction - * when finishing the search: either @n elements have been compared OR - * the first NULL element was found. - */ -int __sysfs_match_string(const char * const *array, size_t n, const char *str) -{ - const char *item; - int index; - - for (index = 0; index < n; index++) { - item = array[index]; - if (!item) - break; - if (sysfs_streq(item, str)) - return index; - } - - return -EINVAL; -} -EXPORT_SYMBOL(__sysfs_match_string); - -/** - * strreplace - Replace all occurrences of character in string. - * @s: The string to operate on. - * @old: The character being replaced. - * @new: The character @old is replaced with. - * - * Returns pointer to the nul byte at the end of @s. - */ -char *strreplace(char *s, char old, char new) -{ - for (; *s; ++s) - if (*s == old) - *s = new; - return s; -} -EXPORT_SYMBOL(strreplace); - /** * memcpy_and_pad - Copy one buffer to another with padding * @dest: Where to copy to @@ -966,12 +716,3 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, } } EXPORT_SYMBOL(memcpy_and_pad); - -#ifdef CONFIG_FORTIFY_SOURCE -void fortify_panic(const char *name) -{ - pr_emerg("detected buffer overflow in %s\n", name); - BUG(); -} -EXPORT_SYMBOL(fortify_panic); -#endif /* CONFIG_FORTIFY_SOURCE */ diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 0c82f07f74..d33fa5a61b 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -446,42 +446,6 @@ static void __init test_bitmap_parselist(void) } } -static void __init test_bitmap_printlist(void) -{ - unsigned long *bmap = kmalloc(PAGE_SIZE, GFP_KERNEL); - char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - char expected[256]; - int ret, slen; - ktime_t time; - - if (!buf || !bmap) - goto out; - - memset(bmap, -1, PAGE_SIZE); - slen = snprintf(expected, 256, "0-%ld", PAGE_SIZE * 8 - 1); - if (slen < 0) - goto out; - - time = ktime_get(); - ret = bitmap_print_to_pagebuf(true, buf, bmap, PAGE_SIZE * 8); - time = ktime_get() - time; - - if (ret != slen + 1) { - pr_err("bitmap_print_to_pagebuf: result is %d, expected %d\n", ret, slen); - goto out; - } - - if (strncmp(buf, expected, slen)) { - pr_err("bitmap_print_to_pagebuf: result is %s, expected %s\n", buf, expected); - goto out; - } - - pr_err("bitmap_print_to_pagebuf: input is '%s', Time: %llu\n", buf, time); -out: - kfree(buf); - kfree(bmap); -} - static const unsigned long parse_test[] __initconst = { BITMAP_FROM_U64(0), BITMAP_FROM_U64(1), @@ -854,7 +818,6 @@ static void __init selftest(void) test_bitmap_arr32(); test_bitmap_parse(); test_bitmap_parselist(); - test_bitmap_printlist(); test_mem_optimisations(); test_for_each_set_clump8(); test_bitmap_cut(); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 0c5cb2d643..68d125b409 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -52,7 +52,6 @@ #define FLAG_NO_DATA BIT(0) #define FLAG_EXPECTED_FAIL BIT(1) #define FLAG_SKB_FRAG BIT(2) -#define FLAG_VERIFIER_ZEXT BIT(3) enum { CLASSIC = BIT(6), /* Old BPF instructions only. */ @@ -81,7 +80,6 @@ struct bpf_test { int expected_errcode; /* used when FLAG_EXPECTED_FAIL is set in the aux */ __u8 frag_data[MAX_DATA]; int stack_depth; /* for eBPF only, since tests don't call verifier */ - int nr_testruns; /* Custom run count, defaults to MAX_TESTRUNS if 0 */ }; /* Large test cases need separate allocation and fill handler. 
*/ @@ -463,2602 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self) return __bpf_fill_stxdw(self, BPF_DW); } -static int __bpf_ld_imm64(struct bpf_insn insns[2], u8 reg, s64 imm64) +static int bpf_fill_long_jmp(struct bpf_test *self) { - struct bpf_insn tmp[] = {BPF_LD_IMM64(reg, imm64)}; - - memcpy(insns, tmp, sizeof(tmp)); - return 2; -} - -/* - * Branch conversion tests. Complex operations can expand to a lot - * of instructions when JITed. This in turn may cause jump offsets - * to overflow the field size of the native instruction, triggering - * a branch conversion mechanism in some JITs. - */ -static int __bpf_fill_max_jmp(struct bpf_test *self, int jmp, int imm) -{ - struct bpf_insn *insns; - int len = S16_MAX + 5; + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn; int i; - insns = kmalloc_array(len, sizeof(*insns), GFP_KERNEL); - if (!insns) - return -ENOMEM; - - i = __bpf_ld_imm64(insns, R1, 0x0123456789abcdefULL); - insns[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); - insns[i++] = BPF_JMP_IMM(jmp, R0, imm, S16_MAX); - insns[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 2); - insns[i++] = BPF_EXIT_INSN(); - - while (i < len - 1) { - static const int ops[] = { - BPF_LSH, BPF_RSH, BPF_ARSH, BPF_ADD, - BPF_SUB, BPF_MUL, BPF_DIV, BPF_MOD, - }; - int op = ops[(i >> 1) % ARRAY_SIZE(ops)]; - - if (i & 1) - insns[i++] = BPF_ALU32_REG(op, R0, R1); - else - insns[i++] = BPF_ALU64_REG(op, R0, R1); - } - - insns[i++] = BPF_EXIT_INSN(); - self->u.ptr.insns = insns; - self->u.ptr.len = len; - BUG_ON(i != len); - - return 0; -} - -/* Branch taken by runtime decision */ -static int bpf_fill_max_jmp_taken(struct bpf_test *self) -{ - return __bpf_fill_max_jmp(self, BPF_JEQ, 1); -} - -/* Branch not taken by runtime decision */ -static int bpf_fill_max_jmp_not_taken(struct bpf_test *self) -{ - return __bpf_fill_max_jmp(self, BPF_JEQ, 0); -} - -/* Branch always taken, known at JIT time */ -static int bpf_fill_max_jmp_always_taken(struct bpf_test *self) -{ - return __bpf_fill_max_jmp(self, BPF_JGE, 0); -} - -/* Branch never taken, known at JIT time */ -static int bpf_fill_max_jmp_never_taken(struct bpf_test *self) -{ - return __bpf_fill_max_jmp(self, BPF_JLT, 0); -} - -/* ALU result computation used in tests */ -static bool __bpf_alu_result(u64 *res, u64 v1, u64 v2, u8 op) -{ - *res = 0; - switch (op) { - case BPF_MOV: - *res = v2; - break; - case BPF_AND: - *res = v1 & v2; - break; - case BPF_OR: - *res = v1 | v2; - break; - case BPF_XOR: - *res = v1 ^ v2; - break; - case BPF_LSH: - *res = v1 << v2; - break; - case BPF_RSH: - *res = v1 >> v2; - break; - case BPF_ARSH: - *res = v1 >> v2; - if (v2 > 0 && v1 > S64_MAX) - *res |= ~0ULL << (64 - v2); - break; - case BPF_ADD: - *res = v1 + v2; - break; - case BPF_SUB: - *res = v1 - v2; - break; - case BPF_MUL: - *res = v1 * v2; - break; - case BPF_DIV: - if (v2 == 0) - return false; - *res = div64_u64(v1, v2); - break; - case BPF_MOD: - if (v2 == 0) - return false; - div64_u64_rem(v1, v2, res); - break; - } - return true; -} - -/* Test an ALU shift operation for all valid shift values */ -static int __bpf_fill_alu_shift(struct bpf_test *self, u8 op, - u8 mode, bool alu32) -{ - static const s64 regs[] = { - 0x0123456789abcdefLL, /* dword > 0, word < 0 */ - 0xfedcba9876543210LL, /* dowrd < 0, word > 0 */ - 0xfedcba0198765432LL, /* dowrd < 0, word < 0 */ - 0x0123458967abcdefLL, /* dword > 0, word > 0 */ - }; - int bits = alu32 ? 
32 : 64; - int len = (2 + 7 * bits) * ARRAY_SIZE(regs) + 3; - struct bpf_insn *insn; - int imm, k; - int i = 0; - insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); if (!insn) return -ENOMEM; - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 0); - - for (k = 0; k < ARRAY_SIZE(regs); k++) { - s64 reg = regs[k]; - - i += __bpf_ld_imm64(&insn[i], R3, reg); - - for (imm = 0; imm < bits; imm++) { - u64 val; - - /* Perform operation */ - insn[i++] = BPF_ALU64_REG(BPF_MOV, R1, R3); - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R2, imm); - if (alu32) { - if (mode == BPF_K) - insn[i++] = BPF_ALU32_IMM(op, R1, imm); - else - insn[i++] = BPF_ALU32_REG(op, R1, R2); - - if (op == BPF_ARSH) - reg = (s32)reg; - else - reg = (u32)reg; - __bpf_alu_result(&val, reg, imm, op); - val = (u32)val; - } else { - if (mode == BPF_K) - insn[i++] = BPF_ALU64_IMM(op, R1, imm); - else - insn[i++] = BPF_ALU64_REG(op, R1, R2); - __bpf_alu_result(&val, reg, imm, op); - } - - /* - * When debugging a JIT that fails this test, one - * can write the immediate value to R0 here to find - * out which operand values that fail. - */ - - /* Load reference and check the result */ - i += __bpf_ld_imm64(&insn[i], R4, val); - insn[i++] = BPF_JMP_REG(BPF_JEQ, R1, R4, 1); - insn[i++] = BPF_EXIT_INSN(); - } - } - - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); - insn[i++] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insn; - self->u.ptr.len = len; - BUG_ON(i != len); - - return 0; -} - -static int bpf_fill_alu64_lsh_imm(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_LSH, BPF_K, false); -} - -static int bpf_fill_alu64_rsh_imm(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_RSH, BPF_K, false); -} - -static int bpf_fill_alu64_arsh_imm(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_ARSH, BPF_K, false); -} - -static int bpf_fill_alu64_lsh_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_LSH, BPF_X, false); -} - -static int bpf_fill_alu64_rsh_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_RSH, BPF_X, false); -} - -static int bpf_fill_alu64_arsh_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_ARSH, BPF_X, false); -} - -static int bpf_fill_alu32_lsh_imm(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_LSH, BPF_K, true); -} - -static int bpf_fill_alu32_rsh_imm(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_RSH, BPF_K, true); -} - -static int bpf_fill_alu32_arsh_imm(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_ARSH, BPF_K, true); -} - -static int bpf_fill_alu32_lsh_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_LSH, BPF_X, true); -} - -static int bpf_fill_alu32_rsh_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_RSH, BPF_X, true); -} - -static int bpf_fill_alu32_arsh_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift(self, BPF_ARSH, BPF_X, true); -} - -/* - * Test an ALU register shift operation for all valid shift values - * for the case when the source and destination are the same. - */ -static int __bpf_fill_alu_shift_same_reg(struct bpf_test *self, u8 op, - bool alu32) -{ - int bits = alu32 ? 
32 : 64; - int len = 3 + 6 * bits; - struct bpf_insn *insn; - int i = 0; - u64 val; - - insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); - if (!insn) - return -ENOMEM; - - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 0); - - for (val = 0; val < bits; val++) { - u64 res; - - /* Perform operation */ - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R1, val); - if (alu32) - insn[i++] = BPF_ALU32_REG(op, R1, R1); - else - insn[i++] = BPF_ALU64_REG(op, R1, R1); - - /* Compute the reference result */ - __bpf_alu_result(&res, val, val, op); - if (alu32) - res = (u32)res; - i += __bpf_ld_imm64(&insn[i], R2, res); - - /* Check the actual result */ - insn[i++] = BPF_JMP_REG(BPF_JEQ, R1, R2, 1); - insn[i++] = BPF_EXIT_INSN(); - } - - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); - insn[i++] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insn; - self->u.ptr.len = len; - BUG_ON(i != len); - - return 0; -} - -static int bpf_fill_alu64_lsh_same_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift_same_reg(self, BPF_LSH, false); -} - -static int bpf_fill_alu64_rsh_same_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift_same_reg(self, BPF_RSH, false); -} - -static int bpf_fill_alu64_arsh_same_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift_same_reg(self, BPF_ARSH, false); -} - -static int bpf_fill_alu32_lsh_same_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift_same_reg(self, BPF_LSH, true); -} - -static int bpf_fill_alu32_rsh_same_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift_same_reg(self, BPF_RSH, true); -} - -static int bpf_fill_alu32_arsh_same_reg(struct bpf_test *self) -{ - return __bpf_fill_alu_shift_same_reg(self, BPF_ARSH, true); -} - -/* - * Common operand pattern generator for exhaustive power-of-two magnitudes - * tests. The block size parameters can be adjusted to increase/reduce the - * number of combinatons tested and thereby execution speed and memory - * footprint. - */ - -static inline s64 value(int msb, int delta, int sign) -{ - return sign * (1LL << msb) + delta; -} - -static int __bpf_fill_pattern(struct bpf_test *self, void *arg, - int dbits, int sbits, int block1, int block2, - int (*emit)(struct bpf_test*, void*, - struct bpf_insn*, s64, s64)) -{ - static const int sgn[][2] = {{1, 1}, {1, -1}, {-1, 1}, {-1, -1}}; - struct bpf_insn *insns; - int di, si, bt, db, sb; - int count, len, k; - int extra = 1 + 2; - int i = 0; - - /* Total number of iterations for the two pattern */ - count = (dbits - 1) * (sbits - 1) * block1 * block1 * ARRAY_SIZE(sgn); - count += (max(dbits, sbits) - 1) * block2 * block2 * ARRAY_SIZE(sgn); - - /* Compute the maximum number of insns and allocate the buffer */ - len = extra + count * (*emit)(self, arg, NULL, 0, 0); - insns = kmalloc_array(len, sizeof(*insns), GFP_KERNEL); - if (!insns) - return -ENOMEM; - - /* Add head instruction(s) */ - insns[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 0); + insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1); + insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1); /* - * Pattern 1: all combinations of power-of-two magnitudes and sign, - * and with a block of contiguous values around each magnitude. + * Fill with a complex 64-bit operation that expands to a lot of + * instructions on 32-bit JITs. The large jump offset can then + * overflow the conditional branch field size, triggering a branch + * conversion mechanism in some JITs. + * + * Note: BPF_MAXINSNS of ALU64 MUL is enough to trigger such branch + * conversion on the 32-bit MIPS JIT. 
For other JITs, the instruction + * count and/or operation may need to be modified to trigger the + * branch conversion. */ - for (di = 0; di < dbits - 1; di++) /* Dst magnitudes */ - for (si = 0; si < sbits - 1; si++) /* Src magnitudes */ - for (k = 0; k < ARRAY_SIZE(sgn); k++) /* Sign combos */ - for (db = -(block1 / 2); - db < (block1 + 1) / 2; db++) - for (sb = -(block1 / 2); - sb < (block1 + 1) / 2; sb++) { - s64 dst, src; + for (i = 2; i < len - 1; i++) + insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i); - dst = value(di, db, sgn[k][0]); - src = value(si, sb, sgn[k][1]); - i += (*emit)(self, arg, - &insns[i], - dst, src); - } - /* - * Pattern 2: all combinations for a larger block of values - * for each power-of-two magnitude and sign, where the magnitude is - * the same for both operands. - */ - for (bt = 0; bt < max(dbits, sbits) - 1; bt++) /* Magnitude */ - for (k = 0; k < ARRAY_SIZE(sgn); k++) /* Sign combos */ - for (db = -(block2 / 2); db < (block2 + 1) / 2; db++) - for (sb = -(block2 / 2); - sb < (block2 + 1) / 2; sb++) { - s64 dst, src; - - dst = value(bt % dbits, db, sgn[k][0]); - src = value(bt % sbits, sb, sgn[k][1]); - i += (*emit)(self, arg, &insns[i], - dst, src); - } - - /* Append tail instructions */ - insns[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); - insns[i++] = BPF_EXIT_INSN(); - BUG_ON(i > len); - - self->u.ptr.insns = insns; - self->u.ptr.len = i; - - return 0; -} - -/* - * Block size parameters used in pattern tests below. une as needed to - * increase/reduce the number combinations tested, see following examples. - * block values per operand MSB - * ---------------------------------------- - * 0 none - * 1 (1 << MSB) - * 2 (1 << MSB) + [-1, 0] - * 3 (1 << MSB) + [-1, 0, 1] - */ -#define PATTERN_BLOCK1 1 -#define PATTERN_BLOCK2 5 - -/* Number of test runs for a pattern test */ -#define NR_PATTERN_RUNS 1 - -/* - * Exhaustive tests of ALU operations for all combinations of power-of-two - * magnitudes of the operands, both for positive and negative values. The - * test is designed to verify e.g. the ALU and ALU64 operations for JITs that - * emit different code depending on the magnitude of the immediate value. 
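To make the operand pattern concrete: for each MSB position, __bpf_fill_pattern above feeds the emitters values of the form sign * (1 << msb) + delta for a small block of deltas. A standalone userspace sketch (illustration only, not part of the patch) printing the values produced for one MSB with PATTERN_BLOCK2 == 5, matching the loop bounds used by the generator:

	#include <stdio.h>

	/* Same construction as value() in the generator above */
	static long long value(int msb, int delta, int sign)
	{
		return sign * (1LL << msb) + delta;
	}

	int main(void)
	{
		int block2 = 5, msb = 31, delta;

		/* Deltas cover [-block2/2, (block2 + 1)/2), i.e. -2..2 */
		for (delta = -(block2 / 2); delta < (block2 + 1) / 2; delta++)
			printf("%+lld  %+lld\n",
			       value(msb, delta, 1), value(msb, delta, -1));
		return 0;
	}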
- */ -static int __bpf_emit_alu64_imm(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 imm) -{ - int op = *(int *)arg; - int i = 0; - u64 res; - - if (!insns) - return 7; - - if (__bpf_alu_result(&res, dst, (s32)imm, op)) { - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R3, res); - insns[i++] = BPF_ALU64_IMM(op, R1, imm); - insns[i++] = BPF_JMP_REG(BPF_JEQ, R1, R3, 1); - insns[i++] = BPF_EXIT_INSN(); - } - - return i; -} - -static int __bpf_emit_alu32_imm(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 imm) -{ - int op = *(int *)arg; - int i = 0; - u64 res; - - if (!insns) - return 7; - - if (__bpf_alu_result(&res, (u32)dst, (u32)imm, op)) { - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R3, (u32)res); - insns[i++] = BPF_ALU32_IMM(op, R1, imm); - insns[i++] = BPF_JMP_REG(BPF_JEQ, R1, R3, 1); - insns[i++] = BPF_EXIT_INSN(); - } - - return i; -} - -static int __bpf_emit_alu64_reg(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int op = *(int *)arg; - int i = 0; - u64 res; - - if (!insns) - return 9; - - if (__bpf_alu_result(&res, dst, src, op)) { - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - i += __bpf_ld_imm64(&insns[i], R3, res); - insns[i++] = BPF_ALU64_REG(op, R1, R2); - insns[i++] = BPF_JMP_REG(BPF_JEQ, R1, R3, 1); - insns[i++] = BPF_EXIT_INSN(); - } - - return i; -} - -static int __bpf_emit_alu32_reg(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int op = *(int *)arg; - int i = 0; - u64 res; - - if (!insns) - return 9; - - if (__bpf_alu_result(&res, (u32)dst, (u32)src, op)) { - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - i += __bpf_ld_imm64(&insns[i], R3, (u32)res); - insns[i++] = BPF_ALU32_REG(op, R1, R2); - insns[i++] = BPF_JMP_REG(BPF_JEQ, R1, R3, 1); - insns[i++] = BPF_EXIT_INSN(); - } - - return i; -} - -static int __bpf_fill_alu64_imm(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 32, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_alu64_imm); -} - -static int __bpf_fill_alu32_imm(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 32, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_alu32_imm); -} - -static int __bpf_fill_alu64_reg(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 64, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_alu64_reg); -} - -static int __bpf_fill_alu32_reg(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 64, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_alu32_reg); -} - -/* ALU64 immediate operations */ -static int bpf_fill_alu64_mov_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_MOV); -} - -static int bpf_fill_alu64_and_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_AND); -} - -static int bpf_fill_alu64_or_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_OR); -} - -static int bpf_fill_alu64_xor_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_XOR); -} - -static int bpf_fill_alu64_add_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_ADD); -} - -static int bpf_fill_alu64_sub_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_SUB); -} - -static int bpf_fill_alu64_mul_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_MUL); -} - -static int 
bpf_fill_alu64_div_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_DIV); -} - -static int bpf_fill_alu64_mod_imm(struct bpf_test *self) -{ - return __bpf_fill_alu64_imm(self, BPF_MOD); -} - -/* ALU32 immediate operations */ -static int bpf_fill_alu32_mov_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_MOV); -} - -static int bpf_fill_alu32_and_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_AND); -} - -static int bpf_fill_alu32_or_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_OR); -} - -static int bpf_fill_alu32_xor_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_XOR); -} - -static int bpf_fill_alu32_add_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_ADD); -} - -static int bpf_fill_alu32_sub_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_SUB); -} - -static int bpf_fill_alu32_mul_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_MUL); -} - -static int bpf_fill_alu32_div_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_DIV); -} - -static int bpf_fill_alu32_mod_imm(struct bpf_test *self) -{ - return __bpf_fill_alu32_imm(self, BPF_MOD); -} - -/* ALU64 register operations */ -static int bpf_fill_alu64_mov_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_MOV); -} - -static int bpf_fill_alu64_and_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_AND); -} - -static int bpf_fill_alu64_or_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_OR); -} - -static int bpf_fill_alu64_xor_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_XOR); -} - -static int bpf_fill_alu64_add_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_ADD); -} - -static int bpf_fill_alu64_sub_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_SUB); -} - -static int bpf_fill_alu64_mul_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_MUL); -} - -static int bpf_fill_alu64_div_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_DIV); -} - -static int bpf_fill_alu64_mod_reg(struct bpf_test *self) -{ - return __bpf_fill_alu64_reg(self, BPF_MOD); -} - -/* ALU32 register operations */ -static int bpf_fill_alu32_mov_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_MOV); -} - -static int bpf_fill_alu32_and_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_AND); -} - -static int bpf_fill_alu32_or_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_OR); -} - -static int bpf_fill_alu32_xor_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_XOR); -} - -static int bpf_fill_alu32_add_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_ADD); -} - -static int bpf_fill_alu32_sub_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_SUB); -} - -static int bpf_fill_alu32_mul_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_MUL); -} - -static int bpf_fill_alu32_div_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_DIV); -} - -static int bpf_fill_alu32_mod_reg(struct bpf_test *self) -{ - return __bpf_fill_alu32_reg(self, BPF_MOD); -} - -/* - * Test JITs that implement complex ALU operations as function - * calls, and must re-arrange operands for argument passing. 
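The register tests that follow verify a full 64-bit result in two steps, since JMP32 compares only the low word: check the low 32 bits, then shift right by 32 and check the high bits, returning __LINE__ through R0 on mismatch so the failing comparison can be identified. A userspace sketch of the same check (check64 is a hypothetical helper used only for illustration):

	#include <stdio.h>
	#include <stdint.h>

	/* Mirrors the JMP32 + RSH 32 + JMP32 sequence emitted below */
	static int check64(uint64_t reg, uint64_t expect)
	{
		if ((uint32_t)reg != (uint32_t)expect)
			return __LINE__;	/* low word mismatch */
		if ((reg >> 32) != (expect >> 32))
			return __LINE__;	/* high word mismatch */
		return 0;			/* both halves match */
	}

	int main(void)
	{
		printf("%d\n", check64(0x0123456789abcdefULL,
				       0x0123456789abcdefULL));
		return 0;
	}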
- */ -static int __bpf_fill_alu_imm_regs(struct bpf_test *self, u8 op, bool alu32) -{ - int len = 2 + 10 * 10; - struct bpf_insn *insns; - u64 dst, res; - int i = 0; - u32 imm; - int rd; - - insns = kmalloc_array(len, sizeof(*insns), GFP_KERNEL); - if (!insns) - return -ENOMEM; - - /* Operand and result values according to operation */ - if (alu32) - dst = 0x76543210U; - else - dst = 0x7edcba9876543210ULL; - imm = 0x01234567U; - - if (op == BPF_LSH || op == BPF_RSH || op == BPF_ARSH) - imm &= 31; - - __bpf_alu_result(&res, dst, imm, op); - - if (alu32) - res = (u32)res; - - /* Check all operand registers */ - for (rd = R0; rd <= R9; rd++) { - i += __bpf_ld_imm64(&insns[i], rd, dst); - - if (alu32) - insns[i++] = BPF_ALU32_IMM(op, rd, imm); - else - insns[i++] = BPF_ALU64_IMM(op, rd, imm); - - insns[i++] = BPF_JMP32_IMM(BPF_JEQ, rd, res, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_ALU64_IMM(BPF_RSH, rd, 32); - insns[i++] = BPF_JMP32_IMM(BPF_JEQ, rd, res >> 32, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - } - - insns[i++] = BPF_MOV64_IMM(R0, 1); - insns[i++] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insns; - self->u.ptr.len = len; - BUG_ON(i != len); - - return 0; -} - -/* ALU64 K registers */ -static int bpf_fill_alu64_mov_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_MOV, false); -} - -static int bpf_fill_alu64_and_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_AND, false); -} - -static int bpf_fill_alu64_or_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_OR, false); -} - -static int bpf_fill_alu64_xor_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_XOR, false); -} - -static int bpf_fill_alu64_lsh_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_LSH, false); -} - -static int bpf_fill_alu64_rsh_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_RSH, false); -} - -static int bpf_fill_alu64_arsh_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_ARSH, false); -} - -static int bpf_fill_alu64_add_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_ADD, false); -} - -static int bpf_fill_alu64_sub_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_SUB, false); -} - -static int bpf_fill_alu64_mul_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_MUL, false); -} - -static int bpf_fill_alu64_div_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_DIV, false); -} - -static int bpf_fill_alu64_mod_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_MOD, false); -} - -/* ALU32 K registers */ -static int bpf_fill_alu32_mov_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_MOV, true); -} - -static int bpf_fill_alu32_and_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_AND, true); -} - -static int bpf_fill_alu32_or_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_OR, true); -} - -static int bpf_fill_alu32_xor_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_XOR, true); -} - -static int bpf_fill_alu32_lsh_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_LSH, true); -} - -static int bpf_fill_alu32_rsh_imm_regs(struct bpf_test *self) -{ - 
return __bpf_fill_alu_imm_regs(self, BPF_RSH, true); -} - -static int bpf_fill_alu32_arsh_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_ARSH, true); -} - -static int bpf_fill_alu32_add_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_ADD, true); -} - -static int bpf_fill_alu32_sub_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_SUB, true); -} - -static int bpf_fill_alu32_mul_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_MUL, true); -} - -static int bpf_fill_alu32_div_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_DIV, true); -} - -static int bpf_fill_alu32_mod_imm_regs(struct bpf_test *self) -{ - return __bpf_fill_alu_imm_regs(self, BPF_MOD, true); -} - -/* - * Test JITs that implement complex ALU operations as function - * calls, and must re-arrange operands for argument passing. - */ -static int __bpf_fill_alu_reg_pairs(struct bpf_test *self, u8 op, bool alu32) -{ - int len = 2 + 10 * 10 * 12; - u64 dst, src, res, same; - struct bpf_insn *insns; - int rd, rs; - int i = 0; - - insns = kmalloc_array(len, sizeof(*insns), GFP_KERNEL); - if (!insns) - return -ENOMEM; - - /* Operand and result values according to operation */ - if (alu32) { - dst = 0x76543210U; - src = 0x01234567U; - } else { - dst = 0x7edcba9876543210ULL; - src = 0x0123456789abcdefULL; - } - - if (op == BPF_LSH || op == BPF_RSH || op == BPF_ARSH) - src &= 31; - - __bpf_alu_result(&res, dst, src, op); - __bpf_alu_result(&same, src, src, op); - - if (alu32) { - res = (u32)res; - same = (u32)same; - } - - /* Check all combinations of operand registers */ - for (rd = R0; rd <= R9; rd++) { - for (rs = R0; rs <= R9; rs++) { - u64 val = rd == rs ? 
same : res; - - i += __bpf_ld_imm64(&insns[i], rd, dst); - i += __bpf_ld_imm64(&insns[i], rs, src); - - if (alu32) - insns[i++] = BPF_ALU32_REG(op, rd, rs); - else - insns[i++] = BPF_ALU64_REG(op, rd, rs); - - insns[i++] = BPF_JMP32_IMM(BPF_JEQ, rd, val, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_ALU64_IMM(BPF_RSH, rd, 32); - insns[i++] = BPF_JMP32_IMM(BPF_JEQ, rd, val >> 32, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - } - } - - insns[i++] = BPF_MOV64_IMM(R0, 1); - insns[i++] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insns; - self->u.ptr.len = len; - BUG_ON(i != len); - - return 0; -} - -/* ALU64 X register combinations */ -static int bpf_fill_alu64_mov_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_MOV, false); -} - -static int bpf_fill_alu64_and_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_AND, false); -} - -static int bpf_fill_alu64_or_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_OR, false); -} - -static int bpf_fill_alu64_xor_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_XOR, false); -} - -static int bpf_fill_alu64_lsh_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_LSH, false); -} - -static int bpf_fill_alu64_rsh_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_RSH, false); -} - -static int bpf_fill_alu64_arsh_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_ARSH, false); -} - -static int bpf_fill_alu64_add_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_ADD, false); -} - -static int bpf_fill_alu64_sub_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_SUB, false); -} - -static int bpf_fill_alu64_mul_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_MUL, false); -} - -static int bpf_fill_alu64_div_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_DIV, false); -} - -static int bpf_fill_alu64_mod_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_MOD, false); -} - -/* ALU32 X register combinations */ -static int bpf_fill_alu32_mov_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_MOV, true); -} - -static int bpf_fill_alu32_and_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_AND, true); -} - -static int bpf_fill_alu32_or_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_OR, true); -} - -static int bpf_fill_alu32_xor_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_XOR, true); -} - -static int bpf_fill_alu32_lsh_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_LSH, true); -} - -static int bpf_fill_alu32_rsh_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_RSH, true); -} - -static int bpf_fill_alu32_arsh_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_ARSH, true); -} - -static int bpf_fill_alu32_add_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_ADD, true); -} - -static int bpf_fill_alu32_sub_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_SUB, true); -} - -static int bpf_fill_alu32_mul_reg_pairs(struct bpf_test *self) -{ - return 
__bpf_fill_alu_reg_pairs(self, BPF_MUL, true); -} - -static int bpf_fill_alu32_div_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_DIV, true); -} - -static int bpf_fill_alu32_mod_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_alu_reg_pairs(self, BPF_MOD, true); -} - -/* - * Exhaustive tests of atomic operations for all power-of-two operand - * magnitudes, both for positive and negative values. - */ - -static int __bpf_emit_atomic64(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int op = *(int *)arg; - u64 keep, fetch, res; - int i = 0; - - if (!insns) - return 21; - - switch (op) { - case BPF_XCHG: - res = src; - break; - default: - __bpf_alu_result(&res, dst, src, BPF_OP(op)); - } - - keep = 0x0123456789abcdefULL; - if (op & BPF_FETCH) - fetch = dst; - else - fetch = src; - - i += __bpf_ld_imm64(&insns[i], R0, keep); - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - i += __bpf_ld_imm64(&insns[i], R3, res); - i += __bpf_ld_imm64(&insns[i], R4, fetch); - i += __bpf_ld_imm64(&insns[i], R5, keep); - - insns[i++] = BPF_STX_MEM(BPF_DW, R10, R1, -8); - insns[i++] = BPF_ATOMIC_OP(BPF_DW, op, R10, R2, -8); - insns[i++] = BPF_LDX_MEM(BPF_DW, R1, R10, -8); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R1, R3, 1); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R2, R4, 1); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R0, R5, 1); - insns[i++] = BPF_EXIT_INSN(); - - return i; -} - -static int __bpf_emit_atomic32(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int op = *(int *)arg; - u64 keep, fetch, res; - int i = 0; - - if (!insns) - return 21; - - switch (op) { - case BPF_XCHG: - res = src; - break; - default: - __bpf_alu_result(&res, (u32)dst, (u32)src, BPF_OP(op)); - } - - keep = 0x0123456789abcdefULL; - if (op & BPF_FETCH) - fetch = (u32)dst; - else - fetch = src; - - i += __bpf_ld_imm64(&insns[i], R0, keep); - i += __bpf_ld_imm64(&insns[i], R1, (u32)dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - i += __bpf_ld_imm64(&insns[i], R3, (u32)res); - i += __bpf_ld_imm64(&insns[i], R4, fetch); - i += __bpf_ld_imm64(&insns[i], R5, keep); - - insns[i++] = BPF_STX_MEM(BPF_W, R10, R1, -4); - insns[i++] = BPF_ATOMIC_OP(BPF_W, op, R10, R2, -4); - insns[i++] = BPF_LDX_MEM(BPF_W, R1, R10, -4); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R1, R3, 1); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R2, R4, 1); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R0, R5, 1); - insns[i++] = BPF_EXIT_INSN(); - - return i; -} - -static int __bpf_emit_cmpxchg64(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int i = 0; - - if (!insns) - return 23; - - i += __bpf_ld_imm64(&insns[i], R0, ~dst); - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - - /* Result unsuccessful */ - insns[i++] = BPF_STX_MEM(BPF_DW, R10, R1, -8); - insns[i++] = BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -8); - insns[i++] = BPF_LDX_MEM(BPF_DW, R3, R10, -8); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R1, R3, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R0, R3, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - /* Result successful */ - insns[i++] = BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -8); - insns[i++] = BPF_LDX_MEM(BPF_DW, R3, R10, -8); - - insns[i++] 
= BPF_JMP_REG(BPF_JEQ, R2, R3, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R0, R1, 2); - insns[i++] = BPF_MOV64_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - return i; -} - -static int __bpf_emit_cmpxchg32(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int i = 0; - - if (!insns) - return 27; - - i += __bpf_ld_imm64(&insns[i], R0, ~dst); - i += __bpf_ld_imm64(&insns[i], R1, (u32)dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - - /* Result unsuccessful */ - insns[i++] = BPF_STX_MEM(BPF_W, R10, R1, -4); - insns[i++] = BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R2, -4); - insns[i++] = BPF_ZEXT_REG(R0), /* Zext always inserted by verifier */ - insns[i++] = BPF_LDX_MEM(BPF_W, R3, R10, -4); - - insns[i++] = BPF_JMP32_REG(BPF_JEQ, R1, R3, 2); - insns[i++] = BPF_MOV32_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R0, R3, 2); - insns[i++] = BPF_MOV32_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - /* Result successful */ - i += __bpf_ld_imm64(&insns[i], R0, dst); - insns[i++] = BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R2, -4); - insns[i++] = BPF_ZEXT_REG(R0), /* Zext always inserted by verifier */ - insns[i++] = BPF_LDX_MEM(BPF_W, R3, R10, -4); - - insns[i++] = BPF_JMP32_REG(BPF_JEQ, R2, R3, 2); - insns[i++] = BPF_MOV32_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - insns[i++] = BPF_JMP_REG(BPF_JEQ, R0, R1, 2); - insns[i++] = BPF_MOV32_IMM(R0, __LINE__); - insns[i++] = BPF_EXIT_INSN(); - - return i; -} - -static int __bpf_fill_atomic64(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 64, - 0, PATTERN_BLOCK2, - &__bpf_emit_atomic64); -} - -static int __bpf_fill_atomic32(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 64, - 0, PATTERN_BLOCK2, - &__bpf_emit_atomic32); -} - -/* 64-bit atomic operations */ -static int bpf_fill_atomic64_add(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_ADD); -} - -static int bpf_fill_atomic64_and(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_AND); -} - -static int bpf_fill_atomic64_or(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_OR); -} - -static int bpf_fill_atomic64_xor(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_XOR); -} - -static int bpf_fill_atomic64_add_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_ADD | BPF_FETCH); -} - -static int bpf_fill_atomic64_and_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_AND | BPF_FETCH); -} - -static int bpf_fill_atomic64_or_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_OR | BPF_FETCH); -} - -static int bpf_fill_atomic64_xor_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_XOR | BPF_FETCH); -} - -static int bpf_fill_atomic64_xchg(struct bpf_test *self) -{ - return __bpf_fill_atomic64(self, BPF_XCHG); -} - -static int bpf_fill_cmpxchg64(struct bpf_test *self) -{ - return __bpf_fill_pattern(self, NULL, 64, 64, 0, PATTERN_BLOCK2, - &__bpf_emit_cmpxchg64); -} - -/* 32-bit atomic operations */ -static int bpf_fill_atomic32_add(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_ADD); -} - -static int bpf_fill_atomic32_and(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_AND); -} - -static int bpf_fill_atomic32_or(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_OR); -} - -static int 
bpf_fill_atomic32_xor(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_XOR); -} - -static int bpf_fill_atomic32_add_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_ADD | BPF_FETCH); -} - -static int bpf_fill_atomic32_and_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_AND | BPF_FETCH); -} - -static int bpf_fill_atomic32_or_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_OR | BPF_FETCH); -} - -static int bpf_fill_atomic32_xor_fetch(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_XOR | BPF_FETCH); -} - -static int bpf_fill_atomic32_xchg(struct bpf_test *self) -{ - return __bpf_fill_atomic32(self, BPF_XCHG); -} - -static int bpf_fill_cmpxchg32(struct bpf_test *self) -{ - return __bpf_fill_pattern(self, NULL, 64, 64, 0, PATTERN_BLOCK2, - &__bpf_emit_cmpxchg32); -} - -/* - * Test JITs that implement ATOMIC operations as function calls or - * other primitives, and must re-arrange operands for argument passing. - */ -static int __bpf_fill_atomic_reg_pairs(struct bpf_test *self, u8 width, u8 op) -{ - struct bpf_insn *insn; - int len = 2 + 34 * 10 * 10; - u64 mem, upd, res; - int rd, rs, i = 0; - - insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); - if (!insn) - return -ENOMEM; - - /* Operand and memory values */ - if (width == BPF_DW) { - mem = 0x0123456789abcdefULL; - upd = 0xfedcba9876543210ULL; - } else { /* BPF_W */ - mem = 0x01234567U; - upd = 0x76543210U; - } - - /* Memory updated according to operation */ - switch (op) { - case BPF_XCHG: - res = upd; - break; - case BPF_CMPXCHG: - res = mem; - break; - default: - __bpf_alu_result(&res, mem, upd, BPF_OP(op)); - } - - /* Test all operand registers */ - for (rd = R0; rd <= R9; rd++) { - for (rs = R0; rs <= R9; rs++) { - u64 cmp, src; - - /* Initialize value in memory */ - i += __bpf_ld_imm64(&insn[i], R0, mem); - insn[i++] = BPF_STX_MEM(width, R10, R0, -8); - - /* Initialize registers in order */ - i += __bpf_ld_imm64(&insn[i], R0, ~mem); - i += __bpf_ld_imm64(&insn[i], rs, upd); - insn[i++] = BPF_MOV64_REG(rd, R10); - - /* Perform atomic operation */ - insn[i++] = BPF_ATOMIC_OP(width, op, rd, rs, -8); - if (op == BPF_CMPXCHG && width == BPF_W) - insn[i++] = BPF_ZEXT_REG(R0); - - /* Check R0 register value */ - if (op == BPF_CMPXCHG) - cmp = mem; /* Expect value from memory */ - else if (R0 == rd || R0 == rs) - cmp = 0; /* Aliased, checked below */ - else - cmp = ~mem; /* Expect value to be preserved */ - if (cmp) { - insn[i++] = BPF_JMP32_IMM(BPF_JEQ, R0, - (u32)cmp, 2); - insn[i++] = BPF_MOV32_IMM(R0, __LINE__); - insn[i++] = BPF_EXIT_INSN(); - insn[i++] = BPF_ALU64_IMM(BPF_RSH, R0, 32); - insn[i++] = BPF_JMP32_IMM(BPF_JEQ, R0, - cmp >> 32, 2); - insn[i++] = BPF_MOV32_IMM(R0, __LINE__); - insn[i++] = BPF_EXIT_INSN(); - } - - /* Check source register value */ - if (rs == R0 && op == BPF_CMPXCHG) - src = 0; /* Aliased with R0, checked above */ - else if (rs == rd && (op == BPF_CMPXCHG || - !(op & BPF_FETCH))) - src = 0; /* Aliased with rd, checked below */ - else if (op == BPF_CMPXCHG) - src = upd; /* Expect value to be preserved */ - else if (op & BPF_FETCH) - src = mem; /* Expect fetched value from mem */ - else /* no fetch */ - src = upd; /* Expect value to be preserved */ - if (src) { - insn[i++] = BPF_JMP32_IMM(BPF_JEQ, rs, - (u32)src, 2); - insn[i++] = BPF_MOV32_IMM(R0, __LINE__); - insn[i++] = BPF_EXIT_INSN(); - insn[i++] = BPF_ALU64_IMM(BPF_RSH, rs, 32); - insn[i++] = BPF_JMP32_IMM(BPF_JEQ, rs, - src >> 32, 2); - insn[i++] = 
BPF_MOV32_IMM(R0, __LINE__); - insn[i++] = BPF_EXIT_INSN(); - } - - /* Check destination register value */ - if (!(rd == R0 && op == BPF_CMPXCHG) && - !(rd == rs && (op & BPF_FETCH))) { - insn[i++] = BPF_JMP_REG(BPF_JEQ, rd, R10, 2); - insn[i++] = BPF_MOV32_IMM(R0, __LINE__); - insn[i++] = BPF_EXIT_INSN(); - } - - /* Check value in memory */ - if (rs != rd) { /* No aliasing */ - i += __bpf_ld_imm64(&insn[i], R1, res); - } else if (op == BPF_XCHG) { /* Aliased, XCHG */ - insn[i++] = BPF_MOV64_REG(R1, R10); - } else if (op == BPF_CMPXCHG) { /* Aliased, CMPXCHG */ - i += __bpf_ld_imm64(&insn[i], R1, mem); - } else { /* Aliased, ALU oper */ - i += __bpf_ld_imm64(&insn[i], R1, mem); - insn[i++] = BPF_ALU64_REG(BPF_OP(op), R1, R10); - } - - insn[i++] = BPF_LDX_MEM(width, R0, R10, -8); - if (width == BPF_DW) - insn[i++] = BPF_JMP_REG(BPF_JEQ, R0, R1, 2); - else /* width == BPF_W */ - insn[i++] = BPF_JMP32_REG(BPF_JEQ, R0, R1, 2); - insn[i++] = BPF_MOV32_IMM(R0, __LINE__); - insn[i++] = BPF_EXIT_INSN(); - } - } - - insn[i++] = BPF_MOV64_IMM(R0, 1); - insn[i++] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insn; - self->u.ptr.len = i; - BUG_ON(i > len); - - return 0; -} - -/* 64-bit atomic register tests */ -static int bpf_fill_atomic64_add_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_ADD); -} - -static int bpf_fill_atomic64_and_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_AND); -} - -static int bpf_fill_atomic64_or_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_OR); -} - -static int bpf_fill_atomic64_xor_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_XOR); -} - -static int bpf_fill_atomic64_add_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_ADD | BPF_FETCH); -} - -static int bpf_fill_atomic64_and_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_AND | BPF_FETCH); -} - -static int bpf_fill_atomic64_or_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_OR | BPF_FETCH); -} - -static int bpf_fill_atomic64_xor_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_XOR | BPF_FETCH); -} - -static int bpf_fill_atomic64_xchg_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_XCHG); -} - -static int bpf_fill_atomic64_cmpxchg_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_DW, BPF_CMPXCHG); -} - -/* 32-bit atomic register tests */ -static int bpf_fill_atomic32_add_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_ADD); -} - -static int bpf_fill_atomic32_and_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_AND); -} - -static int bpf_fill_atomic32_or_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_OR); -} - -static int bpf_fill_atomic32_xor_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_XOR); -} - -static int bpf_fill_atomic32_add_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_ADD | BPF_FETCH); -} - -static int bpf_fill_atomic32_and_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_AND | BPF_FETCH); -} - -static int 
bpf_fill_atomic32_or_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_OR | BPF_FETCH); -} - -static int bpf_fill_atomic32_xor_fetch_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_XOR | BPF_FETCH); -} - -static int bpf_fill_atomic32_xchg_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_XCHG); -} - -static int bpf_fill_atomic32_cmpxchg_reg_pairs(struct bpf_test *self) -{ - return __bpf_fill_atomic_reg_pairs(self, BPF_W, BPF_CMPXCHG); -} - -/* - * Test the two-instruction 64-bit immediate load operation for all - * power-of-two magnitudes of the immediate operand. For each MSB, a block - * of immediate values centered around the power-of-two MSB are tested, - * both for positive and negative values. The test is designed to verify - * the operation for JITs that emit different code depending on the magnitude - * of the immediate value. This is often the case if the native instruction - * immediate field width is narrower than 32 bits. - */ -static int bpf_fill_ld_imm64_magn(struct bpf_test *self) -{ - int block = 64; /* Increase for more tests per MSB position */ - int len = 3 + 8 * 63 * block * 2; - struct bpf_insn *insn; - int bit, adj, sign; - int i = 0; - - insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); - if (!insn) - return -ENOMEM; - - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 0); - - for (bit = 0; bit <= 62; bit++) { - for (adj = -block / 2; adj < block / 2; adj++) { - for (sign = -1; sign <= 1; sign += 2) { - s64 imm = sign * ((1LL << bit) + adj); - - /* Perform operation */ - i += __bpf_ld_imm64(&insn[i], R1, imm); - - /* Load reference */ - insn[i++] = BPF_ALU32_IMM(BPF_MOV, R2, imm); - insn[i++] = BPF_ALU32_IMM(BPF_MOV, R3, - (u32)(imm >> 32)); - insn[i++] = BPF_ALU64_IMM(BPF_LSH, R3, 32); - insn[i++] = BPF_ALU64_REG(BPF_OR, R2, R3); - - /* Check result */ - insn[i++] = BPF_JMP_REG(BPF_JEQ, R1, R2, 1); - insn[i++] = BPF_EXIT_INSN(); - } - } - } - - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); - insn[i++] = BPF_EXIT_INSN(); + insn[len - 1] = BPF_EXIT_INSN(); self->u.ptr.insns = insn; self->u.ptr.len = len; - BUG_ON(i != len); return 0; } -/* - * Test the two-instruction 64-bit immediate load operation for different - * combinations of bytes. Each byte in the 64-bit word is constructed as - * (base & mask) | (rand() & ~mask), where rand() is a deterministic LCG. - * All patterns (base1, mask1) and (base2, mask2) bytes are tested. 
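The ld_imm64 checks here build their reference value without a 64-bit load: ALU32 MOV of each 32-bit half (which zero-extends), LSH by 32 of the high half, then OR. A userspace sketch of that arithmetic, for illustration only:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int64_t imm = 0x0123456789abcdefLL;	/* sample immediate */
		uint64_t lo = (uint32_t)imm;		/* ALU32 MOV zero-extends */
		uint64_t hi = (uint32_t)(imm >> 32);
		uint64_t ref = (hi << 32) | lo;		/* LSH 32, then OR */

		printf("match: %d\n", ref == (uint64_t)imm);
		return 0;
	}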
- */ -static int __bpf_fill_ld_imm64_bytes(struct bpf_test *self, - u8 base1, u8 mask1, - u8 base2, u8 mask2) -{ - struct bpf_insn *insn; - int len = 3 + 8 * BIT(8); - int pattern, index; - u32 rand = 1; - int i = 0; - - insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); - if (!insn) - return -ENOMEM; - - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 0); - - for (pattern = 0; pattern < BIT(8); pattern++) { - u64 imm = 0; - - for (index = 0; index < 8; index++) { - int byte; - - if (pattern & BIT(index)) - byte = (base1 & mask1) | (rand & ~mask1); - else - byte = (base2 & mask2) | (rand & ~mask2); - imm = (imm << 8) | byte; - } - - /* Update our LCG */ - rand = rand * 1664525 + 1013904223; - - /* Perform operation */ - i += __bpf_ld_imm64(&insn[i], R1, imm); - - /* Load reference */ - insn[i++] = BPF_ALU32_IMM(BPF_MOV, R2, imm); - insn[i++] = BPF_ALU32_IMM(BPF_MOV, R3, (u32)(imm >> 32)); - insn[i++] = BPF_ALU64_IMM(BPF_LSH, R3, 32); - insn[i++] = BPF_ALU64_REG(BPF_OR, R2, R3); - - /* Check result */ - insn[i++] = BPF_JMP_REG(BPF_JEQ, R1, R2, 1); - insn[i++] = BPF_EXIT_INSN(); - } - - insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); - insn[i++] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insn; - self->u.ptr.len = len; - BUG_ON(i != len); - - return 0; -} - -static int bpf_fill_ld_imm64_checker(struct bpf_test *self) -{ - return __bpf_fill_ld_imm64_bytes(self, 0, 0xff, 0xff, 0xff); -} - -static int bpf_fill_ld_imm64_pos_neg(struct bpf_test *self) -{ - return __bpf_fill_ld_imm64_bytes(self, 1, 0x81, 0x80, 0x80); -} - -static int bpf_fill_ld_imm64_pos_zero(struct bpf_test *self) -{ - return __bpf_fill_ld_imm64_bytes(self, 1, 0x81, 0, 0xff); -} - -static int bpf_fill_ld_imm64_neg_zero(struct bpf_test *self) -{ - return __bpf_fill_ld_imm64_bytes(self, 0x80, 0x80, 0, 0xff); -} - -/* - * Exhaustive tests of JMP operations for all combinations of power-of-two - * magnitudes of the operands, both for positive and negative values. The - * test is designed to verify e.g. the JMP and JMP32 operations for JITs that - * emit different code depending on the magnitude of the immediate value. 
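One detail worth spelling out before the jump tests: the unsigned conditions (JGT, JGE, JLT, JLE) compare the operands as u64, while the JS* variants compare them as s64, which is why -1 compares greater than 1 for BPF_JGT but not for BPF_JSGT. A two-line userspace illustration:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int64_t v1 = -1, v2 = 1;

		printf("JGT:  %d\n", (uint64_t)v1 > (uint64_t)v2); /* 1: -1 is huge unsigned */
		printf("JSGT: %d\n", v1 > v2);                     /* 0: -1 < 1 signed */
		return 0;
	}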
- */ - -static bool __bpf_match_jmp_cond(s64 v1, s64 v2, u8 op) -{ - switch (op) { - case BPF_JSET: - return !!(v1 & v2); - case BPF_JEQ: - return v1 == v2; - case BPF_JNE: - return v1 != v2; - case BPF_JGT: - return (u64)v1 > (u64)v2; - case BPF_JGE: - return (u64)v1 >= (u64)v2; - case BPF_JLT: - return (u64)v1 < (u64)v2; - case BPF_JLE: - return (u64)v1 <= (u64)v2; - case BPF_JSGT: - return v1 > v2; - case BPF_JSGE: - return v1 >= v2; - case BPF_JSLT: - return v1 < v2; - case BPF_JSLE: - return v1 <= v2; - } - return false; -} - -static int __bpf_emit_jmp_imm(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 imm) -{ - int op = *(int *)arg; - - if (insns) { - bool match = __bpf_match_jmp_cond(dst, (s32)imm, op); - int i = 0; - - insns[i++] = BPF_ALU32_IMM(BPF_MOV, R0, match); - - i += __bpf_ld_imm64(&insns[i], R1, dst); - insns[i++] = BPF_JMP_IMM(op, R1, imm, 1); - if (!match) - insns[i++] = BPF_JMP_IMM(BPF_JA, 0, 0, 1); - insns[i++] = BPF_EXIT_INSN(); - - return i; - } - - return 5 + 1; -} - -static int __bpf_emit_jmp32_imm(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 imm) -{ - int op = *(int *)arg; - - if (insns) { - bool match = __bpf_match_jmp_cond((s32)dst, (s32)imm, op); - int i = 0; - - i += __bpf_ld_imm64(&insns[i], R1, dst); - insns[i++] = BPF_JMP32_IMM(op, R1, imm, 1); - if (!match) - insns[i++] = BPF_JMP_IMM(BPF_JA, 0, 0, 1); - insns[i++] = BPF_EXIT_INSN(); - - return i; - } - - return 5; -} - -static int __bpf_emit_jmp_reg(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int op = *(int *)arg; - - if (insns) { - bool match = __bpf_match_jmp_cond(dst, src, op); - int i = 0; - - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - insns[i++] = BPF_JMP_REG(op, R1, R2, 1); - if (!match) - insns[i++] = BPF_JMP_IMM(BPF_JA, 0, 0, 1); - insns[i++] = BPF_EXIT_INSN(); - - return i; - } - - return 7; -} - -static int __bpf_emit_jmp32_reg(struct bpf_test *self, void *arg, - struct bpf_insn *insns, s64 dst, s64 src) -{ - int op = *(int *)arg; - - if (insns) { - bool match = __bpf_match_jmp_cond((s32)dst, (s32)src, op); - int i = 0; - - i += __bpf_ld_imm64(&insns[i], R1, dst); - i += __bpf_ld_imm64(&insns[i], R2, src); - insns[i++] = BPF_JMP32_REG(op, R1, R2, 1); - if (!match) - insns[i++] = BPF_JMP_IMM(BPF_JA, 0, 0, 1); - insns[i++] = BPF_EXIT_INSN(); - - return i; - } - - return 7; -} - -static int __bpf_fill_jmp_imm(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 32, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_jmp_imm); -} - -static int __bpf_fill_jmp32_imm(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 32, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_jmp32_imm); -} - -static int __bpf_fill_jmp_reg(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 64, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_jmp_reg); -} - -static int __bpf_fill_jmp32_reg(struct bpf_test *self, int op) -{ - return __bpf_fill_pattern(self, &op, 64, 64, - PATTERN_BLOCK1, PATTERN_BLOCK2, - &__bpf_emit_jmp32_reg); -} - -/* JMP immediate tests */ -static int bpf_fill_jmp_jset_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JSET); -} - -static int bpf_fill_jmp_jeq_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JEQ); -} - -static int bpf_fill_jmp_jne_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JNE); -} - -static int bpf_fill_jmp_jgt_imm(struct 
bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JGT); -} - -static int bpf_fill_jmp_jge_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JGE); -} - -static int bpf_fill_jmp_jlt_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JLT); -} - -static int bpf_fill_jmp_jle_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JLE); -} - -static int bpf_fill_jmp_jsgt_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JSGT); -} - -static int bpf_fill_jmp_jsge_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JSGE); -} - -static int bpf_fill_jmp_jslt_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JSLT); -} - -static int bpf_fill_jmp_jsle_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp_imm(self, BPF_JSLE); -} - -/* JMP32 immediate tests */ -static int bpf_fill_jmp32_jset_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JSET); -} - -static int bpf_fill_jmp32_jeq_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JEQ); -} - -static int bpf_fill_jmp32_jne_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JNE); -} - -static int bpf_fill_jmp32_jgt_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JGT); -} - -static int bpf_fill_jmp32_jge_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JGE); -} - -static int bpf_fill_jmp32_jlt_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JLT); -} - -static int bpf_fill_jmp32_jle_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JLE); -} - -static int bpf_fill_jmp32_jsgt_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JSGT); -} - -static int bpf_fill_jmp32_jsge_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JSGE); -} - -static int bpf_fill_jmp32_jslt_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JSLT); -} - -static int bpf_fill_jmp32_jsle_imm(struct bpf_test *self) -{ - return __bpf_fill_jmp32_imm(self, BPF_JSLE); -} - -/* JMP register tests */ -static int bpf_fill_jmp_jset_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JSET); -} - -static int bpf_fill_jmp_jeq_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JEQ); -} - -static int bpf_fill_jmp_jne_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JNE); -} - -static int bpf_fill_jmp_jgt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JGT); -} - -static int bpf_fill_jmp_jge_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JGE); -} - -static int bpf_fill_jmp_jlt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JLT); -} - -static int bpf_fill_jmp_jle_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JLE); -} - -static int bpf_fill_jmp_jsgt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JSGT); -} - -static int bpf_fill_jmp_jsge_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JSGE); -} - -static int bpf_fill_jmp_jslt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JSLT); -} - -static int bpf_fill_jmp_jsle_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp_reg(self, BPF_JSLE); -} - -/* JMP32 register tests */ -static int bpf_fill_jmp32_jset_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JSET); -} - -static int bpf_fill_jmp32_jeq_reg(struct bpf_test *self) -{ - return 
__bpf_fill_jmp32_reg(self, BPF_JEQ); -} - -static int bpf_fill_jmp32_jne_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JNE); -} - -static int bpf_fill_jmp32_jgt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JGT); -} - -static int bpf_fill_jmp32_jge_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JGE); -} - -static int bpf_fill_jmp32_jlt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JLT); -} - -static int bpf_fill_jmp32_jle_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JLE); -} - -static int bpf_fill_jmp32_jsgt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JSGT); -} - -static int bpf_fill_jmp32_jsge_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JSGE); -} - -static int bpf_fill_jmp32_jslt_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JSLT); -} - -static int bpf_fill_jmp32_jsle_reg(struct bpf_test *self) -{ - return __bpf_fill_jmp32_reg(self, BPF_JSLE); -} - -/* - * Set up a sequence of staggered jumps, forwards and backwards with - * increasing offset. This tests the conversion of relative jumps to - * JITed native jumps. On some architectures, for example MIPS, a large - * PC-relative jump offset may overflow the immediate field of the native - * conditional branch instruction, triggering a conversion to use an - * absolute jump instead. Since this changes the jump offsets, another - * offset computation pass is necessary, and that may in turn trigger - * another branch conversion. This jump sequence is particularly nasty - * in that regard. - * - * The sequence generation is parameterized by size and jump type. - * The size must be even, and the expected result is always size + 1. - * Below is an example with size=8 and result=9. - * - * ________________________Start - * R0 = 0 - * R1 = r1 - * R2 = r2 - * ,------- JMP +4 * 3______________Preamble: 4 insns - * ,----------|-ind 0- if R0 != 7 JMP 8 * 3 + 1 <--------------------. - * | | R0 = 8 | - * | | JMP +7 * 3 ------------------------. - * | ,--------|-----1- if R0 != 5 JMP 7 * 3 + 1 <--------------. | | - * | | | R0 = 6 | | | - * | | | JMP +5 * 3 ------------------. | | - * | | ,------|-----2- if R0 != 3 JMP 6 * 3 + 1 <--------. | | | | - * | | | | R0 = 4 | | | | | - * | | | | JMP +3 * 3 ------------. | | | | - * | | | ,----|-----3- if R0 != 1 JMP 5 * 3 + 1 <--. | | | | | | - * | | | | | R0 = 2 | | | | | | | - * | | | | | JMP +1 * 3 ------. 
| | | | | | - * | | | | ,--t=====4> if R0 != 0 JMP 4 * 3 + 1 1 2 3 4 5 6 7 8 loc - * | | | | | R0 = 1 -1 +2 -3 +4 -5 +6 -7 +8 off - * | | | | | JMP -2 * 3 ---' | | | | | | | - * | | | | | ,------5- if R0 != 2 JMP 3 * 3 + 1 <-----' | | | | | | - * | | | | | | R0 = 3 | | | | | | - * | | | | | | JMP -4 * 3 ---------' | | | | | - * | | | | | | ,----6- if R0 != 4 JMP 2 * 3 + 1 <-----------' | | | | - * | | | | | | | R0 = 5 | | | | - * | | | | | | | JMP -6 * 3 ---------------' | | | - * | | | | | | | ,--7- if R0 != 6 JMP 1 * 3 + 1 <-----------------' | | - * | | | | | | | | R0 = 7 | | - * | | Error | | | JMP -8 * 3 ---------------------' | - * | | paths | | | ,8- if R0 != 8 JMP 0 * 3 + 1 <-----------------------' - * | | | | | | | | | R0 = 9__________________Sequence: 3 * size - 1 insns - * `-+-+-+-+-+-+-+-+-> EXIT____________________Return: 1 insn - * - */ - -/* The maximum size parameter */ -#define MAX_STAGGERED_JMP_SIZE ((0x7fff / 3) & ~1) - -/* We use a reduced number of iterations to get a reasonable execution time */ -#define NR_STAGGERED_JMP_RUNS 10 - -static int __bpf_fill_staggered_jumps(struct bpf_test *self, - const struct bpf_insn *jmp, - u64 r1, u64 r2) -{ - int size = self->test[0].result - 1; - int len = 4 + 3 * (size + 1); - struct bpf_insn *insns; - int off, ind; - - insns = kmalloc_array(len, sizeof(*insns), GFP_KERNEL); - if (!insns) - return -ENOMEM; - - /* Preamble */ - insns[0] = BPF_ALU64_IMM(BPF_MOV, R0, 0); - insns[1] = BPF_ALU64_IMM(BPF_MOV, R1, r1); - insns[2] = BPF_ALU64_IMM(BPF_MOV, R2, r2); - insns[3] = BPF_JMP_IMM(BPF_JA, 0, 0, 3 * size / 2); - - /* Sequence */ - for (ind = 0, off = size; ind <= size; ind++, off -= 2) { - struct bpf_insn *ins = &insns[4 + 3 * ind]; - int loc; - - if (off == 0) - off--; - - loc = abs(off); - ins[0] = BPF_JMP_IMM(BPF_JNE, R0, loc - 1, - 3 * (size - ind) + 1); - ins[1] = BPF_ALU64_IMM(BPF_MOV, R0, loc); - ins[2] = *jmp; - ins[2].off = 3 * (off - 1); - } - - /* Return */ - insns[len - 1] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insns; - self->u.ptr.len = len; - - return 0; -} - -/* 64-bit unconditional jump */ -static int bpf_fill_staggered_ja(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JA, 0, 0, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0, 0); -} - -/* 64-bit immediate jumps */ -static int bpf_fill_staggered_jeq_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JEQ, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jne_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JNE, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 4321, 0); -} - -static int bpf_fill_staggered_jset_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JSET, R1, 0x82, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0x86, 0); -} - -static int bpf_fill_staggered_jgt_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JGT, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0x80000000, 0); -} - -static int bpf_fill_staggered_jge_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JGE, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jlt_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JLT, R1, 0x80000000, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jle_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = 
BPF_JMP_IMM(BPF_JLE, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jsgt_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JSGT, R1, -2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, 0); -} - -static int bpf_fill_staggered_jsge_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JSGE, R1, -2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -2, 0); -} - -static int bpf_fill_staggered_jslt_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JSLT, R1, -1, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -2, 0); -} - -static int bpf_fill_staggered_jsle_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_IMM(BPF_JSLE, R1, -1, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, 0); -} - -/* 64-bit register jumps */ -static int bpf_fill_staggered_jeq_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JEQ, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 1234); -} - -static int bpf_fill_staggered_jne_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JNE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 4321, 1234); -} - -static int bpf_fill_staggered_jset_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JSET, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0x86, 0x82); -} - -static int bpf_fill_staggered_jgt_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JGT, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0x80000000, 1234); -} - -static int bpf_fill_staggered_jge_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JGE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 1234); -} - -static int bpf_fill_staggered_jlt_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JLT, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0x80000000); -} - -static int bpf_fill_staggered_jle_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JLE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 1234); -} - -static int bpf_fill_staggered_jsgt_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JSGT, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, -2); -} - -static int bpf_fill_staggered_jsge_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JSGE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -2, -2); -} - -static int bpf_fill_staggered_jslt_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JSLT, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -2, -1); -} - -static int bpf_fill_staggered_jsle_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP_REG(BPF_JSLE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, -1); -} - -/* 32-bit immediate jumps */ -static int bpf_fill_staggered_jeq32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JEQ, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jne32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JNE, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 4321, 0); -} - -static int bpf_fill_staggered_jset32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JSET, R1, 0x82, 0); - - return 
__bpf_fill_staggered_jumps(self, &jmp, 0x86, 0); -} - -static int bpf_fill_staggered_jgt32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JGT, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0x80000000, 0); -} - -static int bpf_fill_staggered_jge32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JGE, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jlt32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JLT, R1, 0x80000000, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jle32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JLE, R1, 1234, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0); -} - -static int bpf_fill_staggered_jsgt32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JSGT, R1, -2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, 0); -} - -static int bpf_fill_staggered_jsge32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JSGE, R1, -2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -2, 0); -} - -static int bpf_fill_staggered_jslt32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JSLT, R1, -1, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -2, 0); -} - -static int bpf_fill_staggered_jsle32_imm(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_IMM(BPF_JSLE, R1, -1, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, 0); -} - -/* 32-bit register jumps */ -static int bpf_fill_staggered_jeq32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JEQ, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 1234); -} - -static int bpf_fill_staggered_jne32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JNE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 4321, 1234); -} - -static int bpf_fill_staggered_jset32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JSET, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0x86, 0x82); -} - -static int bpf_fill_staggered_jgt32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JGT, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 0x80000000, 1234); -} - -static int bpf_fill_staggered_jge32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JGE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 1234); -} - -static int bpf_fill_staggered_jlt32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JLT, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 0x80000000); -} - -static int bpf_fill_staggered_jle32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JLE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, 1234, 1234); -} - -static int bpf_fill_staggered_jsgt32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JSGT, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, -2); -} - -static int bpf_fill_staggered_jsge32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JSGE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -2, -2); -} - -static int bpf_fill_staggered_jslt32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JSLT, R1, R2, 0); - - return 
__bpf_fill_staggered_jumps(self, &jmp, -2, -1); -} - -static int bpf_fill_staggered_jsle32_reg(struct bpf_test *self) -{ - struct bpf_insn jmp = BPF_JMP32_REG(BPF_JSLE, R1, R2, 0); - - return __bpf_fill_staggered_jumps(self, &jmp, -1, -1); -} - - static struct bpf_test tests[] = { { "TAX", @@ -4514,6 +1951,147 @@ static struct bpf_test tests[] = { { }, { { 0, -1 } } }, + { + /* + * Register (non-)clobbering test, in the case where a 32-bit + * JIT implements complex ALU64 operations via function calls. + * If so, the function call must be invisible in the eBPF + * registers. The JIT must then save and restore relevant + * registers during the call. The following tests check that + * the eBPF registers retain their values after such a call. + */ + "INT: Register clobbering, R1 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 123456789), + BPF_ALU32_IMM(BPF_MOV, R2, 2), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R1, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: Register clobbering, R2 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R2, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + /* + * Test 32-bit JITs that implement complex ALU64 operations as + * function calls R0 = f(R1, R2), and must re-arrange operands. 
+ */ +#define NUMER 0xfedcba9876543210ULL +#define DENOM 0x0123456789abcdefULL + "ALU64_DIV X: Operand register permutations", + .u.insns_int = { + /* R0 / R2 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R2), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R0 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R0), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R0 / R1 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R0 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R0), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R1 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R1), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R2 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R2), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R1 */ + BPF_LD_IMM64(R1, NUMER), + BPF_ALU64_REG(BPF_DIV, R1, R1), + BPF_JMP_IMM(BPF_JEQ, R1, 1, 1), + BPF_EXIT_INSN(), + /* R2 / R2 */ + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R2), + BPF_JMP_IMM(BPF_JEQ, R2, 1, 1), + BPF_EXIT_INSN(), + /* R3 / R4 */ + BPF_LD_IMM64(R3, NUMER), + BPF_LD_IMM64(R4, DENOM), + BPF_ALU64_REG(BPF_DIV, R3, R4), + BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* Successful return */ + BPF_LD_IMM64(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, +#undef NUMER +#undef DENOM + }, #ifdef CONFIG_32BIT { "INT: 32-bit context pointer word order and zero-extension", @@ -7677,67 +5255,6 @@ static struct bpf_test tests[] = { { }, { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } }, }, - { - "ALU_END_FROM_BE 64: 0x0123456789abcdef >> 32 -> 0x01234567", - .u.insns_int = { - BPF_LD_IMM64(R0, 0x0123456789abcdefLL), - BPF_ENDIAN(BPF_FROM_BE, R0, 64), - BPF_ALU64_IMM(BPF_RSH, R0, 32), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, (u32) (cpu_to_be64(0x0123456789abcdefLL) >> 32) } }, - }, - /* BPF_ALU | BPF_END | BPF_FROM_BE, reversed */ - { - "ALU_END_FROM_BE 16: 0xfedcba9876543210 -> 0x3210", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_BE, R0, 16), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, cpu_to_be16(0x3210) } }, - }, - { - "ALU_END_FROM_BE 32: 0xfedcba9876543210 -> 0x76543210", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_BE, R0, 32), - BPF_ALU64_REG(BPF_MOV, R1, R0), - BPF_ALU64_IMM(BPF_RSH, R1, 32), - BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, cpu_to_be32(0x76543210) } }, - }, - { - "ALU_END_FROM_BE 64: 0xfedcba9876543210 -> 0x76543210", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_BE, R0, 64), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, (u32) cpu_to_be64(0xfedcba9876543210ULL) } }, - }, - { - "ALU_END_FROM_BE 64: 0xfedcba9876543210 >> 32 -> 0xfedcba98", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_BE, R0, 64), - BPF_ALU64_IMM(BPF_RSH, R0, 32), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, (u32) (cpu_to_be64(0xfedcba9876543210ULL) >> 32) } }, - }, /* BPF_ALU | BPF_END | BPF_FROM_LE */ { "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd", @@ 
-7775,321 +5292,6 @@ static struct bpf_test tests[] = { { }, { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } }, }, - { - "ALU_END_FROM_LE 64: 0x0123456789abcdef >> 32 -> 0xefcdab89", - .u.insns_int = { - BPF_LD_IMM64(R0, 0x0123456789abcdefLL), - BPF_ENDIAN(BPF_FROM_LE, R0, 64), - BPF_ALU64_IMM(BPF_RSH, R0, 32), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, (u32) (cpu_to_le64(0x0123456789abcdefLL) >> 32) } }, - }, - /* BPF_ALU | BPF_END | BPF_FROM_LE, reversed */ - { - "ALU_END_FROM_LE 16: 0xfedcba9876543210 -> 0x1032", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_LE, R0, 16), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, cpu_to_le16(0x3210) } }, - }, - { - "ALU_END_FROM_LE 32: 0xfedcba9876543210 -> 0x10325476", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_LE, R0, 32), - BPF_ALU64_REG(BPF_MOV, R1, R0), - BPF_ALU64_IMM(BPF_RSH, R1, 32), - BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, cpu_to_le32(0x76543210) } }, - }, - { - "ALU_END_FROM_LE 64: 0xfedcba9876543210 -> 0x10325476", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_LE, R0, 64), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, (u32) cpu_to_le64(0xfedcba9876543210ULL) } }, - }, - { - "ALU_END_FROM_LE 64: 0xfedcba9876543210 >> 32 -> 0x98badcfe", - .u.insns_int = { - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), - BPF_ENDIAN(BPF_FROM_LE, R0, 64), - BPF_ALU64_IMM(BPF_RSH, R0, 32), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, (u32) (cpu_to_le64(0xfedcba9876543210ULL) >> 32) } }, - }, - /* BPF_LDX_MEM B/H/W/DW */ - { - "BPF_LDX_MEM | BPF_B", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x0102030405060708ULL), - BPF_LD_IMM64(R2, 0x0000000000000008ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_LDX_MEM(BPF_B, R0, R10, -1), -#else - BPF_LDX_MEM(BPF_B, R0, R10, -8), -#endif - BPF_JMP_REG(BPF_JNE, R0, R2, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_LDX_MEM | BPF_B, MSB set", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8182838485868788ULL), - BPF_LD_IMM64(R2, 0x0000000000000088ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_LDX_MEM(BPF_B, R0, R10, -1), -#else - BPF_LDX_MEM(BPF_B, R0, R10, -8), -#endif - BPF_JMP_REG(BPF_JNE, R0, R2, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_LDX_MEM | BPF_H", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x0102030405060708ULL), - BPF_LD_IMM64(R2, 0x0000000000000708ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_LDX_MEM(BPF_H, R0, R10, -2), -#else - BPF_LDX_MEM(BPF_H, R0, R10, -8), -#endif - BPF_JMP_REG(BPF_JNE, R0, R2, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_LDX_MEM | BPF_H, MSB set", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8182838485868788ULL), - BPF_LD_IMM64(R2, 0x0000000000008788ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_LDX_MEM(BPF_H, R0, R10, -2), -#else - BPF_LDX_MEM(BPF_H, R0, R10, -8), -#endif - BPF_JMP_REG(BPF_JNE, R0, R2, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_LDX_MEM | BPF_W", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x0102030405060708ULL), - BPF_LD_IMM64(R2, 0x0000000005060708ULL), - 
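/*
 * A note on the offsets in these load tests (annotation; byte layouts
 * shown for illustration only): the sub-word loads must target the
 * least-significant bytes of the double word stored at fp-8, and those
 * bytes sit at different offsets depending on byte order. For
 * 0x0102030405060708 stored at fp-8:
 *
 *	little endian: 08 07 06 05 04 03 02 01   (fp-8 .. fp-1)
 *	big endian:    01 02 03 04 05 06 07 08   (fp-8 .. fp-1)
 *
 * so the low word 0x05060708 lives at offset -8 on LE but -4 on BE,
 * which is exactly what the #ifdef __BIG_ENDIAN variants select.
 */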
BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_LDX_MEM(BPF_W, R0, R10, -4), -#else - BPF_LDX_MEM(BPF_W, R0, R10, -8), -#endif - BPF_JMP_REG(BPF_JNE, R0, R2, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_LDX_MEM | BPF_W, MSB set", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8182838485868788ULL), - BPF_LD_IMM64(R2, 0x0000000085868788ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_LDX_MEM(BPF_W, R0, R10, -4), -#else - BPF_LDX_MEM(BPF_W, R0, R10, -8), -#endif - BPF_JMP_REG(BPF_JNE, R0, R2, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - /* BPF_STX_MEM B/H/W/DW */ - { - "BPF_STX_MEM | BPF_B", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8090a0b0c0d0e0f0ULL), - BPF_LD_IMM64(R2, 0x0102030405060708ULL), - BPF_LD_IMM64(R3, 0x8090a0b0c0d0e008ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_STX_MEM(BPF_B, R10, R2, -1), -#else - BPF_STX_MEM(BPF_B, R10, R2, -8), -#endif - BPF_LDX_MEM(BPF_DW, R0, R10, -8), - BPF_JMP_REG(BPF_JNE, R0, R3, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_STX_MEM | BPF_B, MSB set", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8090a0b0c0d0e0f0ULL), - BPF_LD_IMM64(R2, 0x8182838485868788ULL), - BPF_LD_IMM64(R3, 0x8090a0b0c0d0e088ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_STX_MEM(BPF_B, R10, R2, -1), -#else - BPF_STX_MEM(BPF_B, R10, R2, -8), -#endif - BPF_LDX_MEM(BPF_DW, R0, R10, -8), - BPF_JMP_REG(BPF_JNE, R0, R3, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_STX_MEM | BPF_H", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8090a0b0c0d0e0f0ULL), - BPF_LD_IMM64(R2, 0x0102030405060708ULL), - BPF_LD_IMM64(R3, 0x8090a0b0c0d00708ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_STX_MEM(BPF_H, R10, R2, -2), -#else - BPF_STX_MEM(BPF_H, R10, R2, -8), -#endif - BPF_LDX_MEM(BPF_DW, R0, R10, -8), - BPF_JMP_REG(BPF_JNE, R0, R3, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_STX_MEM | BPF_H, MSB set", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8090a0b0c0d0e0f0ULL), - BPF_LD_IMM64(R2, 0x8182838485868788ULL), - BPF_LD_IMM64(R3, 0x8090a0b0c0d08788ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_STX_MEM(BPF_H, R10, R2, -2), -#else - BPF_STX_MEM(BPF_H, R10, R2, -8), -#endif - BPF_LDX_MEM(BPF_DW, R0, R10, -8), - BPF_JMP_REG(BPF_JNE, R0, R3, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_STX_MEM | BPF_W", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8090a0b0c0d0e0f0ULL), - BPF_LD_IMM64(R2, 0x0102030405060708ULL), - BPF_LD_IMM64(R3, 0x8090a0b005060708ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN - BPF_STX_MEM(BPF_W, R10, R2, -4), -#else - BPF_STX_MEM(BPF_W, R10, R2, -8), -#endif - BPF_LDX_MEM(BPF_DW, R0, R10, -8), - BPF_JMP_REG(BPF_JNE, R0, R3, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - { - "BPF_STX_MEM | BPF_W, MSB set", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x8090a0b0c0d0e0f0ULL), - BPF_LD_IMM64(R2, 0x8182838485868788ULL), - BPF_LD_IMM64(R3, 0x8090a0b085868788ULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), -#ifdef __BIG_ENDIAN 
- BPF_STX_MEM(BPF_W, R10, R2, -4), -#else - BPF_STX_MEM(BPF_W, R10, R2, -8), -#endif - BPF_LDX_MEM(BPF_DW, R0, R10, -8), - BPF_JMP_REG(BPF_JNE, R0, R3, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */ { "ST_MEM_B: Store/Load byte: max negative", @@ -8327,20 +5529,15 @@ static struct bpf_test tests[] = { * Individual tests are expanded from template macros for all * combinations of ALU operation, word size and fetching. */ -#define BPF_ATOMIC_POISON(width) ((width) == BPF_W ? (0xbaadf00dULL << 32) : 0) - #define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result) \ { \ "BPF_ATOMIC | " #width ", " #op ": Test: " \ #old " " #logic " " #update " = " #result, \ .u.insns_int = { \ - BPF_LD_IMM64(R5, (update) | BPF_ATOMIC_POISON(width)), \ + BPF_ALU32_IMM(BPF_MOV, R5, update), \ BPF_ST_MEM(width, R10, -40, old), \ BPF_ATOMIC_OP(width, op, R10, R5, -40), \ BPF_LDX_MEM(width, R0, R10, -40), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ALU64_IMM(BPF_RSH, R1, 32), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ BPF_EXIT_INSN(), \ }, \ INTERNAL, \ @@ -8354,14 +5551,11 @@ static struct bpf_test tests[] = { #old " " #logic " " #update " = " #result, \ .u.insns_int = { \ BPF_ALU64_REG(BPF_MOV, R1, R10), \ - BPF_LD_IMM64(R0, (update) | BPF_ATOMIC_POISON(width)), \ + BPF_ALU32_IMM(BPF_MOV, R0, update), \ BPF_ST_MEM(BPF_W, R10, -40, old), \ BPF_ATOMIC_OP(width, op, R10, R0, -40), \ BPF_ALU64_REG(BPF_MOV, R0, R10), \ BPF_ALU64_REG(BPF_SUB, R0, R1), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ALU64_IMM(BPF_RSH, R1, 32), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ BPF_EXIT_INSN(), \ }, \ INTERNAL, \ @@ -8375,13 +5569,10 @@ static struct bpf_test tests[] = { #old " " #logic " " #update " = " #result, \ .u.insns_int = { \ BPF_ALU64_REG(BPF_MOV, R0, R10), \ - BPF_LD_IMM64(R1, (update) | BPF_ATOMIC_POISON(width)), \ + BPF_ALU32_IMM(BPF_MOV, R1, update), \ BPF_ST_MEM(width, R10, -40, old), \ BPF_ATOMIC_OP(width, op, R10, R1, -40), \ BPF_ALU64_REG(BPF_SUB, R0, R10), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ALU64_IMM(BPF_RSH, R1, 32), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ BPF_EXIT_INSN(), \ }, \ INTERNAL, \ @@ -8394,10 +5585,10 @@ static struct bpf_test tests[] = { "BPF_ATOMIC | " #width ", " #op ": Test fetch: " \ #old " " #logic " " #update " = " #result, \ .u.insns_int = { \ - BPF_LD_IMM64(R3, (update) | BPF_ATOMIC_POISON(width)), \ + BPF_ALU32_IMM(BPF_MOV, R3, update), \ BPF_ST_MEM(width, R10, -40, old), \ BPF_ATOMIC_OP(width, op, R10, R3, -40), \ - BPF_ALU32_REG(BPF_MOV, R0, R3), \ + BPF_ALU64_REG(BPF_MOV, R0, R3), \ BPF_EXIT_INSN(), \ }, \ INTERNAL, \ @@ -8495,7 +5686,6 @@ static struct bpf_test tests[] = { BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), -#undef BPF_ATOMIC_POISON #undef BPF_ATOMIC_OP_TEST1 #undef BPF_ATOMIC_OP_TEST2 #undef BPF_ATOMIC_OP_TEST3 @@ -8580,7 +5770,7 @@ static struct bpf_test tests[] = { "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return", .u.insns_int = { BPF_LD_IMM64(R1, 0x0123456789abcdefULL), - BPF_LD_IMM64(R2, 0xfedcba9876543210ULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), BPF_ALU64_REG(BPF_MOV, R0, R1), BPF_STX_MEM(BPF_DW, R10, R1, -40), BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), @@ -8597,7 +5787,7 @@ static struct bpf_test tests[] = { "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store", .u.insns_int = { 
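/*
 * For reference: BPF_CMPXCHG implicitly compares the memory operand
 * against R0 and also returns the old memory value in R0; the store
 * happens only on a match. That is why this test first copies the
 * value it stores into R0 before issuing the operation.
 */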
BPF_LD_IMM64(R1, 0x0123456789abcdefULL), - BPF_LD_IMM64(R2, 0xfedcba9876543210ULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), BPF_ALU64_REG(BPF_MOV, R0, R1), BPF_STX_MEM(BPF_DW, R10, R0, -40), BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), @@ -8615,7 +5805,7 @@ static struct bpf_test tests[] = { "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return", .u.insns_int = { BPF_LD_IMM64(R1, 0x0123456789abcdefULL), - BPF_LD_IMM64(R2, 0xfedcba9876543210ULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), BPF_ALU64_REG(BPF_MOV, R0, R1), BPF_ALU64_IMM(BPF_ADD, R0, 1), BPF_STX_MEM(BPF_DW, R10, R1, -40), @@ -8633,7 +5823,7 @@ static struct bpf_test tests[] = { "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store", .u.insns_int = { BPF_LD_IMM64(R1, 0x0123456789abcdefULL), - BPF_LD_IMM64(R2, 0xfedcba9876543210ULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), BPF_ALU64_REG(BPF_MOV, R0, R1), BPF_ALU64_IMM(BPF_ADD, R0, 1), BPF_STX_MEM(BPF_DW, R10, R1, -40), @@ -8652,11 +5842,11 @@ static struct bpf_test tests[] = { "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects", .u.insns_int = { BPF_LD_IMM64(R1, 0x0123456789abcdefULL), - BPF_LD_IMM64(R2, 0xfedcba9876543210ULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), BPF_ALU64_REG(BPF_MOV, R0, R1), BPF_STX_MEM(BPF_DW, R10, R1, -40), BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), - BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_LD_IMM64(R0, 0xfecdba9876543210ULL), BPF_JMP_REG(BPF_JNE, R0, R2, 1), BPF_ALU64_REG(BPF_SUB, R0, R2), BPF_EXIT_INSN(), @@ -10002,6 +7192,14 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Very long conditional jump", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_long_jmp, + }, { "JMP_JA: Jump, gap, jump, ...", { }, @@ -11215,2841 +8413,6 @@ static struct bpf_test tests[] = { {}, { { 0, 2 } }, }, - /* BPF_LDX_MEM with operand aliasing */ - { - "LDX_MEM_B: operand register aliasing", - .u.insns_int = { - BPF_ST_MEM(BPF_B, R10, -8, 123), - BPF_MOV64_REG(R0, R10), - BPF_LDX_MEM(BPF_B, R0, R0, -8), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 123 } }, - .stack_depth = 8, - }, - { - "LDX_MEM_H: operand register aliasing", - .u.insns_int = { - BPF_ST_MEM(BPF_H, R10, -8, 12345), - BPF_MOV64_REG(R0, R10), - BPF_LDX_MEM(BPF_H, R0, R0, -8), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 12345 } }, - .stack_depth = 8, - }, - { - "LDX_MEM_W: operand register aliasing", - .u.insns_int = { - BPF_ST_MEM(BPF_W, R10, -8, 123456789), - BPF_MOV64_REG(R0, R10), - BPF_LDX_MEM(BPF_W, R0, R0, -8), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 123456789 } }, - .stack_depth = 8, - }, - { - "LDX_MEM_DW: operand register aliasing", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x123456789abcdefULL), - BPF_STX_MEM(BPF_DW, R10, R1, -8), - BPF_MOV64_REG(R0, R10), - BPF_LDX_MEM(BPF_DW, R0, R0, -8), - BPF_ALU64_REG(BPF_SUB, R0, R1), - BPF_MOV64_REG(R1, R0), - BPF_ALU64_IMM(BPF_RSH, R1, 32), - BPF_ALU64_REG(BPF_OR, R0, R1), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - /* - * Register (non-)clobbering tests for the case where a JIT implements - * complex ALU or ATOMIC operations via function calls. If so, the - * function call must be transparent to the eBPF registers. The JIT - * must therefore save and restore relevant registers across the call. - * The following tests check that the eBPF registers retain their - * values after such an operation. 
Mainly intended for complex ALU - * and atomic operations, but we run it for all. You never know... - * - * Note that each operation should be tested twice with different - * destinations, to check preservation for all registers. - */ -#define BPF_TEST_CLOBBER_ALU(alu, op, dst, src) \ - { \ - #alu "_" #op " to " #dst ": no clobbering", \ - .u.insns_int = { \ - BPF_ALU64_IMM(BPF_MOV, R0, R0), \ - BPF_ALU64_IMM(BPF_MOV, R1, R1), \ - BPF_ALU64_IMM(BPF_MOV, R2, R2), \ - BPF_ALU64_IMM(BPF_MOV, R3, R3), \ - BPF_ALU64_IMM(BPF_MOV, R4, R4), \ - BPF_ALU64_IMM(BPF_MOV, R5, R5), \ - BPF_ALU64_IMM(BPF_MOV, R6, R6), \ - BPF_ALU64_IMM(BPF_MOV, R7, R7), \ - BPF_ALU64_IMM(BPF_MOV, R8, R8), \ - BPF_ALU64_IMM(BPF_MOV, R9, R9), \ - BPF_##alu(BPF_ ##op, dst, src), \ - BPF_ALU32_IMM(BPF_MOV, dst, dst), \ - BPF_JMP_IMM(BPF_JNE, R0, R0, 10), \ - BPF_JMP_IMM(BPF_JNE, R1, R1, 9), \ - BPF_JMP_IMM(BPF_JNE, R2, R2, 8), \ - BPF_JMP_IMM(BPF_JNE, R3, R3, 7), \ - BPF_JMP_IMM(BPF_JNE, R4, R4, 6), \ - BPF_JMP_IMM(BPF_JNE, R5, R5, 5), \ - BPF_JMP_IMM(BPF_JNE, R6, R6, 4), \ - BPF_JMP_IMM(BPF_JNE, R7, R7, 3), \ - BPF_JMP_IMM(BPF_JNE, R8, R8, 2), \ - BPF_JMP_IMM(BPF_JNE, R9, R9, 1), \ - BPF_ALU64_IMM(BPF_MOV, R0, 1), \ - BPF_EXIT_INSN(), \ - }, \ - INTERNAL, \ - { }, \ - { { 0, 1 } } \ - } - /* ALU64 operations, register clobbering */ - BPF_TEST_CLOBBER_ALU(ALU64_IMM, AND, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, AND, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, OR, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, OR, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, XOR, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, XOR, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, LSH, R8, 12), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, LSH, R9, 12), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, RSH, R8, 12), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, RSH, R9, 12), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, ARSH, R8, 12), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, ARSH, R9, 12), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, ADD, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, ADD, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, SUB, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, SUB, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, MUL, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, MUL, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, DIV, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, DIV, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, MOD, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU64_IMM, MOD, R9, 123456789), - /* ALU32 immediate operations, register clobbering */ - BPF_TEST_CLOBBER_ALU(ALU32_IMM, AND, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, AND, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, OR, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, OR, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, XOR, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, XOR, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, LSH, R8, 12), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, LSH, R9, 12), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, RSH, R8, 12), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, RSH, R9, 12), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, ARSH, R8, 12), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, ARSH, R9, 12), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, ADD, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, ADD, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, SUB, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, SUB, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, MUL, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, MUL, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, DIV, R8, 123456789), -
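/*
 * Worked example of the seeding trick above (illustrative; it assumes
 * the Rn aliases in this file map to the register numbers 0..9, which
 * is what the seeding relies on): each BPF_ALU64_IMM(BPF_MOV, Rn, Rn)
 * loads the immediate n into Rn, so every register holds its own index
 * before the operation under test runs, and BPF_JMP_IMM(BPF_JNE, Rn,
 * Rn, ...) then checks it still does afterwards. After the operation,
 * BPF_ALU32_IMM(BPF_MOV, dst, dst) restores dst to its index so the
 * uniform checks can run. Testing each op once with dst R8 and once
 * with dst R9 ensures that, between the two cases, all of R0..R9 are
 * checked for clobbering.
 */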
BPF_TEST_CLOBBER_ALU(ALU32_IMM, DIV, R9, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, MOD, R8, 123456789), - BPF_TEST_CLOBBER_ALU(ALU32_IMM, MOD, R9, 123456789), - /* ALU64 register operations, register clobbering */ - BPF_TEST_CLOBBER_ALU(ALU64_REG, AND, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, AND, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, OR, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, OR, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, XOR, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, XOR, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, LSH, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, LSH, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, RSH, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, RSH, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, ARSH, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, ARSH, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, ADD, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, ADD, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, SUB, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, SUB, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, MUL, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, MUL, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, DIV, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, DIV, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, MOD, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU64_REG, MOD, R9, R1), - /* ALU32 register operations, register clobbering */ - BPF_TEST_CLOBBER_ALU(ALU32_REG, AND, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, AND, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, OR, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, OR, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, XOR, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, XOR, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, LSH, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, LSH, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, RSH, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, RSH, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, ARSH, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, ARSH, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, ADD, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, ADD, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, SUB, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, SUB, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, MUL, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, MUL, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, DIV, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, DIV, R9, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, MOD, R8, R1), - BPF_TEST_CLOBBER_ALU(ALU32_REG, MOD, R9, R1), -#undef BPF_TEST_CLOBBER_ALU -#define BPF_TEST_CLOBBER_ATOMIC(width, op) \ - { \ - "Atomic_" #width " " #op ": no clobbering", \ - .u.insns_int = { \ - BPF_ALU64_IMM(BPF_MOV, R0, 0), \ - BPF_ALU64_IMM(BPF_MOV, R1, 1), \ - BPF_ALU64_IMM(BPF_MOV, R2, 2), \ - BPF_ALU64_IMM(BPF_MOV, R3, 3), \ - BPF_ALU64_IMM(BPF_MOV, R4, 4), \ - BPF_ALU64_IMM(BPF_MOV, R5, 5), \ - BPF_ALU64_IMM(BPF_MOV, R6, 6), \ - BPF_ALU64_IMM(BPF_MOV, R7, 7), \ - BPF_ALU64_IMM(BPF_MOV, R8, 8), \ - BPF_ALU64_IMM(BPF_MOV, R9, 9), \ - BPF_ST_MEM(width, R10, -8, \ - (op) == BPF_CMPXCHG ? 0 : \ - (op) & BPF_FETCH ? 
1 : 0), \ - BPF_ATOMIC_OP(width, op, R10, R1, -8), \ - BPF_JMP_IMM(BPF_JNE, R0, 0, 10), \ - BPF_JMP_IMM(BPF_JNE, R1, 1, 9), \ - BPF_JMP_IMM(BPF_JNE, R2, 2, 8), \ - BPF_JMP_IMM(BPF_JNE, R3, 3, 7), \ - BPF_JMP_IMM(BPF_JNE, R4, 4, 6), \ - BPF_JMP_IMM(BPF_JNE, R5, 5, 5), \ - BPF_JMP_IMM(BPF_JNE, R6, 6, 4), \ - BPF_JMP_IMM(BPF_JNE, R7, 7, 3), \ - BPF_JMP_IMM(BPF_JNE, R8, 8, 2), \ - BPF_JMP_IMM(BPF_JNE, R9, 9, 1), \ - BPF_ALU64_IMM(BPF_MOV, R0, 1), \ - BPF_EXIT_INSN(), \ - }, \ - INTERNAL, \ - { }, \ - { { 0, 1 } }, \ - .stack_depth = 8, \ - } - /* 64-bit atomic operations, register clobbering */ - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_ADD), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_AND), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_OR), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_XOR), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_ADD | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_AND | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_OR | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_XOR | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_XCHG), - BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_CMPXCHG), - /* 32-bit atomic operations, register clobbering */ - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_ADD), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_AND), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_OR), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_XOR), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_ADD | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_AND | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_OR | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_XOR | BPF_FETCH), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_XCHG), - BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_CMPXCHG), -#undef BPF_TEST_CLOBBER_ATOMIC - /* Checking that ALU32 src is not zero extended in place */ -#define BPF_ALU32_SRC_ZEXT(op) \ - { \ - "ALU32_" #op "_X: src preserved in zext", \ - .u.insns_int = { \ - BPF_LD_IMM64(R1, 0x0123456789acbdefULL),\ - BPF_LD_IMM64(R2, 0xfedcba9876543210ULL),\ - BPF_ALU64_REG(BPF_MOV, R0, R1), \ - BPF_ALU32_REG(BPF_##op, R2, R1), \ - BPF_ALU64_REG(BPF_SUB, R0, R1), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ALU64_IMM(BPF_RSH, R1, 32), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ - BPF_EXIT_INSN(), \ - }, \ - INTERNAL, \ - { }, \ - { { 0, 0 } }, \ - } - BPF_ALU32_SRC_ZEXT(MOV), - BPF_ALU32_SRC_ZEXT(AND), - BPF_ALU32_SRC_ZEXT(OR), - BPF_ALU32_SRC_ZEXT(XOR), - BPF_ALU32_SRC_ZEXT(ADD), - BPF_ALU32_SRC_ZEXT(SUB), - BPF_ALU32_SRC_ZEXT(MUL), - BPF_ALU32_SRC_ZEXT(DIV), - BPF_ALU32_SRC_ZEXT(MOD), -#undef BPF_ALU32_SRC_ZEXT - /* Checking that ATOMIC32 src is not zero extended in place */ -#define BPF_ATOMIC32_SRC_ZEXT(op) \ - { \ - "ATOMIC_W_" #op ": src preserved in zext", \ - .u.insns_int = { \ - BPF_LD_IMM64(R0, 0x0123456789acbdefULL), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ST_MEM(BPF_W, R10, -4, 0), \ - BPF_ATOMIC_OP(BPF_W, BPF_##op, R10, R1, -4), \ - BPF_ALU64_REG(BPF_SUB, R0, R1), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ALU64_IMM(BPF_RSH, R1, 32), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ - BPF_EXIT_INSN(), \ - }, \ - INTERNAL, \ - { }, \ - { { 0, 0 } }, \ - .stack_depth = 8, \ - } - BPF_ATOMIC32_SRC_ZEXT(ADD), - BPF_ATOMIC32_SRC_ZEXT(AND), - BPF_ATOMIC32_SRC_ZEXT(OR), - BPF_ATOMIC32_SRC_ZEXT(XOR), -#undef BPF_ATOMIC32_SRC_ZEXT - /* Checking that CMPXCHG32 src is not zero extended in place */ - { - "ATOMIC_W_CMPXCHG: src preserved in zext", - .u.insns_int = { - BPF_LD_IMM64(R1, 0x0123456789acbdefULL), - BPF_ALU64_REG(BPF_MOV, R2, R1), - BPF_ALU64_REG(BPF_MOV, R0, 0), - BPF_ST_MEM(BPF_W, R10, -4, 0), - BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R1, -4), - BPF_ALU64_REG(BPF_SUB, R1, 
R2), - BPF_ALU64_REG(BPF_MOV, R2, R1), - BPF_ALU64_IMM(BPF_RSH, R2, 32), - BPF_ALU64_REG(BPF_OR, R1, R2), - BPF_ALU64_REG(BPF_MOV, R0, R1), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 8, - }, - /* Checking that JMP32 immediate src is not zero extended in place */ -#define BPF_JMP32_IMM_ZEXT(op) \ - { \ - "JMP32_" #op "_K: operand preserved in zext", \ - .u.insns_int = { \ - BPF_LD_IMM64(R0, 0x0123456789acbdefULL),\ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_JMP32_IMM(BPF_##op, R0, 1234, 1), \ - BPF_JMP_A(0), /* Nop */ \ - BPF_ALU64_REG(BPF_SUB, R0, R1), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ALU64_IMM(BPF_RSH, R1, 32), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ - BPF_EXIT_INSN(), \ - }, \ - INTERNAL, \ - { }, \ - { { 0, 0 } }, \ - } - BPF_JMP32_IMM_ZEXT(JEQ), - BPF_JMP32_IMM_ZEXT(JNE), - BPF_JMP32_IMM_ZEXT(JSET), - BPF_JMP32_IMM_ZEXT(JGT), - BPF_JMP32_IMM_ZEXT(JGE), - BPF_JMP32_IMM_ZEXT(JLT), - BPF_JMP32_IMM_ZEXT(JLE), - BPF_JMP32_IMM_ZEXT(JSGT), - BPF_JMP32_IMM_ZEXT(JSGE), - BPF_JMP32_IMM_ZEXT(JSGT), - BPF_JMP32_IMM_ZEXT(JSLT), - BPF_JMP32_IMM_ZEXT(JSLE), -#undef BPF_JMP32_IMM_ZEXT - /* Checking that JMP32 dst & src are not zero extended in place */ -#define BPF_JMP32_REG_ZEXT(op) \ - { \ - "JMP32_" #op "_X: operands preserved in zext", \ - .u.insns_int = { \ - BPF_LD_IMM64(R0, 0x0123456789acbdefULL),\ - BPF_LD_IMM64(R1, 0xfedcba9876543210ULL),\ - BPF_ALU64_REG(BPF_MOV, R2, R0), \ - BPF_ALU64_REG(BPF_MOV, R3, R1), \ - BPF_JMP32_REG(BPF_##op, R0, R1, 1), \ - BPF_JMP_A(0), /* Nop */ \ - BPF_ALU64_REG(BPF_SUB, R0, R2), \ - BPF_ALU64_REG(BPF_SUB, R1, R3), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ - BPF_ALU64_REG(BPF_MOV, R1, R0), \ - BPF_ALU64_IMM(BPF_RSH, R1, 32), \ - BPF_ALU64_REG(BPF_OR, R0, R1), \ - BPF_EXIT_INSN(), \ - }, \ - INTERNAL, \ - { }, \ - { { 0, 0 } }, \ - } - BPF_JMP32_REG_ZEXT(JEQ), - BPF_JMP32_REG_ZEXT(JNE), - BPF_JMP32_REG_ZEXT(JSET), - BPF_JMP32_REG_ZEXT(JGT), - BPF_JMP32_REG_ZEXT(JGE), - BPF_JMP32_REG_ZEXT(JLT), - BPF_JMP32_REG_ZEXT(JLE), - BPF_JMP32_REG_ZEXT(JSGT), - BPF_JMP32_REG_ZEXT(JSGE), - BPF_JMP32_REG_ZEXT(JSGT), - BPF_JMP32_REG_ZEXT(JSLT), - BPF_JMP32_REG_ZEXT(JSLE), -#undef BPF_JMP32_REG_ZEXT - /* ALU64 K register combinations */ - { - "ALU64_MOV_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mov_imm_regs, - }, - { - "ALU64_AND_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_and_imm_regs, - }, - { - "ALU64_OR_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_or_imm_regs, - }, - { - "ALU64_XOR_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_xor_imm_regs, - }, - { - "ALU64_LSH_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_lsh_imm_regs, - }, - { - "ALU64_RSH_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_rsh_imm_regs, - }, - { - "ALU64_ARSH_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_arsh_imm_regs, - }, - { - "ALU64_ADD_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_add_imm_regs, - }, - { - "ALU64_SUB_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_sub_imm_regs, - }, - { - "ALU64_MUL_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mul_imm_regs, - }, - { - "ALU64_DIV_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, -
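/*
 * The "registers" entries in this group carry no static instructions;
 * their programs are built at runtime by a .fill_helper. A minimal
 * helper, patterned on __bpf_fill_staggered_jumps() earlier in this
 * patch (the name and the two-instruction body are illustrative only):
 *
 *	static int bpf_fill_example(struct bpf_test *self)
 *	{
 *		struct bpf_insn *insns;
 *
 *		insns = kmalloc_array(2, sizeof(*insns), GFP_KERNEL);
 *		if (!insns)
 *			return -ENOMEM;
 *		insns[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1);
 *		insns[1] = BPF_EXIT_INSN();
 *		self->u.ptr.insns = insns;
 *		self->u.ptr.len = 2;
 *		return 0;
 *	}
 */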
.fill_helper = bpf_fill_alu64_div_imm_regs, - }, - { - "ALU64_MOD_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mod_imm_regs, - }, - /* ALU32 K registers */ - { - "ALU32_MOV_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mov_imm_regs, - }, - { - "ALU32_AND_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_and_imm_regs, - }, - { - "ALU32_OR_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_or_imm_regs, - }, - { - "ALU32_XOR_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_xor_imm_regs, - }, - { - "ALU32_LSH_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_lsh_imm_regs, - }, - { - "ALU32_RSH_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_rsh_imm_regs, - }, - { - "ALU32_ARSH_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_arsh_imm_regs, - }, - { - "ALU32_ADD_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_add_imm_regs, - }, - { - "ALU32_SUB_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_sub_imm_regs, - }, - { - "ALU32_MUL_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mul_imm_regs, - }, - { - "ALU32_DIV_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_div_imm_regs, - }, - { - "ALU32_MOD_K: registers", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mod_imm_regs, - }, - /* ALU64 X register combinations */ - { - "ALU64_MOV_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mov_reg_pairs, - }, - { - "ALU64_AND_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_and_reg_pairs, - }, - { - "ALU64_OR_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_or_reg_pairs, - }, - { - "ALU64_XOR_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_xor_reg_pairs, - }, - { - "ALU64_LSH_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_lsh_reg_pairs, - }, - { - "ALU64_RSH_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_rsh_reg_pairs, - }, - { - "ALU64_ARSH_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_arsh_reg_pairs, - }, - { - "ALU64_ADD_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_add_reg_pairs, - }, - { - "ALU64_SUB_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_sub_reg_pairs, - }, - { - "ALU64_MUL_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mul_reg_pairs, - }, - { - "ALU64_DIV_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_div_reg_pairs, - }, - { - "ALU64_MOD_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mod_reg_pairs, - }, - /* ALU32 X register combinations */ - { - "ALU32_MOV_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = 
bpf_fill_alu32_mov_reg_pairs, - }, - { - "ALU32_AND_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_and_reg_pairs, - }, - { - "ALU32_OR_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_or_reg_pairs, - }, - { - "ALU32_XOR_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_xor_reg_pairs, - }, - { - "ALU32_LSH_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_lsh_reg_pairs, - }, - { - "ALU32_RSH_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_rsh_reg_pairs, - }, - { - "ALU32_ARSH_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_arsh_reg_pairs, - }, - { - "ALU32_ADD_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_add_reg_pairs, - }, - { - "ALU32_SUB_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_sub_reg_pairs, - }, - { - "ALU32_MUL_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mul_reg_pairs, - }, - { - "ALU32_DIV_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_div_reg_pairs, - }, - { - "ALU32_MOD_X: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mod_reg_pairs, - }, - /* Exhaustive test of ALU64 shift operations */ - { - "ALU64_LSH_K: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_lsh_imm, - }, - { - "ALU64_RSH_K: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_rsh_imm, - }, - { - "ALU64_ARSH_K: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_arsh_imm, - }, - { - "ALU64_LSH_X: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_lsh_reg, - }, - { - "ALU64_RSH_X: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_rsh_reg, - }, - { - "ALU64_ARSH_X: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_arsh_reg, - }, - /* Exhaustive test of ALU32 shift operations */ - { - "ALU32_LSH_K: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_lsh_imm, - }, - { - "ALU32_RSH_K: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_rsh_imm, - }, - { - "ALU32_ARSH_K: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_arsh_imm, - }, - { - "ALU32_LSH_X: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_lsh_reg, - }, - { - "ALU32_RSH_X: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_rsh_reg, - }, - { - "ALU32_ARSH_X: all shift values", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_arsh_reg, - }, - /* - * Exhaustive test of ALU64 shift operations when - * source and destination register are the same.
- */ - { - "ALU64_LSH_X: all shift values with the same register", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_lsh_same_reg, - }, - { - "ALU64_RSH_X: all shift values with the same register", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_rsh_same_reg, - }, - { - "ALU64_ARSH_X: all shift values with the same register", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_arsh_same_reg, - }, - /* - * Exhaustive test of ALU32 shift operations when - * source and destination register are the same. - */ - { - "ALU32_LSH_X: all shift values with the same register", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_lsh_same_reg, - }, - { - "ALU32_RSH_X: all shift values with the same register", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_rsh_same_reg, - }, - { - "ALU32_ARSH_X: all shift values with the same register", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_arsh_same_reg, - }, - /* ALU64 immediate magnitudes */ - { - "ALU64_MOV_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mov_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_AND_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_and_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_OR_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_or_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_XOR_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_xor_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_ADD_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_add_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_SUB_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_sub_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_MUL_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mul_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_DIV_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_div_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_MOD_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mod_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* ALU32 immediate magnitudes */ - { - "ALU32_MOV_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mov_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_AND_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_and_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_OR_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_or_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_XOR_K: all immediate value magnitudes", - { }, - 
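/*
 * Anatomy of these table entries, for reference: the positional fields
 * are the test name, the classic BPF instructions (an empty { } when a
 * .fill_helper builds the program instead), the flags (INTERNAL selects
 * eBPF execution, FLAG_NO_DATA runs without a packet buffer), the input
 * data, and the expected { data size, return value } pairs, so
 * { { 0, 1 } } means "no data, expect a return value of 1". Designated
 * fields such as .fill_helper, .nr_testruns and .stack_depth follow.
 */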
INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_xor_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_ADD_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_add_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_SUB_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_sub_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_MUL_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mul_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_DIV_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_div_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_MOD_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mod_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* ALU64 register magnitudes */ - { - "ALU64_MOV_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mov_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_AND_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_and_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_OR_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_or_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_XOR_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_xor_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_ADD_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_add_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_SUB_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_sub_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_MUL_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mul_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_DIV_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_div_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU64_MOD_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu64_mod_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* ALU32 register magnitudes */ - { - "ALU32_MOV_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mov_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_AND_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_and_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_OR_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_or_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_XOR_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = 
bpf_fill_alu32_xor_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_ADD_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_add_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_SUB_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_sub_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_MUL_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mul_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_DIV_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_div_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ALU32_MOD_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_alu32_mod_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* LD_IMM64 immediate magnitudes and byte patterns */ - { - "LD_IMM64: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_ld_imm64_magn, - }, - { - "LD_IMM64: checker byte patterns", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_ld_imm64_checker, - }, - { - "LD_IMM64: random positive and zero byte patterns", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_ld_imm64_pos_zero, - }, - { - "LD_IMM64: random negative and zero byte patterns", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_ld_imm64_neg_zero, - }, - { - "LD_IMM64: random positive and negative byte patterns", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_ld_imm64_pos_neg, - }, - /* 64-bit ATOMIC register combinations */ - { - "ATOMIC_DW_ADD: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_add_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_AND: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_and_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_OR: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_or_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_XOR: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_xor_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_ADD_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_add_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_AND_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_and_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_OR_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_or_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_XOR_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_xor_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_XCHG: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_xchg_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_DW_CMPXCHG: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = 
bpf_fill_atomic64_cmpxchg_reg_pairs, - .stack_depth = 8, - }, - /* 32-bit ATOMIC register combinations */ - { - "ATOMIC_W_ADD: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_add_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_AND: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_and_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_OR: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_or_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_XOR: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_xor_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_ADD_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_add_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_AND_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_and_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_OR_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_or_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_XOR_FETCH: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_xor_fetch_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_XCHG: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_xchg_reg_pairs, - .stack_depth = 8, - }, - { - "ATOMIC_W_CMPXCHG: register combinations", - { }, - INTERNAL, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_cmpxchg_reg_pairs, - .stack_depth = 8, - }, - /* 64-bit ATOMIC magnitudes */ - { - "ATOMIC_DW_ADD: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_add, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_AND: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_and, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_OR: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_or, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_XOR: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_xor, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_ADD_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_add_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_AND_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_and_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_OR_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_or_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_XOR_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_xor_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_XCHG: all operand magnitudes", - { }, - INTERNAL | 
FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic64_xchg, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_DW_CMPXCHG: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_cmpxchg64, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* 32-bit atomic magnitudes */ - { - "ATOMIC_W_ADD: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_add, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_AND: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_and, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_OR: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_or, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_XOR: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_xor, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_ADD_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_add_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_AND_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_and_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_OR_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_or_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_XOR_FETCH: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_xor_fetch, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_XCHG: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_atomic32_xchg, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "ATOMIC_W_CMPXCHG: all operand magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_cmpxchg32, - .stack_depth = 8, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* JMP immediate magnitudes */ - { - "JMP_JSET_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jset_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JEQ_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jeq_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JNE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jne_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JGT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jgt_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JGE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jge_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JLT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jlt_imm, -
.nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JLE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jle_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSGT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jsgt_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSGE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jsge_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSLT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jslt_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSLE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jsle_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* JMP register magnitudes */ - { - "JMP_JSET_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jset_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JEQ_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jeq_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JNE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jne_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JGT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jgt_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JGE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jge_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JLT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jlt_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JLE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jle_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSGT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jsgt_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSGE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jsge_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSLT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jslt_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP_JSLE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp_jsle_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* JMP32 immediate magnitudes */ - { - "JMP32_JSET_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jset_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JEQ_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jeq_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JNE_K: all immediate value magnitudes", - { }, - INTERNAL | 
FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jne_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JGT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jgt_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JGE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jge_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JLT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jlt_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JLE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jle_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JSGT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jsgt_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JSGE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jsge_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JSLT_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jslt_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JSLE_K: all immediate value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jsle_imm, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* JMP32 register magnitudes */ - { - "JMP32_JSET_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jset_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JEQ_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jeq_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JNE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jne_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JGT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jgt_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JGE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jge_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JLT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jlt_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JLE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jle_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JSGT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jsgt_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JSGE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jsge_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - { - "JMP32_JSLT_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jslt_reg, - .nr_testruns = 
NR_PATTERN_RUNS, - }, - { - "JMP32_JSLE_X: all register value magnitudes", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_jmp32_jsle_reg, - .nr_testruns = NR_PATTERN_RUNS, - }, - /* Conditional jumps with constant decision */ - { - "JMP_JSET_K: imm = 0 -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_IMM(BPF_JSET, R1, 0, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP_JLT_K: imm = 0 -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_IMM(BPF_JLT, R1, 0, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP_JGE_K: imm = 0 -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_IMM(BPF_JGE, R1, 0, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP_JGT_K: imm = 0xffffffff -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_IMM(BPF_JGT, R1, U32_MAX, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP_JLE_K: imm = 0xffffffff -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_IMM(BPF_JLE, R1, U32_MAX, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP32_JSGT_K: imm = 0x7fffffff -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP32_IMM(BPF_JSGT, R1, S32_MAX, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP32_JSGE_K: imm = -0x80000000 -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP32_IMM(BPF_JSGE, R1, S32_MIN, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP32_JSLT_K: imm = -0x80000000 -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP32_IMM(BPF_JSLT, R1, S32_MIN, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP32_JSLE_K: imm = 0x7fffffff -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP32_IMM(BPF_JSLE, R1, S32_MAX, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP_JEQ_X: dst = src -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JEQ, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP_JGE_X: dst = src -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JGE, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP_JLE_X: dst = src -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JLE, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP_JSGE_X: dst = src -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JSGE, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP_JSLE_X: 
dst = src -> always taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JSLE, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - }, - { - "JMP_JNE_X: dst = src -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JNE, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP_JGT_X: dst = src -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JGT, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP_JLT_X: dst = src -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JLT, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP_JSGT_X: dst = src -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JSGT, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - { - "JMP_JSLT_X: dst = src -> never taken", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 1), - BPF_JMP_REG(BPF_JSLT, R1, R1, 1), - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 0 } }, - }, - /* Short relative jumps */ - { - "Short relative jump: offset=0", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_JMP_IMM(BPF_JEQ, R0, 0, 0), - BPF_EXIT_INSN(), - BPF_ALU32_IMM(BPF_MOV, R0, -1), - }, - INTERNAL | FLAG_NO_DATA | FLAG_VERIFIER_ZEXT, - { }, - { { 0, 0 } }, - }, - { - "Short relative jump: offset=1", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_JMP_IMM(BPF_JEQ, R0, 0, 1), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_EXIT_INSN(), - BPF_ALU32_IMM(BPF_MOV, R0, -1), - }, - INTERNAL | FLAG_NO_DATA | FLAG_VERIFIER_ZEXT, - { }, - { { 0, 0 } }, - }, - { - "Short relative jump: offset=2", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_JMP_IMM(BPF_JEQ, R0, 0, 2), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_EXIT_INSN(), - BPF_ALU32_IMM(BPF_MOV, R0, -1), - }, - INTERNAL | FLAG_NO_DATA | FLAG_VERIFIER_ZEXT, - { }, - { { 0, 0 } }, - }, - { - "Short relative jump: offset=3", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_JMP_IMM(BPF_JEQ, R0, 0, 3), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_EXIT_INSN(), - BPF_ALU32_IMM(BPF_MOV, R0, -1), - }, - INTERNAL | FLAG_NO_DATA | FLAG_VERIFIER_ZEXT, - { }, - { { 0, 0 } }, - }, - { - "Short relative jump: offset=4", - .u.insns_int = { - BPF_ALU64_IMM(BPF_MOV, R0, 0), - BPF_JMP_IMM(BPF_JEQ, R0, 0, 4), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_ALU32_IMM(BPF_ADD, R0, 1), - BPF_EXIT_INSN(), - BPF_ALU32_IMM(BPF_MOV, R0, -1), - }, - INTERNAL | FLAG_NO_DATA | FLAG_VERIFIER_ZEXT, - { }, - { { 0, 0 } }, - }, - /* Conditional branch conversions */ - { - "Long conditional jump: taken at runtime", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_max_jmp_taken, - }, - { - "Long conditional jump: not taken at runtime", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 2 } }, - .fill_helper = bpf_fill_max_jmp_not_taken, - }, - { - "Long conditional jump: always taken, known at JIT time", - { }, - INTERNAL | FLAG_NO_DATA, 
- { }, - { { 0, 1 } }, - .fill_helper = bpf_fill_max_jmp_always_taken, - }, - { - "Long conditional jump: never taken, known at JIT time", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 2 } }, - .fill_helper = bpf_fill_max_jmp_never_taken, - }, - /* Staggered jump sequences, immediate */ - { - "Staggered jumps: JMP_JA", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_ja, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JEQ_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jeq_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JNE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jne_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSET_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jset_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JGT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jgt_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JGE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jge_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JLT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jlt_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JLE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jle_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSGT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsgt_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSGE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsge_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSLT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jslt_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSLE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsle_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - /* Staggered jump sequences, register */ - { - "Staggered jumps: JMP_JEQ_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jeq_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JNE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jne_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSET_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jset_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JGT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, 
MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jgt_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JGE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jge_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JLT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jlt_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JLE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jle_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSGT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsgt_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSGE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsge_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSLT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jslt_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP_JSLE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsle_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - /* Staggered jump sequences, JMP32 immediate */ - { - "Staggered jumps: JMP32_JEQ_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jeq32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JNE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jne32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSET_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jset32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JGT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jgt32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JGE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jge32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JLT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jlt32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JLE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jle32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSGT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsgt32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSGE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsge32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { 
- "Staggered jumps: JMP32_JSLT_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jslt32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSLE_K", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsle32_imm, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - /* Staggered jump sequences, JMP32 register */ - { - "Staggered jumps: JMP32_JEQ_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jeq32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JNE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jne32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSET_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jset32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JGT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jgt32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JGE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jge32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JLT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jlt32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JLE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jle32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSGT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsgt32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSGE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsge32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSLT_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jslt32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, - { - "Staggered jumps: JMP32_JSLE_X", - { }, - INTERNAL | FLAG_NO_DATA, - { }, - { { 0, MAX_STAGGERED_JMP_SIZE + 1 } }, - .fill_helper = bpf_fill_staggered_jsle32_reg, - .nr_testruns = NR_STAGGERED_JMP_RUNS, - }, }; static struct net_device dev; @@ -14213,8 +8576,6 @@ static struct bpf_prog *generate_filter(int which, int *err) fp->type = BPF_PROG_TYPE_SOCKET_FILTER; memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn)); fp->aux->stack_depth = tests[which].stack_depth; - fp->aux->verifier_zext = !!(tests[which].aux & - FLAG_VERIFIER_ZEXT); /* We cannot error here as we don't need type compatibility * checks. 
@@ -14270,9 +8631,6 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) { int err_cnt = 0, i, runs = MAX_TESTRUNS; - if (test->nr_testruns) - runs = min(test->nr_testruns, MAX_TESTRUNS); - for (i = 0; i < MAX_SUBTESTS; i++) { void *data; u64 duration; @@ -14316,9 +8674,86 @@ module_param_string(test_name, test_name, sizeof(test_name), 0); static int test_id = -1; module_param(test_id, int, 0); -static int test_range[2] = { 0, INT_MAX }; +static int test_range[2] = { 0, ARRAY_SIZE(tests) - 1 }; module_param_array(test_range, int, NULL, 0); +static __init int find_test_index(const char *test_name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!strcmp(tests[i].descr, test_name)) + return i; + } + return -1; +} + +static __init int prepare_bpf_tests(void) +{ + int i; + + if (test_id >= 0) { + /* + * if a test_id was specified, use test_range to + * cover only that test. + */ + if (test_id >= ARRAY_SIZE(tests)) { + pr_err("test_bpf: invalid test_id specified.\n"); + return -EINVAL; + } + + test_range[0] = test_id; + test_range[1] = test_id; + } else if (*test_name) { + /* + * if a test_name was specified, find it and setup + * test_range to cover only that test. + */ + int idx = find_test_index(test_name); + + if (idx < 0) { + pr_err("test_bpf: no test named '%s' found.\n", + test_name); + return -EINVAL; + } + test_range[0] = idx; + test_range[1] = idx; + } else { + /* + * check that the supplied test_range is valid. + */ + if (test_range[0] >= ARRAY_SIZE(tests) || + test_range[1] >= ARRAY_SIZE(tests) || + test_range[0] < 0 || test_range[1] < 0) { + pr_err("test_bpf: test_range is out of bound.\n"); + return -EINVAL; + } + + if (test_range[1] < test_range[0]) { + pr_err("test_bpf: test_range is ending before it starts.\n"); + return -EINVAL; + } + } + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper && + tests[i].fill_helper(&tests[i]) < 0) + return -ENOMEM; + } + + return 0; +} + +static __init void destroy_bpf_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper) + kfree(tests[i].u.ptr.insns); + } +} + static bool exclude_test(int test_id) { return test_id < test_range[0] || test_id > test_range[1]; @@ -14365,7 +8800,6 @@ static __init struct sk_buff *build_test_skb(void) skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY; skb_shinfo(skb[0])->gso_segs = 0; skb_shinfo(skb[0])->frag_list = skb[1]; - skb_shinfo(skb[0])->hwtstamps.hwtstamp = 1000; /* adjust skb[0]'s len */ skb[0]->len += skb[1]->len; @@ -14490,10 +8924,6 @@ static __init int test_skb_segment(void) for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) { const struct skb_segment_test *test = &skb_segment_tests[i]; - cond_resched(); - if (exclude_test(i)) - continue; - pr_info("#%d %s ", i, test->descr); if (test_skb_segment_single(test)) { @@ -14525,19 +8955,7 @@ static __init int test_bpf(void) pr_info("#%d %s ", i, tests[i].descr); - if (tests[i].fill_helper && - tests[i].fill_helper(&tests[i]) < 0) { - pr_cont("FAIL to prog_fill\n"); - continue; - } - fp = generate_filter(i, &err); - - if (tests[i].fill_helper) { - kfree(tests[i].u.ptr.insns); - tests[i].u.ptr.insns = NULL; - } - if (fp == NULL) { if (err == 0) { pass_cnt++; @@ -14602,30 +9020,6 @@ struct tail_call_test { offset, TAIL_CALL_MARKER), \ BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0) -/* - * A test function to be called from a BPF program, clobbering a lot of - * CPU registers in the process. 
A JITed BPF program calling this function - * must save and restore any caller-saved registers it uses for internal - * state, for example the current tail call count. - */ -BPF_CALL_1(bpf_test_func, u64, arg) -{ - char buf[64]; - long a = 0; - long b = 1; - long c = 2; - long d = 3; - long e = 4; - long f = 5; - long g = 6; - long h = 7; - - return snprintf(buf, sizeof(buf), - "%ld %lu %lx %ld %lu %lx %ld %lu %x", - a, b, c, d, e, f, g, h, (int)arg); -} -#define BPF_FUNC_test_func __BPF_FUNC_MAX_ID - /* * Tail call tests. Each test case may call any other test in the table, * including itself, specified as a relative index offset from the calling @@ -14683,29 +9077,7 @@ static struct tail_call_test tail_call_tests[] = { BPF_EXIT_INSN(), }, .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, - .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, - }, - { - "Tail call count preserved across function calls", - .insns = { - BPF_LDX_MEM(BPF_W, R2, R1, 0), - BPF_ALU64_IMM(BPF_ADD, R2, 1), - BPF_STX_MEM(BPF_W, R1, R2, 0), - BPF_STX_MEM(BPF_DW, R10, R1, -8), - BPF_CALL_REL(BPF_FUNC_get_numa_node_id), - BPF_CALL_REL(BPF_FUNC_ktime_get_ns), - BPF_CALL_REL(BPF_FUNC_ktime_get_boot_ns), - BPF_CALL_REL(BPF_FUNC_ktime_get_coarse_ns), - BPF_CALL_REL(BPF_FUNC_jiffies64), - BPF_CALL_REL(BPF_FUNC_test_func), - BPF_LDX_MEM(BPF_DW, R1, R10, -8), - BPF_ALU32_REG(BPF_MOV, R0, R1), - TAIL_CALL(0), - BPF_EXIT_INSN(), - }, - .stack_depth = 8, - .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, - .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, + .result = (MAX_TAIL_CALL_CNT + 1 + 1) * MAX_TESTRUNS, }, { "Tail call error path, NULL target", @@ -14785,19 +9157,17 @@ static __init int prepare_tail_call_tests(struct bpf_array **pprogs) /* Relocate runtime tail call offsets and addresses */ for (i = 0; i < len; i++) { struct bpf_insn *insn = &fp->insnsi[i]; - long addr = 0; + + if (insn->imm != TAIL_CALL_MARKER) + continue; switch (insn->code) { case BPF_LD | BPF_DW | BPF_IMM: - if (insn->imm != TAIL_CALL_MARKER) - break; insn[0].imm = (u32)(long)progs; insn[1].imm = ((u64)(long)progs) >> 32; break; case BPF_ALU | BPF_MOV | BPF_K: - if (insn->imm != TAIL_CALL_MARKER) - break; if (insn->off == TAIL_CALL_NULL) insn->imm = ntests; else if (insn->off == TAIL_CALL_INVALID) @@ -14805,38 +9175,6 @@ static __init int prepare_tail_call_tests(struct bpf_array **pprogs) else insn->imm = which + insn->off; insn->off = 0; - break; - - case BPF_JMP | BPF_CALL: - if (insn->src_reg != BPF_PSEUDO_CALL) - break; - switch (insn->imm) { - case BPF_FUNC_get_numa_node_id: - addr = (long)&numa_node_id; - break; - case BPF_FUNC_ktime_get_ns: - addr = (long)&ktime_get_ns; - break; - case BPF_FUNC_ktime_get_boot_ns: - addr = (long)&ktime_get_boot_fast_ns; - break; - case BPF_FUNC_ktime_get_coarse_ns: - addr = (long)&ktime_get_coarse_ns; - break; - case BPF_FUNC_jiffies64: - addr = (long)&get_jiffies_64; - break; - case BPF_FUNC_test_func: - addr = (long)&bpf_test_func; - break; - default: - err = -EFAULT; - goto out_err; - } - *insn = BPF_EMIT_CALL(addr); - if ((long)__bpf_call_base + insn->imm != addr) - *insn = BPF_JMP_A(0); /* Skip: NOP */ - break; } } @@ -14875,8 +9213,6 @@ static __init int test_tail_calls(struct bpf_array *progs) int ret; cond_resched(); - if (exclude_test(i)) - continue; pr_info("#%d %s ", i, test->descr); if (!fp) { @@ -14909,144 +9245,29 @@ static __init int test_tail_calls(struct bpf_array *progs) return err_cnt ? 
-EINVAL : 0; } -static char test_suite[32]; -module_param_string(test_suite, test_suite, sizeof(test_suite), 0); - -static __init int find_test_index(const char *test_name) -{ - int i; - - if (!strcmp(test_suite, "test_bpf")) { - for (i = 0; i < ARRAY_SIZE(tests); i++) { - if (!strcmp(tests[i].descr, test_name)) - return i; - } - } - - if (!strcmp(test_suite, "test_tail_calls")) { - for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) { - if (!strcmp(tail_call_tests[i].descr, test_name)) - return i; - } - } - - if (!strcmp(test_suite, "test_skb_segment")) { - for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) { - if (!strcmp(skb_segment_tests[i].descr, test_name)) - return i; - } - } - - return -1; -} - -static __init int prepare_test_range(void) -{ - int valid_range; - - if (!strcmp(test_suite, "test_bpf")) - valid_range = ARRAY_SIZE(tests); - else if (!strcmp(test_suite, "test_tail_calls")) - valid_range = ARRAY_SIZE(tail_call_tests); - else if (!strcmp(test_suite, "test_skb_segment")) - valid_range = ARRAY_SIZE(skb_segment_tests); - else - return 0; - - if (test_id >= 0) { - /* - * if a test_id was specified, use test_range to - * cover only that test. - */ - if (test_id >= valid_range) { - pr_err("test_bpf: invalid test_id specified for '%s' suite.\n", - test_suite); - return -EINVAL; - } - - test_range[0] = test_id; - test_range[1] = test_id; - } else if (*test_name) { - /* - * if a test_name was specified, find it and setup - * test_range to cover only that test. - */ - int idx = find_test_index(test_name); - - if (idx < 0) { - pr_err("test_bpf: no test named '%s' found for '%s' suite.\n", - test_name, test_suite); - return -EINVAL; - } - test_range[0] = idx; - test_range[1] = idx; - } else if (test_range[0] != 0 || test_range[1] != INT_MAX) { - /* - * check that the supplied test_range is valid. - */ - if (test_range[0] < 0 || test_range[1] >= valid_range) { - pr_err("test_bpf: test_range is out of bound for '%s' suite.\n", - test_suite); - return -EINVAL; - } - - if (test_range[1] < test_range[0]) { - pr_err("test_bpf: test_range is ending before it starts.\n"); - return -EINVAL; - } - } - - return 0; -} - static int __init test_bpf_init(void) { struct bpf_array *progs = NULL; int ret; - if (strlen(test_suite) && - strcmp(test_suite, "test_bpf") && - strcmp(test_suite, "test_tail_calls") && - strcmp(test_suite, "test_skb_segment")) { - pr_err("test_bpf: invalid test_suite '%s' specified.\n", test_suite); - return -EINVAL; - } - - /* - * if test_suite is not specified, but test_id, test_name or test_range - * is specified, set 'test_bpf' as the default test suite. 
- */ - if (!strlen(test_suite) && - (test_id != -1 || strlen(test_name) || - (test_range[0] != 0 || test_range[1] != INT_MAX))) { - pr_info("test_bpf: set 'test_bpf' as the default test_suite.\n"); - strscpy(test_suite, "test_bpf", sizeof(test_suite)); - } - - ret = prepare_test_range(); + ret = prepare_bpf_tests(); if (ret < 0) return ret; - if (!strlen(test_suite) || !strcmp(test_suite, "test_bpf")) { - ret = test_bpf(); - if (ret) - return ret; - } + ret = test_bpf(); + destroy_bpf_tests(); + if (ret) + return ret; - if (!strlen(test_suite) || !strcmp(test_suite, "test_tail_calls")) { - ret = prepare_tail_call_tests(&progs); - if (ret) - return ret; - ret = test_tail_calls(progs); - destroy_tail_call_tests(progs); - if (ret) - return ret; - } + ret = prepare_tail_call_tests(&progs); + if (ret) + return ret; + ret = test_tail_calls(progs); + destroy_tail_call_tests(progs); + if (ret) + return ret; - if (!strlen(test_suite) || !strcmp(test_suite, "test_skb_segment")) - return test_skb_segment(); - - return 0; + return test_skb_segment(); } static void __exit test_bpf_exit(void) diff --git a/lib/test_hash.c b/lib/test_hash.c index bb25fda347..0ee40b4a56 100644 --- a/lib/test_hash.c +++ b/lib/test_hash.c @@ -14,15 +14,17 @@ * and hash_64(). */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n" + #include #include #include #include #include -#include +#include /* 32-bit XORSHIFT generator. Seed must not be zero. */ -static u32 __attribute_const__ +static u32 __init __attribute_const__ xorshift(u32 seed) { seed ^= seed << 13; @@ -32,7 +34,7 @@ xorshift(u32 seed) } /* Given a non-zero x, returns a non-zero byte. */ -static u8 __attribute_const__ +static u8 __init __attribute_const__ mod255(u32 x) { x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */ @@ -43,7 +45,8 @@ mod255(u32 x) } /* Fill the buffer with non-zero bytes. */ -static void fill_buf(char *buf, size_t len, u32 seed) +static void __init +fill_buf(char *buf, size_t len, u32 seed) { size_t i; @@ -53,50 +56,6 @@ static void fill_buf(char *buf, size_t len, u32 seed) } } -/* Holds most testing variables for the int test. */ -struct test_hash_params { - /* Pointer to integer to be hashed. */ - unsigned long long *h64; - /* Low 32-bits of integer to be hashed. */ - u32 h0; - /* Arch-specific hash result. */ - u32 h1; - /* Generic hash result. */ - u32 h2; - /* ORed hashes of given size (in bits). */ - u32 (*hash_or)[33]; -}; - -#ifdef HAVE_ARCH__HASH_32 -static void -test_int__hash_32(struct kunit *test, struct test_hash_params *params) -{ - params->hash_or[1][0] |= params->h2 = __hash_32_generic(params->h0); -#if HAVE_ARCH__HASH_32 == 1 - KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2, - "__hash_32(%#x) = %#x != __hash_32_generic() = %#x", - params->h0, params->h1, params->h2); -#endif -} -#endif - -#ifdef HAVE_ARCH_HASH_64 -static void -test_int_hash_64(struct kunit *test, struct test_hash_params *params, u32 const *m, int *k) -{ - params->h2 = hash_64_generic(*params->h64, *k); -#if HAVE_ARCH_HASH_64 == 1 - KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2, - "hash_64(%#llx, %d) = %#x != hash_64_generic() = %#x", - *params->h64, *k, params->h1, params->h2); -#else - KUNIT_EXPECT_LE_MSG(test, params->h1, params->h2, - "hash_64_generic(%#llx, %d) = %#x > %#x", - *params->h64, *k, params->h1, *m); -#endif -} -#endif - /* * Test the various integer hash functions. h64 (or its low-order bits) * is the integer to hash. 
hash_or accumulates the OR of the hash values, @@ -106,16 +65,23 @@ test_int_hash_64(struct kunit *test, struct test_hash_params *params, u32 const * inline, the code being tested is actually in the module, and you can * recompile and re-test the module without rebooting. */ -static void -test_int_hash(struct kunit *test, unsigned long long h64, u32 hash_or[2][33]) +static bool __init +test_int_hash(unsigned long long h64, u32 hash_or[2][33]) { int k; - struct test_hash_params params = { &h64, (u32)h64, 0, 0, hash_or }; + u32 h0 = (u32)h64, h1, h2; /* Test __hash32 */ - hash_or[0][0] |= params.h1 = __hash_32(params.h0); + hash_or[0][0] |= h1 = __hash_32(h0); #ifdef HAVE_ARCH__HASH_32 - test_int__hash_32(test, &params); + hash_or[1][0] |= h2 = __hash_32_generic(h0); +#if HAVE_ARCH__HASH_32 == 1 + if (h1 != h2) { + pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x", + h0, h1, h2); + return false; + } +#endif #endif /* Test k = 1..32 bits */ @@ -123,53 +89,63 @@ test_int_hash(struct kunit *test, unsigned long long h64, u32 hash_or[2][33]) u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */ /* Test hash_32 */ - hash_or[0][k] |= params.h1 = hash_32(params.h0, k); - KUNIT_EXPECT_LE_MSG(test, params.h1, m, - "hash_32(%#x, %d) = %#x > %#x", - params.h0, k, params.h1, m); - + hash_or[0][k] |= h1 = hash_32(h0, k); + if (h1 > m) { + pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m); + return false; + } +#ifdef HAVE_ARCH_HASH_32 + h2 = hash_32_generic(h0, k); +#if HAVE_ARCH_HASH_32 == 1 + if (h1 != h2) { + pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() = %#x", + h0, k, h1, h2); + return false; + } +#else + if (h2 > m) { + pr_err("hash_32_generic(%#x, %d) = %#x > %#x", + h0, k, h2, m); + return false; + } +#endif +#endif /* Test hash_64 */ - hash_or[1][k] |= params.h1 = hash_64(h64, k); - KUNIT_EXPECT_LE_MSG(test, params.h1, m, - "hash_64(%#llx, %d) = %#x > %#x", - h64, k, params.h1, m); + hash_or[1][k] |= h1 = hash_64(h64, k); + if (h1 > m) { + pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m); + return false; + } #ifdef HAVE_ARCH_HASH_64 - test_int_hash_64(test, &params, &m, &k); + h2 = hash_64_generic(h64, k); +#if HAVE_ARCH_HASH_64 == 1 + if (h1 != h2) { + pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() = %#x", + h64, k, h1, h2); + return false; + } +#else + if (h2 > m) { + pr_err("hash_64_generic(%#llx, %d) = %#x > %#x", + h64, k, h2, m); + return false; + } +#endif #endif } + + (void)h2; /* Suppress unused variable warning */ + return true; } #define SIZE 256 /* Run time is cubic in SIZE */ -static void test_string_or(struct kunit *test) +static int __init +test_hash_init(void) { char buf[SIZE+1]; - u32 string_or = 0; - int i, j; - - fill_buf(buf, SIZE, 1); - - /* Test every possible non-empty substring in the buffer.
*/ - for (j = SIZE; j > 0; --j) { - buf[j] = '\0'; - - for (i = 0; i <= j; i++) { - u32 h0 = full_name_hash(buf+i, buf+i, j-i); - - string_or |= h0; - } /* i */ - } /* j */ - - /* The OR of all the hash values should cover all the bits */ - KUNIT_EXPECT_EQ_MSG(test, string_or, -1u, - "OR of all string hash results = %#x != %#x", - string_or, -1u); -} - -static void test_hash_or(struct kunit *test) -{ - char buf[SIZE+1]; - u32 hash_or[2][33] = { { 0, } }; + u32 string_or = 0, hash_or[2][33] = { { 0, } }; + unsigned tests = 0; unsigned long long h64 = 0; int i, j; @@ -184,27 +160,46 @@ static void test_hash_or(struct kunit *test) u32 h0 = full_name_hash(buf+i, buf+i, j-i); /* Check that hashlen_string gets the length right */ - KUNIT_EXPECT_EQ_MSG(test, hashlen_len(hashlen), j-i, - "hashlen_string(%d..%d) returned length %u, expected %d", - i, j, hashlen_len(hashlen), j-i); + if (hashlen_len(hashlen) != j-i) { + pr_err("hashlen_string(%d..%d) returned length" + " %u, expected %d", + i, j, hashlen_len(hashlen), j-i); + return -EINVAL; + } /* Check that the hashes match */ - KUNIT_EXPECT_EQ_MSG(test, hashlen_hash(hashlen), h0, - "hashlen_string(%d..%d) = %08x != full_name_hash() = %08x", - i, j, hashlen_hash(hashlen), h0); + if (hashlen_hash(hashlen) != h0) { + pr_err("hashlen_string(%d..%d) = %08x != " + "full_name_hash() = %08x", + i, j, hashlen_hash(hashlen), h0); + return -EINVAL; + } + string_or |= h0; h64 = h64 << 32 | h0; /* For use with hash_64 */ - test_int_hash(test, h64, hash_or); + if (!test_int_hash(h64, hash_or)) + return -EINVAL; + tests++; } /* i */ } /* j */ - KUNIT_EXPECT_EQ_MSG(test, hash_or[0][0], -1u, - "OR of all __hash_32 results = %#x != %#x", - hash_or[0][0], -1u); + /* The OR of all the hash values should cover all the bits */ + if (~string_or) { + pr_err("OR of all string hash results = %#x != %#x", + string_or, -1u); + return -EINVAL; + } + if (~hash_or[0][0]) { + pr_err("OR of all __hash_32 results = %#x != %#x", + hash_or[0][0], -1u); + return -EINVAL; + } #ifdef HAVE_ARCH__HASH_32 #if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */ - KUNIT_EXPECT_EQ_MSG(test, hash_or[1][0], -1u, - "OR of all __hash_32_generic results = %#x != %#x", - hash_or[1][0], -1u); + if (~hash_or[1][0]) { + pr_err("OR of all __hash_32_generic results = %#x != %#x", + hash_or[1][0], -1u); + return -EINVAL; + } #endif #endif @@ -212,27 +207,51 @@ static void test_hash_or(struct kunit *test) for (i = 1; i <= 32; i++) { u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */ - KUNIT_EXPECT_EQ_MSG(test, hash_or[0][i], m, - "OR of all hash_32(%d) results = %#x (%#x expected)", - i, hash_or[0][i], m); - KUNIT_EXPECT_EQ_MSG(test, hash_or[1][i], m, - "OR of all hash_64(%d) results = %#x (%#x expected)", - i, hash_or[1][i], m); + if (hash_or[0][i] != m) { + pr_err("OR of all hash_32(%d) results = %#x " + "(%#x expected)", i, hash_or[0][i], m); + return -EINVAL; + } + if (hash_or[1][i] != m) { + pr_err("OR of all hash_64(%d) results = %#x " + "(%#x expected)", i, hash_or[1][i], m); + return -EINVAL; + } } + + /* Issue notices about skipped tests. 
*/ +#ifdef HAVE_ARCH__HASH_32 +#if HAVE_ARCH__HASH_32 != 1 + pr_info("__hash_32() is arch-specific; not compared to generic."); +#endif +#else + pr_info("__hash_32() has no arch implementation to test."); +#endif +#ifdef HAVE_ARCH_HASH_32 +#if HAVE_ARCH_HASH_32 != 1 + pr_info("hash_32() is arch-specific; not compared to generic."); +#endif +#else + pr_info("hash_32() has no arch implementation to test."); +#endif +#ifdef HAVE_ARCH_HASH_64 +#if HAVE_ARCH_HASH_64 != 1 + pr_info("hash_64() is arch-specific; not compared to generic."); +#endif +#else + pr_info("hash_64() has no arch implementation to test."); +#endif + + pr_notice("%u tests passed.", tests); + + return 0; } -static struct kunit_case hash_test_cases[] __refdata = { - KUNIT_CASE(test_string_or), - KUNIT_CASE(test_hash_or), - {} -}; +static void __exit test_hash_exit(void) +{ +} -static struct kunit_suite hash_test_suite = { - .name = "hash", - .test_cases = hash_test_cases, -}; - - -kunit_test_suite(hash_test_suite); +module_init(test_hash_init); /* Does everything */ +module_exit(test_hash_exit); /* Does nothing */ MODULE_LICENSE("GPL"); diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 767538089a..ac794e3540 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -613,7 +613,8 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, */ rpage->zone_device_data = dmirror; - *dst = migrate_pfn(page_to_pfn(dpage)); + *dst = migrate_pfn(page_to_pfn(dpage)) | + MIGRATE_PFN_LOCKED; if ((*src & MIGRATE_PFN_WRITE) || (!spage && args->vma->vm_flags & VM_WRITE)) *dst |= MIGRATE_PFN_WRITE; @@ -1160,7 +1161,7 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, lock_page(dpage); xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); copy_highpage(dpage, spage); - *dst = migrate_pfn(page_to_pfn(dpage)); + *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; if (*src & MIGRATE_PFN_WRITE) *dst |= MIGRATE_PFN_WRITE; } diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 3b413f8c8a..8835e07845 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -88,7 +88,7 @@ static void kasan_test_exit(struct kunit *test) */ #define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \ if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ - kasan_sync_fault_possible()) \ + !kasan_async_mode_enabled()) \ migrate_disable(); \ KUNIT_EXPECT_FALSE(test, READ_ONCE(fail_data.report_found)); \ barrier(); \ @@ -440,7 +440,6 @@ static void kmalloc_oob_memset_2(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 1, 0, 2)); kfree(ptr); } @@ -453,7 +452,6 @@ static void kmalloc_oob_memset_4(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 3, 0, 4)); kfree(ptr); } @@ -466,7 +464,6 @@ static void kmalloc_oob_memset_8(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 7, 0, 8)); kfree(ptr); } @@ -479,7 +476,6 @@ static void kmalloc_oob_memset_16(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 15, 0, 16)); kfree(ptr); } @@ -492,18 +488,16 @@ static void kmalloc_oob_in_memset(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - 
OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + KASAN_GRANULE_SIZE)); kfree(ptr); } -static void kmalloc_memmove_negative_size(struct kunit *test) +static void kmalloc_memmove_invalid_size(struct kunit *test) { char *ptr; size_t size = 64; - size_t invalid_size = -2; + volatile size_t invalid_size = -2; /* * Hardware tag-based mode doesn't check memmove for negative size. @@ -516,24 +510,6 @@ static void kmalloc_memmove_negative_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); - OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(invalid_size); - KUNIT_EXPECT_KASAN_FAIL(test, - memmove((char *)ptr, (char *)ptr + 4, invalid_size)); - kfree(ptr); -} - -static void kmalloc_memmove_invalid_size(struct kunit *test) -{ - char *ptr; - size_t size = 64; - volatile size_t invalid_size = size; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - memset((char *)ptr, 0, 64); - OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); @@ -703,7 +679,7 @@ static void kmem_cache_bulk(struct kunit *test) static char global_array[10]; -static void kasan_global_oob_right(struct kunit *test) +static void kasan_global_oob(struct kunit *test) { /* * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS @@ -726,20 +702,6 @@ static void kasan_global_oob_right(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } -static void kasan_global_oob_left(struct kunit *test) -{ - char *volatile array = global_array; - char *p = array - 3; - - /* - * GCC is known to fail this test, skip it. - * See https://bugzilla.kernel.org/show_bug.cgi?id=215051. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_CC_IS_CLANG); - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); -} - /* Check that ksize() makes the whole object accessible. */ static void ksize_unpoisons_memory(struct kunit *test) { @@ -869,19 +831,6 @@ static void kmem_cache_invalid_free(struct kunit *test) kmem_cache_destroy(cache); } -static void empty_cache_ctor(void *object) { } - -static void kmem_cache_double_destroy(struct kunit *test) -{ - struct kmem_cache *cache; - - /* Provide a constructor to prevent cache merging. 
*/ - cache = kmem_cache_create("test_cache", 200, 0, 0, empty_cache_ctor); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - kmem_cache_destroy(cache); - KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache)); -} - static void kasan_memchr(struct kunit *test) { char *ptr; @@ -899,8 +848,6 @@ static void kasan_memchr(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = memchr(ptr, '1', size + 1)); @@ -926,8 +873,6 @@ static void kasan_memcmp(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); - OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = memcmp(ptr, arr, size+1)); kfree(ptr); @@ -1184,7 +1129,6 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmalloc_oob_memset_4), KUNIT_CASE(kmalloc_oob_memset_8), KUNIT_CASE(kmalloc_oob_memset_16), - KUNIT_CASE(kmalloc_memmove_negative_size), KUNIT_CASE(kmalloc_memmove_invalid_size), KUNIT_CASE(kmalloc_uaf), KUNIT_CASE(kmalloc_uaf_memset), @@ -1194,8 +1138,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmem_cache_oob), KUNIT_CASE(kmem_cache_accounted), KUNIT_CASE(kmem_cache_bulk), - KUNIT_CASE(kasan_global_oob_right), - KUNIT_CASE(kasan_global_oob_left), + KUNIT_CASE(kasan_global_oob), KUNIT_CASE(kasan_stack_oob), KUNIT_CASE(kasan_alloca_oob_left), KUNIT_CASE(kasan_alloca_oob_right), @@ -1203,7 +1146,6 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(ksize_uaf), KUNIT_CASE(kmem_cache_double_free), KUNIT_CASE(kmem_cache_invalid_free), - KUNIT_CASE(kmem_cache_double_destroy), KUNIT_CASE(kasan_memchr), KUNIT_CASE(kasan_memcmp), KUNIT_CASE(kasan_strings), diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c index b112cbc835..7ebf433ede 100644 --- a/lib/test_kasan_module.c +++ b/lib/test_kasan_module.c @@ -35,8 +35,6 @@ static noinline void __init copy_user_test(void) return; } - OPTIMIZER_HIDE_VAR(size); - pr_info("out-of-bounds in copy_from_user()\n"); unused = copy_from_user(kmem, usermem, size + 1); diff --git a/lib/test_printf.c b/lib/test_printf.c index 07309c45f3..55082432f3 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -586,59 +586,70 @@ struct page_flags_test { int width; int shift; int mask; + unsigned long value; const char *fmt; const char *name; }; -static const struct page_flags_test pft[] = { +static struct page_flags_test pft[] = { {SECTIONS_WIDTH, SECTIONS_PGSHIFT, SECTIONS_MASK, - "%d", "section"}, + 0, "%d", "section"}, {NODES_WIDTH, NODES_PGSHIFT, NODES_MASK, - "%d", "node"}, + 0, "%d", "node"}, {ZONES_WIDTH, ZONES_PGSHIFT, ZONES_MASK, - "%d", "zone"}, + 0, "%d", "zone"}, {LAST_CPUPID_WIDTH, LAST_CPUPID_PGSHIFT, LAST_CPUPID_MASK, - "%#x", "lastcpupid"}, + 0, "%#x", "lastcpupid"}, {KASAN_TAG_WIDTH, KASAN_TAG_PGSHIFT, KASAN_TAG_MASK, - "%#x", "kasantag"}, + 0, "%#x", "kasantag"}, }; static void __init page_flags_test(int section, int node, int zone, int last_cpupid, - int kasan_tag, unsigned long flags, const char *name, - char *cmp_buf) + int kasan_tag, int flags, const char *name, char *cmp_buf) { unsigned long values[] = {section, node, zone, last_cpupid, kasan_tag}; - unsigned long size; + unsigned long page_flags = 0; + unsigned long size = 0; bool append = false; int i; - for (i = 0; i < ARRAY_SIZE(values); i++) - flags |= (values[i] & pft[i].mask) << pft[i].shift; - - size = scnprintf(cmp_buf, BUF_SIZE, 
"%#lx(", flags); - if (flags & PAGEFLAGS_MASK) { - size += scnprintf(cmp_buf + size, BUF_SIZE - size, "%s", name); - append = true; + flags &= PAGEFLAGS_MASK; + if (flags) { + page_flags |= flags; + snprintf(cmp_buf + size, BUF_SIZE - size, "%s", name); + size = strlen(cmp_buf); +#if SECTIONS_WIDTH || NODES_WIDTH || ZONES_WIDTH || \ + LAST_CPUPID_WIDTH || KASAN_TAG_WIDTH + /* Other information also included in page flags */ + snprintf(cmp_buf + size, BUF_SIZE - size, "|"); + size = strlen(cmp_buf); +#endif } + /* Set the test value */ + for (i = 0; i < ARRAY_SIZE(pft); i++) + pft[i].value = values[i]; + for (i = 0; i < ARRAY_SIZE(pft); i++) { if (!pft[i].width) continue; - if (append) - size += scnprintf(cmp_buf + size, BUF_SIZE - size, "|"); + if (append) { + snprintf(cmp_buf + size, BUF_SIZE - size, "|"); + size = strlen(cmp_buf); + } - size += scnprintf(cmp_buf + size, BUF_SIZE - size, "%s=", - pft[i].name); - size += scnprintf(cmp_buf + size, BUF_SIZE - size, pft[i].fmt, - values[i] & pft[i].mask); + page_flags |= (pft[i].value & pft[i].mask) << pft[i].shift; + snprintf(cmp_buf + size, BUF_SIZE - size, "%s=", pft[i].name); + size = strlen(cmp_buf); + snprintf(cmp_buf + size, BUF_SIZE - size, pft[i].fmt, + pft[i].value & pft[i].mask); + size = strlen(cmp_buf); append = true; } - snprintf(cmp_buf + size, BUF_SIZE - size, ")"); - - test(cmp_buf, "%pGp", &flags); + test(cmp_buf, "%pGp", &page_flags); } static void __init diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index a5a3d6c27e..3750323973 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -128,6 +128,26 @@ static struct ctl_table test_table[] = { { } }; +static struct ctl_table test_sysctl_table[] = { + { + .procname = "test_sysctl", + .maxlen = 0, + .mode = 0555, + .child = test_table, + }, + { } +}; + +static struct ctl_table test_sysctl_root_table[] = { + { + .procname = "debug", + .maxlen = 0, + .mode = 0555, + .child = test_sysctl_table, + }, + { } +}; + static struct ctl_table_header *test_sysctl_header; static int __init test_sysctl_init(void) @@ -135,7 +155,7 @@ static int __init test_sysctl_init(void) test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL); if (!test_data.bitmap_0001) return -ENOMEM; - test_sysctl_header = register_sysctl("debug/test_sysctl", test_table); + test_sysctl_header = register_sysctl_table(test_sysctl_root_table); if (!test_sysctl_header) { kfree(test_data.bitmap_0001); return -ENOMEM; diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 2062be1f2e..7e7bbd0f3f 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -79,6 +79,15 @@ static void test_ubsan_load_invalid_value(void) eval2 = eval; } +static void test_ubsan_null_ptr_deref(void) +{ + volatile int *ptr = NULL; + int val; + + UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE); + val = *ptr; +} + static void test_ubsan_misaligned_access(void) { volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5}; @@ -89,16 +98,29 @@ static void test_ubsan_misaligned_access(void) *ptr = val; } +static void test_ubsan_object_size_mismatch(void) +{ + /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */ + volatile int val __aligned(8) = 4; + volatile long long *ptr, val2; + + UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE); + ptr = (long long *)&val; + val2 = *ptr; +} + static const test_ubsan_fp test_ubsan_array[] = { test_ubsan_shift_out_of_bounds, test_ubsan_out_of_bounds, test_ubsan_load_invalid_value, test_ubsan_misaligned_access, + test_ubsan_object_size_mismatch, }; /* Excluded because they Oops the module. 
*/ static const test_ubsan_fp skip_ubsan_array[] = { test_ubsan_divrem_overflow, + test_ubsan_null_ptr_deref, }; static int __init test_ubsan_init(void) diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index cf41fd6df4..e14993bc84 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -393,7 +393,7 @@ static struct test_driver { static void shuffle_array(int *arr, int n) { unsigned int rnd; - int i, j; + int i, j, x; for (i = n - 1; i > 0; i--) { get_random_bytes(&rnd, sizeof(rnd)); @@ -402,7 +402,9 @@ static void shuffle_array(int *arr, int n) j = rnd % i; /* Swap indexes. */ - swap(arr[i], arr[j]); + x = arr[i]; + arr[i] = arr[j]; + arr[j] = x; } } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b8129dd37..d7ad44f2c8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -53,7 +53,8 @@ #include #include "kstrtox.h" -static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base) +static unsigned long long simple_strntoull(const char *startp, size_t max_chars, + char **endp, unsigned int base) { const char *cp; unsigned long long result = 0ULL; @@ -407,9 +408,8 @@ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) #define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ #define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ -static_assert(SIGN == 1); static_assert(ZEROPAD == ('0' - ' ')); -static_assert(SMALL == ('a' ^ 'A')); +static_assert(SMALL == ' '); enum format_type { FORMAT_TYPE_NONE, /* Just a string part */ @@ -1241,13 +1241,20 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, struct printf_spec spec, const char *fmt) { int nr_bits = max_t(int, spec.field_width, 0); + /* current bit is 'cur', most recently seen range is [rbot, rtop] */ + int cur, rbot, rtop; bool first = true; - int rbot, rtop; if (check_pointer(&buf, end, bitmap, spec)) return buf; - for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) { + rbot = cur = find_first_bit(bitmap, nr_bits); + while (cur < nr_bits) { + rtop = cur; + cur = find_next_bit(bitmap, nr_bits, cur + 1); + if (cur < nr_bits && cur <= rtop + 1) + continue; + if (!first) { if (buf < end) *buf = ','; @@ -1256,12 +1263,15 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, first = false; buf = number(buf, end, rbot, default_dec_spec); - if (rtop == rbot + 1) - continue; + if (rbot < rtop) { + if (buf < end) + *buf = '-'; + buf++; - if (buf < end) - *buf = '-'; - buf = number(++buf, end, rtop - 1, default_dec_spec); + buf = number(buf, end, rtop, default_dec_spec); + } + + rbot = cur; } return buf; } @@ -2013,11 +2023,6 @@ char *format_page_flags(char *buf, char *end, unsigned long flags) bool append = false; int i; - buf = number(buf, end, flags, default_flag_spec); - if (buf < end) - *buf = '('; - buf++; - /* Page flags from the main area. 
*/ if (main_flags) { buf = format_flags(buf, end, main_flags, pageflag_names); @@ -2046,9 +2051,6 @@ char *format_page_flags(char *buf, char *end, unsigned long flags) append = true; } - if (buf < end) - *buf = ')'; - buf++; return buf; } @@ -3554,7 +3556,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) ++fmt; for ( ; *fmt && *fmt != ']'; ++fmt, ++len) - __set_bit((u8)*fmt, set); + set_bit((u8)*fmt, set); /* no ']' or no character set found */ if (!*fmt || !len) @@ -3564,7 +3566,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) if (negate) { bitmap_complement(set, set, 256); /* exclude null '\0' byte */ - __clear_bit(0, set); + clear_bit(0, set); } /* match must be non-empty */ diff --git a/lib/xarray.c b/lib/xarray.c index 6f47f63758..f5d8f54907 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -157,7 +157,7 @@ static void xas_move_index(struct xa_state *xas, unsigned long offset) xas->xa_index += offset << shift; } -static void xas_next_offset(struct xa_state *xas) +static void xas_advance(struct xa_state *xas) { xas->xa_offset++; xas_move_index(xas, xas->xa_offset); @@ -1250,7 +1250,7 @@ void *xas_find(struct xa_state *xas, unsigned long max) xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; } - xas_next_offset(xas); + xas_advance(xas); while (xas->xa_node && (xas->xa_index <= max)) { if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { @@ -1268,7 +1268,7 @@ void *xas_find(struct xa_state *xas, unsigned long max) if (entry && !xa_is_sibling(entry)) return entry; - xas_next_offset(xas); + xas_advance(xas); } if (!xas->xa_node) diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index adce22ac18..5cb50245a8 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -39,19 +39,6 @@ config XZ_DEC_SPARC default y select XZ_DEC_BCJ -config XZ_DEC_MICROLZMA - bool "MicroLZMA decoder" - default n - help - MicroLZMA is a header format variant where the first byte - of a raw LZMA stream (without the end of stream marker) has - been replaced with a bitwise-negation of the lc/lp/pb - properties byte. MicroLZMA was created to be used in EROFS - but can be used by other things too where wasting minimal - amount of space for headers is important. - - Unless you know that you need this, say N. - endif config XZ_DEC_BCJ diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 27ce34520e..d548cf0e59 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -248,10 +248,6 @@ struct lzma2_dec { * before the first LZMA chunk. */ bool need_props; - -#ifdef XZ_DEC_MICROLZMA - bool pedantic_microlzma; -#endif }; struct xz_dec_lzma2 { @@ -423,12 +419,6 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, } } -#ifdef XZ_DEC_MICROLZMA -# define DICT_FLUSH_SUPPORTS_SKIPPING true -#else -# define DICT_FLUSH_SUPPORTS_SKIPPING false -#endif - /* * Flush pending data from dictionary to b->out. It is assumed that there is * enough space in b->out. This is guaranteed because caller uses dict_limit() @@ -447,14 +437,9 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) * decompression because in multi-call mode dict->buf * has been allocated by us in this file; it's not * provided by the caller like in single-call mode. - * - * With MicroLZMA, b->out can be NULL to skip bytes that - * the caller doesn't need. This cannot be done with XZ - * because it would break BCJ filters. 
*/ - if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL) - memcpy(b->out + b->out_pos, dict->buf + dict->start, - copy_size); + memcpy(b->out + b->out_pos, dict->buf + dict->start, + copy_size); } dict->start = dict->pos; @@ -520,7 +505,7 @@ static __always_inline void rc_normalize(struct rc_dec *rc) * functions so that the compiler is supposed to be able to more easily avoid * an extra branch. In this particular version of the LZMA decoder, this * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 - * on x86). Using a non-split version results in nicer looking code too. + * on x86). Using a non-splitted version results in nicer looking code too. * * NOTE: This must return an int. Do not make it return a bool or the speed * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, @@ -806,7 +791,6 @@ static void lzma_reset(struct xz_dec_lzma2 *s) s->lzma.rep1 = 0; s->lzma.rep2 = 0; s->lzma.rep3 = 0; - s->lzma.len = 0; /* * All probabilities are initialized to the same value. This hack @@ -1190,6 +1174,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) } } + s->lzma.len = 0; + s->lzma2.sequence = SEQ_CONTROL; s->lzma2.need_dict_reset = true; @@ -1205,140 +1191,3 @@ XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) kfree(s); } - -#ifdef XZ_DEC_MICROLZMA -/* This is a wrapper struct to have a nice struct name in the public API. */ -struct xz_dec_microlzma { - struct xz_dec_lzma2 s; -}; - -enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr, - struct xz_buf *b) -{ - struct xz_dec_lzma2 *s = &s_ptr->s; - - /* - * sequence is SEQ_PROPERTIES before the first input byte, - * SEQ_LZMA_PREPARE until a total of five bytes have been read, - * and SEQ_LZMA_RUN for the rest of the input stream. - */ - if (s->lzma2.sequence != SEQ_LZMA_RUN) { - if (s->lzma2.sequence == SEQ_PROPERTIES) { - /* One byte is needed for the props. */ - if (b->in_pos >= b->in_size) - return XZ_OK; - - /* - * Don't increment b->in_pos here. The same byte is - * also passed to rc_read_init() which will ignore it. - */ - if (!lzma_props(s, ~b->in[b->in_pos])) - return XZ_DATA_ERROR; - - s->lzma2.sequence = SEQ_LZMA_PREPARE; - } - - /* - * xz_dec_microlzma_reset() doesn't validate the compressed - * size so we do it here. We have to limit the maximum size - * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice - * round number and much more than users of this code should - * ever need. - */ - if (s->lzma2.compressed < RC_INIT_BYTES - || s->lzma2.compressed > (3U << 30)) - return XZ_DATA_ERROR; - - if (!rc_read_init(&s->rc, b)) - return XZ_OK; - - s->lzma2.compressed -= RC_INIT_BYTES; - s->lzma2.sequence = SEQ_LZMA_RUN; - - dict_reset(&s->dict, b); - } - - /* This is to allow increasing b->out_size between calls. 
*/ - if (DEC_IS_SINGLE(s->dict.mode)) - s->dict.end = b->out_size - b->out_pos; - - while (true) { - dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos, - s->lzma2.uncompressed)); - - if (!lzma2_lzma(s, b)) - return XZ_DATA_ERROR; - - s->lzma2.uncompressed -= dict_flush(&s->dict, b); - - if (s->lzma2.uncompressed == 0) { - if (s->lzma2.pedantic_microlzma) { - if (s->lzma2.compressed > 0 || s->lzma.len > 0 - || !rc_is_finished(&s->rc)) - return XZ_DATA_ERROR; - } - - return XZ_STREAM_END; - } - - if (b->out_pos == b->out_size) - return XZ_OK; - - if (b->in_pos == b->in_size - && s->temp.size < s->lzma2.compressed) - return XZ_OK; - } -} - -struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, - uint32_t dict_size) -{ - struct xz_dec_microlzma *s; - - /* Restrict dict_size to the same range as in the LZMA2 code. */ - if (dict_size < 4096 || dict_size > (3U << 30)) - return NULL; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return NULL; - - s->s.dict.mode = mode; - s->s.dict.size = dict_size; - - if (DEC_IS_MULTI(mode)) { - s->s.dict.end = dict_size; - - s->s.dict.buf = vmalloc(dict_size); - if (s->s.dict.buf == NULL) { - kfree(s); - return NULL; - } - } - - return s; -} - -void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, - uint32_t uncomp_size, int uncomp_size_is_exact) -{ - /* - * comp_size is validated in xz_dec_microlzma_run(). - * uncomp_size can safely be anything. - */ - s->s.lzma2.compressed = comp_size; - s->s.lzma2.uncompressed = uncomp_size; - s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact; - - s->s.lzma2.sequence = SEQ_PROPERTIES; - s->s.temp.size = 0; -} - -void xz_dec_microlzma_end(struct xz_dec_microlzma *s) -{ - if (DEC_IS_MULTI(s->s.dict.mode)) - vfree(s->s.dict.buf); - - kfree(s); -} -#endif diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c index 61098c67a4..32eb3c03ae 100644 --- a/lib/xz/xz_dec_syms.c +++ b/lib/xz/xz_dec_syms.c @@ -15,15 +15,8 @@ EXPORT_SYMBOL(xz_dec_reset); EXPORT_SYMBOL(xz_dec_run); EXPORT_SYMBOL(xz_dec_end); -#ifdef CONFIG_XZ_DEC_MICROLZMA -EXPORT_SYMBOL(xz_dec_microlzma_alloc); -EXPORT_SYMBOL(xz_dec_microlzma_reset); -EXPORT_SYMBOL(xz_dec_microlzma_run); -EXPORT_SYMBOL(xz_dec_microlzma_end); -#endif - MODULE_DESCRIPTION("XZ decompressor"); -MODULE_VERSION("1.1"); +MODULE_VERSION("1.0"); MODULE_AUTHOR("Lasse Collin and Igor Pavlov"); /* diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index bf1e94ec78..09360ebb51 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -37,9 +37,6 @@ # ifdef CONFIG_XZ_DEC_SPARC # define XZ_DEC_SPARC # endif -# ifdef CONFIG_XZ_DEC_MICROLZMA -# define XZ_DEC_MICROLZMA -# endif # define memeq(a, b, size) (memcmp(a, b, size) == 0) # define memzero(buf, size) memset(buf, 0, size) # endif diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile index fc45339fc3..f5d778e7e5 100644 --- a/lib/zstd/Makefile +++ b/lib/zstd/Makefile @@ -1,44 +1,10 @@ -# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause -# ################################################################ -# Copyright (c) Facebook, Inc. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). -# You may select, at your option, one of the above-listed licenses. 
-# ################################################################ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o -zstd_compress-y := \ - zstd_compress_module.o \ - common/debug.o \ - common/entropy_common.o \ - common/error_private.o \ - common/fse_decompress.o \ - common/zstd_common.o \ - compress/fse_compress.o \ - compress/hist.o \ - compress/huf_compress.o \ - compress/zstd_compress.o \ - compress/zstd_compress_literals.o \ - compress/zstd_compress_sequences.o \ - compress/zstd_compress_superblock.o \ - compress/zstd_double_fast.o \ - compress/zstd_fast.o \ - compress/zstd_lazy.o \ - compress/zstd_ldm.o \ - compress/zstd_opt.o \ +ccflags-y += -O3 -zstd_decompress-y := \ - zstd_decompress_module.o \ - common/debug.o \ - common/entropy_common.o \ - common/error_private.o \ - common/fse_decompress.o \ - common/zstd_common.o \ - decompress/huf_decompress.o \ - decompress/zstd_ddict.o \ - decompress/zstd_decompress.o \ - decompress/zstd_decompress_block.o \ +zstd_compress-y := fse_compress.o huf_compress.o compress.o \ + entropy_common.o fse_decompress.o zstd_common.o +zstd_decompress-y := huf_decompress.o decompress.o \ + entropy_common.o fse_decompress.o zstd_common.o diff --git a/mm/Kconfig b/mm/Kconfig index 3326ee3903..c048dea7e3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -109,13 +109,6 @@ config NUMA_KEEP_MEMINFO config MEMORY_ISOLATION bool -# IORESOURCE_SYSTEM_RAM regions in the kernel resource tree that are marked -# IORESOURCE_EXCLUSIVE cannot be mapped to user space, for example, via -# /dev/mem. -config EXCLUSIVE_SYSTEM_RAM - def_bool y - depends on !DEVMEM || STRICT_DEVMEM - # # Only be set on architectures that have completely implemented memory hotplug # feature. If you are not sure, don't touch it. @@ -130,11 +123,15 @@ config ARCH_ENABLE_MEMORY_HOTPLUG config MEMORY_HOTPLUG bool "Allow for memory hot-add" select MEMORY_ISOLATION - depends on SPARSEMEM + depends on SPARSEMEM || X86_64_ACPI_NUMA depends on ARCH_ENABLE_MEMORY_HOTPLUG - depends on 64BIT + depends on 64BIT || BROKEN select NUMA_KEEP_MEMINFO if NUMA +config MEMORY_HOTPLUG_SPARSE + def_bool y + depends on SPARSEMEM && MEMORY_HOTPLUG + config MEMORY_HOTPLUG_DEFAULT_ONLINE bool "Online the newly added memory blocks by default" depends on MEMORY_HOTPLUG @@ -374,7 +371,7 @@ config NOMMU_INITIAL_TRIM_EXCESS config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE select COMPACTION select XARRAY_MULTI help @@ -428,24 +425,47 @@ config THP_SWAP # UP and nommu archs use km based percpu allocator # config NEED_PER_CPU_KM - depends on !SMP || !MMU + depends on !SMP bool default y -config NEED_PER_CPU_EMBED_FIRST_CHUNK - bool +config CLEANCACHE + bool "Enable cleancache driver to cache clean pages if tmem is present" + help + Cleancache can be thought of as a page-granularity victim cache + for clean pages that the kernel's pageframe replacement algorithm + (PFRA) would like to keep around, but can't since there isn't enough + memory. So when the PFRA "evicts" a page, it first attempts to use + cleancache code to put the data contained in that page into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. 
And when a cleancache-enabled + filesystem wishes to access a page in a file on disk, it first + checks cleancache to see if it already contains it; if it does, + the page is copied into the kernel and a disk access is avoided. + When a transcendent memory driver is available (such as zcache or + Xen transcendent memory), a significant I/O reduction + may be achieved. When none is available, all cleancache calls + are reduced to a single pointer-compare-against-NULL resulting + in a negligible performance hit. -config NEED_PER_CPU_PAGE_FIRST_CHUNK - bool - -config USE_PERCPU_NUMA_NODE_ID - bool - -config HAVE_SETUP_PER_CPU_AREA - bool + If unsure, say Y to enable cleancache config FRONTSWAP - bool + bool "Enable frontswap to cache swap pages if tmem is present" + depends on SWAP + help + Frontswap is so named because it can be thought of as the opposite + of a "backing" store for a swap device. The data is stored into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. When space in transcendent memory is available, + a significant swap I/O reduction may be achieved. When none is + available, all frontswap calls are reduced to a single pointer- + compare-against-NULL resulting in a negligible performance hit + and swap data is stored as normal on the matching swap device. + + If unsure, say Y to enable frontswap. config CMA bool "Contiguous Memory Allocator" @@ -510,8 +530,7 @@ config MEM_SOFT_DIRTY config ZSWAP bool "Compressed cache for swap pages (EXPERIMENTAL)" - depends on SWAP && CRYPTO=y - select FRONTSWAP + depends on FRONTSWAP && CRYPTO=y select ZPOOL help A lightweight compressed cache for swap pages. It takes @@ -878,20 +897,6 @@ config IO_MAPPING config SECRETMEM def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED -config ANON_VMA_NAME - bool "Anonymous VMA name support" - depends on PROC_FS && ADVISE_SYSCALLS && MMU - - help - Allow naming anonymous virtual memory areas. - - This feature allows assigning names to virtual memory areas. Assigned - names can be later retrieved from /proc/pid/maps and /proc/pid/smaps - and help identifying individual anonymous memory areas. - Assigning a name to anonymous virtual memory area might prevent that - area from being merged with adjacent virtual memory areas due to the - difference in their name. - source "mm/damon/Kconfig" endmenu diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 5bd5bb0972..1e73717802 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -62,30 +62,6 @@ config PAGE_OWNER If unsure, say N. -config PAGE_TABLE_CHECK - bool "Check for invalid mappings in user page tables" - depends on ARCH_SUPPORTS_PAGE_TABLE_CHECK - select PAGE_EXTENSION - help - Check that anonymous page is not being mapped twice with read write - permissions. Check that anonymous and file pages are not being - erroneously shared. Since the checking is performed at the time - entries are added and removed to user page tables, leaking, corruption - and double mapping problems are detected synchronously. - - If unsure say "n". - -config PAGE_TABLE_CHECK_ENFORCED - bool "Enforce the page table checking by default" - depends on PAGE_TABLE_CHECK - help - Always enable page table checking. By default the page table checking - is disabled, and can be optionally enabled via page_table_check=on - kernel parameter. This config enforces that page table check is always - enabled. - - If unsure say "n". 
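The cleancache and frontswap help texts earlier in this mm/Kconfig hunk both describe the same fast path: every hook first compares a backend ops pointer against NULL, so with no backend loaded a call costs a single compare. A minimal userspace C sketch of that pattern follows; the demo_* names are illustrative stand-ins, not the kernel's API.

#include <stdio.h>

struct demo_cache_ops {
        int (*put_page)(int pool_id, unsigned long index, const void *page);
};

/* Stays NULL until a backend registers, as with a missing tmem driver. */
static const struct demo_cache_ops *demo_ops;

static int demo_put_page(int pool_id, unsigned long index, const void *page)
{
        if (!demo_ops)  /* no backend: the hook reduces to this one compare */
                return -1;
        return demo_ops->put_page(pool_id, index, page);
}

int main(void)
{
        char page[4096] = { 0 };

        printf("put -> %d\n", demo_put_page(0, 0, page)); /* -1: no backend */
        return 0;
}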
- config PAGE_POISONING bool "Poison pages after freeing" help diff --git a/mm/Makefile b/mm/Makefile index 70d4309c9c..fc60a40ce9 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -15,8 +15,6 @@ KCSAN_SANITIZE_slab_common.o := n KCSAN_SANITIZE_slab.o := n KCSAN_SANITIZE_slub.o := n KCSAN_SANITIZE_page_alloc.o := n -# But enable explicit instrumentation for memory barriers. -KCSAN_INSTRUMENT_BARRIERS := y # These files are disabled because they produce non-interesting and/or # flaky coverage that is not a function of syscall inputs. E.g. slab is out of @@ -48,7 +46,7 @@ mmu-$(CONFIG_MMU) += process_vm_access.o endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ - maccess.o page-writeback.o folio-compat.o \ + maccess.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o percpu.o slab_common.o \ @@ -104,6 +102,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o obj-$(CONFIG_PAGE_OWNER) += page_owner.o +obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZBUD) += zbud.o @@ -113,7 +112,6 @@ obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o obj-$(CONFIG_CMA) += cma.o obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o -obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o obj-$(CONFIG_SECRETMEM) += secretmem.o obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index eae96dfe02..02ff66f863 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -2,9 +2,8 @@ #include #include -#include #include -#include +#include #include #include #include @@ -292,6 +291,8 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, memset(wb, 0, sizeof(*wb)); + if (wb != &bdi->wb) + bdi_get(bdi); wb->bdi = bdi; wb->last_old_flush = jiffies; INIT_LIST_HEAD(&wb->b_dirty); @@ -315,7 +316,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, err = fprop_local_init_percpu(&wb->completions, gfp); if (err) - return err; + goto out_put_bdi; for (i = 0; i < NR_WB_STAT_ITEMS; i++) { err = percpu_counter_init(&wb->stat[i], 0, gfp); @@ -329,6 +330,9 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, while (i--) percpu_counter_destroy(&wb->stat[i]); fprop_local_destroy_percpu(&wb->completions); +out_put_bdi: + if (wb != &bdi->wb) + bdi_put(bdi); return err; } @@ -369,6 +373,8 @@ static void wb_exit(struct bdi_writeback *wb) percpu_counter_destroy(&wb->stat[i]); fprop_local_destroy_percpu(&wb->completions); + if (wb != &wb->bdi->wb) + bdi_put(wb->bdi); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -391,7 +397,6 @@ static void cgwb_release_workfn(struct work_struct *work) struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css); - struct backing_dev_info *bdi = wb->bdi; mutex_lock(&wb->bdi->cgwb_release_mutex); wb_shutdown(wb); @@ -411,7 +416,6 @@ static void cgwb_release_workfn(struct work_struct *work) percpu_ref_exit(&wb->refcnt); wb_exit(wb); - bdi_put(bdi); WARN_ON_ONCE(!list_empty(&wb->b_attached)); kfree_rcu(wb, rcu); } @@ -493,7 +497,6 @@ static int cgwb_create(struct backing_dev_info *bdi, INIT_LIST_HEAD(&wb->b_attached); INIT_WORK(&wb->release_work, cgwb_release_workfn); set_bit(WB_registered, &wb->state); - 
bdi_get(bdi); /* * The root wb determines the registered state of the whole bdi and @@ -525,7 +528,6 @@ static int cgwb_create(struct backing_dev_info *bdi, goto out_put; err_fprop_exit: - bdi_put(bdi); fprop_local_destroy_percpu(&wb->memcg_completions); err_ref_exit: percpu_ref_exit(&wb->refcnt); @@ -963,14 +965,14 @@ void bdi_unregister(struct backing_dev_info *bdi) bdi->owner = NULL; } } -EXPORT_SYMBOL(bdi_unregister); static void release_bdi(struct kref *ref) { struct backing_dev_info *bdi = container_of(ref, struct backing_dev_info, refcnt); - WARN_ON_ONCE(test_bit(WB_registered, &bdi->wb.state)); + if (test_bit(WB_registered, &bdi->wb.state)) + bdi_unregister(bdi); WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); kfree(bdi); @@ -982,22 +984,6 @@ void bdi_put(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_put); -struct backing_dev_info *inode_to_bdi(struct inode *inode) -{ - struct super_block *sb; - - if (!inode) - return &noop_backing_dev_info; - - sb = inode->i_sb; -#ifdef CONFIG_BLOCK - if (sb_is_blkdev_sb(sb)) - return I_BDEV(inode)->bd_disk->bdi; -#endif - return sb->s_bdi; -} -EXPORT_SYMBOL(inode_to_bdi); - const char *bdi_dev_name(struct backing_dev_info *bdi) { if (!bdi || !bdi->dev) @@ -1062,3 +1048,51 @@ long congestion_wait(int sync, long timeout) return ret; } EXPORT_SYMBOL(congestion_wait); + +/** + * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes + * @sync: SYNC or ASYNC IO + * @timeout: timeout in jiffies + * + * In the event of a congested backing_dev (any backing_dev) this waits + * for up to @timeout jiffies for either a BDI to exit congestion of the + * given @sync queue or a write to complete. + * + * The return value is 0 if the sleep is for the full timeout. Otherwise, + * it is the number of jiffies that were still remaining when the function + * returned. return_value == timeout implies the function did not sleep. 
+ */ +long wait_iff_congested(int sync, long timeout) +{ + long ret; + unsigned long start = jiffies; + DEFINE_WAIT(wait); + wait_queue_head_t *wqh = &congestion_wqh[sync]; + + /* + * If there is no congestion, yield if necessary instead + * of sleeping on the congestion queue + */ + if (atomic_read(&nr_wb_congested[sync]) == 0) { + cond_resched(); + + /* In case we scheduled, work out time remaining */ + ret = timeout - (jiffies - start); + if (ret < 0) + ret = 0; + + goto out; + } + + /* Sleep until uncongested or a write happens */ + prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); + ret = io_schedule_timeout(timeout); + finish_wait(wqh, &wait); + +out: + trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout), + jiffies_to_usecs(jiffies - start)); + + return ret; +} +EXPORT_SYMBOL(wait_iff_congested); diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c index f18a631e74..f03f42f426 100644 --- a/mm/bootmem_info.c +++ b/mm/bootmem_info.c @@ -15,7 +15,7 @@ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) { - page->index = type; + page->freelist = (void *)type; SetPagePrivate(page); set_page_private(page, info); page_ref_inc(page); @@ -23,13 +23,14 @@ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) void put_page_bootmem(struct page *page) { - unsigned long type = page->index; + unsigned long type; + type = (unsigned long) page->freelist; BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); if (page_ref_dec_return(page) == 1) { - page->index = 0; + page->freelist = NULL; ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); diff --git a/mm/cma.c b/mm/cma.c index bc9ca8f3c4..995e154809 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -378,7 +378,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, return 0; free_mem: - memblock_phys_free(base, size); + memblock_free(base, size); err: pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); return ret; @@ -524,25 +524,6 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, return page; } -bool cma_pages_valid(struct cma *cma, const struct page *pages, - unsigned long count) -{ - unsigned long pfn; - - if (!cma || !pages) - return false; - - pfn = page_to_pfn(pages); - - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) { - pr_debug("%s(page %p, count %lu)\n", __func__, - (void *)pages, count); - return false; - } - - return true; -} - /** * cma_release() - release allocated pages * @cma: Contiguous memory region for which the allocation is performed. 
@@ -558,13 +539,16 @@ bool cma_release(struct cma *cma, const struct page *pages, { unsigned long pfn; - if (!cma_pages_valid(cma, pages, count)) + if (!cma || !pages) return false; pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); pfn = page_to_pfn(pages); + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) + return false; + VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); free_contig_range(pfn, count); diff --git a/mm/compaction.c b/mm/compaction.c index b4e94cda30..bfc93da1c2 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -761,8 +761,6 @@ isolate_freepages_range(struct compact_control *cc, /* Similar to reclaim, but different enough that they don't share logic */ static bool too_many_isolated(pg_data_t *pgdat) { - bool too_many; - unsigned long active, inactive, isolated; inactive = node_page_state(pgdat, NR_INACTIVE_FILE) + @@ -772,11 +770,7 @@ static bool too_many_isolated(pg_data_t *pgdat) isolated = node_page_state(pgdat, NR_ISOLATED_FILE) + node_page_state(pgdat, NR_ISOLATED_ANON); - too_many = isolated > (inactive + active) / 2; - if (!too_many) - wake_throttle_isolated(pgdat); - - return too_many; + return isolated > (inactive + active) / 2; } /** @@ -828,7 +822,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (cc->mode == MIGRATE_ASYNC) return -EAGAIN; - reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED); + congestion_wait(BLK_RW_ASYNC, HZ/10); if (fatal_signal_pending(current)) return -EINTR; @@ -1028,7 +1022,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (!TestClearPageLRU(page)) goto isolate_fail_put; - lruvec = folio_lruvec(page_folio(page)); + lruvec = mem_cgroup_page_lruvec(page); /* If we already hold the lock, we can skip some rechecking */ if (lruvec != locked) { @@ -1038,7 +1032,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, compact_lock_irqsave(&lruvec->lru_lock, &flags, cc); locked = lruvec; - lruvec_memcg_debug(lruvec, page_folio(page)); + lruvec_memcg_debug(lruvec, page); /* Try get exclusive access under lock */ if (!skip_updated) { @@ -2280,7 +2274,6 @@ compact_zone(struct compact_control *cc, struct capture_control *capc) unsigned long last_migrated_pfn; const bool sync = cc->mode != MIGRATE_ASYNC; bool update_cached; - unsigned int nr_succeeded = 0; /* * These counters track activities during zone compaction. Initialize @@ -2399,10 +2392,10 @@ compact_zone(struct compact_control *cc, struct capture_control *capc) err = migrate_pages(&cc->migratepages, compaction_alloc, compaction_free, (unsigned long)cc, cc->mode, - MR_COMPACTION, &nr_succeeded); + MR_COMPACTION, NULL); - trace_mm_compaction_migratepages(cc->nr_migratepages, - nr_succeeded); + trace_mm_compaction_migratepages(cc->nr_migratepages, err, + &cc->migratepages); /* All pages were either migrated or will be released */ cc->nr_migratepages = 0; diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index 5bcf05851a..37024798a9 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -30,15 +30,7 @@ config DAMON_VADDR select PAGE_IDLE_FLAG help This builds the default data access monitoring primitives for DAMON - that work for virtual address spaces. - -config DAMON_PADDR - bool "Data access monitoring primitives for the physical address space" - depends on DAMON && MMU - select PAGE_IDLE_FLAG - help - This builds the default data access monitoring primitives for DAMON - that works for the physical address space. + that works for virtual address spaces. 
config DAMON_VADDR_KUNIT_TEST bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS @@ -54,7 +46,7 @@ config DAMON_VADDR_KUNIT_TEST config DAMON_DBGFS bool "DAMON debugfs interface" - depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS + depends on DAMON_VADDR && DEBUG_FS help This builds the debugfs interface for DAMON. The user space admins can use the interface for arbitrary data access monitoring. @@ -73,16 +65,4 @@ config DAMON_DBGFS_KUNIT_TEST If unsure, say N. -config DAMON_RECLAIM - bool "Build DAMON-based reclaim (DAMON_RECLAIM)" - depends on DAMON_PADDR - help - This builds the DAMON-based reclamation subsystem. It finds pages - that not accessed for a long time (cold) using DAMON and reclaim - those. - - This is suggested to be used as a proactive and lightweight - reclamation under light memory pressure, while the traditional page - scanning-based reclamation is used for heavy pressure. - endmenu diff --git a/mm/damon/Makefile b/mm/damon/Makefile index f7d5ac377a..fed4be3bac 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DAMON) := core.o -obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o -obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o +obj-$(CONFIG_DAMON_VADDR) += vaddr.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o -obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o diff --git a/mm/damon/core.c b/mm/damon/core.c index 1dd153c31c..7a4912d6e6 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -10,9 +10,8 @@ #include #include #include -#include +#include #include -#include #define CREATE_TRACE_POINTS #include @@ -22,6 +21,9 @@ #define DAMON_MIN_REGION 1 #endif +/* Get a random number in [l, r) */ +#define damon_rand(l, r) (l + prandom_u32_max(r - l)) + static DEFINE_MUTEX(damon_lock); static int nr_running_ctxs; @@ -43,12 +45,20 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end) region->nr_accesses = 0; INIT_LIST_HEAD(®ion->list); - region->age = 0; - region->last_nr_accesses = 0; - return region; } +/* + * Add a region between two other regions + */ +inline void damon_insert_region(struct damon_region *r, + struct damon_region *prev, struct damon_region *next, + struct damon_target *t) +{ + __list_add(&r->list, &prev->list, &next->list); + t->nr_regions++; +} + void damon_add_region(struct damon_region *r, struct damon_target *t) { list_add_tail(&r->list, &t->regions_list); @@ -72,73 +82,6 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t) damon_free_region(r); } -struct damos *damon_new_scheme( - unsigned long min_sz_region, unsigned long max_sz_region, - unsigned int min_nr_accesses, unsigned int max_nr_accesses, - unsigned int min_age_region, unsigned int max_age_region, - enum damos_action action, struct damos_quota *quota, - struct damos_watermarks *wmarks) -{ - struct damos *scheme; - - scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); - if (!scheme) - return NULL; - scheme->min_sz_region = min_sz_region; - scheme->max_sz_region = max_sz_region; - scheme->min_nr_accesses = min_nr_accesses; - scheme->max_nr_accesses = max_nr_accesses; - scheme->min_age_region = min_age_region; - scheme->max_age_region = max_age_region; - scheme->action = action; - scheme->stat = (struct damos_stat){}; - INIT_LIST_HEAD(&scheme->list); - - scheme->quota.ms = quota->ms; - scheme->quota.sz = quota->sz; - scheme->quota.reset_interval = quota->reset_interval; - scheme->quota.weight_sz = quota->weight_sz; - scheme->quota.weight_nr_accesses = quota->weight_nr_accesses; - 
scheme->quota.weight_age = quota->weight_age; - scheme->quota.total_charged_sz = 0; - scheme->quota.total_charged_ns = 0; - scheme->quota.esz = 0; - scheme->quota.charged_sz = 0; - scheme->quota.charged_from = 0; - scheme->quota.charge_target_from = NULL; - scheme->quota.charge_addr_from = 0; - - scheme->wmarks.metric = wmarks->metric; - scheme->wmarks.interval = wmarks->interval; - scheme->wmarks.high = wmarks->high; - scheme->wmarks.mid = wmarks->mid; - scheme->wmarks.low = wmarks->low; - scheme->wmarks.activated = true; - - return scheme; -} - -void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) -{ - list_add_tail(&s->list, &ctx->schemes); -} - -static void damon_del_scheme(struct damos *s) -{ - list_del(&s->list); -} - -static void damon_free_scheme(struct damos *s) -{ - kfree(s); -} - -void damon_destroy_scheme(struct damos *s) -{ - damon_del_scheme(s); - damon_free_scheme(s); -} - /* * Construct a damon_target struct * @@ -164,11 +107,6 @@ void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) list_add_tail(&t->list, &ctx->adaptive_targets); } -bool damon_targets_empty(struct damon_ctx *ctx) -{ - return list_empty(&ctx->adaptive_targets); -} - static void damon_del_target(struct damon_target *t) { list_del(&t->list); @@ -215,7 +153,6 @@ struct damon_ctx *damon_new_ctx(void) ctx->max_nr_regions = 1000; INIT_LIST_HEAD(&ctx->adaptive_targets); - INIT_LIST_HEAD(&ctx->schemes); return ctx; } @@ -235,13 +172,7 @@ static void damon_destroy_targets(struct damon_ctx *ctx) void damon_destroy_ctx(struct damon_ctx *ctx) { - struct damos *s, *next_s; - damon_destroy_targets(ctx); - - damon_for_each_scheme_safe(s, next_s, ctx) - damon_destroy_scheme(s); - kfree(ctx); } @@ -266,6 +197,7 @@ int damon_set_targets(struct damon_ctx *ctx, for (i = 0; i < nr_ids; i++) { t = damon_new_target(ids[i]); if (!t) { + pr_err("Failed to alloc damon_target\n"); /* The caller should do cleanup of the ids itself */ damon_for_each_target_safe(t, next, ctx) damon_destroy_target(t); @@ -295,10 +227,16 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, unsigned long aggr_int, unsigned long primitive_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg) { - if (min_nr_reg < 3) + if (min_nr_reg < 3) { + pr_err("min_nr_regions (%lu) must be at least 3\n", + min_nr_reg); return -EINVAL; - if (min_nr_reg > max_nr_reg) + } + if (min_nr_reg > max_nr_reg) { + pr_err("invalid nr_regions. min (%lu) > max (%lu)\n", + min_nr_reg, max_nr_reg); return -EINVAL; + } ctx->sample_interval = sample_int; ctx->aggr_interval = aggr_int; @@ -309,30 +247,6 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, return 0; } -/** - * damon_set_schemes() - Set data access monitoring based operation schemes. - * @ctx: monitoring context - * @schemes: array of the schemes - * @nr_schemes: number of entries in @schemes - * - * This function should not be called while the kdamond of the context is - * running. - * - * Return: 0 if success, or negative error code otherwise. - */ -int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, - ssize_t nr_schemes) -{ - struct damos *s, *next; - ssize_t i; - - damon_for_each_scheme_safe(s, next, ctx) - damon_destroy_scheme(s); - for (i = 0; i < nr_schemes; i++) - damon_add_scheme(ctx, schemes[i]); - return 0; -} - /** * damon_nr_running_ctxs() - Return number of currently running contexts. 
*/ @@ -367,6 +281,17 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) return sz; } +static bool damon_kdamond_running(struct damon_ctx *ctx) +{ + bool running; + + mutex_lock(&ctx->kdamond_lock); + running = ctx->kdamond != NULL; + mutex_unlock(&ctx->kdamond_lock); + + return running; +} + static int kdamond_fn(void *data); /* @@ -384,11 +309,12 @@ static int __damon_start(struct damon_ctx *ctx) mutex_lock(&ctx->kdamond_lock); if (!ctx->kdamond) { err = 0; + ctx->kdamond_stop = false; ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", nr_running_ctxs); if (IS_ERR(ctx->kdamond)) { err = PTR_ERR(ctx->kdamond); - ctx->kdamond = NULL; + ctx->kdamond = 0; } } mutex_unlock(&ctx->kdamond_lock); @@ -431,6 +357,15 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs) return err; } +static void kdamond_usleep(unsigned long usecs) +{ + /* See Documentation/timers/timers-howto.rst for the thresholds */ + if (usecs > 20 * 1000) + schedule_timeout_idle(usecs_to_jiffies(usecs)); + else + usleep_idle_range(usecs, usecs + 1); +} + /* * __damon_stop() - Stops monitoring of given context. * @ctx: monitoring context @@ -439,15 +374,12 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs) */ static int __damon_stop(struct damon_ctx *ctx) { - struct task_struct *tsk; - mutex_lock(&ctx->kdamond_lock); - tsk = ctx->kdamond; - if (tsk) { - get_task_struct(tsk); + if (ctx->kdamond) { + ctx->kdamond_stop = true; mutex_unlock(&ctx->kdamond_lock); - kthread_stop(tsk); - put_task_struct(tsk); + while (damon_kdamond_running(ctx)) + kdamond_usleep(ctx->sample_interval); return 0; } mutex_unlock(&ctx->kdamond_lock); @@ -514,221 +446,18 @@ static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) static void kdamond_reset_aggregated(struct damon_ctx *c) { struct damon_target *t; - unsigned int ti = 0; /* target's index */ damon_for_each_target(t, c) { struct damon_region *r; damon_for_each_region(r, t) { - trace_damon_aggregated(t, ti, r, damon_nr_regions(t)); - r->last_nr_accesses = r->nr_accesses; + trace_damon_aggregated(t, r, damon_nr_regions(t)); r->nr_accesses = 0; } - ti++; } } -static void damon_split_region_at(struct damon_ctx *ctx, - struct damon_target *t, struct damon_region *r, - unsigned long sz_r); - -static bool __damos_valid_target(struct damon_region *r, struct damos *s) -{ - unsigned long sz; - - sz = r->ar.end - r->ar.start; - return s->min_sz_region <= sz && sz <= s->max_sz_region && - s->min_nr_accesses <= r->nr_accesses && - r->nr_accesses <= s->max_nr_accesses && - s->min_age_region <= r->age && r->age <= s->max_age_region; -} - -static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, - struct damon_region *r, struct damos *s) -{ - bool ret = __damos_valid_target(r, s); - - if (!ret || !s->quota.esz || !c->primitive.get_scheme_score) - return ret; - - return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score; -} - -static void damon_do_apply_schemes(struct damon_ctx *c, - struct damon_target *t, - struct damon_region *r) -{ - struct damos *s; - - damon_for_each_scheme(s, c) { - struct damos_quota *quota = &s->quota; - unsigned long sz = r->ar.end - r->ar.start; - struct timespec64 begin, end; - unsigned long sz_applied = 0; - - if (!s->wmarks.activated) - continue; - - /* Check the quota */ - if (quota->esz && quota->charged_sz >= quota->esz) - continue; - - /* Skip previously charged regions */ - if (quota->charge_target_from) { - if (t != quota->charge_target_from) - continue; - if (r == damon_last_region(t)) { - 
quota->charge_target_from = NULL; - quota->charge_addr_from = 0; - continue; - } - if (quota->charge_addr_from && - r->ar.end <= quota->charge_addr_from) - continue; - - if (quota->charge_addr_from && r->ar.start < - quota->charge_addr_from) { - sz = ALIGN_DOWN(quota->charge_addr_from - - r->ar.start, DAMON_MIN_REGION); - if (!sz) { - if (r->ar.end - r->ar.start <= - DAMON_MIN_REGION) - continue; - sz = DAMON_MIN_REGION; - } - damon_split_region_at(c, t, r, sz); - r = damon_next_region(r); - sz = r->ar.end - r->ar.start; - } - quota->charge_target_from = NULL; - quota->charge_addr_from = 0; - } - - if (!damos_valid_target(c, t, r, s)) - continue; - - /* Apply the scheme */ - if (c->primitive.apply_scheme) { - if (quota->esz && - quota->charged_sz + sz > quota->esz) { - sz = ALIGN_DOWN(quota->esz - quota->charged_sz, - DAMON_MIN_REGION); - if (!sz) - goto update_stat; - damon_split_region_at(c, t, r, sz); - } - ktime_get_coarse_ts64(&begin); - sz_applied = c->primitive.apply_scheme(c, t, r, s); - ktime_get_coarse_ts64(&end); - quota->total_charged_ns += timespec64_to_ns(&end) - - timespec64_to_ns(&begin); - quota->charged_sz += sz; - if (quota->esz && quota->charged_sz >= quota->esz) { - quota->charge_target_from = t; - quota->charge_addr_from = r->ar.end + 1; - } - } - if (s->action != DAMOS_STAT) - r->age = 0; - -update_stat: - s->stat.nr_tried++; - s->stat.sz_tried += sz; - if (sz_applied) - s->stat.nr_applied++; - s->stat.sz_applied += sz_applied; - } -} - -/* Shouldn't be called if quota->ms and quota->sz are zero */ -static void damos_set_effective_quota(struct damos_quota *quota) -{ - unsigned long throughput; - unsigned long esz; - - if (!quota->ms) { - quota->esz = quota->sz; - return; - } - - if (quota->total_charged_ns) - throughput = quota->total_charged_sz * 1000000 / - quota->total_charged_ns; - else - throughput = PAGE_SIZE * 1024; - esz = throughput * quota->ms; - - if (quota->sz && quota->sz < esz) - esz = quota->sz; - quota->esz = esz; -} - -static void kdamond_apply_schemes(struct damon_ctx *c) -{ - struct damon_target *t; - struct damon_region *r, *next_r; - struct damos *s; - - damon_for_each_scheme(s, c) { - struct damos_quota *quota = &s->quota; - unsigned long cumulated_sz; - unsigned int score, max_score = 0; - - if (!s->wmarks.activated) - continue; - - if (!quota->ms && !quota->sz) - continue; - - /* New charge window starts */ - if (time_after_eq(jiffies, quota->charged_from + - msecs_to_jiffies( - quota->reset_interval))) { - if (quota->esz && quota->charged_sz >= quota->esz) - s->stat.qt_exceeds++; - quota->total_charged_sz += quota->charged_sz; - quota->charged_from = jiffies; - quota->charged_sz = 0; - damos_set_effective_quota(quota); - } - - if (!c->primitive.get_scheme_score) - continue; - - /* Fill up the score histogram */ - memset(quota->histogram, 0, sizeof(quota->histogram)); - damon_for_each_target(t, c) { - damon_for_each_region(r, t) { - if (!__damos_valid_target(r, s)) - continue; - score = c->primitive.get_scheme_score( - c, t, r, s); - quota->histogram[score] += - r->ar.end - r->ar.start; - if (score > max_score) - max_score = score; - } - } - - /* Set the min score limit */ - for (cumulated_sz = 0, score = max_score; ; score--) { - cumulated_sz += quota->histogram[score]; - if (cumulated_sz >= quota->esz || !score) - break; - } - quota->min_score = score; - } - - damon_for_each_target(t, c) { - damon_for_each_region_safe(r, next_r, t) - damon_do_apply_schemes(c, t, r); - } -} - -static inline unsigned long sz_damon_region(struct damon_region 
*r) -{ - return r->ar.end - r->ar.start; -} +#define sz_damon_region(r) (r->ar.end - r->ar.start) /* * Merge two adjacent regions into one region @@ -740,11 +469,12 @@ static void damon_merge_two_regions(struct damon_target *t, l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / (sz_l + sz_r); - l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); l->ar.end = r->ar.end; damon_destroy_region(r, t); } +#define diff_of(a, b) (a > b ? a - b : b - a) + /* * Merge adjacent regions having similar access frequencies * @@ -758,13 +488,8 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, struct damon_region *r, *prev = NULL, *next; damon_for_each_region_safe(r, next, t) { - if (abs(r->nr_accesses - r->last_nr_accesses) > thres) - r->age = 0; - else - r->age++; - if (prev && prev->ar.end == r->ar.start && - abs(prev->nr_accesses - r->nr_accesses) <= thres && + diff_of(prev->nr_accesses, r->nr_accesses) <= thres && sz_damon_region(prev) + sz_damon_region(r) <= sz_limit) damon_merge_two_regions(t, prev, r); else @@ -810,9 +535,6 @@ static void damon_split_region_at(struct damon_ctx *ctx, r->ar.end = new->ar.start; - new->age = r->age; - new->last_nr_accesses = r->last_nr_accesses; - damon_insert_region(new, r, damon_next_region(r), t); } @@ -901,8 +623,12 @@ static bool kdamond_need_update_primitive(struct damon_ctx *ctx) static bool kdamond_need_stop(struct damon_ctx *ctx) { struct damon_target *t; + bool stop; - if (kthread_should_stop()) + mutex_lock(&ctx->kdamond_lock); + stop = ctx->kdamond_stop; + mutex_unlock(&ctx->kdamond_lock); + if (stop) return true; if (!ctx->primitive.target_valid) @@ -916,82 +642,11 @@ static bool kdamond_need_stop(struct damon_ctx *ctx) return true; } -static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) +static void set_kdamond_stop(struct damon_ctx *ctx) { - struct sysinfo i; - - switch (metric) { - case DAMOS_WMARK_FREE_MEM_RATE: - si_meminfo(&i); - return i.freeram * 1000 / i.totalram; - default: - break; - } - return -EINVAL; -} - -/* - * Returns zero if the scheme is active. Else, returns time to wait for next - * watermark check in micro-seconds. - */ -static unsigned long damos_wmark_wait_us(struct damos *scheme) -{ - unsigned long metric; - - if (scheme->wmarks.metric == DAMOS_WMARK_NONE) - return 0; - - metric = damos_wmark_metric_value(scheme->wmarks.metric); - /* higher than high watermark or lower than low watermark */ - if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { - if (scheme->wmarks.activated) - pr_debug("deactivate a scheme (%d) for %s wmark\n", - scheme->action, - metric > scheme->wmarks.high ? 
- "high" : "low"); - scheme->wmarks.activated = false; - return scheme->wmarks.interval; - } - - /* inactive and higher than middle watermark */ - if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && - !scheme->wmarks.activated) - return scheme->wmarks.interval; - - if (!scheme->wmarks.activated) - pr_debug("activate a scheme (%d)\n", scheme->action); - scheme->wmarks.activated = true; - return 0; -} - -static void kdamond_usleep(unsigned long usecs) -{ - /* See Documentation/timers/timers-howto.rst for the thresholds */ - if (usecs > 20 * USEC_PER_MSEC) - schedule_timeout_idle(usecs_to_jiffies(usecs)); - else - usleep_idle_range(usecs, usecs + 1); -} - -/* Returns negative error code if it's not activated but should return */ -static int kdamond_wait_activation(struct damon_ctx *ctx) -{ - struct damos *s; - unsigned long wait_time; - unsigned long min_wait_time = 0; - - while (!kdamond_need_stop(ctx)) { - damon_for_each_scheme(s, ctx) { - wait_time = damos_wmark_wait_us(s); - if (!min_wait_time || wait_time < min_wait_time) - min_wait_time = wait_time; - } - if (!min_wait_time) - return 0; - - kdamond_usleep(min_wait_time); - } - return -EBUSY; + mutex_lock(&ctx->kdamond_lock); + ctx->kdamond_stop = true; + mutex_unlock(&ctx->kdamond_lock); } /* @@ -1004,26 +659,24 @@ static int kdamond_fn(void *data) struct damon_region *r, *next; unsigned int max_nr_accesses = 0; unsigned long sz_limit = 0; - bool done = false; - pr_debug("kdamond (%d) starts\n", current->pid); + mutex_lock(&ctx->kdamond_lock); + pr_info("kdamond (%d) starts\n", ctx->kdamond->pid); + mutex_unlock(&ctx->kdamond_lock); if (ctx->primitive.init) ctx->primitive.init(ctx); if (ctx->callback.before_start && ctx->callback.before_start(ctx)) - done = true; + set_kdamond_stop(ctx); sz_limit = damon_region_sz_limit(ctx); - while (!kdamond_need_stop(ctx) && !done) { - if (kdamond_wait_activation(ctx)) - continue; - + while (!kdamond_need_stop(ctx)) { if (ctx->primitive.prepare_access_checks) ctx->primitive.prepare_access_checks(ctx); if (ctx->callback.after_sampling && ctx->callback.after_sampling(ctx)) - done = true; + set_kdamond_stop(ctx); kdamond_usleep(ctx->sample_interval); @@ -1036,8 +689,7 @@ static int kdamond_fn(void *data) sz_limit); if (ctx->callback.after_aggregation && ctx->callback.after_aggregation(ctx)) - done = true; - kdamond_apply_schemes(ctx); + set_kdamond_stop(ctx); kdamond_reset_aggregated(ctx); kdamond_split_regions(ctx); if (ctx->primitive.reset_aggregated) @@ -1055,12 +707,13 @@ static int kdamond_fn(void *data) damon_destroy_region(r, t); } - if (ctx->callback.before_terminate) - ctx->callback.before_terminate(ctx); + if (ctx->callback.before_terminate && + ctx->callback.before_terminate(ctx)) + set_kdamond_stop(ctx); if (ctx->primitive.cleanup) ctx->primitive.cleanup(ctx); - pr_debug("kdamond (%d) finishes\n", current->pid); + pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid); mutex_lock(&ctx->kdamond_lock); ctx->kdamond = NULL; mutex_unlock(&ctx->kdamond_lock); @@ -1069,7 +722,7 @@ static int kdamond_fn(void *data) nr_running_ctxs--; mutex_unlock(&damon_lock); - return 0; + do_exit(0); } #include "core-test.h" diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h index 86b9f95282..4eddcfa739 100644 --- a/mm/damon/dbgfs-test.h +++ b/mm/damon/dbgfs-test.h @@ -109,63 +109,9 @@ static void damon_dbgfs_test_set_targets(struct kunit *test) dbgfs_destroy_ctx(ctx); } -static void damon_dbgfs_test_set_init_regions(struct kunit *test) -{ - struct damon_ctx *ctx = damon_new_ctx(); - 
unsigned long ids[] = {1, 2, 3}; - /* Each line represents one region in `<target id> <start address> <end address>` */ - char * const valid_inputs[] = {"2 10 20\n 2 20 30\n2 35 45", - "2 10 20\n", - "2 10 20\n1 39 59\n1 70 134\n 2 20 25\n", - ""}; - /* Reading the file again will show sorted, clean output */ - char * const valid_expects[] = {"2 10 20\n2 20 30\n2 35 45\n", - "2 10 20\n", - "1 39 59\n1 70 134\n2 10 20\n2 20 25\n", - ""}; - char * const invalid_inputs[] = {"4 10 20\n", /* target not exists */ - "2 10 20\n 2 14 26\n", /* regions overlap */ - "1 10 20\n2 30 40\n 1 5 8"}; /* not sorted by address */ - char *input, *expect; - int i, rc; - char buf[256]; - - damon_set_targets(ctx, ids, 3); - - /* Put valid inputs and check the results */ - for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) { - input = valid_inputs[i]; - expect = valid_expects[i]; - - rc = set_init_regions(ctx, input, strnlen(input, 256)); - KUNIT_EXPECT_EQ(test, rc, 0); - - memset(buf, 0, 256); - sprint_init_regions(ctx, buf, 256); - - KUNIT_EXPECT_STREQ(test, (char *)buf, expect); - } - /* Put invalid inputs and check the return error code */ - for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) { - input = invalid_inputs[i]; - pr_info("input: %s\n", input); - rc = set_init_regions(ctx, input, strnlen(input, 256)); - KUNIT_EXPECT_EQ(test, rc, -EINVAL); - - memset(buf, 0, 256); - sprint_init_regions(ctx, buf, 256); - - KUNIT_EXPECT_STREQ(test, (char *)buf, ""); - } - - damon_set_targets(ctx, NULL, 0); - damon_destroy_ctx(ctx); -} - static struct kunit_case damon_test_cases[] = { KUNIT_CASE(damon_dbgfs_test_str_to_target_ids), KUNIT_CASE(damon_dbgfs_test_set_targets), - KUNIT_CASE(damon_dbgfs_test_set_init_regions), {}, }; diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 5b899601e5..36624990b5 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -69,7 +69,8 @@ static ssize_t dbgfs_attrs_write(struct file *file, struct damon_ctx *ctx = file->private_data; unsigned long s, a, r, minr, maxr; char *kbuf; - ssize_t ret; + ssize_t ret = count; + int err; kbuf = user_input_str(buf, count, ppos); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); @@ -87,9 +88,9 @@ static ssize_t dbgfs_attrs_write(struct file *file, goto unlock_out; } - ret = damon_set_attrs(ctx, s, a, r, minr, maxr); - if (!ret) - ret = count; + err = damon_set_attrs(ctx, s, a, r, minr, maxr); + if (err) + ret = err; unlock_out: mutex_unlock(&ctx->kdamond_lock); out: @@ -97,184 +98,6 @@ static ssize_t dbgfs_attrs_write(struct file *file, return ret; } -static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) -{ - struct damos *s; - int written = 0; - int rc; - - damon_for_each_scheme(s, c) { - rc = scnprintf(&buf[written], len - written, - "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", - s->min_sz_region, s->max_sz_region, - s->min_nr_accesses, s->max_nr_accesses, - s->min_age_region, s->max_age_region, - s->action, - s->quota.ms, s->quota.sz, - s->quota.reset_interval, - s->quota.weight_sz, - s->quota.weight_nr_accesses, - s->quota.weight_age, - s->wmarks.metric, s->wmarks.interval, - s->wmarks.high, s->wmarks.mid, s->wmarks.low, - s->stat.nr_tried, s->stat.sz_tried, - s->stat.nr_applied, s->stat.sz_applied, - s->stat.qt_exceeds); - if (!rc) - return -ENOMEM;
- - mutex_lock(&ctx->kdamond_lock); - len = sprint_schemes(ctx, kbuf, count); - mutex_unlock(&ctx->kdamond_lock); - if (len < 0) - goto out; - len = simple_read_from_buffer(buf, count, ppos, kbuf, len); - -out: - kfree(kbuf); - return len; -} - -static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes) -{ - ssize_t i; - - for (i = 0; i < nr_schemes; i++) - kfree(schemes[i]); - kfree(schemes); -} - -static bool damos_action_valid(int action) -{ - switch (action) { - case DAMOS_WILLNEED: - case DAMOS_COLD: - case DAMOS_PAGEOUT: - case DAMOS_HUGEPAGE: - case DAMOS_NOHUGEPAGE: - case DAMOS_STAT: - return true; - default: - return false; - } -} - -/* - * Converts a string into an array of struct damos pointers - * - * Returns an array of struct damos pointers that converted if the conversion - * success, or NULL otherwise. - */ -static struct damos **str_to_schemes(const char *str, ssize_t len, - ssize_t *nr_schemes) -{ - struct damos *scheme, **schemes; - const int max_nr_schemes = 256; - int pos = 0, parsed, ret; - unsigned long min_sz, max_sz; - unsigned int min_nr_a, max_nr_a, min_age, max_age; - unsigned int action; - - schemes = kmalloc_array(max_nr_schemes, sizeof(scheme), - GFP_KERNEL); - if (!schemes) - return NULL; - - *nr_schemes = 0; - while (pos < len && *nr_schemes < max_nr_schemes) { - struct damos_quota quota = {}; - struct damos_watermarks wmarks; - - ret = sscanf(&str[pos], - "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n", - &min_sz, &max_sz, &min_nr_a, &max_nr_a, - &min_age, &max_age, &action, &quota.ms, - &quota.sz, &quota.reset_interval, - &quota.weight_sz, &quota.weight_nr_accesses, - &quota.weight_age, &wmarks.metric, - &wmarks.interval, &wmarks.high, &wmarks.mid, - &wmarks.low, &parsed); - if (ret != 18) - break; - if (!damos_action_valid(action)) - goto fail; - - if (min_sz > max_sz || min_nr_a > max_nr_a || min_age > max_age) - goto fail; - - if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low || - wmarks.mid < wmarks.low) - goto fail; - - pos += parsed; - scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, - min_age, max_age, action, &quota, &wmarks); - if (!scheme) - goto fail; - - schemes[*nr_schemes] = scheme; - *nr_schemes += 1; - } - return schemes; -fail: - free_schemes_arr(schemes, *nr_schemes); - return NULL; -} - -static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct damon_ctx *ctx = file->private_data; - char *kbuf; - struct damos **schemes; - ssize_t nr_schemes = 0, ret; - - kbuf = user_input_str(buf, count, ppos); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - - schemes = str_to_schemes(kbuf, count, &nr_schemes); - if (!schemes) { - ret = -EINVAL; - goto out; - } - - mutex_lock(&ctx->kdamond_lock); - if (ctx->kdamond) { - ret = -EBUSY; - goto unlock_out; - } - - ret = damon_set_schemes(ctx, schemes, nr_schemes); - if (!ret) { - ret = count; - nr_schemes = 0; - } - -unlock_out: - mutex_unlock(&ctx->kdamond_lock); - free_schemes_arr(schemes, nr_schemes); -out: - kfree(kbuf); - return ret; -} - static inline bool targetid_is_pid(const struct damon_ctx *ctx) { return ctx->primitive.target_valid == damon_va_target_valid; } @@ -363,30 +186,26 @@ static ssize_t dbgfs_target_ids_write(struct file *file, { struct damon_ctx *ctx = file->private_data; struct damon_target *t, *next_t; - bool id_is_pid = true; - char *kbuf; + char *kbuf, *nrs; unsigned long *targets; ssize_t nr_targets; - ssize_t ret; + ssize_t ret = count; int i; + int err; kbuf = user_input_str(buf, count, ppos); if
(IS_ERR(kbuf)) return PTR_ERR(kbuf); - if (!strncmp(kbuf, "paddr\n", count)) { - id_is_pid = false; - /* target id is meaningless here, but we set it just for fun */ - scnprintf(kbuf, count, "42 "); - } + nrs = kbuf; - targets = str_to_target_ids(kbuf, count, &nr_targets); + targets = str_to_target_ids(nrs, ret, &nr_targets); if (!targets) { ret = -ENOMEM; goto out; } - if (id_is_pid) { + if (targetid_is_pid(ctx)) { for (i = 0; i < nr_targets; i++) { targets[i] = (unsigned long)find_get_pid( (int)targets[i]); @@ -400,7 +219,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file, mutex_lock(&ctx->kdamond_lock); if (ctx->kdamond) { - if (id_is_pid) + if (targetid_is_pid(ctx)) dbgfs_put_pids(targets, nr_targets); ret = -EBUSY; goto unlock_out; @@ -413,18 +232,11 @@ static ssize_t dbgfs_target_ids_write(struct file *file, damon_destroy_target(t); } - /* Configure the context for the address space type */ - if (id_is_pid) - damon_va_set_primitives(ctx); - else - damon_pa_set_primitives(ctx); - - ret = damon_set_targets(ctx, targets, nr_targets); - if (ret) { - if (id_is_pid) + err = damon_set_targets(ctx, targets, nr_targets); + if (err) { + if (targetid_is_pid(ctx)) dbgfs_put_pids(targets, nr_targets); - } else { - ret = count; + ret = err; } unlock_out: @@ -436,152 +248,6 @@ static ssize_t dbgfs_target_ids_write(struct file *file, return ret; } -static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len) -{ - struct damon_target *t; - struct damon_region *r; - int written = 0; - int rc; - - damon_for_each_target(t, c) { - damon_for_each_region(r, t) { - rc = scnprintf(&buf[written], len - written, - "%lu %lu %lu\n", - t->id, r->ar.start, r->ar.end); - if (!rc) - return -ENOMEM; - written += rc; - } - } - return written; -} - -static ssize_t dbgfs_init_regions_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct damon_ctx *ctx = file->private_data; - char *kbuf; - ssize_t len; - - kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); - if (!kbuf) - return -ENOMEM; - - mutex_lock(&ctx->kdamond_lock); - if (ctx->kdamond) { - mutex_unlock(&ctx->kdamond_lock); - len = -EBUSY; - goto out; - } - - len = sprint_init_regions(ctx, kbuf, count); - mutex_unlock(&ctx->kdamond_lock); - if (len < 0) - goto out; - len = simple_read_from_buffer(buf, count, ppos, kbuf, len); - -out: - kfree(kbuf); - return len; -} - -static int add_init_region(struct damon_ctx *c, - unsigned long target_id, struct damon_addr_range *ar) -{ - struct damon_target *t; - struct damon_region *r, *prev; - unsigned long id; - int rc = -EINVAL; - - if (ar->start >= ar->end) - return -EINVAL; - - damon_for_each_target(t, c) { - id = t->id; - if (targetid_is_pid(c)) - id = (unsigned long)pid_vnr((struct pid *)id); - if (id == target_id) { - r = damon_new_region(ar->start, ar->end); - if (!r) - return -ENOMEM; - damon_add_region(r, t); - if (damon_nr_regions(t) > 1) { - prev = damon_prev_region(r); - if (prev->ar.end > r->ar.start) { - damon_destroy_region(r, t); - return -EINVAL; - } - } - rc = 0; - } - } - return rc; -} - -static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len) -{ - struct damon_target *t; - struct damon_region *r, *next; - int pos = 0, parsed, ret; - unsigned long target_id; - struct damon_addr_range ar; - int err; - - damon_for_each_target(t, c) { - damon_for_each_region_safe(r, next, t) - damon_destroy_region(r, t); - } - - while (pos < len) { - ret = sscanf(&str[pos], "%lu %lu %lu%n", - &target_id, &ar.start, &ar.end, &parsed); - if (ret != 
3) - break; - err = add_init_region(c, target_id, &ar); - if (err) - goto fail; - pos += parsed; - } - - return 0; - -fail: - damon_for_each_target(t, c) { - damon_for_each_region_safe(r, next, t) - damon_destroy_region(r, t); - } - return err; -} - -static ssize_t dbgfs_init_regions_write(struct file *file, - const char __user *buf, size_t count, - loff_t *ppos) -{ - struct damon_ctx *ctx = file->private_data; - char *kbuf; - ssize_t ret = count; - int err; - - kbuf = user_input_str(buf, count, ppos); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - - mutex_lock(&ctx->kdamond_lock); - if (ctx->kdamond) { - ret = -EBUSY; - goto unlock_out; - } - - err = set_init_regions(ctx, kbuf, ret); - if (err) - ret = err; - -unlock_out: - mutex_unlock(&ctx->kdamond_lock); - kfree(kbuf); - return ret; -} - static ssize_t dbgfs_kdamond_pid_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -621,24 +287,12 @@ static const struct file_operations attrs_fops = { .write = dbgfs_attrs_write, }; -static const struct file_operations schemes_fops = { - .open = damon_dbgfs_open, - .read = dbgfs_schemes_read, - .write = dbgfs_schemes_write, -}; - static const struct file_operations target_ids_fops = { .open = damon_dbgfs_open, .read = dbgfs_target_ids_read, .write = dbgfs_target_ids_write, }; -static const struct file_operations init_regions_fops = { - .open = damon_dbgfs_open, - .read = dbgfs_init_regions_read, - .write = dbgfs_init_regions_write, -}; - static const struct file_operations kdamond_pid_fops = { .open = damon_dbgfs_open, .read = dbgfs_kdamond_pid_read, @@ -646,22 +300,22 @@ static const struct file_operations kdamond_pid_fops = { static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx) { - const char * const file_names[] = {"attrs", "schemes", "target_ids", - "init_regions", "kdamond_pid"}; - const struct file_operations *fops[] = {&attrs_fops, &schemes_fops, - &target_ids_fops, &init_regions_fops, &kdamond_pid_fops}; + const char * const file_names[] = {"attrs", "target_ids", + "kdamond_pid"}; + const struct file_operations *fops[] = {&attrs_fops, &target_ids_fops, + &kdamond_pid_fops}; int i; for (i = 0; i < ARRAY_SIZE(file_names); i++) debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]); } -static void dbgfs_before_terminate(struct damon_ctx *ctx) +static int dbgfs_before_terminate(struct damon_ctx *ctx) { struct damon_target *t, *next; if (!targetid_is_pid(ctx)) - return; + return 0; mutex_lock(&ctx->kdamond_lock); damon_for_each_target_safe(t, next, ctx) { @@ -669,6 +323,7 @@ static void dbgfs_before_terminate(struct damon_ctx *ctx) damon_destroy_target(t); } mutex_unlock(&ctx->kdamond_lock); + return 0; } static struct damon_ctx *dbgfs_new_ctx(void) @@ -743,7 +398,8 @@ static ssize_t dbgfs_mk_context_write(struct file *file, { char *kbuf; char *ctx_name; - ssize_t ret; + ssize_t ret = count; + int err; kbuf = user_input_str(buf, count, ppos); if (IS_ERR(kbuf)) @@ -761,9 +417,9 @@ static ssize_t dbgfs_mk_context_write(struct file *file, } mutex_lock(&damon_dbgfs_lock); - ret = dbgfs_mk_context(ctx_name); - if (!ret) - ret = count; + err = dbgfs_mk_context(ctx_name); + if (err) + ret = err; mutex_unlock(&damon_dbgfs_lock); out: @@ -832,7 +488,8 @@ static ssize_t dbgfs_rm_context_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char *kbuf; - ssize_t ret; + ssize_t ret = count; + int err; char *ctx_name; kbuf = user_input_str(buf, count, ppos); @@ -851,9 +508,9 @@ static ssize_t dbgfs_rm_context_write(struct file *file, } 
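/*
 * [Editor's sketch] The debugfs parsers above (str_to_schemes,
 * str_to_target_ids, set_init_regions) all share one idiom: consume one
 * record per sscanf() call and advance the cursor by the byte count that
 * the %n conversion reports. Below is a minimal userspace model of that
 * idiom, using the "target_id start end" record layout of
 * set_init_regions; everything here is illustrative, not kernel code.
 */
#include <stdio.h>

int main(void)
{
	const char *str = "42 5000 10000\n42 20000 30000\n";
	unsigned long id, start, end;
	int pos = 0, parsed;

	/* %n stores how many characters were consumed; it does not count
	 * toward sscanf()'s return value, so a full record yields 3. */
	while (sscanf(&str[pos], "%lu %lu %lu%n",
		      &id, &start, &end, &parsed) == 3) {
		printf("target %lu: region [%lu, %lu)\n", id, start, end);
		pos += parsed;	/* advance past the record just read */
	}
	return 0;
}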
mutex_lock(&damon_dbgfs_lock); - ret = dbgfs_rm_context(ctx_name); - if (!ret) - ret = count; + err = dbgfs_rm_context(ctx_name); + if (err) + ret = err; mutex_unlock(&damon_dbgfs_lock); out: @@ -877,8 +534,9 @@ static ssize_t dbgfs_monitor_on_read(struct file *file, static ssize_t dbgfs_monitor_on_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - ssize_t ret; + ssize_t ret = count; char *kbuf; + int err; kbuf = user_input_str(buf, count, ppos); if (IS_ERR(kbuf)) @@ -891,26 +549,16 @@ static ssize_t dbgfs_monitor_on_write(struct file *file, } mutex_lock(&damon_dbgfs_lock); - if (!strncmp(kbuf, "on", count)) { - int i; - - for (i = 0; i < dbgfs_nr_ctxs; i++) { - if (damon_targets_empty(dbgfs_ctxs[i])) { - kfree(kbuf); - mutex_unlock(&damon_dbgfs_lock); - return -EINVAL; - } - } - ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs); - } else if (!strncmp(kbuf, "off", count)) { - ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs); - } else { - ret = -EINVAL; - } + if (!strncmp(kbuf, "on", count)) + err = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs); + else if (!strncmp(kbuf, "off", count)) + err = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs); + else + err = -EINVAL; mutex_unlock(&damon_dbgfs_lock); - if (!ret) - ret = count; + if (err) + ret = err; kfree(kbuf); return ret; } diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h index 6a1b9272ea..1f5c13257d 100644 --- a/mm/damon/vaddr-test.h +++ b/mm/damon/vaddr-test.h @@ -135,6 +135,7 @@ static void damon_do_test_apply_three_regions(struct kunit *test, struct damon_addr_range *three_regions, unsigned long *expected, int nr_expected) { + struct damon_ctx *ctx = damon_new_ctx(); struct damon_target *t; struct damon_region *r; int i; @@ -144,6 +145,7 @@ static void damon_do_test_apply_three_regions(struct kunit *test, r = damon_new_region(regions[i * 2], regions[i * 2 + 1]); damon_add_region(r, t); } + damon_add_target(ctx, t); damon_va_apply_three_regions(t, three_regions); @@ -152,6 +154,8 @@ static void damon_do_test_apply_three_regions(struct kunit *test, KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); } + + damon_destroy_ctx(ctx); } /* @@ -229,7 +233,7 @@ static void damon_test_apply_three_regions3(struct kunit *test) * and 70-100) has totally freed and mapped to different area (30-32 and * 65-68). The target regions which were in the old second and third big * regions should now be removed and new target regions covering the new second - * and third big regions should be created. + * and third big regions should be created. 
*/ static void damon_test_apply_three_regions4(struct kunit *test) { @@ -248,59 +252,60 @@ static void damon_test_apply_three_regions4(struct kunit *test) new_three_regions, expected, ARRAY_SIZE(expected)); } -static void damon_test_split_evenly_fail(struct kunit *test, - unsigned long start, unsigned long end, unsigned int nr_pieces) -{ - struct damon_target *t = damon_new_target(42); - struct damon_region *r = damon_new_region(start, end); - - damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, - damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); - - damon_for_each_region(r, t) { - KUNIT_EXPECT_EQ(test, r->ar.start, start); - KUNIT_EXPECT_EQ(test, r->ar.end, end); - } - - damon_free_target(t); -} - -static void damon_test_split_evenly_succ(struct kunit *test, - unsigned long start, unsigned long end, unsigned int nr_pieces) -{ - struct damon_target *t = damon_new_target(42); - struct damon_region *r = damon_new_region(start, end); - unsigned long expected_width = (end - start) / nr_pieces; - unsigned long i = 0; - - damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, - damon_va_evenly_split_region(t, r, nr_pieces), 0); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces); - - damon_for_each_region(r, t) { - if (i == nr_pieces - 1) - break; - KUNIT_EXPECT_EQ(test, - r->ar.start, start + i++ * expected_width); - KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width); - } - KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width); - KUNIT_EXPECT_EQ(test, r->ar.end, end); - damon_free_target(t); -} - static void damon_test_split_evenly(struct kunit *test) { + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + struct damon_region *r; + unsigned long i; + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), -EINVAL); - damon_test_split_evenly_fail(test, 0, 100, 0); - damon_test_split_evenly_succ(test, 0, 100, 10); - damon_test_split_evenly_succ(test, 5, 59, 5); - damon_test_split_evenly_fail(test, 5, 6, 2); + t = damon_new_target(42); + r = damon_new_region(0, 100); + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 0), -EINVAL); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 10), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 10u); + + i = 0; + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, i++ * 10); + KUNIT_EXPECT_EQ(test, r->ar.end, i * 10); + } + damon_free_target(t); + + t = damon_new_target(42); + r = damon_new_region(5, 59); + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 5), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u); + + i = 0; + damon_for_each_region(r, t) { + if (i == 4) + break; + KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i++); + KUNIT_EXPECT_EQ(test, r->ar.end, 5 + 10 * i); + } + KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i); + KUNIT_EXPECT_EQ(test, r->ar.end, 59ul); + damon_free_target(t); + + t = damon_new_target(42); + r = damon_new_region(5, 6); + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 2), -EINVAL); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); + + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, 5ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 6ul); + } + damon_free_target(t); + damon_destroy_ctx(c); } static struct kunit_case damon_test_cases[] = { diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 89b6468da2..58c1fb2aaf 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -7,29 +7,31 @@ #define 
pr_fmt(fmt) "damon-va: " fmt -#include -#include +#include #include +#include #include +#include #include #include +#include #include - -#include "prmtv-common.h" +#include #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST #undef DAMON_MIN_REGION #define DAMON_MIN_REGION 1 #endif +/* Get a random number in [l, r) */ +#define damon_rand(l, r) (l + prandom_u32_max(r - l)) + /* * 't->id' should be the pointer to the relevant 'struct pid' having reference * count. Caller must put the returned task, unless it is NULL. */ -static inline struct task_struct *damon_get_task_struct(struct damon_target *t) -{ - return get_pid_task((struct pid *)t->id, PIDTYPE_PID); -} +#define damon_get_task_struct(t) \ + (get_pid_task((struct pid *)t->id, PIDTYPE_PID)) /* * Get the mm_struct of the given target @@ -100,6 +102,16 @@ static unsigned long sz_range(struct damon_addr_range *r) return r->end - r->start; } +static void swap_ranges(struct damon_addr_range *r1, + struct damon_addr_range *r2) +{ + struct damon_addr_range tmp; + + tmp = *r1; + *r1 = *r2; + *r2 = tmp; +} + /* * Find three regions separated by two biggest unmapped regions * @@ -138,9 +150,9 @@ static int __damon_va_three_regions(struct vm_area_struct *vma, gap.start = last_vma->vm_end; gap.end = vma->vm_start; if (sz_range(&gap) > sz_range(&second_gap)) { - swap(gap, second_gap); + swap_ranges(&gap, &second_gap); if (sz_range(&second_gap) > sz_range(&first_gap)) - swap(second_gap, first_gap); + swap_ranges(&second_gap, &first_gap); } next: last_vma = vma; @@ -151,7 +163,7 @@ static int __damon_va_three_regions(struct vm_area_struct *vma, /* Sort the two biggest gaps by address */ if (first_gap.start > second_gap.start) - swap(first_gap, second_gap); + swap_ranges(&first_gap, &second_gap); /* Store the result */ regions[0].start = ALIGN(start, DAMON_MIN_REGION); @@ -232,19 +244,13 @@ static int damon_va_three_regions(struct damon_target *t, static void __damon_va_init_regions(struct damon_ctx *ctx, struct damon_target *t) { - struct damon_target *ti; struct damon_region *r; struct damon_addr_range regions[3]; unsigned long sz = 0, nr_pieces; - int i, tidx = 0; + int i; if (damon_va_three_regions(t, regions)) { - damon_for_each_target(ti, ctx) { - if (ti == t) - break; - tidx++; - } - pr_debug("Failed to get three regions of %dth target\n", tidx); + pr_err("Failed to get three regions of target %lu\n", t->id); return; } @@ -270,7 +276,7 @@ static void __damon_va_init_regions(struct damon_ctx *ctx, } /* Initialize '->regions_list' of every target (task) */ -static void damon_va_init(struct damon_ctx *ctx) +void damon_va_init(struct damon_ctx *ctx) { struct damon_target *t; @@ -290,8 +296,7 @@ static void damon_va_init(struct damon_ctx *ctx) * * Returns true if it is. 
*/ -static bool damon_intersect(struct damon_region *r, - struct damon_addr_range *re) +static bool damon_intersect(struct damon_region *r, struct damon_addr_range *re) { return !(r->ar.end <= re->start || re->end <= r->ar.start); } @@ -306,7 +311,7 @@ static void damon_va_apply_three_regions(struct damon_target *t, struct damon_addr_range bregions[3]) { struct damon_region *r, *next; - unsigned int i; + unsigned int i = 0; /* Remove regions which are not in the three big regions now */ damon_for_each_region_safe(r, next, t) { @@ -355,7 +360,7 @@ static void damon_va_apply_three_regions(struct damon_target *t, /* * Update regions for current memory mappings */ -static void damon_va_update(struct damon_ctx *ctx) +void damon_va_update(struct damon_ctx *ctx) { struct damon_addr_range three_regions[3]; struct damon_target *t; @@ -367,6 +372,82 @@ static void damon_va_update(struct damon_ctx *ctx) } } +/* + * Get an online page for a pfn if it's in the LRU list. Otherwise, returns + * NULL. + * + * The body of this function is stolen from the 'page_idle_get_page()'. We + * steal rather than reuse it because the code is quite simple. + */ +static struct page *damon_get_page(unsigned long pfn) +{ + struct page *page = pfn_to_online_page(pfn); + + if (!page || !PageLRU(page) || !get_page_unless_zero(page)) + return NULL; + + if (unlikely(!PageLRU(page))) { + put_page(page); + page = NULL; + } + return page; +} + +static void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, + unsigned long addr) +{ + bool referenced = false; + struct page *page = damon_get_page(pte_pfn(*pte)); + + if (!page) + return; + + if (pte_young(*pte)) { + referenced = true; + *pte = pte_mkold(*pte); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE)) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +static void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, + unsigned long addr) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool referenced = false; + struct page *page = damon_get_page(pmd_pfn(*pmd)); + + if (!page) + return; + + if (pmd_young(*pmd)) { + referenced = true; + *pmd = pmd_mkold(*pmd); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + ((1UL) << HPAGE_PMD_SHIFT))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +} + static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { @@ -394,65 +475,8 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, return 0; } -#ifdef CONFIG_HUGETLB_PAGE -static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long addr) -{ - bool referenced = false; - pte_t entry = huge_ptep_get(pte); - struct page *page = pte_page(entry); - - if (!page) - return; - - get_page(page); - - if (pte_young(entry)) { - referenced = true; - entry = pte_mkold(entry); - huge_ptep_set_access_flags(vma, addr, pte, entry, - vma->vm_flags & VM_WRITE); - } - -#ifdef CONFIG_MMU_NOTIFIER - if (mmu_notifier_clear_young(mm, addr, - addr + huge_page_size(hstate_vma(vma)))) - referenced = true; -#endif /* CONFIG_MMU_NOTIFIER */ - - if (referenced) - set_page_young(page); - - set_page_idle(page); - put_page(page); -} - -static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, - unsigned long 
addr, unsigned long end, - struct mm_walk *walk) -{ - struct hstate *h = hstate_vma(walk->vma); - spinlock_t *ptl; - pte_t entry; - - ptl = huge_pte_lock(h, walk->mm, pte); - entry = huge_ptep_get(pte); - if (!pte_present(entry)) - goto out; - - damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr); - -out: - spin_unlock(ptl); - return 0; -} -#else -#define damon_mkold_hugetlb_entry NULL -#endif /* CONFIG_HUGETLB_PAGE */ - -static const struct mm_walk_ops damon_mkold_ops = { +static struct mm_walk_ops damon_mkold_ops = { .pmd_entry = damon_mkold_pmd_entry, - .hugetlb_entry = damon_mkold_hugetlb_entry, }; static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) @@ -466,7 +490,7 @@ static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) * Functions for the access checking of the regions */ -static void __damon_va_prepare_access_check(struct damon_ctx *ctx, +static void damon_va_prepare_access_check(struct damon_ctx *ctx, struct mm_struct *mm, struct damon_region *r) { r->sampling_addr = damon_rand(r->ar.start, r->ar.end); @@ -474,7 +498,7 @@ static void __damon_va_prepare_access_check(struct damon_ctx *ctx, damon_va_mkold(mm, r->sampling_addr); } -static void damon_va_prepare_access_checks(struct damon_ctx *ctx) +void damon_va_prepare_access_checks(struct damon_ctx *ctx) { struct damon_target *t; struct mm_struct *mm; @@ -485,7 +509,7 @@ static void damon_va_prepare_access_checks(struct damon_ctx *ctx) if (!mm) continue; damon_for_each_region(r, t) - __damon_va_prepare_access_check(ctx, mm, r); + damon_va_prepare_access_check(ctx, mm, r); mmput(mm); } } @@ -547,47 +571,8 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, return 0; } -#ifdef CONFIG_HUGETLB_PAGE -static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, - unsigned long addr, unsigned long end, - struct mm_walk *walk) -{ - struct damon_young_walk_private *priv = walk->private; - struct hstate *h = hstate_vma(walk->vma); - struct page *page; - spinlock_t *ptl; - pte_t entry; - - ptl = huge_pte_lock(h, walk->mm, pte); - entry = huge_ptep_get(pte); - if (!pte_present(entry)) - goto out; - - page = pte_page(entry); - if (!page) - goto out; - - get_page(page); - - if (pte_young(entry) || !page_is_idle(page) || - mmu_notifier_test_young(walk->mm, addr)) { - *priv->page_sz = huge_page_size(h); - priv->young = true; - } - - put_page(page); - -out: - spin_unlock(ptl); - return 0; -} -#else -#define damon_young_hugetlb_entry NULL -#endif /* CONFIG_HUGETLB_PAGE */ - -static const struct mm_walk_ops damon_young_ops = { +static struct mm_walk_ops damon_young_ops = { .pmd_entry = damon_young_pmd_entry, - .hugetlb_entry = damon_young_hugetlb_entry, }; static bool damon_va_young(struct mm_struct *mm, unsigned long addr, @@ -610,7 +595,7 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr, * mm 'mm_struct' for the given virtual address space * r the region to be checked */ -static void __damon_va_check_access(struct damon_ctx *ctx, +static void damon_va_check_access(struct damon_ctx *ctx, struct mm_struct *mm, struct damon_region *r) { static struct mm_struct *last_mm; @@ -634,7 +619,7 @@ static void __damon_va_check_access(struct damon_ctx *ctx, last_addr = r->sampling_addr; } -static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) +unsigned int damon_va_check_accesses(struct damon_ctx *ctx) { struct damon_target *t; struct mm_struct *mm; @@ -646,7 +631,7 @@ static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) if (!mm) continue; 
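/*
 * [Editor's sketch] The access-check primitives above keep overhead
 * bounded by probing only one randomly sampled address per region per
 * sampling interval (see damon_rand() and ->sampling_addr), so a
 * region's nr_accesses over an aggregation interval approximates its
 * access frequency from a handful of page checks rather than a full
 * scan. A userspace model of that estimate follows; the names and the
 * interval numbers are illustrative, not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

/* model one "was the sampled page accessed since the last check?" probe */
static int probe_sampled_page(double region_access_ratio)
{
	return ((double)rand() / RAND_MAX) < region_access_ratio;
}

int main(void)
{
	/* e.g. 100ms aggregation / 5ms sampling = 20 checks per region */
	const int checks_per_aggregation = 20;
	int nr_accesses = 0, i;

	srand(42);
	for (i = 0; i < checks_per_aggregation; i++)
		nr_accesses += probe_sampled_page(0.3);

	/* nr_accesses / checks approximates the region's access ratio */
	printf("estimated nr_accesses: %d of %d checks\n",
	       nr_accesses, checks_per_aggregation);
	return 0;
}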
damon_for_each_region(r, t) { - __damon_va_check_access(ctx, mm, r); + damon_va_check_access(ctx, mm, r); max_nr_accesses = max(r->nr_accesses, max_nr_accesses); } mmput(mm); @@ -673,78 +658,6 @@ bool damon_va_target_valid(void *target) return false; } -#ifndef CONFIG_ADVISE_SYSCALLS -static unsigned long damos_madvise(struct damon_target *target, - struct damon_region *r, int behavior) -{ - return 0; -} -#else -static unsigned long damos_madvise(struct damon_target *target, - struct damon_region *r, int behavior) -{ - struct mm_struct *mm; - unsigned long start = PAGE_ALIGN(r->ar.start); - unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start); - unsigned long applied; - - mm = damon_get_mm(target); - if (!mm) - return 0; - - applied = do_madvise(mm, start, len, behavior) ? 0 : len; - mmput(mm); - - return applied; -} -#endif /* CONFIG_ADVISE_SYSCALLS */ - -static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, - struct damon_target *t, struct damon_region *r, - struct damos *scheme) -{ - int madv_action; - - switch (scheme->action) { - case DAMOS_WILLNEED: - madv_action = MADV_WILLNEED; - break; - case DAMOS_COLD: - madv_action = MADV_COLD; - break; - case DAMOS_PAGEOUT: - madv_action = MADV_PAGEOUT; - break; - case DAMOS_HUGEPAGE: - madv_action = MADV_HUGEPAGE; - break; - case DAMOS_NOHUGEPAGE: - madv_action = MADV_NOHUGEPAGE; - break; - case DAMOS_STAT: - return 0; - default: - return 0; - } - - return damos_madvise(t, r, madv_action); -} - -static int damon_va_scheme_score(struct damon_ctx *context, - struct damon_target *t, struct damon_region *r, - struct damos *scheme) -{ - - switch (scheme->action) { - case DAMOS_PAGEOUT: - return damon_pageout_score(context, r, scheme); - default: - break; - } - - return DAMOS_MAX_SCORE; -} - void damon_va_set_primitives(struct damon_ctx *ctx) { ctx->primitive.init = damon_va_init; @@ -754,8 +667,6 @@ void damon_va_set_primitives(struct damon_ctx *ctx) ctx->primitive.reset_aggregated = NULL; ctx->primitive.target_valid = damon_va_target_valid; ctx->primitive.cleanup = NULL; - ctx->primitive.apply_scheme = damon_va_apply_scheme; - ctx->primitive.get_scheme_score = damon_va_scheme_score; } #include "vaddr-test.h" diff --git a/mm/debug.c b/mm/debug.c index bc9ac87f0e..fae0f81ad8 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -16,19 +16,17 @@ #include #include "internal.h" -#include - -/* - * Define EM() and EMe() so that MIGRATE_REASON from trace/events/migrate.h can - * be used to populate migrate_reason_names[]. 
- */ -#undef EM -#undef EMe -#define EM(a, b) b, -#define EMe(a, b) b const char *migrate_reason_names[MR_TYPES] = { - MIGRATE_REASON + "compaction", + "memory_failure", + "memory_hotplug", + "syscall_or_cpuset", + "mempolicy_mbind", + "numa_misplaced", + "contig_range", + "longterm_pin", + "demotion", }; const struct trace_print_flags pageflag_names[] = { @@ -112,11 +110,59 @@ static void __dump_page(struct page *page) type = "ksm "; else if (PageAnon(page)) type = "anon "; - else if (mapping) - dump_mapping(mapping); + else if (mapping) { + struct inode *host; + const struct address_space_operations *a_ops; + struct hlist_node *dentry_first; + struct dentry *dentry_ptr; + struct dentry dentry; + unsigned long ino; + + /* + * mapping can be invalid pointer and we don't want to crash + * accessing it, so probe everything depending on it carefully + */ + if (get_kernel_nofault(host, &mapping->host) || + get_kernel_nofault(a_ops, &mapping->a_ops)) { + pr_warn("failed to read mapping contents, not a valid kernel address?\n"); + goto out_mapping; + } + + if (!host) { + pr_warn("aops:%ps\n", a_ops); + goto out_mapping; + } + + if (get_kernel_nofault(dentry_first, &host->i_dentry.first) || + get_kernel_nofault(ino, &host->i_ino)) { + pr_warn("aops:%ps with invalid host inode %px\n", + a_ops, host); + goto out_mapping; + } + + if (!dentry_first) { + pr_warn("aops:%ps ino:%lx\n", a_ops, ino); + goto out_mapping; + } + + dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); + if (get_kernel_nofault(dentry, dentry_ptr)) { + pr_warn("aops:%ps ino:%lx with invalid dentry %px\n", + a_ops, ino, dentry_ptr); + } else { + /* + * if dentry is corrupted, the %pd handler may still + * crash, but it's unlikely that we reach here with a + * corrupted struct page + */ + pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", + a_ops, ino, &dentry); + } + } +out_mapping: BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); - pr_warn("%sflags: %pGp%s\n", type, &head->flags, + pr_warn("%sflags: %#lx(%pGp)%s\n", type, head->flags, &head->flags, page_cma ? 
" CMA" : ""); print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32, sizeof(unsigned long), page, @@ -170,7 +216,7 @@ void dump_mm(const struct mm_struct *mm) "start_code %lx end_code %lx start_data %lx end_data %lx\n" "start_brk %lx brk %lx start_stack %lx\n" "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" - "binfmt %px flags %lx\n" + "binfmt %px flags %lx core_state %px\n" #ifdef CONFIG_AIO "ioctx_table %px\n" #endif @@ -202,7 +248,7 @@ void dump_mm(const struct mm_struct *mm) mm->start_code, mm->end_code, mm->start_data, mm->end_data, mm->start_brk, mm->brk, mm->start_stack, mm->arg_start, mm->arg_end, mm->env_start, mm->env_end, - mm->binfmt, mm->flags, + mm->binfmt, mm->flags, mm->core_state, #ifdef CONFIG_AIO mm->ioctx_table, #endif diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index db2abd9e41..718d0d3ad8 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -654,7 +654,7 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args) set_pte_at(args->mm, args->vaddr, args->ptep, pte); flush_dcache_page(page); barrier(); - ptep_clear(args->mm, args->vaddr, args->ptep); + pte_clear(args->mm, args->vaddr, args->ptep); pte = ptep_get(args->ptep); WARN_ON(!pte_none(pte)); } @@ -890,8 +890,8 @@ static void __init swap_migration_tests(struct pgtable_debug_args *args) pr_debug("Validating swap migration\n"); /* - * make_[readable|writable]_migration_entry() expects given page to - * be locked, otherwise it stumbles upon a BUG_ON(). + * make_migration_entry() expects given page to be + * locked, otherwise it stumbles upon a BUG_ON(). */ __SetPageLocked(page); swp = make_writable_migration_entry(page_to_pfn(page)); @@ -1106,14 +1106,13 @@ static int __init init_args(struct pgtable_debug_args *args) /* * Initialize the debugging data. * - * protection_map[0] (or even protection_map[8]) will help create - * page table entries with PROT_NONE permission as required for - * pxx_protnone_tests(). + * __P000 (or even __S000) will help create page table entries with + * PROT_NONE permission as required for pxx_protnone_tests(). 
*/ memset(args, 0, sizeof(*args)); args->vaddr = get_random_vaddr(); args->page_prot = vm_get_page_prot(VMFLAGS); - args->page_prot_none = protection_map[0]; + args->page_prot_none = __P000; args->is_contiguous_page = false; args->pud_pfn = ULONG_MAX; args->pmd_pfn = ULONG_MAX; diff --git a/mm/dmapool.c b/mm/dmapool.c index a7eb5d0eb2..64b537b3cc 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -152,7 +152,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, else if ((boundary < size) || (boundary & (boundary - 1))) return NULL; - retval = kmalloc(sizeof(*retval), GFP_KERNEL); + retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev)); if (!retval) return retval; diff --git a/mm/filemap.c b/mm/filemap.c index ad8c39d90b..1293c3409e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -31,17 +30,18 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include "internal.h" @@ -90,7 +90,7 @@ * ->lock_page (filemap_fault, access_process_vm) * * ->i_rwsem (generic_perform_write) - * ->mmap_lock (fault_in_readable->do_page_fault) + * ->mmap_lock (fault_in_pages_readable->do_page_fault) * * bdi->wb.list_lock * sb_lock (fs/fs-writeback.c) @@ -122,87 +122,99 @@ */ static void page_cache_delete(struct address_space *mapping, - struct folio *folio, void *shadow) + struct page *page, void *shadow) { - XA_STATE(xas, &mapping->i_pages, folio->index); - long nr = 1; + XA_STATE(xas, &mapping->i_pages, page->index); + unsigned int nr = 1; mapping_set_update(&xas, mapping); /* hugetlb pages are represented by a single entry in the xarray */ - if (!folio_test_hugetlb(folio)) { - xas_set_order(&xas, folio->index, folio_order(folio)); - nr = folio_nr_pages(folio); + if (!PageHuge(page)) { + xas_set_order(&xas, page->index, compound_order(page)); + nr = compound_nr(page); } - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(nr != 1 && shadow, page); xas_store(&xas, shadow); xas_init_marks(&xas); - folio->mapping = NULL; + page->mapping = NULL; /* Leave page->index set: truncation lookup relies upon it */ mapping->nrpages -= nr; } -static void filemap_unaccount_folio(struct address_space *mapping, - struct folio *folio) +static void unaccount_page_cache_page(struct address_space *mapping, + struct page *page) { - long nr; + int nr; - VM_BUG_ON_FOLIO(folio_mapped(folio), folio); - if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { + /* + * if we're uptodate, flush out into the cleancache, otherwise + * invalidate any existing cleancache entries. 
We can't leave + * stale data around in the cleancache once our page is gone + */ + if (PageUptodate(page) && PageMappedToDisk(page)) + cleancache_put_page(page); + else + cleancache_invalidate_page(mapping, page); + + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(page_mapped(page), page); + if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { int mapcount; pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", - current->comm, folio_pfn(folio)); - dump_page(&folio->page, "still mapped when deleted"); + current->comm, page_to_pfn(page)); + dump_page(page, "still mapped when deleted"); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); - mapcount = page_mapcount(&folio->page); + mapcount = page_mapcount(page); if (mapping_exiting(mapping) && - folio_ref_count(folio) >= mapcount + 2) { + page_count(page) >= mapcount + 2) { /* * All vmas have already been torn down, so it's - * a good bet that actually the folio is unmapped, + * a good bet that actually the page is unmapped, * and we'd prefer not to leak it: if we're wrong, * some other bad page check should catch it later. */ - page_mapcount_reset(&folio->page); - folio_ref_sub(folio, mapcount); + page_mapcount_reset(page); + page_ref_sub(page, mapcount); } } - /* hugetlb folios do not participate in page cache accounting. */ - if (folio_test_hugetlb(folio)) + /* hugetlb pages do not participate in page cache accounting. */ + if (PageHuge(page)) return; - nr = folio_nr_pages(folio); + nr = thp_nr_pages(page); - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); - if (folio_test_swapbacked(folio)) { - __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); - if (folio_test_pmd_mappable(folio)) - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); - } else if (folio_test_pmd_mappable(folio)) { - __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); + __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr); + if (PageSwapBacked(page)) { + __mod_lruvec_page_state(page, NR_SHMEM, -nr); + if (PageTransHuge(page)) + __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr); + } else if (PageTransHuge(page)) { + __mod_lruvec_page_state(page, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); } /* - * At this point folio must be either written or cleaned by - * truncate. Dirty folio here signals a bug and loss of + * At this point page must be either written or cleaned by + * truncate. Dirty page here signals a bug and loss of * unwritten data. * - * This fixes dirty accounting after removing the folio entirely - * but leaves the dirty flag set: it has no effect for truncated - * folio and anyway will be cleared before returning folio to + * This fixes dirty accounting after removing the page entirely + * but leaves PageDirty set: it has no effect for truncated + * page and anyway will be cleared before returning page into * buddy allocator. */ - if (WARN_ON_ONCE(folio_test_dirty(folio))) - folio_account_cleaned(folio, mapping, - inode_to_wb(mapping->host)); + if (WARN_ON_ONCE(PageDirty(page))) + account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); } /* @@ -210,81 +222,83 @@ static void filemap_unaccount_folio(struct address_space *mapping, * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold the i_pages lock. 
*/ -void __filemap_remove_folio(struct folio *folio, void *shadow) +void __delete_from_page_cache(struct page *page, void *shadow) { - struct address_space *mapping = folio->mapping; + struct address_space *mapping = page->mapping; - trace_mm_filemap_delete_from_page_cache(folio); - filemap_unaccount_folio(mapping, folio); - page_cache_delete(mapping, folio, shadow); + trace_mm_filemap_delete_from_page_cache(page); + + unaccount_page_cache_page(mapping, page); + page_cache_delete(mapping, page, shadow); } -void filemap_free_folio(struct address_space *mapping, struct folio *folio) +static void page_cache_free_page(struct address_space *mapping, + struct page *page) { void (*freepage)(struct page *); - int refs = 1; freepage = mapping->a_ops->freepage; if (freepage) - freepage(&folio->page); + freepage(page); - if (folio_test_large(folio) && !folio_test_hugetlb(folio)) - refs = folio_nr_pages(folio); - folio_put_refs(folio, refs); + if (PageTransHuge(page) && !PageHuge(page)) { + page_ref_sub(page, thp_nr_pages(page)); + VM_BUG_ON_PAGE(page_count(page) <= 0, page); + } else { + put_page(page); + } } /** - * filemap_remove_folio - Remove folio from page cache. - * @folio: The folio. + * delete_from_page_cache - delete page from page cache + * @page: the page which the kernel is trying to remove from page cache * - * This must be called only on folios that are locked and have been - * verified to be in the page cache. It will never put the folio into - * the free list because the caller has a reference on the page. + * This must be called only on pages that have been verified to be in the page + * cache and locked. It will never put the page into the free list, the caller + * has a reference on the page. */ -void filemap_remove_folio(struct folio *folio) +void delete_from_page_cache(struct page *page) { - struct address_space *mapping = folio->mapping; + struct address_space *mapping = page_mapping(page); - BUG_ON(!folio_test_locked(folio)); - spin_lock(&mapping->host->i_lock); + BUG_ON(!PageLocked(page)); xa_lock_irq(&mapping->i_pages); - __filemap_remove_folio(folio, NULL); + __delete_from_page_cache(page, NULL); xa_unlock_irq(&mapping->i_pages); - if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); - spin_unlock(&mapping->host->i_lock); - filemap_free_folio(mapping, folio); + page_cache_free_page(mapping, page); } +EXPORT_SYMBOL(delete_from_page_cache); /* - * page_cache_delete_batch - delete several folios from page cache - * @mapping: the mapping to which folios belong - * @fbatch: batch of folios to delete + * page_cache_delete_batch - delete several pages from page cache + * @mapping: the mapping to which pages belong + * @pvec: pagevec with pages to delete * - * The function walks over mapping->i_pages and removes folios passed in - * @fbatch from the mapping. The function expects @fbatch to be sorted - * by page index and is optimised for it to be dense. - * It tolerates holes in @fbatch (mapping entries at those indices are not - * modified). + * The function walks over mapping->i_pages and removes pages passed in @pvec + * from the mapping. The function expects @pvec to be sorted by page index + * and is optimised for it to be dense. + * It tolerates holes in @pvec (mapping entries at those indices are not + * modified). The function expects only THP head pages to be present in the + * @pvec. * * The function expects the i_pages lock to be held. 
*/ static void page_cache_delete_batch(struct address_space *mapping, - struct folio_batch *fbatch) + struct pagevec *pvec) { - XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index); - long total_pages = 0; + XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); + int total_pages = 0; int i = 0; - struct folio *folio; + struct page *page; mapping_set_update(&xas, mapping); - xas_for_each(&xas, folio, ULONG_MAX) { - if (i >= folio_batch_count(fbatch)) + xas_for_each(&xas, page, ULONG_MAX) { + if (i >= pagevec_count(pvec)) break; /* A swap/dax/shadow entry got inserted? Skip it. */ - if (xa_is_value(folio)) + if (xa_is_value(page)) continue; /* * A page got inserted in our range? Skip it. We have our @@ -293,48 +307,50 @@ static void page_cache_delete_batch(struct address_space *mapping, * means our page has been removed, which shouldn't be * possible because we're holding the PageLock. */ - if (folio != fbatch->folios[i]) { - VM_BUG_ON_FOLIO(folio->index > - fbatch->folios[i]->index, folio); + if (page != pvec->pages[i]) { + VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index, + page); continue; } - WARN_ON_ONCE(!folio_test_locked(folio)); + WARN_ON_ONCE(!PageLocked(page)); - folio->mapping = NULL; - /* Leave folio->index set: truncation lookup relies on it */ + if (page->index == xas.xa_index) + page->mapping = NULL; + /* Leave page->index set: truncation lookup relies on it */ - i++; + /* + * Move to the next page in the vector if this is a regular + * page or the index is of the last sub-page of this compound + * page. + */ + if (page->index + compound_nr(page) - 1 == xas.xa_index) + i++; xas_store(&xas, NULL); - total_pages += folio_nr_pages(folio); + total_pages++; } mapping->nrpages -= total_pages; } void delete_from_page_cache_batch(struct address_space *mapping, - struct folio_batch *fbatch) + struct pagevec *pvec) { int i; - if (!folio_batch_count(fbatch)) + if (!pagevec_count(pvec)) return; - spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); - for (i = 0; i < folio_batch_count(fbatch); i++) { - struct folio *folio = fbatch->folios[i]; + for (i = 0; i < pagevec_count(pvec); i++) { + trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); - trace_mm_filemap_delete_from_page_cache(folio); - filemap_unaccount_folio(mapping, folio); + unaccount_page_cache_page(mapping, pvec->pages[i]); } - page_cache_delete_batch(mapping, fbatch); + page_cache_delete_batch(mapping, pvec); xa_unlock_irq(&mapping->i_pages); - if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); - spin_unlock(&mapping->host->i_lock); - for (i = 0; i < folio_batch_count(fbatch); i++) - filemap_free_folio(mapping, fbatch->folios[i]); + for (i = 0; i < pagevec_count(pvec); i++) + page_cache_free_page(mapping, pvec->pages[i]); } int filemap_check_errors(struct address_space *mapping) @@ -623,13 +639,32 @@ static bool mapping_needs_writeback(struct address_space *mapping) return mapping->nrpages; } -bool filemap_range_has_writeback(struct address_space *mapping, - loff_t start_byte, loff_t end_byte) +/** + * filemap_range_needs_writeback - check if range potentially needs writeback + * @mapping: address space within which to check + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Find at least one page in the range supplied, usually used to check if + * direct writing in this range will trigger a writeback. 
Used by O_DIRECT + * read/write with IOCB_NOWAIT, to see if the caller needs to do + * filemap_write_and_wait_range() before proceeding. + * + * Return: %true if the caller should do filemap_write_and_wait_range() before + * doing O_DIRECT to a page in this range, %false otherwise. + */ +bool filemap_range_needs_writeback(struct address_space *mapping, + loff_t start_byte, loff_t end_byte) { XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT); pgoff_t max = end_byte >> PAGE_SHIFT; struct page *page; + if (!mapping_needs_writeback(mapping)) + return false; + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return false; if (end_byte < start_byte) return false; @@ -645,7 +680,7 @@ bool filemap_range_has_writeback(struct address_space *mapping, rcu_read_unlock(); return page != NULL; } -EXPORT_SYMBOL_GPL(filemap_range_has_writeback); +EXPORT_SYMBOL_GPL(filemap_range_needs_writeback); /** * filemap_write_and_wait_range - write out & wait on a file range @@ -800,8 +835,6 @@ EXPORT_SYMBOL(file_write_and_wait_range); */ void replace_page_cache_page(struct page *old, struct page *new) { - struct folio *fold = page_folio(old); - struct folio *fnew = page_folio(new); struct address_space *mapping = old->mapping; void (*freepage)(struct page *) = mapping->a_ops->freepage; pgoff_t offset = old->index; @@ -815,7 +848,7 @@ void replace_page_cache_page(struct page *old, struct page *new) new->mapping = mapping; new->index = offset; - mem_cgroup_migrate(fold, fnew); + mem_cgroup_migrate(old, new); xas_lock_irq(&xas); xas_store(&xas, new); @@ -837,25 +870,26 @@ void replace_page_cache_page(struct page *old, struct page *new) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -noinline int __filemap_add_folio(struct address_space *mapping, - struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) +noinline int __add_to_page_cache_locked(struct page *page, + struct address_space *mapping, + pgoff_t offset, gfp_t gfp, + void **shadowp) { - XA_STATE(xas, &mapping->i_pages, index); - int huge = folio_test_hugetlb(folio); + XA_STATE(xas, &mapping->i_pages, offset); + int huge = PageHuge(page); int error; bool charged = false; - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageSwapBacked(page), page); mapping_set_update(&xas, mapping); - folio_get(folio); - folio->mapping = mapping; - folio->index = index; + get_page(page); + page->mapping = mapping; + page->index = offset; if (!huge) { - error = mem_cgroup_charge(folio, NULL, gfp); - VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio); + error = mem_cgroup_charge(page, NULL, gfp); if (error) goto error; charged = true; @@ -867,7 +901,7 @@ noinline int __filemap_add_folio(struct address_space *mapping, unsigned int order = xa_get_order(xas.xa, xas.xa_index); void *entry, *old = NULL; - if (order > folio_order(folio)) + if (order > thp_order(page)) xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), order, gfp); xas_lock_irq(&xas); @@ -884,13 +918,13 @@ noinline int __filemap_add_folio(struct address_space *mapping, *shadowp = old; /* entry may have been split before we acquired lock */ order = xa_get_order(xas.xa, xas.xa_index); - if (order > folio_order(folio)) { + if (order > thp_order(page)) { xas_split(&xas, old, order); xas_reset(&xas); } } - xas_store(&xas, folio); + xas_store(&xas, page); if (xas_error(&xas)) goto unlock; @@ -898,7 +932,7 @@ noinline int 
__filemap_add_folio(struct address_space *mapping, /* hugetlb pages do not participate in page cache accounting */ if (!huge) - __lruvec_stat_add_folio(folio, NR_FILE_PAGES); + __inc_lruvec_page_state(page, NR_FILE_PAGES); unlock: xas_unlock_irq(&xas); } while (xas_nomem(&xas, gfp)); @@ -906,19 +940,19 @@ noinline int __filemap_add_folio(struct address_space *mapping, if (xas_error(&xas)) { error = xas_error(&xas); if (charged) - mem_cgroup_uncharge(folio); + mem_cgroup_uncharge(page); goto error; } - trace_mm_filemap_add_to_page_cache(folio); + trace_mm_filemap_add_to_page_cache(page); return 0; error: - folio->mapping = NULL; + page->mapping = NULL; /* Leave page->index set: truncation relies upon it */ - folio_put(folio); + put_page(page); return error; } -ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); +ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); /** * add_to_page_cache_locked - add a locked page to the pagecache @@ -935,58 +969,59 @@ ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { - return __filemap_add_folio(mapping, page_folio(page), offset, + return __add_to_page_cache_locked(page, mapping, offset, gfp_mask, NULL); } EXPORT_SYMBOL(add_to_page_cache_locked); -int filemap_add_folio(struct address_space *mapping, struct folio *folio, - pgoff_t index, gfp_t gfp) +int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + pgoff_t offset, gfp_t gfp_mask) { void *shadow = NULL; int ret; - __folio_set_locked(folio); - ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow); + __SetPageLocked(page); + ret = __add_to_page_cache_locked(page, mapping, offset, + gfp_mask, &shadow); if (unlikely(ret)) - __folio_clear_locked(folio); + __ClearPageLocked(page); else { /* - * The folio might have been evicted from cache only + * The page might have been evicted from cache only * recently, in which case it should be activated like - * any other repeatedly accessed folio. - * The exception is folios getting rewritten; evicting other + * any other repeatedly accessed page. + * The exception is pages getting rewritten; evicting other * data from the working set, only to cache data that will * get overwritten with something else, is a waste of memory. 
*/ - WARN_ON_ONCE(folio_test_active(folio)); - if (!(gfp & __GFP_WRITE) && shadow) - workingset_refault(folio, shadow); - folio_add_lru(folio); + WARN_ON_ONCE(PageActive(page)); + if (!(gfp_mask & __GFP_WRITE) && shadow) + workingset_refault(page, shadow); + lru_cache_add(page); } return ret; } -EXPORT_SYMBOL_GPL(filemap_add_folio); +EXPORT_SYMBOL_GPL(add_to_page_cache_lru); #ifdef CONFIG_NUMA -struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) +struct page *__page_cache_alloc(gfp_t gfp) { int n; - struct folio *folio; + struct page *page; if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - folio = __folio_alloc_node(gfp, order, n); - } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie)); + page = __alloc_pages_node(n, gfp, 0); + } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); - return folio; + return page; } - return folio_alloc(gfp, order); + return alloc_pages(gfp, 0); } -EXPORT_SYMBOL(filemap_alloc_folio); +EXPORT_SYMBOL(__page_cache_alloc); #endif /* @@ -1039,11 +1074,11 @@ EXPORT_SYMBOL(filemap_invalidate_unlock_two); */ #define PAGE_WAIT_TABLE_BITS 8 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) -static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; +static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; -static wait_queue_head_t *folio_waitqueue(struct folio *folio) +static wait_queue_head_t *page_waitqueue(struct page *page) { - return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)]; + return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; } void __init pagecache_init(void) @@ -1051,7 +1086,7 @@ void __init pagecache_init(void) int i; for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) - init_waitqueue_head(&folio_wait_table[i]); + init_waitqueue_head(&page_wait_table[i]); page_writeback_init(); } @@ -1106,10 +1141,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, */ flags = wait->flags; if (flags & WQ_FLAG_EXCLUSIVE) { - if (test_bit(key->bit_nr, &key->folio->flags)) + if (test_bit(key->bit_nr, &key->page->flags)) return -1; if (flags & WQ_FLAG_CUSTOM) { - if (test_and_set_bit(key->bit_nr, &key->folio->flags)) + if (test_and_set_bit(key->bit_nr, &key->page->flags)) return -1; flags |= WQ_FLAG_DONE; } @@ -1122,7 +1157,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, * * So update the flags atomically, and wake up the waiter * afterwards to avoid any races. This store-release pairs - * with the load-acquire in folio_wait_bit_common(). + * with the load-acquire in wait_on_page_bit_common(). */ smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN); wake_up_state(wait->private, mode); @@ -1141,14 +1176,14 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, return (flags & WQ_FLAG_EXCLUSIVE) != 0; } -static void folio_wake_bit(struct folio *folio, int bit_nr) +static void wake_up_page_bit(struct page *page, int bit_nr) { - wait_queue_head_t *q = folio_waitqueue(folio); + wait_queue_head_t *q = page_waitqueue(page); struct wait_page_key key; unsigned long flags; wait_queue_entry_t bookmark; - key.folio = folio; + key.page = page; key.bit_nr = bit_nr; key.page_match = 0; @@ -1183,7 +1218,7 @@ static void folio_wake_bit(struct folio *folio, int bit_nr) * page waiters. 
*/ if (!waitqueue_active(q) || !key.page_match) { - folio_clear_waiters(folio); + ClearPageWaiters(page); /* * It's possible to miss clearing Waiters here, when we woke * our page waiters, but the hashed waitqueue has waiters for @@ -1195,39 +1230,39 @@ static void folio_wake_bit(struct folio *folio, int bit_nr) spin_unlock_irqrestore(&q->lock, flags); } -static void folio_wake(struct folio *folio, int bit) +static void wake_up_page(struct page *page, int bit) { - if (!folio_test_waiters(folio)) + if (!PageWaiters(page)) return; - folio_wake_bit(folio, bit); + wake_up_page_bit(page, bit); } /* - * A choice of three behaviors for folio_wait_bit_common(): + * A choice of three behaviors for wait_on_page_bit_common(): */ enum behavior { EXCLUSIVE, /* Hold ref to page and take the bit when woken, like - * __folio_lock() waiting on then setting PG_locked. + * __lock_page() waiting on then setting PG_locked. */ SHARED, /* Hold ref to page and check the bit when woken, like - * folio_wait_writeback() waiting on PG_writeback. + * wait_on_page_writeback() waiting on PG_writeback. */ DROP, /* Drop ref to page before wait, no check when woken, - * like folio_put_wait_locked() on PG_locked. + * like put_and_wait_on_page_locked() on PG_locked. */ }; /* - * Attempt to check (or get) the folio flag, and mark us done + * Attempt to check (or get) the page bit, and mark us done * if successful. */ -static inline bool folio_trylock_flag(struct folio *folio, int bit_nr, +static inline bool trylock_page_bit_common(struct page *page, int bit_nr, struct wait_queue_entry *wait) { if (wait->flags & WQ_FLAG_EXCLUSIVE) { - if (test_and_set_bit(bit_nr, &folio->flags)) + if (test_and_set_bit(bit_nr, &page->flags)) return false; - } else if (test_bit(bit_nr, &folio->flags)) + } else if (test_bit(bit_nr, &page->flags)) return false; wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE; @@ -1237,10 +1272,9 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr, /* How many times do we accept lock stealing from under a waiter? */ int sysctl_page_lock_unfairness = 5; -static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, - int state, enum behavior behavior) +static inline int wait_on_page_bit_common(wait_queue_head_t *q, + struct page *page, int bit_nr, int state, enum behavior behavior) { - wait_queue_head_t *q = folio_waitqueue(folio); int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; @@ -1249,8 +1283,8 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, unsigned long pflags; if (bit_nr == PG_locked && - !folio_test_uptodate(folio) && folio_test_workingset(folio)) { - if (!folio_test_swapbacked(folio)) { + !PageUptodate(page) && PageWorkingset(page)) { + if (!PageSwapBacked(page)) { delayacct_thrashing_start(); delayacct = true; } @@ -1260,7 +1294,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, init_wait(wait); wait->func = wake_page_function; - wait_page.folio = folio; + wait_page.page = page; wait_page.bit_nr = bit_nr; repeat: @@ -1275,7 +1309,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, * Do one last check whether we can get the * page bit synchronously. 
* - * Do the folio_set_waiters() marking before that + * Do the SetPageWaiters() marking before that * to let any waker we _just_ missed know they * need to wake us up (otherwise they'll never * even go to the slow case that looks at the @@ -1286,8 +1320,8 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, * lock to avoid races. */ spin_lock_irq(&q->lock); - folio_set_waiters(folio); - if (!folio_trylock_flag(folio, bit_nr, wait)) + SetPageWaiters(page); + if (!trylock_page_bit_common(page, bit_nr, wait)) __add_wait_queue_entry_tail(q, wait); spin_unlock_irq(&q->lock); @@ -1297,10 +1331,10 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, * see whether the page bit testing has already * been done by the wake function. * - * We can drop our reference to the folio. + * We can drop our reference to the page. */ if (behavior == DROP) - folio_put(folio); + put_page(page); /* * Note that until the "finish_wait()", or until @@ -1337,7 +1371,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, * * And if that fails, we'll have to retry this all. */ - if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0)))) + if (unlikely(test_and_set_bit(bit_nr, &page->flags))) goto repeat; wait->flags |= WQ_FLAG_DONE; @@ -1346,7 +1380,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, /* * If a signal happened, this 'finish_wait()' may remove the last - * waiter from the wait-queues, but the folio waiters bit will remain + * waiter from the wait-queues, but the PageWaiters bit will remain * set. That's ok. The next wakeup will take care of it, and trying * to do it here would be difficult and prone to races. */ @@ -1377,143 +1411,60 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR; } -#ifdef CONFIG_MIGRATION -/** - * migration_entry_wait_on_locked - Wait for a migration entry to be removed - * @entry: migration swap entry. - * @ptep: mapped pte pointer. Will return with the ptep unmapped. Only required - * for pte entries, pass NULL for pmd entries. - * @ptl: already locked ptl. This function will drop the lock. - * - * Wait for a migration entry referencing the given page to be removed. This is - * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except - * this can be called without taking a reference on the page. Instead this - * should be called while holding the ptl for the migration entry referencing - * the page. - * - * Returns after unmapping and unlocking the pte/ptl with pte_unmap_unlock(). - * - * This follows the same logic as folio_wait_bit_common() so see the comments - * there. 
- */ -void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, - spinlock_t *ptl) +void wait_on_page_bit(struct page *page, int bit_nr) { - struct wait_page_queue wait_page; - wait_queue_entry_t *wait = &wait_page.wait; - bool thrashing = false; - bool delayacct = false; - unsigned long pflags; - wait_queue_head_t *q; - struct folio *folio = page_folio(pfn_swap_entry_to_page(entry)); - - q = folio_waitqueue(folio); - if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) { - if (!folio_test_swapbacked(folio)) { - delayacct_thrashing_start(); - delayacct = true; - } - psi_memstall_enter(&pflags); - thrashing = true; - } - - init_wait(wait); - wait->func = wake_page_function; - wait_page.folio = folio; - wait_page.bit_nr = PG_locked; - wait->flags = 0; - - spin_lock_irq(&q->lock); - folio_set_waiters(folio); - if (!folio_trylock_flag(folio, PG_locked, wait)) - __add_wait_queue_entry_tail(q, wait); - spin_unlock_irq(&q->lock); - - /* - * If a migration entry exists for the page the migration path must hold - * a valid reference to the page, and it must take the ptl to remove the - * migration entry. So the page is valid until the ptl is dropped. - */ - if (ptep) - pte_unmap_unlock(ptep, ptl); - else - spin_unlock(ptl); - - for (;;) { - unsigned int flags; - - set_current_state(TASK_UNINTERRUPTIBLE); - - /* Loop until we've been woken or interrupted */ - flags = smp_load_acquire(&wait->flags); - if (!(flags & WQ_FLAG_WOKEN)) { - if (signal_pending_state(TASK_UNINTERRUPTIBLE, current)) - break; - - io_schedule(); - continue; - } - break; - } - - finish_wait(q, wait); - - if (thrashing) { - if (delayacct) - delayacct_thrashing_end(); - psi_memstall_leave(&pflags); - } + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); } -#endif +EXPORT_SYMBOL(wait_on_page_bit); -void folio_wait_bit(struct folio *folio, int bit_nr) +int wait_on_page_bit_killable(struct page *page, int bit_nr) { - folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED); } -EXPORT_SYMBOL(folio_wait_bit); - -int folio_wait_bit_killable(struct folio *folio, int bit_nr) -{ - return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED); -} -EXPORT_SYMBOL(folio_wait_bit_killable); +EXPORT_SYMBOL(wait_on_page_bit_killable); /** - * folio_put_wait_locked - Drop a reference and wait for it to be unlocked - * @folio: The folio to wait for. + * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked + * @page: The page to wait for. * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc). * - * The caller should hold a reference on @folio. They expect the page to + * The caller should hold a reference on @page. They expect the page to * become unlocked relatively soon, but do not wish to hold up migration - * (for example) by holding the reference while waiting for the folio to + * (for example) by holding the reference while waiting for the page to * come unlocked. After this function returns, the caller should not - * dereference @folio. + * dereference @page. * - * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal. + * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal. 
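+ *
+ * A usage sketch (hypothetical caller, not taken from this patch): the
+ * reference the caller holds is consumed, so the page must not be
+ * touched once this returns:
+ *
+ *	get_page(page);
+ *	if (put_and_wait_on_page_locked(page, TASK_KILLABLE))
+ *		return -EINTR;	/* the page reference is already gone */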
*/ -int folio_put_wait_locked(struct folio *folio, int state) +int put_and_wait_on_page_locked(struct page *page, int state) { - return folio_wait_bit_common(folio, PG_locked, state, DROP); + wait_queue_head_t *q; + + page = compound_head(page); + q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, PG_locked, state, DROP); } /** - * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue - * @folio: Folio defining the wait queue of interest + * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue + * @page: Page defining the wait queue of interest * @waiter: Waiter to add to the queue * - * Add an arbitrary @waiter to the wait queue for the nominated @folio. + * Add an arbitrary @waiter to the wait queue for the nominated @page. */ -void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter) +void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) { - wait_queue_head_t *q = folio_waitqueue(folio); + wait_queue_head_t *q = page_waitqueue(page); unsigned long flags; spin_lock_irqsave(&q->lock, flags); __add_wait_queue_entry_tail(q, waiter); - folio_set_waiters(folio); + SetPageWaiters(page); spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL_GPL(folio_add_wait_queue); +EXPORT_SYMBOL_GPL(add_page_wait_queue); #ifndef clear_bit_unlock_is_negative_byte @@ -1539,117 +1490,124 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem #endif /** - * folio_unlock - Unlock a locked folio. - * @folio: The folio. + * unlock_page - unlock a locked page + * @page: the page * - * Unlocks the folio and wakes up any thread sleeping on the page lock. + * Unlocks the page and wakes up sleepers in wait_on_page_locked(). + * Also wakes sleepers in wait_on_page_writeback() because the wakeup + * mechanism between PageLocked pages and PageWriteback pages is shared. + * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * - * Context: May be called from interrupt or process context. May not be - * called from NMI context. + * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). */ -void folio_unlock(struct folio *folio) +void unlock_page(struct page *page) { - /* Bit 7 allows x86 to check the byte's sign bit */ BUILD_BUG_ON(PG_waiters != 7); - BUILD_BUG_ON(PG_locked > 7); - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0))) - folio_wake_bit(folio, PG_locked); + page = compound_head(page); + VM_BUG_ON_PAGE(!PageLocked(page), page); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); } -EXPORT_SYMBOL(folio_unlock); +EXPORT_SYMBOL(unlock_page); /** - * folio_end_private_2 - Clear PG_private_2 and wake any waiters. - * @folio: The folio. + * end_page_private_2 - Clear PG_private_2 and release any waiters + * @page: The page * - * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for - * it. The folio reference held for PG_private_2 being set is released. + * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for + * this. The page ref held for PG_private_2 being set is released. 
 *
- * This is, for example, used when a netfs folio is being written to a local
- * disk cache, thereby allowing writes to the cache for the same folio to be
+ * This is, for example, used when a netfs page is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same page to be
 * serialised.
 */
-void folio_end_private_2(struct folio *folio)
+void end_page_private_2(struct page *page)
 {
-	VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio);
-	clear_bit_unlock(PG_private_2, folio_flags(folio, 0));
-	folio_wake_bit(folio, PG_private_2);
-	folio_put(folio);
+	page = compound_head(page);
+	VM_BUG_ON_PAGE(!PagePrivate2(page), page);
+	clear_bit_unlock(PG_private_2, &page->flags);
+	wake_up_page_bit(page, PG_private_2);
+	put_page(page);
 }
-EXPORT_SYMBOL(folio_end_private_2);
+EXPORT_SYMBOL(end_page_private_2);
 /**
- * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio.
- * @folio: The folio to wait on.
+ * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
 *
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio.
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
 */
-void folio_wait_private_2(struct folio *folio)
+void wait_on_page_private_2(struct page *page)
 {
-	while (folio_test_private_2(folio))
-		folio_wait_bit(folio, PG_private_2);
+	page = compound_head(page);
+	while (PagePrivate2(page))
+		wait_on_page_bit(page, PG_private_2);
 }
-EXPORT_SYMBOL(folio_wait_private_2);
+EXPORT_SYMBOL(wait_on_page_private_2);
 /**
- * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio.
- * @folio: The folio to wait on.
+ * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
 *
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
 * fatal signal is received by the calling task.
 *
 * Return:
 * - 0 if successful.
 * - -EINTR if a fatal signal was encountered.
 */
-int folio_wait_private_2_killable(struct folio *folio)
+int wait_on_page_private_2_killable(struct page *page)
 {
 	int ret = 0;
-	while (folio_test_private_2(folio)) {
-		ret = folio_wait_bit_killable(folio, PG_private_2);
+	page = compound_head(page);
+	while (PagePrivate2(page)) {
+		ret = wait_on_page_bit_killable(page, PG_private_2);
 		if (ret < 0)
 			break;
 	}
 	return ret;
 }
-EXPORT_SYMBOL(folio_wait_private_2_killable);
+EXPORT_SYMBOL(wait_on_page_private_2_killable);
 /**
- * folio_end_writeback - End writeback against a folio.
- * @folio: The folio.
+ * end_page_writeback - end writeback against a page
+ * @page: the page
 */
-void folio_end_writeback(struct folio *folio)
+void end_page_writeback(struct page *page)
 {
 	/*
-	 * folio_test_clear_reclaim() could be used here but it is an
-	 * atomic operation and overkill in this particular case. Failing
-	 * to shuffle a folio marked for immediate reclaim is too mild
-	 * a gain to justify taking an atomic operation penalty at the
-	 * end of every folio writeback.
+	 * TestClearPageReclaim could be used here but it is an atomic
+	 * operation and overkill in this particular case. Failing to
+	 * shuffle a page marked for immediate reclaim is too mild to
+	 * justify taking an atomic operation penalty at the end of
+	 * every page writeback.
*/ - if (folio_test_reclaim(folio)) { - folio_clear_reclaim(folio); - folio_rotate_reclaimable(folio); + if (PageReclaim(page)) { + ClearPageReclaim(page); + rotate_reclaimable_page(page); } /* - * Writeback does not hold a folio reference of its own, relying + * Writeback does not hold a page reference of its own, relying * on truncation to wait for the clearing of PG_writeback. - * But here we must make sure that the folio is not freed and - * reused before the folio_wake(). + * But here we must make sure that the page is not freed and + * reused before the wake_up_page(). */ - folio_get(folio); - if (!__folio_end_writeback(folio)) + get_page(page); + if (!test_clear_page_writeback(page)) BUG(); smp_mb__after_atomic(); - folio_wake(folio, PG_writeback); - acct_reclaim_writeback(folio); - folio_put(folio); + wake_up_page(page, PG_writeback); + put_page(page); } -EXPORT_SYMBOL(folio_end_writeback); +EXPORT_SYMBOL(end_page_writeback); /* * After completing I/O on a page, call this routine to update the page @@ -1680,35 +1638,39 @@ void page_endio(struct page *page, bool is_write, int err) EXPORT_SYMBOL_GPL(page_endio); /** - * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it. - * @folio: The folio to lock + * __lock_page - get a lock on the page, assuming we need to sleep to get it + * @__page: the page to lock */ -void __folio_lock(struct folio *folio) +void __lock_page(struct page *__page) { - folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE, + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); } -EXPORT_SYMBOL(__folio_lock); +EXPORT_SYMBOL(__lock_page); -int __folio_lock_killable(struct folio *folio) +int __lock_page_killable(struct page *__page) { - return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE, + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, EXCLUSIVE); } -EXPORT_SYMBOL_GPL(__folio_lock_killable); +EXPORT_SYMBOL_GPL(__lock_page_killable); -static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) +int __lock_page_async(struct page *page, struct wait_page_queue *wait) { - struct wait_queue_head *q = folio_waitqueue(folio); + struct wait_queue_head *q = page_waitqueue(page); int ret = 0; - wait->folio = folio; + wait->page = page; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); __add_wait_queue_entry_tail(q, &wait->wait); - folio_set_waiters(folio); - ret = !folio_trylock(folio); + SetPageWaiters(page); + ret = !trylock_page(page); /* * If we were successful now, we know we're still on the * waitqueue as we're still under the lock. This means it's @@ -1725,16 +1687,16 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) /* * Return values: - * true - folio is locked; mmap_lock is still held. - * false - folio is not locked. + * 1 - page is locked; mmap_lock is still held. + * 0 - page is not locked. * mmap_lock has been released (mmap_read_unlock(), unless flags had both * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in * which case mmap_lock is still held. * - * If neither ALLOW_RETRY nor KILLABLE are set, will always return true - * with the folio locked and the mmap_lock unperturbed. + * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 + * with the page locked and the mmap_lock unperturbed. 
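+ *
+ * An illustrative caller pattern (modelled on do_swap_page()-style
+ * usage; the error handling is a sketch, not code this patch adds):
+ * a 0 return means the mmap_lock may already have been dropped, so
+ * the caller must back out and let the fault be retried:
+ *
+ *	if (!lock_page_or_retry(page, mm, vmf->flags)) {
+ *		put_page(page);
+ *		return VM_FAULT_RETRY;
+ *	}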
*/ -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, +int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { if (fault_flag_allow_retry_first(flags)) { @@ -1743,28 +1705,28 @@ bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, * even though return 0. */ if (flags & FAULT_FLAG_RETRY_NOWAIT) - return false; + return 0; mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) - folio_wait_locked_killable(folio); + wait_on_page_locked_killable(page); else - folio_wait_locked(folio); - return false; + wait_on_page_locked(page); + return 0; } if (flags & FAULT_FLAG_KILLABLE) { - bool ret; + int ret; - ret = __folio_lock_killable(folio); + ret = __lock_page_killable(page); if (ret) { mmap_read_unlock(mm); - return false; + return 0; } } else { - __folio_lock(folio); + __lock_page(page); } + return 1; - return true; } /** @@ -1839,156 +1801,144 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, } EXPORT_SYMBOL(page_cache_prev_miss); -/* - * Lockless page cache protocol: - * On the lookup side: - * 1. Load the folio from i_pages - * 2. Increment the refcount if it's not zero - * 3. If the folio is not found by xas_reload(), put the refcount and retry - * - * On the removal side: - * A. Freeze the page (by zeroing the refcount if nobody else has a reference) - * B. Remove the page from i_pages - * C. Return the page to the page allocator - * - * This means that any page may have its reference count temporarily - * increased by a speculative page cache (or fast GUP) lookup as it can - * be allocated by another user before the RCU grace period expires. - * Because the refcount temporarily acquired here may end up being the - * last refcount on the page, any page allocation must be freeable by - * folio_put(). - */ - /* * mapping_get_entry - Get a page cache entry. * @mapping: the address_space to search * @index: The page cache index. * - * Looks up the page cache entry at @mapping & @index. If it is a folio, - * it is returned with an increased refcount. If it is a shadow entry - * of a previously evicted folio, or a swap entry from shmem/tmpfs, - * it is returned without further action. + * Looks up the page cache slot at @mapping & @index. If there is a + * page cache page, the head page is returned with an increased refcount. * - * Return: The folio, swap or shadow entry, %NULL if nothing is found. + * If the slot holds a shadow entry of a previously evicted page, or a + * swap entry from shmem/tmpfs, it is returned. + * + * Return: The head page or shadow entry, %NULL if nothing is found. */ -static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) +static struct page *mapping_get_entry(struct address_space *mapping, + pgoff_t index) { XA_STATE(xas, &mapping->i_pages, index); - struct folio *folio; + struct page *page; rcu_read_lock(); repeat: xas_reset(&xas); - folio = xas_load(&xas); - if (xas_retry(&xas, folio)) + page = xas_load(&xas); + if (xas_retry(&xas, page)) goto repeat; /* * A shadow entry of a recently evicted page, or a swap entry from * shmem/tmpfs. Return it without attempting to raise page count. */ - if (!folio || xa_is_value(folio)) + if (!page || xa_is_value(page)) goto out; - if (!folio_try_get_rcu(folio)) + if (!page_cache_get_speculative(page)) goto repeat; - if (unlikely(folio != xas_reload(&xas))) { - folio_put(folio); + /* + * Has the page moved or been split? + * This is part of the lockless pagecache protocol. See + * include/linux/pagemap.h for details. 
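+ * In short, restating the protocol summary this patch drops from the
+ * file-level comment: the lookup side loads the page from i_pages,
+ * takes a speculative reference if the refcount is non-zero, and
+ * retries if xas_reload() no longer returns the same page; the
+ * removal side freezes the refcount to zero before unhooking the
+ * page from i_pages and returning it to the allocator.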
+ */ + if (unlikely(page != xas_reload(&xas))) { + put_page(page); goto repeat; } out: rcu_read_unlock(); - return folio; + return page; } /** - * __filemap_get_folio - Find and get a reference to a folio. + * pagecache_get_page - Find and get a reference to a page. * @mapping: The address_space to search. * @index: The page index. - * @fgp_flags: %FGP flags modify how the folio is returned. - * @gfp: Memory allocation flags to use if %FGP_CREAT is specified. + * @fgp_flags: %FGP flags modify how the page is returned. + * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified. * * Looks up the page cache entry at @mapping & @index. * * @fgp_flags can be zero or more of these flags: * - * * %FGP_ACCESSED - The folio will be marked accessed. - * * %FGP_LOCK - The folio is returned locked. + * * %FGP_ACCESSED - The page will be marked accessed. + * * %FGP_LOCK - The page is returned locked. + * * %FGP_HEAD - If the page is present and a THP, return the head page + * rather than the exact page specified by the index. * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it - * instead of allocating a new folio to replace it. + * instead of allocating a new page to replace it. * * %FGP_CREAT - If no page is present then a new page is allocated using - * @gfp and added to the page cache and the VM's LRU list. + * @gfp_mask and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * page is already in cache. If the page was allocated, unlock it before * returning so the caller can do the same dance. - * * %FGP_WRITE - The page will be written to by the caller. - * * %FGP_NOFS - __GFP_FS will get cleared in gfp. - * * %FGP_NOWAIT - Don't get blocked by page lock. - * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) + * * %FGP_WRITE - The page will be written + * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask + * * %FGP_NOWAIT - Don't get blocked by page lock * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. * * If there is a page cache page, it is returned with an increased refcount. * - * Return: The found folio or %NULL otherwise. + * Return: The found page or %NULL otherwise. */ -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp) +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp_mask) { - struct folio *folio; + struct page *page; repeat: - folio = mapping_get_entry(mapping, index); - if (xa_is_value(folio)) { + page = mapping_get_entry(mapping, index); + if (xa_is_value(page)) { if (fgp_flags & FGP_ENTRY) - return folio; - folio = NULL; + return page; + page = NULL; } - if (!folio) + if (!page) goto no_page; if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { - if (!folio_trylock(folio)) { - folio_put(folio); + if (!trylock_page(page)) { + put_page(page); return NULL; } } else { - folio_lock(folio); + lock_page(page); } /* Has the page been truncated? 
*/ - if (unlikely(folio->mapping != mapping)) { - folio_unlock(folio); - folio_put(folio); + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + put_page(page); goto repeat; } - VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); + VM_BUG_ON_PAGE(!thp_contains(page, index), page); } if (fgp_flags & FGP_ACCESSED) - folio_mark_accessed(folio); + mark_page_accessed(page); else if (fgp_flags & FGP_WRITE) { /* Clear idle flag for buffer write */ - if (folio_test_idle(folio)) - folio_clear_idle(folio); + if (page_is_idle(page)) + clear_page_idle(page); } + if (!(fgp_flags & FGP_HEAD)) + page = find_subpage(page, index); - if (fgp_flags & FGP_STABLE) - folio_wait_stable(folio); no_page: - if (!folio && (fgp_flags & FGP_CREAT)) { + if (!page && (fgp_flags & FGP_CREAT)) { int err; if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) - gfp |= __GFP_WRITE; + gfp_mask |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) - gfp &= ~__GFP_FS; + gfp_mask &= ~__GFP_FS; - folio = filemap_alloc_folio(gfp, 0); - if (!folio) + page = __page_cache_alloc(gfp_mask); + if (!page) return NULL; if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) @@ -1996,58 +1946,59 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, /* Init accessed so avoid atomic mark_page_accessed later */ if (fgp_flags & FGP_ACCESSED) - __folio_set_referenced(folio); + __SetPageReferenced(page); - err = filemap_add_folio(mapping, folio, index, gfp); + err = add_to_page_cache_lru(page, mapping, index, gfp_mask); if (unlikely(err)) { - folio_put(folio); - folio = NULL; + put_page(page); + page = NULL; if (err == -EEXIST) goto repeat; } /* - * filemap_add_folio locks the page, and for mmap - * we expect an unlocked page. + * add_to_page_cache_lru locks the page, and for mmap we expect + * an unlocked page. */ - if (folio && (fgp_flags & FGP_FOR_MMAP)) - folio_unlock(folio); + if (page && (fgp_flags & FGP_FOR_MMAP)) + unlock_page(page); } - return folio; + return page; } -EXPORT_SYMBOL(__filemap_get_folio); +EXPORT_SYMBOL(pagecache_get_page); -static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max, +static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max, xa_mark_t mark) { - struct folio *folio; + struct page *page; retry: if (mark == XA_PRESENT) - folio = xas_find(xas, max); + page = xas_find(xas, max); else - folio = xas_find_marked(xas, max, mark); + page = xas_find_marked(xas, max, mark); - if (xas_retry(xas, folio)) + if (xas_retry(xas, page)) goto retry; /* * A shadow entry of a recently evicted page, a swap * entry from shmem/tmpfs or a DAX entry. Return it * without attempting to raise page count. */ - if (!folio || xa_is_value(folio)) - return folio; + if (!page || xa_is_value(page)) + return page; - if (!folio_try_get_rcu(folio)) + if (!page_cache_get_speculative(page)) goto reset; - if (unlikely(folio != xas_reload(xas))) { - folio_put(folio); + /* Has the page moved or been split? */ + if (unlikely(page != xas_reload(xas))) { + put_page(page); goto reset; } - return folio; + return page; reset: xas_reset(xas); goto retry; @@ -2058,36 +2009,56 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max, * @mapping: The address_space to search * @start: The starting page cache index * @end: The final page index (inclusive). - * @fbatch: Where the resulting entries are placed. + * @pvec: Where the resulting entries are placed. 
* @indices: The cache indices corresponding to the entries in @entries * * find_get_entries() will search for and return a batch of entries in - * the mapping. The entries are placed in @fbatch. find_get_entries() - * takes a reference on any actual folios it returns. + * the mapping. The entries are placed in @pvec. find_get_entries() + * takes a reference on any actual pages it returns. * - * The entries have ascending indexes. The indices may not be consecutive - * due to not-present entries or large folios. + * The search returns a group of mapping-contiguous page cache entries + * with ascending indexes. There may be holes in the indices due to + * not-present pages. * - * Any shadow entries of evicted folios, or swap entries from + * Any shadow entries of evicted pages, or swap entries from * shmem/tmpfs, are included in the returned array. * - * Return: The number of entries which were found. + * If it finds a Transparent Huge Page, head or tail, find_get_entries() + * stops at that page: the caller is likely to have a better way to handle + * the compound page as a whole, and then skip its extent, than repeatedly + * calling find_get_entries() to return all its tails. + * + * Return: the number of pages and shadow entries which were found. */ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) + pgoff_t end, struct pagevec *pvec, pgoff_t *indices) { XA_STATE(xas, &mapping->i_pages, start); - struct folio *folio; + struct page *page; + unsigned int ret = 0; + unsigned nr_entries = PAGEVEC_SIZE; rcu_read_lock(); - while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { - indices[fbatch->nr] = xas.xa_index; - if (!folio_batch_add(fbatch, folio)) + while ((page = find_get_entry(&xas, end, XA_PRESENT))) { + /* + * Terminate early on finding a THP, to allow the caller to + * handle it all at once; but continue if this is hugetlbfs. + */ + if (!xa_is_value(page) && PageTransHuge(page) && + !PageHuge(page)) { + page = find_subpage(page, xas.xa_index); + nr_entries = ret + 1; + } + + indices[ret] = xas.xa_index; + pvec->pages[ret] = page; + if (++ret == nr_entries) break; } rcu_read_unlock(); - return folio_batch_count(fbatch); + pvec->nr = ret; + return ret; } /** @@ -2095,64 +2066,63 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, * @mapping: The address_space to search. * @start: The starting page cache index. * @end: The final page index (inclusive). - * @fbatch: Where the resulting entries are placed. - * @indices: The cache indices of the entries in @fbatch. + * @pvec: Where the resulting entries are placed. + * @indices: The cache indices of the entries in @pvec. * * find_lock_entries() will return a batch of entries from @mapping. - * Swap, shadow and DAX entries are included. Folios are returned - * locked and with an incremented refcount. Folios which are locked - * by somebody else or under writeback are skipped. Folios which are - * partially outside the range are not returned. + * Swap, shadow and DAX entries are included. Pages are returned + * locked and with an incremented refcount. Pages which are locked by + * somebody else or under writeback are skipped. Only the head page of + * a THP is returned. Pages which are partially outside the range are + * not returned. * * The entries have ascending indexes. The indices may not be consecutive - * due to not-present entries, large folios, folios which could not be - * locked or folios under writeback. 
+ * due to not-present entries, THP pages, pages which could not be locked + * or pages under writeback. * * Return: The number of entries which were found. */ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) + pgoff_t end, struct pagevec *pvec, pgoff_t *indices) { XA_STATE(xas, &mapping->i_pages, start); - struct folio *folio; + struct page *page; rcu_read_lock(); - while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { - if (!xa_is_value(folio)) { - if (folio->index < start) + while ((page = find_get_entry(&xas, end, XA_PRESENT))) { + if (!xa_is_value(page)) { + if (page->index < start) goto put; - if (folio->index + folio_nr_pages(folio) - 1 > end) + if (page->index + thp_nr_pages(page) - 1 > end) goto put; - if (!folio_trylock(folio)) + if (!trylock_page(page)) goto put; - if (folio->mapping != mapping || - folio_test_writeback(folio)) + if (page->mapping != mapping || PageWriteback(page)) goto unlock; - VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index), - folio); + VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index), + page); } - indices[fbatch->nr] = xas.xa_index; - if (!folio_batch_add(fbatch, folio)) + indices[pvec->nr] = xas.xa_index; + if (!pagevec_add(pvec, page)) break; - continue; + goto next; unlock: - folio_unlock(folio); + unlock_page(page); put: - folio_put(folio); + put_page(page); +next: + if (!xa_is_value(page) && PageTransHuge(page)) { + unsigned int nr_pages = thp_nr_pages(page); + + /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ + xas_set(&xas, page->index + nr_pages); + if (xas.xa_index < nr_pages) + break; + } } rcu_read_unlock(); - return folio_batch_count(fbatch); -} - -static inline -bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) -{ - if (!folio_test_large(folio) || folio_test_hugetlb(folio)) - return false; - if (index >= max) - return false; - return index < folio->index + folio_nr_pages(folio) - 1; + return pagevec_count(pvec); } /** @@ -2181,29 +2151,23 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, struct page **pages) { XA_STATE(xas, &mapping->i_pages, *start); - struct folio *folio; + struct page *page; unsigned ret = 0; if (unlikely(!nr_pages)) return 0; rcu_read_lock(); - while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { + while ((page = find_get_entry(&xas, end, XA_PRESENT))) { /* Skip over shadow, swap and DAX entries */ - if (xa_is_value(folio)) + if (xa_is_value(page)) continue; -again: - pages[ret] = folio_file_page(folio, xas.xa_index); + pages[ret] = find_subpage(page, xas.xa_index); if (++ret == nr_pages) { *start = xas.xa_index + 1; goto out; } - if (folio_more_pages(folio, xas.xa_index, end)) { - xas.xa_index++; - folio_ref_inc(folio); - goto again; - } } /* @@ -2238,41 +2202,36 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, unsigned int nr_pages, struct page **pages) { XA_STATE(xas, &mapping->i_pages, index); - struct folio *folio; + struct page *page; unsigned int ret = 0; if (unlikely(!nr_pages)) return 0; rcu_read_lock(); - for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { - if (xas_retry(&xas, folio)) + for (page = xas_load(&xas); page; page = xas_next(&xas)) { + if (xas_retry(&xas, page)) continue; /* * If the entry has been swapped out, we can stop looking. * No current caller is looking for DAX entries. 
*/ - if (xa_is_value(folio)) + if (xa_is_value(page)) break; - if (!folio_try_get_rcu(folio)) + if (!page_cache_get_speculative(page)) goto retry; - if (unlikely(folio != xas_reload(&xas))) + /* Has the page moved or been split? */ + if (unlikely(page != xas_reload(&xas))) goto put_page; -again: - pages[ret] = folio_file_page(folio, xas.xa_index); + pages[ret] = find_subpage(page, xas.xa_index); if (++ret == nr_pages) break; - if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) { - xas.xa_index++; - folio_ref_inc(folio); - goto again; - } continue; put_page: - folio_put(folio); + put_page(page); retry: xas_reset(&xas); } @@ -2301,25 +2260,25 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, struct page **pages) { XA_STATE(xas, &mapping->i_pages, *index); - struct folio *folio; + struct page *page; unsigned ret = 0; if (unlikely(!nr_pages)) return 0; rcu_read_lock(); - while ((folio = find_get_entry(&xas, end, tag))) { + while ((page = find_get_entry(&xas, end, tag))) { /* * Shadow entries should never be tagged, but this iteration * is lockless so there is a window for page reclaim to evict * a page we saw tagged. Skip over it. */ - if (xa_is_value(folio)) + if (xa_is_value(page)) continue; - pages[ret] = &folio->page; + pages[ret] = page; if (++ret == nr_pages) { - *index = folio->index + folio_nr_pages(folio); + *index = page->index + thp_nr_pages(page); goto out; } } @@ -2362,50 +2321,56 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra) } /* - * filemap_get_read_batch - Get a batch of folios for read + * filemap_get_read_batch - Get a batch of pages for read * - * Get a batch of folios which represent a contiguous range of bytes in - * the file. No exceptional entries will be returned. If @index is in - * the middle of a folio, the entire folio will be returned. The last - * folio in the batch may have the readahead flag set or the uptodate flag - * clear so that the caller can take the appropriate action. + * Get a batch of pages which represent a contiguous range of bytes + * in the file. No tail pages will be returned. If @index is in the + * middle of a THP, the entire THP will be returned. The last page in + * the batch may have Readahead set or be not Uptodate so that the + * caller can take the appropriate action. */ static void filemap_get_read_batch(struct address_space *mapping, - pgoff_t index, pgoff_t max, struct folio_batch *fbatch) + pgoff_t index, pgoff_t max, struct pagevec *pvec) { XA_STATE(xas, &mapping->i_pages, index); - struct folio *folio; + struct page *head; rcu_read_lock(); - for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { - if (xas_retry(&xas, folio)) + for (head = xas_load(&xas); head; head = xas_next(&xas)) { + if (xas_retry(&xas, head)) continue; - if (xas.xa_index > max || xa_is_value(folio)) + if (xas.xa_index > max || xa_is_value(head)) break; - if (!folio_try_get_rcu(folio)) + if (!page_cache_get_speculative(head)) goto retry; - if (unlikely(folio != xas_reload(&xas))) - goto put_folio; + /* Has the page moved or been split? 
*/ + if (unlikely(head != xas_reload(&xas))) + goto put_page; - if (!folio_batch_add(fbatch, folio)) + if (!pagevec_add(pvec, head)) break; - if (!folio_test_uptodate(folio)) + if (!PageUptodate(head)) break; - if (folio_test_readahead(folio)) + if (PageReadahead(head)) break; - xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1); + if (PageHead(head)) { + xas_set(&xas, head->index + thp_nr_pages(head)); + /* Handle wrap correctly */ + if (xas.xa_index - 1 >= max) + break; + } continue; -put_folio: - folio_put(folio); +put_page: + put_page(head); retry: xas_reset(&xas); } rcu_read_unlock(); } -static int filemap_read_folio(struct file *file, struct address_space *mapping, - struct folio *folio) +static int filemap_read_page(struct file *file, struct address_space *mapping, + struct page *page) { int error; @@ -2414,50 +2379,50 @@ static int filemap_read_folio(struct file *file, struct address_space *mapping, * eg. multipath errors. PG_error will be set again if readpage * fails. */ - folio_clear_error(folio); + ClearPageError(page); /* Start the actual read. The read will unlock the page. */ - error = mapping->a_ops->readpage(file, &folio->page); + error = mapping->a_ops->readpage(file, page); if (error) return error; - error = folio_wait_locked_killable(folio); + error = wait_on_page_locked_killable(page); if (error) return error; - if (folio_test_uptodate(folio)) + if (PageUptodate(page)) return 0; shrink_readahead_size_eio(&file->f_ra); return -EIO; } static bool filemap_range_uptodate(struct address_space *mapping, - loff_t pos, struct iov_iter *iter, struct folio *folio) + loff_t pos, struct iov_iter *iter, struct page *page) { int count; - if (folio_test_uptodate(folio)) + if (PageUptodate(page)) return true; /* pipes can't handle partially uptodate pages */ if (iov_iter_is_pipe(iter)) return false; if (!mapping->a_ops->is_partially_uptodate) return false; - if (mapping->host->i_blkbits >= folio_shift(folio)) + if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page))) return false; count = iter->count; - if (folio_pos(folio) > pos) { - count -= folio_pos(folio) - pos; + if (page_offset(page) > pos) { + count -= page_offset(page) - pos; pos = 0; } else { - pos -= folio_pos(folio); + pos -= page_offset(page); } - return mapping->a_ops->is_partially_uptodate(&folio->page, pos, count); + return mapping->a_ops->is_partially_uptodate(page, pos, count); } static int filemap_update_page(struct kiocb *iocb, struct address_space *mapping, struct iov_iter *iter, - struct folio *folio) + struct page *page) { int error; @@ -2468,113 +2433,107 @@ static int filemap_update_page(struct kiocb *iocb, filemap_invalidate_lock_shared(mapping); } - if (!folio_trylock(folio)) { + if (!trylock_page(page)) { error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) goto unlock_mapping; if (!(iocb->ki_flags & IOCB_WAITQ)) { filemap_invalidate_unlock_shared(mapping); - /* - * This is where we usually end up waiting for a - * previously submitted readahead to finish. 
- */ - folio_put_wait_locked(folio, TASK_KILLABLE); + put_and_wait_on_page_locked(page, TASK_KILLABLE); return AOP_TRUNCATED_PAGE; } - error = __folio_lock_async(folio, iocb->ki_waitq); + error = __lock_page_async(page, iocb->ki_waitq); if (error) goto unlock_mapping; } error = AOP_TRUNCATED_PAGE; - if (!folio->mapping) + if (!page->mapping) goto unlock; error = 0; - if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio)) + if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page)) goto unlock; error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) goto unlock; - error = filemap_read_folio(iocb->ki_filp, mapping, folio); + error = filemap_read_page(iocb->ki_filp, mapping, page); goto unlock_mapping; unlock: - folio_unlock(folio); + unlock_page(page); unlock_mapping: filemap_invalidate_unlock_shared(mapping); if (error == AOP_TRUNCATED_PAGE) - folio_put(folio); + put_page(page); return error; } -static int filemap_create_folio(struct file *file, +static int filemap_create_page(struct file *file, struct address_space *mapping, pgoff_t index, - struct folio_batch *fbatch) + struct pagevec *pvec) { - struct folio *folio; + struct page *page; int error; - folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0); - if (!folio) + page = page_cache_alloc(mapping); + if (!page) return -ENOMEM; /* - * Protect against truncate / hole punch. Grabbing invalidate_lock - * here assures we cannot instantiate and bring uptodate new - * pagecache folios after evicting page cache during truncate - * and before actually freeing blocks. Note that we could - * release invalidate_lock after inserting the folio into - * the page cache as the locked folio would then be enough to - * synchronize with hole punching. But there are code paths - * such as filemap_update_page() filling in partially uptodate - * pages or ->readpages() that need to hold invalidate_lock - * while mapping blocks for IO so let's hold the lock here as - * well to keep locking rules simple. + * Protect against truncate / hole punch. Grabbing invalidate_lock here + * assures we cannot instantiate and bring uptodate new pagecache pages + * after evicting page cache during truncate and before actually + * freeing blocks. Note that we could release invalidate_lock after + * inserting the page into page cache as the locked page would then be + * enough to synchronize with hole punching. But there are code paths + * such as filemap_update_page() filling in partially uptodate pages or + * ->readpages() that need to hold invalidate_lock while mapping blocks + * for IO so let's hold the lock here as well to keep locking rules + * simple. 
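+ *
+ * (Ordering note, added for clarity rather than taken from the patch:
+ * invalidate_lock is taken in shared mode before the page is added to
+ * the cache and locked, so it nests outside the page lock here.)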
*/ filemap_invalidate_lock_shared(mapping); - error = filemap_add_folio(mapping, folio, index, + error = add_to_page_cache_lru(page, mapping, index, mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error == -EEXIST) error = AOP_TRUNCATED_PAGE; if (error) goto error; - error = filemap_read_folio(file, mapping, folio); + error = filemap_read_page(file, mapping, page); if (error) goto error; filemap_invalidate_unlock_shared(mapping); - folio_batch_add(fbatch, folio); + pagevec_add(pvec, page); return 0; error: filemap_invalidate_unlock_shared(mapping); - folio_put(folio); + put_page(page); return error; } static int filemap_readahead(struct kiocb *iocb, struct file *file, - struct address_space *mapping, struct folio *folio, + struct address_space *mapping, struct page *page, pgoff_t last_index) { - DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index); - if (iocb->ki_flags & IOCB_NOIO) return -EAGAIN; - page_cache_async_ra(&ractl, folio, last_index - folio->index); + page_cache_async_readahead(mapping, &file->f_ra, file, page, + page->index, last_index - page->index); return 0; } static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, - struct folio_batch *fbatch) + struct pagevec *pvec) { struct file *filp = iocb->ki_filp; struct address_space *mapping = filp->f_mapping; struct file_ra_state *ra = &filp->f_ra; pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; pgoff_t last_index; - struct folio *folio; + struct page *page; int err = 0; last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE); @@ -2582,35 +2541,34 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, if (fatal_signal_pending(current)) return -EINTR; - filemap_get_read_batch(mapping, index, last_index, fbatch); - if (!folio_batch_count(fbatch)) { + filemap_get_read_batch(mapping, index, last_index, pvec); + if (!pagevec_count(pvec)) { if (iocb->ki_flags & IOCB_NOIO) return -EAGAIN; page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); - filemap_get_read_batch(mapping, index, last_index, fbatch); + filemap_get_read_batch(mapping, index, last_index, pvec); } - if (!folio_batch_count(fbatch)) { + if (!pagevec_count(pvec)) { if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) return -EAGAIN; - err = filemap_create_folio(filp, mapping, - iocb->ki_pos >> PAGE_SHIFT, fbatch); + err = filemap_create_page(filp, mapping, + iocb->ki_pos >> PAGE_SHIFT, pvec); if (err == AOP_TRUNCATED_PAGE) goto retry; return err; } - folio = fbatch->folios[folio_batch_count(fbatch) - 1]; - if (folio_test_readahead(folio)) { - err = filemap_readahead(iocb, filp, mapping, folio, last_index); + page = pvec->pages[pagevec_count(pvec) - 1]; + if (PageReadahead(page)) { + err = filemap_readahead(iocb, filp, mapping, page, last_index); if (err) goto err; } - if (!folio_test_uptodate(folio)) { - if ((iocb->ki_flags & IOCB_WAITQ) && - folio_batch_count(fbatch) > 1) + if (!PageUptodate(page)) { + if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1) iocb->ki_flags |= IOCB_NOWAIT; - err = filemap_update_page(iocb, mapping, iter, folio); + err = filemap_update_page(iocb, mapping, iter, page); if (err) goto err; } @@ -2618,8 +2576,8 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, return 0; err: if (err < 0) - folio_put(folio); - if (likely(--fbatch->nr)) + put_page(page); + if (likely(--pvec->nr)) return 0; if (err == AOP_TRUNCATED_PAGE) goto retry; @@ -2646,7 +2604,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, struct file_ra_state *ra = 
&filp->f_ra;
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
-	struct folio_batch fbatch;
+	struct pagevec pvec;
 	int i, error = 0;
 	bool writably_mapped;
 	loff_t isize, end_offset;
@@ -2657,7 +2615,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		return 0;
 	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
-	folio_batch_init(&fbatch);
+	pagevec_init(&pvec);
 	do {
 		cond_resched();
@@ -2670,10 +2628,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
 			iocb->ki_flags |= IOCB_NOWAIT;
-		if (unlikely(iocb->ki_pos >= i_size_read(inode)))
-			break;
-
-		error = filemap_get_pages(iocb, iter, &fbatch);
+		error = filemap_get_pages(iocb, iter, &pvec);
 		if (error < 0)
 			break;
@@ -2687,7 +2642,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		 */
 		isize = i_size_read(inode);
 		if (unlikely(iocb->ki_pos >= isize))
-			goto put_folios;
+			goto put_pages;
 		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 		/*
@@ -2702,29 +2657,33 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		 */
 		if (iocb->ki_pos >> PAGE_SHIFT !=
 		    ra->prev_pos >> PAGE_SHIFT)
-			folio_mark_accessed(fbatch.folios[0]);
+			mark_page_accessed(pvec.pages[0]);
-		for (i = 0; i < folio_batch_count(&fbatch); i++) {
-			struct folio *folio = fbatch.folios[i];
-			size_t fsize = folio_size(folio);
-			size_t offset = iocb->ki_pos & (fsize - 1);
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			size_t page_size = thp_size(page);
+			size_t offset = iocb->ki_pos & (page_size - 1);
 			size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
-					     fsize - offset);
+					     page_size - offset);
 			size_t copied;
-			if (end_offset < folio_pos(folio))
+			if (end_offset < page_offset(page))
 				break;
 			if (i > 0)
-				folio_mark_accessed(folio);
+				mark_page_accessed(page);
 			/*
-			 * If users can be writing to this folio using arbitrary
-			 * virtual addresses, take care of potential aliasing
-			 * before reading the folio on the kernel side.
+			 * If users can be writing to this page using arbitrary
+			 * virtual addresses, take care of potential aliasing
+			 * before reading the page on the kernel side.
			 */
-			if (writably_mapped)
-				flush_dcache_folio(folio);
+			if (writably_mapped) {
+				int j;
-			copied = copy_folio_to_iter(folio, offset, bytes, iter);
+				for (j = 0; j < thp_nr_pages(page); j++)
+					flush_dcache_page(page + j);
+			}
+
+			copied = copy_page_to_iter(page, offset, bytes, iter);
 			already_read += copied;
 			iocb->ki_pos += copied;
@@ -2735,10 +2694,10 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 				break;
 			}
 		}
-put_folios:
-		for (i = 0; i < folio_batch_count(&fbatch); i++)
-			folio_put(fbatch.folios[i]);
-		folio_batch_init(&fbatch);
+put_pages:
+		for (i = 0; i < pagevec_count(&pvec); i++)
+			put_page(pvec.pages[i]);
+		pagevec_reinit(&pvec);
 	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 	file_accessed(filp);
@@ -2781,7 +2740,9 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
+	loff_t size;
+
+	size = i_size_read(inode);
 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
 						  iocb->ki_pos + count - 1))
@@ -2813,9 +2774,8 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	 * the rest of the read. Buffered reads will not work for
 	 * DAX files, so don't bother trying.
*/ - if (retval < 0 || !count || IS_DAX(inode)) - return retval; - if (iocb->ki_pos >= i_size_read(inode)) + if (retval < 0 || !count || iocb->ki_pos >= size || + IS_DAX(inode)) return retval; } @@ -2823,44 +2783,44 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) } EXPORT_SYMBOL(generic_file_read_iter); -static inline loff_t folio_seek_hole_data(struct xa_state *xas, - struct address_space *mapping, struct folio *folio, +static inline loff_t page_seek_hole_data(struct xa_state *xas, + struct address_space *mapping, struct page *page, loff_t start, loff_t end, bool seek_data) { const struct address_space_operations *ops = mapping->a_ops; size_t offset, bsz = i_blocksize(mapping->host); - if (xa_is_value(folio) || folio_test_uptodate(folio)) + if (xa_is_value(page) || PageUptodate(page)) return seek_data ? start : end; if (!ops->is_partially_uptodate) return seek_data ? end : start; xas_pause(xas); rcu_read_unlock(); - folio_lock(folio); - if (unlikely(folio->mapping != mapping)) + lock_page(page); + if (unlikely(page->mapping != mapping)) goto unlock; - offset = offset_in_folio(folio, start) & ~(bsz - 1); + offset = offset_in_thp(page, start) & ~(bsz - 1); do { - if (ops->is_partially_uptodate(&folio->page, offset, bsz) == - seek_data) + if (ops->is_partially_uptodate(page, offset, bsz) == seek_data) break; start = (start + bsz) & ~(bsz - 1); offset += bsz; - } while (offset < folio_size(folio)); + } while (offset < thp_size(page)); unlock: - folio_unlock(folio); + unlock_page(page); rcu_read_lock(); return start; } -static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio) +static inline +unsigned int seek_page_size(struct xa_state *xas, struct page *page) { - if (xa_is_value(folio)) + if (xa_is_value(page)) return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index); - return folio_size(folio); + return thp_size(page); } /** @@ -2887,15 +2847,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT); pgoff_t max = (end - 1) >> PAGE_SHIFT; bool seek_data = (whence == SEEK_DATA); - struct folio *folio; + struct page *page; if (end <= start) return -ENXIO; rcu_read_lock(); - while ((folio = find_get_entry(&xas, max, XA_PRESENT))) { + while ((page = find_get_entry(&xas, max, XA_PRESENT))) { loff_t pos = (u64)xas.xa_index << PAGE_SHIFT; - size_t seek_size; + unsigned int seek_size; if (start < pos) { if (!seek_data) @@ -2903,9 +2863,9 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, start = pos; } - seek_size = seek_folio_size(&xas, folio); - pos = round_up((u64)pos + 1, seek_size); - start = folio_seek_hole_data(&xas, mapping, folio, start, pos, + seek_size = seek_page_size(&xas, page); + pos = round_up(pos + 1, seek_size); + start = page_seek_hole_data(&xas, mapping, page, start, pos, seek_data); if (start < pos) goto unlock; @@ -2913,15 +2873,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, break; if (seek_size > PAGE_SIZE) xas_set(&xas, pos >> PAGE_SHIFT); - if (!xa_is_value(folio)) - folio_put(folio); + if (!xa_is_value(page)) + put_page(page); } if (seek_data) start = -ENXIO; unlock: rcu_read_unlock(); - if (folio && !xa_is_value(folio)) - folio_put(folio); + if (page && !xa_is_value(page)) + put_page(page); if (start > end) return end; return start; @@ -2930,21 +2890,20 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, #ifdef CONFIG_MMU #define MMAP_LOTSAMISS (100) /* - * 
lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock + * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock * @vmf - the vm_fault for this fault. - * @folio - the folio to lock. + * @page - the page to lock. * @fpin - the pointer to the file we may pin (or is already pinned). * - * This works similar to lock_folio_or_retry in that it can drop the - * mmap_lock. It differs in that it actually returns the folio locked - * if it returns 1 and 0 if it couldn't lock the folio. If we did have - * to drop the mmap_lock then fpin will point to the pinned file and - * needs to be fput()'ed at a later point. + * This works similar to lock_page_or_retry in that it can drop the mmap_lock. + * It differs in that it actually returns the page locked if it returns 1 and 0 + * if it couldn't lock the page. If we did have to drop the mmap_lock then fpin + * will point to the pinned file and needs to be fput()'ed at a later point. */ -static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, +static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, struct file **fpin) { - if (folio_trylock(folio)) + if (trylock_page(page)) return 1; /* @@ -2957,7 +2916,7 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, *fpin = maybe_unlock_mmap_for_io(vmf, *fpin); if (vmf->flags & FAULT_FLAG_KILLABLE) { - if (__folio_lock_killable(folio)) { + if (__lock_page_killable(page)) { /* * We didn't have the right flags to drop the mmap_lock, * but all fault_handlers only check for fatal signals @@ -2969,11 +2928,11 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, return 0; } } else - __folio_lock(folio); - + __lock_page(page); return 1; } + /* * Synchronous readahead happens when we don't even find a page in the page * cache at all. We don't want to perform IO under the mmap sem, so if we have @@ -3032,25 +2991,25 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) * was pinned if we have to drop the mmap_lock in order to do IO. */ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, - struct folio *folio) + struct page *page) { struct file *file = vmf->vma->vm_file; struct file_ra_state *ra = &file->f_ra; - DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff); + struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; unsigned int mmap_miss; + pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; - mmap_miss = READ_ONCE(ra->mmap_miss); if (mmap_miss) WRITE_ONCE(ra->mmap_miss, --mmap_miss); - - if (folio_test_readahead(folio)) { + if (PageReadahead(page)) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); - page_cache_async_ra(&ractl, folio, ra->ra_pages); + page_cache_async_readahead(mapping, ra, file, + page, offset, ra->ra_pages); } return fpin; } @@ -3069,7 +3028,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, * vma->vm_mm->mmap_lock must be held on entry. * * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock - * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap(). + * may be dropped before doing I/O or by lock_page_maybe_drop_mmap(). * * If our return value does not have VM_FAULT_RETRY set, the mmap_lock * has not been released. 
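+ *
+ * A hypothetical ->fault wrapper (illustration only, not part of this
+ * patch) showing how the contract above is consumed:
+ *
+ *	static vm_fault_t myfs_fault(struct vm_fault *vmf)
+ *	{
+ *		vm_fault_t ret = filemap_fault(vmf);
+ *
+ *		/* On VM_FAULT_RETRY the mmap_lock may already be
+ *		 * dropped; otherwise it is still held and, on success,
+ *		 * vmf->page is returned locked (VM_FAULT_LOCKED). */
+ *		return ret;
+ *	}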
@@ -3085,27 +3044,28 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) struct file *fpin = NULL; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - pgoff_t max_idx, index = vmf->pgoff; - struct folio *folio; + pgoff_t offset = vmf->pgoff; + pgoff_t max_off; + struct page *page; vm_fault_t ret = 0; bool mapping_locked = false; - max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); - if (unlikely(index >= max_idx)) + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) return VM_FAULT_SIGBUS; /* * Do we have something in the page cache already? */ - folio = filemap_get_folio(mapping, index); - if (likely(folio)) { + page = find_get_page(mapping, offset); + if (likely(page)) { /* * We found the page, so try async readahead before waiting for * the lock. */ if (!(vmf->flags & FAULT_FLAG_TRIED)) - fpin = do_async_mmap_readahead(vmf, folio); - if (unlikely(!folio_test_uptodate(folio))) { + fpin = do_async_mmap_readahead(vmf, page); + if (unlikely(!PageUptodate(page))) { filemap_invalidate_lock_shared(mapping); mapping_locked = true; } @@ -3117,17 +3077,17 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) fpin = do_sync_mmap_readahead(vmf); retry_find: /* - * See comment in filemap_create_folio() why we need + * See comment in filemap_create_page() why we need * invalidate_lock */ if (!mapping_locked) { filemap_invalidate_lock_shared(mapping); mapping_locked = true; } - folio = __filemap_get_folio(mapping, index, + page = pagecache_get_page(mapping, offset, FGP_CREAT|FGP_FOR_MMAP, vmf->gfp_mask); - if (!folio) { + if (!page) { if (fpin) goto out_retry; filemap_invalidate_unlock_shared(mapping); @@ -3135,22 +3095,22 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) } } - if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin)) + if (!lock_page_maybe_drop_mmap(vmf, page, &fpin)) goto out_retry; /* Did it get truncated? */ - if (unlikely(folio->mapping != mapping)) { - folio_unlock(folio); - folio_put(folio); + if (unlikely(compound_head(page)->mapping != mapping)) { + unlock_page(page); + put_page(page); goto retry_find; } - VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); + VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); /* * We have a locked page in the page cache, now we need to check * that it's up-to-date. If not, it is going to be due to an error. */ - if (unlikely(!folio_test_uptodate(folio))) { + if (unlikely(!PageUptodate(page))) { /* * The page was in cache and uptodate and now it is not. * Strange but possible since we didn't hold the page lock all @@ -3158,8 +3118,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) * try again. */ if (!mapping_locked) { - folio_unlock(folio); - folio_put(folio); + unlock_page(page); + put_page(page); goto retry_find; } goto page_not_uptodate; @@ -3171,7 +3131,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) * redo the fault. */ if (fpin) { - folio_unlock(folio); + unlock_page(page); goto out_retry; } if (mapping_locked) @@ -3181,14 +3141,14 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) * Found the page and have a reference on it. * We must recheck i_size under page lock. 
*/ - max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); - if (unlikely(index >= max_idx)) { - folio_unlock(folio); - folio_put(folio); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) { + unlock_page(page); + put_page(page); return VM_FAULT_SIGBUS; } - vmf->page = folio_file_page(folio, index); + vmf->page = page; return ret | VM_FAULT_LOCKED; page_not_uptodate: @@ -3199,10 +3159,10 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) * and we need to check for errors. */ fpin = maybe_unlock_mmap_for_io(vmf, fpin); - error = filemap_read_folio(file, mapping, folio); + error = filemap_read_page(file, mapping, page); if (fpin) goto out_retry; - folio_put(folio); + put_page(page); if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; @@ -3216,8 +3176,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) * re-find the vma and come back and find our hopefully still populated * page. */ - if (folio) - folio_put(folio); + if (page) + put_page(page); if (mapping_locked) filemap_invalidate_unlock_shared(mapping); if (fpin) @@ -3238,16 +3198,23 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page) } if (pmd_none(*vmf->pmd) && PageTransHuge(page)) { - vm_fault_t ret = do_set_pmd(vmf, page); - if (!ret) { - /* The page is mapped successfully, reference consumed. */ - unlock_page(page); - return true; - } + vm_fault_t ret = do_set_pmd(vmf, page); + if (!ret) { + /* The page is mapped successfully, reference consumed. */ + unlock_page(page); + return true; + } } - if (pmd_none(*vmf->pmd)) - pmd_install(mm, vmf->pmd, &vmf->prealloc_pte); + if (pmd_none(*vmf->pmd)) { + vmf->ptl = pmd_lock(mm, vmf->pmd); + if (likely(pmd_none(*vmf->pmd))) { + mm_inc_nr_ptes(mm); + pmd_populate(mm, vmf->pmd, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + spin_unlock(vmf->ptl); + } /* See comment in handle_pte_fault() */ if (pmd_devmap_trans_unstable(vmf->pmd)) { @@ -3259,48 +3226,50 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page) return false; } -static struct folio *next_uptodate_page(struct folio *folio, +static struct page *next_uptodate_page(struct page *page, struct address_space *mapping, struct xa_state *xas, pgoff_t end_pgoff) { unsigned long max_idx; do { - if (!folio) + if (!page) return NULL; - if (xas_retry(xas, folio)) + if (xas_retry(xas, page)) continue; - if (xa_is_value(folio)) + if (xa_is_value(page)) continue; - if (folio_test_locked(folio)) + if (PageLocked(page)) continue; - if (!folio_try_get_rcu(folio)) + if (!page_cache_get_speculative(page)) continue; /* Has the page moved or been split? 
*/ - if (unlikely(folio != xas_reload(xas))) + if (unlikely(page != xas_reload(xas))) goto skip; - if (!folio_test_uptodate(folio) || folio_test_readahead(folio)) + if (!PageUptodate(page) || PageReadahead(page)) goto skip; - if (!folio_trylock(folio)) + if (PageHWPoison(page)) goto skip; - if (folio->mapping != mapping) + if (!trylock_page(page)) + goto skip; + if (page->mapping != mapping) goto unlock; - if (!folio_test_uptodate(folio)) + if (!PageUptodate(page)) goto unlock; max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); if (xas->xa_index >= max_idx) goto unlock; - return folio; + return page; unlock: - folio_unlock(folio); + unlock_page(page); skip: - folio_put(folio); - } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL); + put_page(page); + } while ((page = xas_next_entry(xas, end_pgoff)) != NULL); return NULL; } -static inline struct folio *first_map_page(struct address_space *mapping, +static inline struct page *first_map_page(struct address_space *mapping, struct xa_state *xas, pgoff_t end_pgoff) { @@ -3308,7 +3277,7 @@ static inline struct folio *first_map_page(struct address_space *mapping, mapping, xas, end_pgoff); } -static inline struct folio *next_map_page(struct address_space *mapping, +static inline struct page *next_map_page(struct address_space *mapping, struct xa_state *xas, pgoff_t end_pgoff) { @@ -3325,17 +3294,16 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t last_pgoff = start_pgoff; unsigned long addr; XA_STATE(xas, &mapping->i_pages, start_pgoff); - struct folio *folio; - struct page *page; + struct page *head, *page; unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); vm_fault_t ret = 0; rcu_read_lock(); - folio = first_map_page(mapping, &xas, end_pgoff); - if (!folio) + head = first_map_page(mapping, &xas, end_pgoff); + if (!head) goto out; - if (filemap_map_pmd(vmf, &folio->page)) { + if (filemap_map_pmd(vmf, head)) { ret = VM_FAULT_NOPAGE; goto out; } @@ -3343,8 +3311,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); do { -again: - page = folio_file_page(folio, xas.xa_index); + page = find_subpage(head, xas.xa_index); if (PageHWPoison(page)) goto unlock; @@ -3365,21 +3332,12 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, do_set_pte(vmf, page, addr); /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, addr, vmf->pte); - if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { - xas.xa_index++; - folio_ref_inc(folio); - goto again; - } - folio_unlock(folio); + unlock_page(head); continue; unlock: - if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { - xas.xa_index++; - goto again; - } - folio_unlock(folio); - folio_put(folio); - } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL); + unlock_page(head); + put_page(head); + } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL); pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); @@ -3391,24 +3349,24 @@ EXPORT_SYMBOL(filemap_map_pages); vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; - struct folio *folio = page_folio(vmf->page); + struct page *page = vmf->page; vm_fault_t ret = VM_FAULT_LOCKED; sb_start_pagefault(mapping->host->i_sb); file_update_time(vmf->vma->vm_file); - folio_lock(folio); - if (folio->mapping != mapping) { - folio_unlock(folio); + lock_page(page); + if 
(page->mapping != mapping) { + unlock_page(page); ret = VM_FAULT_NOPAGE; goto out; } /* - * We mark the folio dirty already here so that when freeze is in + * We mark the page dirty already here so that when freeze is in * progress, we are guaranteed that writeback during freezing will - * see the dirty folio and writeprotect it again. + * see the dirty page and writeprotect it again. */ - folio_mark_dirty(folio); - folio_wait_stable(folio); + set_page_dirty(page); + wait_for_stable_page(page); out: sb_end_pagefault(mapping->host->i_sb); return ret; @@ -3461,20 +3419,35 @@ EXPORT_SYMBOL(filemap_page_mkwrite); EXPORT_SYMBOL(generic_file_mmap); EXPORT_SYMBOL(generic_file_readonly_mmap); -static struct folio *do_read_cache_folio(struct address_space *mapping, - pgoff_t index, filler_t filler, void *data, gfp_t gfp) +static struct page *wait_on_page_read(struct page *page) { - struct folio *folio; + if (!IS_ERR(page)) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + put_page(page); + page = ERR_PTR(-EIO); + } + } + return page; +} + +static struct page *do_read_cache_page(struct address_space *mapping, + pgoff_t index, + int (*filler)(void *, struct page *), + void *data, + gfp_t gfp) +{ + struct page *page; int err; repeat: - folio = filemap_get_folio(mapping, index); - if (!folio) { - folio = filemap_alloc_folio(gfp, 0); - if (!folio) + page = find_get_page(mapping, index); + if (!page) { + page = __page_cache_alloc(gfp); + if (!page) return ERR_PTR(-ENOMEM); - err = filemap_add_folio(mapping, folio, index, gfp); + err = add_to_page_cache_lru(page, mapping, index, gfp); if (unlikely(err)) { - folio_put(folio); + put_page(page); if (err == -EEXIST) goto repeat; /* Presumably ENOMEM for xarray node */ @@ -3483,41 +3456,71 @@ static struct folio *do_read_cache_folio(struct address_space *mapping, filler: if (filler) - err = filler(data, &folio->page); + err = filler(data, page); else - err = mapping->a_ops->readpage(data, &folio->page); + err = mapping->a_ops->readpage(data, page); if (err < 0) { - folio_put(folio); + put_page(page); return ERR_PTR(err); } - folio_wait_locked(folio); - if (!folio_test_uptodate(folio)) { - folio_put(folio); - return ERR_PTR(-EIO); - } - + page = wait_on_page_read(page); + if (IS_ERR(page)) + return page; goto out; } - if (folio_test_uptodate(folio)) + if (PageUptodate(page)) goto out; - if (!folio_trylock(folio)) { - folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); - goto repeat; - } + /* + * Page is not up to date and may be locked due to one of the following + * case a: Page is being filled and the page lock is held + * case b: Read/write error clearing the page uptodate status + * case c: Truncation in progress (page locked) + * case d: Reclaim in progress + * + * Case a, the page will be up to date when the page is unlocked. + * There is no need to serialise on the page lock here as the page + * is pinned so the lock gives no additional protection. Even if the + * page is truncated, the data is still valid if PageUptodate as + * it's a read vs truncate race. + * Case b, the page will not be up to date. + * Case c, the page may be truncated but in itself, the data may still + * be valid after IO completes as it's a read vs truncate race. The + * operation must restart if the page is not uptodate on unlock but + * otherwise serialising on page lock to stabilise the mapping gives + * no additional guarantees to the caller as the page lock is + * released before return. + * Case d, similar to truncation.
If reclaim holds the page lock, it + * will be a race with remove_mapping that determines if the mapping + * is valid on unlock but otherwise the data is valid and there is + * no need to serialise with page lock. + * + * As the page lock gives no additional guarantee, we optimistically + * wait on the page to be unlocked and check if it's up to date and + * use the page if it is. Otherwise, the page lock is required to + * distinguish between the different cases. The motivation is that we + * avoid spurious serialisations and wakeups when multiple processes + * wait on the same page for IO to complete. + */ + wait_on_page_locked(page); + if (PageUptodate(page)) + goto out; - /* Folio was truncated from mapping */ - if (!folio->mapping) { - folio_unlock(folio); - folio_put(folio); + /* Distinguish between all the cases under the safety of the lock */ + lock_page(page); + + /* Case c or d, restart the operation */ + if (!page->mapping) { + unlock_page(page); + put_page(page); goto repeat; } /* Someone else locked and filled the page in a very small window */ - if (folio_test_uptodate(folio)) { - folio_unlock(folio); + if (PageUptodate(page)) { + unlock_page(page); goto out; } @@ -3527,16 +3530,16 @@ static struct folio *do_read_cache_folio(struct address_space *mapping, * Clear page error before actual read, PG_error will be * set again if read page fails. */ - folio_clear_error(folio); + ClearPageError(page); goto filler; out: - folio_mark_accessed(folio); - return folio; + mark_page_accessed(page); + return page; } /** - * read_cache_folio - read into page cache, fill it if needed + * read_cache_page - read into page cache, fill it if needed * @mapping: the page's address_space * @index: the page index * @filler: function to perform the read @@ -3551,27 +3554,10 @@ static struct folio *do_read_cache_folio(struct address_space *mapping, * * Return: up to date page on success, ERR_PTR() on failure. */ -struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index, - filler_t filler, void *data) -{ - return do_read_cache_folio(mapping, index, filler, data, - mapping_gfp_mask(mapping)); -} -EXPORT_SYMBOL(read_cache_folio); - -static struct page *do_read_cache_page(struct address_space *mapping, - pgoff_t index, filler_t *filler, void *data, gfp_t gfp) -{ - struct folio *folio; - - folio = do_read_cache_folio(mapping, index, filler, data, gfp); - if (IS_ERR(folio)) - return &folio->page; - return folio_file_page(folio, index); -} - struct page *read_cache_page(struct address_space *mapping, - pgoff_t index, filler_t *filler, void *data) + pgoff_t index, + int (*filler)(void *, struct page *), + void *data) { return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); @@ -3725,6 +3711,28 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) } EXPORT_SYMBOL(generic_file_direct_write); +/* + * Find or create a page at the given pagecache position. Return the locked + * page. This function is specifically for buffered writes. 
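+ *
+ * A typical ->write_begin() implementation calls it along these lines
+ * (example_write_begin is a hypothetical sketch, not kernel code):
+ *
+ *	static int example_write_begin(struct file *file,
+ *			struct address_space *mapping, loff_t pos,
+ *			unsigned len, unsigned flags,
+ *			struct page **pagep, void **fsdata)
+ *	{
+ *		struct page *page;
+ *
+ *		page = grab_cache_page_write_begin(mapping,
+ *				pos >> PAGE_SHIFT, flags);
+ *		if (!page)
+ *			return -ENOMEM;
+ *		*pagep = page;
+ *		return 0;
+ *	}
+ *
+ * The page comes back locked and referenced; the matching ->write_end()
+ * is responsible for unlocking and releasing it.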
+ */ +struct page *grab_cache_page_write_begin(struct address_space *mapping, + pgoff_t index, unsigned flags) +{ + struct page *page; + int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT; + + if (flags & AOP_FLAG_NOFS) + fgp_flags |= FGP_NOFS; + + page = pagecache_get_page(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); + if (page) + wait_for_stable_page(page); + + return page; +} +EXPORT_SYMBOL(grab_cache_page_write_begin); + ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { @@ -3752,7 +3760,7 @@ ssize_t generic_perform_write(struct file *file, * same page as we're writing to, without it being marked * up-to-date. */ - if (unlikely(fault_in_iov_iter_readable(i, bytes))) { + if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; break; } @@ -3931,32 +3939,33 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) EXPORT_SYMBOL(generic_file_write_iter); /** - * filemap_release_folio() - Release fs-specific metadata on a folio. - * @folio: The folio which the kernel is trying to free. - * @gfp: Memory allocation flags (and I/O mode). + * try_to_release_page() - release old fs-specific metadata on a page * - * The address_space is trying to release any data attached to a folio - * (presumably at folio->private). + * @page: the page which the kernel is trying to free + * @gfp_mask: memory allocation flags (and I/O mode) * - * This will also be called if the private_2 flag is set on a page, - * indicating that the folio has other metadata associated with it. + * The address_space is to try to release any data against the page + * (presumably at page->private). * - * The @gfp argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block - * (__GFP_RECLAIM & __GFP_FS). + * This may also be called if PG_fscache is set on a page, indicating that the + * page is known to the local caching routines. * - * Return: %true if the release was successful, otherwise %false. + * The @gfp_mask argument specifies whether I/O may be performed to release + * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). + * + * Return: %1 if the release was successful, otherwise return zero. */ -bool filemap_release_folio(struct folio *folio, gfp_t gfp) +int try_to_release_page(struct page *page, gfp_t gfp_mask) { - struct address_space * const mapping = folio->mapping; + struct address_space * const mapping = page->mapping; - BUG_ON(!folio_test_locked(folio)); - if (folio_test_writeback(folio)) - return false; + BUG_ON(!PageLocked(page)); + if (PageWriteback(page)) + return 0; if (mapping && mapping->a_ops->releasepage) - return mapping->a_ops->releasepage(&folio->page, gfp); - return try_to_free_buffers(&folio->page); + return mapping->a_ops->releasepage(page, gfp_mask); + return try_to_free_buffers(page); } -EXPORT_SYMBOL(filemap_release_folio); + +EXPORT_SYMBOL(try_to_release_page); diff --git a/mm/frontswap.c b/mm/frontswap.c index 6f69b044a8..130e301c5a 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -27,7 +27,27 @@ DEFINE_STATIC_KEY_FALSE(frontswap_enabled_key); * may be registered, but implementations can never deregister. This * is a simple singly-linked list of all registered implementations. 
*/ -static const struct frontswap_ops *frontswap_ops __read_mostly; +static struct frontswap_ops *frontswap_ops __read_mostly; + +#define for_each_frontswap_ops(ops) \ + for ((ops) = frontswap_ops; (ops); (ops) = (ops)->next) + +/* + * If enabled, frontswap_store will return failure even on success. As + * a result, the swap subsystem will always write the page to swap, in + * effect converting frontswap into a writethrough cache. In this mode, + * there is no direct reduction in swap writes, but a frontswap backend + * can unilaterally "reclaim" any pages in use with no data loss, thus + * providing increased control over maximum memory usage due to frontswap. + */ +static bool frontswap_writethrough_enabled __read_mostly; + +/* + * If enabled, the underlying tmem implementation is capable of doing + * exclusive gets, so frontswap_load, on a successful tmem_get, must + * mark the page as no longer in frontswap AND mark it dirty. + */ +static bool frontswap_tmem_exclusive_gets_enabled __read_mostly; #ifdef CONFIG_DEBUG_FS /* @@ -94,22 +114,87 @@ static inline void inc_frontswap_invalidates(void) { } /* * Register operations for frontswap */ -int frontswap_register_ops(const struct frontswap_ops *ops) +void frontswap_register_ops(struct frontswap_ops *ops) { - if (frontswap_ops) - return -EINVAL; + DECLARE_BITMAP(a, MAX_SWAPFILES); + DECLARE_BITMAP(b, MAX_SWAPFILES); + struct swap_info_struct *si; + unsigned int i; + + bitmap_zero(a, MAX_SWAPFILES); + bitmap_zero(b, MAX_SWAPFILES); + + spin_lock(&swap_lock); + plist_for_each_entry(si, &swap_active_head, list) { + if (!WARN_ON(!si->frontswap_map)) + set_bit(si->type, a); + } + spin_unlock(&swap_lock); + + /* the new ops needs to know the currently active swap devices */ + for_each_set_bit(i, a, MAX_SWAPFILES) + ops->init(i); + + /* + * Setting frontswap_ops must happen after the ops->init() calls + * above; cmpxchg implies smp_mb() which will ensure the init is + * complete at this point. + */ + do { + ops->next = frontswap_ops; + } while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next); - frontswap_ops = ops; static_branch_inc(&frontswap_enabled_key); - return 0; + + spin_lock(&swap_lock); + plist_for_each_entry(si, &swap_active_head, list) { + if (si->frontswap_map) + set_bit(si->type, b); + } + spin_unlock(&swap_lock); + + /* + * On the very unlikely chance that a swap device was added or + * removed between setting the "a" list bits and the ops init + * calls, we re-check and do init or invalidate for any changed + * bits. + */ + if (unlikely(!bitmap_equal(a, b, MAX_SWAPFILES))) { + for (i = 0; i < MAX_SWAPFILES; i++) { + if (!test_bit(i, a) && test_bit(i, b)) + ops->init(i); + else if (test_bit(i, a) && !test_bit(i, b)) + ops->invalidate_area(i); + } + } } +EXPORT_SYMBOL(frontswap_register_ops); + +/* + * Enable/disable frontswap writethrough (see above). + */ +void frontswap_writethrough(bool enable) +{ + frontswap_writethrough_enabled = enable; +} +EXPORT_SYMBOL(frontswap_writethrough); + +/* + * Enable/disable frontswap exclusive gets (see above). + */ +void frontswap_tmem_exclusive_gets(bool enable) +{ + frontswap_tmem_exclusive_gets_enabled = enable; +} +EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); /* * Called when a swap device is swapon'd.
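 * (It is also the point where each backend previously linked in via
 * frontswap_register_ops() sees the new device through its ->init()
 * callback. For illustration, a minimal backend enters the list roughly
 * as below; every example_* name is hypothetical and the callbacks follow
 * the signatures invoked elsewhere in this file, with a nonzero return
 * from store/load meaning "not handled by this backend":
 *
 *	static struct frontswap_ops example_ops = {
 *		.init		 = example_init,
 *		.store		 = example_store,
 *		.load		 = example_load,
 *		.invalidate_page = example_invalidate_page,
 *		.invalidate_area = example_invalidate_area,
 *	};
 *
 *	static int __init example_backend_init(void)
 *	{
 *		frontswap_register_ops(&example_ops);
 *		frontswap_writethrough(true);
 *		return 0;
 *	}
 *	module_init(example_backend_init);
 *
 * where frontswap_writethrough(true) opts into the writethrough mode
 * described at the top of this file.)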
*/ -void frontswap_init(unsigned type, unsigned long *map) +void __frontswap_init(unsigned type, unsigned long *map) { struct swap_info_struct *sis = swap_info[type]; + struct frontswap_ops *ops; VM_BUG_ON(sis == NULL); @@ -125,16 +210,20 @@ void frontswap_init(unsigned type, unsigned long *map) * p->frontswap set to something valid to work properly. */ frontswap_map_set(sis, map); - frontswap_ops->init(type); -} -static bool __frontswap_test(struct swap_info_struct *sis, + for_each_frontswap_ops(ops) + ops->init(type); +} +EXPORT_SYMBOL(__frontswap_init); + +bool __frontswap_test(struct swap_info_struct *sis, pgoff_t offset) { if (sis->frontswap_map) return test_bit(offset, sis->frontswap_map); return false; } +EXPORT_SYMBOL(__frontswap_test); static inline void __frontswap_set(struct swap_info_struct *sis, pgoff_t offset) @@ -164,6 +253,7 @@ int __frontswap_store(struct page *page) int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); + struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(!PageLocked(page)); @@ -177,19 +267,28 @@ int __frontswap_store(struct page *page) */ if (__frontswap_test(sis, offset)) { __frontswap_clear(sis, offset); - frontswap_ops->invalidate_page(type, offset); + for_each_frontswap_ops(ops) + ops->invalidate_page(type, offset); } - ret = frontswap_ops->store(type, offset, page); + /* Try to store in each implementation, until one succeeds. */ + for_each_frontswap_ops(ops) { + ret = ops->store(type, offset, page); + if (!ret) /* successful store */ + break; + } if (ret == 0) { __frontswap_set(sis, offset); inc_frontswap_succ_stores(); } else { inc_frontswap_failed_stores(); } - + if (frontswap_writethrough_enabled) + /* report failure so swap also writes to swap device */ + ret = -1; return ret; } +EXPORT_SYMBOL(__frontswap_store); /* * "Get" data from frontswap associated with swaptype and offset that were @@ -203,6 +302,7 @@ int __frontswap_load(struct page *page) int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); + struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(!PageLocked(page)); @@ -212,11 +312,21 @@ int __frontswap_load(struct page *page) return -1; /* Try loading from each implementation, until one succeeds. 
*/ - ret = frontswap_ops->load(type, offset, page); - if (ret == 0) + for_each_frontswap_ops(ops) { + ret = ops->load(type, offset, page); + if (!ret) /* successful load */ + break; + } + if (ret == 0) { inc_frontswap_loads(); + if (frontswap_tmem_exclusive_gets_enabled) { + SetPageDirty(page); + __frontswap_clear(sis, offset); + } + } return ret; } +EXPORT_SYMBOL(__frontswap_load); /* * Invalidate any data from frontswap associated with the specified swaptype @@ -225,6 +335,7 @@ int __frontswap_load(struct page *page) void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; + struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(sis == NULL); @@ -232,10 +343,12 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) if (!__frontswap_test(sis, offset)) return; - frontswap_ops->invalidate_page(type, offset); + for_each_frontswap_ops(ops) + ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); } +EXPORT_SYMBOL(__frontswap_invalidate_page); /* * Invalidate all data from frontswap associated with all offsets for the @@ -244,6 +357,7 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; + struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(sis == NULL); @@ -251,10 +365,123 @@ void __frontswap_invalidate_area(unsigned type) if (sis->frontswap_map == NULL) return; - frontswap_ops->invalidate_area(type); + for_each_frontswap_ops(ops) + ops->invalidate_area(type); atomic_set(&sis->frontswap_pages, 0); bitmap_zero(sis->frontswap_map, sis->max); } +EXPORT_SYMBOL(__frontswap_invalidate_area); + +static unsigned long __frontswap_curr_pages(void) +{ + unsigned long totalpages = 0; + struct swap_info_struct *si = NULL; + + assert_spin_locked(&swap_lock); + plist_for_each_entry(si, &swap_active_head, list) + totalpages += atomic_read(&si->frontswap_pages); + return totalpages; +} + +static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused, + int *swapid) +{ + int ret = -EINVAL; + struct swap_info_struct *si = NULL; + int si_frontswap_pages; + unsigned long total_pages_to_unuse = total; + unsigned long pages = 0, pages_to_unuse = 0; + + assert_spin_locked(&swap_lock); + plist_for_each_entry(si, &swap_active_head, list) { + si_frontswap_pages = atomic_read(&si->frontswap_pages); + if (total_pages_to_unuse < si_frontswap_pages) { + pages = pages_to_unuse = total_pages_to_unuse; + } else { + pages = si_frontswap_pages; + pages_to_unuse = 0; /* unuse all */ + } + /* ensure there is enough RAM to fetch pages from frontswap */ + if (security_vm_enough_memory_mm(current->mm, pages)) { + ret = -ENOMEM; + continue; + } + vm_unacct_memory(pages); + *unused = pages_to_unuse; + *swapid = si->type; + ret = 0; + break; + } + + return ret; +} + +/* + * Used to check if it's necessary and feasible to unuse pages. + * Return 1 when nothing to do, 0 when need to shrink pages, + * error code when there is an error. 
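+ * (Together with frontswap_curr_pages() and frontswap_shrink() defined
+ * further down, this gives backends a way to hand pages back to the swap
+ * device under pressure, e.g.
+ *
+ *	frontswap_shrink(frontswap_curr_pages() / 2);
+ *
+ * asks the core to unuse enough pages that at most half of the current
+ * population remains in frontswap.)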
+ */ +static int __frontswap_shrink(unsigned long target_pages, + unsigned long *pages_to_unuse, + int *type) +{ + unsigned long total_pages = 0, total_pages_to_unuse; + + assert_spin_locked(&swap_lock); + + total_pages = __frontswap_curr_pages(); + if (total_pages <= target_pages) { + /* Nothing to do */ + *pages_to_unuse = 0; + return 1; + } + total_pages_to_unuse = total_pages - target_pages; + return __frontswap_unuse_pages(total_pages_to_unuse, pages_to_unuse, type); +} + +/* + * Frontswap, like a true swap device, may unnecessarily retain pages + * under certain circumstances; "shrink" frontswap is essentially a + * "partial swapoff" and works by calling try_to_unuse to attempt to + * unuse enough frontswap pages to attempt to -- subject to memory + * constraints -- reduce the number of pages in frontswap to the + * number given in the parameter target_pages. + */ +void frontswap_shrink(unsigned long target_pages) +{ + unsigned long pages_to_unuse = 0; + int type, ret; + + /* + * we don't want to hold swap_lock while doing a very + * lengthy try_to_unuse, but swap_list may change + * so restart scan from swap_active_head each time + */ + spin_lock(&swap_lock); + ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type); + spin_unlock(&swap_lock); + if (ret == 0) + try_to_unuse(type, true, pages_to_unuse); + return; +} +EXPORT_SYMBOL(frontswap_shrink); + +/* + * Count and return the number of frontswap pages across all + * swap devices. This is exported so that backend drivers can + * determine current usage without reading debugfs. + */ +unsigned long frontswap_curr_pages(void) +{ + unsigned long totalpages = 0; + + spin_lock(&swap_lock); + totalpages = __frontswap_curr_pages(); + spin_unlock(&swap_lock); + + return totalpages; +} +EXPORT_SYMBOL(frontswap_curr_pages); static int __init init_frontswap(void) { diff --git a/mm/gup.c b/mm/gup.c index a9d4d724ae..52f08e3177 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -667,17 +667,12 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, } retry: if (!pmd_present(pmdval)) { - /* - * Should never reach here, if thp migration is not supported; - * Otherwise, it must be a thp migration entry. - */ - VM_BUG_ON(!thp_migration_supported() || - !is_pmd_migration_entry(pmdval)); - if (likely(!(flags & FOLL_MIGRATION))) return no_page_table(vma, flags); - - pmd_migration_entry_wait(mm, pmd); + VM_BUG_ON(thp_migration_supported() && + !is_pmd_migration_entry(pmdval)); + if (is_pmd_migration_entry(pmdval)) + pmd_migration_entry_wait(mm, pmd); pmdval = READ_ONCE(*pmd); /* * MADV_DONTNEED may convert the pmd to null because @@ -948,8 +943,6 @@ static int faultin_page(struct vm_area_struct *vma, /* mlock all present pages, but do not fault in new pages */ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) return -ENOENT; - if (*flags & FOLL_NOFAULT) - return -EFAULT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) @@ -1688,143 +1681,6 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, } #endif /* !CONFIG_MMU */ -/** - * fault_in_writeable - fault in userspace address range for writing - * @uaddr: start of address range - * @size: size of address range - * - * Returns the number of bytes not faulted in (like copy_to_user() and - * copy_from_user()). 
- */ -size_t fault_in_writeable(char __user *uaddr, size_t size) -{ - char __user *start = uaddr, *end; - - if (unlikely(size == 0)) - return 0; - if (!user_write_access_begin(uaddr, size)) - return size; - if (!PAGE_ALIGNED(uaddr)) { - unsafe_put_user(0, uaddr, out); - uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr); - } - end = (char __user *)PAGE_ALIGN((unsigned long)start + size); - if (unlikely(end < start)) - end = NULL; - while (uaddr != end) { - unsafe_put_user(0, uaddr, out); - uaddr += PAGE_SIZE; - } - -out: - user_write_access_end(); - if (size > uaddr - start) - return size - (uaddr - start); - return 0; -} -EXPORT_SYMBOL(fault_in_writeable); - -/* - * fault_in_safe_writeable - fault in an address range for writing - * @uaddr: start of address range - * @size: length of address range - * - * Faults in an address range using get_user_pages, i.e., without triggering - * hardware page faults. This is primarily useful when we already know that - * some or all of the pages in the address range aren't in memory. - * - * Other than fault_in_writeable(), this function is non-destructive. - * - * Note that we don't pin or otherwise hold the pages referenced that we fault - * in. There's no guarantee that they'll stay in memory for any duration of - * time. - * - * Returns the number of bytes not faulted in, like copy_to_user() and - * copy_from_user(). - */ -size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) -{ - unsigned long start = (unsigned long)untagged_addr(uaddr); - unsigned long end, nstart, nend; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma = NULL; - int locked = 0; - - nstart = start & PAGE_MASK; - end = PAGE_ALIGN(start + size); - if (end < nstart) - end = 0; - for (; nstart != end; nstart = nend) { - unsigned long nr_pages; - long ret; - - if (!locked) { - locked = 1; - mmap_read_lock(mm); - vma = find_vma(mm, nstart); - } else if (nstart >= vma->vm_end) - vma = vma->vm_next; - if (!vma || vma->vm_start >= end) - break; - nend = end ? min(end, vma->vm_end) : vma->vm_end; - if (vma->vm_flags & (VM_IO | VM_PFNMAP)) - continue; - if (nstart < vma->vm_start) - nstart = vma->vm_start; - nr_pages = (nend - nstart) / PAGE_SIZE; - ret = __get_user_pages_locked(mm, nstart, nr_pages, - NULL, NULL, &locked, - FOLL_TOUCH | FOLL_WRITE); - if (ret <= 0) - break; - nend = nstart + ret * PAGE_SIZE; - } - if (locked) - mmap_read_unlock(mm); - if (nstart == end) - return 0; - return size - min_t(size_t, nstart - start, size); -} -EXPORT_SYMBOL(fault_in_safe_writeable); - -/** - * fault_in_readable - fault in userspace address range for reading - * @uaddr: start of user address range - * @size: size of user address range - * - * Returns the number of bytes not faulted in (like copy_to_user() and - * copy_from_user()). 
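- * (Like the helpers above, this was written for copy loops that run with
- * page faults disabled, for instance while holding a lock that the fault
- * handler might also need. The usual caller shape, sketched with
- * hypothetical names:
- *
- *	if (copy_from_user(dst, src, len)) {
- *		if (fault_in_readable(src, len))
- *			return -EFAULT;
- *		retry the copy_from_user() now that the range is resident
- *	}
- *
- * so the fault-in work only runs on the slow path, and a range that cannot
- * be faulted in at all fails the operation, mirroring how the restored
- * iov_iter_fault_in_readable() is used in generic_perform_write().)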
- */ -size_t fault_in_readable(const char __user *uaddr, size_t size) -{ - const char __user *start = uaddr, *end; - volatile char c; - - if (unlikely(size == 0)) - return 0; - if (!user_read_access_begin(uaddr, size)) - return size; - if (!PAGE_ALIGNED(uaddr)) { - unsafe_get_user(c, uaddr, out); - uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr); - } - end = (const char __user *)PAGE_ALIGN((unsigned long)start + size); - if (unlikely(end < start)) - end = NULL; - while (uaddr != end) { - unsafe_get_user(c, uaddr, out); - uaddr += PAGE_SIZE; - } - -out: - user_read_access_end(); - (void)c; - if (size > uaddr - start) - return size - (uaddr - start); - return 0; -} -EXPORT_SYMBOL(fault_in_readable); - /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address @@ -2397,6 +2253,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, { int nr_start = *nr; struct dev_pagemap *pgmap = NULL; + int ret = 1; do { struct page *page = pfn_to_page(pfn); @@ -2404,12 +2261,14 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, pgmap = get_dev_pagemap(pfn, pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, flags, pages); + ret = 0; break; } SetPageReferenced(page); pages[*nr] = page; if (unlikely(!try_grab_page(page, flags))) { undo_dev_pagemap(nr, nr_start, flags, pages); + ret = 0; break; } (*nr)++; @@ -2417,7 +2276,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, } while (addr += PAGE_SIZE, addr != end); put_dev_pagemap(pgmap); - return addr == end; + return ret; } static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, @@ -2874,7 +2733,7 @@ static int internal_get_user_pages_fast(unsigned long start, if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | - FOLL_FAST_ONLY | FOLL_NOFAULT))) + FOLL_FAST_ONLY))) return -EINVAL; if (gup_flags & FOLL_PIN) diff --git a/mm/highmem.c b/mm/highmem.c index 762679050c..1f0c8a52fd 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -359,6 +360,7 @@ void kunmap_high(struct page *page) } EXPORT_SYMBOL(kunmap_high); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { @@ -381,7 +383,7 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned this_end = min_t(unsigned, end1, PAGE_SIZE); if (end1 > start1) { - kaddr = kmap_local_page(page + i); + kaddr = kmap_atomic(page + i); memset(kaddr + start1, 0, this_end - start1); } end1 -= this_end; @@ -396,7 +398,7 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, if (end2 > start2) { if (!kaddr) - kaddr = kmap_local_page(page + i); + kaddr = kmap_atomic(page + i); memset(kaddr + start2, 0, this_end - start2); } end2 -= this_end; @@ -404,7 +406,7 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, } if (kaddr) { - kunmap_local(kaddr); + kunmap_atomic(kaddr); flush_dcache_page(page + i); } @@ -415,6 +417,7 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, BUG_ON((start1 | start2 | end1 | end2) != 0); } EXPORT_SYMBOL(zero_user_segments); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_HIGHMEM */ #ifdef CONFIG_KMAP_LOCAL diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 406a3c28c0..c5142d237e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ 
-603,7 +603,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_charge(page_folio(page), vma->vm_mm, gfp)) { + if (mem_cgroup_charge(page, vma->vm_mm, gfp)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK_CHARGE); @@ -1322,7 +1322,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf) * We can only reuse the page if nobody else maps the huge page or it's * part. */ - if (reuse_swap_page(page)) { + if (reuse_swap_page(page, NULL)) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); @@ -2405,8 +2405,7 @@ static void __split_huge_page_tail(struct page *head, int tail, static void __split_huge_page(struct page *page, struct list_head *list, pgoff_t end) { - struct folio *folio = page_folio(page); - struct page *head = &folio->page; + struct page *head = compound_head(page); struct lruvec *lruvec; struct address_space *swap_cache = NULL; unsigned long offset = 0; @@ -2425,7 +2424,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, } /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ - lruvec = folio_lruvec_lock(folio); + lruvec = lock_page_lruvec(head); ClearPageHasHWPoisoned(head); @@ -2542,28 +2541,38 @@ int total_mapcount(struct page *page) * need full accuracy to avoid breaking page pinning, because * page_trans_huge_mapcount() is slower than page_mapcount(). */ -int page_trans_huge_mapcount(struct page *page) +int page_trans_huge_mapcount(struct page *page, int *total_mapcount) { - int i, ret; + int i, ret, _total_mapcount, mapcount; /* hugetlbfs shouldn't call it */ VM_BUG_ON_PAGE(PageHuge(page), page); - if (likely(!PageTransCompound(page))) - return atomic_read(&page->_mapcount) + 1; + if (likely(!PageTransCompound(page))) { + mapcount = atomic_read(&page->_mapcount) + 1; + if (total_mapcount) + *total_mapcount = mapcount; + return mapcount; + } page = compound_head(page); - ret = 0; + _total_mapcount = ret = 0; for (i = 0; i < thp_nr_pages(page); i++) { - int mapcount = atomic_read(&page[i]._mapcount) + 1; + mapcount = atomic_read(&page[i]._mapcount) + 1; ret = max(ret, mapcount); + _total_mapcount += mapcount; } - - if (PageDoubleMap(page)) + if (PageDoubleMap(page)) { ret -= 1; - - return ret + compound_mapcount(page); + _total_mapcount -= thp_nr_pages(page); + } + mapcount = compound_mapcount(page); + ret += mapcount; + _total_mapcount += mapcount; + if (total_mapcount) + *total_mapcount = _total_mapcount; + return ret; } /* Racy check whether the huge page can be split */ @@ -2604,7 +2613,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) { struct page *head = compound_head(page); struct deferred_split *ds_queue = get_deferred_split_queue(head); - XA_STATE(xas, &head->mapping->i_pages, head->index); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; int extra_pins, ret; @@ -2643,13 +2651,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) goto out; } - xas_split_alloc(&xas, head, compound_order(head), - mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK); - if (xas_error(&xas)) { - ret = xas_error(&xas); - goto out; - } - anon_vma = NULL; i_mmap_lock_read(mapping); @@ -2679,12 +2680,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) /* block interrupt reentry in xa_lock and spinlock */ local_irq_disable(); if (mapping) { + XA_STATE(xas, &mapping->i_pages, page_index(head)); + /* * Check 
if the head page is present in page cache. * We assume all tail are present too, if head is there. */ - xas_lock(&xas); - xas_reset(&xas); + xa_lock(&mapping->i_pages); if (xas_load(&xas) != head) goto fail; } @@ -2700,7 +2702,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) { int nr = thp_nr_pages(head); - xas_split(&xas, head, thp_order(head)); if (PageSwapBacked(head)) { __mod_lruvec_page_state(head, NR_SHMEM_THPS, -nr); @@ -2717,7 +2718,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) spin_unlock(&ds_queue->split_queue_lock); fail: if (mapping) - xas_unlock(&xas); + xa_unlock(&mapping->i_pages); local_irq_enable(); remap_page(head, thp_nr_pages(head)); ret = -EBUSY; @@ -2731,8 +2732,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) i_mmap_unlock_read(mapping); out: - /* Free any memory we didn't use */ - xas_nomem(&xas, 0); count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED); return ret; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f294db835f..f5ed988435 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -50,17 +50,6 @@ struct hstate hstates[HUGE_MAX_HSTATE]; #ifdef CONFIG_CMA static struct cma *hugetlb_cma[MAX_NUMNODES]; -static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata; -static bool hugetlb_cma_page(struct page *page, unsigned int order) -{ - return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page, - 1 << order); -} -#else -static bool hugetlb_cma_page(struct page *page, unsigned int order) -{ - return false; -} #endif static unsigned long hugetlb_cma_size __initdata; @@ -77,7 +66,6 @@ static struct hstate * __initdata parsed_hstate; static unsigned long __initdata default_hstate_max_huge_pages; static bool __initdata parsed_valid_hugepagesz = true; static bool __initdata parsed_default_hugepagesz; -static unsigned int default_hugepages_in_node[MAX_NUMNODES] __initdata; /* * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages, @@ -333,7 +321,8 @@ static bool has_same_uncharge_info(struct file_region *rg, struct file_region *org) { #ifdef CONFIG_CGROUP_HUGETLB - return rg->reservation_counter == org->reservation_counter && + return rg && org && + rg->reservation_counter == org->reservation_counter && rg->css == org->css; #else @@ -446,6 +435,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t, add += hugetlb_resv_map_add(resv, rg, last_accounted_offset, t, h, h_cg, regions_needed); + VM_BUG_ON(add < 0); return add; } @@ -1014,37 +1004,6 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma) vma->vm_private_data = (void *)0; } -/* - * Reset and decrement one ref on hugepage private reservation. - * Called with mm->mmap_sem writer semaphore held. - * This function should be only used by move_vma() and operate on - * same sized vma. It should never come here with last ref on the - * reservation. - */ -void clear_vma_resv_huge_pages(struct vm_area_struct *vma) -{ - /* - * Clear the old hugetlb private page reservation. - * It has already been transferred to new_vma. - * - * During a mremap() operation of a hugetlb vma we call move_vma() - * which copies vma into new_vma and unmaps vma. After the copy - * operation both new_vma and vma share a reference to the resv_map - * struct, and at that point vma is about to be unmapped. 
We don't - * want to return the reservation to the pool at unmap of vma because - * the reservation still lives on in new_vma, so simply decrement the - * ref here and remove the resv_map reference from this vma. - */ - struct resv_map *reservations = vma_resv_map(vma); - - if (reservations && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { - resv_map_put_hugetlb_cgroup_uncharge_info(reservations); - kref_put(&reservations->refs, resv_map_release); - } - - reset_vma_resv_huge_pages(vma); -} - /* Returns true if the VMA has associated reserve pages */ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) { @@ -1301,9 +1260,9 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -/* used to demote non-gigantic_huge pages as well */ -static void __destroy_compound_gigantic_page(struct page *page, - unsigned int order, bool demote) +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE +static void destroy_compound_gigantic_page(struct page *page, + unsigned int order) { int i; int nr_pages = 1 << order; @@ -1313,10 +1272,8 @@ static void __destroy_compound_gigantic_page(struct page *page, atomic_set(compound_pincount_ptr(page), 0); for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { - p->mapping = NULL; clear_compound_head(p); - if (!demote) - set_page_refcounted(p); + set_page_refcounted(p); } set_compound_order(page, 0); @@ -1324,19 +1281,6 @@ static void __destroy_compound_gigantic_page(struct page *page, __ClearPageHead(page); } -static void destroy_compound_hugetlb_page_for_demote(struct page *page, - unsigned int order) -{ - __destroy_compound_gigantic_page(page, order, true); -} - -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static void destroy_compound_gigantic_page(struct page *page, - unsigned int order) -{ - __destroy_compound_gigantic_page(page, order, false); -} - static void free_gigantic_page(struct page *page, unsigned int order) { /* @@ -1409,15 +1353,12 @@ static inline void destroy_compound_gigantic_page(struct page *page, /* * Remove hugetlb page from lists, and update dtor so that page appears - * as just a compound page. - * - * A reference is held on the page, except in the case of demote. + * as just a compound page. A reference is held on the page. * * Must be called with hugetlb lock held. */ -static void __remove_hugetlb_page(struct hstate *h, struct page *page, - bool adjust_surplus, - bool demote) +static void remove_hugetlb_page(struct hstate *h, struct page *page, + bool adjust_surplus) { int nid = page_to_nid(page); @@ -1455,12 +1396,8 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, * * This handles the case where more than one ref is held when and * after update_and_free_page is called. - * - * In the case of demote we do not ref count the page as it will soon - * be turned into a page of smaller size. 
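- * (The demote path depends on this: the constituent pages keep a zero
- * refcount, and __prep_compound_gigantic_page() asserts page_count() == 0
- * on them instead of freezing references when it rebuilds the page at the
- * smaller order.)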
*/ - if (!demote) - set_page_refcounted(page); + set_page_refcounted(page); if (hstate_is_gigantic(h)) set_compound_page_dtor(page, NULL_COMPOUND_DTOR); else @@ -1470,18 +1407,6 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, h->nr_huge_pages_node[nid]--; } -static void remove_hugetlb_page(struct hstate *h, struct page *page, - bool adjust_surplus) -{ - __remove_hugetlb_page(h, page, adjust_surplus, false); -} - -static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page, - bool adjust_surplus) -{ - __remove_hugetlb_page(h, page, adjust_surplus, true); -} - static void add_hugetlb_page(struct hstate *h, struct page *page, bool adjust_surplus) { @@ -1551,13 +1476,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) 1 << PG_active | 1 << PG_private | 1 << PG_writeback); } - - /* - * Non-gigantic pages demoted from CMA allocated gigantic pages - * need to be given back to CMA in free_gigantic_page. - */ - if (hstate_is_gigantic(h) || - hugetlb_cma_page(page, huge_page_order(h))) { + if (hstate_is_gigantic(h)) { destroy_compound_gigantic_page(page, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); } else { @@ -1745,8 +1664,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) spin_unlock_irq(&hugetlb_lock); } -static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, - bool demote) +static bool prep_compound_gigantic_page(struct page *page, unsigned int order) { int i, j; int nr_pages = 1 << order; @@ -1784,17 +1702,12 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, * the set of pages can not be converted to a gigantic page. * The caller who allocated the pages should then discard the * pages using the appropriate free interface. - * - * In the case of demote, the ref count will be zero. */ - if (!demote) { - if (!page_ref_freeze(p, 1)) { - pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); - goto out_error; - } - } else { - VM_BUG_ON_PAGE(page_count(p), p); + if (!page_ref_freeze(p, 1)) { + pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); + goto out_error; } + set_page_count(p, 0); set_compound_head(p, page); } atomic_set(compound_mapcount_ptr(page), -1); @@ -1817,17 +1730,6 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, return false; } -static bool prep_compound_gigantic_page(struct page *page, unsigned int order) -{ - return __prep_compound_gigantic_page(page, order, false); -} - -static bool prep_compound_gigantic_page_for_demote(struct page *page, - unsigned int order) -{ - return __prep_compound_gigantic_page(page, order, true); -} - /* * PageHuge() only returns true for hugetlbfs pages, but not for normal or * transparent huge pages. 
See the PageTransHuge() documentation for more @@ -2966,39 +2868,33 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); } -int alloc_bootmem_huge_page(struct hstate *h, int nid) +int alloc_bootmem_huge_page(struct hstate *h) __attribute__ ((weak, alias("__alloc_bootmem_huge_page"))); -int __alloc_bootmem_huge_page(struct hstate *h, int nid) +int __alloc_bootmem_huge_page(struct hstate *h) { - struct huge_bootmem_page *m = NULL; /* initialize for clang */ + struct huge_bootmem_page *m; int nr_nodes, node; - if (nid != NUMA_NO_NODE && nid >= nr_online_nodes) - return 0; - /* do node specific alloc */ - if (nid != NUMA_NO_NODE) { - m = memblock_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h), - 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid); - if (!m) - return 0; - goto found; - } - /* allocate from next node when distributing huge pages */ for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) { - m = memblock_alloc_try_nid_raw( + void *addr; + + addr = memblock_alloc_try_nid_raw( huge_page_size(h), huge_page_size(h), 0, MEMBLOCK_ALLOC_ACCESSIBLE, node); - /* - * Use the beginning of the huge page to store the - * huge_bootmem_page struct (until gather_bootmem - * puts them into the mem_map). - */ - if (!m) - return 0; - goto found; + if (addr) { + /* + * Use the beginning of the huge page to store the + * huge_bootmem_page struct (until gather_bootmem + * puts them into the mem_map). + */ + m = addr; + goto found; + } } + return 0; found: + BUG_ON(!IS_ALIGNED(virt_to_phys(m), huge_page_size(h))); /* Put them into a private list first because mem_map is not up yet */ INIT_LIST_HEAD(&m->list); list_add(&m->list, &huge_boot_pages); @@ -3038,61 +2934,12 @@ static void __init gather_bootmem_prealloc(void) cond_resched(); } } -static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid) -{ - unsigned long i; - char buf[32]; - - for (i = 0; i < h->max_huge_pages_node[nid]; ++i) { - if (hstate_is_gigantic(h)) { - if (!alloc_bootmem_huge_page(h, nid)) - break; - } else { - struct page *page; - gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; - - page = alloc_fresh_huge_page(h, gfp_mask, nid, - &node_states[N_MEMORY], NULL); - if (!page) - break; - put_page(page); /* free it into the hugepage allocator */ - } - cond_resched(); - } - if (i == h->max_huge_pages_node[nid]) - return; - - string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32); - pr_warn("HugeTLB: allocating %u of page size %s failed node%d. Only allocated %lu hugepages.\n", - h->max_huge_pages_node[nid], buf, nid, i); - h->max_huge_pages -= (h->max_huge_pages_node[nid] - i); - h->max_huge_pages_node[nid] = i; -} static void __init hugetlb_hstate_alloc_pages(struct hstate *h) { unsigned long i; nodemask_t *node_alloc_noretry; - bool node_specific_alloc = false; - /* skip gigantic hugepages allocation if hugetlb_cma enabled */ - if (hstate_is_gigantic(h) && hugetlb_cma_size) { - pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); - return; - } - - /* do node specific alloc */ - for (i = 0; i < nr_online_nodes; i++) { - if (h->max_huge_pages_node[i] > 0) { - hugetlb_hstate_alloc_pages_onenode(h, i); - node_specific_alloc = true; - } - } - - if (node_specific_alloc) - return; - - /* below will do all node balanced alloc */ if (!hstate_is_gigantic(h)) { /* * Bit mask controlling how hard we retry per-node allocations. 
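 * (Once an allocation fails on a node, its bit is set here and later
 * attempts on that node are made without trying as hard, an assumption
 * drawn from the allocation paths not shown in this hunk, so a single
 * struggling node cannot stall the whole loop.)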
@@ -3113,7 +2960,11 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) for (i = 0; i < h->max_huge_pages; ++i) { if (hstate_is_gigantic(h)) { - if (!alloc_bootmem_huge_page(h, NUMA_NO_NODE)) + if (hugetlb_cma_size) { + pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); + goto free; + } + if (!alloc_bootmem_huge_page(h)) break; } else if (!alloc_pool_huge_page(h, &node_states[N_MEMORY], @@ -3129,12 +2980,13 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) h->max_huge_pages, buf, i); h->max_huge_pages = i; } +free: kfree(node_alloc_noretry); } static void __init hugetlb_init_hstates(void) { - struct hstate *h, *h2; + struct hstate *h; for_each_hstate(h) { if (minimum_order > huge_page_order(h)) @@ -3143,26 +2995,6 @@ static void __init hugetlb_init_hstates(void) /* oversize hugepages were init'ed in early boot */ if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); - - /* - * Set demote order for each hstate. Note that - * h->demote_order is initially 0. - * - We can not demote gigantic pages if runtime freeing - * is not supported, so skip this. - * - If CMA allocation is possible, we can not demote - * HUGETLB_PAGE_ORDER or smaller size pages. - */ - if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) - continue; - if (hugetlb_cma_size && h->order <= HUGETLB_PAGE_ORDER) - continue; - for_each_hstate(h2) { - if (h2 == h) - continue; - if (h2->order < h->order && - h2->order > h->demote_order) - h->demote_order = h2->order; - } } VM_BUG_ON(minimum_order == UINT_MAX); } @@ -3403,100 +3235,9 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, return 0; } -static int demote_free_huge_page(struct hstate *h, struct page *page) -{ - int i, nid = page_to_nid(page); - struct hstate *target_hstate; - int rc = 0; - - target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); - - remove_hugetlb_page_for_demote(h, page, false); - spin_unlock_irq(&hugetlb_lock); - - rc = alloc_huge_page_vmemmap(h, page); - if (rc) { - /* Allocation of vmemmmap failed, we can not demote page */ - spin_lock_irq(&hugetlb_lock); - set_page_refcounted(page); - add_hugetlb_page(h, page, false); - return rc; - } - - /* - * Use destroy_compound_hugetlb_page_for_demote for all huge page - * sizes as it will not ref count pages. - */ - destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h)); - - /* - * Taking target hstate mutex synchronizes with set_max_huge_pages. - * Without the mutex, pages added to target hstate could be marked - * as surplus. - * - * Note that we already hold h->resize_lock. To prevent deadlock, - * use the convention of always taking larger size hstate mutex first. - */ - mutex_lock(&target_hstate->resize_lock); - for (i = 0; i < pages_per_huge_page(h); - i += pages_per_huge_page(target_hstate)) { - if (hstate_is_gigantic(target_hstate)) - prep_compound_gigantic_page_for_demote(page + i, - target_hstate->order); - else - prep_compound_page(page + i, target_hstate->order); - set_page_private(page + i, 0); - set_page_refcounted(page + i); - prep_new_huge_page(target_hstate, page + i, nid); - put_page(page + i); - } - mutex_unlock(&target_hstate->resize_lock); - - spin_lock_irq(&hugetlb_lock); - - /* - * Not absolutely necessary, but for consistency update max_huge_pages - * based on pool changes for the demoted page. 
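- * (Worked numbers, assuming x86-64 with 4 KiB base pages: demoting one
- * 1 GiB page, i.e. 262144 base pages, into 2 MiB pages of 512 base pages
- * each runs the prep loop above 262144 / 512 = 512 times, so one gigantic
- * page becomes 512 smaller huge pages.)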
- */ - h->max_huge_pages--; - target_hstate->max_huge_pages += pages_per_huge_page(h); - - return rc; -} - -static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) - __must_hold(&hugetlb_lock) -{ - int nr_nodes, node; - struct page *page; - int rc = 0; - - lockdep_assert_held(&hugetlb_lock); - - /* We should never get here if no demote order */ - if (!h->demote_order) { - pr_warn("HugeTLB: NULL demote order passed to demote_pool_huge_page.\n"); - return -EINVAL; /* internal error */ - } - - for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { - if (!list_empty(&h->hugepage_freelists[node])) { - page = list_entry(h->hugepage_freelists[node].next, - struct page, lru); - rc = demote_free_huge_page(h, page); - break; - } - } - - return rc; -} - #define HSTATE_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) -#define HSTATE_ATTR_WO(_name) \ - static struct kobj_attribute _name##_attr = __ATTR_WO(_name) - #define HSTATE_ATTR(_name) \ static struct kobj_attribute _name##_attr = \ __ATTR(_name, 0644, _name##_show, _name##_store) @@ -3692,103 +3433,6 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj, } HSTATE_ATTR_RO(surplus_hugepages); -static ssize_t demote_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t len) -{ - unsigned long nr_demote; - unsigned long nr_available; - nodemask_t nodes_allowed, *n_mask; - struct hstate *h; - int err = 0; - int nid; - - err = kstrtoul(buf, 10, &nr_demote); - if (err) - return err; - h = kobj_to_hstate(kobj, &nid); - - if (nid != NUMA_NO_NODE) { - init_nodemask_of_node(&nodes_allowed, nid); - n_mask = &nodes_allowed; - } else { - n_mask = &node_states[N_MEMORY]; - } - - /* Synchronize with other sysfs operations modifying huge pages */ - mutex_lock(&h->resize_lock); - spin_lock_irq(&hugetlb_lock); - - while (nr_demote) { - /* - * Check for available pages to demote each time through the - * loop as demote_pool_huge_page will drop hugetlb_lock.
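- * (For reference, this store method backed the per-hstate sysfs file
- * removed by this patch; the interface was driven from user space roughly
- * as
- *	echo 8 > /sys/kernel/mm/hugepages/hugepages-1048576kB/demote
- * to split eight free 1 GiB pages into pages of the configured
- * demote_size.)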
- */ - if (nid != NUMA_NO_NODE) - nr_available = h->free_huge_pages_node[nid]; - else - nr_available = h->free_huge_pages; - nr_available -= h->resv_huge_pages; - if (!nr_available) - break; - - err = demote_pool_huge_page(h, n_mask); - if (err) - break; - - nr_demote--; - } - - spin_unlock_irq(&hugetlb_lock); - mutex_unlock(&h->resize_lock); - - if (err) - return err; - return len; -} -HSTATE_ATTR_WO(demote); - -static ssize_t demote_size_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - int nid; - struct hstate *h = kobj_to_hstate(kobj, &nid); - unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; - - return sysfs_emit(buf, "%lukB\n", demote_size); -} - -static ssize_t demote_size_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct hstate *h, *demote_hstate; - unsigned long demote_size; - unsigned int demote_order; - int nid; - - demote_size = (unsigned long)memparse(buf, NULL); - - demote_hstate = size_to_hstate(demote_size); - if (!demote_hstate) - return -EINVAL; - demote_order = demote_hstate->order; - if (demote_order < HUGETLB_PAGE_ORDER) - return -EINVAL; - - /* demote order must be smaller than hstate order */ - h = kobj_to_hstate(kobj, &nid); - if (demote_order >= h->order) - return -EINVAL; - - /* resize_lock synchronizes access to demote size and writes */ - mutex_lock(&h->resize_lock); - h->demote_order = demote_order; - mutex_unlock(&h->resize_lock); - - return count; -} -HSTATE_ATTR(demote_size); - static struct attribute *hstate_attrs[] = { &nr_hugepages_attr.attr, &nr_overcommit_hugepages_attr.attr, @@ -3805,16 +3449,6 @@ static const struct attribute_group hstate_attr_group = { .attrs = hstate_attrs, }; -static struct attribute *hstate_demote_attrs[] = { - &demote_size_attr.attr, - &demote_attr.attr, - NULL, -}; - -static const struct attribute_group hstate_demote_attr_group = { - .attrs = hstate_demote_attrs, -}; - static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, struct kobject **hstate_kobjs, const struct attribute_group *hstate_attr_group) @@ -3832,12 +3466,6 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, hstate_kobjs[hi] = NULL; } - if (h->demote_order) { - if (sysfs_create_group(hstate_kobjs[hi], - &hstate_demote_attr_group)) - pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); - } - return retval; } @@ -4043,10 +3671,6 @@ static int __init hugetlb_init(void) } default_hstate.max_huge_pages = default_hstate_max_huge_pages; - - for (i = 0; i < nr_online_nodes; i++) - default_hstate.max_huge_pages_node[i] = - default_hugepages_in_node[i]; } } @@ -4107,10 +3731,6 @@ void __init hugetlb_add_hstate(unsigned int order) parsed_hstate = h; } -bool __init __weak hugetlb_node_alloc_supported(void) -{ - return true; -} /* * hugepages command line processing * hugepages normally follows a valid hugepagsz or default_hugepagsz @@ -4122,10 +3742,6 @@ static int __init hugepages_setup(char *s) { unsigned long *mhp; static unsigned long *last_mhp; - int node = NUMA_NO_NODE; - int count; - unsigned long tmp; - char *p = s; if (!parsed_valid_hugepagesz) { pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s); @@ -4149,40 +3765,8 @@ static int __init hugepages_setup(char *s) return 0; } - while (*p) { - count = 0; - if (sscanf(p, "%lu%n", &tmp, &count) != 1) - goto invalid; - /* Parameter is node format */ - if (p[count] == ':') { - if (!hugetlb_node_alloc_supported()) { - 
pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n"); - return 0; - } - if (tmp >= nr_online_nodes) - goto invalid; - node = tmp; - p += count + 1; - /* Parse hugepages */ - if (sscanf(p, "%lu%n", &tmp, &count) != 1) - goto invalid; - if (!hugetlb_max_hstate) - default_hugepages_in_node[node] = tmp; - else - parsed_hstate->max_huge_pages_node[node] = tmp; - *mhp += tmp; - /* Go to parse next node*/ - if (p[count] == ',') - p += count + 1; - else - break; - } else { - if (p != s) - goto invalid; - *mhp = tmp; - break; - } - } + if (sscanf(s, "%lu", mhp) <= 0) + *mhp = 0; /* * Global state is always initialized later in hugetlb_init. @@ -4195,10 +3779,6 @@ static int __init hugepages_setup(char *s) last_mhp = mhp; return 1; - -invalid: - pr_warn("HugeTLB: Invalid hugepages parameter %s\n", p); - return 0; } __setup("hugepages=", hugepages_setup); @@ -4260,7 +3840,6 @@ __setup("hugepagesz=", hugepagesz_setup); static int __init default_hugepagesz_setup(char *s) { unsigned long size; - int i; parsed_valid_hugepagesz = false; if (parsed_default_hugepagesz) { @@ -4289,9 +3868,6 @@ static int __init default_hugepagesz_setup(char *s) */ if (default_hstate_max_huge_pages) { default_hstate.max_huge_pages = default_hstate_max_huge_pages; - for (i = 0; i < nr_online_nodes; i++) - default_hstate.max_huge_pages_node[i] = - default_hugepages_in_node[i]; if (hstate_is_gigantic(&default_hstate)) hugetlb_hstate_alloc_pages(&default_hstate); default_hstate_max_huge_pages = 0; @@ -4684,8 +4260,8 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr struct page *new_page) { __SetPageUptodate(new_page); - hugepage_add_new_anon_rmap(new_page, vma, addr); set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1)); + hugepage_add_new_anon_rmap(new_page, vma, addr); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); ClearHPageRestoreReserve(new_page); SetHPageMigratable(new_page); @@ -4850,84 +4426,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, return ret; } -static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte) -{ - struct hstate *h = hstate_vma(vma); - struct mm_struct *mm = vma->vm_mm; - spinlock_t *src_ptl, *dst_ptl; - pte_t pte; - - dst_ptl = huge_pte_lock(h, mm, dst_pte); - src_ptl = huge_pte_lockptr(h, mm, src_pte); - - /* - * We don't have to worry about the ordering of src and dst ptlocks - * because exclusive mmap_sem (or the i_mmap_lock) prevents deadlock. 
- */ - if (src_ptl != dst_ptl) - spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); - - pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); - set_huge_pte_at(mm, new_addr, dst_pte, pte); - - if (src_ptl != dst_ptl) - spin_unlock(src_ptl); - spin_unlock(dst_ptl); -} - -int move_hugetlb_page_tables(struct vm_area_struct *vma, - struct vm_area_struct *new_vma, - unsigned long old_addr, unsigned long new_addr, - unsigned long len) -{ - struct hstate *h = hstate_vma(vma); - struct address_space *mapping = vma->vm_file->f_mapping; - unsigned long sz = huge_page_size(h); - struct mm_struct *mm = vma->vm_mm; - unsigned long old_end = old_addr + len; - unsigned long old_addr_copy; - pte_t *src_pte, *dst_pte; - struct mmu_notifier_range range; - - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, old_addr, - old_end); - adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); - mmu_notifier_invalidate_range_start(&range); - /* Prevent race with file truncation */ - i_mmap_lock_write(mapping); - for (; old_addr < old_end; old_addr += sz, new_addr += sz) { - src_pte = huge_pte_offset(mm, old_addr, sz); - if (!src_pte) - continue; - if (huge_pte_none(huge_ptep_get(src_pte))) - continue; - - /* old_addr arg to huge_pmd_unshare() is a pointer and so the - * arg may be modified. Pass a copy instead to preserve the - * value in old_addr. - */ - old_addr_copy = old_addr; - - if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte)) - continue; - - dst_pte = huge_pte_alloc(mm, new_vma, new_addr, sz); - if (!dst_pte) - break; - - move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte); - } - flush_tlb_range(vma, old_end - len, old_end); - mmu_notifier_invalidate_range_end(&range); - i_mmap_unlock_write(mapping); - - return len + old_addr - old_end; -} - -static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct page *ref_page) +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct page *ref_page) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -5130,7 +4631,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, /* * Hugetlb_cow() should be called with page lock of the original hugepage held. - * Called with hugetlb_fault_mutex_table held and pte_page locked so we + * Called with hugetlb_instantiation_mutex held and pte_page locked so we * cannot race with other handlers or page migration. * Keep the pte_same checks anyway to make transition from the mutex easier. */ @@ -5258,10 +4759,10 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, /* Break COW */ huge_ptep_clear_flush(vma, haddr, ptep); mmu_notifier_invalidate_range(mm, range.start, range.end); - page_remove_rmap(old_page, true); - hugepage_add_new_anon_rmap(new_page, vma, haddr); set_huge_pte_at(mm, haddr, ptep, make_huge_pte(vma, new_page, 1)); + page_remove_rmap(old_page, true); + hugepage_add_new_anon_rmap(new_page, vma, haddr); SetHPageMigratable(new_page); /* Make the old page be freed below */ new_page = old_page; @@ -5817,7 +5318,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, *pagep = NULL; goto out; } - folio_copy(page_folio(page), page_folio(*pagep)); + copy_huge_page(page, *pagep); put_page(*pagep); *pagep = NULL; } @@ -6480,6 +5981,12 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, * sharing is possible. 
For hugetlbfs, this prevents removal of any page * table entries associated with the address space. This is important as we * are setting up sharing based on existing page table entries (mappings). + * + * NOTE: This routine is only called from huge_pte_alloc. Some callers of + * huge_pte_alloc know that sharing is not possible and do not take + * i_mmap_rwsem as a performance optimization. This is handled by the + * if !vma_shareable check at the beginning of the routine. i_mmap_rwsem is + * only required for subsequent processing. */ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pud_t *pud) @@ -6880,38 +6387,7 @@ static bool cma_reserve_called __initdata; static int __init cmdline_parse_hugetlb_cma(char *p) { - int nid, count = 0; - unsigned long tmp; - char *s = p; - - while (*s) { - if (sscanf(s, "%lu%n", &tmp, &count) != 1) - break; - - if (s[count] == ':') { - nid = tmp; - if (nid < 0 || nid >= MAX_NUMNODES) - break; - - s += count + 1; - tmp = memparse(s, &s); - hugetlb_cma_size_in_node[nid] = tmp; - hugetlb_cma_size += tmp; - - /* - * Skip the separator if have one, otherwise - * break the parsing. - */ - if (*s == ',') - s++; - else - break; - } else { - hugetlb_cma_size = memparse(p, &p); - break; - } - } - + hugetlb_cma_size = memparse(p, &p); return 0; } @@ -6920,80 +6396,37 @@ early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); void __init hugetlb_cma_reserve(int order) { unsigned long size, reserved, per_node; - bool node_specific_cma_alloc = false; int nid; cma_reserve_called = true; - if (!hugetlb_cma_size) - return; - - for (nid = 0; nid < MAX_NUMNODES; nid++) { - if (hugetlb_cma_size_in_node[nid] == 0) - continue; - - if (!node_state(nid, N_ONLINE)) { - pr_warn("hugetlb_cma: invalid node %d specified\n", nid); - hugetlb_cma_size -= hugetlb_cma_size_in_node[nid]; - hugetlb_cma_size_in_node[nid] = 0; - continue; - } - - if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) { - pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n", - nid, (PAGE_SIZE << order) / SZ_1M); - hugetlb_cma_size -= hugetlb_cma_size_in_node[nid]; - hugetlb_cma_size_in_node[nid] = 0; - } else { - node_specific_cma_alloc = true; - } - } - - /* Validate the CMA size again in case some invalid nodes specified. */ if (!hugetlb_cma_size) return; if (hugetlb_cma_size < (PAGE_SIZE << order)) { pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n", (PAGE_SIZE << order) / SZ_1M); - hugetlb_cma_size = 0; return; } - if (!node_specific_cma_alloc) { - /* - * If 3 GB area is requested on a machine with 4 numa nodes, - * let's allocate 1 GB on first three nodes and ignore the last one. - */ - per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); - pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", - hugetlb_cma_size / SZ_1M, per_node / SZ_1M); - } + /* + * If 3 GB area is requested on a machine with 4 numa nodes, + * let's allocate 1 GB on first three nodes and ignore the last one. 
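A worked pass over the reservation loop below (this example assumes hugetlb_cma=3G on the command line, four online nodes, and 1 GiB gigantic pages, i.e. PAGE_SIZE << order == 1 GiB):

	per_node = DIV_ROUND_UP(3 GiB, 4) = 768 MiB
	node 0: size = min(768 MiB, 3 GiB - 0 GiB), rounded up to the 1 GiB granule (reserved = 1 GiB)
	node 1: size = min(768 MiB, 3 GiB - 1 GiB), rounded up to 1 GiB (reserved = 2 GiB)
	node 2: size = min(768 MiB, 3 GiB - 2 GiB), rounded up to 1 GiB (reserved = 3 GiB)
	node 3: never reached, since reserved >= hugetlb_cma_size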
+ */ + per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); + pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", + hugetlb_cma_size / SZ_1M, per_node / SZ_1M); reserved = 0; for_each_node_state(nid, N_ONLINE) { int res; char name[CMA_MAX_NAME]; - if (node_specific_cma_alloc) { - if (hugetlb_cma_size_in_node[nid] == 0) - continue; - - size = hugetlb_cma_size_in_node[nid]; - } else { - size = min(per_node, hugetlb_cma_size - reserved); - } - + size = min(per_node, hugetlb_cma_size - reserved); size = round_up(size, PAGE_SIZE << order); snprintf(name, sizeof(name), "hugetlb%d", nid); - /* - * Note that 'order per bit' is based on smallest size that - * may be returned to CMA allocator in the case of - * huge page demotion. - */ - res = cma_declare_contiguous_nid(0, size, 0, - PAGE_SIZE << HUGETLB_PAGE_ORDER, + res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, 0, false, name, &hugetlb_cma[nid], nid); if (res) { @@ -7009,13 +6442,6 @@ void __init hugetlb_cma_reserve(int order) if (reserved >= hugetlb_cma_size) break; } - - if (!reserved) - /* - * hugetlb_cma_size is used to determine if allocations from - * cma are possible. Set to zero if no cma regions are set up. - */ - hugetlb_cma_size = 0; } void __init hugetlb_cma_check(void) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index f9942841df..5383023d0c 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -27,6 +27,9 @@ #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) +#define hugetlb_cgroup_from_counter(counter, idx) \ + container_of(counter, struct hugetlb_cgroup, hugepage[idx]) + static struct hugetlb_cgroup *root_h_cgroup __read_mostly; static inline struct page_counter * @@ -123,58 +126,29 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, } } -static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup) -{ - int node; - - for_each_node(node) - kfree(h_cgroup->nodeinfo[node]); - kfree(h_cgroup); -} - static struct cgroup_subsys_state * hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) { struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); struct hugetlb_cgroup *h_cgroup; - int node; - - h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids), - GFP_KERNEL); + h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); if (!h_cgroup) return ERR_PTR(-ENOMEM); if (!parent_h_cgroup) root_h_cgroup = h_cgroup; - /* - * TODO: this routine can waste much memory for nodes which will - * never be onlined. It's better to use memory hotplug callback - * function. - */ - for_each_node(node) { - /* Set node_to_alloc to -1 for offline nodes. */ - int node_to_alloc = - node_state(node, N_NORMAL_MEMORY) ? 
node : -1; - h_cgroup->nodeinfo[node] = - kzalloc_node(sizeof(struct hugetlb_cgroup_per_node), - GFP_KERNEL, node_to_alloc); - if (!h_cgroup->nodeinfo[node]) - goto fail_alloc_nodeinfo; - } - hugetlb_cgroup_init(h_cgroup, parent_h_cgroup); return &h_cgroup->css; - -fail_alloc_nodeinfo: - hugetlb_cgroup_free(h_cgroup); - return ERR_PTR(-ENOMEM); } static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) { - hugetlb_cgroup_free(hugetlb_cgroup_from_css(css)); + struct hugetlb_cgroup *h_cgroup; + + h_cgroup = hugetlb_cgroup_from_css(css); + kfree(h_cgroup); } /* @@ -318,17 +292,7 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, return; __set_hugetlb_cgroup(page, h_cg, rsvd); - if (!rsvd) { - unsigned long usage = - h_cg->nodeinfo[page_to_nid(page)]->usage[idx]; - /* - * This write is not atomic due to fetching usage and writing - * to it, but that's fine because we call this with - * hugetlb_lock held anyway. - */ - WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx], - usage + nr_pages); - } + return; } void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, @@ -367,17 +331,8 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, if (rsvd) css_put(&h_cg->css); - else { - unsigned long usage = - h_cg->nodeinfo[page_to_nid(page)]->usage[idx]; - /* - * This write is not atomic due to fetching usage and writing - * to it, but that's fine because we call this with - * hugetlb_lock held anyway. - */ - WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx], - usage - nr_pages); - } + + return; } void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, @@ -466,59 +421,6 @@ enum { RES_RSVD_FAILCNT, }; -static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy) -{ - int nid; - struct cftype *cft = seq_cft(seq); - int idx = MEMFILE_IDX(cft->private); - bool legacy = MEMFILE_ATTR(cft->private); - struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); - struct cgroup_subsys_state *css; - unsigned long usage; - - if (legacy) { - /* Add up usage across all nodes for the non-hierarchical total. */ - usage = 0; - for_each_node_state(nid, N_MEMORY) - usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]); - seq_printf(seq, "total=%lu", usage * PAGE_SIZE); - - /* Simply print the per-node usage for the non-hierarchical total. */ - for_each_node_state(nid, N_MEMORY) - seq_printf(seq, " N%d=%lu", nid, - READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) * - PAGE_SIZE); - seq_putc(seq, '\n'); - } - - /* - * The hierarchical total is pretty much the value recorded by the - * counter, so use that. - */ - seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "", - page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE); - - /* - * For each node, traverse the css tree to obtain the hierarchical - * node usage. 
- */ - for_each_node_state(nid, N_MEMORY) { - usage = 0; - rcu_read_lock(); - css_for_each_descendant_pre(css, &h_cg->css) { - usage += READ_ONCE(hugetlb_cgroup_from_css(css) - ->nodeinfo[nid] - ->usage[idx]); - } - rcu_read_unlock(); - seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE); - } - - seq_putc(seq, '\n'); - - return 0; -} - static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -769,14 +671,8 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx) events_local_file[idx]); cft->flags = CFTYPE_NOT_ON_ROOT; - /* Add the numa stat file */ - cft = &h->cgroup_files_dfl[6]; - snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf); - cft->seq_show = hugetlb_cgroup_read_numa_stat; - cft->flags = CFTYPE_NOT_ON_ROOT; - /* NULL terminate the last cft */ - cft = &h->cgroup_files_dfl[7]; + cft = &h->cgroup_files_dfl[6]; memset(cft, 0, sizeof(*cft)); WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, @@ -846,14 +742,8 @@ static void __init __hugetlb_cgroup_file_legacy_init(int idx) cft->write = hugetlb_cgroup_reset; cft->read_u64 = hugetlb_cgroup_read_u64; - /* Add the numa stat file */ - cft = &h->cgroup_files_legacy[8]; - snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf); - cft->private = MEMFILE_PRIVATE(idx, 1); - cft->seq_show = hugetlb_cgroup_read_numa_stat; - /* NULL terminate the last cft */ - cft = &h->cgroup_files_legacy[9]; + cft = &h->cgroup_files_legacy[8]; memset(cft, 0, sizeof(*cft)); WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, diff --git a/mm/internal.h b/mm/internal.h index d80300392a..cf3cb933eb 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -12,8 +12,6 @@ #include #include -struct folio_batch; - /* * The set of flags that only affect watermark checking and reclaim * behaviour. 
This is used by the MM to obey the caller constraints @@ -23,7 +21,7 @@ struct folio_batch; #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ - __GFP_ATOMIC|__GFP_NOLOCKDEP) + __GFP_ATOMIC) /* The GFP flags allowed during early boot */ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) @@ -36,47 +34,16 @@ struct folio_batch; void page_writeback_init(void); -static inline void *folio_raw_mapping(struct folio *folio) -{ - unsigned long mapping = (unsigned long)folio->mapping; - - return (void *)(mapping & ~PAGE_MAPPING_FLAGS); -} - -void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio, - int nr_throttled); -static inline void acct_reclaim_writeback(struct folio *folio) -{ - pg_data_t *pgdat = folio_pgdat(folio); - int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled); - - if (nr_throttled) - __acct_reclaim_writeback(pgdat, folio, nr_throttled); -} - -static inline void wake_throttle_isolated(pg_data_t *pgdat) -{ - wait_queue_head_t *wqh; - - wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED]; - if (waitqueue_active(wqh)) - wake_up(wqh); -} - vm_fault_t do_swap_page(struct vm_fault *vmf); -void folio_rotate_reclaimable(struct folio *folio); -bool __folio_end_writeback(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); -void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); static inline bool can_madv_lru_vma(struct vm_area_struct *vma) { return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)); } -struct zap_details; void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -93,37 +60,20 @@ static inline void force_page_cache_readahead(struct address_space *mapping, } unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); -unsigned find_get_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); -void filemap_free_folio(struct address_space *mapping, struct folio *folio); -int truncate_inode_folio(struct address_space *mapping, struct folio *folio); -bool truncate_inode_partial_folio(struct folio *folio, loff_t start, - loff_t end); + pgoff_t end, struct pagevec *pvec, pgoff_t *indices); /** - * folio_evictable - Test whether a folio is evictable. - * @folio: The folio to test. + * page_evictable - test whether a page is evictable + * @page: the page to test * - * Test whether @folio is evictable -- i.e., should be placed on - * active/inactive lists vs unevictable list. + * Test whether page is evictable--i.e., should be placed on active/inactive + * lists vs unevictable list. + * + * Reasons page might not be evictable: + * (1) page's mapping marked unevictable + * (2) page is part of an mlocked VMA * - * Reasons folio might not be evictable: - * 1. folio's mapping marked unevictable - * 2. 
One of the pages in the folio is part of an mlocked VMA */ -static inline bool folio_evictable(struct folio *folio) -{ - bool ret; - - /* Prevent address_space of inode and swap cache from being freed */ - rcu_read_lock(); - ret = !mapping_unevictable(folio_mapping(folio)) && - !folio_test_mlocked(folio); - rcu_read_unlock(); - return ret; -} - static inline bool page_evictable(struct page *page) { bool ret; @@ -159,13 +109,17 @@ extern unsigned long highest_memmap_pfn; */ extern int isolate_lru_page(struct page *page); extern void putback_lru_page(struct page *page); -extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); /* * in mm/rmap.c: */ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); +/* + * in mm/memcontrol.c: + */ +extern bool cgroup_memory_nokmem; + /* * in mm/page_alloc.c */ @@ -392,7 +346,6 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma); #ifdef CONFIG_MMU -void unmap_mapping_folio(struct folio *folio); extern long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked); extern long faultin_vma_page_range(struct vm_area_struct *vma, @@ -496,8 +449,8 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, } return fpin; } + #else /* !CONFIG_MMU */ -static inline void unmap_mapping_folio(struct folio *folio) { } static inline void clear_page_mlock(struct page *page) { } static inline void mlock_vma_page(struct page *page) { } static inline void vunmap_range_noflush(unsigned long start, unsigned long end) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 9219656268..2baf121fb8 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -30,19 +30,20 @@ #include "kasan.h" #include "../slab.h" -depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc) +depot_stack_handle_t kasan_save_stack(gfp_t flags) { unsigned long entries[KASAN_STACK_DEPTH]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); - return __stack_depot_save(entries, nr_entries, flags, can_alloc); + nr_entries = filter_irq_stacks(entries, nr_entries); + return stack_depot_save(entries, nr_entries, flags); } void kasan_set_track(struct kasan_track *track, gfp_t flags) { track->pid = current->pid; - track->stack = kasan_save_stack(flags, true); + track->stack = kasan_save_stack(flags); } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) @@ -246,9 +247,8 @@ struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, } #endif -void __kasan_poison_slab(struct slab *slab) +void __kasan_poison_slab(struct page *page) { - struct page *page = slab_page(slab); unsigned long i; for (i = 0; i < compound_nr(page); i++) @@ -298,7 +298,7 @@ static inline u8 assign_tag(struct kmem_cache *cache, /* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */ #ifdef CONFIG_SLAB /* For SLAB assign tags based on the object index in the freelist. 
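In effect, the SLAB tag is the low byte of the object's index within its slab, so objects sharing a slab get distinct, deterministic tags (wrapping only past 256 objects), and a recycled slot keeps the same tag across reuse, which is what constructor and SLAB_TYPESAFE_BY_RCU caches require.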
*/ - return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object); + return (u8)obj_to_index(cache, virt_to_page(object), (void *)object); #else /* * For SLUB assign a random tag during slab creation, otherwise reuse @@ -341,7 +341,7 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object, if (is_kfence_address(object)) return false; - if (unlikely(nearest_obj(cache, virt_to_slab(object), object) != + if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) != object)) { kasan_report_invalid_free(tagged_object, ip); return true; @@ -401,9 +401,9 @@ void __kasan_kfree_large(void *ptr, unsigned long ip) void __kasan_slab_free_mempool(void *ptr, unsigned long ip) { - struct folio *folio; + struct page *page; - folio = virt_to_folio(ptr); + page = virt_to_head_page(ptr); /* * Even though this function is only called for kmem_cache_alloc and @@ -411,14 +411,12 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip) * !PageSlab() when the size provided to kmalloc is larger than * KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc. */ - if (unlikely(!folio_test_slab(folio))) { + if (unlikely(!PageSlab(page))) { if (____kasan_kfree_large(ptr, ip)) return; - kasan_poison(ptr, folio_size(folio), KASAN_FREE_PAGE, false); + kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE, false); } else { - struct slab *slab = folio_slab(folio); - - ____kasan_slab_free(slab->slab_cache, ptr, ip, false, false); + ____kasan_slab_free(page->slab_cache, ptr, ip, false, false); } } @@ -562,7 +560,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size, void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags) { - struct slab *slab; + struct page *page; if (unlikely(object == ZERO_SIZE_PTR)) return (void *)object; @@ -574,13 +572,13 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag */ kasan_unpoison(object, size, false); - slab = virt_to_slab(object); + page = virt_to_head_page(object); /* Piggy-back on kmalloc() instrumentation to poison the redzone. 
*/ - if (unlikely(!slab)) + if (unlikely(!PageSlab(page))) return __kasan_kmalloc_large(object, size, flags); else - return ____kasan_kmalloc(slab->slab_cache, object, size, flags); + return ____kasan_kmalloc(page->slab_cache, object, size, flags); } bool __kasan_check_byte(const void *address, unsigned long ip) diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index a25ad40906..c3f5ba7a29 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -328,34 +328,24 @@ DEFINE_ASAN_SET_SHADOW(f3); DEFINE_ASAN_SET_SHADOW(f5); DEFINE_ASAN_SET_SHADOW(f8); -static void __kasan_record_aux_stack(void *addr, bool can_alloc) +void kasan_record_aux_stack(void *addr) { - struct slab *slab = kasan_addr_to_slab(addr); + struct page *page = kasan_addr_to_page(addr); struct kmem_cache *cache; struct kasan_alloc_meta *alloc_meta; void *object; - if (is_kfence_address(addr) || !slab) + if (is_kfence_address(addr) || !(page && PageSlab(page))) return; - cache = slab->slab_cache; - object = nearest_obj(cache, slab, addr); + cache = page->slab_cache; + object = nearest_obj(cache, page, addr); alloc_meta = kasan_get_alloc_meta(cache, object); if (!alloc_meta) return; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; - alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, can_alloc); -} - -void kasan_record_aux_stack(void *addr) -{ - return __kasan_record_aux_stack(addr, true); -} - -void kasan_record_aux_stack_noalloc(void *addr) -{ - return __kasan_record_aux_stack(addr, false); + alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT); } void kasan_set_free_info(struct kmem_cache *cache, diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 7355cb534e..05d1e9460e 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -29,7 +29,6 @@ enum kasan_arg_mode { KASAN_ARG_MODE_DEFAULT, KASAN_ARG_MODE_SYNC, KASAN_ARG_MODE_ASYNC, - KASAN_ARG_MODE_ASYMM, }; enum kasan_arg_stacktrace { @@ -46,9 +45,9 @@ static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init; DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled); EXPORT_SYMBOL(kasan_flag_enabled); -/* Whether the selected mode is synchronous/asynchronous/asymmetric.*/ -enum kasan_mode kasan_mode __ro_after_init; -EXPORT_SYMBOL_GPL(kasan_mode); +/* Whether the asynchronous mode is enabled. */ +bool kasan_flag_async __ro_after_init; +EXPORT_SYMBOL_GPL(kasan_flag_async); /* Whether to collect alloc/free stack traces. */ DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace); @@ -70,7 +69,7 @@ static int __init early_kasan_flag(char *arg) } early_param("kasan", early_kasan_flag); -/* kasan.mode=sync/async/asymm */ +/* kasan.mode=sync/async */ static int __init early_kasan_mode(char *arg) { if (!arg) @@ -80,8 +79,6 @@ static int __init early_kasan_mode(char *arg) kasan_arg_mode = KASAN_ARG_MODE_SYNC; else if (!strcmp(arg, "async")) kasan_arg_mode = KASAN_ARG_MODE_ASYNC; - else if (!strcmp(arg, "asymm")) - kasan_arg_mode = KASAN_ARG_MODE_ASYMM; else return -EINVAL; @@ -106,16 +103,6 @@ static int __init early_kasan_flag_stacktrace(char *arg) } early_param("kasan.stacktrace", early_kasan_flag_stacktrace); -static inline const char *kasan_mode_info(void) -{ - if (kasan_mode == KASAN_MODE_ASYNC) - return "async"; - else if (kasan_mode == KASAN_MODE_ASYMM) - return "asymm"; - else - return "sync"; -} - /* kasan_init_hw_tags_cpu() is called for each CPU. */ void kasan_init_hw_tags_cpu(void) { @@ -129,13 +116,11 @@ void kasan_init_hw_tags_cpu(void) return; /* - * Enable async or asymm modes only when explicitly requested - * through the command line. 
+ * Enable async mode only when explicitly requested through + * the command line. */ if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) hw_enable_tagging_async(); - else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM) - hw_enable_tagging_asymm(); else hw_enable_tagging_sync(); } @@ -158,19 +143,15 @@ void __init kasan_init_hw_tags(void) case KASAN_ARG_MODE_DEFAULT: /* * Default to sync mode. + * Do nothing, kasan_flag_async keeps its default value. */ - fallthrough; + break; case KASAN_ARG_MODE_SYNC: - /* Sync mode enabled. */ - kasan_mode = KASAN_MODE_SYNC; + /* Do nothing, kasan_flag_async keeps its default value. */ break; case KASAN_ARG_MODE_ASYNC: /* Async mode enabled. */ - kasan_mode = KASAN_MODE_ASYNC; - break; - case KASAN_ARG_MODE_ASYMM: - /* Asymm mode enabled. */ - kasan_mode = KASAN_MODE_ASYMM; + kasan_flag_async = true; break; } @@ -187,9 +168,7 @@ void __init kasan_init_hw_tags(void) break; } - pr_info("KernelAddressSanitizer initialized (hw-tags, mode=%s, stacktrace=%s)\n", - kasan_mode_info(), - kasan_stack_collection_enabled() ? "on" : "off"); + pr_info("KernelAddressSanitizer initialized\n"); } void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index c17fa8d26f..8bf568a80e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -13,28 +13,16 @@ #include "../slab.h" DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace); - -enum kasan_mode { - KASAN_MODE_SYNC, - KASAN_MODE_ASYNC, - KASAN_MODE_ASYMM, -}; - -extern enum kasan_mode kasan_mode __ro_after_init; +extern bool kasan_flag_async __ro_after_init; static inline bool kasan_stack_collection_enabled(void) { return static_branch_unlikely(&kasan_flag_stacktrace); } -static inline bool kasan_async_fault_possible(void) +static inline bool kasan_async_mode_enabled(void) { - return kasan_mode == KASAN_MODE_ASYNC || kasan_mode == KASAN_MODE_ASYMM; -} - -static inline bool kasan_sync_fault_possible(void) -{ - return kasan_mode == KASAN_MODE_SYNC || kasan_mode == KASAN_MODE_ASYMM; + return kasan_flag_async; } #else @@ -43,18 +31,15 @@ static inline bool kasan_stack_collection_enabled(void) return true; } -static inline bool kasan_async_fault_possible(void) +static inline bool kasan_async_mode_enabled(void) { return false; } -static inline bool kasan_sync_fault_possible(void) -{ - return true; -} - #endif +extern bool kasan_flag_async __ro_after_init; + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #else @@ -265,9 +250,8 @@ bool kasan_report(unsigned long addr, size_t size, void kasan_report_invalid_free(void *object, unsigned long ip); struct page *kasan_addr_to_page(const void *addr); -struct slab *kasan_addr_to_slab(const void *addr); -depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc); +depot_stack_handle_t kasan_save_stack(gfp_t flags); void kasan_set_track(struct kasan_track *track, gfp_t flags); void kasan_set_free_info(struct kmem_cache *cache, void *object, u8 tag); struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, @@ -305,9 +289,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #ifndef arch_enable_tagging_async #define arch_enable_tagging_async() #endif -#ifndef arch_enable_tagging_asymm -#define arch_enable_tagging_asymm() -#endif #ifndef arch_force_async_tag_fault #define arch_force_async_tag_fault() #endif @@ -323,7 +304,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define 
hw_enable_tagging_sync() arch_enable_tagging_sync() #define hw_enable_tagging_async() arch_enable_tagging_async() -#define hw_enable_tagging_asymm() arch_enable_tagging_asymm() #define hw_force_async_tag_fault() arch_force_async_tag_fault() #define hw_get_random_tag() arch_get_random_tag() #define hw_get_mem_tag(addr) arch_get_mem_tag(addr) @@ -334,7 +314,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define hw_enable_tagging_sync() #define hw_enable_tagging_async() -#define hw_enable_tagging_asymm() #endif /* CONFIG_KASAN_HW_TAGS */ diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 08291ed33e..47ed4fc33a 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -117,7 +117,7 @@ static unsigned long quarantine_batch_size; static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink) { - return virt_to_slab(qlink)->slab_cache; + return virt_to_head_page(qlink)->slab_cache; } static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3ad9624dcc..884a950c70 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -112,7 +112,7 @@ static void start_report(unsigned long *flags) static void end_report(unsigned long *flags, unsigned long addr) { - if (!kasan_async_fault_possible()) + if (!kasan_async_mode_enabled()) trace_error_report_end(ERROR_DETECTOR_KASAN, addr); pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); @@ -132,11 +132,20 @@ static void end_report(unsigned long *flags, unsigned long addr) kasan_enable_current(); } +static void print_stack(depot_stack_handle_t stack) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(stack, &entries); + stack_trace_print(entries, nr_entries, 0); +} + static void print_track(struct kasan_track *track, const char *prefix) { pr_err("%s by task %u:\n", prefix, track->pid); if (track->stack) { - stack_depot_print(track->stack); + print_stack(track->stack); } else { pr_err("(stack is not available)\n"); } @@ -150,14 +159,6 @@ struct page *kasan_addr_to_page(const void *addr) return NULL; } -struct slab *kasan_addr_to_slab(const void *addr) -{ - if ((addr >= (void *)PAGE_OFFSET) && - (addr < high_memory)) - return virt_to_slab(addr); - return NULL; -} - static void describe_object_addr(struct kmem_cache *cache, void *object, const void *addr) { @@ -213,12 +214,12 @@ static void describe_object_stacks(struct kmem_cache *cache, void *object, return; if (alloc_meta->aux_stack[0]) { pr_err("Last potentially related work creation:\n"); - stack_depot_print(alloc_meta->aux_stack[0]); + print_stack(alloc_meta->aux_stack[0]); pr_err("\n"); } if (alloc_meta->aux_stack[1]) { pr_err("Second to last potentially related work creation:\n"); - stack_depot_print(alloc_meta->aux_stack[1]); + print_stack(alloc_meta->aux_stack[1]); pr_err("\n"); } #endif @@ -234,7 +235,7 @@ static void describe_object(struct kmem_cache *cache, void *object, static inline bool kernel_or_module_addr(const void *addr) { - if (is_kernel((unsigned long)addr)) + if (addr >= (void *)_stext && addr < (void *)_end) return true; if (is_module_address((unsigned long)addr)) return true; @@ -256,9 +257,8 @@ static void print_address_description(void *addr, u8 tag) pr_err("\n"); if (page && PageSlab(page)) { - struct slab *slab = page_slab(page); - struct kmem_cache *cache = slab->slab_cache; - void *object = nearest_obj(cache, slab, addr); + struct 
kmem_cache *cache = page->slab_cache; + void *object = nearest_obj(cache, page, addr); describe_object(cache, object, addr, tag); } diff --git a/mm/kasan/report_tags.c b/mm/kasan/report_tags.c index 1b41de88c5..8a319fc16d 100644 --- a/mm/kasan/report_tags.c +++ b/mm/kasan/report_tags.c @@ -12,7 +12,7 @@ const char *kasan_get_bug_type(struct kasan_access_info *info) #ifdef CONFIG_KASAN_TAGS_IDENTIFY struct kasan_alloc_meta *alloc_meta; struct kmem_cache *cache; - struct slab *slab; + struct page *page; const void *addr; void *object; u8 tag; @@ -20,10 +20,10 @@ const char *kasan_get_bug_type(struct kasan_access_info *info) tag = get_tag(info->access_addr); addr = kasan_reset_tag(info->access_addr); - slab = kasan_addr_to_slab(addr); - if (slab) { - cache = slab->slab_cache; - object = nearest_obj(cache, slab, (void *)addr); + page = kasan_addr_to_page(addr); + if (page && PageSlab(page)) { + cache = page->slab_cache; + object = nearest_obj(cache, page, (void *)addr); alloc_meta = kasan_get_alloc_meta(cache, object); if (alloc_meta) { diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 94136f84b4..dd79840e60 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -254,11 +254,6 @@ core_initcall(kasan_memhotplug_init); #ifdef CONFIG_KASAN_VMALLOC -void __init __weak kasan_populate_early_vm_area_shadow(void *start, - unsigned long size) -{ -} - static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index 77f13f391b..bd3f540feb 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -42,7 +42,7 @@ void __init kasan_init_sw_tags(void) for_each_possible_cpu(cpu) per_cpu(prng_state, cpu) = (u32)get_cycles(); - pr_info("KernelAddressSanitizer initialized (sw-tags)\n"); + pr_info("KernelAddressSanitizer initialized\n"); } /* diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 13128fa130..84555b8233 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -10,15 +10,12 @@ #include #include #include -#include #include -#include #include #include #include #include #include -#include #include #include #include @@ -47,8 +44,7 @@ static bool kfence_enabled __read_mostly; -unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL; -EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */ +static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL; #ifdef MODULE_PARAM_PREFIX #undef MODULE_PARAM_PREFIX @@ -86,10 +82,6 @@ static const struct kernel_param_ops sample_interval_param_ops = { }; module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600); -/* Pool usage% threshold when currently covered allocations are skipped. */ -static unsigned long kfence_skip_covered_thresh __read_mostly = 75; -module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644); - /* The pool of pages used for guard pages and objects. */ char *__kfence_pool __ro_after_init; EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */ @@ -114,32 +106,6 @@ DEFINE_STATIC_KEY_FALSE(kfence_allocation_key); /* Gates the allocation, ensuring only one succeeds in a given period. */ atomic_t kfence_allocation_gate = ATOMIC_INIT(1); -/* - * A Counting Bloom filter of allocation coverage: limits currently covered - * allocations of the same source filling up the pool. 
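For scale, plugging the defaults into the false-positive formula quoted just below (this worked example assumes CONFIG_KFENCE_NUM_OBJECTS = 255, the default, so HNUM = 2 and SIZE = 1 << (const_ilog2(255) + 2) = 512):

	P(85% unique) = (1 - e^(-2 * 217/512))^2 ≈ 0.33
	P(15% unique) = (1 - e^(-2 * 38/512))^2  ≈ 0.02

which reproduces the 0.02-0.33 range stated in the comment.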
- * - * Assuming a range of 15%-85% unique allocations in the pool at any point in - * time, the below parameters provide a probability of 0.02-0.33 for false - * positive hits respectively: - * - * P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE))) ^ HNUM - */ -#define ALLOC_COVERED_HNUM 2 -#define ALLOC_COVERED_ORDER (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2) -#define ALLOC_COVERED_SIZE (1 << ALLOC_COVERED_ORDER) -#define ALLOC_COVERED_HNEXT(h) hash_32(h, ALLOC_COVERED_ORDER) -#define ALLOC_COVERED_MASK (ALLOC_COVERED_SIZE - 1) -static atomic_t alloc_covered[ALLOC_COVERED_SIZE]; - -/* Stack depth used to determine uniqueness of an allocation. */ -#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8) - -/* - * Randomness for stack hashes, making the same collisions across reboots and - * different machines less likely. - */ -static u32 stack_hash_seed __ro_after_init; - /* Statistics counters for debugfs. */ enum kfence_counter_id { KFENCE_COUNTER_ALLOCATED, @@ -147,9 +113,6 @@ enum kfence_counter_id { KFENCE_COUNTER_FREES, KFENCE_COUNTER_ZOMBIES, KFENCE_COUNTER_BUGS, - KFENCE_COUNTER_SKIP_INCOMPAT, - KFENCE_COUNTER_SKIP_CAPACITY, - KFENCE_COUNTER_SKIP_COVERED, KFENCE_COUNTER_COUNT, }; static atomic_long_t counters[KFENCE_COUNTER_COUNT]; @@ -159,59 +122,11 @@ static const char *const counter_names[] = { [KFENCE_COUNTER_FREES] = "total frees", [KFENCE_COUNTER_ZOMBIES] = "zombie allocations", [KFENCE_COUNTER_BUGS] = "total bugs", - [KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)", - [KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)", - [KFENCE_COUNTER_SKIP_COVERED] = "skipped allocations (covered)", }; static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT); /* === Internals ============================================================ */ -static inline bool should_skip_covered(void) -{ - unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100; - - return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh; -} - -static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries) -{ - num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH); - num_entries = filter_irq_stacks(stack_entries, num_entries); - return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed); -} - -/* - * Adds (or subtracts) count @val for allocation stack trace hash - * @alloc_stack_hash from Counting Bloom filter. - */ -static void alloc_covered_add(u32 alloc_stack_hash, int val) -{ - int i; - - for (i = 0; i < ALLOC_COVERED_HNUM; i++) { - atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]); - alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash); - } -} - -/* - * Returns true if the allocation stack trace hash @alloc_stack_hash is - * currently contained (non-zero count) in Counting Bloom filter. - */ -static bool alloc_covered_contains(u32 alloc_stack_hash) -{ - int i; - - for (i = 0; i < ALLOC_COVERED_HNUM; i++) { - if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK])) - return false; - alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash); - } - - return true; -} - static bool kfence_protect(unsigned long addr) { return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true)); } @@ -269,26 +184,19 @@ static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *m * Update the object's metadata state, including updating the alloc/free stacks * depending on the state transition. 
*/ -static noinline void -metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next, - unsigned long *stack_entries, size_t num_stack_entries) +static noinline void metadata_update_state(struct kfence_metadata *meta, + enum kfence_object_state next) { struct kfence_track *track = next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track; lockdep_assert_held(&meta->lock); - if (stack_entries) { - memcpy(track->stack_entries, stack_entries, - num_stack_entries * sizeof(stack_entries[0])); - } else { - /* - * Skip over 1 (this) functions; noinline ensures we do not - * accidentally skip over the caller by never inlining. - */ - num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); - } - track->num_stack_entries = num_stack_entries; + /* + * Skip over 1 (this) functions; noinline ensures we do not accidentally + * skip over the caller by never inlining. + */ + track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); track->pid = task_pid_nr(current); track->cpu = raw_smp_processor_id(); track->ts_nsec = local_clock(); /* Same source as printk timestamps. */ @@ -311,19 +219,12 @@ static inline bool set_canary_byte(u8 *addr) /* Check canary byte at @addr. */ static inline bool check_canary_byte(u8 *addr) { - struct kfence_metadata *meta; - unsigned long flags; - if (likely(*addr == KFENCE_CANARY_PATTERN(addr))) return true; atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); - - meta = addr_to_metadata((unsigned long)addr); - raw_spin_lock_irqsave(&meta->lock, flags); - kfence_report_error((unsigned long)addr, false, NULL, meta, KFENCE_ERROR_CORRUPTION); - raw_spin_unlock_irqrestore(&meta->lock, flags); - + kfence_report_error((unsigned long)addr, false, NULL, addr_to_metadata((unsigned long)addr), + KFENCE_ERROR_CORRUPTION); return false; } @@ -333,6 +234,8 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta, const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE); unsigned long addr; + lockdep_assert_held(&meta->lock); + /* * We'll iterate over each canary byte per-side until fn() returns * false. However, we'll still iterate over the canary bytes to the @@ -355,13 +258,11 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta, } } -static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp, - unsigned long *stack_entries, size_t num_stack_entries, - u32 alloc_stack_hash) +static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp) { struct kfence_metadata *meta = NULL; unsigned long flags; - struct slab *slab; + struct page *page; void *addr; /* Try to obtain a free object. */ @@ -371,10 +272,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g list_del_init(&meta->list); } raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags); - if (!meta) { - atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]); + if (!meta) return NULL; - } if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) { /* @@ -416,26 +315,23 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g addr = (void *)meta->addr; /* Update remaining metadata. */ - metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries); + metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED); /* Pairs with READ_ONCE() in kfence_shutdown_cache(). 
*/ WRITE_ONCE(meta->cache, cache); meta->size = size; - meta->alloc_stack_hash = alloc_stack_hash; + for_each_canary(meta, set_canary_byte); + + /* Set required struct page fields. */ + page = virt_to_page(meta->addr); + page->slab_cache = cache; + if (IS_ENABLED(CONFIG_SLUB)) + page->objects = 1; + if (IS_ENABLED(CONFIG_SLAB)) + page->s_mem = addr; + raw_spin_unlock_irqrestore(&meta->lock, flags); - alloc_covered_add(alloc_stack_hash, 1); - - /* Set required slab fields. */ - slab = virt_to_slab((void *)meta->addr); - slab->slab_cache = cache; -#if defined(CONFIG_SLUB) - slab->objects = 1; -#elif defined(CONFIG_SLAB) - slab->s_mem = addr; -#endif - /* Memory initialization. */ - for_each_canary(meta, set_canary_byte); /* * We check slab_want_init_on_alloc() ourselves, rather than letting @@ -460,7 +356,6 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z { struct kcsan_scoped_access assert_page_exclusive; unsigned long flags; - bool init; raw_spin_lock_irqsave(&meta->lock, flags); @@ -488,13 +383,6 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z meta->unprotected_page = 0; } - /* Mark the object as freed. */ - metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0); - init = slab_want_init_on_free(meta->cache); - raw_spin_unlock_irqrestore(&meta->lock, flags); - - alloc_covered_add(meta->alloc_stack_hash, -1); - /* Check canary bytes for memory corruption. */ for_each_canary(meta, check_canary_byte); @@ -503,9 +391,14 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z * data is still there, and after a use-after-free is detected, we * unprotect the page, so the data is still accessible. */ - if (!zombie && unlikely(init)) + if (!zombie && unlikely(slab_want_init_on_free(meta->cache))) memzero_explicit(addr, meta->size); + /* Mark the object as freed. */ + metadata_update_state(meta, KFENCE_OBJECT_FREED); + + raw_spin_unlock_irqrestore(&meta->lock, flags); + /* Protect to detect use-after-frees. */ kfence_protect((unsigned long)addr); @@ -772,7 +665,6 @@ void __init kfence_init(void) if (!kfence_sample_interval) return; - stack_hash_seed = (u32)random_get_entropy(); if (!kfence_init_pool()) { pr_err("%s failed\n", __func__); return; @@ -848,18 +740,12 @@ void kfence_shutdown_cache(struct kmem_cache *s) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { - unsigned long stack_entries[KFENCE_STACK_DEPTH]; - size_t num_stack_entries; - u32 alloc_stack_hash; - /* * Perform size check before switching kfence_allocation_gate, so that * we don't disable KFENCE without making an allocation. */ - if (size > PAGE_SIZE) { - atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); + if (size > PAGE_SIZE) return NULL; - } /* * Skip allocations from non-default zones, including DMA. We cannot @@ -867,10 +753,8 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) * properties (e.g. reside in DMAable memory). 
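For example, a kmalloc(GFP_DMA) caller may hand the buffer to a device that can only address ZONE_DMA; KFENCE objects always come from the normal pool and carry no such placement guarantee, so the check below refuses the allocation rather than return memory from the wrong zone.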
*/ if ((flags & GFP_ZONEMASK) || - (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) { - atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) return NULL; - } if (atomic_inc_return(&kfence_allocation_gate) > 1) return NULL; @@ -891,25 +775,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (!READ_ONCE(kfence_enabled)) return NULL; - num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0); - - /* - * Do expensive check for coverage of allocation in slow-path after - * allocation_gate has already become non-zero, even though it might - * mean not making any allocation within a given sample interval. - * - * This ensures reasonable allocation coverage when the pool is almost - * full, including avoiding long-lived allocations of the same source - * filling up the pool (e.g. pagecache allocations). - */ - alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries); - if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) { - atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]); - return NULL; - } - - return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries, - alloc_stack_hash); + return kfence_guarded_alloc(s, size, flags); } size_t kfence_ksize(const void *addr) diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index 2a2d5de9d3..c1f23c61e5 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -87,8 +87,6 @@ struct kfence_metadata { /* Allocation and free stack information. */ struct kfence_track alloc_track; struct kfence_track free_track; - /* For updating alloc_covered on frees. */ - u32 alloc_stack_hash; }; extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS]; diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 50dbb815a2..f1690cf541 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -32,11 +32,6 @@ #define arch_kfence_test_address(addr) (addr) #endif -#define KFENCE_TEST_REQUIRES(test, cond) do { \ - if (!(cond)) \ - kunit_skip((test), "Test requires: " #cond); \ -} while (0) - /* Report as observed from console. */ static struct { spinlock_t lock; @@ -268,13 +263,13 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat * 100x the sample interval should be more than enough to ensure we get * a KFENCE allocation eventually. */ - timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval); + timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL); /* * Especially for non-preemption kernels, ensure the allocation-gate * timer can catch up: after @resched_after, every failed allocation * attempt yields, to ensure the allocation-gate timer is scheduled. */ - resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval); + resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL); do { if (test_cache) alloc = kmem_cache_alloc(test_cache, gfp); @@ -282,7 +277,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat alloc = kmalloc(size, gfp); if (is_kfence_address(alloc)) { - struct slab *slab = virt_to_slab(alloc); + struct page *page = virt_to_head_page(alloc); struct kmem_cache *s = test_cache ?: kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]; @@ -291,8 +286,8 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat * even for KFENCE objects; these are required so that * memcg accounting works correctly. 
*/ - KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U); - KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1); + KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U); + KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1); if (policy == ALLOCATE_ANY) return alloc; @@ -560,7 +555,8 @@ static void test_init_on_free(struct kunit *test) }; int i; - KFENCE_TEST_REQUIRES(test, IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON)); + if (!IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON)) + return; /* Assume it hasn't been disabled on command line. */ setup_test_cache(test, size, 0, NULL); @@ -607,8 +603,10 @@ static void test_gfpzero(struct kunit *test) char *buf1, *buf2; int i; - /* Skip if we think it'd take too long. */ - KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100); + if (CONFIG_KFENCE_SAMPLE_INTERVAL > 100) { + kunit_warn(test, "skipping ... would take too long\n"); + return; + } setup_test_cache(test, size, 0, NULL); buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY); @@ -739,7 +737,7 @@ static void test_memcache_alloc_bulk(struct kunit *test) * 100x the sample interval should be more than enough to ensure we get * a KFENCE allocation eventually. */ - timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval); + timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL); do { void *objects[100]; int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects), diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 131492fd11..8a8b3aa929 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -619,7 +618,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, continue; } else { result = SCAN_EXCEED_NONE_PTE; - count_vm_event(THP_SCAN_EXCEED_NONE_PTE); goto out; } } @@ -638,7 +636,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, if (page_mapcount(page) > 1 && ++shared > khugepaged_max_ptes_shared) { result = SCAN_EXCEED_SHARED_PTE; - count_vm_event(THP_SCAN_EXCEED_SHARED_PTE); goto out; } @@ -684,7 +681,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, goto out; } if (!pte_write(pteval) && PageSwapCache(page) && - !reuse_swap_page(page)) { + !reuse_swap_page(page, NULL)) { /* * Page is in the swap cache and cannot be re-used. * It cannot be collapsed into a THP. @@ -759,7 +756,11 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, * ptl mostly unnecessary. */ spin_lock(ptl); - ptep_clear(vma->vm_mm, address, _pte); + /* + * paravirt calls inside pte_clear here are + * superfluous. + */ + pte_clear(vma->vm_mm, address, _pte); spin_unlock(ptl); } } else { @@ -773,7 +774,11 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, * inside page_remove_rmap(). */ spin_lock(ptl); - ptep_clear(vma->vm_mm, address, _pte); + /* + * paravirt calls inside pte_clear here are + * superfluous. 
+ */ + pte_clear(vma->vm_mm, address, _pte); page_remove_rmap(src_page, false); spin_unlock(ptl); free_page_and_swap_cache(src_page); @@ -1085,7 +1090,7 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { + if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } @@ -1209,7 +1214,7 @@ static void collapse_huge_page(struct mm_struct *mm, mmap_write_unlock(mm); out_nolock: if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(page_folio(*hpage)); + mem_cgroup_uncharge(*hpage); trace_mm_collapse_huge_page(mm, isolated, result); return; } @@ -1256,7 +1261,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, continue; } else { result = SCAN_EXCEED_SWAP_PTE; - count_vm_event(THP_SCAN_EXCEED_SWAP_PTE); goto out_unmap; } } @@ -1266,7 +1270,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, continue; } else { result = SCAN_EXCEED_NONE_PTE; - count_vm_event(THP_SCAN_EXCEED_NONE_PTE); goto out_unmap; } } @@ -1295,7 +1298,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, if (page_mapcount(page) > 1 && ++shared > khugepaged_max_ptes_shared) { result = SCAN_EXCEED_SHARED_PTE; - count_vm_event(THP_SCAN_EXCEED_SHARED_PTE); goto out_unmap; } @@ -1304,7 +1306,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, /* * Record which node the original page is from and save this * information to khugepaged_node_load[]. - * Khugepaged will allocate hugepage from the node has the max + * Khugepaged will allocate hugepage from the node that has the max * hit record. */ node = page_to_nid(page); @@ -1417,21 +1419,6 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm, return 0; } -static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp) -{ - spinlock_t *ptl; - pmd_t pmd; - - mmap_assert_write_locked(mm); - ptl = pmd_lock(vma->vm_mm, pmdp); - pmd = pmdp_collapse_flush(vma, addr, pmdp); - spin_unlock(ptl); - mm_dec_nr_ptes(mm); - page_table_check_pte_clear_range(mm, addr, pmd); - pte_free(mm, pmd_pgtable(pmd)); -} - /** * collapse_pte_mapped_thp - Try to collapse a pte-mapped THP for mm at * address haddr. @@ -1449,7 +1436,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) struct vm_area_struct *vma = find_vma(mm, haddr); struct page *hpage; pte_t *start_pte, *pte; - pmd_t *pmd; + pmd_t *pmd, _pmd; spinlock_t *ptl; int count = 0; int i; @@ -1525,7 +1512,12 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) } /* step 4: collapse pmd */ - collapse_and_free_pmd(mm, vma, haddr, pmd); + ptl = pmd_lock(vma->vm_mm, pmd); + _pmd = pmdp_collapse_flush(vma, haddr, pmd); + spin_unlock(ptl); + mm_dec_nr_ptes(mm); + pte_free(mm, pmd_pgtable(_pmd)); + drop_hpage: unlock_page(hpage); put_page(hpage); @@ -1563,7 +1555,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) struct vm_area_struct *vma; struct mm_struct *mm; unsigned long addr; - pmd_t *pmd; + pmd_t *pmd, _pmd; i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { @@ -1602,8 +1594,14 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * reverse order. Trylock is a way to avoid deadlock. 
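Concretely: this path needs mmap_lock while already holding the page lock, whereas the fault path acquires the two in the opposite order, so sleeping on mmap_lock here could deadlock; mmap_write_trylock() below simply gives up on contention and leaves the PMD to be retried on a later scan.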
*/ if (mmap_write_trylock(mm)) { - if (!khugepaged_test_exit(mm)) - collapse_and_free_pmd(mm, vma, addr, pmd); + if (!khugepaged_test_exit(mm)) { + spinlock_t *ptl = pmd_lock(mm, pmd); + /* assume page table is clear */ + _pmd = pmdp_collapse_flush(vma, addr, pmd); + spin_unlock(ptl); + mm_dec_nr_ptes(mm); + pte_free(mm, pmd_pgtable(_pmd)); + } mmap_write_unlock(mm); } else { /* Try again later */ @@ -1663,16 +1661,13 @@ static void collapse_file(struct mm_struct *mm, goto out; } - if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { + if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out; } count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC); - /* - * Ensure we have slots for all the pages in the range. This is - * almost certainly a no-op because most of the pages must be present - */ + /* This will be less messy when we use multi-index entries */ do { xas_lock_irq(&xas); xas_create_range(&xas); @@ -1897,9 +1892,6 @@ static void collapse_file(struct mm_struct *mm, __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); } - /* Join all the small entries into a single multi-index entry */ - xas_set_order(&xas, start, HPAGE_PMD_ORDER); - xas_store(&xas, new_page); xa_locked: xas_unlock_irq(&xas); xa_unlocked: @@ -1991,7 +1983,7 @@ static void collapse_file(struct mm_struct *mm, out: VM_BUG_ON(!list_empty(&pagelist)); if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(page_folio(*hpage)); + mem_cgroup_uncharge(*hpage); /* TODO: tracepoints */ } @@ -2016,16 +2008,11 @@ static void khugepaged_scan_file(struct mm_struct *mm, if (xa_is_value(page)) { if (++swap > khugepaged_max_ptes_swap) { result = SCAN_EXCEED_SWAP_PTE; - count_vm_event(THP_SCAN_EXCEED_SWAP_PTE); break; } continue; } - /* - * XXX: khugepaged should compact smaller compound pages - * into a PMD sized page - */ if (PageTransCompound(page)) { result = SCAN_PAGE_COMPOUND; break; @@ -2067,7 +2054,6 @@ static void khugepaged_scan_file(struct mm_struct *mm, if (result == SCAN_SUCCEED) { if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) { result = SCAN_EXCEED_NONE_PTE; - count_vm_event(THP_SCAN_EXCEED_NONE_PTE); } else { node = khugepaged_find_target_node(); collapse_file(mm, file, start, hpage, node); @@ -2313,11 +2299,6 @@ static void set_recommended_min_free_kbytes(void) int nr_zones = 0; unsigned long recommended_min; - if (!khugepaged_enabled()) { - calculate_min_free_kbytes(); - goto update_wmarks; - } - for_each_populated_zone(zone) { /* * We don't need to worry about fragmentation of @@ -2353,8 +2334,6 @@ static void set_recommended_min_free_kbytes(void) min_free_kbytes = recommended_min; } - -update_wmarks: setup_per_zone_wmarks(); } @@ -2376,11 +2355,12 @@ int start_stop_khugepaged(void) if (!list_empty(&khugepaged_scan.mm_head)) wake_up_interruptible(&khugepaged_wait); + + set_recommended_min_free_kbytes(); } else if (khugepaged_thread) { kthread_stop(khugepaged_thread); khugepaged_thread = NULL; } - set_recommended_min_free_kbytes(); fail: mutex_unlock(&khugepaged_mutex); return err; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 7580baa76a..adbe5aa011 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -381,20 +381,15 @@ static void dump_object_info(struct kmemleak_object *object) static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) { struct rb_node *rb = object_tree_root.rb_node; - unsigned long untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); while (rb) { - struct kmemleak_object *object; - unsigned long untagged_objp; - - object 
= rb_entry(rb, struct kmemleak_object, rb_node); - untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); - - if (untagged_ptr < untagged_objp) + struct kmemleak_object *object = + rb_entry(rb, struct kmemleak_object, rb_node); + if (ptr < object->pointer) rb = object->rb_node.rb_left; - else if (untagged_objp + object->size <= untagged_ptr) + else if (object->pointer + object->size <= ptr) rb = object->rb_node.rb_right; - else if (untagged_objp == untagged_ptr || alias) + else if (object->pointer == ptr || alias) return object; else { kmemleak_warn("Found object by alias at 0x%08lx\n", @@ -581,7 +576,6 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, struct kmemleak_object *object, *parent; struct rb_node **link, *rb_parent; unsigned long untagged_ptr; - unsigned long untagged_objp; object = mem_pool_alloc(gfp); if (!object) { @@ -635,10 +629,9 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, while (*link) { rb_parent = *link; parent = rb_entry(rb_parent, struct kmemleak_object, rb_node); - untagged_objp = (unsigned long)kasan_reset_tag((void *)parent->pointer); - if (untagged_ptr + size <= untagged_objp) + if (ptr + size <= parent->pointer) link = &parent->rb_node.rb_left; - else if (untagged_objp + parent->size <= untagged_ptr) + else if (parent->pointer + parent->size <= ptr) link = &parent->rb_node.rb_right; else { kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n", diff --git a/mm/ksm.c b/mm/ksm.c index c20bd4d9a0..a5716fdec1 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -752,7 +751,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node, /* * We come here from above when page->mapping or !PageSwapCache * suggests that the node is stale; but it might be under migration. - * We need smp_rmb(), matching the smp_wmb() in folio_migrate_ksm(), + * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(), * before checking whether node->kpfn has been changed. */ smp_rmb(); @@ -853,14 +852,9 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, return err; } -static inline struct stable_node *folio_stable_node(struct folio *folio) -{ - return folio_test_ksm(folio) ? folio_raw_mapping(folio) : NULL; -} - static inline struct stable_node *page_stable_node(struct page *page) { - return folio_stable_node(page_folio(page)); + return PageKsm(page) ? 
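The lookup_object() hunk just above reduces the walk to three comparisons on raw pointers: left of the range, right of the range, or inside it (a hit only at the exact start unless aliases are allowed). A userspace sketch of that comparison logic, using a binary search over a sorted array in place of the kernel's red-black tree; struct range stands in for struct kmemleak_object:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct range {
	unsigned long start;    /* object->pointer */
	unsigned long size;     /* object->size */
};

static const struct range *lookup(const struct range *v, size_t n,
				  unsigned long ptr, bool alias)
{
	size_t lo = 0, hi = n;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;
		const struct range *r = &v[mid];

		if (ptr < r->start)
			hi = mid;                       /* go left */
		else if (r->start + r->size <= ptr)
			lo = mid + 1;                   /* go right */
		else if (r->start == ptr || alias)
			return r;                       /* exact or alias hit */
		else
			return NULL;    /* interior pointer, alias not allowed */
	}
	return NULL;
}

int main(void)
{
	const struct range objs[] = { {0x1000, 0x100}, {0x2000, 0x80} };

	printf("%d\n", lookup(objs, 2, 0x2000, false) != NULL);  /* 1 */
	printf("%d\n", lookup(objs, 2, 0x2010, false) != NULL);  /* 0 */
	printf("%d\n", lookup(objs, 2, 0x2010, true) != NULL);   /* 1 */
	return 0;
}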
page_rmapping(page) : NULL; } static inline void set_page_stable_node(struct page *page, @@ -2576,16 +2570,15 @@ struct page *ksm_might_need_to_copy(struct page *page, return page; /* no need to copy it */ } else if (!anon_vma) { return page; /* no need to copy it */ - } else if (page->index == linear_page_index(vma, address) && - anon_vma->root == vma->anon_vma->root) { + } else if (anon_vma->root == vma->anon_vma->root && + page->index == linear_page_index(vma, address)) { return page; /* still no need to copy it */ } if (!PageUptodate(page)) return page; /* let do_swap_page report the error */ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); - if (new_page && - mem_cgroup_charge(page_folio(new_page), vma->vm_mm, GFP_KERNEL)) { + if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) { put_page(new_page); new_page = NULL; } @@ -2665,26 +2658,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) } #ifdef CONFIG_MIGRATION -void folio_migrate_ksm(struct folio *newfolio, struct folio *folio) +void ksm_migrate_page(struct page *newpage, struct page *oldpage) { struct stable_node *stable_node; - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); - VM_BUG_ON_FOLIO(newfolio->mapping != folio->mapping, newfolio); + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage); - stable_node = folio_stable_node(folio); + stable_node = page_stable_node(newpage); if (stable_node) { - VM_BUG_ON_FOLIO(stable_node->kpfn != folio_pfn(folio), folio); - stable_node->kpfn = folio_pfn(newfolio); + VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage); + stable_node->kpfn = page_to_pfn(newpage); /* - * newfolio->mapping was set in advance; now we need smp_wmb() + * newpage->mapping was set in advance; now we need smp_wmb() * to make sure that the new stable_node->kpfn is visible - * to get_ksm_page() before it can see that folio->mapping - * has gone stale (or that folio_test_swapcache has been cleared). + * to get_ksm_page() before it can see that oldpage->mapping + * has gone stale (or that PageSwapCache has been cleared). 
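The comment being rewritten here describes a publication contract: the migration side must make the new kpfn visible before the old page's mapping can be seen as stale (smp_wmb), and get_ksm_page() must re-read kpfn only after observing staleness (smp_rmb). A minimal userspace analogue of that barrier pairing using C11 release/acquire on a staleness flag; the variable names are illustrative and this is a model of the ordering, not the kernel code:

#include <stdatomic.h>
#include <stdio.h>

static unsigned long kpfn;      /* models stable_node->kpfn */
static atomic_int stale;        /* models the mapping going stale */

static void migrate(unsigned long new_pfn)
{
	kpfn = new_pfn;
	/* release models smp_wmb(): kpfn is visible before staleness */
	atomic_store_explicit(&stale, 1, memory_order_release);
}

static unsigned long reader(void)
{
	/* acquire models smp_rmb() in get_ksm_page() */
	if (atomic_load_explicit(&stale, memory_order_acquire))
		return kpfn;    /* guaranteed to see the new value */
	return 0;
}

int main(void)
{
	migrate(42);
	printf("%lu\n", reader());
	return 0;
}

The interesting guarantee is cross-CPU: a reader that sees stale == 1 cannot see the old kpfn.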
*/ smp_wmb(); - set_page_stable_node(&folio->page, NULL); + set_page_stable_node(oldpage, NULL); } } #endif /* CONFIG_MIGRATION */ diff --git a/mm/list_lru.c b/mm/list_lru.c index 0cd5e89ca0..cd58790d0f 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -15,29 +15,18 @@ #include "slab.h" #ifdef CONFIG_MEMCG_KMEM -static LIST_HEAD(memcg_list_lrus); +static LIST_HEAD(list_lrus); static DEFINE_MUTEX(list_lrus_mutex); -static inline bool list_lru_memcg_aware(struct list_lru *lru) -{ - return lru->memcg_aware; -} - static void list_lru_register(struct list_lru *lru) { - if (!list_lru_memcg_aware(lru)) - return; - mutex_lock(&list_lrus_mutex); - list_add(&lru->list, &memcg_list_lrus); + list_add(&lru->list, &list_lrus); mutex_unlock(&list_lrus_mutex); } static void list_lru_unregister(struct list_lru *lru) { - if (!list_lru_memcg_aware(lru)) - return; - mutex_lock(&list_lrus_mutex); list_del(&lru->list); mutex_unlock(&list_lrus_mutex); @@ -48,6 +37,11 @@ static int lru_shrinker_id(struct list_lru *lru) return lru->shrinker_id; } +static inline bool list_lru_memcg_aware(struct list_lru *lru) +{ + return lru->memcg_aware; +} + static inline struct list_lru_one * list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) { @@ -182,16 +176,13 @@ unsigned long list_lru_count_one(struct list_lru *lru, { struct list_lru_node *nlru = &lru->node[nid]; struct list_lru_one *l; - long count; + unsigned long count; rcu_read_lock(); l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); count = READ_ONCE(l->nr_items); rcu_read_unlock(); - if (unlikely(count < 0)) - count = 0; - return count; } EXPORT_SYMBOL_GPL(list_lru_count_one); @@ -363,7 +354,8 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru) struct list_lru_memcg *memcg_lrus; int size = memcg_nr_cache_ids; - memcg_lrus = kvmalloc(struct_size(memcg_lrus, lru, size), GFP_KERNEL); + memcg_lrus = kvmalloc(sizeof(*memcg_lrus) + + size * sizeof(void *), GFP_KERNEL); if (!memcg_lrus) return -ENOMEM; @@ -397,7 +389,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, old = rcu_dereference_protected(nlru->memcg_lrus, lockdep_is_held(&list_lrus_mutex)); - new = kvmalloc(struct_size(new, lru, new_size), GFP_KERNEL); + new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL); if (!new) return -ENOMEM; @@ -406,8 +398,19 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, return -ENOMEM; } - memcpy(&new->lru, &old->lru, flex_array_size(new, lru, old_size)); + memcpy(&new->lru, &old->lru, old_size * sizeof(void *)); + + /* + * The locking below allows readers that hold nlru->lock avoid taking + * rcu_read_lock (see list_lru_from_memcg_idx). + * + * Since list_lru_{add,del} may be called under an IRQ-safe lock, + * we have to use IRQ-safe primitives here to avoid deadlock. 
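The list_lru hunk right after this point publishes the resized memcg_lrus array with rcu_assign_pointer() while holding nlru->lock, precisely so that readers already holding that lock may dereference the pointer without entering an RCU read-side section. A userspace sketch of the pattern, with a pthread mutex standing in for nlru->lock and a release store standing in for rcu_assign_pointer(); real code would defer the free via kvfree_rcu(), which has no direct analogue here:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct table {
	size_t size;
	void *slot[];           /* flexible array, as in list_lru_memcg */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic(struct table *) cur;

static int grow(size_t old_size, size_t new_size)
{
	struct table *old = atomic_load(&cur);
	struct table *new = malloc(sizeof(*new) + new_size * sizeof(void *));

	if (!new)
		return -1;
	new->size = new_size;
	memcpy(new->slot, old->slot, old_size * sizeof(void *));

	pthread_mutex_lock(&lock);              /* spin_lock_irq(&nlru->lock) */
	atomic_store_explicit(&cur, new, memory_order_release);
	pthread_mutex_unlock(&lock);
	/* 'old' must only be freed once readers are done (kvfree_rcu) */
	return 0;
}

int main(void)
{
	struct table *t = malloc(sizeof(*t));

	t->size = 0;
	atomic_store(&cur, t);
	return grow(0, 4) ? 1 : 0;
}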
+ */ + spin_lock_irq(&nlru->lock); rcu_assign_pointer(nlru->memcg_lrus, new); + spin_unlock_irq(&nlru->lock); + kvfree_rcu(old, rcu); return 0; } @@ -463,6 +466,9 @@ static int memcg_update_list_lru(struct list_lru *lru, { int i; + if (!list_lru_memcg_aware(lru)) + return 0; + for_each_node(i) { if (memcg_update_list_lru_node(&lru->node[i], old_size, new_size)) @@ -485,6 +491,9 @@ static void memcg_cancel_update_list_lru(struct list_lru *lru, { int i; + if (!list_lru_memcg_aware(lru)) + return; + for_each_node(i) memcg_cancel_update_list_lru_node(&lru->node[i], old_size, new_size); @@ -497,7 +506,7 @@ int memcg_update_all_list_lrus(int new_size) int old_size = memcg_nr_cache_ids; mutex_lock(&list_lrus_mutex); - list_for_each_entry(lru, &memcg_list_lrus, list) { + list_for_each_entry(lru, &list_lrus, list) { ret = memcg_update_list_lru(lru, old_size, new_size); if (ret) goto fail; @@ -506,7 +515,7 @@ int memcg_update_all_list_lrus(int new_size) mutex_unlock(&list_lrus_mutex); return ret; fail: - list_for_each_entry_continue_reverse(lru, &memcg_list_lrus, list) + list_for_each_entry_continue_reverse(lru, &list_lrus, list) memcg_cancel_update_list_lru(lru, old_size, new_size); goto out; } @@ -543,6 +552,9 @@ static void memcg_drain_list_lru(struct list_lru *lru, { int i; + if (!list_lru_memcg_aware(lru)) + return; + for_each_node(i) memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg); } @@ -552,7 +564,7 @@ void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg) struct list_lru *lru; mutex_lock(&list_lrus_mutex); - list_for_each_entry(lru, &memcg_list_lrus, list) + list_for_each_entry(lru, &list_lrus, list) memcg_drain_list_lru(lru, src_idx, dst_memcg); mutex_unlock(&list_lrus_mutex); } diff --git a/mm/madvise.c b/mm/madvise.c index 5604064df4..0734db8d53 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include #include @@ -64,122 +62,83 @@ static int madvise_need_mmap_write(int behavior) } } -#ifdef CONFIG_ANON_VMA_NAME -static struct anon_vma_name *anon_vma_name_alloc(const char *name) -{ - struct anon_vma_name *anon_name; - size_t count; - - /* Add 1 for NUL terminator at the end of the anon_name->name */ - count = strlen(name) + 1; - anon_name = kmalloc(struct_size(anon_name, name, count), GFP_KERNEL); - if (anon_name) { - kref_init(&anon_name->kref); - memcpy(anon_name->name, name, count); - } - - return anon_name; -} - -static void vma_anon_name_free(struct kref *kref) -{ - struct anon_vma_name *anon_name = - container_of(kref, struct anon_vma_name, kref); - kfree(anon_name); -} - -static inline bool has_vma_anon_name(struct vm_area_struct *vma) -{ - return !vma->vm_file && vma->anon_name; -} - -const char *vma_anon_name(struct vm_area_struct *vma) -{ - if (!has_vma_anon_name(vma)) - return NULL; - - mmap_assert_locked(vma->vm_mm); - - return vma->anon_name->name; -} - -void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) -{ - if (!has_vma_anon_name(orig_vma)) - return; - - kref_get(&orig_vma->anon_name->kref); - new_vma->anon_name = orig_vma->anon_name; -} - -void free_vma_anon_name(struct vm_area_struct *vma) -{ - struct anon_vma_name *anon_name; - - if (!has_vma_anon_name(vma)) - return; - - anon_name = vma->anon_name; - vma->anon_name = NULL; - kref_put(&anon_name->kref, vma_anon_name_free); -} - -/* mmap_lock should be write-locked */ -static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name) -{ - const char *anon_name; - - if (!name) { 
- free_vma_anon_name(vma); - return 0; - } - - anon_name = vma_anon_name(vma); - if (anon_name) { - /* Same name, nothing to do here */ - if (!strcmp(name, anon_name)) - return 0; - - free_vma_anon_name(vma); - } - vma->anon_name = anon_vma_name_alloc(name); - if (!vma->anon_name) - return -ENOMEM; - - return 0; -} -#else /* CONFIG_ANON_VMA_NAME */ -static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name) -{ - if (name) - return -EINVAL; - - return 0; -} -#endif /* CONFIG_ANON_VMA_NAME */ /* - * Update the vm_flags on region of a vma, splitting it or merging it as - * necessary. Must be called with mmap_sem held for writing; + * We can potentially split a vm area into separate + * areas, each area with its own behavior. */ -static int madvise_update_vma(struct vm_area_struct *vma, - struct vm_area_struct **prev, unsigned long start, - unsigned long end, unsigned long new_flags, - const char *name) +static long madvise_behavior(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, int behavior) { struct mm_struct *mm = vma->vm_mm; - int error; + int error = 0; pgoff_t pgoff; + unsigned long new_flags = vma->vm_flags; - if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) { + switch (behavior) { + case MADV_NORMAL: + new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; + break; + case MADV_SEQUENTIAL: + new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; + break; + case MADV_RANDOM: + new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; + break; + case MADV_DONTFORK: + new_flags |= VM_DONTCOPY; + break; + case MADV_DOFORK: + if (vma->vm_flags & VM_IO) { + error = -EINVAL; + goto out; + } + new_flags &= ~VM_DONTCOPY; + break; + case MADV_WIPEONFORK: + /* MADV_WIPEONFORK is only supported on anonymous memory. 
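The switch reinstated in madvise_behavior() above is mostly bit editing on vma->vm_flags: each advice value sets, clears, or swaps hint bits before the merge/split machinery runs. A standalone sketch of that mapping over a plain flags word; the ADV_*/VM_* constants here are illustrative stand-ins for the kernel's MADV_* and VM_* values:

#include <stdio.h>

#define VM_RAND_READ	0x1UL
#define VM_SEQ_READ	0x2UL
#define VM_DONTCOPY	0x4UL

enum { ADV_NORMAL, ADV_SEQUENTIAL, ADV_RANDOM, ADV_DONTFORK, ADV_DOFORK };

static unsigned long apply_advice(unsigned long flags, int advice)
{
	switch (advice) {
	case ADV_NORMAL:        /* clear both readahead hints */
		return flags & ~VM_RAND_READ & ~VM_SEQ_READ;
	case ADV_SEQUENTIAL:    /* the two hints are mutually exclusive */
		return (flags & ~VM_RAND_READ) | VM_SEQ_READ;
	case ADV_RANDOM:
		return (flags & ~VM_SEQ_READ) | VM_RAND_READ;
	case ADV_DONTFORK:
		return flags | VM_DONTCOPY;
	case ADV_DOFORK:
		return flags & ~VM_DONTCOPY;
	}
	return flags;
}

int main(void)
{
	unsigned long f = apply_advice(0, ADV_SEQUENTIAL);

	f = apply_advice(f, ADV_RANDOM);
	printf("%#lx\n", f);    /* VM_RAND_READ only */
	return 0;
}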
*/ + if (vma->vm_file || vma->vm_flags & VM_SHARED) { + error = -EINVAL; + goto out; + } + new_flags |= VM_WIPEONFORK; + break; + case MADV_KEEPONFORK: + new_flags &= ~VM_WIPEONFORK; + break; + case MADV_DONTDUMP: + new_flags |= VM_DONTDUMP; + break; + case MADV_DODUMP: + if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) { + error = -EINVAL; + goto out; + } + new_flags &= ~VM_DONTDUMP; + break; + case MADV_MERGEABLE: + case MADV_UNMERGEABLE: + error = ksm_madvise(vma, start, end, behavior, &new_flags); + if (error) + goto out_convert_errno; + break; + case MADV_HUGEPAGE: + case MADV_NOHUGEPAGE: + error = hugepage_madvise(vma, &new_flags, behavior); + if (error) + goto out_convert_errno; + break; + } + + if (new_flags == vma->vm_flags) { *prev = vma; - return 0; + goto out; } pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, name); + vma->vm_userfaultfd_ctx); if (*prev) { vma = *prev; goto success; @@ -188,19 +147,23 @@ static int madvise_update_vma(struct vm_area_struct *vma, *prev = vma; if (start != vma->vm_start) { - if (unlikely(mm->map_count >= sysctl_max_map_count)) - return -ENOMEM; + if (unlikely(mm->map_count >= sysctl_max_map_count)) { + error = -ENOMEM; + goto out; + } error = __split_vma(mm, vma, start, 1); if (error) - return error; + goto out_convert_errno; } if (end != vma->vm_end) { - if (unlikely(mm->map_count >= sysctl_max_map_count)) - return -ENOMEM; + if (unlikely(mm->map_count >= sysctl_max_map_count)) { + error = -ENOMEM; + goto out; + } error = __split_vma(mm, vma, end, 0); if (error) - return error; + goto out_convert_errno; } success: @@ -208,13 +171,16 @@ static int madvise_update_vma(struct vm_area_struct *vma, * vm_flags is protected by the mmap_lock held in write mode. */ vma->vm_flags = new_flags; - if (!vma->vm_file) { - error = replace_vma_anon_name(vma, name); - if (error) - return error; - } - return 0; +out_convert_errno: + /* + * madvise() returns EAGAIN if kernel resources, such as + * slab, are temporarily unavailable. + */ + if (error == -ENOMEM) + error = -EAGAIN; +out: + return error; } #ifdef CONFIG_SWAP @@ -964,95 +930,6 @@ static long madvise_remove(struct vm_area_struct *vma, return error; } -/* - * Apply an madvise behavior to a region of a vma. madvise_update_vma - * will handle splitting a vm area into separate areas, each area with its own - * behavior. 
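Before calling vma_merge(), the code above recomputes pgoff so the file offset corresponds to 'start' rather than to the vma's own beginning. A worked example of that arithmetic; the addresses and offsets are made up for illustration:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long vm_start = 0x7f0000000000UL;      /* vma->vm_start */
	unsigned long vm_pgoff = 10;                    /* vma->vm_pgoff */
	unsigned long start = vm_start + 3 * (1UL << PAGE_SHIFT);

	/* pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT) */
	unsigned long pgoff = vm_pgoff + ((start - vm_start) >> PAGE_SHIFT);

	printf("pgoff = %lu\n", pgoff); /* 13: three pages into the mapping */
	return 0;
}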
- */ -static int madvise_vma_behavior(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end, - unsigned long behavior) -{ - int error; - unsigned long new_flags = vma->vm_flags; - - switch (behavior) { - case MADV_REMOVE: - return madvise_remove(vma, prev, start, end); - case MADV_WILLNEED: - return madvise_willneed(vma, prev, start, end); - case MADV_COLD: - return madvise_cold(vma, prev, start, end); - case MADV_PAGEOUT: - return madvise_pageout(vma, prev, start, end); - case MADV_FREE: - case MADV_DONTNEED: - return madvise_dontneed_free(vma, prev, start, end, behavior); - case MADV_POPULATE_READ: - case MADV_POPULATE_WRITE: - return madvise_populate(vma, prev, start, end, behavior); - case MADV_NORMAL: - new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; - break; - case MADV_SEQUENTIAL: - new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; - break; - case MADV_RANDOM: - new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; - break; - case MADV_DONTFORK: - new_flags |= VM_DONTCOPY; - break; - case MADV_DOFORK: - if (vma->vm_flags & VM_IO) - return -EINVAL; - new_flags &= ~VM_DONTCOPY; - break; - case MADV_WIPEONFORK: - /* MADV_WIPEONFORK is only supported on anonymous memory. */ - if (vma->vm_file || vma->vm_flags & VM_SHARED) - return -EINVAL; - new_flags |= VM_WIPEONFORK; - break; - case MADV_KEEPONFORK: - new_flags &= ~VM_WIPEONFORK; - break; - case MADV_DONTDUMP: - new_flags |= VM_DONTDUMP; - break; - case MADV_DODUMP: - if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) - return -EINVAL; - new_flags &= ~VM_DONTDUMP; - break; - case MADV_MERGEABLE: - case MADV_UNMERGEABLE: - error = ksm_madvise(vma, start, end, behavior, &new_flags); - if (error) - goto out; - break; - case MADV_HUGEPAGE: - case MADV_NOHUGEPAGE: - error = hugepage_madvise(vma, &new_flags, behavior); - if (error) - goto out; - break; - } - - error = madvise_update_vma(vma, prev, start, end, new_flags, - vma_anon_name(vma)); - -out: - /* - * madvise() returns EAGAIN if kernel resources, such as - * slab, are temporarily unavailable. - */ - if (error == -ENOMEM) - error = -EAGAIN; - return error; -} - #ifdef CONFIG_MEMORY_FAILURE /* * Error injection support for memory error handling. @@ -1101,6 +978,30 @@ static int madvise_inject_error(int behavior, } #endif +static long +madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, + unsigned long start, unsigned long end, int behavior) +{ + switch (behavior) { + case MADV_REMOVE: + return madvise_remove(vma, prev, start, end); + case MADV_WILLNEED: + return madvise_willneed(vma, prev, start, end); + case MADV_COLD: + return madvise_cold(vma, prev, start, end); + case MADV_PAGEOUT: + return madvise_pageout(vma, prev, start, end); + case MADV_FREE: + case MADV_DONTNEED: + return madvise_dontneed_free(vma, prev, start, end, behavior); + case MADV_POPULATE_READ: + case MADV_POPULATE_WRITE: + return madvise_populate(vma, prev, start, end, behavior); + default: + return madvise_behavior(vma, prev, start, end, behavior); + } +} + static bool madvise_behavior_valid(int behavior) { @@ -1154,122 +1055,6 @@ process_madvise_behavior_valid(int behavior) } } -/* - * Walk the vmas in range [start,end), and call the visit function on each one. - * The visit function will get start and end parameters that cover the overlap - * between the current vma and the original range. 
Any unmapped regions in the - * original range will result in this function returning -ENOMEM while still - * calling the visit function on all of the existing vmas in the range. - * Must be called with the mmap_lock held for reading or writing. - */ -static -int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long arg, - int (*visit)(struct vm_area_struct *vma, - struct vm_area_struct **prev, unsigned long start, - unsigned long end, unsigned long arg)) -{ - struct vm_area_struct *vma; - struct vm_area_struct *prev; - unsigned long tmp; - int unmapped_error = 0; - - /* - * If the interval [start,end) covers some unmapped address - * ranges, just ignore them, but return -ENOMEM at the end. - * - different from the way of handling in mlock etc. - */ - vma = find_vma_prev(mm, start, &prev); - if (vma && start > vma->vm_start) - prev = vma; - - for (;;) { - int error; - - /* Still start < end. */ - if (!vma) - return -ENOMEM; - - /* Here start < (end|vma->vm_end). */ - if (start < vma->vm_start) { - unmapped_error = -ENOMEM; - start = vma->vm_start; - if (start >= end) - break; - } - - /* Here vma->vm_start <= start < (end|vma->vm_end) */ - tmp = vma->vm_end; - if (end < tmp) - tmp = end; - - /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = visit(vma, &prev, start, tmp, arg); - if (error) - return error; - start = tmp; - if (prev && start < prev->vm_end) - start = prev->vm_end; - if (start >= end) - break; - if (prev) - vma = prev->vm_next; - else /* madvise_remove dropped mmap_lock */ - vma = find_vma(mm, start); - } - - return unmapped_error; -} - -#ifdef CONFIG_ANON_VMA_NAME -static int madvise_vma_anon_name(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end, - unsigned long name) -{ - int error; - - /* Only anonymous mappings can be named */ - if (vma->vm_file) - return -EBADF; - - error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, - (const char *)name); - - /* - * madvise() returns EAGAIN if kernel resources, such as - * slab, are temporarily unavailable. - */ - if (error == -ENOMEM) - error = -EAGAIN; - return error; -} - -int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name) -{ - unsigned long end; - unsigned long len; - - if (start & ~PAGE_MASK) - return -EINVAL; - len = (len_in + ~PAGE_MASK) & PAGE_MASK; - - /* Check to see whether len was rounded up from small -ve to zero */ - if (len_in && !len) - return -EINVAL; - - end = start + len; - if (end < start) - return -EINVAL; - - if (end == start) - return 0; - - return madvise_walk_vmas(mm, start, end, (unsigned long)name, - madvise_vma_anon_name); -} -#endif /* CONFIG_ANON_VMA_NAME */ /* * The madvise(2) system call. 
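The walker removed above (and reinstated inline in do_madvise() further down) has one subtle contract: unmapped gaps inside [start, end) do not abort the walk, they only force -ENOMEM as the final return value. A userspace model of that loop over a sorted array of ranges standing in for the vma list; visit() and the error values are illustrative:

#include <stdio.h>

struct area {
	unsigned long start, end;       /* models [vm_start, vm_end) */
};

static int visit(unsigned long s, unsigned long e)
{
	printf("visit [%#lx, %#lx)\n", s, e);
	return 0;
}

static int walk(const struct area *v, int n, unsigned long start,
		unsigned long end)
{
	int unmapped_error = 0;

	for (int i = 0; i < n && start < end; i++) {
		if (end <= v[i].start)
			break;
		if (start < v[i].start) {       /* gap before this area */
			unmapped_error = -1;    /* -ENOMEM in the kernel */
			start = v[i].start;
		}
		unsigned long tmp = end < v[i].end ? end : v[i].end;
		int error = visit(start, tmp);  /* clamped to the overlap */

		if (error)
			return error;
		start = tmp;
	}
	if (start < end)
		unmapped_error = -1;            /* trailing gap */
	return unmapped_error;
}

int main(void)
{
	const struct area m[] = { {0x1000, 0x3000}, {0x4000, 0x6000} };

	/* crosses the hole at [0x3000, 0x4000): both pieces visited,
	 * but the walk still reports the gap */
	return walk(m, 2, 0x2000, 0x5000) ? 1 : 0;
}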
* @@ -1342,8 +1127,10 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, */ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { - unsigned long end; - int error; + unsigned long end, tmp; + struct vm_area_struct *vma, *prev; + int unmapped_error = 0; + int error = -EINVAL; int write; size_t len; struct blk_plug plug; @@ -1351,22 +1138,23 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh start = untagged_addr(start); if (!madvise_behavior_valid(behavior)) - return -EINVAL; + return error; if (!PAGE_ALIGNED(start)) - return -EINVAL; + return error; len = PAGE_ALIGN(len_in); /* Check to see whether len was rounded up from small -ve to zero */ if (len_in && !len) - return -EINVAL; + return error; end = start + len; if (end < start) - return -EINVAL; + return error; + error = 0; if (end == start) - return 0; + return error; #ifdef CONFIG_MEMORY_FAILURE if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) @@ -1381,9 +1169,51 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh mmap_read_lock(mm); } + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + * - different from the way of handling in mlock etc. + */ + vma = find_vma_prev(mm, start, &prev); + if (vma && start > vma->vm_start) + prev = vma; + blk_start_plug(&plug); - error = madvise_walk_vmas(mm, start, end, behavior, - madvise_vma_behavior); + for (;;) { + /* Still start < end. */ + error = -ENOMEM; + if (!vma) + goto out; + + /* Here start < (end|vma->vm_end). */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; + if (start >= end) + goto out; + } + + /* Here vma->vm_start <= start < (end|vma->vm_end) */ + tmp = vma->vm_end; + if (end < tmp) + tmp = end; + + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). 
*/ + error = madvise_vma(vma, &prev, start, tmp, behavior); + if (error) + goto out; + start = tmp; + if (prev && start < prev->vm_end) + start = prev->vm_end; + error = unmapped_error; + if (start >= end) + goto out; + if (prev) + vma = prev->vm_next; + else /* madvise_remove dropped mmap_lock */ + vma = find_vma(mm, start); + } +out: blk_finish_plug(&plug); if (write) mmap_write_unlock(mm); @@ -1405,6 +1235,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, struct iovec iovstack[UIO_FASTIOV], iovec; struct iovec *iov = iovstack; struct iov_iter iter; + struct pid *pid; struct task_struct *task; struct mm_struct *mm; size_t total_len; @@ -1419,12 +1250,18 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, if (ret < 0) goto out; - task = pidfd_get_task(pidfd, &f_flags); - if (IS_ERR(task)) { - ret = PTR_ERR(task); + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) { + ret = PTR_ERR(pid); goto free_iov; } + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + if (!process_madvise_behavior_valid(behavior)) { ret = -EINVAL; goto release_task; @@ -1464,6 +1301,8 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, mmput(mm); release_task: put_task_struct(task); +put_pid: + put_pid(pid); free_iov: kfree(iov); out: diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c index 1b0ab8fcfd..ea734f248f 100644 --- a/mm/mapping_dirty_helpers.c +++ b/mm/mapping_dirty_helpers.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include diff --git a/mm/memblock.c b/mm/memblock.c index b12a364f27..2b7397781c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -287,7 +287,7 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, { /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE || - end == MEMBLOCK_ALLOC_NOLEAKTRACE) + end == MEMBLOCK_ALLOC_KASAN) end = memblock.current_limit; /* avoid allocating the first page */ @@ -369,7 +369,7 @@ void __init memblock_discard(void) if (memblock_reserved_in_slab) kfree(memblock.reserved.regions); else - memblock_free_late(addr, size); + __memblock_free_late(addr, size); } if (memblock.memory.regions != memblock_memory_init_regions) { @@ -379,7 +379,7 @@ void __init memblock_discard(void) if (memblock_memory_in_slab) kfree(memblock.memory.regions); else - memblock_free_late(addr, size); + __memblock_free_late(addr, size); } memblock_memory = NULL; @@ -478,7 +478,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, kfree(old_array); else if (old_array != memblock_memory_init_regions && old_array != memblock_reserved_init_regions) - memblock_free(old_array, old_alloc_size); + memblock_free_ptr(old_array, old_alloc_size); /* * Reserve the new array if that comes from the memblock. Otherwise, we @@ -661,7 +661,6 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * @base: base address of the new region * @size: size of the new region * @nid: nid of the new region - * @flags: flags of the new region * * Add new memblock region [@base, @base + @size) to the "memory" * type. See memblock_add_range() description for mode details @@ -670,14 +669,14 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * 0 on success, -errno on failure. 
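The process_madvise() hunk above splits the lookup back into two refcounted steps, pidfd to pid, then pid to task, with the error labels unwinding in reverse order. A sketch of that acquire/release discipline; the get_/put_ helpers and error numbers are illustrative stand-ins for pidfd_get_pid(), get_pid_task(), put_task_struct() and put_pid():

#include <stdbool.h>
#include <stdio.h>

struct ref { int count; };

static struct ref pid_ref, task_ref;

static struct ref *get_pid_stub(void) { pid_ref.count++; return &pid_ref; }
static struct ref *get_task_stub(void) { task_ref.count++; return &task_ref; }
static void put_ref(struct ref *r) { r->count--; }

static int do_work(bool task_lookup_fails)
{
	struct ref *pid, *task;
	int ret = 0;

	pid = get_pid_stub();                   /* pidfd_get_pid() */
	task = task_lookup_fails ? NULL : get_task_stub();  /* get_pid_task() */
	if (!task) {
		ret = -3;                       /* -ESRCH */
		goto put_pid;
	}

	/* ... validate behavior, iterate the iovec, madvise each range ... */

	put_ref(task);                          /* release_task: */
put_pid:
	put_ref(pid);                           /* put_pid: */
	return ret;
}

int main(void)
{
	do_work(false);
	do_work(true);
	printf("%d %d\n", pid_ref.count, task_ref.count);   /* 0 0 */
	return 0;
}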
*/ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, - int nid, enum memblock_flags flags) + int nid) { phys_addr_t end = base + size - 1; - memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__, - &base, &end, nid, flags, (void *)_RET_IP_); + memblock_dbg("%s: [%pa-%pa] nid=%d %pS\n", __func__, + &base, &end, nid, (void *)_RET_IP_); - return memblock_add_range(&memblock.memory, base, size, nid, flags); + return memblock_add_range(&memblock.memory, base, size, nid, 0); } /** @@ -803,28 +802,28 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) } /** - * memblock_free - free boot memory allocation + * memblock_free_ptr - free boot memory allocation * @ptr: starting address of the boot memory allocation * @size: size of the boot memory block in bytes * * Free boot memory block previously allocated by memblock_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. */ -void __init_memblock memblock_free(void *ptr, size_t size) +void __init_memblock memblock_free_ptr(void *ptr, size_t size) { if (ptr) - memblock_phys_free(__pa(ptr), size); + memblock_free(__pa(ptr), size); } /** - * memblock_phys_free - free boot memory block + * memblock_free - free boot memory block * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * * Free boot memory block previously allocated by memblock_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. */ -int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; @@ -988,10 +987,6 @@ static bool should_skip_region(struct memblock_type *type, if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) return true; - /* skip driver-managed memory unless we were asked for it explicitly */ - if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m)) - return true; - return false; } @@ -1393,11 +1388,8 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, return 0; done: - /* - * Skip kmemleak for those places like kasan_init() and - * early_pgtable_alloc() due to high volume. - */ - if (end != MEMBLOCK_ALLOC_NOLEAKTRACE) + /* Skip kmemleak for kasan_init() due to high volume. */ + if (end != MEMBLOCK_ALLOC_KASAN) /* * The min_count is set to 0 so that memblock allocated * blocks are never reported as leaks. This is because many @@ -1603,7 +1595,7 @@ void * __init memblock_alloc_try_nid( } /** - * memblock_free_late - free pages directly to buddy allocator + * __memblock_free_late - free pages directly to buddy allocator * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * @@ -1611,7 +1603,7 @@ void * __init memblock_alloc_try_nid( * down, but we are still initializing the system. Pages are released directly * to the buddy allocator. */ -void __init memblock_free_late(phys_addr_t base, phys_addr_t size) +void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) { phys_addr_t cursor, end; @@ -1951,7 +1943,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) * memmap array. 
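With the memblock renames above, the physical-address variant is the primitive and the pointer variant is just a translating wrapper. A userspace sketch of that layering; PHYS_OFFSET and the subtraction are illustrative stand-ins for the kernel's __pa() direct-map translation:

#include <stdint.h>
#include <stdio.h>

#define PHYS_OFFSET 0x40000000UL

static void free_phys(uint64_t base, size_t size)   /* memblock_free() */
{
	printf("free [%#llx, +%zu)\n", (unsigned long long)base, size);
}

static void free_ptr(void *ptr, size_t size)    /* memblock_free_ptr() */
{
	if (ptr)        /* NULL-tolerant, like the kernel wrapper */
		free_phys((uintptr_t)ptr - PHYS_OFFSET, size);
}

int main(void)
{
	free_ptr((void *)(PHYS_OFFSET + 0x1000), 4096);
	return 0;
}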
*/ if (pg < pgend) - memblock_phys_free(pg, pgend - pg); + memblock_free(pg, pgend - pg); } /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36e9f38c91..32ba963ebf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -84,7 +84,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg); static bool cgroup_memory_nosocket __ro_after_init; /* Kernel memory accounting disabled? */ -static bool cgroup_memory_nokmem __ro_after_init; +bool cgroup_memory_nokmem __ro_after_init; /* Whether the swap controller is active */ #ifdef CONFIG_MEMCG_SWAP @@ -451,6 +451,28 @@ ino_t page_cgroup_ino(struct page *page) return ino; } +static struct mem_cgroup_per_node * +mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page) +{ + int nid = page_to_nid(page); + + return memcg->nodeinfo[nid]; +} + +static struct mem_cgroup_tree_per_node * +soft_limit_tree_node(int nid) +{ + return soft_limit_tree.rb_tree_per_node[nid]; +} + +static struct mem_cgroup_tree_per_node * +soft_limit_tree_from_page(struct page *page) +{ + int nid = page_to_nid(page); + + return soft_limit_tree.rb_tree_per_node[nid]; +} + static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, struct mem_cgroup_tree_per_node *mctz, unsigned long new_usage_in_excess) @@ -521,13 +543,13 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg) return excess; } -static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) +static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) { unsigned long excess; struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; - mctz = soft_limit_tree.rb_tree_per_node[nid]; + mctz = soft_limit_tree_from_page(page); if (!mctz) return; /* @@ -535,7 +557,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) * because their event counter is not touched. */ for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = memcg->nodeinfo[nid]; + mz = mem_cgroup_page_nodeinfo(memcg, page); excess = soft_limit_excess(memcg); /* * We have to update the tree if mz is on RB-tree or @@ -566,7 +588,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = memcg->nodeinfo[nid]; - mctz = soft_limit_tree.rb_tree_per_node[nid]; + mctz = soft_limit_tree_node(nid); if (mctz) mem_cgroup_remove_exceeded(mz, mctz); } @@ -782,6 +804,24 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) rcu_read_unlock(); } +/* + * mod_objcg_mlstate() may be called with irq enabled, so + * mod_memcg_lruvec_state() should be used. + */ +static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, + struct pglist_data *pgdat, + enum node_stat_item idx, int nr) +{ + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + rcu_read_lock(); + memcg = obj_cgroup_memcg(objcg); + lruvec = mem_cgroup_lruvec(memcg, pgdat); + mod_memcg_lruvec_state(lruvec, idx, nr); + rcu_read_unlock(); +} + /** * __count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup @@ -814,6 +854,7 @@ static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) } static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, + struct page *page, int nr_pages) { /* pagein of a big page is an event. So, ignore page size */ @@ -856,7 +897,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, * Check events in order. 
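memcg_check_events(), touched just below, checks its targets "in order": the cheap threshold target fires often, and only when it fires is the more expensive soft-limit target even examined. A sketch of that counter-versus-next-target ratelimiting; the period values are illustrative, not the kernel's thresholds:

#include <stdbool.h>
#include <stdio.h>

enum target { TARGET_THRESH, TARGET_SOFTLIMIT, NR_TARGETS };

static const unsigned long period[NR_TARGETS] = { 128, 1024 };
static unsigned long nr_events;                 /* models nr_page_events */
static unsigned long next_target[NR_TARGETS];

static bool ratelimit(enum target t)
{
	if (nr_events < next_target[t])
		return false;
	next_target[t] = nr_events + period[t]; /* advance the target */
	return true;
}

int main(void)
{
	for (nr_events = 0; nr_events < 4096; nr_events++) {
		/* softlimit is only considered when the threshold fires */
		if (ratelimit(TARGET_THRESH) && ratelimit(TARGET_SOFTLIMIT))
			printf("softlimit check at %lu\n", nr_events);
	}
	return 0;
}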
* */ -static void memcg_check_events(struct mem_cgroup *memcg, int nid) +static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) { /* threshold event is triggered in finer grain than soft limit */ if (unlikely(mem_cgroup_event_ratelimit(memcg, @@ -867,7 +908,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, int nid) MEM_CGROUP_TARGET_SOFTLIMIT); mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_tree(memcg, nid); + mem_cgroup_update_tree(memcg, page); } } @@ -1163,88 +1204,64 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, } #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) +void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; - memcg = folio_memcg(folio); + memcg = page_memcg(page); if (!memcg) - VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != root_mem_cgroup, folio); + VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page); else - VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio); + VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page); } #endif /** - * folio_lruvec_lock - Lock the lruvec for a folio. - * @folio: Pointer to the folio. + * lock_page_lruvec - lock and return lruvec for a given page. + * @page: the page * * These functions are safe to use under any of the following conditions: - * - folio locked - * - folio_test_lru false - * - folio_memcg_lock() - * - folio frozen (refcount of 0) - * - * Return: The lruvec this folio is on with its lock held. + * - page locked + * - PageLRU cleared + * - lock_page_memcg() + * - page->_refcount is zero */ -struct lruvec *folio_lruvec_lock(struct folio *folio) +struct lruvec *lock_page_lruvec(struct page *page) { - struct lruvec *lruvec = folio_lruvec(folio); + struct lruvec *lruvec; + lruvec = mem_cgroup_page_lruvec(page); spin_lock(&lruvec->lru_lock); - lruvec_memcg_debug(lruvec, folio); + + lruvec_memcg_debug(lruvec, page); return lruvec; } -/** - * folio_lruvec_lock_irq - Lock the lruvec for a folio. - * @folio: Pointer to the folio. - * - * These functions are safe to use under any of the following conditions: - * - folio locked - * - folio_test_lru false - * - folio_memcg_lock() - * - folio frozen (refcount of 0) - * - * Return: The lruvec this folio is on with its lock held and interrupts - * disabled. - */ -struct lruvec *folio_lruvec_lock_irq(struct folio *folio) +struct lruvec *lock_page_lruvec_irq(struct page *page) { - struct lruvec *lruvec = folio_lruvec(folio); + struct lruvec *lruvec; + lruvec = mem_cgroup_page_lruvec(page); spin_lock_irq(&lruvec->lru_lock); - lruvec_memcg_debug(lruvec, folio); + + lruvec_memcg_debug(lruvec, page); return lruvec; } -/** - * folio_lruvec_lock_irqsave - Lock the lruvec for a folio. - * @folio: Pointer to the folio. - * @flags: Pointer to irqsave flags. - * - * These functions are safe to use under any of the following conditions: - * - folio locked - * - folio_test_lru false - * - folio_memcg_lock() - * - folio frozen (refcount of 0) - * - * Return: The lruvec this folio is on with its lock held and interrupts - * disabled. 
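The three lruvec lock takers in the hunks here differ only in how interrupts are handled at acquisition: not at all, disabled unconditionally, or saved and disabled. A compact userspace sketch of that variant family; the pthread mutex stands in for lruvec->lru_lock and the prints stand in for the irq on/off primitives:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

static void lock_plain(void)
{
	pthread_mutex_lock(&lru_lock);          /* spin_lock() */
}

static void lock_irq(void)
{
	printf("irq off\n");                    /* local_irq_disable() */
	pthread_mutex_lock(&lru_lock);          /* spin_lock_irq() */
}

static void lock_irqsave(unsigned long *flags)
{
	*flags = 1;             /* remember the previous irq state */
	printf("irq save+off\n");
	pthread_mutex_lock(&lru_lock);          /* spin_lock_irqsave() */
}

int main(void)
{
	unsigned long flags;

	lock_plain();         pthread_mutex_unlock(&lru_lock);
	lock_irq();           pthread_mutex_unlock(&lru_lock);
	lock_irqsave(&flags); pthread_mutex_unlock(&lru_lock);
	return 0;
}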
- */ -struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, - unsigned long *flags) +struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags) { - struct lruvec *lruvec = folio_lruvec(folio); + struct lruvec *lruvec; + lruvec = mem_cgroup_page_lruvec(page); spin_lock_irqsave(&lruvec->lru_lock, *flags); - lruvec_memcg_debug(lruvec, folio); + + lruvec_memcg_debug(lruvec, page); return lruvec; } @@ -1375,7 +1392,6 @@ static const struct memory_stat memory_stats[] = { { "pagetables", NR_PAGETABLE }, { "percpu", MEMCG_PERCPU_B }, { "sock", MEMCG_SOCK }, - { "vmalloc", MEMCG_VMALLOC }, { "shmem", NR_SHMEM }, { "file_mapped", NR_FILE_MAPPED }, { "file_dirty", NR_FILE_DIRTY }, @@ -1995,17 +2011,18 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } /** - * folio_memcg_lock - Bind a folio to its memcg. - * @folio: The folio. + * lock_page_memcg - lock a page and memcg binding + * @page: the page * - * This function prevents unlocked LRU folios from being moved to + * This function protects unlocked LRU pages from being moved to * another cgroup. * - * It ensures lifetime of the bound memcg. The caller is responsible - * for the lifetime of the folio. + * It ensures lifetime of the locked memcg. Caller is responsible + * for the lifetime of the page. */ -void folio_memcg_lock(struct folio *folio) +void lock_page_memcg(struct page *page) { + struct page *head = compound_head(page); /* rmap on tail pages */ struct mem_cgroup *memcg; unsigned long flags; @@ -2019,7 +2036,7 @@ void folio_memcg_lock(struct folio *folio) if (mem_cgroup_disabled()) return; again: - memcg = folio_memcg(folio); + memcg = page_memcg(head); if (unlikely(!memcg)) return; @@ -2033,7 +2050,7 @@ void folio_memcg_lock(struct folio *folio) return; spin_lock_irqsave(&memcg->move_lock, flags); - if (memcg != folio_memcg(folio)) { + if (memcg != page_memcg(head)) { spin_unlock_irqrestore(&memcg->move_lock, flags); goto again; } @@ -2047,13 +2064,9 @@ void folio_memcg_lock(struct folio *folio) memcg->move_lock_task = current; memcg->move_lock_flags = flags; } +EXPORT_SYMBOL(lock_page_memcg); -void lock_page_memcg(struct page *page) -{ - folio_memcg_lock(page_folio(page)); -} - -static void __folio_memcg_unlock(struct mem_cgroup *memcg) +static void __unlock_page_memcg(struct mem_cgroup *memcg) { if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -2068,22 +2081,16 @@ static void __folio_memcg_unlock(struct mem_cgroup *memcg) } /** - * folio_memcg_unlock - Release the binding between a folio and its memcg. - * @folio: The folio. - * - * This releases the binding created by folio_memcg_lock(). This does - * not change the accounting of this folio to its memcg, but it does - * permit others to change it. + * unlock_page_memcg - unlock a page and memcg binding + * @page: the page */ -void folio_memcg_unlock(struct folio *folio) -{ - __folio_memcg_unlock(folio_memcg(folio)); -} - void unlock_page_memcg(struct page *page) { - folio_memcg_unlock(page_folio(page)); + struct page *head = compound_head(page); + + __unlock_page_memcg(page_memcg(head)); } +EXPORT_SYMBOL(unlock_page_memcg); struct obj_stock { #ifdef CONFIG_MEMCG_KMEM @@ -2126,6 +2133,41 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, } #endif +/* + * Most kmem_cache_alloc() calls are from user context. The irq disable/enable + * sequence used in this case to access content from object stock is slow. 
+ * To optimize for user context access, there are now two object stocks for + * task context and interrupt context access respectively. + * + * The task context object stock can be accessed by disabling preemption only + * which is cheap in non-preempt kernel. The interrupt context object stock + * can only be accessed after disabling interrupt. User context code can + * access interrupt object stock, but not vice versa. + */ +static inline struct obj_stock *get_obj_stock(unsigned long *pflags) +{ + struct memcg_stock_pcp *stock; + + if (likely(in_task())) { + *pflags = 0UL; + preempt_disable(); + stock = this_cpu_ptr(&memcg_stock); + return &stock->task_obj; + } + + local_irq_save(*pflags); + stock = this_cpu_ptr(&memcg_stock); + return &stock->irq_obj; +} + +static inline void put_obj_stock(unsigned long flags) +{ + if (likely(in_task())) + preempt_enable(); + else + local_irq_restore(flags); +} + /** * consume_stock: Try to consume stocked charge on this cpu. * @memcg: memcg to consume from. @@ -2724,7 +2766,8 @@ static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, return try_charge_memcg(memcg, gfp_mask, nr_pages); } -static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) +#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MMU) +static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { if (mem_cgroup_is_root(memcg)) return; @@ -2733,10 +2776,11 @@ static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages if (do_memsw_account()) page_counter_uncharge(&memcg->memsw, nr_pages); } +#endif -static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) +static void commit_charge(struct page *page, struct mem_cgroup *memcg) { - VM_BUG_ON_FOLIO(folio_memcg(folio), folio); + VM_BUG_ON_PAGE(page_memcg(page), page); /* * Any of the following ensures page's memcg stability: * @@ -2745,7 +2789,7 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) * - lock_page_memcg() * - exclusive reference */ - folio->memcg_data = (unsigned long)memcg; + page->memcg_data = (unsigned long)memcg; } static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) @@ -2770,84 +2814,31 @@ static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) */ #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT) -/* - * Most kmem_cache_alloc() calls are from user context. The irq disable/enable - * sequence used in this case to access content from object stock is slow. - * To optimize for user context access, there are now two object stocks for - * task context and interrupt context access respectively. - * - * The task context object stock can be accessed by disabling preemption only - * which is cheap in non-preempt kernel. The interrupt context object stock - * can only be accessed after disabling interrupt. User context code can - * access interrupt object stock, but not vice versa. 
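A userspace model of the dual object stock this comment describes: task context takes the cheap slot with only preemption disabled, interrupt context takes its own slot with interrupts disabled, so the two never race on the same stock. The context flag and the disable/enable call sites are illustrative stand-ins for in_task(), preempt_disable() and local_irq_save():

#include <stdbool.h>
#include <stdio.h>

struct stock {
	int task_obj;   /* models stock->task_obj */
	int irq_obj;    /* models stock->irq_obj */
};

static struct stock cpu_stock;
static bool in_task_ctx = true;         /* stand-in for in_task() */

static int *get_stock(unsigned long *flags)
{
	if (in_task_ctx) {
		*flags = 0;
		/* preempt_disable() would go here */
		return &cpu_stock.task_obj;
	}
	*flags = 1;     /* pretend we saved the irq state */
	/* local_irq_save(*flags) would go here */
	return &cpu_stock.irq_obj;
}

static void put_stock(unsigned long flags)
{
	(void)flags;
	/* preempt_enable() or local_irq_restore(flags) */
}

int main(void)
{
	unsigned long flags;
	int *obj = get_stock(&flags);

	*obj += 1;
	put_stock(flags);
	printf("task=%d irq=%d\n", cpu_stock.task_obj, cpu_stock.irq_obj);
	return 0;
}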
- */ -static inline struct obj_stock *get_obj_stock(unsigned long *pflags) +int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, + gfp_t gfp, bool new_page) { - struct memcg_stock_pcp *stock; - - if (likely(in_task())) { - *pflags = 0UL; - preempt_disable(); - stock = this_cpu_ptr(&memcg_stock); - return &stock->task_obj; - } - - local_irq_save(*pflags); - stock = this_cpu_ptr(&memcg_stock); - return &stock->irq_obj; -} - -static inline void put_obj_stock(unsigned long flags) -{ - if (likely(in_task())) - preempt_enable(); - else - local_irq_restore(flags); -} - -/* - * mod_objcg_mlstate() may be called with irq enabled, so - * mod_memcg_lruvec_state() should be used. - */ -static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, - struct pglist_data *pgdat, - enum node_stat_item idx, int nr) -{ - struct mem_cgroup *memcg; - struct lruvec *lruvec; - - rcu_read_lock(); - memcg = obj_cgroup_memcg(objcg); - lruvec = mem_cgroup_lruvec(memcg, pgdat); - mod_memcg_lruvec_state(lruvec, idx, nr); - rcu_read_unlock(); -} - -int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, - gfp_t gfp, bool new_slab) -{ - unsigned int objects = objs_per_slab(s, slab); + unsigned int objects = objs_per_slab_page(s, page); unsigned long memcg_data; void *vec; gfp &= ~OBJCGS_CLEAR_MASK; vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, - slab_nid(slab)); + page_to_nid(page)); if (!vec) return -ENOMEM; memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS; - if (new_slab) { + if (new_page) { /* - * If the slab is brand new and nobody can yet access its - * memcg_data, no synchronization is required and memcg_data can - * be simply assigned. + * If the slab page is brand new and nobody can yet access + * it's memcg_data, no synchronization is required and + * memcg_data can be simply assigned. */ - slab->memcg_data = memcg_data; - } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) { + page->memcg_data = memcg_data; + } else if (cmpxchg(&page->memcg_data, 0, memcg_data)) { /* - * If the slab is already in use, somebody can allocate and - * assign obj_cgroups in parallel. In this case the existing + * If the slab page is already in use, somebody can allocate + * and assign obj_cgroups in parallel. In this case the existing * objcg vector should be reused. */ kfree(vec); @@ -2872,43 +2863,38 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, */ struct mem_cgroup *mem_cgroup_from_obj(void *p) { - struct folio *folio; + struct page *page; if (mem_cgroup_disabled()) return NULL; - folio = virt_to_folio(p); + page = virt_to_head_page(p); /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in - * slab->memcg_data. + * the page->obj_cgroups. 
*/ - if (folio_test_slab(folio)) { - struct obj_cgroup **objcgs; - struct slab *slab; + if (page_objcgs_check(page)) { + struct obj_cgroup *objcg; unsigned int off; - slab = folio_slab(folio); - objcgs = slab_objcgs(slab); - if (!objcgs) - return NULL; - - off = obj_to_index(slab->slab_cache, slab, p); - if (objcgs[off]) - return obj_cgroup_memcg(objcgs[off]); + off = obj_to_index(page->slab_cache, page, p); + objcg = page_objcgs(page)[off]; + if (objcg) + return obj_cgroup_memcg(objcg); return NULL; } /* - * page_memcg_check() is used here, because in theory we can encounter - * a folio where the slab flag has been cleared already, but - * slab->memcg_data has not been freed yet + * page_memcg_check() is used here, because page_has_obj_cgroups() + * check above could fail because the object cgroups vector wasn't set + * at that moment, but it can be set concurrently. * page_memcg_check(page) will guarantee that a proper memory * cgroup pointer or NULL will be returned. */ - return page_memcg_check(folio_page(folio, 0)); + return page_memcg_check(page); } __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) @@ -3009,6 +2995,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp, unsigned int nr_pages) { + struct page_counter *counter; struct mem_cgroup *memcg; int ret; @@ -3018,8 +3005,21 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp, if (ret) goto out; - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) - page_counter_charge(&memcg->kmem, nr_pages); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && + !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) { + + /* + * Enforce __GFP_NOFAIL allocation because callers are not + * prepared to see failures and likely do not have any failure + * handling code. 
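The surrounding hunk restores the try-then-force pattern for the kmem counter: a bounded try_charge can fail, but __GFP_NOFAIL callers are charged past the limit because they have no failure path. A sketch of that pattern with a plain integer pair standing in for struct page_counter:

#include <stdbool.h>
#include <stdio.h>

struct counter {
	long usage, max;
};

static bool try_charge_ctr(struct counter *c, long n)
{
	if (c->usage + n > c->max)
		return false;           /* over the limit */
	c->usage += n;
	return true;
}

static void force_charge(struct counter *c, long n)
{
	c->usage += n;  /* may exceed c->max, as page_counter_charge() does */
}

static int charge(struct counter *c, long n, bool nofail)
{
	if (try_charge_ctr(c, n))
		return 0;
	if (nofail) {
		force_charge(c, n);     /* __GFP_NOFAIL: never fail */
		return 0;
	}
	return -1;                      /* -ENOMEM */
}

int main(void)
{
	struct counter kmem = { .usage = 0, .max = 1 };

	printf("%d\n", charge(&kmem, 2, false));    /* -1 */
	printf("%d\n", charge(&kmem, 2, true));     /* 0, forced over max */
	return 0;
}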
+ */ + if (gfp & __GFP_NOFAIL) { + page_counter_charge(&memcg->kmem, nr_pages); + goto out; + } + cancel_charge(memcg, nr_pages); + ret = -ENOMEM; + } out: css_put(&memcg->css); @@ -3059,16 +3059,15 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) */ void __memcg_kmem_uncharge_page(struct page *page, int order) { - struct folio *folio = page_folio(page); struct obj_cgroup *objcg; unsigned int nr_pages = 1 << order; - if (!folio_memcg_kmem(folio)) + if (!PageMemcgKmem(page)) return; - objcg = __folio_objcg(folio); + objcg = __page_objcg(page); obj_cgroup_uncharge_pages(objcg, nr_pages); - folio->memcg_data = 0; + page->memcg_data = 0; obj_cgroup_put(objcg); } @@ -3302,18 +3301,17 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) */ void split_page_memcg(struct page *head, unsigned int nr) { - struct folio *folio = page_folio(head); - struct mem_cgroup *memcg = folio_memcg(folio); + struct mem_cgroup *memcg = page_memcg(head); int i; if (mem_cgroup_disabled() || !memcg) return; for (i = 1; i < nr; i++) - folio_page(folio, i)->memcg_data = folio->memcg_data; + head[i].memcg_data = head->memcg_data; - if (folio_memcg_kmem(folio)) - obj_cgroup_get_many(__folio_objcg(folio), nr - 1); + if (PageMemcgKmem(head)) + obj_cgroup_get_many(__page_objcg(head), nr - 1); else css_get_many(&memcg->css, nr - 1); } @@ -3427,7 +3425,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, if (order > 0) return 0; - mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id]; + mctz = soft_limit_tree_node(pgdat->node_id); /* * Do not even bother to check the largest node if the root @@ -3511,11 +3509,19 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) /* try to free all pages in this cgroup */ while (nr_retries && page_counter_read(&memcg->memory)) { + int progress; + if (signal_pending(current)) return -EINTR; - if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true)) + progress = try_to_free_mem_cgroup_pages(memcg, 1, + GFP_KERNEL, true); + if (!progress) { nr_retries--; + /* maybe some writeback is necessary */ + congestion_wait(BLK_RW_ASYNC, HZ/10); + } + } return 0; @@ -3631,6 +3637,7 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) return 0; BUG_ON(memcg->kmemcg_id >= 0); + BUG_ON(memcg->kmem_state); memcg_id = memcg_alloc_cache_id(); if (memcg_id < 0) @@ -3647,18 +3654,22 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) static_branch_enable(&memcg_kmem_enabled_key); memcg->kmemcg_id = memcg_id; + memcg->kmem_state = KMEM_ONLINE; return 0; } static void memcg_offline_kmem(struct mem_cgroup *memcg) { - struct mem_cgroup *parent; + struct cgroup_subsys_state *css; + struct mem_cgroup *parent, *child; int kmemcg_id; - if (memcg->kmemcg_id == -1) + if (memcg->kmem_state != KMEM_ONLINE) return; + memcg->kmem_state = KMEM_ALLOCATED; + parent = parent_mem_cgroup(memcg); if (!parent) parent = root_mem_cgroup; @@ -3669,15 +3680,31 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) BUG_ON(kmemcg_id < 0); /* - * After we have finished memcg_reparent_objcgs(), all list_lrus - * corresponding to this cgroup are guaranteed to remain empty. - * The ordering is imposed by list_lru_node->lock taken by + * Change kmemcg_id of this cgroup and all its descendants to the + * parent's id, and then move all entries from this cgroup's list_lrus + * to ones of the parent. After we have finished, all list_lrus + * corresponding to this cgroup are guaranteed to remain empty. 
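The reparenting walk described in this comment switches the kmemcg_id of the offlined cgroup and all its descendants to the parent's id before the list_lrus are drained. A sketch of that id propagation over a small array standing in for the css descendant-pre iteration; the layout is illustrative:

#include <stdio.h>

struct cg {
	int kmemcg_id;
	int parent;     /* index into the array, -1 for the root */
};

int main(void)
{
	struct cg tree[] = {
		{ .kmemcg_id = 1, .parent = -1 },   /* parent */
		{ .kmemcg_id = 7, .parent = 0 },    /* memcg going offline */
		{ .kmemcg_id = 7, .parent = 1 },    /* its descendant */
	};
	int victim = 1;
	int parent_id = tree[tree[victim].parent].kmemcg_id;

	/* css_for_each_descendant_pre(): victim and everything below it */
	for (int i = victim; i < 3; i++)
		tree[i].kmemcg_id = parent_id;

	for (int i = 0; i < 3; i++)
		printf("cg%d: kmemcg_id=%d\n", i, tree[i].kmemcg_id);
	return 0;
}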
The + * ordering is imposed by list_lru_node->lock taken by * memcg_drain_all_list_lrus(). */ + rcu_read_lock(); /* can be called from css_free w/o cgroup_mutex */ + css_for_each_descendant_pre(css, &memcg->css) { + child = mem_cgroup_from_css(css); + BUG_ON(child->kmemcg_id != kmemcg_id); + child->kmemcg_id = parent->kmemcg_id; + } + rcu_read_unlock(); + memcg_drain_all_list_lrus(kmemcg_id, parent); memcg_free_cache_id(kmemcg_id); - memcg->kmemcg_id = -1; +} + +static void memcg_free_kmem(struct mem_cgroup *memcg) +{ + /* css_alloc() failed, offlining didn't happen */ + if (unlikely(memcg->kmem_state == KMEM_ONLINE)) + memcg_offline_kmem(memcg); } #else static int memcg_online_kmem(struct mem_cgroup *memcg) @@ -3687,8 +3714,22 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) static void memcg_offline_kmem(struct mem_cgroup *memcg) { } +static void memcg_free_kmem(struct mem_cgroup *memcg) +{ +} #endif /* CONFIG_MEMCG_KMEM */ +static int memcg_update_kmem_max(struct mem_cgroup *memcg, + unsigned long max) +{ + int ret; + + mutex_lock(&memcg_max_mutex); + ret = page_counter_set_max(&memcg->kmem, max); + mutex_unlock(&memcg_max_mutex); + return ret; +} + static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max) { int ret; @@ -3754,8 +3795,10 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, ret = mem_cgroup_resize_max(memcg, nr_pages, true); break; case _KMEM: - /* kmem.limit_in_bytes is deprecated. */ - ret = -EOPNOTSUPP; + pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); + ret = memcg_update_kmem_max(memcg, nr_pages); break; case _TCP: ret = memcg_update_tcp_max(memcg, nr_pages); @@ -4537,17 +4580,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, * As being wrong occasionally doesn't matter, updates and accesses to the * records are lockless and racy. */ -void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, +void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, struct bdi_writeback *wb) { - struct mem_cgroup *memcg = folio_memcg(folio); + struct mem_cgroup *memcg = page_memcg(page); struct memcg_cgwb_frn *frn; u64 now = get_jiffies_64(); u64 oldest_at = now; int oldest = -1; int i; - trace_track_foreign_dirty(folio, wb); + trace_track_foreign_dirty(page, wb); /* * Pick the slot to use. If there is already a slot for @wb, keep @@ -4857,17 +4900,6 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, return ret; } -#if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)) -static int mem_cgroup_slab_show(struct seq_file *m, void *p) -{ - /* - * Deprecated. - * Please, take a look at tools/cgroup/slabinfo.py . 
- */ - return 0; -} -#endif - static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -4968,7 +5000,7 @@ static struct cftype mem_cgroup_legacy_files[] = { (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)) { .name = "kmem.slabinfo", - .seq_show = mem_cgroup_slab_show, + .seq_show = memcg_slab_show, }, #endif { @@ -5128,11 +5160,15 @@ static void mem_cgroup_free(struct mem_cgroup *memcg) static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *memcg; + unsigned int size; int node; int __maybe_unused i; long error = -ENOMEM; - memcg = kzalloc(struct_size(memcg, nodeinfo, nr_node_ids), GFP_KERNEL); + size = sizeof(struct mem_cgroup); + size += nr_node_ids * sizeof(struct mem_cgroup_per_node *); + + memcg = kzalloc(size, GFP_KERNEL); if (!memcg) return ERR_PTR(error); @@ -5315,9 +5351,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) cancel_work_sync(&memcg->high_work); mem_cgroup_remove_from_trees(memcg); free_shrinker_info(memcg); - - /* Need to offline kmem if online_css() fails */ - memcg_offline_kmem(memcg); + memcg_free_kmem(memcg); mem_cgroup_free(memcg); } @@ -5539,7 +5573,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, #endif static struct page *mc_handle_file_pte(struct vm_area_struct *vma, - unsigned long addr, pte_t ptent) + unsigned long addr, pte_t ptent, swp_entry_t *entry) { if (!vma->vm_file) /* anonymous vma */ return NULL; @@ -5569,39 +5603,38 @@ static int mem_cgroup_move_account(struct page *page, struct mem_cgroup *from, struct mem_cgroup *to) { - struct folio *folio = page_folio(page); struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; - unsigned int nr_pages = compound ? folio_nr_pages(folio) : 1; - int nid, ret; + unsigned int nr_pages = compound ? thp_nr_pages(page) : 1; + int ret; VM_BUG_ON(from == to); - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - VM_BUG_ON(compound && !folio_test_large(folio)); + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON(compound && !PageTransHuge(page)); /* * Prevent mem_cgroup_migrate() from looking at * page's memory cgroup of its source page while we change it. 
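At this point mem_cgroup_move_account() deliberately uses a trylock and backs out with -EBUSY rather than sleeping on the page lock, because moving charges is best-effort. A minimal sketch of that pattern with a pthread mutex standing in for the page lock; the error value mirrors -EBUSY:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

static int move_account(void)
{
	if (pthread_mutex_trylock(&page_lock) != 0)
		return -16;     /* -EBUSY: someone else holds the page lock */

	/* ... re-check ownership and move the counters here ... */

	pthread_mutex_unlock(&page_lock);
	return 0;
}

int main(void)
{
	printf("%d\n", move_account());         /* 0 */
	pthread_mutex_lock(&page_lock);
	printf("%d\n", move_account());         /* -16 */
	return 0;
}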
*/ ret = -EBUSY; - if (!folio_trylock(folio)) + if (!trylock_page(page)) goto out; ret = -EINVAL; - if (folio_memcg(folio) != from) + if (page_memcg(page) != from) goto out_unlock; - pgdat = folio_pgdat(folio); + pgdat = page_pgdat(page); from_vec = mem_cgroup_lruvec(from, pgdat); to_vec = mem_cgroup_lruvec(to, pgdat); - folio_memcg_lock(folio); + lock_page_memcg(page); - if (folio_test_anon(folio)) { - if (folio_mapped(folio)) { + if (PageAnon(page)) { + if (page_mapped(page)) { __mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages); - if (folio_test_transhuge(folio)) { + if (PageTransHuge(page)) { __mod_lruvec_state(from_vec, NR_ANON_THPS, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_THPS, @@ -5612,18 +5645,18 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(from_vec, NR_FILE_PAGES, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_PAGES, nr_pages); - if (folio_test_swapbacked(folio)) { + if (PageSwapBacked(page)) { __mod_lruvec_state(from_vec, NR_SHMEM, -nr_pages); __mod_lruvec_state(to_vec, NR_SHMEM, nr_pages); } - if (folio_mapped(folio)) { + if (page_mapped(page)) { __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages); } - if (folio_test_dirty(folio)) { - struct address_space *mapping = folio_mapping(folio); + if (PageDirty(page)) { + struct address_space *mapping = page_mapping(page); if (mapping_can_writeback(mapping)) { __mod_lruvec_state(from_vec, NR_FILE_DIRTY, @@ -5634,7 +5667,7 @@ static int mem_cgroup_move_account(struct page *page, } } - if (folio_test_writeback(folio)) { + if (PageWriteback(page)) { __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages); __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); } @@ -5657,21 +5690,20 @@ static int mem_cgroup_move_account(struct page *page, css_get(&to->css); css_put(&from->css); - folio->memcg_data = (unsigned long)to; + page->memcg_data = (unsigned long)to; - __folio_memcg_unlock(from); + __unlock_page_memcg(from); ret = 0; - nid = folio_nid(folio); local_irq_disable(); - mem_cgroup_charge_statistics(to, nr_pages); - memcg_check_events(to, nid); - mem_cgroup_charge_statistics(from, -nr_pages); - memcg_check_events(from, nid); + mem_cgroup_charge_statistics(to, page, nr_pages); + memcg_check_events(to, page); + mem_cgroup_charge_statistics(from, page, -nr_pages); + memcg_check_events(from, page); local_irq_enable(); out_unlock: - folio_unlock(folio); + unlock_page(page); out: return ret; } @@ -5714,7 +5746,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, else if (is_swap_pte(ptent)) page = mc_handle_swap_pte(vma, ptent, &ent); else if (pte_none(ptent)) - page = mc_handle_file_pte(vma, addr, ptent); + page = mc_handle_file_pte(vma, addr, ptent, &ent); if (!page && !ent.val) return ret; @@ -6326,8 +6358,6 @@ static void __memory_events_show(struct seq_file *m, atomic_long_t *events) seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM])); seq_printf(m, "oom_kill %lu\n", atomic_long_read(&events[MEMCG_OOM_KILL])); - seq_printf(m, "oom_group_kill %lu\n", - atomic_long_read(&events[MEMCG_OOM_GROUP_KILL])); } static int memory_events_show(struct seq_file *m, void *v) @@ -6678,10 +6708,9 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, atomic_long_read(&parent->memory.children_low_usage))); } -static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg, - gfp_t gfp) +static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t 
gfp)
 {
-	long nr_pages = folio_nr_pages(folio);
+	unsigned int nr_pages = thp_nr_pages(page);
 	int ret;

 	ret = try_charge(memcg, gfp, nr_pages);
@@ -6689,23 +6718,38 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 		goto out;

 	css_get(&memcg->css);
-	commit_charge(folio, memcg);
+	commit_charge(page, memcg);

 	local_irq_disable();
-	mem_cgroup_charge_statistics(memcg, nr_pages);
-	memcg_check_events(memcg, folio_nid(folio));
+	mem_cgroup_charge_statistics(memcg, page, nr_pages);
+	memcg_check_events(memcg, page);
 	local_irq_enable();
 out:
 	return ret;
 }

-int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
+/**
+ * __mem_cgroup_charge - charge a newly allocated page to a cgroup
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary. If @mm is NULL, try to
+ * charge to the active memcg.
+ *
+ * Do not use this for pages allocated for swapin.
+ *
+ * Returns 0 on success. Otherwise, an error code is returned.
+ */
+int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+			gfp_t gfp_mask)
 {
 	struct mem_cgroup *memcg;
 	int ret;

 	memcg = get_mem_cgroup_from_mm(mm);
-	ret = charge_memcg(folio, memcg, gfp);
+	ret = charge_memcg(page, memcg, gfp_mask);
 	css_put(&memcg->css);

 	return ret;
@@ -6726,7 +6770,6 @@ int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry)
 {
-	struct folio *folio = page_folio(page);
 	struct mem_cgroup *memcg;
 	unsigned short id;
 	int ret;
@@ -6741,7 +6784,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 	memcg = get_mem_cgroup_from_mm(mm);
 	rcu_read_unlock();

-	ret = charge_memcg(folio, memcg, gfp);
+	ret = charge_memcg(page, memcg, gfp);

 	css_put(&memcg->css);
 	return ret;
@@ -6785,7 +6828,7 @@ struct uncharge_gather {
 	unsigned long nr_memory;
 	unsigned long pgpgout;
 	unsigned long nr_kmem;
-	int nid;
+	struct page *dummy_page;
 };

 static inline void uncharge_gather_clear(struct uncharge_gather *ug)
@@ -6809,36 +6852,36 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 	local_irq_save(flags);
 	__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
 	__this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_memory);
-	memcg_check_events(ug->memcg, ug->nid);
+	memcg_check_events(ug->memcg, ug->dummy_page);
 	local_irq_restore(flags);

-	/* drop reference from uncharge_folio */
+	/* drop reference from uncharge_page */
 	css_put(&ug->memcg->css);
 }

-static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
+static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 {
-	long nr_pages;
+	unsigned long nr_pages;
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
-	bool use_objcg = folio_memcg_kmem(folio);
+	bool use_objcg = PageMemcgKmem(page);

-	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+	VM_BUG_ON_PAGE(PageLRU(page), page);

 	/*
 	 * Nobody should be changing or seriously looking at
-	 * folio memcg or objcg at this point, we have fully
-	 * exclusive access to the folio.
+	 * page memcg or objcg at this point, we have fully
+	 * exclusive access to the page.
 	 */
 	if (use_objcg) {
-		objcg = __folio_objcg(folio);
+		objcg = __page_objcg(page);
 		/*
 		 * This get matches the put at the end of the function and
 		 * kmem pages do not hold memcg references anymore.
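[Illustrative aside: the uncharge_gather changes above preserve a batching idiom, accumulate counts while consecutive pages share a memcg and flush one combined update whenever the owner changes or the walk ends. A hedged sketch of the idiom with invented names; a plain counter stands in for the per-memcg statistics and the css reference juggling is omitted.]

struct owner { long nr_pages; };		/* stand-in for a memcg */

struct gather {
	struct owner *owner;			/* NULL: nothing gathered yet */
	long nr;
};

static void gather_flush(struct gather *g)
{
	if (g->owner)
		g->owner->nr_pages -= g->nr;	/* one update for the batch */
	g->owner = NULL;
	g->nr = 0;
}

static void gather_account(struct gather *g, struct owner *o, long nr)
{
	if (g->owner != o)			/* owner changed: flush first */
		gather_flush(g);
	g->owner = o;
	g->nr += nr;
}

/* A list walk calls gather_account() once per page and gather_flush()
 * once at the end, mirroring uncharge_page() plus uncharge_batch(). */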
*/ memcg = get_mem_cgroup_from_objcg(objcg); } else { - memcg = __folio_memcg(folio); + memcg = __page_memcg(page); } if (!memcg) @@ -6850,19 +6893,19 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) uncharge_gather_clear(ug); } ug->memcg = memcg; - ug->nid = folio_nid(folio); + ug->dummy_page = page; /* pairs with css_put in uncharge_batch */ css_get(&memcg->css); } - nr_pages = folio_nr_pages(folio); + nr_pages = compound_nr(page); if (use_objcg) { ug->nr_memory += nr_pages; ug->nr_kmem += nr_pages; - folio->memcg_data = 0; + page->memcg_data = 0; obj_cgroup_put(objcg); } else { /* LRU pages aren't accounted at the root level */ @@ -6870,22 +6913,28 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) ug->nr_memory += nr_pages; ug->pgpgout++; - folio->memcg_data = 0; + page->memcg_data = 0; } css_put(&memcg->css); } -void __mem_cgroup_uncharge(struct folio *folio) +/** + * __mem_cgroup_uncharge - uncharge a page + * @page: page to uncharge + * + * Uncharge a page previously charged with __mem_cgroup_charge(). + */ +void __mem_cgroup_uncharge(struct page *page) { struct uncharge_gather ug; - /* Don't touch folio->lru of any random page, pre-check: */ - if (!folio_memcg(folio)) + /* Don't touch page->lru of any random page, pre-check: */ + if (!page_memcg(page)) return; uncharge_gather_clear(&ug); - uncharge_folio(folio, &ug); + uncharge_page(page, &ug); uncharge_batch(&ug); } @@ -6899,49 +6948,52 @@ void __mem_cgroup_uncharge(struct folio *folio) void __mem_cgroup_uncharge_list(struct list_head *page_list) { struct uncharge_gather ug; - struct folio *folio; + struct page *page; uncharge_gather_clear(&ug); - list_for_each_entry(folio, page_list, lru) - uncharge_folio(folio, &ug); + list_for_each_entry(page, page_list, lru) + uncharge_page(page, &ug); if (ug.memcg) uncharge_batch(&ug); } /** - * mem_cgroup_migrate - Charge a folio's replacement. - * @old: Currently circulating folio. - * @new: Replacement folio. + * mem_cgroup_migrate - charge a page's replacement + * @oldpage: currently circulating page + * @newpage: replacement page * - * Charge @new as a replacement folio for @old. @old will + * Charge @newpage as a replacement page for @oldpage. @oldpage will * be uncharged upon free. * - * Both folios must be locked, @new->mapping must be set up. + * Both pages must be locked, @newpage->mapping must be set up. */ -void mem_cgroup_migrate(struct folio *old, struct folio *new) +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) { struct mem_cgroup *memcg; - long nr_pages = folio_nr_pages(new); + unsigned int nr_pages; unsigned long flags; - VM_BUG_ON_FOLIO(!folio_test_locked(old), old); - VM_BUG_ON_FOLIO(!folio_test_locked(new), new); - VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new); - VM_BUG_ON_FOLIO(folio_nr_pages(old) != nr_pages, new); + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); + VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage), + newpage); if (mem_cgroup_disabled()) return; - /* Page cache replacement: new folio already charged? */ - if (folio_memcg(new)) + /* Page cache replacement: new page already charged? */ + if (page_memcg(newpage)) return; - memcg = folio_memcg(old); - VM_WARN_ON_ONCE_FOLIO(!memcg, old); + memcg = page_memcg(oldpage); + VM_WARN_ON_ONCE_PAGE(!memcg, oldpage); if (!memcg) return; /* Force-charge the new page. 
The old one will be freed soon */ + nr_pages = thp_nr_pages(newpage); + if (!mem_cgroup_is_root(memcg)) { page_counter_charge(&memcg->memory, nr_pages); if (do_memsw_account()) @@ -6949,11 +7001,11 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new) } css_get(&memcg->css); - commit_charge(new, memcg); + commit_charge(newpage, memcg); local_irq_save(flags); - mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, folio_nid(new)); + mem_cgroup_charge_statistics(memcg, newpage, nr_pages); + memcg_check_events(memcg, newpage); local_irq_restore(flags); } @@ -7180,8 +7232,8 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * only synchronisation we have for updating the per-CPU variables. */ VM_BUG_ON(!irqs_disabled()); - mem_cgroup_charge_statistics(memcg, -nr_entries); - memcg_check_events(memcg, page_to_nid(page)); + mem_cgroup_charge_statistics(memcg, page, -nr_entries); + memcg_check_events(memcg, page); css_put(&memcg->css); } diff --git a/mm/memfd.c b/mm/memfd.c index 9f80f16279..475d095dd7 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -31,20 +31,28 @@ static void memfd_tag_pins(struct xa_state *xas) { struct page *page; - unsigned int tagged = 0; + int latency = 0; + int cache_count; lru_add_drain(); xas_lock_irq(xas); xas_for_each(xas, page, ULONG_MAX) { - if (xa_is_value(page)) - continue; - page = find_subpage(page, xas->xa_index); - if (page_count(page) - page_mapcount(page) > 1) - xas_set_mark(xas, MEMFD_TAG_PINNED); + cache_count = 1; + if (!xa_is_value(page) && + PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; - if (++tagged % XA_CHECK_SCHED) + if (!xa_is_value(page) && + page_count(page) - total_mapcount(page) != cache_count) + xas_set_mark(xas, MEMFD_TAG_PINNED); + if (cache_count != 1) + xas_set(xas, page->index + cache_count); + + latency += cache_count; + if (latency < XA_CHECK_SCHED) continue; + latency = 0; xas_pause(xas); xas_unlock_irq(xas); @@ -73,7 +81,8 @@ static int memfd_wait_for_pins(struct address_space *mapping) error = 0; for (scan = 0; scan <= LAST_SCAN; scan++) { - unsigned int tagged = 0; + int latency = 0; + int cache_count; if (!xas_marked(&xas, MEMFD_TAG_PINNED)) break; @@ -87,10 +96,14 @@ static int memfd_wait_for_pins(struct address_space *mapping) xas_lock_irq(&xas); xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) { bool clear = true; - if (xa_is_value(page)) - continue; - page = find_subpage(page, xas.xa_index); - if (page_count(page) - page_mapcount(page) != 1) { + + cache_count = 1; + if (!xa_is_value(page) && + PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; + + if (!xa_is_value(page) && cache_count != + page_count(page) - total_mapcount(page)) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still @@ -103,8 +116,11 @@ static int memfd_wait_for_pins(struct address_space *mapping) } if (clear) xas_clear_mark(&xas, MEMFD_TAG_PINNED); - if (++tagged % XA_CHECK_SCHED) + + latency += cache_count; + if (latency < XA_CHECK_SCHED) continue; + latency = 0; xas_pause(&xas); xas_unlock_irq(&xas); @@ -297,7 +313,9 @@ SYSCALL_DEFINE2(memfd_create, } if (flags & MFD_HUGETLB) { - file = hugetlb_file_setup(name, 0, VM_NORESERVE, + struct ucounts *ucounts = NULL; + + file = hugetlb_file_setup(name, 0, VM_NORESERVE, &ucounts, HUGETLB_ANONHUGE_INODE, (flags >> MFD_HUGE_SHIFT) & MFD_HUGE_MASK); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 97a9ed8f87..f66977a171 100644 --- a/mm/memory-failure.c +++ 
b/mm/memory-failure.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -58,7 +57,6 @@ #include #include #include -#include #include "internal.h" #include "ras/ras_event.h" @@ -675,7 +673,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, #define hwpoison_hugetlb_range NULL #endif -static const struct mm_walk_ops hwp_walk_ops = { +static struct mm_walk_ops hwp_walk_ops = { .pmd_entry = hwpoison_pte_range, .hugetlb_entry = hwpoison_hugetlb_range, }; @@ -723,6 +721,7 @@ static const char * const action_page_types[] = { [MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page", [MF_MSG_SLAB] = "kernel slab page", [MF_MSG_DIFFERENT_COMPOUND] = "different compound page after locking", + [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned", [MF_MSG_HUGE] = "huge page", [MF_MSG_FREE_HUGE] = "free huge page", [MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page", @@ -737,6 +736,7 @@ static const char * const action_page_types[] = { [MF_MSG_CLEAN_LRU] = "clean LRU page", [MF_MSG_TRUNCATED_LRU] = "already truncated LRU page", [MF_MSG_BUDDY] = "free buddy page", + [MF_MSG_BUDDY_2ND] = "free buddy page (2nd try)", [MF_MSG_DAX] = "dax page", [MF_MSG_UNSPLIT_THP] = "unsplit thp", [MF_MSG_UNKNOWN] = "unknown page", @@ -762,7 +762,7 @@ static int delete_from_lru_cache(struct page *p) * Poisoned page might never drop its ref count to 0 so we have * to uncharge it manually from its memcg. */ - mem_cgroup_uncharge(page_folio(p)); + mem_cgroup_uncharge(p); /* * drop the page count elevated by isolate_lru_page() @@ -806,44 +806,12 @@ static int truncate_error_page(struct page *p, unsigned long pfn, return ret; } -struct page_state { - unsigned long mask; - unsigned long res; - enum mf_action_page_type type; - - /* Callback ->action() has to unlock the relevant page inside it. */ - int (*action)(struct page_state *ps, struct page *p); -}; - -/* - * Return true if page is still referenced by others, otherwise return - * false. - * - * The extra_pins is true when one extra refcount is expected. - */ -static bool has_extra_refcount(struct page_state *ps, struct page *p, - bool extra_pins) -{ - int count = page_count(p) - 1; - - if (extra_pins) - count -= 1; - - if (count > 0) { - pr_err("Memory failure: %#lx: %s still referenced by %d users\n", - page_to_pfn(p), action_page_types[ps->type], count); - return true; - } - - return false; -} - /* * Error hit kernel page. * Do nothing, try to be lucky and not touch this instead. For a few cases we * could be more sophisticated. */ -static int me_kernel(struct page_state *ps, struct page *p) +static int me_kernel(struct page *p, unsigned long pfn) { unlock_page(p); return MF_IGNORED; @@ -852,9 +820,9 @@ static int me_kernel(struct page_state *ps, struct page *p) /* * Page in unknown state. Do nothing. */ -static int me_unknown(struct page_state *ps, struct page *p) +static int me_unknown(struct page *p, unsigned long pfn) { - pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p)); + pr_err("Memory failure: %#lx: Unknown page state\n", pfn); unlock_page(p); return MF_FAILED; } @@ -862,11 +830,10 @@ static int me_unknown(struct page_state *ps, struct page *p) /* * Clean (or cleaned) page cache page. 
*/ -static int me_pagecache_clean(struct page_state *ps, struct page *p) +static int me_pagecache_clean(struct page *p, unsigned long pfn) { int ret; struct address_space *mapping; - bool extra_pins; delete_from_lru_cache(p); @@ -895,24 +862,14 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) goto out; } - /* - * The shmem page is kept in page cache instead of truncating - * so is expected to have an extra refcount after error-handling. - */ - extra_pins = shmem_mapping(mapping); - /* * Truncation is a bit tricky. Enable it per file system for now. * * Open: to take i_rwsem or not for this? Right now we don't. */ - ret = truncate_error_page(p, page_to_pfn(p), mapping); - if (has_extra_refcount(ps, p, extra_pins)) - ret = MF_FAILED; - + ret = truncate_error_page(p, pfn, mapping); out: unlock_page(p); - return ret; } @@ -921,7 +878,7 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) * Issues: when the error hit a hole page the error is not properly * propagated. */ -static int me_pagecache_dirty(struct page_state *ps, struct page *p) +static int me_pagecache_dirty(struct page *p, unsigned long pfn) { struct address_space *mapping = page_mapping(p); @@ -965,7 +922,7 @@ static int me_pagecache_dirty(struct page_state *ps, struct page *p) mapping_set_error(mapping, -EIO); } - return me_pagecache_clean(ps, p); + return me_pagecache_clean(p, pfn); } /* @@ -987,10 +944,9 @@ static int me_pagecache_dirty(struct page_state *ps, struct page *p) * Clean swap cache pages can be directly isolated. A later page fault will * bring in the known good data from disk. */ -static int me_swapcache_dirty(struct page_state *ps, struct page *p) +static int me_swapcache_dirty(struct page *p, unsigned long pfn) { int ret; - bool extra_pins = false; ClearPageDirty(p); /* Trigger EIO in shmem: */ @@ -998,17 +954,10 @@ static int me_swapcache_dirty(struct page_state *ps, struct page *p) ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; unlock_page(p); - - if (ret == MF_DELAYED) - extra_pins = true; - - if (has_extra_refcount(ps, p, extra_pins)) - ret = MF_FAILED; - return ret; } -static int me_swapcache_clean(struct page_state *ps, struct page *p) +static int me_swapcache_clean(struct page *p, unsigned long pfn) { int ret; @@ -1016,10 +965,6 @@ static int me_swapcache_clean(struct page_state *ps, struct page *p) ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED; unlock_page(p); - - if (has_extra_refcount(ps, p, false)) - ret = MF_FAILED; - return ret; } @@ -1029,7 +974,7 @@ static int me_swapcache_clean(struct page_state *ps, struct page *p) * - Error on hugepage is contained in hugepage unit (not in raw page unit.) * To narrow down kill region to one page, we need to break up pmd. 
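[Illustrative aside: the has_extra_refcount() helper removed above reappears inline in page_action() a little further down. The expected-pin arithmetic it encodes: drop the reference the handler path itself holds, allow one extra pin when a dirty swapcache handler returns MF_DELAYED, and treat any remainder as stray users. A sketch of just that arithmetic; the helper name is invented.]

/* Number of unexpected references left after a handler ran:
 * a result > 0 means somebody else still pins the poisoned page. */
static int stray_references(int page_count, int delayed_dirty_swapcache)
{
	int count = page_count - 1;		/* our own reference */

	if (delayed_dirty_swapcache)
		count--;			/* one extra pin is expected */
	return count;
}

/* e.g. page_count == 2 on an MF_DELAYED dirty swapcache page gives 0,
 * so the action result stands; 3 would force it to MF_FAILED. */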
*/ -static int me_huge_page(struct page_state *ps, struct page *p) +static int me_huge_page(struct page *p, unsigned long pfn) { int res; struct page *hpage = compound_head(p); @@ -1040,7 +985,7 @@ static int me_huge_page(struct page_state *ps, struct page *p) mapping = page_mapping(hpage); if (mapping) { - res = truncate_error_page(hpage, page_to_pfn(p), mapping); + res = truncate_error_page(hpage, pfn, mapping); unlock_page(hpage); } else { res = MF_FAILED; @@ -1058,9 +1003,6 @@ static int me_huge_page(struct page_state *ps, struct page *p) } } - if (has_extra_refcount(ps, p, false)) - res = MF_FAILED; - return res; } @@ -1086,7 +1028,14 @@ static int me_huge_page(struct page_state *ps, struct page *p) #define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) -static struct page_state error_states[] = { +static struct page_state { + unsigned long mask; + unsigned long res; + enum mf_action_page_type type; + + /* Callback ->action() has to unlock the relevant page inside it. */ + int (*action)(struct page *p, unsigned long pfn); +} error_states[] = { { reserved, reserved, MF_MSG_KERNEL, me_kernel }, /* * free pages are specially detected outside this table: @@ -1146,10 +1095,19 @@ static int page_action(struct page_state *ps, struct page *p, unsigned long pfn) { int result; + int count; /* page p should be unlocked after returning from ps->action(). */ - result = ps->action(ps, p); + result = ps->action(p, pfn); + count = page_count(p) - 1; + if (ps->action == me_swapcache_dirty && result == MF_DELAYED) + count--; + if (count > 0) { + pr_err("Memory failure: %#lx: %s still referenced by %d users\n", + pfn, action_page_types[ps->type], count); + result = MF_FAILED; + } action_result(pfn, ps->type, result); /* Could do more checks here if page looks ok */ @@ -1160,22 +1118,6 @@ static int page_action(struct page_state *ps, struct page *p, return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; } -static inline bool PageHWPoisonTakenOff(struct page *page) -{ - return PageHWPoison(page) && page_private(page) == MAGIC_HWPOISON; -} - -void SetPageHWPoisonTakenOff(struct page *page) -{ - set_page_private(page, MAGIC_HWPOISON); -} - -void ClearPageHWPoisonTakenOff(struct page *page) -{ - if (PageHWPoison(page)) - set_page_private(page, 0); -} - /* * Return true if a page type of a given page is supported by hwpoison * mechanism (while handling could fail), otherwise false. This function @@ -1278,27 +1220,6 @@ static int get_any_page(struct page *p, unsigned long flags) return ret; } -static int __get_unpoison_page(struct page *page) -{ - struct page *head = compound_head(page); - int ret = 0; - bool hugetlb = false; - - ret = get_hwpoison_huge_page(head, &hugetlb); - if (hugetlb) - return ret; - - /* - * PageHWPoisonTakenOff pages are not only marked as PG_hwpoison, - * but also isolated from buddy freelist, so need to identify the - * state and have to cancel both operations to unpoison. - */ - if (PageHWPoisonTakenOff(page)) - return -EHWPOISON; - - return get_page_unless_zero(page) ? 
1 : 0;
-}
-
 /**
  * get_hwpoison_page() - Get refcount for memory error handling
  * @p: Raw error page (hit by memory error)
@@ -1306,7 +1227,7 @@ static int __get_unpoison_page(struct page *page)
  *
  * get_hwpoison_page() takes a page refcount of an error page to handle memory
  * error on it, after checking that the error page is in a well-defined state
  * (defined as a page-type we can successfully handle the memory error on it,
  * such as LRU page and hugetlb page).
  *
  * Memory error handling could be triggered at any time on any type of page,
@@ -1315,26 +1236,18 @@ static int __get_unpoison_page(struct page *page)
  * extra care for the error page's state (as done in __get_hwpoison_page()),
  * and has some retry logic in get_any_page().
  *
- * When called from unpoison_memory(), the caller should already ensure that
- * the given page has PG_hwpoison. So it's never reused for other page
- * allocations, and __get_unpoison_page() never races with them.
- *
  * Return: 0 on failure,
  *         1 on success for in-use pages in a well-defined state,
  *         -EIO for pages on which we can not handle memory errors,
  *         -EBUSY when get_hwpoison_page() has raced with page lifecycle
- *         operations like allocation and free,
- *         -EHWPOISON when the page is hwpoisoned and taken off from buddy.
+ *         operations like allocation and free.
  */
 static int get_hwpoison_page(struct page *p, unsigned long flags)
 {
 	int ret;

 	zone_pcp_disable(page_zone(p));
-	if (flags & MF_UNPOISON)
-		ret = __get_unpoison_page(p);
-	else
-		ret = get_any_page(p, flags);
+	ret = get_any_page(p, flags);
 	zone_pcp_enable(page_zone(p));

 	return ret;
@@ -1487,11 +1400,14 @@ static int identify_page_state(unsigned long pfn, struct page *p,
 static int try_to_split_thp_page(struct page *page, const char *msg)
 {
 	lock_page(page);
-	if (unlikely(split_huge_page(page))) {
+	if (!PageAnon(page) || unlikely(split_huge_page(page))) {
 		unsigned long pfn = page_to_pfn(page);

 		unlock_page(page);
-		pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+		if (!PageAnon(page))
+			pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+		else
+			pr_info("%s: %#lx: thp split failed\n", msg, pfn);
 		put_page(page);
 		return -EBUSY;
 	}
@@ -1545,6 +1461,14 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	lock_page(head);
 	page_flags = head->flags;

+	if (!PageHWPoison(head)) {
+		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
+		num_poisoned_pages_dec();
+		unlock_page(head);
+		put_page(head);
+		return 0;
+	}
+
 	/*
 	 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
 	 * simply disable it. In order to make it work properly, we need
@@ -1595,12 +1519,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		goto out;
 	}

-	/*
-	 * Pages instantiated by device-dax (not filesystem-dax)
-	 * may be compound pages.
-	 */
-	page = compound_head(page);
-
 	/*
 	 * Prevent the inode from being freed while we are interrogating
 	 * the address_space, typically this would be handled by
@@ -1664,8 +1582,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	return rc;
 }

-static DEFINE_MUTEX(mf_mutex);
-
 /**
  * memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page @@ -1692,32 +1608,26 @@ int memory_failure(unsigned long pfn, int flags) int res = 0; unsigned long page_flags; bool retry = true; + static DEFINE_MUTEX(mf_mutex); if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); - mutex_lock(&mf_mutex); - p = pfn_to_online_page(pfn); if (!p) { - res = arch_memory_failure(pfn, flags); - if (res == 0) - goto unlock_mutex; - if (pfn_valid(pfn)) { pgmap = get_dev_pagemap(pfn, NULL); - if (pgmap) { - res = memory_failure_dev_pagemap(pfn, flags, - pgmap); - goto unlock_mutex; - } + if (pgmap) + return memory_failure_dev_pagemap(pfn, flags, + pgmap); } pr_err("Memory failure: %#lx: memory outside kernel control\n", pfn); - res = -ENXIO; - goto unlock_mutex; + return -ENXIO; } + mutex_lock(&mf_mutex); + try_again: if (PageHuge(p)) { res = memory_failure_hugetlb(pfn, flags); @@ -1832,6 +1742,16 @@ int memory_failure(unsigned long pfn, int flags) */ page_flags = p->flags; + /* + * unpoison always clear PG_hwpoison inside page lock + */ + if (!PageHWPoison(p)) { + pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); + num_poisoned_pages_dec(); + unlock_page(p); + put_page(p); + goto unlock_mutex; + } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); @@ -1995,28 +1915,6 @@ core_initcall(memory_failure_init); pr_info(fmt, pfn); \ }) -static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p) -{ - if (TestClearPageHWPoison(p)) { - unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", - page_to_pfn(p), rs); - num_poisoned_pages_dec(); - return 1; - } - return 0; -} - -static inline int unpoison_taken_off_page(struct ratelimit_state *rs, - struct page *p) -{ - if (put_page_back_buddy(p)) { - unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", - page_to_pfn(p), rs); - return 0; - } - return -EBUSY; -} - /** * unpoison_memory - Unpoison a previously poisoned page * @pfn: Page number of the to be unpoisoned page @@ -2033,7 +1931,8 @@ int unpoison_memory(unsigned long pfn) { struct page *page; struct page *p; - int ret = -EBUSY; + int freeit = 0; + unsigned long flags = 0; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -2043,60 +1942,69 @@ int unpoison_memory(unsigned long pfn) p = pfn_to_page(pfn); page = compound_head(p); - mutex_lock(&mf_mutex); - if (!PageHWPoison(p)) { unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n", pfn, &unpoison_rs); - goto unlock_mutex; + return 0; } if (page_count(page) > 1) { unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n", pfn, &unpoison_rs); - goto unlock_mutex; + return 0; } if (page_mapped(page)) { unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n", pfn, &unpoison_rs); - goto unlock_mutex; + return 0; } if (page_mapping(page)) { unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n", pfn, &unpoison_rs); - goto unlock_mutex; + return 0; } - if (PageSlab(page) || PageTable(page)) - goto unlock_mutex; + /* + * unpoison_memory() can encounter thp only when the thp is being + * worked by memory_failure() and the page lock is not held yet. + * In such case, we yield to memory_failure() and make unpoison fail. 
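[Illustrative aside: the '+ if (!PageHWPoison(...))' hunks above restore a classic check-then-recheck shape: the poison flag is tested early without the page lock, and tested again once the lock is held, because unpoison_memory() may have cleared it in between. A userspace analogue with invented names:]

#include <pthread.h>

struct flagged {
	pthread_mutex_t lock;
	int poisoned;
};

/* Returns 1 if the object was handled, 0 if it was "just unpoisoned"
 * between the cheap early test and taking the lock. */
static int handle_failure(struct flagged *f)
{
	if (!f->poisoned)			/* early test, inherently racy */
		return 0;

	pthread_mutex_lock(&f->lock);
	if (!f->poisoned) {			/* re-check under the lock */
		pthread_mutex_unlock(&f->lock);
		return 0;
	}
	f->poisoned = 0;			/* ... actual handling here ... */
	pthread_mutex_unlock(&f->lock);
	return 1;
}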
+ */ + if (!PageHuge(page) && PageTransHuge(page)) { + unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n", + pfn, &unpoison_rs); + return 0; + } - ret = get_hwpoison_page(p, MF_UNPOISON); - if (!ret) { - if (clear_page_hwpoison(&unpoison_rs, page)) - ret = 0; - else - ret = -EBUSY; - } else if (ret < 0) { - if (ret == -EHWPOISON) { - ret = unpoison_taken_off_page(&unpoison_rs, p); - } else - unpoison_pr_info("Unpoison: failed to grab page %#lx\n", - pfn, &unpoison_rs); - } else { - int freeit = clear_page_hwpoison(&unpoison_rs, p); + if (!get_hwpoison_page(p, flags)) { + if (TestClearPageHWPoison(p)) + num_poisoned_pages_dec(); + unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", + pfn, &unpoison_rs); + return 0; + } + lock_page(page); + /* + * This test is racy because PG_hwpoison is set outside of page lock. + * That's acceptable because that won't trigger kernel panic. Instead, + * the PG_hwpoison page will be caught and isolated on the entrance to + * the free buddy page pool. + */ + if (TestClearPageHWPoison(page)) { + unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", + pfn, &unpoison_rs); + num_poisoned_pages_dec(); + freeit = 1; + } + unlock_page(page); + + put_page(page); + if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) put_page(page); - if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) { - put_page(page); - ret = 0; - } - } -unlock_mutex: - mutex_unlock(&mf_mutex); - return ret; + return 0; } EXPORT_SYMBOL(unpoison_memory); @@ -2196,14 +2104,14 @@ static int __soft_offline_page(struct page *page) if (!list_empty(&pagelist)) putback_movable_pages(&pagelist); - pr_info("soft offline: %#lx: %s migration failed %d, type %pGp\n", - pfn, msg_page[huge], ret, &page->flags); + pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n", + pfn, msg_page[huge], ret, page->flags, &page->flags); if (ret > 0) ret = -EBUSY; } } else { - pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %pGp\n", - pfn, msg_page[huge], page_count(page), &page->flags); + pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %lx (%pGp)\n", + pfn, msg_page[huge], page_count(page), page->flags, &page->flags); ret = -EBUSY; } return ret; @@ -2277,12 +2185,9 @@ int soft_offline_page(unsigned long pfn, int flags) return -EIO; } - mutex_lock(&mf_mutex); - if (PageHWPoison(page)) { pr_info("%s: %#lx page already poisoned\n", __func__, pfn); put_ref_page(ref_page); - mutex_unlock(&mf_mutex); return 0; } @@ -2301,7 +2206,5 @@ int soft_offline_page(unsigned long pfn, int flags) } } - mutex_unlock(&mf_mutex); - return ret; } diff --git a/mm/memory.c b/mm/memory.c index c125c49699..c52be6d6b6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -41,7 +41,6 @@ #include #include -#include #include #include #include @@ -434,39 +433,35 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, } } -void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte) -{ - spinlock_t *ptl = pmd_lock(mm, pmd); - - if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ - mm_inc_nr_ptes(mm); - /* - * Ensure all pte setup (eg. pte page lock and page clearing) are - * visible before the pte is made visible to other CPUs by being - * put into page tables. - * - * The other side of the story is the pointer chasing in the page - * table walking code (when walking the page table without locking; - * ie. most of the time). 
Fortunately, these data accesses consist - * of a chain of data-dependent loads, meaning most CPUs (alpha - * being the notable exception) will already guarantee loads are - * seen in-order. See the alpha page table accessors for the - * smp_rmb() barriers in page table walking code. - */ - smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ - pmd_populate(mm, pmd, *pte); - *pte = NULL; - } - spin_unlock(ptl); -} - int __pte_alloc(struct mm_struct *mm, pmd_t *pmd) { + spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm); if (!new) return -ENOMEM; - pmd_install(mm, pmd, &new); + /* + * Ensure all pte setup (eg. pte page lock and page clearing) are + * visible before the pte is made visible to other CPUs by being + * put into page tables. + * + * The other side of the story is the pointer chasing in the page + * table walking code (when walking the page table without locking; + * ie. most of the time). Fortunately, these data accesses consist + * of a chain of data-dependent loads, meaning most CPUs (alpha + * being the notable exception) will already guarantee loads are + * seen in-order. See the alpha page table accessors for the + * smp_rmb() barriers in page table walking code. + */ + smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ + + ptl = pmd_lock(mm, pmd); + if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ + mm_inc_nr_ptes(mm); + pmd_populate(mm, pmd, new); + new = NULL; + } + spin_unlock(ptl); if (new) pte_free(mm, new); return 0; @@ -478,9 +473,10 @@ int __pte_alloc_kernel(pmd_t *pmd) if (!new) return -ENOMEM; + smp_wmb(); /* See comment in __pte_alloc */ + spin_lock(&init_mm.page_table_lock); if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ - smp_wmb(); /* See comment in pmd_install() */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; } @@ -720,6 +716,8 @@ static void restore_exclusive_pte(struct vm_area_struct *vma, else if (is_writable_device_exclusive_entry(entry)) pte = maybe_mkwrite(pte_mkdirty(pte), vma); + set_pte_at(vma->vm_mm, address, ptep, pte); + /* * No need to take a page reference as one was already * created when the swap entry was made. @@ -733,8 +731,6 @@ static void restore_exclusive_pte(struct vm_area_struct *vma, */ WARN_ON_ONCE(!PageAnon(page)); - set_pte_at(vma->vm_mm, address, ptep, pte); - if (vma->vm_flags & VM_LOCKED) mlock_vma_page(page); @@ -994,7 +990,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma, if (!new_page) return NULL; - if (mem_cgroup_charge(page_folio(new_page), src_mm, GFP_KERNEL)) { + if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) { put_page(new_page); return NULL; } @@ -1305,28 +1301,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct address_space *zap_mapping; /* Check page->mapping if set */ - struct folio *single_folio; /* Locked folio to be unmapped */ -}; - -/* - * We set details->zap_mapping when we want to unmap shared but keep private - * pages. Return true if skip zapping this page, false otherwise. 
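[Illustrative aside: the smp_wmb() comment restored to __pte_alloc() above describes the publish-after-initialize idiom: fully initialize the new page-table page, execute a write barrier, then link it in, so lockless walkers that chase the pointer see initialized contents via data-dependent loads. A userspace analogue built on the GCC/Clang atomic builtins; the structure and names are invented.]

struct node { int data; };

static struct node *shared;	/* walked locklessly by readers */

static void publish(struct node *n, int v)
{
	n->data = v;					/* initialize first */
	__atomic_thread_fence(__ATOMIC_RELEASE);	/* smp_wmb() analogue */
	__atomic_store_n(&shared, n, __ATOMIC_RELAXED);	/* then make visible */
}

static int reader(void)
{
	struct node *n = __atomic_load_n(&shared, __ATOMIC_CONSUME);

	/* The load of n->data depends on the value of n, which is what
	 * lets every CPU except Alpha skip an explicit read barrier. */
	return n ? n->data : -1;
}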
- */ -static inline bool -zap_skip_check_mapping(struct zap_details *details, struct page *page) -{ - if (!details || !page) - return false; - - return details->zap_mapping && - (details->zap_mapping != page_rmapping(page)); -} - static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, @@ -1359,8 +1333,16 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, struct page *page; page = vm_normal_page(vma, addr, ptent); - if (unlikely(zap_skip_check_mapping(details, page))) - continue; + if (unlikely(details) && page) { + /* + * unmap_shared_mapping_pages() wants to + * invalidate cache without truncating: + * unmap shared but keep private pages. + */ + if (details->check_mapping && + details->check_mapping != page_rmapping(page)) + continue; + } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); tlb_remove_tlb_entry(tlb, pte, addr); @@ -1393,8 +1375,17 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, is_device_exclusive_entry(entry)) { struct page *page = pfn_swap_entry_to_page(entry); - if (unlikely(zap_skip_check_mapping(details, page))) - continue; + if (unlikely(details && details->check_mapping)) { + /* + * unmap_shared_mapping_pages() wants to + * invalidate cache without truncating: + * unmap shared but keep private pages. + */ + if (details->check_mapping != + page_rmapping(page)) + continue; + } + pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); rss[mm_counter(page)]--; @@ -1466,8 +1457,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ - } else if (details && details->single_folio && - folio_test_pmd_mappable(details->single_folio) && + } else if (details && details->single_page && + PageTransCompound(details->single_page) && next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { spinlock_t *ptl = pmd_lock(tlb->mm, pmd); /* @@ -2733,19 +2724,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range); * proceeding (but do_wp_page is only called after already making such a check; * and do_anonymous_page can safely check later on). 
*/ -static inline int pte_unmap_same(struct vm_fault *vmf) +static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, + pte_t *page_table, pte_t orig_pte) { int same = 1; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION) if (sizeof(pte_t) > sizeof(unsigned long)) { - spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); + spinlock_t *ptl = pte_lockptr(mm, pmd); spin_lock(ptl); - same = pte_same(*vmf->pte, vmf->orig_pte); + same = pte_same(*page_table, orig_pte); spin_unlock(ptl); } #endif - pte_unmap(vmf->pte); - vmf->pte = NULL; + pte_unmap(page_table); return same; } @@ -3028,7 +3019,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } } - if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL)) + if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) goto oom_free_new; cgroup_throttle_swaprate(new_page, GFP_KERNEL); @@ -3330,20 +3321,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma, } static inline void unmap_mapping_range_tree(struct rb_root_cached *root, - pgoff_t first_index, - pgoff_t last_index, struct zap_details *details) { struct vm_area_struct *vma; pgoff_t vba, vea, zba, zea; - vma_interval_tree_foreach(vma, root, first_index, last_index) { + vma_interval_tree_foreach(vma, root, + details->first_index, details->last_index) { + vba = vma->vm_pgoff; vea = vba + vma_pages(vma) - 1; - zba = first_index; + zba = details->first_index; if (zba < vba) zba = vba; - zea = last_index; + zea = details->last_index; if (zea > vea) zea = vea; @@ -3355,35 +3346,32 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root, } /** - * unmap_mapping_folio() - Unmap single folio from processes. - * @folio: The locked folio to be unmapped. + * unmap_mapping_page() - Unmap single page from processes. + * @page: The locked page to be unmapped. * - * Unmap this folio from any userspace process which still has it mmaped. + * Unmap this page from any userspace process which still has it mmaped. * Typically, for efficiency, the range of nearby pages has already been * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once - * truncation or invalidation holds the lock on a folio, it may find that - * the page has been remapped again: and then uses unmap_mapping_folio() + * truncation or invalidation holds the lock on a page, it may find that + * the page has been remapped again: and then uses unmap_mapping_page() * to unmap it finally. 
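[Illustrative aside: unmap_mapping_range_tree() above clamps the requested range of file pages to each vma's own span, all in page units. The vba/vea/zba/zea dance is just a max/min pair; a worked example with invented numbers:]

#include <stdio.h>

int main(void)
{
	/* vma maps file pages 10..14 (vm_pgoff = 10, vma_pages = 5) */
	unsigned long vba = 10, vea = 10 + 5 - 1;
	/* caller asked to zap file pages 8..12 */
	unsigned long zba = 8, zea = 12;

	if (zba < vba)		/* zba = max(first_index, vba) */
		zba = vba;
	if (zea > vea)		/* zea = min(last_index, vea) */
		zea = vea;

	printf("zap pages %lu..%lu of this vma\n", zba, zea);	/* 10..12 */
	return 0;
}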
*/ -void unmap_mapping_folio(struct folio *folio) +void unmap_mapping_page(struct page *page) { - struct address_space *mapping = folio->mapping; + struct address_space *mapping = page->mapping; struct zap_details details = { }; - pgoff_t first_index; - pgoff_t last_index; - VM_BUG_ON(!folio_test_locked(folio)); + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageTail(page)); - first_index = folio->index; - last_index = folio->index + folio_nr_pages(folio) - 1; - - details.zap_mapping = mapping; - details.single_folio = folio; + details.check_mapping = mapping; + details.first_index = page->index; + details.last_index = page->index + thp_nr_pages(page) - 1; + details.single_page = page; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) - unmap_mapping_range_tree(&mapping->i_mmap, first_index, - last_index, &details); + unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } @@ -3403,17 +3391,16 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { struct zap_details details = { }; - pgoff_t first_index = start; - pgoff_t last_index = start + nr - 1; - details.zap_mapping = even_cows ? NULL : mapping; - if (last_index < first_index) - last_index = ULONG_MAX; + details.check_mapping = even_cows ? NULL : mapping; + details.first_index = start; + details.last_index = start + nr - 1; + if (details.last_index < details.first_index) + details.last_index = ULONG_MAX; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) - unmap_mapping_range_tree(&mapping->i_mmap, first_index, - last_index, &details); + unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } EXPORT_SYMBOL_GPL(unmap_mapping_pages); @@ -3501,7 +3488,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vm_fault_t ret = 0; void *shadow = NULL; - if (!pte_unmap_same(vmf)) + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) goto out; entry = pte_to_swp_entry(vmf->orig_pte); @@ -3529,6 +3516,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (unlikely(!si)) goto out; + delayacct_set_flag(current, DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry, vma, vmf->address); swapcache = page; @@ -3551,8 +3539,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) shadow = get_shadow_from_swap_cache(entry); if (shadow) - workingset_refault(page_folio(page), - shadow); + workingset_refault(page, shadow); lru_cache_add(page); @@ -3576,6 +3563,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vmf->address, &vmf->ptl); if (likely(pte_same(*vmf->pte, vmf->orig_pte))) ret = VM_FAULT_OOM; + delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); goto unlock; } @@ -3589,11 +3577,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) * owner processes (which may be unknown at hwpoison time) */ ret = VM_FAULT_HWPOISON; + delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); goto out_release; } locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); + delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); if (!locked) { ret |= VM_FAULT_RETRY; goto out_release; @@ -3644,7 +3634,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); - if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) { + if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); vmf->flags &= ~FAULT_FLAG_WRITE; ret |= 
VM_FAULT_WRITE; @@ -3657,6 +3647,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) pte = pte_mkuffd_wp(pte); pte = pte_wrprotect(pte); } + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); + arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); vmf->orig_pte = pte; /* ksm created a completely new copy */ @@ -3667,9 +3659,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) do_page_add_anon_rmap(page, vma, vmf->address, exclusive); } - set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); - arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); - swap_free(entry); if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) @@ -3780,7 +3769,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (!page) goto oom; - if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) goto oom_free_page; cgroup_throttle_swaprate(page, GFP_KERNEL); @@ -3863,6 +3852,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; + smp_wmb(); /* See comment in __pte_alloc() */ } ret = vma->vm_ops->fault(vmf); @@ -3933,6 +3923,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; + smp_wmb(); /* See comment in __pte_alloc() */ } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); @@ -4045,10 +4036,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } - if (vmf->prealloc_pte) - pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); - else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) + if (vmf->prealloc_pte) { + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (likely(pmd_none(*vmf->pmd))) { + mm_inc_nr_ptes(vma->vm_mm); + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + spin_unlock(vmf->ptl); + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { return VM_FAULT_OOM; + } } /* See comment in handle_pte_fault() */ @@ -4157,6 +4155,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; + smp_wmb(); /* See comment in __pte_alloc() */ } return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); @@ -4203,8 +4202,7 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf) if (!vmf->cow_page) return VM_FAULT_OOM; - if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm, - GFP_KERNEL)) { + if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) { put_page(vmf->cow_page); return VM_FAULT_OOM; } @@ -4269,7 +4267,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults). * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __folio_lock_or_retry(). + * return value. See filemap_fault() and __lock_page_or_retry(). * If mmap_lock is released, vma may become invalid (for example * by other thread calling munmap()). */ @@ -4510,7 +4508,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) * concurrent faults). * * The mmap_lock may have been released depending on flags and our return value. - * See filemap_fault() and __folio_lock_or_retry(). + * See filemap_fault() and __lock_page_or_retry(). 
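[Illustrative aside: the DELAYACCT_PF_SWAPIN hunks above bracket the blocking part of do_swap_page() with a per-task flag so the stall is attributed to swapin; the flag must be cleared on every early-exit path, which is why the revert sprinkles clears before each goto. A toy version of the bracketing; names and the stub are invented.]

struct task { unsigned int flags; };
#define PF_SWAPIN	0x1u

static int blocking_swapin_io(void)	/* stub standing in for real I/O */
{
	return 0;
}

static int swapin(struct task *t)
{
	int ret;

	t->flags |= PF_SWAPIN;		/* start attributing delays */
	ret = blocking_swapin_io();	/* may block for a long time */
	t->flags &= ~PF_SWAPIN;		/* clear on this and every exit path */
	return ret;
}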
*/ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { @@ -4614,7 +4612,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __folio_lock_or_retry(). + * return value. See filemap_fault() and __lock_page_or_retry(). */ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -4770,7 +4768,7 @@ static inline void mm_account_fault(struct pt_regs *regs, * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __folio_lock_or_retry(). + * return value. See filemap_fault() and __lock_page_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) @@ -4831,13 +4829,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) if (!new) return -ENOMEM; + smp_wmb(); /* See comment in __pte_alloc */ + spin_lock(&mm->page_table_lock); - if (pgd_present(*pgd)) { /* Another has populated it */ + if (pgd_present(*pgd)) /* Another has populated it */ p4d_free(mm, new); - } else { - smp_wmb(); /* See comment in pmd_install() */ + else pgd_populate(mm, pgd, new); - } spin_unlock(&mm->page_table_lock); return 0; } @@ -4854,10 +4852,11 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) if (!new) return -ENOMEM; + smp_wmb(); /* See comment in __pte_alloc */ + spin_lock(&mm->page_table_lock); if (!p4d_present(*p4d)) { mm_inc_nr_puds(mm); - smp_wmb(); /* See comment in pmd_install() */ p4d_populate(mm, p4d, new); } else /* Another has populated it */ pud_free(mm, new); @@ -4878,14 +4877,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) if (!new) return -ENOMEM; + smp_wmb(); /* See comment in __pte_alloc */ + ptl = pud_lock(mm, pud); if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); - smp_wmb(); /* See comment in pmd_install() */ pud_populate(mm, pud, new); - } else { /* Another has populated it */ + } else /* Another has populated it */ pmd_free(mm, new); - } spin_unlock(ptl); return 0; } @@ -5266,7 +5265,7 @@ void __might_fault(const char *file, int line) return; if (pagefault_disabled()) return; - __might_sleep(file, line); + __might_sleep(file, line, 0); #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) if (current->mm) might_lock_read(¤t->mm->mmap_lock); @@ -5422,6 +5421,7 @@ long copy_huge_page_from_user(struct page *dst_page, unsigned int pages_per_huge_page, bool allow_pagefault) { + void *src = (void *)usr_src; void *page_kaddr; unsigned long i, rc = 0; unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; @@ -5434,7 +5434,8 @@ long copy_huge_page_from_user(struct page *dst_page, else page_kaddr = kmap_atomic(subpage); rc = copy_from_user(page_kaddr, - usr_src + i * PAGE_SIZE, PAGE_SIZE); + (const void __user *)(src + i * PAGE_SIZE), + PAGE_SIZE); if (allow_pagefault) kunmap(subpage); else diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2a9627dc78..9fd0be32a2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include @@ -57,7 +57,7 @@ enum { ONLINE_POLICY_AUTO_MOVABLE, }; -static const char * const online_policy_to_str[] = { +const char *online_policy_to_str[] = { 
[ONLINE_POLICY_CONTIG_ZONES] = "contig-zones", [ONLINE_POLICY_AUTO_MOVABLE] = "auto-movable", }; @@ -220,6 +220,7 @@ static void release_memory_resource(struct resource *res) kfree(res); } +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, const char *reason) { @@ -585,6 +586,10 @@ void generic_online_page(struct page *page, unsigned int order) debug_pagealloc_map_pages(page, 1 << order); __free_pages_core(page, order); totalram_pages_add(1UL << order); +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page)) + totalhigh_pages_add(1UL << order); +#endif } EXPORT_SYMBOL_GPL(generic_online_page); @@ -621,11 +626,16 @@ static void node_states_check_changes_online(unsigned long nr_pages, arg->status_change_nid = NUMA_NO_NODE; arg->status_change_nid_normal = NUMA_NO_NODE; + arg->status_change_nid_high = NUMA_NO_NODE; if (!node_state(nid, N_MEMORY)) arg->status_change_nid = nid; if (zone_idx(zone) <= ZONE_NORMAL && !node_state(nid, N_NORMAL_MEMORY)) arg->status_change_nid_normal = nid; +#ifdef CONFIG_HIGHMEM + if (zone_idx(zone) <= ZONE_HIGHMEM && !node_state(nid, N_HIGH_MEMORY)) + arg->status_change_nid_high = nid; +#endif } static void node_states_set_node(int node, struct memory_notify *arg) @@ -633,6 +643,9 @@ static void node_states_set_node(int node, struct memory_notify *arg) if (arg->status_change_nid_normal >= 0) node_set_state(node, N_NORMAL_MEMORY); + if (arg->status_change_nid_high >= 0) + node_set_state(node, N_HIGH_MEMORY); + if (arg->status_change_nid >= 0) node_set_state(node, N_MEMORY); } @@ -1150,6 +1163,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, mem_hotplug_done(); return ret; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ static void reset_node_present_pages(pg_data_t *pgdat) { @@ -1343,7 +1357,6 @@ bool mhp_supports_memmap_on_memory(unsigned long size) int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; - enum memblock_flags memblock_flags = MEMBLOCK_NONE; struct vmem_altmap mhp_altmap = {}; struct memory_group *group = NULL; u64 start, size; @@ -1371,13 +1384,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) mem_hotplug_begin(); - if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { - if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED) - memblock_flags = MEMBLOCK_DRIVER_MANAGED; - ret = memblock_add_node(start, size, nid, memblock_flags); - if (ret) - goto error_mem_hotplug_end; - } + if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) + memblock_add_node(start, size, nid); ret = __try_online_node(nid, false); if (ret < 0) @@ -1450,7 +1458,6 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) rollback_node_hotadd(nid); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); -error_mem_hotplug_end: mem_hotplug_done(); return ret; } @@ -1796,6 +1803,7 @@ static void node_states_check_changes_offline(unsigned long nr_pages, arg->status_change_nid = NUMA_NO_NODE; arg->status_change_nid_normal = NUMA_NO_NODE; + arg->status_change_nid_high = NUMA_NO_NODE; /* * Check whether node_states[N_NORMAL_MEMORY] will be changed. 
@@ -1810,9 +1818,24 @@ static void node_states_check_changes_offline(unsigned long nr_pages, if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages) arg->status_change_nid_normal = zone_to_nid(zone); +#ifdef CONFIG_HIGHMEM /* - * We have accounted the pages from [0..ZONE_NORMAL); ZONE_HIGHMEM - * does not apply as we don't support 32bit. + * node_states[N_HIGH_MEMORY] contains nodes which + * have normal memory or high memory. + * Here we add the present_pages belonging to ZONE_HIGHMEM. + * If the zone is within the range of [0..ZONE_HIGHMEM), and + * we determine that the zones in that range become empty, + * we need to clear the node for N_HIGH_MEMORY. + */ + present_pages += pgdat->node_zones[ZONE_HIGHMEM].present_pages; + if (zone_idx(zone) <= ZONE_HIGHMEM && nr_pages >= present_pages) + arg->status_change_nid_high = zone_to_nid(zone); +#endif + + /* + * We have accounted the pages from [0..ZONE_NORMAL), and + * in case of CONFIG_HIGHMEM the pages from ZONE_HIGHMEM + * as well. * Here we count the possible pages from ZONE_MOVABLE. * If after having accounted all the pages, we see that the nr_pages * to be offlined is over or equal to the accounted pages, @@ -1830,6 +1853,9 @@ static void node_states_clear_node(int node, struct memory_notify *arg) if (arg->status_change_nid_normal >= 0) node_clear_state(node, N_NORMAL_MEMORY); + if (arg->status_change_nid_high >= 0) + node_clear_state(node, N_HIGH_MEMORY); + if (arg->status_change_nid >= 0) node_clear_state(node, N_MEMORY); } @@ -2178,7 +2204,7 @@ static int __ref try_remove_memory(u64 start, u64 size) arch_remove_memory(start, size, altmap); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { - memblock_phys_free(start, size); + memblock_free(start, size); memblock_remove(start, size); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 028e8dd82b..fa9ed9c987 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -134,8 +134,6 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES]; * @node: Node id to start the search * * Lookup the next closest node by distance if @nid is not online. - * - * Return: this @node if it is online, otherwise the closest node by distance */ int numa_map_to_online_node(int node) { @@ -298,7 +296,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, atomic_set(&policy->refcnt, 1); policy->mode = mode; policy->flags = flags; - policy->home_node = NUMA_NO_NODE; return policy; } @@ -813,8 +810,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, ((vmstart - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, - new_pol, vma->vm_userfaultfd_ctx, - vma_anon_name(vma)); + new_pol, vma->vm_userfaultfd_ctx); if (prev) { vma = prev; next = vma->vm_next; @@ -1481,77 +1477,6 @@ static long kernel_mbind(unsigned long start, unsigned long len, return do_mbind(start, len, lmode, mode_flags, &nodes, flags); } -SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len, - unsigned long, home_node, unsigned long, flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - struct mempolicy *new; - unsigned long vmstart; - unsigned long vmend; - unsigned long end; - int err = -ENOENT; - - start = untagged_addr(start); - if (start & ~PAGE_MASK) - return -EINVAL; - /* - * flags is used for future extension if any. - */ - if (flags != 0) - return -EINVAL; - - /* - * Check home_node is online to avoid accessing uninitialized - * NODE_DATA. 
- */ - if (home_node >= MAX_NUMNODES || !node_online(home_node)) - return -EINVAL; - - len = (len + PAGE_SIZE - 1) & PAGE_MASK; - end = start + len; - - if (end < start) - return -EINVAL; - if (end == start) - return 0; - mmap_write_lock(mm); - vma = find_vma(mm, start); - for (; vma && vma->vm_start < end; vma = vma->vm_next) { - - vmstart = max(start, vma->vm_start); - vmend = min(end, vma->vm_end); - new = mpol_dup(vma_policy(vma)); - if (IS_ERR(new)) { - err = PTR_ERR(new); - break; - } - /* - * Only update home node if there is an existing vma policy - */ - if (!new) - continue; - - /* - * If any vma in the range got policy other than MPOL_BIND - * or MPOL_PREFERRED_MANY we return error. We don't reset - * the home node for vmas we already updated before. - */ - if (new->mode != MPOL_BIND && new->mode != MPOL_PREFERRED_MANY) { - err = -EOPNOTSUPP; - break; - } - - new->home_node = home_node; - err = mbind_range(mm, vmstart, vmend, new); - mpol_put(new); - if (err) - break; - } - mmap_write_unlock(mm); - return err; -} - SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, unsigned long, mode, const unsigned long __user *, nmask, unsigned long, maxnode, unsigned int, flags) @@ -1876,11 +1801,6 @@ static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd) WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE)); } - if ((policy->mode == MPOL_BIND || - policy->mode == MPOL_PREFERRED_MANY) && - policy->home_node != NUMA_NO_NODE) - return policy->home_node; - return nd; } @@ -2141,7 +2061,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); page = __alloc_pages(preferred_gfp, order, nid, &pol->nodes); if (!page) - page = __alloc_pages(gfp, order, nid, NULL); + page = __alloc_pages(gfp, order, numa_node_id(), NULL); return page; } @@ -2152,6 +2072,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, * @order: Order of the GFP allocation. * @vma: Pointer to VMA or NULL if not available. * @addr: Virtual address of the allocation. Must be inside @vma. + * @node: Which node to prefer for allocation (modulo policy). * @hugepage: For hugepages try only the preferred node if possible. * * Allocate a page for a specific address in @vma, using the appropriate @@ -2162,10 +2083,9 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, * Return: The page on success or NULL if allocation fails. 
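[Illustrative aside: alloc_pages_preferred_many() above is a two-pass allocator: the first attempt is restricted to the preferred node set and stripped of the expensive flags (direct reclaim, nofail) with warnings suppressed, and only if that fails does a second attempt run with the caller's full gfp mask and no nodemask restriction. A hedged sketch of the shape, with invented flag bits and a stub allocator prototype:]

#include <stddef.h>

#define GFP_DIRECT_RECLAIM	0x1u	/* invented flag bits */
#define GFP_NOFAIL		0x2u
#define GFP_NOWARN		0x4u

struct page;
struct page *try_alloc(unsigned int gfp, const unsigned long *nodemask);

static struct page *alloc_preferred_many(unsigned int gfp,
					 const unsigned long *preferred)
{
	unsigned int cheap = (gfp | GFP_NOWARN) &
			     ~(GFP_DIRECT_RECLAIM | GFP_NOFAIL);
	struct page *page;

	page = try_alloc(cheap, preferred);	/* pass 1: preferred nodes */
	if (!page)
		page = try_alloc(gfp, NULL);	/* pass 2: full-strength */
	return page;
}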
*/ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, bool hugepage) + unsigned long addr, int node, bool hugepage) { struct mempolicy *pol; - int node = numa_node_id(); struct page *page; int preferred_nid; nodemask_t *nmask; @@ -2182,7 +2102,6 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, } if (pol->mode == MPOL_PREFERRED_MANY) { - node = policy_node(gfp, pol, node); page = alloc_pages_preferred_many(gfp, order, node, pol); mpol_cond_put(pol); goto out; @@ -2266,7 +2185,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); else if (pol->mode == MPOL_PREFERRED_MANY) page = alloc_pages_preferred_many(gfp, order, - policy_node(gfp, pol, numa_node_id()), pol); + numa_node_id(), pol); else page = __alloc_pages(gfp, order, policy_node(gfp, pol, numa_node_id()), @@ -2276,98 +2195,6 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages); -struct folio *folio_alloc(gfp_t gfp, unsigned order) -{ - struct page *page = alloc_pages(gfp | __GFP_COMP, order); - - if (page && order > 1) - prep_transhuge_page(page); - return (struct folio *)page; -} -EXPORT_SYMBOL(folio_alloc); - -static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, - struct mempolicy *pol, unsigned long nr_pages, - struct page **page_array) -{ - int nodes; - unsigned long nr_pages_per_node; - int delta; - int i; - unsigned long nr_allocated; - unsigned long total_allocated = 0; - - nodes = nodes_weight(pol->nodes); - nr_pages_per_node = nr_pages / nodes; - delta = nr_pages - nodes * nr_pages_per_node; - - for (i = 0; i < nodes; i++) { - if (delta) { - nr_allocated = __alloc_pages_bulk(gfp, - interleave_nodes(pol), NULL, - nr_pages_per_node + 1, NULL, - page_array); - delta--; - } else { - nr_allocated = __alloc_pages_bulk(gfp, - interleave_nodes(pol), NULL, - nr_pages_per_node, NULL, page_array); - } - - page_array += nr_allocated; - total_allocated += nr_allocated; - } - - return total_allocated; -} - -static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, - struct mempolicy *pol, unsigned long nr_pages, - struct page **page_array) -{ - gfp_t preferred_gfp; - unsigned long nr_allocated = 0; - - preferred_gfp = gfp | __GFP_NOWARN; - preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); - - nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes, - nr_pages, NULL, page_array); - - if (nr_allocated < nr_pages) - nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL, - nr_pages - nr_allocated, NULL, - page_array + nr_allocated); - return nr_allocated; -} - -/* alloc pages bulk and mempolicy should be considered at the - * same time in some situation such as vmalloc. - * - * It can accelerate memory allocation especially interleaving - * allocate memory. 
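The removed alloc_pages_bulk_array_interleave() above splits nr_pages evenly over the interleave nodes and hands the remainder out one page at a time to the first nodes visited. That arithmetic in isolation, as a runnable toy:

#include <stdio.h>

int main(void)
{
	unsigned long nr_pages = 10;	/* pages requested */
	int nodes = 3;			/* weight of the interleave mask */
	unsigned long per_node = nr_pages / nodes;
	int delta = nr_pages - nodes * per_node;
	unsigned long total = 0;

	for (int i = 0; i < nodes; i++) {
		unsigned long chunk = per_node + (delta ? 1 : 0);

		if (delta)
			delta--;
		printf("node pass %d: %lu pages\n", i, chunk);
		total += chunk;
	}
	printf("total %lu\n", total);	/* always equals nr_pages */
	return 0;
}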
- */ -unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, - unsigned long nr_pages, struct page **page_array) -{ - struct mempolicy *pol = &default_policy; - - if (!in_interrupt() && !(gfp & __GFP_THISNODE)) - pol = get_task_policy(current); - - if (pol->mode == MPOL_INTERLEAVE) - return alloc_pages_bulk_array_interleave(gfp, pol, - nr_pages, page_array); - - if (pol->mode == MPOL_PREFERRED_MANY) - return alloc_pages_bulk_array_preferred_many(gfp, - numa_node_id(), pol, nr_pages, page_array); - - return __alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()), - policy_nodemask(gfp, pol), nr_pages, NULL, - page_array); -} - int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(vma_policy(src)); @@ -2422,8 +2249,6 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) return false; if (a->flags != b->flags) return false; - if (a->home_node != b->home_node) - return false; if (mpol_store_user_nodemask(a)) if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) return false; @@ -2967,7 +2792,7 @@ static const char * const policy_modes[] = * Format of input: * [=][:] * - * Return: %0 on success, else %1 + * On success, returns 0, else 1 */ int mpol_parse_str(char *str, struct mempolicy **mpol) { @@ -3149,3 +2974,64 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) p += scnprintf(p, buffer + maxlen - p, ":%*pbl", nodemask_pr_args(&nodes)); } + +bool numa_demotion_enabled = false; + +#ifdef CONFIG_SYSFS +static ssize_t numa_demotion_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", + numa_demotion_enabled? "true" : "false"); +} + +static ssize_t numa_demotion_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) + numa_demotion_enabled = true; + else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) + numa_demotion_enabled = false; + else + return -EINVAL; + + return count; +} + +static struct kobj_attribute numa_demotion_enabled_attr = + __ATTR(demotion_enabled, 0644, numa_demotion_enabled_show, + numa_demotion_enabled_store); + +static struct attribute *numa_attrs[] = { + &numa_demotion_enabled_attr.attr, + NULL, +}; + +static const struct attribute_group numa_attr_group = { + .attrs = numa_attrs, +}; + +static int __init numa_init_sysfs(void) +{ + int err; + struct kobject *numa_kobj; + + numa_kobj = kobject_create_and_add("numa", mm_kobj); + if (!numa_kobj) { + pr_err("failed to create numa kobject\n"); + return -ENOMEM; + } + err = sysfs_create_group(numa_kobj, &numa_attr_group); + if (err) { + pr_err("failed to register numa group\n"); + goto delete_obj; + } + return 0; + +delete_obj: + kobject_put(numa_kobj); + return err; +} +subsys_initcall(numa_init_sysfs); +#endif diff --git a/mm/mempool.c b/mm/mempool.c index b933d0fc21..0b8afbec3e 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "slab.h" diff --git a/mm/memremap.c b/mm/memremap.c index 6aa5f0c2d1..ed593bf871 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -102,22 +102,39 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id) return (range->start + range_len(range)) >> PAGE_SHIFT; } -static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn) +static unsigned long pfn_next(unsigned long pfn) { - if (pfn % (1024 << pgmap->vmemmap_shift)) + if (pfn % 1024 == 0) 
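The demotion_enabled store handler added above accepts "true"/"1" and "false"/"0" via strncmp(). A standalone rendering of that parse; note that, exactly like the strncmp() form in the hunk, it tolerates trailing characters such as the newline echo appends:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static int parse_bool(const char *buf, bool *val)
{
	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
		*val = true;
	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
		*val = false;
	else
		return -EINVAL;
	return 0;
}

int main(void)
{
	bool v;

	/* "true\n" is what `echo true > .../demotion_enabled` delivers */
	if (!parse_bool("true\n", &v))
		printf("demotion_enabled = %s\n", v ? "true" : "false");
	return 0;
}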
cond_resched(); - return pfn + pgmap_vmemmap_nr(pgmap); -} - -static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id) -{ - return (pfn_end(pgmap, range_id) - - pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift; + return pfn + 1; } #define for_each_device_pfn(pfn, map, i) \ - for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); \ - pfn = pfn_next(map, pfn)) + for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn)) + +static void dev_pagemap_kill(struct dev_pagemap *pgmap) +{ + if (pgmap->ops && pgmap->ops->kill) + pgmap->ops->kill(pgmap); + else + percpu_ref_kill(pgmap->ref); +} + +static void dev_pagemap_cleanup(struct dev_pagemap *pgmap) +{ + if (pgmap->ops && pgmap->ops->cleanup) { + pgmap->ops->cleanup(pgmap); + } else { + wait_for_completion(&pgmap->done); + percpu_ref_exit(pgmap->ref); + } + /* + * Undo the pgmap ref assignment for the internal case as the + * caller may re-enable the same pgmap. + */ + if (pgmap->ref == &pgmap->internal_ref) + pgmap->ref = NULL; +} static void pageunmap_range(struct dev_pagemap *pgmap, int range_id) { @@ -150,12 +167,11 @@ void memunmap_pages(struct dev_pagemap *pgmap) unsigned long pfn; int i; - percpu_ref_kill(&pgmap->ref); + dev_pagemap_kill(pgmap); for (i = 0; i < pgmap->nr_range; i++) for_each_device_pfn(pfn, pgmap, i) put_page(pfn_to_page(pfn)); - wait_for_completion(&pgmap->done); - percpu_ref_exit(&pgmap->ref); + dev_pagemap_cleanup(pgmap); for (i = 0; i < pgmap->nr_range; i++) pageunmap_range(pgmap, i); @@ -172,7 +188,8 @@ static void devm_memremap_pages_release(void *data) static void dev_pagemap_percpu_release(struct percpu_ref *ref) { - struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref); + struct dev_pagemap *pgmap = + container_of(ref, struct dev_pagemap, internal_ref); complete(&pgmap->done); } @@ -278,7 +295,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params, memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], PHYS_PFN(range->start), PHYS_PFN(range_len(range)), pgmap); - percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id)); + percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id) + - pfn_first(pgmap, range_id)); return 0; err_add_memory: @@ -344,11 +362,22 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) break; } - init_completion(&pgmap->done); - error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0, - GFP_KERNEL); - if (error) - return ERR_PTR(error); + if (!pgmap->ref) { + if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup)) + return ERR_PTR(-EINVAL); + + init_completion(&pgmap->done); + error = percpu_ref_init(&pgmap->internal_ref, + dev_pagemap_percpu_release, 0, GFP_KERNEL); + if (error) + return ERR_PTR(error); + pgmap->ref = &pgmap->internal_ref; + } else { + if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) { + WARN(1, "Missing reference count teardown definition\n"); + return ERR_PTR(-EINVAL); + } + } devmap_managed_enable_get(pgmap); @@ -457,7 +486,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, /* fall back to slow path lookup */ rcu_read_lock(); pgmap = xa_load(&pgmap_array, PHYS_PFN(phys)); - if (pgmap && !percpu_ref_tryget_live(&pgmap->ref)) + if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) pgmap = NULL; rcu_read_unlock(); @@ -476,7 +505,7 @@ void free_devmap_managed_page(struct page *page) __ClearPageWaiters(page); - mem_cgroup_uncharge(page_folio(page)); + mem_cgroup_uncharge(page); /* * When a device_private page is freed, the page->mapping 
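memremap_pages() above regains its dual ownership model: either the caller supplies its own ref together with both kill and cleanup ops, or the pgmap falls back to an internally managed percpu ref. A simplified, self-contained model of that either/or teardown (the types are stand-ins, not the kernel's):

#include <stdio.h>
#include <stddef.h>

struct pagemap;
struct pagemap_ops {
	void (*kill)(struct pagemap *p);
	void (*cleanup)(struct pagemap *p);
};

struct ref { int count; };

struct pagemap {
	const struct pagemap_ops *ops;
	struct ref *ref;		/* caller-provided or &internal_ref */
	struct ref internal_ref;
};

static int pagemap_init(struct pagemap *p)
{
	if (!p->ref) {
		/* internal mode: partial ops make no sense, reject them */
		if (p->ops && (p->ops->kill || p->ops->cleanup))
			return -1;	/* kernel returns -EINVAL */
		p->internal_ref.count = 1;
		p->ref = &p->internal_ref;
	} else if (!p->ops || !p->ops->kill || !p->ops->cleanup) {
		return -1;	/* external ref requires full teardown ops */
	}
	return 0;
}

static void pagemap_kill(struct pagemap *p)
{
	if (p->ops && p->ops->kill)
		p->ops->kill(p);
	else
		p->ref->count = 0;	/* percpu_ref_kill() stand-in */
}

static void pagemap_cleanup(struct pagemap *p)
{
	if (p->ops && p->ops->cleanup) {
		p->ops->cleanup(p);
	} else {
		/* internal mode: the kernel waits for the ref to drain
		 * (wait_for_completion) and percpu_ref_exit()s it here */
	}
	if (p->ref == &p->internal_ref)
		p->ref = NULL;	/* the caller may re-enable the pgmap */
}

int main(void)
{
	struct pagemap p = { 0 };

	if (pagemap_init(&p) == 0) {
		pagemap_kill(&p);
		pagemap_cleanup(&p);
		printf("internal-ref teardown ok\n");
	}
	return 0;
}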
field diff --git a/mm/migrate.c b/mm/migrate.c index c7da064b47..1852d787e6 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -50,7 +50,6 @@ #include #include #include -#include #include @@ -237,19 +236,20 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, pte = pte_mkhuge(pte); pte = arch_make_huge_pte(pte, shift, vma->vm_flags); + set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); if (PageAnon(new)) hugepage_add_anon_rmap(new, vma, pvmw.address); else page_dup_rmap(new, true); - set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); } else #endif { + set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); + if (PageAnon(new)) page_add_anon_rmap(new, vma, pvmw.address, false); else page_add_file_rmap(new, false); - set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); } if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) mlock_vma_page(new); @@ -291,6 +291,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, { pte_t pte; swp_entry_t entry; + struct page *page; spin_lock(ptl); pte = *ptep; @@ -301,7 +302,18 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, if (!is_migration_entry(entry)) goto out; - migration_entry_wait_on_locked(entry, ptep, ptl); + page = pfn_swap_entry_to_page(entry); + page = compound_head(page); + + /* + * Once page cache replacement of page migration started, page_count + * is zero; but we must not call put_and_wait_on_page_locked() without + * a ref. Use get_page_unless_zero(), and just fault again if it fails. + */ + if (!get_page_unless_zero(page)) + goto out; + pte_unmap_unlock(ptep, ptl); + put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); return; out: pte_unmap_unlock(ptep, ptl); @@ -326,11 +338,16 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) { spinlock_t *ptl; + struct page *page; ptl = pmd_lock(mm, pmd); if (!is_pmd_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), NULL, ptl); + page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)); + if (!get_page_unless_zero(page)) + goto unlock; + spin_unlock(ptl); + put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); return; unlock: spin_unlock(ptl); @@ -347,7 +364,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) */ expected_count += is_device_private_page(page); if (mapping) - expected_count += compound_nr(page) + page_has_private(page); + expected_count += thp_nr_pages(page) + page_has_private(page); return expected_count; } @@ -360,70 +377,83 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) * 2 for pages with a mapping * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. 
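__migration_entry_wait() reverts here to the speculative-reference idiom: never sleep on a page you hold no reference to, and if the count has already hit zero, back off and let the fault be retried. The heart of get_page_unless_zero() is a CAS loop that refuses to resurrect a zero count; a userspace analogue with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool get_ref_unless_zero(atomic_int *refcount)
{
	int old = atomic_load(refcount);

	while (old != 0) {
		/* on failure, 'old' is refreshed with the current value */
		if (atomic_compare_exchange_weak(refcount, &old, old + 1))
			return true;	/* safe to sleep on this object */
	}
	return false;			/* already dying: caller refaults */
}

int main(void)
{
	atomic_int live = 2, dying = 0;

	printf("live: %d, dying: %d\n",
	       get_ref_unless_zero(&live), get_ref_unless_zero(&dying));
	return 0;
}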
*/ -int folio_migrate_mapping(struct address_space *mapping, - struct folio *newfolio, struct folio *folio, int extra_count) +int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, int extra_count) { - XA_STATE(xas, &mapping->i_pages, folio_index(folio)); + XA_STATE(xas, &mapping->i_pages, page_index(page)); struct zone *oldzone, *newzone; int dirty; - int expected_count = expected_page_refs(mapping, &folio->page) + extra_count; - long nr = folio_nr_pages(folio); + int expected_count = expected_page_refs(mapping, page) + extra_count; + int nr = thp_nr_pages(page); if (!mapping) { /* Anonymous page without mapping */ - if (folio_ref_count(folio) != expected_count) + if (page_count(page) != expected_count) return -EAGAIN; /* No turning back from here */ - newfolio->index = folio->index; - newfolio->mapping = folio->mapping; - if (folio_test_swapbacked(folio)) - __folio_set_swapbacked(newfolio); + newpage->index = page->index; + newpage->mapping = page->mapping; + if (PageSwapBacked(page)) + __SetPageSwapBacked(newpage); return MIGRATEPAGE_SUCCESS; } - oldzone = folio_zone(folio); - newzone = folio_zone(newfolio); + oldzone = page_zone(page); + newzone = page_zone(newpage); xas_lock_irq(&xas); - if (!folio_ref_freeze(folio, expected_count)) { + if (page_count(page) != expected_count || xas_load(&xas) != page) { + xas_unlock_irq(&xas); + return -EAGAIN; + } + + if (!page_ref_freeze(page, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; } /* - * Now we know that no one else is looking at the folio: + * Now we know that no one else is looking at the page: * no turning back from here. */ - newfolio->index = folio->index; - newfolio->mapping = folio->mapping; - folio_ref_add(newfolio, nr); /* add cache reference */ - if (folio_test_swapbacked(folio)) { - __folio_set_swapbacked(newfolio); - if (folio_test_swapcache(folio)) { - folio_set_swapcache(newfolio); - newfolio->private = folio_get_private(folio); + newpage->index = page->index; + newpage->mapping = page->mapping; + page_ref_add(newpage, nr); /* add cache reference */ + if (PageSwapBacked(page)) { + __SetPageSwapBacked(newpage); + if (PageSwapCache(page)) { + SetPageSwapCache(newpage); + set_page_private(newpage, page_private(page)); } } else { - VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); + VM_BUG_ON_PAGE(PageSwapCache(page), page); } /* Move dirty while page refs frozen and newpage not yet exposed */ - dirty = folio_test_dirty(folio); + dirty = PageDirty(page); if (dirty) { - folio_clear_dirty(folio); - folio_set_dirty(newfolio); + ClearPageDirty(page); + SetPageDirty(newpage); } - xas_store(&xas, newfolio); + xas_store(&xas, newpage); + if (PageTransHuge(page)) { + int i; + + for (i = 1; i < nr; i++) { + xas_next(&xas); + xas_store(&xas, newpage); + } + } /* * Drop cache reference from old page by unfreezing * to one less reference. * We know this isn't the last reference. 
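The restored migrate_page_move_mapping() depends on the refcount-freeze trick: atomically replace a known expected count with 0 so no new references can appear while the cache slots are rewritten, then publish expected - nr once the new page carries the cache references. The same dance modeled with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* succeed only if *ref still equals the expected count, leaving 0 */
static bool ref_freeze(atomic_int *ref, int expected)
{
	int e = expected;

	return atomic_compare_exchange_strong(ref, &e, 0);
}

/* publish the post-migration count; the old page drops the cache refs */
static void ref_unfreeze(atomic_int *ref, int count)
{
	atomic_store(ref, count);
}

int main(void)
{
	atomic_int page_ref = 3;	/* e.g. cache + private + ours */
	int expected = 3, nr = 1;

	if (ref_freeze(&page_ref, expected)) {
		/* ... switch the mapping slots to the new page here ... */
		ref_unfreeze(&page_ref, expected - nr);
	}
	printf("refcount now %d\n", atomic_load(&page_ref));
	return 0;
}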
*/ - folio_ref_unfreeze(folio, expected_count - nr); + page_ref_unfreeze(page, expected_count - nr); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -442,18 +472,18 @@ int folio_migrate_mapping(struct address_space *mapping, struct lruvec *old_lruvec, *new_lruvec; struct mem_cgroup *memcg; - memcg = folio_memcg(folio); + memcg = page_memcg(page); old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); - if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) { + if (PageSwapBacked(page) && !PageSwapCache(page)) { __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); } #ifdef CONFIG_SWAP - if (folio_test_swapcache(folio)) { + if (PageSwapCache(page)) { __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr); __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr); } @@ -469,11 +499,11 @@ int folio_migrate_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } -EXPORT_SYMBOL(folio_migrate_mapping); +EXPORT_SYMBOL(migrate_page_move_mapping); /* * The expected number of remaining references is the same as that - * of folio_migrate_mapping(). + * of migrate_page_move_mapping(). */ int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) @@ -508,87 +538,91 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* - * Copy the flags and some other ancillary information + * Copy the page to its new location */ -void folio_migrate_flags(struct folio *newfolio, struct folio *folio) +void migrate_page_states(struct page *newpage, struct page *page) { int cpupid; - if (folio_test_error(folio)) - folio_set_error(newfolio); - if (folio_test_referenced(folio)) - folio_set_referenced(newfolio); - if (folio_test_uptodate(folio)) - folio_mark_uptodate(newfolio); - if (folio_test_clear_active(folio)) { - VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); - folio_set_active(newfolio); - } else if (folio_test_clear_unevictable(folio)) - folio_set_unevictable(newfolio); - if (folio_test_workingset(folio)) - folio_set_workingset(newfolio); - if (folio_test_checked(folio)) - folio_set_checked(newfolio); - if (folio_test_mappedtodisk(folio)) - folio_set_mappedtodisk(newfolio); + if (PageError(page)) + SetPageError(newpage); + if (PageReferenced(page)) + SetPageReferenced(newpage); + if (PageUptodate(page)) + SetPageUptodate(newpage); + if (TestClearPageActive(page)) { + VM_BUG_ON_PAGE(PageUnevictable(page), page); + SetPageActive(newpage); + } else if (TestClearPageUnevictable(page)) + SetPageUnevictable(newpage); + if (PageWorkingset(page)) + SetPageWorkingset(newpage); + if (PageChecked(page)) + SetPageChecked(newpage); + if (PageMappedToDisk(page)) + SetPageMappedToDisk(newpage); - /* Move dirty on pages not done by folio_migrate_mapping() */ - if (folio_test_dirty(folio)) - folio_set_dirty(newfolio); + /* Move dirty on pages not done by migrate_page_move_mapping() */ + if (PageDirty(page)) + SetPageDirty(newpage); - if (folio_test_young(folio)) - folio_set_young(newfolio); - if (folio_test_idle(folio)) - folio_set_idle(newfolio); + if (page_is_young(page)) + set_page_young(newpage); + if (page_is_idle(page)) + set_page_idle(newpage); /* * Copy NUMA information to the new page, to prevent over-eager * future migrations of this same page. 
*/ - cpupid = page_cpupid_xchg_last(&folio->page, -1); - page_cpupid_xchg_last(&newfolio->page, cpupid); + cpupid = page_cpupid_xchg_last(page, -1); + page_cpupid_xchg_last(newpage, cpupid); - folio_migrate_ksm(newfolio, folio); + ksm_migrate_page(newpage, page); /* * Please do not reorder this without considering how mm/ksm.c's * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache(). */ - if (folio_test_swapcache(folio)) - folio_clear_swapcache(folio); - folio_clear_private(folio); + if (PageSwapCache(page)) + ClearPageSwapCache(page); + ClearPagePrivate(page); /* page->private contains hugetlb specific flags */ - if (!folio_test_hugetlb(folio)) - folio->private = NULL; + if (!PageHuge(page)) + set_page_private(page, 0); /* * If any waiters have accumulated on the new page then * wake them up. */ - if (folio_test_writeback(newfolio)) - folio_end_writeback(newfolio); + if (PageWriteback(newpage)) + end_page_writeback(newpage); /* * PG_readahead shares the same bit with PG_reclaim. The above * end_page_writeback() may clear PG_readahead mistakenly, so set the * bit after that. */ - if (folio_test_readahead(folio)) - folio_set_readahead(newfolio); + if (PageReadahead(page)) + SetPageReadahead(newpage); - folio_copy_owner(newfolio, folio); + copy_page_owner(page, newpage); - if (!folio_test_hugetlb(folio)) - mem_cgroup_migrate(folio, newfolio); + if (!PageHuge(page)) + mem_cgroup_migrate(page, newpage); } -EXPORT_SYMBOL(folio_migrate_flags); +EXPORT_SYMBOL(migrate_page_states); -void folio_migrate_copy(struct folio *newfolio, struct folio *folio) +void migrate_page_copy(struct page *newpage, struct page *page) { - folio_copy(newfolio, folio); - folio_migrate_flags(newfolio, folio); + if (PageHuge(page) || PageTransHuge(page)) + copy_huge_page(newpage, page); + else + copy_highpage(newpage, page); + + migrate_page_states(newpage, page); } -EXPORT_SYMBOL(folio_migrate_copy); +EXPORT_SYMBOL(migrate_page_copy); /************************************************************ * Migration functions @@ -604,21 +638,19 @@ int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) { - struct folio *newfolio = page_folio(newpage); - struct folio *folio = page_folio(page); int rc; - BUG_ON(folio_test_writeback(folio)); /* Writeback must be complete */ + BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = folio_migrate_mapping(mapping, newfolio, folio, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(newfolio, folio); + migrate_page_copy(newpage, page); else - folio_migrate_flags(newfolio, folio); + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL(migrate_page); @@ -1068,6 +1100,80 @@ static int __unmap_and_move(struct page *page, struct page *newpage, return rc; } + +/* + * node_demotion[] example: + * + * Consider a system with two sockets. Each socket has + * three classes of memory attached: fast, medium and slow. + * Each memory class is placed in its own NUMA node. The + * CPUs are placed in the node with the "fast" memory. The + * 6 NUMA nodes (0-5) might be split among the sockets like + * this: + * + * Socket A: 0, 1, 2 + * Socket B: 3, 4, 5 + * + * When Node 0 fills up, its memory should be migrated to + * Node 1. When Node 1 fills up, it should be migrated to + * Node 2. 
The migration path start on the nodes with the + * processors (since allocations default to this node) and + * fast memory, progress through medium and end with the + * slow memory: + * + * 0 -> 1 -> 2 -> stop + * 3 -> 4 -> 5 -> stop + * + * This is represented in the node_demotion[] like this: + * + * { 1, // Node 0 migrates to 1 + * 2, // Node 1 migrates to 2 + * -1, // Node 2 does not migrate + * 4, // Node 3 migrates to 4 + * 5, // Node 4 migrates to 5 + * -1} // Node 5 does not migrate + */ + +/* + * Writes to this array occur without locking. Cycles are + * not allowed: Node X demotes to Y which demotes to X... + * + * If multiple reads are performed, a single rcu_read_lock() + * must be held over all reads to ensure that no cycles are + * observed. + */ +static int node_demotion[MAX_NUMNODES] __read_mostly = + {[0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE}; + +/** + * next_demotion_node() - Get the next node in the demotion path + * @node: The starting node to lookup the next node + * + * Return: node id for next memory node in the demotion path hierarchy + * from @node; NUMA_NO_NODE if @node is terminal. This does not keep + * @node online or guarantee that it *continues* to be the next demotion + * target. + */ +int next_demotion_node(int node) +{ + int target; + + /* + * node_demotion[] is updated without excluding this + * function from running. RCU doesn't provide any + * compiler barriers, so the READ_ONCE() is required + * to avoid compiler reordering or read merging. + * + * Make sure to use RCU over entire code blocks if + * node_demotion[] reads need to be consistent. + */ + rcu_read_lock(); + target = READ_ONCE(node_demotion[node]); + rcu_read_unlock(); + + return target; +} + /* * Obtain the lock on page, remove all ptes and migrate the page * to the newly allocated page in newpage. @@ -1323,7 +1429,7 @@ static inline int try_split_thp(struct page *page, struct page **page2, * @mode: The migration mode that specifies the constraints for * page migration, if any. * @reason: The reason for page migration. - * @ret_succeeded: Set to the number of normal pages migrated successfully if + * @ret_succeeded: Set to the number of pages migrated successfully if * the caller passes a non-NULL pointer. * * The function returns after 10 attempts or if no pages are movable any more @@ -1331,9 +1437,7 @@ static inline int try_split_thp(struct page *page, struct page **page2, * It is caller's responsibility to call putback_movable_pages() to return pages * to the LRU or free list only if ret != 0. * - * Returns the number of {normal page, THP, hugetlb} that were not migrated, or - * an error code. The number of THP splits will be considered as the number of - * non-migrated THP, no matter how many subpages of the THP are migrated successfully. + * Returns the number of pages that were not migrated, or an error code. 
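next_demotion_node(), as re-added above, is a single lockless array read: one writer updates node_demotion[] and readers only need a load the compiler cannot tear, merge, or repeat, hence READ_ONCE() under rcu_read_lock(). A minimal model that approximates READ_ONCE() with a volatile access (an illustration, not the kernel macro):

#include <stdio.h>

#define MAX_NUMNODES	8
#define NUMA_NO_NODE	(-1)

static int node_demotion[MAX_NUMNODES] = {
	[0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE
};

/* forces a single, untorn load of *p, in the spirit of READ_ONCE() */
static inline int read_once_int(const int *p)
{
	return *(const volatile int *)p;
}

static int next_demotion_node(int node)
{
	/* the kernel brackets this with rcu_read_lock()/rcu_read_unlock() */
	return read_once_int(&node_demotion[node]);
}

int main(void)
{
	node_demotion[0] = 1;	/* node 0 demotes to node 1 */
	printf("next after 0: %d, after 1: %d\n",
	       next_demotion_node(0), next_demotion_node(1));
	return 0;
}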
*/ int migrate_pages(struct list_head *from, new_page_t get_new_page, free_page_t put_new_page, unsigned long private, @@ -1342,7 +1446,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, int retry = 1; int thp_retry = 1; int nr_failed = 0; - int nr_failed_pages = 0; int nr_succeeded = 0; int nr_thp_succeeded = 0; int nr_thp_failed = 0; @@ -1354,16 +1457,13 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, int swapwrite = current->flags & PF_SWAPWRITE; int rc, nr_subpages; LIST_HEAD(ret_pages); - LIST_HEAD(thp_split_pages); bool nosplit = (reason == MR_NUMA_MISPLACED); - bool no_subpage_counting = false; trace_mm_migrate_pages_start(mode, reason); if (!swapwrite) current->flags |= PF_SWAPWRITE; -thp_subpage_migration: for (pass = 0; pass < 10 && (retry || thp_retry); pass++) { retry = 0; thp_retry = 0; @@ -1376,7 +1476,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, * during migration. */ is_thp = PageTransHuge(page) && !PageHuge(page); - nr_subpages = compound_nr(page); + nr_subpages = thp_nr_pages(page); cond_resched(); if (PageHuge(page)) @@ -1412,20 +1512,18 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, case -ENOSYS: /* THP migration is unsupported */ if (is_thp) { - nr_thp_failed++; - if (!try_split_thp(page, &page2, &thp_split_pages)) { + if (!try_split_thp(page, &page2, from)) { nr_thp_split++; goto retry; } - nr_failed_pages += nr_subpages; + nr_thp_failed++; + nr_failed += nr_subpages; break; } /* Hugetlb migration is unsupported */ - if (!no_subpage_counting) - nr_failed++; - nr_failed_pages += nr_subpages; + nr_failed++; break; case -ENOMEM: /* @@ -1434,19 +1532,16 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, * THP NUMA faulting doesn't split THP to retry. */ if (is_thp && !nosplit) { - nr_thp_failed++; - if (!try_split_thp(page, &page2, &thp_split_pages)) { + if (!try_split_thp(page, &page2, from)) { nr_thp_split++; goto retry; } - nr_failed_pages += nr_subpages; + nr_thp_failed++; + nr_failed += nr_subpages; goto out; } - - if (!no_subpage_counting) - nr_failed++; - nr_failed_pages += nr_subpages; + nr_failed++; goto out; case -EAGAIN: if (is_thp) { @@ -1456,11 +1551,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, retry++; break; case MIGRATEPAGE_SUCCESS: - nr_succeeded += nr_subpages; if (is_thp) { nr_thp_succeeded++; + nr_succeeded += nr_subpages; break; } + nr_succeeded++; break; default: /* @@ -1471,37 +1567,17 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, */ if (is_thp) { nr_thp_failed++; - nr_failed_pages += nr_subpages; + nr_failed += nr_subpages; break; } - - if (!no_subpage_counting) - nr_failed++; - nr_failed_pages += nr_subpages; + nr_failed++; break; } } } - nr_failed += retry; + nr_failed += retry + thp_retry; nr_thp_failed += thp_retry; - /* - * Try to migrate subpages of fail-to-migrate THPs, no nr_failed - * counting in this round, since all subpages of a THP is counted - * as 1 failure in the first round. - */ - if (!list_empty(&thp_split_pages)) { - /* - * Move non-migrated pages (after 10 retries) to ret_pages - * to avoid migrating them again. 
- */ - list_splice_init(from, &ret_pages); - list_splice_init(&thp_split_pages, from); - no_subpage_counting = true; - retry = 1; - goto thp_subpage_migration; - } - - rc = nr_failed + nr_thp_failed; + rc = nr_failed; out: /* * Put the permanent failure page back to migration list, they @@ -1510,11 +1586,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, list_splice(&ret_pages, from); count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); - count_vm_events(PGMIGRATE_FAIL, nr_failed_pages); + count_vm_events(PGMIGRATE_FAIL, nr_failed); count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded); count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed); count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split); - trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded, + trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded, nr_thp_failed, nr_thp_split, mode, reason); if (!swapwrite) @@ -2293,6 +2369,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, * can't be dropped from it). */ get_page(page); + migrate->cpages++; /* * Optimize for the common case where page is only mapped once @@ -2302,7 +2379,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, if (trylock_page(page)) { pte_t swp_pte; - migrate->cpages++; + mpfn |= MIGRATE_PFN_LOCKED; ptep_get_and_clear(mm, addr, ptep); /* Setup special migration page table entry */ @@ -2336,9 +2413,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, if (pte_present(pte)) unmapped++; - } else { - put_page(page); - mpfn = 0; } next: @@ -2394,7 +2468,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate) * @page: struct page to check * * Pinned pages cannot be migrated. This is the same test as in - * folio_migrate_mapping(), except that here we allow migration of a + * migrate_page_move_mapping(), except that here we allow migration of a * ZONE_DEVICE page. */ static bool migrate_vma_check_page(struct page *page) @@ -2415,8 +2489,22 @@ static bool migrate_vma_check_page(struct page *page) return false; /* Page from ZONE_DEVICE have one extra reference */ - if (is_zone_device_page(page)) - extra++; + if (is_zone_device_page(page)) { + /* + * Private page can never be pin as they have no valid pte and + * GUP will fail for those. Yet if there is a pending migration + * a thread might try to wait on the pte migration entry and + * will bump the page reference count. Sadly there is no way to + * differentiate a regular pin from migration wait. Hence to + * avoid 2 racing thread trying to migrate back to CPU to enter + * infinite loop (one stopping migration because the other is + * waiting on pte migration entry). We always return true here. + * + * FIXME proper solution is to rework migration_entry_wait() so + * it does not need to take a reference on page. + */ + return is_device_private_page(page); + } /* For file back page */ if (page_mapping(page)) @@ -2429,30 +2517,49 @@ static bool migrate_vma_check_page(struct page *page) } /* - * migrate_vma_unmap() - replace page mapping with special migration pte entry + * migrate_vma_prepare() - lock pages and isolate them from the lru * @migrate: migrate struct containing all migration information * - * Isolate pages from the LRU and replace mappings (CPU page table pte) with a - * special migration pte entry and check if it has been pinned. Pinned pages are - * restored because we cannot migrate them. - * - * This is the last step before we call the device driver callback to allocate - * destination memory and copy contents of original page over to new page. 
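migrate_vma_prepare(), reintroduced above, has to lock many pages while a concurrent migration may be locking an overlapping set in a different order; sleeping on a contended lock would invite the ABBA deadlock its comment describes, so it trylocks and simply drops what it cannot lock, making the whole operation best-effort. The same idiom with pthreads:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Best-effort batch lock: never sleep on a contended entry, just
 * skip it, mirroring the trylock_page() policy above. */
static int lock_batch(pthread_mutex_t **locks, bool *taken, int n)
{
	int got = 0;

	for (int i = 0; i < n; i++) {
		taken[i] = pthread_mutex_trylock(locks[i]) == 0;
		if (taken[i])
			got++;	/* this entry stays in the migration */
		/* else: drop the entry; a later attempt may retry it */
	}
	return got;
}

int main(void)
{
	pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t *locks[] = { &a, &b };
	bool taken[2];

	pthread_mutex_lock(&b);		/* simulate a concurrent holder */
	printf("locked %d of 2\n", lock_batch(locks, taken, 2));
	return 0;
}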
+ * This locks pages that have been collected by migrate_vma_collect(). Once each + * page is locked it is isolated from the lru (for non-device pages). Finally, + * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be + * migrated by concurrent kernel threads. */ -static void migrate_vma_unmap(struct migrate_vma *migrate) +static void migrate_vma_prepare(struct migrate_vma *migrate) { const unsigned long npages = migrate->npages; - unsigned long i, restore = 0; + const unsigned long start = migrate->start; + unsigned long addr, i, restore = 0; bool allow_drain = true; lru_add_drain(); - for (i = 0; i < npages; i++) { + for (i = 0; (i < npages) && migrate->cpages; i++) { struct page *page = migrate_pfn_to_page(migrate->src[i]); + bool remap = true; if (!page) continue; + if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) { + /* + * Because we are migrating several pages there can be + * a deadlock between 2 concurrent migration where each + * are waiting on each other page lock. + * + * Make migrate_vma() a best effort thing and backoff + * for any page we can not lock right away. + */ + if (!trylock_page(page)) { + migrate->src[i] = 0; + migrate->cpages--; + put_page(page); + continue; + } + remap = false; + migrate->src[i] |= MIGRATE_PFN_LOCKED; + } + /* ZONE_DEVICE pages are not on LRU */ if (!is_zone_device_page(page)) { if (!PageLRU(page) && allow_drain) { @@ -2462,9 +2569,16 @@ static void migrate_vma_unmap(struct migrate_vma *migrate) } if (isolate_lru_page(page)) { - migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; - migrate->cpages--; - restore++; + if (remap) { + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; + } else { + migrate->src[i] = 0; + unlock_page(page); + migrate->cpages--; + put_page(page); + } continue; } @@ -2472,23 +2586,83 @@ static void migrate_vma_unmap(struct migrate_vma *migrate) put_page(page); } - if (page_mapped(page)) - try_to_migrate(page, 0); + if (!migrate_vma_check_page(page)) { + if (remap) { + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; - if (page_mapped(page) || !migrate_vma_check_page(page)) { - if (!is_zone_device_page(page)) { - get_page(page); - putback_lru_page(page); + if (!is_zone_device_page(page)) { + get_page(page); + putback_lru_page(page); + } + } else { + migrate->src[i] = 0; + unlock_page(page); + migrate->cpages--; + + if (!is_zone_device_page(page)) + putback_lru_page(page); + else + put_page(page); } - - migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; - migrate->cpages--; - restore++; - continue; } } - for (i = 0; i < npages && restore; i++) { + for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) { + struct page *page = migrate_pfn_to_page(migrate->src[i]); + + if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE)) + continue; + + remove_migration_pte(page, migrate->vma, addr, page); + + migrate->src[i] = 0; + unlock_page(page); + put_page(page); + restore--; + } +} + +/* + * migrate_vma_unmap() - replace page mapping with special migration pte entry + * @migrate: migrate struct containing all migration information + * + * Replace page mapping (CPU page table pte) with a special migration pte entry + * and check again if it has been pinned. Pinned pages are restored because we + * cannot migrate them. + * + * This is the last step before we call the device driver callback to allocate + * destination memory and copy contents of original page over to new page. 
+ */ +static void migrate_vma_unmap(struct migrate_vma *migrate) +{ + const unsigned long npages = migrate->npages; + const unsigned long start = migrate->start; + unsigned long addr, i, restore = 0; + + for (i = 0; i < npages; i++) { + struct page *page = migrate_pfn_to_page(migrate->src[i]); + + if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE)) + continue; + + if (page_mapped(page)) { + try_to_migrate(page, 0); + if (page_mapped(page)) + goto restore; + } + + if (migrate_vma_check_page(page)) + continue; + +restore: + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; + } + + for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) { struct page *page = migrate_pfn_to_page(migrate->src[i]); if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE)) @@ -2498,8 +2672,12 @@ static void migrate_vma_unmap(struct migrate_vma *migrate) migrate->src[i] = 0; unlock_page(page); - put_page(page); restore--; + + if (is_zone_device_page(page)) + put_page(page); + else + putback_lru_page(page); } } @@ -2522,8 +2700,8 @@ static void migrate_vma_unmap(struct migrate_vma *migrate) * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE * flag set). Once these are allocated and copied, the caller must update each * corresponding entry in the dst array with the pfn value of the destination - * page and with MIGRATE_PFN_VALID. Destination pages must be locked via - * lock_page(). + * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set + * (destination pages must have their struct pages locked, via lock_page()). * * Note that the caller does not have to migrate all the pages that are marked * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from @@ -2592,6 +2770,8 @@ int migrate_vma_setup(struct migrate_vma *args) migrate_vma_collect(args); + if (args->cpages) + migrate_vma_prepare(args); if (args->cpages) migrate_vma_unmap(args); @@ -2666,7 +2846,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, if (unlikely(anon_vma_prepare(vma))) goto abort; - if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) goto abort; /* @@ -2886,152 +3066,14 @@ void migrate_vma_finalize(struct migrate_vma *migrate) EXPORT_SYMBOL(migrate_vma_finalize); #endif /* CONFIG_DEVICE_PRIVATE */ -/* - * node_demotion[] example: - * - * Consider a system with two sockets. Each socket has - * three classes of memory attached: fast, medium and slow. - * Each memory class is placed in its own NUMA node. The - * CPUs are placed in the node with the "fast" memory. The - * 6 NUMA nodes (0-5) might be split among the sockets like - * this: - * - * Socket A: 0, 1, 2 - * Socket B: 3, 4, 5 - * - * When Node 0 fills up, its memory should be migrated to - * Node 1. When Node 1 fills up, it should be migrated to - * Node 2. 
The migration path start on the nodes with the - * processors (since allocations default to this node) and - * fast memory, progress through medium and end with the - * slow memory: - * - * 0 -> 1 -> 2 -> stop - * 3 -> 4 -> 5 -> stop - * - * This is represented in the node_demotion[] like this: - * - * { nr=1, nodes[0]=1 }, // Node 0 migrates to 1 - * { nr=1, nodes[0]=2 }, // Node 1 migrates to 2 - * { nr=0, nodes[0]=-1 }, // Node 2 does not migrate - * { nr=1, nodes[0]=4 }, // Node 3 migrates to 4 - * { nr=1, nodes[0]=5 }, // Node 4 migrates to 5 - * { nr=0, nodes[0]=-1 }, // Node 5 does not migrate - * - * Moreover some systems may have multiple slow memory nodes. - * Suppose a system has one socket with 3 memory nodes, node 0 - * is fast memory type, and node 1/2 both are slow memory - * type, and the distance between fast memory node and slow - * memory node is same. So the migration path should be: - * - * 0 -> 1/2 -> stop - * - * This is represented in the node_demotion[] like this: - * { nr=2, {nodes[0]=1, nodes[1]=2} }, // Node 0 migrates to node 1 and node 2 - * { nr=0, nodes[0]=-1, }, // Node 1 dose not migrate - * { nr=0, nodes[0]=-1, }, // Node 2 does not migrate - */ - -/* - * Writes to this array occur without locking. Cycles are - * not allowed: Node X demotes to Y which demotes to X... - * - * If multiple reads are performed, a single rcu_read_lock() - * must be held over all reads to ensure that no cycles are - * observed. - */ -#define DEFAULT_DEMOTION_TARGET_NODES 15 - -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES -#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1) -#else -#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES -#endif - -struct demotion_nodes { - unsigned short nr; - short nodes[DEMOTION_TARGET_NODES]; -}; - -static struct demotion_nodes *node_demotion __read_mostly; - -/** - * next_demotion_node() - Get the next node in the demotion path - * @node: The starting node to lookup the next node - * - * Return: node id for next memory node in the demotion path hierarchy - * from @node; NUMA_NO_NODE if @node is terminal. This does not keep - * @node online or guarantee that it *continues* to be the next demotion - * target. - */ -int next_demotion_node(int node) -{ - struct demotion_nodes *nd; - unsigned short target_nr, index; - int target; - - if (!node_demotion) - return NUMA_NO_NODE; - - nd = &node_demotion[node]; - - /* - * node_demotion[] is updated without excluding this - * function from running. RCU doesn't provide any - * compiler barriers, so the READ_ONCE() is required - * to avoid compiler reordering or read merging. - * - * Make sure to use RCU over entire code blocks if - * node_demotion[] reads need to be consistent. - */ - rcu_read_lock(); - target_nr = READ_ONCE(nd->nr); - - switch (target_nr) { - case 0: - target = NUMA_NO_NODE; - goto out; - case 1: - index = 0; - break; - default: - /* - * If there are multiple target nodes, just select one - * target node randomly. - * - * In addition, we can also use round-robin to select - * target node, but we should introduce another variable - * for node_demotion[] to record last selected target node, - * that may cause cache ping-pong due to the changing of - * last target node. Or introducing per-cpu data to avoid - * caching issue, which seems more complicated. So selecting - * target node randomly seems better until now. 
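The removed multi-target lookup picks one of several equally distant demotion targets at random, as the comment above argues, rather than keeping round-robin state. A sketch of that selection, with rand() standing in for the kernel's get_random_int():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NUMA_NO_NODE	(-1)

static int pick_demotion_target(const short *nodes, unsigned short nr)
{
	switch (nr) {
	case 0:
		return NUMA_NO_NODE;		/* terminal node */
	case 1:
		return nodes[0];
	default:
		return nodes[rand() % nr];	/* random among equals */
	}
}

int main(void)
{
	short targets[] = { 1, 2 };	/* two equally distant slow nodes */

	srand((unsigned int)time(NULL));
	printf("demote to node %d\n", pick_demotion_target(targets, 2));
	return 0;
}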
- */ - index = get_random_int() % target_nr; - break; - } - - target = READ_ONCE(nd->nodes[index]); - -out: - rcu_read_unlock(); - return target; -} - #if defined(CONFIG_HOTPLUG_CPU) /* Disable reclaim-based migration. */ static void __disable_all_migrate_targets(void) { - int node, i; + int node; - if (!node_demotion) - return; - - for_each_online_node(node) { - node_demotion[node].nr = 0; - for (i = 0; i < DEMOTION_TARGET_NODES; i++) - node_demotion[node].nodes[i] = NUMA_NO_NODE; - } + for_each_online_node(node) + node_demotion[node] = NUMA_NO_NODE; } static void disable_all_migrate_targets(void) @@ -3058,40 +3100,26 @@ static void disable_all_migrate_targets(void) * Failing here is OK. It might just indicate * being at the end of a chain. */ -static int establish_migrate_target(int node, nodemask_t *used, - int best_distance) +static int establish_migrate_target(int node, nodemask_t *used) { - int migration_target, index, val; - struct demotion_nodes *nd; + int migration_target; - if (!node_demotion) + /* + * Can not set a migration target on a + * node with it already set. + * + * No need for READ_ONCE() here since this + * in the write path for node_demotion[]. + * This should be the only thread writing. + */ + if (node_demotion[node] != NUMA_NO_NODE) return NUMA_NO_NODE; - nd = &node_demotion[node]; - migration_target = find_next_best_node(node, used); if (migration_target == NUMA_NO_NODE) return NUMA_NO_NODE; - /* - * If the node has been set a migration target node before, - * which means it's the best distance between them. Still - * check if this node can be demoted to other target nodes - * if they have a same best distance. - */ - if (best_distance != -1) { - val = node_distance(node, migration_target); - if (val > best_distance) - return NUMA_NO_NODE; - } - - index = nd->nr; - if (WARN_ONCE(index >= DEMOTION_TARGET_NODES, - "Exceeds maximum demotion target nodes\n")) - return NUMA_NO_NODE; - - nd->nodes[index] = migration_target; - nd->nr++; + node_demotion[node] = migration_target; return migration_target; } @@ -3107,9 +3135,7 @@ static int establish_migrate_target(int node, nodemask_t *used, * * The difference here is that cycles must be avoided. If * node0 migrates to node1, then neither node1, nor anything - * node1 migrates to can migrate to node0. Also one node can - * be migrated to multiple nodes if the target nodes all have - * a same best-distance against the source node. + * node1 migrates to can migrate to node0. * * This function can run simultaneously with readers of * node_demotion[]. However, it can not run simultaneously @@ -3121,7 +3147,7 @@ static void __set_migration_target_nodes(void) nodemask_t next_pass = NODE_MASK_NONE; nodemask_t this_pass = NODE_MASK_NONE; nodemask_t used_targets = NODE_MASK_NONE; - int node, best_distance; + int node; /* * Avoid any oddities like cycles that could occur @@ -3150,33 +3176,18 @@ static void __set_migration_target_nodes(void) * multiple source nodes to share a destination. */ nodes_or(used_targets, used_targets, this_pass); - for_each_node_mask(node, this_pass) { - best_distance = -1; + int target_node = establish_migrate_target(node, &used_targets); + + if (target_node == NUMA_NO_NODE) + continue; /* - * Try to set up the migration path for the node, and the target - * migration nodes can be multiple, so doing a loop to find all - * the target nodes if they all have a best node distance. + * Visit targets from this pass in the next pass. 
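__set_migration_target_nodes() builds the demotion graph in passes: nodes visited in the current pass claim targets outside the already-used set, and the claimed targets seed the next pass, which is what rules out cycles. A rough single-chain skeleton over 64-bit masks; find_best() is a toy stand-in for find_next_best_node(), and the topology is invented:

#include <stdint.h>
#include <stdio.h>

#define NNODES 6

/* toy distance model: the nearest unused node is the next-higher id */
static int find_best(int node, uint64_t used)
{
	for (int t = node + 1; t < NNODES; t++)
		if (!(used & (1ull << t)))
			return t;
	return -1;
}

static uint64_t run_pass(uint64_t this_pass, uint64_t *used, int *demotion)
{
	uint64_t next_pass = 0;

	*used |= this_pass;	/* nodes in this pass can't be targets */
	for (int node = 0; node < NNODES; node++) {
		if (!(this_pass & (1ull << node)))
			continue;
		int target = find_best(node, *used);

		if (target < 0)
			continue;	/* end of a demotion chain */
		demotion[node] = target;
		next_pass |= 1ull << target;	/* visit targets next */
	}
	return next_pass;
}

int main(void)
{
	int demotion[NNODES] = { [0 ... NNODES - 1] = -1 };
	uint64_t used = 0;
	uint64_t pass = 1ull << 0;	/* start from the CPU node */

	while (pass)
		pass = run_pass(pass, &used, demotion);
	for (int n = 0; n < NNODES; n++)
		printf("node %d -> %d\n", n, demotion[n]);
	return 0;
}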
+ * Eventually, every node will have been part of + * a pass, and will become set in 'used_targets'. */ - do { - int target_node = - establish_migrate_target(node, &used_targets, - best_distance); - - if (target_node == NUMA_NO_NODE) - break; - - if (best_distance == -1) - best_distance = node_distance(node, target_node); - - /* - * Visit targets from this pass in the next pass. - * Eventually, every node will have been part of - * a pass, and will become set in 'used_targets'. - */ - node_set(target_node, next_pass); - } while (1); + node_set(target_node, next_pass); } /* * 'next_pass' contains nodes which became migration @@ -3277,11 +3288,6 @@ static int __init migrate_on_reclaim_init(void) { int ret; - node_demotion = kmalloc_array(nr_node_ids, - sizeof(struct demotion_nodes), - GFP_KERNEL); - WARN_ON(!node_demotion); - ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline", NULL, migration_offline_cpu); /* @@ -3300,64 +3306,3 @@ static int __init migrate_on_reclaim_init(void) } late_initcall(migrate_on_reclaim_init); #endif /* CONFIG_HOTPLUG_CPU */ - -bool numa_demotion_enabled = false; - -#ifdef CONFIG_SYSFS -static ssize_t numa_demotion_enabled_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return sysfs_emit(buf, "%s\n", - numa_demotion_enabled ? "true" : "false"); -} - -static ssize_t numa_demotion_enabled_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) - numa_demotion_enabled = true; - else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) - numa_demotion_enabled = false; - else - return -EINVAL; - - return count; -} - -static struct kobj_attribute numa_demotion_enabled_attr = - __ATTR(demotion_enabled, 0644, numa_demotion_enabled_show, - numa_demotion_enabled_store); - -static struct attribute *numa_attrs[] = { - &numa_demotion_enabled_attr.attr, - NULL, -}; - -static const struct attribute_group numa_attr_group = { - .attrs = numa_attrs, -}; - -static int __init numa_init_sysfs(void) -{ - int err; - struct kobject *numa_kobj; - - numa_kobj = kobject_create_and_add("numa", mm_kobj); - if (!numa_kobj) { - pr_err("failed to create numa kobject\n"); - return -ENOMEM; - } - err = sysfs_create_group(numa_kobj, &numa_attr_group); - if (err) { - pr_err("failed to register numa group\n"); - goto delete_obj; - } - return 0; - -delete_obj: - kobject_put(numa_kobj); - return err; -} -subsys_initcall(numa_init_sysfs); -#endif diff --git a/mm/mlock.c b/mm/mlock.c index 8f584eddd3..16d2ee160d 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -271,7 +271,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) /* Phase 1: page isolation */ for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; - struct folio *folio = page_folio(page); if (TestClearPageMlocked(page)) { /* @@ -279,7 +278,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) * so we can spare the get_page() here. 
*/ if (TestClearPageLRU(page)) { - lruvec = folio_lruvec_relock_irq(folio, lruvec); + lruvec = relock_page_lruvec_irq(page, lruvec); del_page_from_lru_list(page, lruvec); continue; } else @@ -512,7 +511,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_anon_name(vma)); + vma->vm_userfaultfd_ctx); if (*prev) { vma = *prev; goto success; diff --git a/mm/mmap.c b/mm/mmap.c index d445c1b9d6..88dcc5c252 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -1030,8 +1029,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, */ static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { /* * VM_SOFTDIRTY should not prevent from VMA merging, if we @@ -1049,8 +1047,6 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma, return 0; if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx)) return 0; - if (!is_same_vma_anon_name(vma, anon_name)) - return 0; return 1; } @@ -1083,10 +1079,9 @@ static int can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { if (vma->vm_pgoff == vm_pgoff) return 1; @@ -1105,10 +1100,9 @@ static int can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { pgoff_t vm_pglen; vm_pglen = vma_pages(vma); @@ -1119,9 +1113,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, } /* - * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name), - * figure out whether that can be merged with its predecessor or its - * successor. Or both (it neatly fills a hole). + * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out + * whether that can be merged with its predecessor or its successor. + * Or both (it neatly fills a hole). 
* * In most cases - when called for mmap, brk or mremap - [addr,end) is * certain not to be mapped by the time vma_merge is called; but when @@ -1166,8 +1160,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, unsigned long end, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { pgoff_t pglen = (end - addr) >> PAGE_SHIFT; struct vm_area_struct *area, *next; @@ -1197,7 +1190,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, anon_vma, file, pgoff, - vm_userfaultfd_ctx, anon_name)) { + vm_userfaultfd_ctx)) { /* * OK, it can. Can we now merge in the successor as well? */ @@ -1206,7 +1199,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx, anon_name) && + vm_userfaultfd_ctx) && is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) { /* cases 1, 6 */ @@ -1229,7 +1222,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx, anon_name)) { + vm_userfaultfd_ctx)) { if (prev && addr < prev->vm_end) /* case 4 */ err = __vma_adjust(prev, prev->vm_start, addr, prev->vm_pgoff, NULL, next); @@ -1606,6 +1599,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, goto out_fput; } } else if (flags & MAP_HUGETLB) { + struct ucounts *ucounts = NULL; struct hstate *hs; hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); @@ -1621,7 +1615,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, */ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, - HUGETLB_ANONHUGE_INODE, + &ucounts, HUGETLB_ANONHUGE_INODE, (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (IS_ERR(file)) return PTR_ERR(file); @@ -1761,7 +1755,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * Can we just expand an old mapping? */ vma = vma_merge(mm, prev, addr, addr + len, vm_flags, - NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL); + NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX); if (vma) goto out; @@ -1810,7 +1804,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ if (unlikely(vm_flags != vma->vm_flags && prev)) { merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, - NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL); + NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX); if (merge) { /* ->mmap() can change vma->vm_file and fput the original file. So * fput the vma->vm_file here or we would add an extra fput for file @@ -2935,6 +2929,7 @@ EXPORT_SYMBOL(vm_munmap); SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) { addr = untagged_addr(addr); + profile_munmap(addr); return __vm_munmap(addr, len, true); } @@ -3062,7 +3057,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla /* Can we just expand an old private anonymous mapping? */ vma = vma_merge(mm, prev, addr, addr + len, flags, - NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL); + NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX); if (vma) goto out; @@ -3148,27 +3143,25 @@ void exit_mmap(struct mm_struct *mm) * to mmu_notifier_release(mm) ensures mmu notifier callbacks in * __oom_reap_task_mm() will not block. 
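vma_merge(), which loses its anon_name parameter throughout the hunks above, answers one question: can the range [addr, end) extend its predecessor, its successor, or bridge both when the neighbours are mutually compatible (cases 1 and 6 in the code). A reduced model of that decision over flags and adjacency only:

#include <stdbool.h>
#include <stdio.h>

struct vma {
	unsigned long start, end;
	unsigned long flags;
};

/* stands in for the vm_flags/file/anon_vma/uffd-ctx checks above */
static bool mergeable(const struct vma *v, unsigned long flags)
{
	return v && v->flags == flags;
}

/* 0 = none, 1 = extend prev, 2 = extend next, 3 = bridge both */
static int merge_case(const struct vma *prev, const struct vma *next,
		      unsigned long addr, unsigned long end,
		      unsigned long flags)
{
	int m = 0;

	if (mergeable(prev, flags) && prev->end == addr)
		m |= 1;
	if (mergeable(next, flags) && next->start == end)
		m |= 2;
	return m;
}

int main(void)
{
	struct vma prev = { 0x1000, 0x3000, 0x7 };
	struct vma next = { 0x5000, 0x8000, 0x7 };

	/* the new range exactly fills the hole: expect "bridge both" */
	printf("case %d\n", merge_case(&prev, &next, 0x3000, 0x5000, 0x7));
	return 0;
}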
* - * This needs to be done before calling unlock_range(), + * This needs to be done before calling munlock_vma_pages_all(), * which clears VM_LOCKED, otherwise the oom reaper cannot * reliably test it. */ (void)__oom_reap_task_mm(mm); set_bit(MMF_OOM_SKIP, &mm->flags); + mmap_write_lock(mm); + mmap_write_unlock(mm); } - mmap_write_lock(mm); if (mm->locked_vm) unlock_range(mm->mmap, ULONG_MAX); arch_exit_mmap(mm); vma = mm->mmap; - if (!vma) { - /* Can happen if dup_mmap() received an OOM */ - mmap_write_unlock(mm); + if (!vma) /* Can happen if dup_mmap() received an OOM */ return; - } lru_add_drain(); flush_cache_mm(mm); @@ -3179,15 +3172,16 @@ void exit_mmap(struct mm_struct *mm) free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb); - /* Walk the list again, actually closing and freeing it. */ + /* + * Walk the list again, actually closing and freeing it, + * with preemption enabled, without holding any MM locks. + */ while (vma) { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); vma = remove_vma(vma); cond_resched(); } - mm->mmap = NULL; - mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); } @@ -3256,7 +3250,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return NULL; /* should never get here */ new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_anon_name(vma)); + vma->vm_userfaultfd_ctx); if (new_vma) { /* * Source vma may have been merged into new_vma @@ -3338,7 +3332,7 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) { - WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); + mm->total_vm += npages; if (is_exec_mapping(flags)) mm->exec_vm += npages; diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index afb7185ffd..1b9837419b 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/mprotect.c b/mm/mprotect.c index 5ca3fbcb14..ed18dc4953 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -464,7 +464,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *pprev = vma_merge(mm, *pprev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_anon_name(vma)); + vma->vm_userfaultfd_ctx); if (*pprev) { vma = *pprev; VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); @@ -563,7 +563,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, error = -ENOMEM; if (!vma) goto out; - + prev = vma->vm_prev; if (unlikely(grows & PROT_GROWSDOWN)) { if (vma->vm_start >= end) goto out; @@ -581,11 +581,8 @@ static int do_mprotect_pkey(unsigned long start, size_t len, goto out; } } - if (start > vma->vm_start) prev = vma; - else - prev = vma->vm_prev; for (nstart = start ; ; ) { unsigned long mask_off_old_flags; diff --git a/mm/mremap.c b/mm/mremap.c index 002eec83e9..badfe17ade 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -489,10 +489,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); - if (is_vm_hugetlb_page(vma)) - return move_hugetlb_page_tables(vma, new_vma, old_addr, - new_addr, len); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, old_addr, old_end); 
mmu_notifier_invalidate_range_start(&range); @@ -569,7 +565,6 @@ static unsigned long move_vma(struct vm_area_struct *vma, bool *locked, unsigned long flags, struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) { - long to_account = new_len - old_len; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma; unsigned long vm_flags = vma->vm_flags; @@ -588,9 +583,6 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (mm->map_count >= sysctl_max_map_count - 3) return -ENOMEM; - if (unlikely(flags & MREMAP_DONTUNMAP)) - to_account = new_len; - if (vma->vm_ops && vma->vm_ops->may_split) { if (vma->vm_start != old_addr) err = vma->vm_ops->may_split(vma, old_addr); @@ -612,8 +604,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (err) return err; - if (vm_flags & VM_ACCOUNT) { - if (security_vm_enough_memory_mm(mm, to_account >> PAGE_SHIFT)) + if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT)) { + if (security_vm_enough_memory_mm(mm, new_len >> PAGE_SHIFT)) return -ENOMEM; } @@ -621,8 +613,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff, &need_rmap_locks); if (!new_vma) { - if (vm_flags & VM_ACCOUNT) - vm_unacct_memory(to_account >> PAGE_SHIFT); + if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT)) + vm_unacct_memory(new_len >> PAGE_SHIFT); return -ENOMEM; } @@ -650,10 +642,6 @@ static unsigned long move_vma(struct vm_area_struct *vma, mremap_userfaultfd_prep(new_vma, uf); } - if (is_vm_hugetlb_page(vma)) { - clear_vma_resv_huge_pages(vma); - } - /* Conceal VM_ACCOUNT so old reservation is not undone */ if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) { vma->vm_flags &= ~VM_ACCOUNT; @@ -720,7 +708,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, } static struct vm_area_struct *vma_to_resize(unsigned long addr, - unsigned long old_len, unsigned long new_len, unsigned long flags) + unsigned long old_len, unsigned long new_len, unsigned long flags, + unsigned long *p) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -747,6 +736,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))) return ERR_PTR(-EINVAL); + if (is_vm_hugetlb_page(vma)) + return ERR_PTR(-EINVAL); + /* We can't remap across vm area boundaries */ if (old_len > vma->vm_end - addr) return ERR_PTR(-EFAULT); @@ -776,6 +768,13 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, (new_len - old_len) >> PAGE_SHIFT)) return ERR_PTR(-ENOMEM); + if (vma->vm_flags & VM_ACCOUNT) { + unsigned long charged = (new_len - old_len) >> PAGE_SHIFT; + if (security_vm_enough_memory_mm(mm, charged)) + return ERR_PTR(-ENOMEM); + *p = charged; + } + return vma; } @@ -788,6 +787,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long ret = -EINVAL; + unsigned long charged = 0; unsigned long map_flags = 0; if (offset_in_page(new_addr)) @@ -830,7 +830,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, old_len = new_len; } - vma = vma_to_resize(addr, old_len, new_len, flags); + vma = vma_to_resize(addr, old_len, new_len, flags, &charged); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; @@ -853,7 +853,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, ((addr - vma->vm_start) >> PAGE_SHIFT), map_flags); if (IS_ERR_VALUE(ret)) - goto out; + goto out1; /* We 
got a new mapping */ if (!(flags & MREMAP_FIXED)) @@ -862,6 +862,12 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, uf_unmap); + if (!(offset_in_page(ret))) + goto out; + +out1: + vm_unacct_memory(charged); + out: return ret; } @@ -893,6 +899,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long ret = -EINVAL; + unsigned long charged = 0; bool locked = false; bool downgraded = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; @@ -942,31 +949,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (mmap_write_lock_killable(current->mm)) return -EINTR; - vma = find_vma(mm, addr); - if (!vma || vma->vm_start > addr) { - ret = EFAULT; - goto out; - } - - if (is_vm_hugetlb_page(vma)) { - struct hstate *h __maybe_unused = hstate_vma(vma); - - old_len = ALIGN(old_len, huge_page_size(h)); - new_len = ALIGN(new_len, huge_page_size(h)); - - /* addrs must be huge page aligned */ - if (addr & ~huge_page_mask(h)) - goto out; - if (new_addr & ~huge_page_mask(h)) - goto out; - - /* - * Don't allow remap expansion, because the underlying hugetlb - * reservation is not yet capable to handle split reservation. - */ - if (new_len > old_len) - goto out; - } if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) { ret = mremap_to(addr, old_len, new_addr, new_len, @@ -999,7 +981,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, /* * Ok, we need to grow.. */ - vma = vma_to_resize(addr, old_len, new_len, flags); + vma = vma_to_resize(addr, old_len, new_len, flags, &charged); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; @@ -1010,18 +992,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (old_len == vma->vm_end - addr) { /* can we just expand the current mapping? 
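[Reviewer note, not part of the patch: the vma_to_resize() reshuffle above moves the security_vm_enough_memory_mm() charge into the lookup helper and threads the amount back through *p, so both mremap_to() and the grow-in-place path can vm_unacct_memory() on failure. A hedged userspace illustration of the path being accounted:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t old_len = 4096, new_len = 8 * 4096;
	void *p = mmap(NULL, old_len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	/* Growing goes through vma_to_resize(): the delta is charged
	 * before any VMA is touched, and the syscall exit path undoes
	 * the charge if the move or expansion later fails. */
	void *q = mremap(p, old_len, new_len, MREMAP_MAYMOVE);
	printf("%s: %p -> %p\n", q == MAP_FAILED ? "failed" : "grown", p, q);
	return 0;
}
End of note.]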
*/ if (vma_expandable(vma, new_len - old_len)) { - long pages = (new_len - old_len) >> PAGE_SHIFT; - - if (vma->vm_flags & VM_ACCOUNT) { - if (security_vm_enough_memory_mm(mm, pages)) { - ret = -ENOMEM; - goto out; - } - } + int pages = (new_len - old_len) >> PAGE_SHIFT; if (vma_adjust(vma, vma->vm_start, addr + new_len, vma->vm_pgoff, NULL)) { - vm_unacct_memory(pages); ret = -ENOMEM; goto out; } @@ -1060,8 +1034,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, &locked, flags, &uf, &uf_unmap); } out: - if (offset_in_page(ret)) + if (offset_in_page(ret)) { + vm_unacct_memory(charged); locked = false; + } if (downgraded) mmap_read_unlock(current->mm); else diff --git a/mm/nommu.c b/mm/nommu.c index 55a9e48a7a..02d2427b8f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1638,6 +1639,12 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, } EXPORT_SYMBOL(remap_vmalloc_range); +unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + return -ENOMEM; +} + vm_fault_t filemap_fault(struct vm_fault *vmf) { BUG(); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 832fb33037..bfa9e348c3 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -641,8 +641,6 @@ static void oom_reap_task(struct task_struct *tsk) static int oom_reaper(void *unused) { - set_freezable(); - while (true) { struct task_struct *tsk = NULL; @@ -789,11 +787,11 @@ static inline bool __task_will_free_mem(struct task_struct *task) struct signal_struct *sig = task->signal; /* - * A coredumping process may sleep for an extended period in - * coredump_task_exit(), so the oom killer cannot assume that - * the process will promptly exit and release memory. + * A coredumping process may sleep for an extended period in exit_mm(), + * so the oom killer cannot assume that the process will promptly exit + * and release memory. */ - if (sig->core_state) + if (sig->flags & SIGNAL_GROUP_COREDUMP) return false; if (sig->flags & SIGNAL_GROUP_EXIT) @@ -994,7 +992,6 @@ static void oom_kill_process(struct oom_control *oc, const char *message) * If necessary, kill all tasks in the selected memory cgroup. */ if (oom_group) { - memcg_memory_event(oom_group, MEMCG_OOM_GROUP_KILL); mem_cgroup_print_oom_group(oom_group); mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, (void *)message); @@ -1058,7 +1055,7 @@ bool out_of_memory(struct oom_control *oc) if (!is_memcg_oom(oc)) { blocking_notifier_call_chain(&oom_notify_list, 0, &freed); - if (freed > 0 && !is_sysrq_oom(oc)) + if (freed > 0) /* Got some memory back in the last second. 
*/ return true; } @@ -1151,14 +1148,21 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) struct task_struct *p; unsigned int f_flags; bool reap = false; + struct pid *pid; long ret = 0; if (flags) return -EINVAL; - task = pidfd_get_task(pidfd, &f_flags); - if (IS_ERR(task)) - return PTR_ERR(task); + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + task = get_pid_task(pid, PIDTYPE_TGID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } /* * Make sure to choose a thread which still has a reference to mm @@ -1170,15 +1174,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) goto put_task; } - mm = p->mm; - mmgrab(mm); - - if (task_will_free_mem(p)) - reap = true; - else { - /* Error only if the work has not been done already */ - if (!test_bit(MMF_OOM_SKIP, &mm->flags)) - ret = -EINVAL; + if (mmget_not_zero(p->mm)) { + mm = p->mm; + if (task_will_free_mem(p)) + reap = true; + else { + /* Error only if the work has not been done already */ + if (!test_bit(MMF_OOM_SKIP, &mm->flags)) + ret = -EINVAL; + } } task_unlock(p); @@ -1189,18 +1193,17 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) ret = -EINTR; goto drop_mm; } - /* - * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure - * possible change in exit_mmap is seen - */ - if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm)) + if (!__oom_reap_task_mm(mm)) ret = -EAGAIN; mmap_read_unlock(mm); drop_mm: - mmdrop(mm); + if (mm) + mmput(mm); put_task: put_task_struct(task); +put_pid: + put_pid(pid); return ret; #else return -ENOSYS; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 91d163f8d3..4812a17b28 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -562,12 +562,12 @@ static unsigned long wp_next_time(unsigned long cur_time) return cur_time; } -static void wb_domain_writeout_add(struct wb_domain *dom, +static void wb_domain_writeout_inc(struct wb_domain *dom, struct fprop_local_percpu *completions, - unsigned int max_prop_frac, long nr) + unsigned int max_prop_frac) { - __fprop_add_percpu_max(&dom->completions, completions, - max_prop_frac, nr); + __fprop_inc_percpu_max(&dom->completions, completions, + max_prop_frac); /* First event after period switching was turned off? */ if (unlikely(!dom->period_time)) { /* @@ -583,20 +583,20 @@ static void wb_domain_writeout_add(struct wb_domain *dom, /* * Increment @wb's writeout completion count and the global writeout - * completion count. Called from __folio_end_writeback(). + * completion count. Called from test_clear_page_writeback(). 
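[Reviewer note, not part of the patch: with the hunk above, process_mrelease() resolves the pidfd via pidfd_get_pid() plus get_pid_task(PIDTYPE_TGID) and pins the mm with mmget_not_zero()/mmput(). The typical caller shape, as a hedged userspace sketch; the syscall numbers are the common 64-bit ones and are an assumption, check your arch:

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SYS_pidfd_open
#define SYS_pidfd_open 434		/* assumption: x86-64/arm64 */
#endif
#ifndef __NR_process_mrelease
#define __NR_process_mrelease 448	/* assumption: x86-64/arm64 */
#endif

int main(int argc, char **argv)
{
	if (argc != 2)
		return 2;
	pid_t pid = atoi(argv[1]);
	int pidfd = syscall(SYS_pidfd_open, pid, 0);
	if (pidfd < 0) {
		perror("pidfd_open");
		return 1;
	}
	/* Kill first; process_mrelease() only reaps a dying task's
	 * address space, without waiting for the victim to schedule. */
	kill(pid, SIGKILL);
	if (syscall(__NR_process_mrelease, pidfd, 0) < 0)
		perror("process_mrelease");
	return 0;
}
End of note.]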
*/ -static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr) +static inline void __wb_writeout_inc(struct bdi_writeback *wb) { struct wb_domain *cgdom; - wb_stat_mod(wb, WB_WRITTEN, nr); - wb_domain_writeout_add(&global_wb_domain, &wb->completions, - wb->bdi->max_prop_frac, nr); + inc_wb_stat(wb, WB_WRITTEN); + wb_domain_writeout_inc(&global_wb_domain, &wb->completions, + wb->bdi->max_prop_frac); cgdom = mem_cgroup_wb_domain(wb); if (cgdom) - wb_domain_writeout_add(cgdom, wb_memcg_completions(wb), - wb->bdi->max_prop_frac, nr); + wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb), + wb->bdi->max_prop_frac); } void wb_writeout_inc(struct bdi_writeback *wb) @@ -604,7 +604,7 @@ void wb_writeout_inc(struct bdi_writeback *wb) unsigned long flags; local_irq_save(flags); - __wb_writeout_add(wb, 1); + __wb_writeout_inc(wb); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(wb_writeout_inc); @@ -1084,7 +1084,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, * write_bandwidth = --------------------------------------------------- * period * - * @written may have decreased due to folio_account_redirty(). + * @written may have decreased due to account_page_redirty(). * Avoid underflowing @bw calculation. */ bw = written - min(written, wb->written_stamp); @@ -2366,15 +2366,8 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) ret = generic_writepages(mapping, wbc); if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL)) break; - - /* - * Lacking an allocation context or the locality or writeback - * state of any of the inode's pages, throttle based on - * writeback activity on the local node. It's as good a - * guess as any. - */ - reclaim_throttle(NODE_DATA(numa_node_id()), - VMSCAN_THROTTLE_WRITEBACK); + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); } /* * Usually few pages are written by now from those we've just submitted @@ -2388,44 +2381,44 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) } /** - * folio_write_one - write out a single folio and wait on I/O. - * @folio: The folio to write. + * write_one_page - write out a single page and wait on I/O + * @page: the page to write * - * The folio must be locked by the caller and will be unlocked upon return. + * The page must be locked by the caller and will be unlocked upon return. * * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this * function returns. * * Return: %0 on success, negative error code otherwise */ -int folio_write_one(struct folio *folio) +int write_one_page(struct page *page) { - struct address_space *mapping = folio->mapping; + struct address_space *mapping = page->mapping; int ret = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, - .nr_to_write = folio_nr_pages(folio), + .nr_to_write = 1, }; - BUG_ON(!folio_test_locked(folio)); + BUG_ON(!PageLocked(page)); - folio_wait_writeback(folio); + wait_on_page_writeback(page); - if (folio_clear_dirty_for_io(folio)) { - folio_get(folio); - ret = mapping->a_ops->writepage(&folio->page, &wbc); + if (clear_page_dirty_for_io(page)) { + get_page(page); + ret = mapping->a_ops->writepage(page, &wbc); if (ret == 0) - folio_wait_writeback(folio); - folio_put(folio); + wait_on_page_writeback(page); + put_page(page); } else { - folio_unlock(folio); + unlock_page(page); } if (!ret) ret = filemap_check_errors(mapping); return ret; } -EXPORT_SYMBOL(folio_write_one); +EXPORT_SYMBOL(write_one_page); /* * For address_spaces which do not use buffers nor write back. 
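[Reviewer note, not part of the patch: write_one_page(), restored above, commits the caller to the wait-for-I/O semantics its comment documents. A hedged kernel-side sketch of the usual caller; not buildable standalone, error paths trimmed:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

static int flush_one_page(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_lock_page(mapping, index);
	int err;

	if (!page)
		return -ENOENT;
	/* write_one_page() consumes the page lock, submits the write
	 * and waits for it; AS_EIO/AS_ENOSPC are reported and cleared. */
	err = write_one_page(page);
	put_page(page);	/* reference taken by find_lock_page() */
	return err;
}
End of note.]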
@@ -2445,30 +2438,29 @@ EXPORT_SYMBOL(__set_page_dirty_no_writeback); * * NOTE: This relies on being atomic wrt interrupts. */ -static void folio_account_dirtied(struct folio *folio, +static void account_page_dirtied(struct page *page, struct address_space *mapping) { struct inode *inode = mapping->host; - trace_writeback_dirty_folio(folio, mapping); + trace_writeback_dirty_page(page, mapping); if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; - long nr = folio_nr_pages(folio); - inode_attach_wb(inode, &folio->page); + inode_attach_wb(inode, page); wb = inode_to_wb(inode); - __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); - __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); - __node_stat_mod_folio(folio, NR_DIRTIED, nr); - wb_stat_mod(wb, WB_RECLAIMABLE, nr); - wb_stat_mod(wb, WB_DIRTIED, nr); - task_io_account_write(nr * PAGE_SIZE); - current->nr_dirtied += nr; - __this_cpu_add(bdp_ratelimits, nr); + __inc_lruvec_page_state(page, NR_FILE_DIRTY); + __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); + __inc_node_page_state(page, NR_DIRTIED); + inc_wb_stat(wb, WB_RECLAIMABLE); + inc_wb_stat(wb, WB_DIRTIED); + task_io_account_write(PAGE_SIZE); + current->nr_dirtied++; + __this_cpu_inc(bdp_ratelimits); - mem_cgroup_track_foreign_dirty(folio, wb); + mem_cgroup_track_foreign_dirty(page, wb); } } @@ -2477,156 +2469,130 @@ static void folio_account_dirtied(struct folio *folio, * * Caller must hold lock_page_memcg(). */ -void folio_account_cleaned(struct folio *folio, struct address_space *mapping, +void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb) { if (mapping_can_writeback(mapping)) { - long nr = folio_nr_pages(folio); - lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); - zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); - wb_stat_mod(wb, WB_RECLAIMABLE, -nr); - task_io_account_cancelled_write(nr * PAGE_SIZE); + dec_lruvec_page_state(page, NR_FILE_DIRTY); + dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); + dec_wb_stat(wb, WB_RECLAIMABLE); + task_io_account_cancelled_write(PAGE_SIZE); } } /* - * Mark the folio dirty, and set it dirty in the page cache, and mark - * the inode dirty. + * Mark the page dirty, and set it dirty in the page cache, and mark the inode + * dirty. * - * If warn is true, then emit a warning if the folio is not uptodate and has + * If warn is true, then emit a warning if the page is not uptodate and has * not been truncated. * - * The caller must hold lock_page_memcg(). Most callers have the folio - * locked. A few have the folio blocked from truncation through other - * means (eg zap_page_range() has it mapped and is holding the page table - * lock). This can also be called from mark_buffer_dirty(), which I - * cannot prove is always protected against truncate. + * The caller must hold lock_page_memcg(). */ -void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, +void __set_page_dirty(struct page *page, struct address_space *mapping, int warn) { unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - if (folio->mapping) { /* Race with truncate? */ - WARN_ON_ONCE(warn && !folio_test_uptodate(folio)); - folio_account_dirtied(folio, mapping); - __xa_set_mark(&mapping->i_pages, folio_index(folio), + if (page->mapping) { /* Race with truncate? 
*/ + WARN_ON_ONCE(warn && !PageUptodate(page)); + account_page_dirtied(page, mapping); + __xa_set_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); } xa_unlock_irqrestore(&mapping->i_pages, flags); } -/** - * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads. - * @mapping: Address space this folio belongs to. - * @folio: Folio to be marked as dirty. +/* + * For address_spaces which do not use buffers. Just tag the page as dirty in + * the xarray. * - * Filesystems which do not use buffer heads should call this function - * from their set_page_dirty address space operation. It ignores the - * contents of folio_get_private(), so if the filesystem marks individual - * blocks as dirty, the filesystem should handle that itself. + * This is also used when a single buffer is being dirtied: we want to set the + * page dirty in that case, but not all the buffers. This is a "bottom-up" + * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. * - * This is also sometimes used by filesystems which use buffer_heads when - * a single buffer is being dirtied: we want to set the folio dirty in - * that case, but not all the buffers. This is a "bottom-up" dirtying, - * whereas __set_page_dirty_buffers() is a "top-down" dirtying. - * - * The caller must ensure this doesn't race with truncation. Most will - * simply hold the folio lock, but e.g. zap_pte_range() calls with the - * folio mapped and the pte lock held, which also locks out truncation. + * The caller must ensure this doesn't race with truncation. Most will simply + * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and + * the pte lock held, which also locks out truncation. */ -bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) +int __set_page_dirty_nobuffers(struct page *page) { - folio_memcg_lock(folio); - if (folio_test_set_dirty(folio)) { - folio_memcg_unlock(folio); - return false; - } + lock_page_memcg(page); + if (!TestSetPageDirty(page)) { + struct address_space *mapping = page_mapping(page); - __folio_mark_dirty(folio, mapping, !folio_test_private(folio)); - folio_memcg_unlock(folio); + if (!mapping) { + unlock_page_memcg(page); + return 1; + } + __set_page_dirty(page, mapping, !PagePrivate(page)); + unlock_page_memcg(page); - if (mapping->host) { - /* !PageAnon && !swapper_space */ - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + if (mapping->host) { + /* !PageAnon && !swapper_space */ + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return 1; } - return true; + unlock_page_memcg(page); + return 0; } -EXPORT_SYMBOL(filemap_dirty_folio); +EXPORT_SYMBOL(__set_page_dirty_nobuffers); -/** - * folio_account_redirty - Manually account for redirtying a page. - * @folio: The folio which is being redirtied. - * - * Most filesystems should call folio_redirty_for_writepage() instead - * of this fuction. If your filesystem is doing writeback outside the - * context of a writeback_control(), it can call this when redirtying - * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED, - * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN, - * WB_WRITTEN) in long term. The mismatches will lead to systematic errors - * in balanced_dirty_ratelimit and the dirty pages position control. +/* + * Call this whenever redirtying a page, to de-account the dirty counters + * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written + * counters (NR_WRITTEN, WB_WRITTEN) in long term. 
The mismatches will lead to + * systematic errors in balanced_dirty_ratelimit and the dirty pages position + * control. */ -void folio_account_redirty(struct folio *folio) +void account_page_redirty(struct page *page) { - struct address_space *mapping = folio->mapping; + struct address_space *mapping = page->mapping; if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; - long nr = folio_nr_pages(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - current->nr_dirtied -= nr; - node_stat_mod_folio(folio, NR_DIRTIED, -nr); - wb_stat_mod(wb, WB_DIRTIED, -nr); + current->nr_dirtied--; + dec_node_page_state(page, NR_DIRTIED); + dec_wb_stat(wb, WB_DIRTIED); unlocked_inode_to_wb_end(inode, &cookie); } } -EXPORT_SYMBOL(folio_account_redirty); +EXPORT_SYMBOL(account_page_redirty); -/** - * folio_redirty_for_writepage - Decline to write a dirty folio. - * @wbc: The writeback control. - * @folio: The folio. - * - * When a writepage implementation decides that it doesn't want to write - * @folio for some reason, it should call this function, unlock @folio and - * return 0. - * - * Return: True if we redirtied the folio. False if someone else dirtied - * it first. +/* + * When a writepage implementation decides that it doesn't want to write this + * page for some reason, it should redirty the locked page via + * redirty_page_for_writepage() and it should then unlock the page and return 0 */ -bool folio_redirty_for_writepage(struct writeback_control *wbc, - struct folio *folio) +int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) { - bool ret; - long nr = folio_nr_pages(folio); - - wbc->pages_skipped += nr; - ret = filemap_dirty_folio(folio->mapping, folio); - folio_account_redirty(folio); + int ret; + wbc->pages_skipped++; + ret = __set_page_dirty_nobuffers(page); + account_page_redirty(page); return ret; } -EXPORT_SYMBOL(folio_redirty_for_writepage); +EXPORT_SYMBOL(redirty_page_for_writepage); -/** - * folio_mark_dirty - Mark a folio as being modified. - * @folio: The folio. +/* + * Dirty a page. * - * For folios with a mapping this should be done under the page lock - * for the benefit of asynchronous memory errors who prefer a consistent - * dirty state. This rule can be broken in some special cases, - * but should be better not to. - * - * Return: True if the folio was newly dirtied, false if it was already dirty. + * For pages with a mapping this should be done under the page lock for the + * benefit of asynchronous memory errors who prefer a consistent dirty state. + * This rule can be broken in some special cases, but should be better not to. */ -bool folio_mark_dirty(struct folio *folio) +int set_page_dirty(struct page *page) { - struct address_space *mapping = folio_mapping(folio); + struct address_space *mapping = page_mapping(page); + page = compound_head(page); if (likely(mapping)) { /* * readahead/lru_deactivate_page could remain @@ -2638,17 +2604,17 @@ bool folio_mark_dirty(struct folio *folio) * it will confuse readahead and make it restart the size rampup * process. But it's a trivial problem. 
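[Reviewer note, not part of the patch: redirty_page_for_writepage() above pairs with account_page_redirty() so a skipped write does not skew balanced_dirty_ratelimit. The canonical bail-out it serves, as a sketch in which the toyfs_* names are hypothetical:

static int toyfs_writepage(struct page *page, struct writeback_control *wbc)
{
	if (toyfs_would_deadlock(page)) {	/* hypothetical predicate */
		/* Cannot write now: re-dirty, unlock, report success. */
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}
	return toyfs_do_write(page, wbc);	/* hypothetical writer */
}
End of note.]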
*/ - if (folio_test_reclaim(folio)) - folio_clear_reclaim(folio); - return mapping->a_ops->set_page_dirty(&folio->page); + if (PageReclaim(page)) + ClearPageReclaim(page); + return mapping->a_ops->set_page_dirty(page); } - if (!folio_test_dirty(folio)) { - if (!folio_test_set_dirty(folio)) - return true; + if (!PageDirty(page)) { + if (!TestSetPageDirty(page)) + return 1; } - return false; + return 0; } -EXPORT_SYMBOL(folio_mark_dirty); +EXPORT_SYMBOL(set_page_dirty); /* * set_page_dirty() is racy if the caller has no reference against @@ -2684,49 +2650,49 @@ EXPORT_SYMBOL(set_page_dirty_lock); * page without actually doing it through the VM. Can you say "ext3 is * horribly ugly"? Thought you could. */ -void __folio_cancel_dirty(struct folio *folio) +void __cancel_dirty_page(struct page *page) { - struct address_space *mapping = folio_mapping(folio); + struct address_space *mapping = page_mapping(page); if (mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; - folio_memcg_lock(folio); + lock_page_memcg(page); wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (folio_test_clear_dirty(folio)) - folio_account_cleaned(folio, mapping, wb); + if (TestClearPageDirty(page)) + account_page_cleaned(page, mapping, wb); unlocked_inode_to_wb_end(inode, &cookie); - folio_memcg_unlock(folio); + unlock_page_memcg(page); } else { - folio_clear_dirty(folio); + ClearPageDirty(page); } } -EXPORT_SYMBOL(__folio_cancel_dirty); +EXPORT_SYMBOL(__cancel_dirty_page); /* - * Clear a folio's dirty flag, while caring for dirty memory accounting. - * Returns true if the folio was previously dirty. + * Clear a page's dirty flag, while caring for dirty memory accounting. + * Returns true if the page was previously dirty. * - * This is for preparing to put the folio under writeout. We leave - * the folio tagged as dirty in the xarray so that a concurrent - * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk. - * The ->writepage implementation will run either folio_start_writeback() - * or folio_mark_dirty(), at which stage we bring the folio's dirty flag - * and xarray dirty tag back into sync. + * This is for preparing to put the page under writeout. We leave the page + * tagged as dirty in the xarray so that a concurrent write-for-sync + * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage + * implementation will run either set_page_writeback() or set_page_dirty(), + * at which stage we bring the page's dirty flag and xarray dirty tag + * back into sync. * - * This incoherency between the folio's dirty flag and xarray tag is - * unfortunate, but it only exists while the folio is locked. + * This incoherency between the page's dirty flag and xarray tag is + * unfortunate, but it only exists while the page is locked. 
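[Reviewer note, not part of the patch: set_page_dirty() and account_page_dirtied() above feed NR_FILE_DIRTY, which surfaces as "Dirty:" in /proc/meminfo. A self-contained userspace probe of my own; the counters are per-CPU batched, so small deltas can lag:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static long dirty_kb(void)
{
	long kb = -1;
	char line[128];
	FILE *f = fopen("/proc/meminfo", "r");

	while (f && fgets(line, sizeof(line), f))
		if (sscanf(line, "Dirty: %ld kB", &kb) == 1)
			break;
	if (f)
		fclose(f);
	return kb;
}

int main(void)
{
	size_t len = 1 << 20;
	int fd = open("dirty-demo.bin", O_RDWR | O_CREAT, 0600);

	ftruncate(fd, len);
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	long before = dirty_kb();

	memset(p, 0xaa, len);	/* first store to each page dirties it */
	printf("Dirty: %ld kB -> %ld kB\n", before, dirty_kb());
	munmap(p, len);
	close(fd);
	unlink("dirty-demo.bin");
	return 0;
}
End of note.]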
*/ -bool folio_clear_dirty_for_io(struct folio *folio) +int clear_page_dirty_for_io(struct page *page) { - struct address_space *mapping = folio_mapping(folio); - bool ret = false; + struct address_space *mapping = page_mapping(page); + int ret = 0; - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; @@ -2739,49 +2705,48 @@ bool folio_clear_dirty_for_io(struct folio *folio) * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to - * mark the whole folio dirty if it was + * mark the whole page dirty if it was * dirty in a pagetable. Only to then - * (c) clean the folio again and return 1 to + * (c) clean the page again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * - * Note! Normally the "folio_mark_dirty(folio)" + * Note! Normally the "set_page_dirty(page)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * - * We basically use the folio "master dirty bit" + * We basically use the page "master dirty bit" * as a serialization point for all the different * threads doing their things. */ - if (folio_mkclean(folio)) - folio_mark_dirty(folio); + if (page_mkclean(page)) + set_page_dirty(page); /* * We carefully synchronise fault handlers against - * installing a dirty pte and marking the folio dirty + * installing a dirty pte and marking the page dirty * at this point. We do this by having them hold the - * page lock while dirtying the folio, and folios are + * page lock while dirtying the page, and pages are * always locked coming in here, so we get the desired * exclusion. 
*/ wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (folio_test_clear_dirty(folio)) { - long nr = folio_nr_pages(folio); - lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); - zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); - wb_stat_mod(wb, WB_RECLAIMABLE, -nr); - ret = true; + if (TestClearPageDirty(page)) { + dec_lruvec_page_state(page, NR_FILE_DIRTY); + dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); + dec_wb_stat(wb, WB_RECLAIMABLE); + ret = 1; } unlocked_inode_to_wb_end(inode, &cookie); return ret; } - return folio_test_clear_dirty(folio); + return TestClearPageDirty(page); } -EXPORT_SYMBOL(folio_clear_dirty_for_io); +EXPORT_SYMBOL(clear_page_dirty_for_io); static void wb_inode_writeback_start(struct bdi_writeback *wb) { @@ -2801,28 +2766,27 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); } -bool __folio_end_writeback(struct folio *folio) +int test_clear_page_writeback(struct page *page) { - long nr = folio_nr_pages(folio); - struct address_space *mapping = folio_mapping(folio); - bool ret; + struct address_space *mapping = page_mapping(page); + int ret; - folio_memcg_lock(folio); + lock_page_memcg(page); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - ret = folio_test_clear_writeback(folio); + ret = TestClearPageWriteback(page); if (ret) { - __xa_clear_mark(&mapping->i_pages, folio_index(folio), + __xa_clear_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); - wb_stat_mod(wb, WB_WRITEBACK, -nr); - __wb_writeout_add(wb, nr); + dec_wb_stat(wb, WB_WRITEBACK); + __wb_writeout_inc(wb); if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) wb_inode_writeback_end(wb); @@ -2835,34 +2799,32 @@ bool __folio_end_writeback(struct folio *folio) xa_unlock_irqrestore(&mapping->i_pages, flags); } else { - ret = folio_test_clear_writeback(folio); + ret = TestClearPageWriteback(page); } if (ret) { - lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); - zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); - node_stat_mod_folio(folio, NR_WRITTEN, nr); + dec_lruvec_page_state(page, NR_WRITEBACK); + dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); + inc_node_page_state(page, NR_WRITTEN); } - folio_memcg_unlock(folio); + unlock_page_memcg(page); return ret; } -bool __folio_start_writeback(struct folio *folio, bool keep_write) +int __test_set_page_writeback(struct page *page, bool keep_write) { - long nr = folio_nr_pages(folio); - struct address_space *mapping = folio_mapping(folio); - bool ret; - int access_ret; + struct address_space *mapping = page_mapping(page); + int ret, access_ret; - folio_memcg_lock(folio); + lock_page_memcg(page); if (mapping && mapping_use_writeback_tags(mapping)) { - XA_STATE(xas, &mapping->i_pages, folio_index(folio)); + XA_STATE(xas, &mapping->i_pages, page_index(page)); struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xas_lock_irqsave(&xas, flags); xas_load(&xas); - ret = folio_test_set_writeback(folio); + ret = TestSetPageWriteback(page); if (!ret) { bool on_wblist; @@ -2873,105 +2835,84 @@ bool __folio_start_writeback(struct folio *folio, bool keep_write) if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); 
- wb_stat_mod(wb, WB_WRITEBACK, nr); + inc_wb_stat(wb, WB_WRITEBACK); if (!on_wblist) wb_inode_writeback_start(wb); } /* - * We can come through here when swapping - * anonymous folios, so we don't necessarily - * have an inode to track for sync. + * We can come through here when swapping anonymous + * pages, so we don't necessarily have an inode to track + * for sync. */ if (mapping->host && !on_wblist) sb_mark_inode_writeback(mapping->host); } - if (!folio_test_dirty(folio)) + if (!PageDirty(page)) xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); if (!keep_write) xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); } else { - ret = folio_test_set_writeback(folio); + ret = TestSetPageWriteback(page); } if (!ret) { - lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr); - zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); + inc_lruvec_page_state(page, NR_WRITEBACK); + inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); } - folio_memcg_unlock(folio); - access_ret = arch_make_folio_accessible(folio); + unlock_page_memcg(page); + access_ret = arch_make_page_accessible(page); /* * If writeback has been triggered on a page that cannot be made * accessible, it is too late to recover here. */ - VM_BUG_ON_FOLIO(access_ret != 0, folio); + VM_BUG_ON_PAGE(access_ret != 0, page); return ret; -} -EXPORT_SYMBOL(__folio_start_writeback); -/** - * folio_wait_writeback - Wait for a folio to finish writeback. - * @folio: The folio to wait for. - * - * If the folio is currently being written back to storage, wait for the - * I/O to complete. - * - * Context: Sleeps. Must be called in process context and with - * no spinlocks held. Caller should hold a reference on the folio. - * If the folio is not locked, writeback may start again after writeback - * has finished. +} +EXPORT_SYMBOL(__test_set_page_writeback); + +/* + * Wait for a page to complete writeback */ -void folio_wait_writeback(struct folio *folio) +void wait_on_page_writeback(struct page *page) { - while (folio_test_writeback(folio)) { - trace_folio_wait_writeback(folio, folio_mapping(folio)); - folio_wait_bit(folio, PG_writeback); + while (PageWriteback(page)) { + trace_wait_on_page_writeback(page, page_mapping(page)); + wait_on_page_bit(page, PG_writeback); } } -EXPORT_SYMBOL_GPL(folio_wait_writeback); +EXPORT_SYMBOL_GPL(wait_on_page_writeback); -/** - * folio_wait_writeback_killable - Wait for a folio to finish writeback. - * @folio: The folio to wait for. - * - * If the folio is currently being written back to storage, wait for the - * I/O to complete or a fatal signal to arrive. - * - * Context: Sleeps. Must be called in process context and with - * no spinlocks held. Caller should hold a reference on the folio. - * If the folio is not locked, writeback may start again after writeback - * has finished. - * Return: 0 on success, -EINTR if we get a fatal signal while waiting. +/* + * Wait for a page to complete writeback. Returns -EINTR if we get a + * fatal signal while waiting. 
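[Reviewer note, not part of the patch: wait_on_page_writeback(), restored above, is the ordering primitive for anything that must not touch a page while I/O is in flight. A hedged kernel-side sketch of the usual pattern; toyfs_* is hypothetical and this is not buildable standalone:

static void toyfs_invalidate_page(struct page *page)
{
	lock_page(page);
	wait_on_page_writeback(page);	/* PG_writeback clear on return */
	/* Safe now: e.g. toyfs_free_buffers(page) (hypothetical). */
	unlock_page(page);
}
End of note.]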
*/ -int folio_wait_writeback_killable(struct folio *folio) +int wait_on_page_writeback_killable(struct page *page) { - while (folio_test_writeback(folio)) { - trace_folio_wait_writeback(folio, folio_mapping(folio)); - if (folio_wait_bit_killable(folio, PG_writeback)) + while (PageWriteback(page)) { + trace_wait_on_page_writeback(page, page_mapping(page)); + if (wait_on_page_bit_killable(page, PG_writeback)) return -EINTR; } return 0; } -EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); +EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); /** - * folio_wait_stable() - wait for writeback to finish, if necessary. - * @folio: The folio to wait on. + * wait_for_stable_page() - wait for writeback to finish, if necessary. + * @page: The page to wait on. * - * This function determines if the given folio is related to a backing - * device that requires folio contents to be held stable during writeback. - * If so, then it will wait for any pending writeback to complete. - * - * Context: Sleeps. Must be called in process context and with - * no spinlocks held. Caller should hold a reference on the folio. - * If the folio is not locked, writeback may start again after writeback - * has finished. + * This function determines if the given page is related to a backing device + * that requires page contents to be held stable during writeback. If so, then + * it will wait for any pending writeback to complete. */ -void folio_wait_stable(struct folio *folio) +void wait_for_stable_page(struct page *page) { - if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES) - folio_wait_writeback(folio); + page = thp_head(page); + if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) + wait_on_page_writeback(page); } -EXPORT_SYMBOL_GPL(folio_wait_stable); +EXPORT_SYMBOL_GPL(wait_for_stable_page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3589febc6d..7773bae3b6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -64,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -74,7 +72,6 @@ #include #include #include -#include #include #include #include @@ -680,8 +677,10 @@ static inline int pindex_to_order(unsigned int pindex) int order = pindex / MIGRATE_PCPTYPES; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (order > PAGE_ALLOC_COSTLY_ORDER) + if (order > PAGE_ALLOC_COSTLY_ORDER) { order = pageblock_order; + VM_BUG_ON(order != pageblock_order); + } #else VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); #endif @@ -725,37 +724,27 @@ static inline void free_the_page(struct page *page, unsigned int order) void free_compound_page(struct page *page) { - mem_cgroup_uncharge(page_folio(page)); + mem_cgroup_uncharge(page); free_the_page(page, compound_order(page)); } -static void prep_compound_head(struct page *page, unsigned int order) -{ - set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); - set_compound_order(page, order); - atomic_set(compound_mapcount_ptr(page), -1); - if (hpage_pincount_available(page)) - atomic_set(compound_pincount_ptr(page), 0); -} - -static void prep_compound_tail(struct page *head, int tail_idx) -{ - struct page *p = head + tail_idx; - - p->mapping = TAIL_MAPPING; - set_compound_head(p, head); -} - void prep_compound_page(struct page *page, unsigned int order) { int i; int nr_pages = 1 << order; __SetPageHead(page); - for (i = 1; i < nr_pages; i++) - prep_compound_tail(page, i); + for (i = 1; i < nr_pages; i++) { + struct page *p = page + i; + p->mapping = TAIL_MAPPING; + 
set_compound_head(p, page); + } - prep_compound_head(page, order); + set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); + set_compound_order(page, order); + atomic_set(compound_mapcount_ptr(page), -1); + if (hpage_pincount_available(page)) + atomic_set(compound_pincount_ptr(page), 0); } #ifdef CONFIG_DEBUG_PAGEALLOC @@ -1310,7 +1299,6 @@ static __always_inline bool free_pages_prepare(struct page *page, if (memcg_kmem_enabled() && PageMemcgKmem(page)) __memcg_kmem_uncharge_page(page, order); reset_page_owner(page, order); - page_table_check_free(page, order); return false; } @@ -1350,7 +1338,6 @@ static __always_inline bool free_pages_prepare(struct page *page, page_cpupid_reset_last(page); page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; reset_page_owner(page, order); - page_table_check_free(page, order); if (!PageHighMem(page)) { debug_check_no_locks_freed(page_address(page), @@ -1443,8 +1430,14 @@ static inline void prefetch_buddy(struct page *page) /* * Frees a number of pages from the PCP lists - * Assumes all pages on list are in same zone. + * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. + * + * If the zone was previously in an "all pages pinned" state then look to + * see if this freeing clears that state. + * + * And clear the zone's pages_scanned counter, to hold off the "all pages are + * pinned" detection logic. */ static void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp) @@ -1598,7 +1591,7 @@ static void __meminit init_reserved_page(unsigned long pfn) for (zid = 0; zid < MAX_NR_ZONES; zid++) { struct zone *zone = &pgdat->node_zones[zid]; - if (zone_spans_pfn(zone, pfn)) + if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) break; } __init_single_page(pfn_to_page(pfn), pfn, zid, nid); @@ -2425,7 +2418,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order, } set_page_owner(page, order, gfp_flags); - page_table_check_alloc(page, order); } static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, @@ -3157,9 +3149,9 @@ static void drain_local_pages_wq(struct work_struct *work) * cpu which is alright but we also have to make sure to not move to * a different one. 
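[Reviewer note, not part of the patch: prep_compound_page() above builds the head-plus-tails structure behind, for example, a hugetlb mapping - on x86-64 each 2 MiB huge page is one head page plus 511 tails. Userspace view, assuming a 2 MiB default hugepage size and reserved hugepages (e.g. echo 8 > /proc/sys/vm/nr_hugepages):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* assumption: 2 MiB default */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");	/* none reserved? */
		return 1;
	}
	memset(p, 0, len);	/* fault in one compound page */
	puts("mapped one 2 MiB compound page");
	munmap(p, len);
	return 0;
}
End of note.]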
*/ - migrate_disable(); + preempt_disable(); drain_local_pages(drain->zone); - migrate_enable(); + preempt_enable(); } /* @@ -3976,8 +3968,6 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, } #ifdef CONFIG_NUMA -int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; - static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= @@ -4366,7 +4356,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; psi_memstall_enter(&pflags); - delayacct_compact_start(); noreclaim_flag = memalloc_noreclaim_save(); *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, @@ -4374,7 +4363,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); - delayacct_compact_end(); if (*compact_result == COMPACT_SKIPPED) return NULL; @@ -4811,11 +4799,30 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, trace_reclaim_retry_zone(z, order, reclaimable, available, min_wmark, *no_progress_loops, wmark); if (wmark) { + /* + * If we didn't make any progress and have a lot of + * dirty + writeback pages then we should wait for + * an IO to complete to slow down the reclaim and + * prevent from pre mature OOM + */ + if (!did_some_progress) { + unsigned long write_pending; + + write_pending = zone_page_state_snapshot(zone, + NR_ZONE_WRITE_PENDING); + + if (2 * write_pending > reclaimable) { + congestion_wait(BLK_RW_ASYNC, HZ/10); + return true; + } + } + ret = true; - break; + goto out; } } +out: /* * Memory allocation/reclaim might be called from a WQ context and the * current implementation of the WQ concurrency control doesn't @@ -4911,19 +4918,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (!ac->preferred_zoneref->zone) goto nopage; - /* - * Check for insane configurations where the cpuset doesn't contain - * any suitable zone to satisfy the request - e.g. non-movable - * GFP_HIGHUSER allocations from MOVABLE nodes only. - */ - if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) { - struct zoneref *z = first_zones_zonelist(ac->zonelist, - ac->highest_zoneidx, - &cpuset_current_mems_allowed); - if (!z->zone) - goto nopage; - } - if (alloc_flags & ALLOC_KSWAPD) wake_all_kswapds(order, gfp_mask, ac); @@ -5414,18 +5408,6 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, } EXPORT_SYMBOL(__alloc_pages); -struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, - nodemask_t *nodemask) -{ - struct page *page = __alloc_pages(gfp | __GFP_COMP, order, - preferred_nid, nodemask); - - if (page && order > 1) - prep_transhuge_page(page); - return (struct folio *)page; -} -EXPORT_SYMBOL(__folio_alloc); - /* * Common helper functions. Never use with __GFP_HIGHMEM because the returned * address cannot represent highmem pages. 
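[Reviewer note, not part of the patch: the should_reclaim_retry() hunk above reinstates a throttle - with zero progress and most reclaimable pages dirty or under writeback, waiting on I/O beats another futile scan. The predicate, isolated as a standalone model; values are page counts:

#include <stdbool.h>
#include <stdio.h>

static bool should_wait_for_io(unsigned long reclaimable,
			       unsigned long write_pending,
			       bool did_some_progress)
{
	/* Mirrors the kernel test "2 * write_pending > reclaimable". */
	return !did_some_progress && 2 * write_pending > reclaimable;
}

int main(void)
{
	printf("%d\n", should_wait_for_io(1000, 600, false));	/* 1: throttle */
	printf("%d\n", should_wait_for_io(1000, 300, false));	/* 0: retry   */
	return 0;
}
End of note.]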
Use alloc_pages and then kmap if @@ -5638,8 +5620,8 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned int order = get_order(size); unsigned long addr; - if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) - gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); + if (WARN_ON_ONCE(gfp_mask & __GFP_COMP)) + gfp_mask &= ~__GFP_COMP; addr = __get_free_pages(gfp_mask, order); return make_alloc_exact(addr, order, size); @@ -5663,8 +5645,8 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) unsigned int order = get_order(size); struct page *p; - if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) - gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); + if (WARN_ON_ONCE(gfp_mask & __GFP_COMP)) + gfp_mask &= ~__GFP_COMP; p = alloc_pages_node(nid, gfp_mask, order); if (!p) @@ -6006,7 +5988,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) printk(KERN_CONT "%s" " free:%lukB" - " boost:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" @@ -6027,7 +6008,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) "\n", zone->name, K(zone_page_state(zone, NR_FREE_PAGES)), - K(zone->watermark_boost), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), @@ -6283,7 +6263,7 @@ static void build_zonelists(pg_data_t *pgdat) */ if (node_distance(local_node, node) != node_distance(local_node, prev_node)) - node_load[node] += load; + node_load[node] = load; node_order[nr_nodes++] = node; prev_node = node; @@ -6292,10 +6272,6 @@ static void build_zonelists(pg_data_t *pgdat) build_zonelists_in_node_order(pgdat, node_order, nr_nodes); build_thisnode_zonelists(pgdat); - pr_info("Fallback order for Node %d: ", local_node); - for (node = 0; node < nr_nodes; node++) - pr_cont("%d ", node_order[node]); - pr_cont("\n"); } #ifdef CONFIG_HAVE_MEMORYLESS_NODES @@ -6582,75 +6558,6 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone } #ifdef CONFIG_ZONE_DEVICE -static void __ref __init_zone_device_page(struct page *page, unsigned long pfn, - unsigned long zone_idx, int nid, - struct dev_pagemap *pgmap) -{ - - __init_single_page(page, pfn, zone_idx, nid); - - /* - * Mark page reserved as it will need to wait for onlining - * phase for it to be fully associated with a zone. - * - * We can use the non-atomic __set_bit operation for setting - * the flag as we are still initializing the pages. - */ - __SetPageReserved(page); - - /* - * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer - * and zone_device_data. It is a bug if a ZONE_DEVICE page is - * ever freed or placed on a driver-private list. - */ - page->pgmap = pgmap; - page->zone_device_data = NULL; - - /* - * Mark the block movable so that blocks are reserved for - * movable at startup. This will force kernel allocations - * to reserve their blocks rather than leaking throughout - * the address space during boot when many long-lived - * kernel allocations are made. 
- * - * Please note that MEMINIT_HOTPLUG path doesn't clear memmap - * because this is done early in section_activate() - */ - if (IS_ALIGNED(pfn, pageblock_nr_pages)) { - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - cond_resched(); - } -} - -static void __ref memmap_init_compound(struct page *head, - unsigned long head_pfn, - unsigned long zone_idx, int nid, - struct dev_pagemap *pgmap, - unsigned long nr_pages) -{ - unsigned long pfn, end_pfn = head_pfn + nr_pages; - unsigned int order = pgmap->vmemmap_shift; - - __SetPageHead(head); - for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) { - struct page *page = pfn_to_page(pfn); - - __init_zone_device_page(page, pfn, zone_idx, nid, pgmap); - prep_compound_tail(head, pfn - head_pfn); - set_page_count(page, 0); - - /* - * The first tail page stores compound_mapcount_ptr() and - * compound_order() and the second tail page stores - * compound_pincount_ptr(). Call prep_compound_head() after - * the first and second tail pages have been initialized to - * not have the data overwritten. - */ - if (pfn == head_pfn + 2) - prep_compound_head(head, order); - } -} - void __ref memmap_init_zone_device(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, @@ -6659,7 +6566,6 @@ void __ref memmap_init_zone_device(struct zone *zone, unsigned long pfn, end_pfn = start_pfn + nr_pages; struct pglist_data *pgdat = zone->zone_pgdat; struct vmem_altmap *altmap = pgmap_altmap(pgmap); - unsigned int pfns_per_compound = pgmap_vmemmap_nr(pgmap); unsigned long zone_idx = zone_idx(zone); unsigned long start = jiffies; int nid = pgdat->node_id; @@ -6677,16 +6583,42 @@ void __ref memmap_init_zone_device(struct zone *zone, nr_pages = end_pfn - start_pfn; } - for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) { + for (pfn = start_pfn; pfn < end_pfn; pfn++) { struct page *page = pfn_to_page(pfn); - __init_zone_device_page(page, pfn, zone_idx, nid, pgmap); + __init_single_page(page, pfn, zone_idx, nid); - if (pfns_per_compound == 1) - continue; + /* + * Mark page reserved as it will need to wait for onlining + * phase for it to be fully associated with a zone. + * + * We can use the non-atomic __set_bit operation for setting + * the flag as we are still initializing the pages. + */ + __SetPageReserved(page); - memmap_init_compound(page, pfn, zone_idx, nid, pgmap, - pfns_per_compound); + /* + * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer + * and zone_device_data. It is a bug if a ZONE_DEVICE page is + * ever freed or placed on a driver-private list. + */ + page->pgmap = pgmap; + page->zone_device_data = NULL; + + /* + * Mark the block movable so that blocks are reserved for + * movable at startup. This will force kernel allocations + * to reserve their blocks rather than leaking throughout + * the address space during boot when many long-lived + * kernel allocations are made. 
+ * + * Please note that MEMINIT_HOTPLUG path doesn't clear memmap + * because this is done early in section_activate() + */ + if (IS_ALIGNED(pfn, pageblock_nr_pages)) { + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + cond_resched(); + } } pr_info("%s initialised %lu pages in %ums\n", __func__, @@ -7465,8 +7397,6 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat) {} static void __meminit pgdat_init_internals(struct pglist_data *pgdat) { - int i; - pgdat_resize_init(pgdat); pgdat_init_split_queue(pgdat); @@ -7475,9 +7405,6 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); - for (i = 0; i < NR_VMSCAN_THROTTLE; i++) - init_waitqueue_head(&pgdat->reclaim_wait[i]); - pgdat_page_ext_init(pgdat); lruvec_init(&pgdat->__lruvec); } @@ -8207,7 +8134,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char } if (pages && s) - pr_info("Freeing %s memory: %ldK\n", s, K(pages)); + pr_info("Freeing %s memory: %ldK\n", + s, pages << (PAGE_SHIFT - 10)); return pages; } @@ -8234,7 +8162,7 @@ void __init mem_init_print_info(void) */ #define adj_init_size(start, end, size, pos, adj) \ do { \ - if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \ + if (start <= pos && pos < end && size > adj) \ size -= adj; \ } while (0) @@ -8252,13 +8180,14 @@ void __init mem_init_print_info(void) ", %luK highmem" #endif ")\n", - K(nr_free_pages()), K(physpages), + nr_free_pages() << (PAGE_SHIFT - 10), + physpages << (PAGE_SHIFT - 10), codesize >> 10, datasize >> 10, rosize >> 10, (init_data_size + init_code_size) >> 10, bss_size >> 10, - K(physpages - totalram_pages() - totalcma_pages), - K(totalcma_pages) + (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10), + totalcma_pages << (PAGE_SHIFT - 10) #ifdef CONFIG_HIGHMEM - , K(totalhigh_pages()) + , totalhigh_pages() << (PAGE_SHIFT - 10) #endif ); } @@ -8531,7 +8460,7 @@ void setup_per_zone_wmarks(void) * 8192MB: 11584k * 16384MB: 16384k */ -void calculate_min_free_kbytes(void) +int __meminit init_per_zone_wmark_min(void) { unsigned long lowmem_kbytes; int new_min_free_kbytes; @@ -8539,17 +8468,16 @@ void calculate_min_free_kbytes(void) lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10); new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16); - if (new_min_free_kbytes > user_min_free_kbytes) - min_free_kbytes = clamp(new_min_free_kbytes, 128, 262144); - else + if (new_min_free_kbytes > user_min_free_kbytes) { + min_free_kbytes = new_min_free_kbytes; + if (min_free_kbytes < 128) + min_free_kbytes = 128; + if (min_free_kbytes > 262144) + min_free_kbytes = 262144; + } else { pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n", new_min_free_kbytes, user_min_free_kbytes); - -} - -int __meminit init_per_zone_wmark_min(void) -{ - calculate_min_free_kbytes(); + } setup_per_zone_wmarks(); refresh_zone_stat_thresholds(); setup_per_zone_lowmem_reserve(); @@ -8836,8 +8764,7 @@ void *__init alloc_large_system_hash(const char *tablename, } else if (get_order(size) >= MAX_ORDER || hashdist) { table = __vmalloc(size, gfp_flags); virt = true; - if (table) - huge = is_vm_area_hugepages(table); + huge = is_vm_area_hugepages(table); } else { /* * If bucketsize is not a power-of-two, we may free @@ -9278,8 +9205,8 @@ static bool zone_spans_last_pfn(const struct zone *zone, * for allocation requests which can not be fulfilled with the buddy allocator. 
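[Reviewer note, not part of the patch: init_per_zone_wmark_min() above sizes min_free_kbytes as sqrt(lowmem_kbytes * 16), clamped to [128, 262144]. A standalone reproduction (compile with -lm); results match the table in the comment up to integer-sqrt rounding:

#include <math.h>
#include <stdio.h>

static long min_free_kbytes(long lowmem_kbytes)
{
	long v = (long)sqrt((double)lowmem_kbytes * 16);

	if (v < 128)
		v = 128;
	if (v > 262144)
		v = 262144;
	return v;
}

int main(void)
{
	long mb[] = { 16, 32, 64, 128, 256, 512, 1024, 8192, 16384 };

	for (size_t i = 0; i < sizeof(mb) / sizeof(mb[0]); i++)
		printf("%6ld MB lowmem -> %6ld kB\n",
		       mb[i], min_free_kbytes(mb[i] * 1024));
	return 0;
}
End of note.]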
* * The allocated memory is always aligned to a page boundary. If nr_pages is a - * power of two, then allocated range is also guaranteed to be aligned to same - * nr_pages (e.g. 1GB request would be aligned to 1GB). + * power of two then the alignment is guaranteed to be to the given nr_pages + * (e.g. 1GB request would be aligned to 1GB). * * Allocated pages can be freed with free_contig_range() or by manually calling * __free_page() on each allocated page. @@ -9434,21 +9361,21 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) } #endif -/* - * This function returns a stable result only if called under zone lock. - */ bool is_free_buddy_page(struct page *page) { + struct zone *zone = page_zone(page); unsigned long pfn = page_to_pfn(page); + unsigned long flags; unsigned int order; + spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { struct page *page_head = page - (pfn & ((1 << order) - 1)); - if (PageBuddy(page_head) && - buddy_order_unsafe(page_head) >= order) + if (PageBuddy(page_head) && buddy_order(page_head) >= order) break; } + spin_unlock_irqrestore(&zone->lock, flags); return order < MAX_ORDER; } @@ -9512,7 +9439,6 @@ bool take_page_off_buddy(struct page *page) del_page_from_free_list(page_head, zone, page_order); break_down_buddy_pages(zone, page_head, page, 0, page_order, migratetype); - SetPageHWPoisonTakenOff(page); if (!is_migrate_isolate(migratetype)) __mod_zone_freepage_state(zone, -1, migratetype); ret = true; @@ -9524,31 +9450,6 @@ bool take_page_off_buddy(struct page *page) spin_unlock_irqrestore(&zone->lock, flags); return ret; } - -/* - * Cancel takeoff done by take_page_off_buddy(). - */ -bool put_page_back_buddy(struct page *page) -{ - struct zone *zone = page_zone(page); - unsigned long pfn = page_to_pfn(page); - unsigned long flags; - int migratetype = get_pfnblock_migratetype(page, pfn); - bool ret = false; - - spin_lock_irqsave(&zone->lock, flags); - if (put_page_testzero(page)) { - ClearPageHWPoisonTakenOff(page); - __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE); - if (TestClearPageHWPoison(page)) { - num_poisoned_pages_dec(); - ret = true; - } - } - spin_unlock_irqrestore(&zone->lock, flags); - - return ret; -} #endif #ifdef CONFIG_ZONE_DMA diff --git a/mm/page_counter.c b/mm/page_counter.c index eb156ff5d6..7d83641eb8 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -120,6 +120,7 @@ bool page_counter_try_charge(struct page_counter *counter, new = atomic_long_add_return(nr_pages, &c->usage); if (new > c->max) { atomic_long_sub(nr_pages, &c->usage); + propagate_protected_usage(c, new); /* * This is racy, but we can live with some * inaccuracy in the failcnt which is only used diff --git a/mm/page_ext.c b/mm/page_ext.c index 2e66d934d6..2a52fd9ed4 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -8,7 +8,6 @@ #include #include #include -#include /* * struct page extension @@ -64,21 +63,18 @@ static bool need_page_idle(void) { return true; } -static struct page_ext_operations page_idle_ops __initdata = { +struct page_ext_operations page_idle_ops = { .need = need_page_idle, }; #endif -static struct page_ext_operations *page_ext_ops[] __initdata = { +static struct page_ext_operations *page_ext_ops[] = { #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif -#ifdef CONFIG_PAGE_TABLE_CHECK - &page_table_check_ops, -#endif }; unsigned long page_ext_size = sizeof(struct page_ext); @@ -205,7 +201,7 @@ void 
__init page_ext_init_flatmem(void) panic("Out of memory"); } -#else /* CONFIG_SPARSEMEM */ +#else /* CONFIG_FLATMEM */ struct page_ext *lookup_page_ext(const struct page *page) { diff --git a/mm/page_io.c b/mm/page_io.c index 0bf8e40f4e..c493ce9ebc 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -25,7 +25,6 @@ #include #include #include -#include void end_swap_bio_write(struct bio *bio) { @@ -39,7 +38,7 @@ void end_swap_bio_write(struct bio *bio) * Also print a dire warning that things will go BAD (tm) * very quickly. * - * Also clear PG_reclaim to avoid folio_rotate_reclaimable() + * Also clear PG_reclaim to avoid rotate_reclaimable_page() */ set_page_dirty(page); pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n", @@ -318,7 +317,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, * temporary failure if the system has limited * memory for allocating transmit buffers. * Mark the page dirty and avoid - * folio_rotate_reclaimable but rate-limit the + * rotate_reclaimable_page but rate-limit the * messages but do not flag PageError like * the normal direct-to-bio case as it could * be temporary. @@ -359,6 +358,8 @@ int swap_readpage(struct page *page, bool synchronous) struct bio *bio; int ret = 0; struct swap_info_struct *sis = page_swap_info(page); + blk_qc_t qc; + struct gendisk *disk; unsigned long pflags; VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page); @@ -371,7 +372,6 @@ int swap_readpage(struct page *page, bool synchronous) * significant part of overall IO time. */ psi_memstall_enter(&pflags); - delayacct_swapin_start(); if (frontswap_load(page) == 0) { SetPageUptodate(page); @@ -409,24 +409,26 @@ int swap_readpage(struct page *page, bool synchronous) bio->bi_iter.bi_sector = swap_page_sector(page); bio->bi_end_io = end_swap_bio_read; bio_add_page(bio, page, thp_size(page), 0); + + disk = bio->bi_bdev->bd_disk; /* * Keep this task valid during swap readpage because the oom killer may * attempt to access it in the page fault retry time check. */ if (synchronous) { - bio->bi_opf |= REQ_POLLED; + bio->bi_opf |= REQ_HIPRI; get_task_struct(current); bio->bi_private = current; } count_vm_event(PSWPIN); bio_get(bio); - submit_bio(bio); + qc = submit_bio(bio); while (synchronous) { set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(bio->bi_private)) break; - if (!bio_poll(bio, NULL, 0)) + if (!blk_poll(disk->queue, qc, true)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -434,7 +436,6 @@ int swap_readpage(struct page *page, bool synchronous) out: psi_memstall_leave(&pflags); - delayacct_swapin_end(); return ret; } diff --git a/mm/page_isolation.c b/mm/page_isolation.c index f67c4c70f1..a95c2c6562 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -94,13 +94,8 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) buddy = page + (buddy_pfn - pfn); if (!is_migrate_isolate_page(buddy)) { - isolated_page = !!__isolate_free_page(page, order); - /* - * Isolating a free page in an isolated pageblock - * is expected to always work as watermarks don't - * apply here. 
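The swap_readpage() hunk above returns to blk_poll()-driven synchronous completion: submit the bio, then alternate between polling and blk_io_schedule() until bi_private is cleared. A loose userspace analogue of that poll-or-sleep wait, with a C11 atomic standing in for bio->bi_private (illustrative only, not the kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>
#include <stdio.h>

static atomic_bool io_done;	/* stands in for bio->bi_private going NULL */

/* Stands in for blk_poll(); returns whether it made progress. Here the
 * fake completion fires on the first attempt. */
static bool try_poll(void)
{
	atomic_store(&io_done, true);
	return true;
}

int main(void)
{
	while (!atomic_load(&io_done)) {
		if (!try_poll())
			sched_yield();	/* analogue of blk_io_schedule() */
	}
	printf("read complete\n");
	return 0;
}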
- */ - VM_WARN_ON(!isolated_page); + __isolate_free_page(page, order); + isolated_page = true; } } } @@ -188,6 +183,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned migratetype, int flags) { unsigned long pfn; + unsigned long undo_pfn; struct page *page; BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); @@ -197,12 +193,25 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page && set_migratetype_isolate(page, migratetype, flags)) { - undo_isolate_page_range(start_pfn, pfn, migratetype); - return -EBUSY; + if (page) { + if (set_migratetype_isolate(page, migratetype, flags)) { + undo_pfn = pfn; + goto undo; + } } } return 0; +undo: + for (pfn = start_pfn; + pfn < undo_pfn; + pfn += pageblock_nr_pages) { + struct page *page = pfn_to_online_page(pfn); + if (!page) + continue; + unset_migratetype_isolate(page, migratetype); + } + + return -EBUSY; } /* diff --git a/mm/page_owner.c b/mm/page_owner.c index 99e360df94..62402d2253 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -46,7 +46,7 @@ static int __init early_page_owner_param(char *buf) } early_param("page_owner", early_page_owner_param); -static __init bool need_page_owner(void) +static bool need_page_owner(void) { return page_owner_enabled; } @@ -75,13 +75,11 @@ static noinline void register_early_stack(void) early_handle = create_dummy_stack(); } -static __init void init_page_owner(void) +static void init_page_owner(void) { if (!page_owner_enabled) return; - stack_depot_init(); - register_dummy_stack(); register_failure_stack(); register_early_stack(); @@ -127,7 +125,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags) return handle; } -void __reset_page_owner(struct page *page, unsigned short order) +void __reset_page_owner(struct page *page, unsigned int order) { int i; struct page_ext *page_ext; @@ -151,7 +149,7 @@ void __reset_page_owner(struct page *page, unsigned short order) static inline void __set_page_owner_handle(struct page_ext *page_ext, depot_stack_handle_t handle, - unsigned short order, gfp_t gfp_mask) + unsigned int order, gfp_t gfp_mask) { struct page_owner *page_owner; int i; @@ -171,7 +169,7 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext, } } -noinline void __set_page_owner(struct page *page, unsigned short order, +noinline void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page); @@ -212,10 +210,10 @@ void __split_page_owner(struct page *page, unsigned int nr) } } -void __folio_copy_owner(struct folio *newfolio, struct folio *old) +void __copy_page_owner(struct page *oldpage, struct page *newpage) { - struct page_ext *old_ext = lookup_page_ext(&old->page); - struct page_ext *new_ext = lookup_page_ext(&newfolio->page); + struct page_ext *old_ext = lookup_page_ext(oldpage); + struct page_ext *new_ext = lookup_page_ext(newpage); struct page_owner *old_page_owner, *new_page_owner; if (unlikely(!old_ext || !new_ext)) @@ -233,11 +231,11 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old) new_page_owner->free_ts_nsec = old_page_owner->ts_nsec; /* - * We don't clear the bit on the old folio as it's going to be freed + * We don't clear the bit on the oldpage as it's going to be freed * after migration. Until then, the info can be useful in case of * a bug, and the overall stats will be off a bit only temporarily. 
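start_isolate_page_range() above regains an explicit undo: label that walks back over the pageblocks isolated so far before returning -EBUSY. The generic shape of that rollback pattern, with hypothetical isolate_block()/unisolate_block() helpers:

#include <stdbool.h>
#include <stdio.h>

static bool isolate_block(unsigned long pfn) { return pfn < 3; }
static void unisolate_block(unsigned long pfn) { printf("undo %lu\n", pfn); }

static int isolate_range(unsigned long start, unsigned long end,
			 unsigned long step)
{
	unsigned long pfn, undo_pfn;

	for (pfn = start; pfn < end; pfn += step) {
		if (!isolate_block(pfn)) {
			undo_pfn = pfn;
			goto undo;
		}
	}
	return 0;
undo:
	/* Walk back only over what was actually isolated. */
	for (pfn = start; pfn < undo_pfn; pfn += step)
		unisolate_block(pfn);
	return -1;
}

int main(void)
{
	/* Fails at pfn 3, so blocks 0..2 are rolled back. */
	return isolate_range(0, 8, 1) ? 0 : 1;
}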
* Also, migrate_misplaced_transhuge_page() can still fail the - * migration and then we want the old folio to retain the info. But + * migration and then we want the oldpage to retain the info. But * in that case we also don't need to explicitly clear the info from * the new page, which will be freed. */ @@ -331,6 +329,8 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, depot_stack_handle_t handle) { int ret, pageblock_mt, page_mt; + unsigned long *entries; + unsigned int nr_entries; char *kbuf; count = min_t(size_t, count, PAGE_SIZE); @@ -351,17 +351,18 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, pageblock_mt = get_pageblock_migratetype(page); page_mt = gfp_migratetype(page_owner->gfp_mask); ret += snprintf(kbuf + ret, count - ret, - "PFN %lu type %s Block %lu type %s Flags %pGp\n", + "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n", pfn, migratetype_names[page_mt], pfn >> pageblock_order, migratetype_names[pageblock_mt], - &page->flags); + page->flags, &page->flags); if (ret >= count) goto err; - ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0); + nr_entries = stack_depot_fetch(handle, &entries); + ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0); if (ret >= count) goto err; @@ -393,6 +394,8 @@ void __dump_page_owner(const struct page *page) struct page_ext *page_ext = lookup_page_ext(page); struct page_owner *page_owner; depot_stack_handle_t handle; + unsigned long *entries; + unsigned int nr_entries; gfp_t gfp_mask; int mt; @@ -420,17 +423,20 @@ void __dump_page_owner(const struct page *page) page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec); handle = READ_ONCE(page_owner->handle); - if (!handle) + if (!handle) { pr_alert("page_owner allocation stack trace missing\n"); - else - stack_depot_print(handle); + } else { + nr_entries = stack_depot_fetch(handle, &entries); + stack_trace_print(entries, nr_entries, 0); + } handle = READ_ONCE(page_owner->free_handle); if (!handle) { pr_alert("page_owner free stack trace missing\n"); } else { + nr_entries = stack_depot_fetch(handle, &entries); pr_alert("page last free stack trace:\n"); - stack_depot_print(handle); + stack_trace_print(entries, nr_entries, 0); } if (page_owner->last_migrate_reason != -1) diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 411d1593ef..639662c20c 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -113,24 +113,6 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) return pcpu_nr_pages_to_map_bits(chunk->nr_pages); } -#ifdef CONFIG_MEMCG_KMEM -/** - * pcpu_obj_full_size - helper to calculate size of each accounted object - * @size: size of area to allocate in bytes - * - * For each accounted object there is an extra space which is used to store - * obj_cgroup membership. Charge it too. 
- */ -static inline size_t pcpu_obj_full_size(size_t size) -{ - size_t extra_size; - - extra_size = size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *); - - return size * num_possible_cpus() + extra_size; -} -#endif /* CONFIG_MEMCG_KMEM */ - #ifdef CONFIG_PERCPU_STATS #include diff --git a/mm/percpu.c b/mm/percpu.c index ea28db2830..e0a9868189 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -779,7 +779,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index) { struct pcpu_block_md *block = chunk->md_blocks + index; unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index); - unsigned int start, end; /* region start, region end */ + unsigned int rs, re, start; /* region start, region end */ /* promote scan_hint to contig_hint */ if (block->scan_hint) { @@ -795,8 +795,9 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index) block->right_free = 0; /* iterate over free areas and update the contig hints */ - for_each_clear_bitrange_from(start, end, alloc_map, PCPU_BITMAP_BLOCK_BITS) - pcpu_block_update(block, start, end); + bitmap_for_each_clear_region(alloc_map, rs, re, start, + PCPU_BITMAP_BLOCK_BITS) + pcpu_block_update(block, rs, re); } /** @@ -1069,18 +1070,17 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off, static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits, int *next_off) { - unsigned int start, end; + unsigned int page_start, page_end, rs, re; - start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); - end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); + page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); + page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); - start = find_next_zero_bit(chunk->populated, end, start); - if (start >= end) + rs = page_start; + bitmap_next_clear_region(chunk->populated, &rs, &re, page_end); + if (rs >= page_end) return true; - end = find_next_bit(chunk->populated, end, start + 1); - - *next_off = end * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; + *next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; return false; } @@ -1635,7 +1635,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, if (!objcg) return true; - if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size))) { + if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) { obj_cgroup_put(objcg); return false; } @@ -1656,10 +1656,10 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, rcu_read_lock(); mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, - pcpu_obj_full_size(size)); + size * num_possible_cpus()); rcu_read_unlock(); } else { - obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); obj_cgroup_put(objcg); } } @@ -1676,11 +1676,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) return; chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL; - obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); rcu_read_lock(); mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, - -pcpu_obj_full_size(size)); + -(size * num_possible_cpus())); rcu_read_unlock(); obj_cgroup_put(objcg); @@ -1851,12 +1851,13 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, /* populate if not all pages are already there */ if (!is_atomic) { - unsigned int page_end, rs, re; + unsigned int page_start, page_end, rs, re; - rs = PFN_DOWN(off); + page_start = PFN_DOWN(off); page_end = PFN_UP(off + size); - 
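With pcpu_obj_full_size() removed, the memcg hooks above charge size * num_possible_cpus() and no longer account the per-object obj_cgroup pointer array. The difference is easiest to see as plain arithmetic (PCPU_MIN_ALLOC_SIZE is assumed to be 4, i.e. 1 << PCPU_MIN_ALLOC_SHIFT; the other values are example inputs):

#include <stdio.h>

#define PCPU_MIN_ALLOC_SIZE	4	/* assumed: 1 << PCPU_MIN_ALLOC_SHIFT */

int main(void)
{
	size_t size = 64;		/* bytes per CPU, example value */
	unsigned int ncpus = 8;		/* num_possible_cpus(), example value */
	size_t ptr_sz = sizeof(void *);	/* sizeof(struct obj_cgroup *) */

	/* What the removed pcpu_obj_full_size() charged ... */
	size_t extra = size / PCPU_MIN_ALLOC_SIZE * ptr_sz;
	size_t full = size * ncpus + extra;

	/* ... versus the plain charge used after this patch. */
	size_t plain = size * ncpus;

	printf("full=%zu plain=%zu overhead=%zu\n", full, plain, full - plain);
	return 0;
}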
for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) { + bitmap_for_each_clear_region(chunk->populated, rs, re, + page_start, page_end) { WARN_ON(chunk->immutable); ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp); @@ -2012,7 +2013,8 @@ static void pcpu_balance_free(bool empty_only) list_for_each_entry_safe(chunk, next, &to_free, list) { unsigned int rs, re; - for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) { + bitmap_for_each_set_region(chunk->populated, rs, re, 0, + chunk->nr_pages) { pcpu_depopulate_chunk(chunk, rs, re); spin_lock_irq(&pcpu_lock); pcpu_chunk_depopulated(chunk, rs, re); @@ -2082,7 +2084,8 @@ static void pcpu_balance_populated(void) continue; /* @chunk can't go away while pcpu_alloc_mutex is held */ - for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) { + bitmap_for_each_clear_region(chunk->populated, rs, re, 0, + chunk->nr_pages) { int nr = min_t(int, re - rs, nr_to_pop); spin_unlock_irq(&pcpu_lock); @@ -2469,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, */ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) { - memblock_free(ai, ai->__ai_size); + memblock_free_early(__pa(ai), ai->__ai_size); } /** @@ -2989,42 +2992,6 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info( return ai; } - -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align, - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) -{ - const unsigned long goal = __pa(MAX_DMA_ADDRESS); -#ifdef CONFIG_NUMA - int node = NUMA_NO_NODE; - void *ptr; - - if (cpu_to_nd_fn) - node = cpu_to_nd_fn(cpu); - - if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) { - ptr = memblock_alloc_from(size, align, goal); - pr_info("cpu %d has no node %d or node-local memory\n", - cpu, node); - pr_debug("per cpu data for cpu%d %zu bytes at 0x%llx\n", - cpu, size, (u64)__pa(ptr)); - } else { - ptr = memblock_alloc_try_nid(size, align, goal, - MEMBLOCK_ALLOC_ACCESSIBLE, - node); - - pr_debug("per cpu data for cpu%d %zu bytes on node%d at 0x%llx\n", - cpu, size, node, (u64)__pa(ptr)); - } - return ptr; -#else - return memblock_alloc_from(size, align, goal); -#endif -} - -static void __init pcpu_fc_free(void *ptr, size_t size) -{ - memblock_free(ptr, size); -} #endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */ #if defined(BUILD_EMBED_FIRST_CHUNK) @@ -3034,13 +3001,14 @@ static void __init pcpu_fc_free(void *ptr, size_t size) * @dyn_size: minimum free size for dynamic allocation in bytes * @atom_size: allocation atom size * @cpu_distance_fn: callback to determine distance between cpus, optional - * @cpu_to_nd_fn: callback to convert cpu to it's node, optional + * @alloc_fn: function to allocate percpu page + * @free_fn: function to free percpu page * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected. * * If this function is used to setup the first chunk, it is allocated - * by calling pcpu_fc_alloc and used as-is without being mapped into + * by calling @alloc_fn and used as-is without being mapped into * vmalloc area. Allocations are always whole multiples of @atom_size * aligned to @atom_size. * @@ -3054,7 +3022,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size) * @dyn_size specifies the minimum dynamic area size. * * If the needed size is smaller than the minimum or specified unit - * size, the leftover is returned using pcpu_fc_free. + * size, the leftover is returned using @free_fn. 
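pcpu_embed_first_chunk() once again takes explicit allocator callbacks; their shapes can be read off pcpu_dfl_fc_alloc()/pcpu_dfl_fc_free() later in this patch. A hypothetical userspace pair sketching that contract (the real callbacks use memblock, not the C library):

#include <stdlib.h>

/* Shapes of the restored callback types, per the hunks in this patch. */
typedef void *(*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
				    size_t align);
typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);

/* Hypothetical implementations for illustration only. Note aligned_alloc()
 * requires size to be a multiple of align; the percpu callers allocate in
 * atom_size multiples, so the analogy holds. */
static void *demo_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
	(void)cpu;		/* NUMA-aware callers would route on this */
	return aligned_alloc(align, size);
}

static void demo_fc_free(void *ptr, size_t size)
{
	(void)size;
	free(ptr);
}

int main(void)
{
	pcpu_fc_alloc_fn_t alloc_fn = demo_fc_alloc;
	pcpu_fc_free_fn_t free_fn = demo_fc_free;
	void *p = alloc_fn(0, 4096, 4096);

	if (p)
		free_fn(p, 4096);
	return 0;
}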
* * RETURNS: * 0 on success, -errno on failure. @@ -3062,7 +3030,8 @@ static void __init pcpu_fc_free(void *ptr, size_t size) int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn, - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn) { void *base = (void *)ULONG_MAX; void **areas = NULL; @@ -3097,7 +3066,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, BUG_ON(cpu == NR_CPUS); /* allocate space for the whole group */ - ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn); + ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size); if (!ptr) { rc = -ENOMEM; goto out_free_areas; @@ -3136,12 +3105,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { if (gi->cpu_map[i] == NR_CPUS) { /* unused unit, free whole */ - pcpu_fc_free(ptr, ai->unit_size); + free_fn(ptr, ai->unit_size); continue; } /* copy and return the unused part */ memcpy(ptr, __per_cpu_load, ai->static_size); - pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum); + free_fn(ptr + size_sum, ai->unit_size - size_sum); } } @@ -3160,90 +3129,23 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, out_free_areas: for (group = 0; group < ai->nr_groups; group++) if (areas[group]) - pcpu_fc_free(areas[group], + free_fn(areas[group], ai->groups[group].nr_units * ai->unit_size); out_free: pcpu_free_alloc_info(ai); if (areas) - memblock_free(areas, areas_size); + memblock_free_early(__pa(areas), areas_size); return rc; } #endif /* BUILD_EMBED_FIRST_CHUNK */ #ifdef BUILD_PAGE_FIRST_CHUNK -#include - -#ifndef P4D_TABLE_SIZE -#define P4D_TABLE_SIZE PAGE_SIZE -#endif - -#ifndef PUD_TABLE_SIZE -#define PUD_TABLE_SIZE PAGE_SIZE -#endif - -#ifndef PMD_TABLE_SIZE -#define PMD_TABLE_SIZE PAGE_SIZE -#endif - -#ifndef PTE_TABLE_SIZE -#define PTE_TABLE_SIZE PAGE_SIZE -#endif -void __init __weak pcpu_populate_pte(unsigned long addr) -{ - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - if (pgd_none(*pgd)) { - p4d_t *new; - - new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE); - if (!new) - goto err_alloc; - pgd_populate(&init_mm, pgd, new); - } - - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); - if (!new) - goto err_alloc; - p4d_populate(&init_mm, p4d, new); - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); - if (!new) - goto err_alloc; - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); - if (!new) - goto err_alloc; - pmd_populate_kernel(&init_mm, pmd, new); - } - - return; - -err_alloc: - panic("%s: Failed to allocate memory\n", __func__); -} - /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages * @reserved_size: the size of reserved percpu area in bytes - * @cpu_to_nd_fn: callback to convert cpu to it's node, optional + * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE + * @free_fn: function to free percpu page, always called with PAGE_SIZE + * @populate_pte_fn: function to populate pte * * This is a helper to ease setting up page-remapped first percpu * chunk and can be called where 
pcpu_setup_first_chunk() is expected. @@ -3254,7 +3156,10 @@ void __init __weak pcpu_populate_pte(unsigned long addr) * RETURNS: * 0 on success, -errno on failure. */ -int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) +int __init pcpu_page_first_chunk(size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; struct pcpu_alloc_info *ai; @@ -3296,7 +3201,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t for (i = 0; i < unit_pages; i++) { void *ptr; - ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn); + ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); if (!ptr) { pr_warn("failed to allocate %s page for cpu%u\n", psize_str, cpu); @@ -3318,7 +3223,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t (unsigned long)vm.addr + unit * ai->unit_size; for (i = 0; i < unit_pages; i++) - pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT)); + populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], @@ -3348,10 +3253,10 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t enomem: while (--j >= 0) - pcpu_fc_free(page_address(pages[j]), PAGE_SIZE); + free_fn(page_address(pages[j]), PAGE_SIZE); rc = -ENOMEM; out_free_ar: - memblock_free(pages, pages_size); + memblock_free_early(__pa(pages), pages_size); pcpu_free_alloc_info(ai); return rc; } @@ -3373,6 +3278,17 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS)); +} + +static void __init pcpu_dfl_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + void __init setup_per_cpu_areas(void) { unsigned long delta; @@ -3383,8 +3299,9 @@ void __init setup_per_cpu_areas(void) * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE, - PAGE_SIZE, NULL, NULL); + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, + pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) panic("Failed to initialize percpu areas."); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 6523fda274..4e640baf97 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -10,7 +10,6 @@ #include #include #include -#include #include /* diff --git a/mm/readahead.c b/mm/readahead.c index cf0dcf89eb..41b75d76d3 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -196,9 +197,9 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, * Preallocate as many pages as we will need. */ for (i = 0; i < nr_to_read; i++) { - struct folio *folio = xa_load(&mapping->i_pages, index + i); + struct page *page = xa_load(&mapping->i_pages, index + i); - if (folio && !xa_is_value(folio)) { + if (page && !xa_is_value(page)) { /* * Page already present? 
Kick off the current batch * of contiguous pages before continuing with the @@ -212,21 +213,21 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, continue; } - folio = filemap_alloc_folio(gfp_mask, 0); - if (!folio) + page = __page_cache_alloc(gfp_mask); + if (!page) break; if (mapping->a_ops->readpages) { - folio->index = index + i; - list_add(&folio->lru, &page_pool); - } else if (filemap_add_folio(mapping, folio, index + i, + page->index = index + i; + list_add(&page->lru, &page_pool); + } else if (add_to_page_cache_lru(page, mapping, index + i, gfp_mask) < 0) { - folio_put(folio); + put_page(page); read_pages(ractl, &page_pool, true); i = ractl->_index + ractl->_nr_pages - index - 1; continue; } if (i == nr_to_read - lookahead_size) - folio_set_readahead(folio); + SetPageReadahead(page); ractl->_nr_pages++; } @@ -308,7 +309,7 @@ void force_page_cache_ra(struct readahead_control *ractl, * Set the initial window size, round to next power of 2 and square * for small size, x 4 for medium, and x 2 for large * for 128k (32 page) max ra - * 1-2 page = 16k, 3-4 page 32k, 5-8 page = 64k, > 8 page = 128k initial + * 1-8 page = 32k initial, > 8 page = 128k initial */ static unsigned long get_init_ra_size(unsigned long size, unsigned long max) { @@ -581,7 +582,7 @@ void page_cache_sync_ra(struct readahead_control *ractl, EXPORT_SYMBOL_GPL(page_cache_sync_ra); void page_cache_async_ra(struct readahead_control *ractl, - struct folio *folio, unsigned long req_count) + struct page *page, unsigned long req_count) { /* no read-ahead */ if (!ractl->ra->ra_pages) @@ -590,10 +591,10 @@ void page_cache_async_ra(struct readahead_control *ractl, /* * Same bit is used for PG_readahead and PG_reclaim. */ - if (folio_test_writeback(folio)) + if (PageWriteback(page)) return; - folio_clear_readahead(folio); + ClearPageReadahead(page); /* * Defer asynchronous read-ahead on IO congestion. diff --git a/mm/rmap.c b/mm/rmap.c index 6a1e8c7f62..6aebd17472 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -34,7 +34,7 @@ * mapping->private_lock (in __set_page_dirty_buffers) * lock_page_memcg move_lock (in __set_page_dirty_buffers) * i_pages lock (widely used) - * lruvec->lru_lock (in folio_lruvec_lock_irq) + * lruvec->lru_lock (in lock_page_lruvec_irq) * inode->i_lock (in set_page_dirty's __mark_inode_dirty) * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) @@ -621,20 +621,9 @@ void try_to_unmap_flush_dirty(void) try_to_unmap_flush(); } -/* - * Bits 0-14 of mm->tlb_flush_batched record pending generations. - * Bits 16-30 of mm->tlb_flush_batched bit record flushed generations. - */ -#define TLB_FLUSH_BATCH_FLUSHED_SHIFT 16 -#define TLB_FLUSH_BATCH_PENDING_MASK \ - ((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1) -#define TLB_FLUSH_BATCH_PENDING_LARGE \ - (TLB_FLUSH_BATCH_PENDING_MASK / 2) - static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { struct tlbflush_unmap_batch *tlb_ubc = ¤t->tlb_ubc; - int batch, nbatch; arch_tlbbatch_add_mm(&tlb_ubc->arch, mm); tlb_ubc->flush_required = true; @@ -644,22 +633,7 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) * before the PTE is cleared. */ barrier(); - batch = atomic_read(&mm->tlb_flush_batched); -retry: - if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) { - /* - * Prevent `pending' from catching up with `flushed' because of - * overflow. Reset `pending' and `flushed' to be 1 and 0 if - * `pending' becomes large. 
- */ - nbatch = atomic_cmpxchg(&mm->tlb_flush_batched, batch, 1); - if (nbatch != batch) { - batch = nbatch; - goto retry; - } - } else { - atomic_inc(&mm->tlb_flush_batched); - } + mm->tlb_flush_batched = true; /* * If the PTE was dirty then it's best to assume it's writable. The @@ -706,18 +680,15 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ void flush_tlb_batched_pending(struct mm_struct *mm) { - int batch = atomic_read(&mm->tlb_flush_batched); - int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK; - int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT; - - if (pending != flushed) { + if (data_race(mm->tlb_flush_batched)) { flush_tlb_mm(mm); + /* - * If the new TLB flushing is pending during flushing, leave - * mm->tlb_flush_batched as is, to avoid losing flushing. + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. */ - atomic_cmpxchg(&mm->tlb_flush_batched, batch, - pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT)); + barrier(); + mm->tlb_flush_batched = false; } } #else @@ -1010,7 +981,7 @@ static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) return true; } -int folio_mkclean(struct folio *folio) +int page_mkclean(struct page *page) { int cleaned = 0; struct address_space *mapping; @@ -1020,20 +991,20 @@ int folio_mkclean(struct folio *folio) .invalid_vma = invalid_mkclean_vma, }; - BUG_ON(!folio_test_locked(folio)); + BUG_ON(!PageLocked(page)); - if (!folio_mapped(folio)) + if (!page_mapped(page)) return 0; - mapping = folio_mapping(folio); + mapping = page_mapping(page); if (!mapping) return 0; - rmap_walk(&folio->page, &rwc); + rmap_walk(page, &rwc); return cleaned; } -EXPORT_SYMBOL_GPL(folio_mkclean); +EXPORT_SYMBOL_GPL(page_mkclean); /** * page_move_anon_rmap - move a page to our anon_vma @@ -1836,7 +1807,6 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma, update_hiwater_rss(mm); if (is_zone_device_page(page)) { - unsigned long pfn = page_to_pfn(page); swp_entry_t entry; pte_t swp_pte; @@ -1845,11 +1815,8 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma, * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. 
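flush_tlb_batched_pending() above is reduced to flush-then-clear on a single bool, with a compiler barrier keeping the clear from being reordered before the flush (the kernel additionally wraps the read in data_race()). The skeleton of that scheme, as a standalone sketch:

#include <stdbool.h>
#include <stdio.h>

struct mm_struct { bool tlb_flush_batched; };

static void flush_tlb_mm(struct mm_struct *mm) { (void)mm; puts("flush"); }

/* Mirrors the post-patch flush_tlb_batched_pending(): flush first, then
 * clear the flag, with a compiler barrier so the store is not hoisted. */
static void flush_tlb_batched_pending(struct mm_struct *mm)
{
	if (mm->tlb_flush_batched) {
		flush_tlb_mm(mm);
		asm volatile("" ::: "memory");	/* barrier() */
		mm->tlb_flush_batched = false;
	}
}

int main(void)
{
	struct mm_struct mm = { .tlb_flush_batched = true };

	flush_tlb_batched_pending(&mm);
	return 0;
}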
*/ - entry = pte_to_swp_entry(pteval); - if (is_writable_device_private_entry(entry)) - entry = make_writable_migration_entry(pfn); - else - entry = make_readable_migration_entry(pfn); + entry = make_readable_migration_entry( + page_to_pfn(page)); swp_pte = swp_entry_to_pte(entry); /* diff --git a/mm/shmem.c b/mm/shmem.c index a09b29ec2b..1609a8daba 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include #include @@ -698,6 +700,7 @@ static int shmem_add_to_page_cache(struct page *page, struct mm_struct *charge_mm) { XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); + unsigned long i = 0; unsigned long nr = compound_nr(page); int error; @@ -712,7 +715,7 @@ static int shmem_add_to_page_cache(struct page *page, page->index = index; if (!PageSwapCache(page)) { - error = mem_cgroup_charge(page_folio(page), charge_mm, gfp); + error = mem_cgroup_charge(page, charge_mm, gfp); if (error) { if (PageTransHuge(page)) { count_vm_event(THP_FILE_FALLBACK); @@ -724,18 +727,20 @@ static int shmem_add_to_page_cache(struct page *page, cgroup_throttle_swaprate(page, gfp); do { + void *entry; xas_lock_irq(&xas); - if (expected != xas_find_conflict(&xas)) { + entry = xas_find_conflict(&xas); + if (entry != expected) xas_set_err(&xas, -EEXIST); - goto unlock; - } - if (expected && xas_find_conflict(&xas)) { - xas_set_err(&xas, -EEXIST); - goto unlock; - } - xas_store(&xas, page); + xas_create_range(&xas); if (xas_error(&xas)) goto unlock; +next: + xas_store(&xas, page); + if (++i < nr) { + xas_next(&xas); + goto next; + } if (PageTransHuge(page)) { count_vm_event(THP_FILE_ALLOC); __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr); @@ -856,8 +861,9 @@ unsigned long shmem_swap_usage(struct vm_area_struct *vma) return swapped << PAGE_SHIFT; /* Here comes the more involved part */ - return shmem_partial_swap_usage(mapping, vma->vm_pgoff, - vma->vm_pgoff + vma_pages(vma)); + return shmem_partial_swap_usage(mapping, + linear_page_index(vma, vma->vm_start), + linear_page_index(vma, vma->vm_end)); } /* @@ -881,26 +887,30 @@ void shmem_unlock_mapping(struct address_space *mapping) } } -static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) +/* + * Check whether a hole-punch or truncation needs to split a huge page, + * returning true if no split was required, or the split has been successful. + * + * Eviction (or truncation to 0 size) should never need to split a huge page; + * but in rare cases might do so, if shmem_undo_range() failed to trylock on + * head, and then succeeded to trylock on tail. + * + * A split can only succeed when there are no additional references on the + * huge page: so the split below relies upon find_get_entries() having stopped + * when it found a subpage of the huge page, without getting further references. + */ +static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end) { - struct folio *folio; - struct page *page; + if (!PageTransCompound(page)) + return true; - /* - * At first avoid shmem_getpage(,,,SGP_READ): that fails - * beyond i_size, and reports fallocated pages as holes. - */ - folio = __filemap_get_folio(inode->i_mapping, index, - FGP_ENTRY | FGP_LOCK, 0); - if (!xa_is_value(folio)) - return folio; - /* - * But read a page back from swap if any of it is within i_size - * (although in some cases this is just a waste of time). 
- */ - page = NULL; - shmem_getpage(inode, index, &page, SGP_READ); - return page ? page_folio(page) : NULL; + /* Just proceed to delete a huge page wholly within the range punched */ + if (PageHead(page) && + page->index >= start && page->index + HPAGE_PMD_NR <= end) + return true; + + /* Try to split huge page, so we can truly punch the hole or truncate */ + return split_huge_page(page) >= 0; } /* @@ -914,10 +924,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; pgoff_t end = (lend + 1) >> PAGE_SHIFT; - struct folio_batch fbatch; + unsigned int partial_start = lstart & (PAGE_SIZE - 1); + unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1); + struct pagevec pvec; pgoff_t indices[PAGEVEC_SIZE]; - struct folio *folio; - bool same_folio; long nr_swaps_freed = 0; pgoff_t index; int i; @@ -928,64 +938,67 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (info->fallocend > start && info->fallocend <= end && !unfalloc) info->fallocend = start; - folio_batch_init(&fbatch); + pagevec_init(&pvec); index = start; while (index < end && find_lock_entries(mapping, index, end - 1, - &fbatch, indices)) { - for (i = 0; i < folio_batch_count(&fbatch); i++) { - folio = fbatch.folios[i]; + &pvec, indices)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; index = indices[i]; - if (xa_is_value(folio)) { + if (xa_is_value(page)) { if (unfalloc) continue; nr_swaps_freed += !shmem_free_swap(mapping, - index, folio); + index, page); continue; } - index += folio_nr_pages(folio) - 1; + index += thp_nr_pages(page) - 1; - if (!unfalloc || !folio_test_uptodate(folio)) - truncate_inode_folio(mapping, folio); - folio_unlock(folio); + if (!unfalloc || !PageUptodate(page)) + truncate_inode_page(mapping, page); + unlock_page(page); } - folio_batch_remove_exceptionals(&fbatch); - folio_batch_release(&fbatch); + pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); cond_resched(); index++; } - same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); - folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); - if (folio) { - same_folio = lend < folio_pos(folio) + folio_size(folio); - folio_mark_dirty(folio); - if (!truncate_inode_partial_folio(folio, lstart, lend)) { - start = folio->index + folio_nr_pages(folio); - if (same_folio) - end = folio->index; + if (partial_start) { + struct page *page = NULL; + shmem_getpage(inode, start - 1, &page, SGP_READ); + if (page) { + unsigned int top = PAGE_SIZE; + if (start > end) { + top = partial_end; + partial_end = 0; + } + zero_user_segment(page, partial_start, top); + set_page_dirty(page); + unlock_page(page); + put_page(page); } - folio_unlock(folio); - folio_put(folio); - folio = NULL; } - - if (!same_folio) - folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT); - if (folio) { - folio_mark_dirty(folio); - if (!truncate_inode_partial_folio(folio, lstart, lend)) - end = folio->index; - folio_unlock(folio); - folio_put(folio); + if (partial_end) { + struct page *page = NULL; + shmem_getpage(inode, end, &page, SGP_READ); + if (page) { + zero_user_segment(page, 0, partial_end); + set_page_dirty(page); + unlock_page(page); + put_page(page); + } } + if (start >= end) + return; index = start; while (index < end) { cond_resched(); - if (!find_get_entries(mapping, index, end - 1, &fbatch, + if (!find_get_entries(mapping, index, end - 1, &pvec, indices)) { /* 
If all gone or hole-punch or unfalloc, we're done */ if (index == start || end != -1) @@ -994,14 +1007,14 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, index = start; continue; } - for (i = 0; i < folio_batch_count(&fbatch); i++) { - folio = fbatch.folios[i]; + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; index = indices[i]; - if (xa_is_value(folio)) { + if (xa_is_value(page)) { if (unfalloc) continue; - if (shmem_free_swap(mapping, index, folio)) { + if (shmem_free_swap(mapping, index, page)) { /* Swap was replaced by page: retry */ index--; break; @@ -1010,24 +1023,32 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, continue; } - folio_lock(folio); + lock_page(page); - if (!unfalloc || !folio_test_uptodate(folio)) { - if (folio_mapping(folio) != mapping) { + if (!unfalloc || !PageUptodate(page)) { + if (page_mapping(page) != mapping) { /* Page was replaced by swap: retry */ - folio_unlock(folio); + unlock_page(page); index--; break; } - VM_BUG_ON_FOLIO(folio_test_writeback(folio), - folio); - truncate_inode_folio(mapping, folio); + VM_BUG_ON_PAGE(PageWriteback(page), page); + if (shmem_punch_compound(page, start, end)) + truncate_inode_page(mapping, page); + else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + /* Wipe the page and don't get stuck */ + clear_highpage(page); + flush_dcache_page(page); + set_page_dirty(page); + if (index < + round_up(start, HPAGE_PMD_NR)) + start = index + 1; + } } - index = folio->index + folio_nr_pages(folio) - 1; - folio_unlock(folio); + unlock_page(page); } - folio_batch_remove_exceptionals(&fbatch); - folio_batch_release(&fbatch); + pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); index++; } @@ -1151,7 +1172,7 @@ static void shmem_evict_inode(struct inode *inode) static int shmem_find_swap_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices, - unsigned int type) + unsigned int type, bool frontswap) { XA_STATE(xas, &mapping->i_pages, start); struct page *page; @@ -1172,6 +1193,9 @@ static int shmem_find_swap_entries(struct address_space *mapping, entry = radix_to_swp_entry(page); if (swp_type(entry) != type) continue; + if (frontswap && + !frontswap_test(swap_info[type], swp_offset(entry))) + continue; indices[ret] = xas.xa_index; entries[ret] = page; @@ -1224,20 +1248,26 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec, /* * If swap found in inode, free it and move page from swapcache to filecache. 
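shmem_punch_compound() above only lets a huge page be truncated whole when it lies entirely inside the punched range; otherwise it attempts a split. The containment test on its own, with HPAGE_PMD_NR fixed at 512 for illustration:

#include <assert.h>
#include <stdbool.h>

#define HPAGE_PMD_NR 512	/* 2 MiB huge page on x86-64, for illustration */

/* The test from shmem_punch_compound(): a head page may be deleted wholly
 * only if the huge page lies inside [start, end). */
static bool huge_wholly_within(unsigned long head_index,
			       unsigned long start, unsigned long end)
{
	return head_index >= start && head_index + HPAGE_PMD_NR <= end;
}

int main(void)
{
	assert(huge_wholly_within(512, 512, 1024));	/* exact fit */
	assert(!huge_wholly_within(512, 513, 1024));	/* starts before range */
	assert(!huge_wholly_within(512, 512, 1023));	/* ends past range */
	return 0;
}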
*/ -static int shmem_unuse_inode(struct inode *inode, unsigned int type) +static int shmem_unuse_inode(struct inode *inode, unsigned int type, + bool frontswap, unsigned long *fs_pages_to_unuse) { struct address_space *mapping = inode->i_mapping; pgoff_t start = 0; struct pagevec pvec; pgoff_t indices[PAGEVEC_SIZE]; + bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0); int ret = 0; pagevec_init(&pvec); do { unsigned int nr_entries = PAGEVEC_SIZE; + if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE) + nr_entries = *fs_pages_to_unuse; + pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, - pvec.pages, indices, type); + pvec.pages, indices, + type, frontswap); if (pvec.nr == 0) { ret = 0; break; @@ -1247,6 +1277,14 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type) if (ret < 0) break; + if (frontswap_partial) { + *fs_pages_to_unuse -= ret; + if (*fs_pages_to_unuse == 0) { + ret = FRONTSWAP_PAGES_UNUSED; + break; + } + } + start = indices[pvec.nr - 1]; } while (true); @@ -1258,7 +1296,8 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type) * device 'type' back into memory, so the swap device can be * unused. */ -int shmem_unuse(unsigned int type) +int shmem_unuse(unsigned int type, bool frontswap, + unsigned long *fs_pages_to_unuse) { struct shmem_inode_info *info, *next; int error = 0; @@ -1281,7 +1320,8 @@ int shmem_unuse(unsigned int type) atomic_inc(&info->stop_eviction); mutex_unlock(&shmem_swaplist_mutex); - error = shmem_unuse_inode(&info->vfs_inode, type); + error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, + fs_pages_to_unuse); cond_resched(); mutex_lock(&shmem_swaplist_mutex); @@ -1526,7 +1566,8 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, return NULL; shmem_pseudo_vma_init(&pvma, info, hindex); - page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, true); + page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), + true); shmem_pseudo_vma_destroy(&pvma); if (page) prep_transhuge_page(page); @@ -1601,7 +1642,6 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct page *oldpage, *newpage; - struct folio *old, *new; struct address_space *swap_mapping; swp_entry_t entry; pgoff_t swap_index; @@ -1638,9 +1678,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, xa_lock_irq(&swap_mapping->i_pages); error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); if (!error) { - old = page_folio(oldpage); - new = page_folio(newpage); - mem_cgroup_migrate(old, new); + mem_cgroup_migrate(oldpage, newpage); __inc_lruvec_page_state(newpage, NR_FILE_PAGES); __dec_lruvec_page_state(oldpage, NR_FILE_PAGES); } @@ -2269,7 +2307,6 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); cache_no_acl(inode); - mapping_set_large_folios(inode->i_mapping); switch (mode & S_IFMT) { default: @@ -2392,6 +2429,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, shmem_recalc_inode(inode); spin_unlock_irq(&info->lock); + SetPageDirty(page); unlock_page(page); return 0; out_delete_from_cache: @@ -2423,7 +2461,6 @@ shmem_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t index = pos >> PAGE_SHIFT; - int ret = 0; /* i_rwsem is held by caller */ if (unlikely(info->seals & (F_SEAL_GROW | @@ -2434,19 +2471,7 @@ 
shmem_write_begin(struct file *file, struct address_space *mapping, return -EPERM; } - ret = shmem_getpage(inode, index, pagep, SGP_WRITE); - - if (ret) - return ret; - - if (PageHWPoison(*pagep)) { - unlock_page(*pagep); - put_page(*pagep); - *pagep = NULL; - return -EIO; - } - - return 0; + return shmem_getpage(inode, index, pagep, SGP_WRITE); } static int @@ -2533,12 +2558,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (sgp == SGP_CACHE) set_page_dirty(page); unlock_page(page); - - if (PageHWPoison(page)) { - put_page(page); - error = -EIO; - break; - } } /* @@ -2931,6 +2950,28 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry) return shmem_unlink(dir, dentry); } +static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) +{ + bool old_is_dir = d_is_dir(old_dentry); + bool new_is_dir = d_is_dir(new_dentry); + + if (old_dir != new_dir && old_is_dir != new_is_dir) { + if (old_is_dir) { + drop_nlink(old_dir); + inc_nlink(new_dir); + } else { + drop_nlink(new_dir); + inc_nlink(old_dir); + } + } + old_dir->i_ctime = old_dir->i_mtime = + new_dir->i_ctime = new_dir->i_mtime = + d_inode(old_dentry)->i_ctime = + d_inode(new_dentry)->i_ctime = current_time(old_dir); + + return 0; +} + static int shmem_whiteout(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry) { @@ -2976,7 +3017,7 @@ static int shmem_rename2(struct user_namespace *mnt_userns, return -EINVAL; if (flags & RENAME_EXCHANGE) - return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); + return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry); if (!simple_empty(new_dentry)) return -ENOTEMPTY; @@ -3078,8 +3119,7 @@ static const char *shmem_get_link(struct dentry *dentry, page = find_get_page(inode->i_mapping, 0); if (!page) return ERR_PTR(-ECHILD); - if (PageHWPoison(page) || - !PageUptodate(page)) { + if (!PageUptodate(page)) { put_page(page); return ERR_PTR(-ECHILD); } @@ -3087,13 +3127,6 @@ static const char *shmem_get_link(struct dentry *dentry, error = shmem_getpage(inode, 0, &page, SGP_READ); if (error) return ERR_PTR(error); - if (!page) - return ERR_PTR(-ECHILD); - if (PageHWPoison(page)) { - unlock_page(page); - put_page(page); - return ERR_PTR(-ECHILD); - } unlock_page(page); } set_delayed_call(done, shmem_put_link, page); @@ -3744,13 +3777,6 @@ static void shmem_destroy_inodecache(void) kmem_cache_destroy(shmem_inode_cachep); } -/* Keep the page in page cache instead of truncating it */ -static int shmem_error_remove_page(struct address_space *mapping, - struct page *page) -{ - return 0; -} - const struct address_space_operations shmem_aops = { .writepage = shmem_writepage, .set_page_dirty = __set_page_dirty_no_writeback, @@ -3761,7 +3787,7 @@ const struct address_space_operations shmem_aops = { #ifdef CONFIG_MIGRATION .migratepage = migrate_page, #endif - .error_remove_page = shmem_error_remove_page, + .error_remove_page = generic_error_remove_page, }; EXPORT_SYMBOL(shmem_aops); @@ -3871,7 +3897,7 @@ static struct file_system_type shmem_fs_type = { .parameters = shmem_fs_parameters, #endif .kill_sb = kill_litter_super, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_MOUNT | FS_THP_SUPPORT, }; int __init shmem_init(void) @@ -3995,7 +4021,8 @@ int __init shmem_init(void) return 0; } -int shmem_unuse(unsigned int type) +int shmem_unuse(unsigned int type, bool frontswap, + unsigned long *fs_pages_to_unuse) { return 0; } @@ -4168,14 +4195,9 @@ 
struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL, NULL, NULL); if (error) - return ERR_PTR(error); - - unlock_page(page); - if (PageHWPoison(page)) { - put_page(page); - return ERR_PTR(-EIO); - } - + page = ERR_PTR(error); + else + unlock_page(page); return page; #else /* diff --git a/mm/slab.c b/mm/slab.c index ddf5737c63..874b3f8fe8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -218,7 +218,7 @@ static void cache_reap(struct work_struct *unused); static inline void fixup_objfreelist_debug(struct kmem_cache *cachep, void **list); static inline void fixup_slab_list(struct kmem_cache *cachep, - struct kmem_cache_node *n, struct slab *slab, + struct kmem_cache_node *n, struct page *page, void **list); static int slab_early_init = 1; @@ -372,10 +372,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) static int slab_max_order = SLAB_MAX_ORDER_LO; static bool slab_max_order_set __initdata; -static inline void *index_to_obj(struct kmem_cache *cache, - const struct slab *slab, unsigned int idx) +static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, + unsigned int idx) { - return slab->s_mem + cache->size * idx; + return page->s_mem + cache->size * idx; } #define BOOT_CPUCACHE_ENTRIES 1 @@ -550,17 +550,17 @@ static struct array_cache *alloc_arraycache(int node, int entries, } static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep, - struct slab *slab, void *objp) + struct page *page, void *objp) { struct kmem_cache_node *n; - int slab_node; + int page_node; LIST_HEAD(list); - slab_node = slab_nid(slab); - n = get_node(cachep, slab_node); + page_node = page_to_nid(page); + n = get_node(cachep, page_node); spin_lock(&n->list_lock); - free_block(cachep, &objp, 1, slab_node, &list); + free_block(cachep, &objp, 1, page_node, &list); spin_unlock(&n->list_lock); slabs_destroy(cachep, &list); @@ -761,7 +761,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, } static int __cache_free_alien(struct kmem_cache *cachep, void *objp, - int node, int slab_node) + int node, int page_node) { struct kmem_cache_node *n; struct alien_cache *alien = NULL; @@ -770,21 +770,21 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp, n = get_node(cachep, node); STATS_INC_NODEFREES(cachep); - if (n->alien && n->alien[slab_node]) { - alien = n->alien[slab_node]; + if (n->alien && n->alien[page_node]) { + alien = n->alien[page_node]; ac = &alien->ac; spin_lock(&alien->lock); if (unlikely(ac->avail == ac->limit)) { STATS_INC_ACOVERFLOW(cachep); - __drain_alien_cache(cachep, ac, slab_node, &list); + __drain_alien_cache(cachep, ac, page_node, &list); } __free_one(ac, objp); spin_unlock(&alien->lock); slabs_destroy(cachep, &list); } else { - n = get_node(cachep, slab_node); + n = get_node(cachep, page_node); spin_lock(&n->list_lock); - free_block(cachep, &objp, 1, slab_node, &list); + free_block(cachep, &objp, 1, page_node, &list); spin_unlock(&n->list_lock); slabs_destroy(cachep, &list); } @@ -793,16 +793,16 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp, static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) { - int slab_node = slab_nid(virt_to_slab(objp)); + int page_node = page_to_nid(virt_to_page(objp)); int node = numa_mem_id(); /* * Make sure we are not freeing a object from another node to the array * cache on this cpu. 
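index_to_obj() above is bare pointer arithmetic: the slab's object base plus size * idx. A self-contained rendering of the same math:

#include <assert.h>
#include <stddef.h>

/* index_to_obj() from the hunk above boils down to base + size * idx. */
static void *index_to_obj(void *s_mem, size_t obj_size, unsigned int idx)
{
	return (char *)s_mem + obj_size * idx;
}

int main(void)
{
	char slab_mem[4096];	/* stands in for page->s_mem */

	assert(index_to_obj(slab_mem, 256, 0) == &slab_mem[0]);
	assert(index_to_obj(slab_mem, 256, 3) == &slab_mem[768]);
	return 0;
}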
*/ - if (likely(node == slab_node)) + if (likely(node == page_node)) return 0; - return __cache_free_alien(cachep, objp, node, slab_node); + return __cache_free_alien(cachep, objp, node, page_node); } /* @@ -1367,60 +1367,57 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) * did not request dmaable memory, we might get it, but that * would be relatively rare and ignorable. */ -static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, +static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) { - struct folio *folio; - struct slab *slab; + struct page *page; flags |= cachep->allocflags; - folio = (struct folio *) __alloc_pages_node(nodeid, flags, cachep->gfporder); - if (!folio) { + page = __alloc_pages_node(nodeid, flags, cachep->gfporder); + if (!page) { slab_out_of_memory(cachep, flags, nodeid); return NULL; } - slab = folio_slab(folio); - - account_slab(slab, cachep->gfporder, cachep, flags); - __folio_set_slab(folio); + account_slab_page(page, cachep->gfporder, cachep, flags); + __SetPageSlab(page); /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ - if (sk_memalloc_socks() && page_is_pfmemalloc(folio_page(folio, 0))) - slab_set_pfmemalloc(slab); + if (sk_memalloc_socks() && page_is_pfmemalloc(page)) + SetPageSlabPfmemalloc(page); - return slab; + return page; } /* * Interface to system's page release. */ -static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab) +static void kmem_freepages(struct kmem_cache *cachep, struct page *page) { int order = cachep->gfporder; - struct folio *folio = slab_folio(slab); - BUG_ON(!folio_test_slab(folio)); - __slab_clear_pfmemalloc(slab); - __folio_clear_slab(folio); - page_mapcount_reset(folio_page(folio, 0)); - folio->mapping = NULL; + BUG_ON(!PageSlab(page)); + __ClearPageSlabPfmemalloc(page); + __ClearPageSlab(page); + page_mapcount_reset(page); + /* In union with page->mapping where page allocator expects NULL */ + page->slab_cache = NULL; if (current->reclaim_state) current->reclaim_state->reclaimed_slab += 1 << order; - unaccount_slab(slab, order, cachep); - __free_pages(folio_page(folio, 0), order); + unaccount_slab_page(page, order, cachep); + __free_pages(page, order); } static void kmem_rcu_free(struct rcu_head *head) { struct kmem_cache *cachep; - struct slab *slab; + struct page *page; - slab = container_of(head, struct slab, rcu_head); - cachep = slab->slab_cache; + page = container_of(head, struct page, rcu_head); + cachep = page->slab_cache; - kmem_freepages(cachep, slab); + kmem_freepages(cachep, page); } #if DEBUG @@ -1556,18 +1553,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) /* Print some data about the neighboring objects, if they * exist: */ - struct slab *slab = virt_to_slab(objp); + struct page *page = virt_to_head_page(objp); unsigned int objnr; - objnr = obj_to_index(cachep, slab, objp); + objnr = obj_to_index(cachep, page, objp); if (objnr) { - objp = index_to_obj(cachep, slab, objnr - 1); + objp = index_to_obj(cachep, page, objnr - 1); realobj = (char *)objp + obj_offset(cachep); pr_err("Prev obj: start=%px, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); } if (objnr + 1 < cachep->num) { - objp = index_to_obj(cachep, slab, objnr + 1); + objp = index_to_obj(cachep, page, objnr + 1); realobj = (char *)objp + obj_offset(cachep); pr_err("Next obj: start=%px, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); @@ -1578,17 +1575,17 @@ static void check_poison_obj(struct 
kmem_cache *cachep, void *objp) #if DEBUG static void slab_destroy_debugcheck(struct kmem_cache *cachep, - struct slab *slab) + struct page *page) { int i; if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) { - poison_obj(cachep, slab->freelist - obj_offset(cachep), + poison_obj(cachep, page->freelist - obj_offset(cachep), POISON_FREE); } for (i = 0; i < cachep->num; i++) { - void *objp = index_to_obj(cachep, slab, i); + void *objp = index_to_obj(cachep, page, i); if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); @@ -1604,7 +1601,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, } #else static void slab_destroy_debugcheck(struct kmem_cache *cachep, - struct slab *slab) + struct page *page) { } #endif @@ -1612,22 +1609,22 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, /** * slab_destroy - destroy and release all objects in a slab * @cachep: cache pointer being destroyed - * @slab: slab being destroyed + * @page: page pointer being destroyed * - * Destroy all the objs in a slab, and release the mem back to the system. - * Before calling the slab must have been unlinked from the cache. The + * Destroy all the objs in a slab page, and release the mem back to the system. + * Before calling the slab page must have been unlinked from the cache. The * kmem_cache_node ->list_lock is not held/needed. */ -static void slab_destroy(struct kmem_cache *cachep, struct slab *slab) +static void slab_destroy(struct kmem_cache *cachep, struct page *page) { void *freelist; - freelist = slab->freelist; - slab_destroy_debugcheck(cachep, slab); + freelist = page->freelist; + slab_destroy_debugcheck(cachep, page); if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) - call_rcu(&slab->rcu_head, kmem_rcu_free); + call_rcu(&page->rcu_head, kmem_rcu_free); else - kmem_freepages(cachep, slab); + kmem_freepages(cachep, page); /* * From now on, we don't use freelist @@ -1643,11 +1640,11 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slab) */ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) { - struct slab *slab, *n; + struct page *page, *n; - list_for_each_entry_safe(slab, n, list, slab_list) { - list_del(&slab->slab_list); - slab_destroy(cachep, slab); + list_for_each_entry_safe(page, n, list, slab_list) { + list_del(&page->slab_list); + slab_destroy(cachep, page); } } @@ -2197,7 +2194,7 @@ static int drain_freelist(struct kmem_cache *cache, { struct list_head *p; int nr_freed; - struct slab *slab; + struct page *page; nr_freed = 0; while (nr_freed < tofree && !list_empty(&n->slabs_free)) { @@ -2209,8 +2206,8 @@ static int drain_freelist(struct kmem_cache *cache, goto out; } - slab = list_entry(p, struct slab, slab_list); - list_del(&slab->slab_list); + page = list_entry(p, struct page, slab_list); + list_del(&page->slab_list); n->free_slabs--; n->total_slabs--; /* @@ -2219,7 +2216,7 @@ static int drain_freelist(struct kmem_cache *cache, */ n->free_objects -= cache->num; spin_unlock_irq(&n->list_lock); - slab_destroy(cache, slab); + slab_destroy(cache, page); nr_freed++; } out: @@ -2294,14 +2291,14 @@ void __kmem_cache_release(struct kmem_cache *cachep) * which are all initialized during kmem_cache_init(). 
*/ static void *alloc_slabmgmt(struct kmem_cache *cachep, - struct slab *slab, int colour_off, + struct page *page, int colour_off, gfp_t local_flags, int nodeid) { void *freelist; - void *addr = slab_address(slab); + void *addr = page_address(page); - slab->s_mem = addr + colour_off; - slab->active = 0; + page->s_mem = addr + colour_off; + page->active = 0; if (OBJFREELIST_SLAB(cachep)) freelist = NULL; @@ -2318,24 +2315,24 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, return freelist; } -static inline freelist_idx_t get_free_obj(struct slab *slab, unsigned int idx) +static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx) { - return ((freelist_idx_t *) slab->freelist)[idx]; + return ((freelist_idx_t *)page->freelist)[idx]; } -static inline void set_free_obj(struct slab *slab, +static inline void set_free_obj(struct page *page, unsigned int idx, freelist_idx_t val) { - ((freelist_idx_t *)(slab->freelist))[idx] = val; + ((freelist_idx_t *)(page->freelist))[idx] = val; } -static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab) +static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) { #if DEBUG int i; for (i = 0; i < cachep->num; i++) { - void *objp = index_to_obj(cachep, slab, i); + void *objp = index_to_obj(cachep, page, i); if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = NULL; @@ -2419,17 +2416,17 @@ static freelist_idx_t next_random_slot(union freelist_init_state *state) } /* Swap two freelist entries */ -static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b) +static void swap_free_obj(struct page *page, unsigned int a, unsigned int b) { - swap(((freelist_idx_t *) slab->freelist)[a], - ((freelist_idx_t *) slab->freelist)[b]); + swap(((freelist_idx_t *)page->freelist)[a], + ((freelist_idx_t *)page->freelist)[b]); } /* * Shuffle the freelist initialization state based on pre-computed lists. * return true if the list was successfully shuffled, false otherwise. 
*/ -static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab) +static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page) { unsigned int objfreelist = 0, i, rand, count = cachep->num; union freelist_init_state state; @@ -2446,7 +2443,7 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab) objfreelist = count - 1; else objfreelist = next_random_slot(&state); - slab->freelist = index_to_obj(cachep, slab, objfreelist) + + page->freelist = index_to_obj(cachep, page, objfreelist) + obj_offset(cachep); count--; } @@ -2457,51 +2454,51 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab) */ if (!precomputed) { for (i = 0; i < count; i++) - set_free_obj(slab, i, i); + set_free_obj(page, i, i); /* Fisher-Yates shuffle */ for (i = count - 1; i > 0; i--) { rand = prandom_u32_state(&state.rnd_state); rand %= (i + 1); - swap_free_obj(slab, i, rand); + swap_free_obj(page, i, rand); } } else { for (i = 0; i < count; i++) - set_free_obj(slab, i, next_random_slot(&state)); + set_free_obj(page, i, next_random_slot(&state)); } if (OBJFREELIST_SLAB(cachep)) - set_free_obj(slab, cachep->num - 1, objfreelist); + set_free_obj(page, cachep->num - 1, objfreelist); return true; } #else static inline bool shuffle_freelist(struct kmem_cache *cachep, - struct slab *slab) + struct page *page) { return false; } #endif /* CONFIG_SLAB_FREELIST_RANDOM */ static void cache_init_objs(struct kmem_cache *cachep, - struct slab *slab) + struct page *page) { int i; void *objp; bool shuffled; - cache_init_objs_debug(cachep, slab); + cache_init_objs_debug(cachep, page); /* Try to randomize the freelist if enabled */ - shuffled = shuffle_freelist(cachep, slab); + shuffled = shuffle_freelist(cachep, page); if (!shuffled && OBJFREELIST_SLAB(cachep)) { - slab->freelist = index_to_obj(cachep, slab, cachep->num - 1) + + page->freelist = index_to_obj(cachep, page, cachep->num - 1) + obj_offset(cachep); } for (i = 0; i < cachep->num; i++) { - objp = index_to_obj(cachep, slab, i); + objp = index_to_obj(cachep, page, i); objp = kasan_init_slab_obj(cachep, objp); /* constructor could break poison info */ @@ -2512,56 +2509,68 @@ static void cache_init_objs(struct kmem_cache *cachep, } if (!shuffled) - set_free_obj(slab, i, i); + set_free_obj(page, i, i); } } -static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab) +static void *slab_get_obj(struct kmem_cache *cachep, struct page *page) { void *objp; - objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active)); - slab->active++; + objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); + page->active++; return objp; } static void slab_put_obj(struct kmem_cache *cachep, - struct slab *slab, void *objp) + struct page *page, void *objp) { - unsigned int objnr = obj_to_index(cachep, slab, objp); + unsigned int objnr = obj_to_index(cachep, page, objp); #if DEBUG unsigned int i; /* Verify double free bug */ - for (i = slab->active; i < cachep->num; i++) { - if (get_free_obj(slab, i) == objnr) { + for (i = page->active; i < cachep->num; i++) { + if (get_free_obj(page, i) == objnr) { pr_err("slab: double free detected in cache '%s', objp %px\n", cachep->name, objp); BUG(); } } #endif - slab->active--; - if (!slab->freelist) - slab->freelist = objp + obj_offset(cachep); + page->active--; + if (!page->freelist) + page->freelist = objp + obj_offset(cachep); - set_free_obj(slab, slab->active, objnr); + set_free_obj(page, page->active, objnr); +} + +/* + * Map pages beginning at addr to the 
given cache and slab. This is required + * for the slab allocator to be able to lookup the cache and slab of a + * virtual address for kfree, ksize, and slab debugging. + */ +static void slab_map_pages(struct kmem_cache *cache, struct page *page, + void *freelist) +{ + page->slab_cache = cache; + page->freelist = freelist; } /* * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. */ -static struct slab *cache_grow_begin(struct kmem_cache *cachep, +static struct page *cache_grow_begin(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *freelist; size_t offset; gfp_t local_flags; - int slab_node; + int page_node; struct kmem_cache_node *n; - struct slab *slab; + struct page *page; /* * Be lazy and only check for valid flags here, keeping it out of the @@ -2581,12 +2590,12 @@ static struct slab *cache_grow_begin(struct kmem_cache *cachep, * Get mem for the objs. Attempt to allocate a physical page from * 'nodeid'. */ - slab = kmem_getpages(cachep, local_flags, nodeid); - if (!slab) + page = kmem_getpages(cachep, local_flags, nodeid); + if (!page) goto failed; - slab_node = slab_nid(slab); - n = get_node(cachep, slab_node); + page_node = page_to_nid(page); + n = get_node(cachep, page_node); /* Get colour for the slab, and cal the next value. */ n->colour_next++; @@ -2604,55 +2613,54 @@ static struct slab *cache_grow_begin(struct kmem_cache *cachep, * page_address() in the latter returns a non-tagged pointer, * as it should be for slab pages. */ - kasan_poison_slab(slab); + kasan_poison_slab(page); /* Get slab management. */ - freelist = alloc_slabmgmt(cachep, slab, offset, - local_flags & ~GFP_CONSTRAINT_MASK, slab_node); + freelist = alloc_slabmgmt(cachep, page, offset, + local_flags & ~GFP_CONSTRAINT_MASK, page_node); if (OFF_SLAB(cachep) && !freelist) goto opps1; - slab->slab_cache = cachep; - slab->freelist = freelist; + slab_map_pages(cachep, page, freelist); - cache_init_objs(cachep, slab); + cache_init_objs(cachep, page); if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); - return slab; + return page; opps1: - kmem_freepages(cachep, slab); + kmem_freepages(cachep, page); failed: if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); return NULL; } -static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab) +static void cache_grow_end(struct kmem_cache *cachep, struct page *page) { struct kmem_cache_node *n; void *list = NULL; check_irq_off(); - if (!slab) + if (!page) return; - INIT_LIST_HEAD(&slab->slab_list); - n = get_node(cachep, slab_nid(slab)); + INIT_LIST_HEAD(&page->slab_list); + n = get_node(cachep, page_to_nid(page)); spin_lock(&n->list_lock); n->total_slabs++; - if (!slab->active) { - list_add_tail(&slab->slab_list, &n->slabs_free); + if (!page->active) { + list_add_tail(&page->slab_list, &n->slabs_free); n->free_slabs++; } else - fixup_slab_list(cachep, n, slab, &list); + fixup_slab_list(cachep, n, page, &list); STATS_INC_GROWN(cachep); - n->free_objects += cachep->num - slab->active; + n->free_objects += cachep->num - page->active; spin_unlock(&n->list_lock); fixup_objfreelist_debug(cachep, &list); @@ -2700,13 +2708,13 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, unsigned long caller) { unsigned int objnr; - struct slab *slab; + struct page *page; BUG_ON(virt_to_cache(objp) != cachep); objp -= obj_offset(cachep); kfree_debugcheck(objp); - slab = virt_to_slab(objp); + page = virt_to_head_page(objp); if (cachep->flags & 
SLAB_RED_ZONE) { verify_redzone_free(cachep, objp); @@ -2716,10 +2724,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = (void *)caller; - objnr = obj_to_index(cachep, slab, objp); + objnr = obj_to_index(cachep, page, objp); BUG_ON(objnr >= cachep->num); - BUG_ON(objp != index_to_obj(cachep, slab, objnr)); + BUG_ON(objp != index_to_obj(cachep, page, objnr)); if (cachep->flags & SLAB_POISON) { poison_obj(cachep, objp, POISON_FREE); @@ -2749,97 +2757,97 @@ static inline void fixup_objfreelist_debug(struct kmem_cache *cachep, } static inline void fixup_slab_list(struct kmem_cache *cachep, - struct kmem_cache_node *n, struct slab *slab, + struct kmem_cache_node *n, struct page *page, void **list) { /* move slabp to correct slabp list: */ - list_del(&slab->slab_list); - if (slab->active == cachep->num) { - list_add(&slab->slab_list, &n->slabs_full); + list_del(&page->slab_list); + if (page->active == cachep->num) { + list_add(&page->slab_list, &n->slabs_full); if (OBJFREELIST_SLAB(cachep)) { #if DEBUG /* Poisoning will be done without holding the lock */ if (cachep->flags & SLAB_POISON) { - void **objp = slab->freelist; + void **objp = page->freelist; *objp = *list; *list = objp; } #endif - slab->freelist = NULL; + page->freelist = NULL; } } else - list_add(&slab->slab_list, &n->slabs_partial); + list_add(&page->slab_list, &n->slabs_partial); } /* Try to find non-pfmemalloc slab if needed */ -static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n, - struct slab *slab, bool pfmemalloc) +static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n, + struct page *page, bool pfmemalloc) { - if (!slab) + if (!page) return NULL; if (pfmemalloc) - return slab; + return page; - if (!slab_test_pfmemalloc(slab)) - return slab; + if (!PageSlabPfmemalloc(page)) + return page; /* No need to keep pfmemalloc slab if we have enough free objects */ if (n->free_objects > n->free_limit) { - slab_clear_pfmemalloc(slab); - return slab; + ClearPageSlabPfmemalloc(page); + return page; } /* Move pfmemalloc slab to the end of list to speed up next search */ - list_del(&slab->slab_list); - if (!slab->active) { - list_add_tail(&slab->slab_list, &n->slabs_free); + list_del(&page->slab_list); + if (!page->active) { + list_add_tail(&page->slab_list, &n->slabs_free); n->free_slabs++; } else - list_add_tail(&slab->slab_list, &n->slabs_partial); + list_add_tail(&page->slab_list, &n->slabs_partial); - list_for_each_entry(slab, &n->slabs_partial, slab_list) { - if (!slab_test_pfmemalloc(slab)) - return slab; + list_for_each_entry(page, &n->slabs_partial, slab_list) { + if (!PageSlabPfmemalloc(page)) + return page; } n->free_touched = 1; - list_for_each_entry(slab, &n->slabs_free, slab_list) { - if (!slab_test_pfmemalloc(slab)) { + list_for_each_entry(page, &n->slabs_free, slab_list) { + if (!PageSlabPfmemalloc(page)) { n->free_slabs--; - return slab; + return page; } } return NULL; } -static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) +static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) { - struct slab *slab; + struct page *page; assert_spin_locked(&n->list_lock); - slab = list_first_entry_or_null(&n->slabs_partial, struct slab, + page = list_first_entry_or_null(&n->slabs_partial, struct page, slab_list); - if (!slab) { + if (!page) { n->free_touched = 1; - slab = list_first_entry_or_null(&n->slabs_free, struct slab, + page = 
list_first_entry_or_null(&n->slabs_free, struct page, slab_list); - if (slab) + if (page) n->free_slabs--; } if (sk_memalloc_socks()) - slab = get_valid_first_slab(n, slab, pfmemalloc); + page = get_valid_first_slab(n, page, pfmemalloc); - return slab; + return page; } static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, struct kmem_cache_node *n, gfp_t flags) { - struct slab *slab; + struct page *page; void *obj; void *list = NULL; @@ -2847,16 +2855,16 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, return NULL; spin_lock(&n->list_lock); - slab = get_first_slab(n, true); - if (!slab) { + page = get_first_slab(n, true); + if (!page) { spin_unlock(&n->list_lock); return NULL; } - obj = slab_get_obj(cachep, slab); + obj = slab_get_obj(cachep, page); n->free_objects--; - fixup_slab_list(cachep, n, slab, &list); + fixup_slab_list(cachep, n, page, &list); spin_unlock(&n->list_lock); fixup_objfreelist_debug(cachep, &list); @@ -2869,20 +2877,20 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, * or cache_grow_end() for new slab */ static __always_inline int alloc_block(struct kmem_cache *cachep, - struct array_cache *ac, struct slab *slab, int batchcount) + struct array_cache *ac, struct page *page, int batchcount) { /* * There must be at least one object available for * allocation. */ - BUG_ON(slab->active >= cachep->num); + BUG_ON(page->active >= cachep->num); - while (slab->active < cachep->num && batchcount--) { + while (page->active < cachep->num && batchcount--) { STATS_INC_ALLOCED(cachep); STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac->entry[ac->avail++] = slab_get_obj(cachep, slab); + ac->entry[ac->avail++] = slab_get_obj(cachep, page); } return batchcount; @@ -2895,7 +2903,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) struct array_cache *ac, *shared; int node; void *list = NULL; - struct slab *slab; + struct page *page; check_irq_off(); node = numa_mem_id(); @@ -2928,14 +2936,14 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) while (batchcount > 0) { /* Get slab alloc is to come from. */ - slab = get_first_slab(n, false); - if (!slab) + page = get_first_slab(n, false); + if (!page) goto must_grow; check_spinlock_acquired(cachep); - batchcount = alloc_block(cachep, ac, slab, batchcount); - fixup_slab_list(cachep, n, slab, &list); + batchcount = alloc_block(cachep, ac, page, batchcount); + fixup_slab_list(cachep, n, page, &list); } must_grow: @@ -2954,16 +2962,16 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) return obj; } - slab = cache_grow_begin(cachep, gfp_exact_node(flags), node); + page = cache_grow_begin(cachep, gfp_exact_node(flags), node); /* * cache_grow_begin() can reenable interrupts, * then ac could change. 
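 * Hence cpu_cache_get() is repeated below: after an interrupt or a migration * to another cpu, the old ac pointer may no longer name this cpu's * array_cache.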
*/ ac = cpu_cache_get(cachep); - if (!ac->avail && slab) - alloc_block(cachep, ac, slab, batchcount); - cache_grow_end(cachep, slab); + if (!ac->avail && page) + alloc_block(cachep, ac, page, batchcount); + cache_grow_end(cachep, page); if (!ac->avail) return NULL; @@ -3093,7 +3101,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) struct zone *zone; enum zone_type highest_zoneidx = gfp_zone(flags); void *obj = NULL; - struct slab *slab; + struct page *page; int nid; unsigned int cpuset_mems_cookie; @@ -3129,10 +3137,10 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) * We may trigger various forms of reclaim on the allowed * set and go into memory reserves if necessary. */ - slab = cache_grow_begin(cache, flags, numa_mem_id()); - cache_grow_end(cache, slab); - if (slab) { - nid = slab_nid(slab); + page = cache_grow_begin(cache, flags, numa_mem_id()); + cache_grow_end(cache, page); + if (page) { + nid = page_to_nid(page); obj = ____cache_alloc_node(cache, gfp_exact_node(flags), nid); @@ -3156,7 +3164,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { - struct slab *slab; + struct page *page; struct kmem_cache_node *n; void *obj = NULL; void *list = NULL; @@ -3167,8 +3175,8 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, check_irq_off(); spin_lock(&n->list_lock); - slab = get_first_slab(n, false); - if (!slab) + page = get_first_slab(n, false); + if (!page) goto must_grow; check_spinlock_acquired_node(cachep, nodeid); @@ -3177,12 +3185,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - BUG_ON(slab->active == cachep->num); + BUG_ON(page->active == cachep->num); - obj = slab_get_obj(cachep, slab); + obj = slab_get_obj(cachep, page); n->free_objects--; - fixup_slab_list(cachep, n, slab, &list); + fixup_slab_list(cachep, n, page, &list); spin_unlock(&n->list_lock); fixup_objfreelist_debug(cachep, &list); @@ -3190,12 +3198,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, must_grow: spin_unlock(&n->list_lock); - slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); - if (slab) { + page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); + if (page) { /* This slab isn't counted yet so don't update free_objects */ - obj = slab_get_obj(cachep, slab); + obj = slab_get_obj(cachep, page); } - cache_grow_end(cachep, slab); + cache_grow_end(cachep, page); return obj ? 
obj : fallback_alloc(cachep, flags); } @@ -3325,40 +3333,40 @@ static void free_block(struct kmem_cache *cachep, void **objpp, { int i; struct kmem_cache_node *n = get_node(cachep, node); - struct slab *slab; + struct page *page; n->free_objects += nr_objects; for (i = 0; i < nr_objects; i++) { void *objp; - struct slab *slab; + struct page *page; objp = objpp[i]; - slab = virt_to_slab(objp); - list_del(&slab->slab_list); + page = virt_to_head_page(objp); + list_del(&page->slab_list); check_spinlock_acquired_node(cachep, node); - slab_put_obj(cachep, slab, objp); + slab_put_obj(cachep, page, objp); STATS_DEC_ACTIVE(cachep); /* fixup slab chains */ - if (slab->active == 0) { - list_add(&slab->slab_list, &n->slabs_free); + if (page->active == 0) { + list_add(&page->slab_list, &n->slabs_free); n->free_slabs++; } else { /* Unconditionally move a slab to the end of the * partial list on free - maximum time for the * other objects to be freed, too. */ - list_add_tail(&slab->slab_list, &n->slabs_partial); + list_add_tail(&page->slab_list, &n->slabs_partial); } } while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) { n->free_objects -= cachep->num; - slab = list_last_entry(&n->slabs_free, struct slab, slab_list); - list_move(&slab->slab_list, list); + page = list_last_entry(&n->slabs_free, struct page, slab_list); + list_move(&page->slab_list, list); n->free_slabs--; n->total_slabs--; } @@ -3394,10 +3402,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) #if STATS { int i = 0; - struct slab *slab; + struct page *page; - list_for_each_entry(slab, &n->slabs_free, slab_list) { - BUG_ON(slab->active); + list_for_each_entry(page, &n->slabs_free, slab_list) { + BUG_ON(page->active); i++; } @@ -3473,10 +3481,10 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, } if (sk_memalloc_socks()) { - struct slab *slab = virt_to_slab(objp); + struct page *page = virt_to_head_page(objp); - if (unlikely(slab_test_pfmemalloc(slab))) { - cache_free_pfmemalloc(cachep, slab, objp); + if (unlikely(PageSlabPfmemalloc(page))) { + cache_free_pfmemalloc(cachep, page, objp); return; } } @@ -3649,21 +3657,21 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller); #endif /* CONFIG_NUMA */ #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page) { struct kmem_cache *cachep; unsigned int objnr; void *objp; kpp->kp_ptr = object; - kpp->kp_slab = slab; - cachep = slab->slab_cache; + kpp->kp_page = page; + cachep = page->slab_cache; kpp->kp_slab_cache = cachep; objp = object - obj_offset(cachep); kpp->kp_data_offset = obj_offset(cachep); - slab = virt_to_slab(objp); - objnr = obj_to_index(cachep, slab, objp); - objp = index_to_obj(cachep, slab, objnr); + page = virt_to_head_page(objp); + objnr = obj_to_index(cachep, page, objp); + objp = index_to_obj(cachep, page, objnr); kpp->kp_objp = objp; if (DEBUG && cachep->flags & SLAB_STORE_USER) kpp->kp_ret = *dbg_userword(cachep, objp); @@ -3725,13 +3733,14 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) if (!cachep) return; - trace_kmem_cache_free(_RET_IP_, objp, cachep->name); local_irq_save(flags); debug_check_no_locks_freed(objp, cachep->object_size); if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(objp, cachep->object_size); __cache_free(cachep, objp, _RET_IP_); local_irq_restore(flags); + + trace_kmem_cache_free(_RET_IP_, objp, cachep->name); } 
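/* * The tracepoint above fires only after local_irq_restore(), so trace * handling never runs with interrupts disabled here. A minimal usage sketch, * illustrative only and with error handling elided: struct kmem_cache *c = * kmem_cache_create("demo", 64, 0, 0, NULL); void *obj = kmem_cache_alloc(c, * GFP_KERNEL); kmem_cache_free(c, obj); kmem_cache_destroy(c); */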
EXPORT_SYMBOL(kmem_cache_free); @@ -3891,6 +3900,8 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) if (err) goto end; + if (limit && shared && batchcount) + goto skip_setup; /* * The head array serves three purposes: * - create a LIFO ordering, i.e. return objects that are cache-warm @@ -3933,6 +3944,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) limit = 32; #endif batchcount = (limit + 1) / 2; +skip_setup: err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp); end: if (err) @@ -4169,8 +4181,8 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. */ -void __check_heap_object(const void *ptr, unsigned long n, - const struct slab *slab, bool to_user) +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user) { struct kmem_cache *cachep; unsigned int objnr; @@ -4179,15 +4191,15 @@ void __check_heap_object(const void *ptr, unsigned long n, ptr = kasan_reset_tag(ptr); /* Find and validate object. */ - cachep = slab->slab_cache; - objnr = obj_to_index(cachep, slab, (void *)ptr); + cachep = page->slab_cache; + objnr = obj_to_index(cachep, page, (void *)ptr); BUG_ON(objnr >= cachep->num); /* Find offset within object. */ if (is_kfence_address(ptr)) offset = ptr - kfence_object_start(ptr); else - offset = ptr - index_to_obj(cachep, slab, objnr) - obj_offset(cachep); + offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); /* Allow address range falling entirely within usercopy region. */ if (offset >= cachep->useroffset && @@ -4195,6 +4207,19 @@ void __check_heap_object(const void *ptr, unsigned long n, n <= cachep->useroffset - offset + cachep->usersize) return; + /* + * If the copy is still within the allocated object, produce + * a warning instead of rejecting the copy. This is intended + * to be a temporary method to find any missing usercopy + * whitelists. 
+ */ + if (usercopy_fallback && + offset <= cachep->object_size && + n <= cachep->object_size - offset) { + usercopy_warn("SLAB object", cachep->name, to_user, offset, n); + return; + } + usercopy_abort("SLAB object", cachep->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/slab.h b/mm/slab.h index c7f2abc2b1..56ad7eea3d 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -5,191 +5,6 @@ * Internal slab definitions */ -/* Reuses the bits in struct page */ -struct slab { - unsigned long __page_flags; - -#if defined(CONFIG_SLAB) - - union { - struct list_head slab_list; - struct rcu_head rcu_head; - }; - struct kmem_cache *slab_cache; - void *freelist; /* array of free object indexes */ - void *s_mem; /* first object */ - unsigned int active; - -#elif defined(CONFIG_SLUB) - - union { - struct list_head slab_list; - struct rcu_head rcu_head; -#ifdef CONFIG_SLUB_CPU_PARTIAL - struct { - struct slab *next; - int slabs; /* Nr of slabs left */ - }; -#endif - }; - struct kmem_cache *slab_cache; - /* Double-word boundary */ - void *freelist; /* first free object */ - union { - unsigned long counters; - struct { - unsigned inuse:16; - unsigned objects:15; - unsigned frozen:1; - }; - }; - unsigned int __unused; - -#elif defined(CONFIG_SLOB) - - struct list_head slab_list; - void *__unused_1; - void *freelist; /* first free block */ - long units; - unsigned int __unused_2; - -#else -#error "Unexpected slab allocator configured" -#endif - - atomic_t __page_refcount; -#ifdef CONFIG_MEMCG - unsigned long memcg_data; -#endif -}; - -#define SLAB_MATCH(pg, sl) \ - static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl)) -SLAB_MATCH(flags, __page_flags); -SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */ -#ifndef CONFIG_SLOB -SLAB_MATCH(rcu_head, rcu_head); -#endif -SLAB_MATCH(_refcount, __page_refcount); -#ifdef CONFIG_MEMCG -SLAB_MATCH(memcg_data, memcg_data); -#endif -#undef SLAB_MATCH -static_assert(sizeof(struct slab) <= sizeof(struct page)); - -/** - * folio_slab - Converts from folio to slab. - * @folio: The folio. - * - * Currently struct slab is a different representation of a folio where - * folio_test_slab() is true. - * - * Return: The slab which contains this folio. - */ -#define folio_slab(folio) (_Generic((folio), \ - const struct folio *: (const struct slab *)(folio), \ - struct folio *: (struct slab *)(folio))) - -/** - * slab_folio - The folio allocated for a slab - * @slab: The slab. - * - * Slabs are allocated as folios that contain the individual objects and are - * using some fields in the first struct page of the folio - those fields are - * now accessed by struct slab. It is occasionally necessary to convert back to - * a folio in order to communicate with the rest of the mm. Please use this - * helper function instead of casting yourself, as the implementation may change - * in the future. - */ -#define slab_folio(s) (_Generic((s), \ - const struct slab *: (const struct folio *)s, \ - struct slab *: (struct folio *)s)) - -/** - * page_slab - Converts from first struct page to slab. - * @p: The first (either head of compound or single) page of slab. - * - * A temporary wrapper to convert struct page to struct slab in situations where - * we know the page is the compound head, or single order-0 page. - * - * Long-term ideally everything would work with struct slab directly or go - * through folio to struct slab. 
- * - * Return: The slab which contains this page - */ -#define page_slab(p) (_Generic((p), \ - const struct page *: (const struct slab *)(p), \ - struct page *: (struct slab *)(p))) - -/** - * slab_page - The first struct page allocated for a slab - * @slab: The slab. - * - * A convenience wrapper for converting slab to the first struct page of the - * underlying folio, to communicate with code not yet converted to folio or - * struct slab. - */ -#define slab_page(s) folio_page(slab_folio(s), 0) - -/* - * If network-based swap is enabled, sl*b must keep track of whether pages - * were allocated from pfmemalloc reserves. - */ -static inline bool slab_test_pfmemalloc(const struct slab *slab) -{ - return folio_test_active((struct folio *)slab_folio(slab)); -} - -static inline void slab_set_pfmemalloc(struct slab *slab) -{ - folio_set_active(slab_folio(slab)); -} - -static inline void slab_clear_pfmemalloc(struct slab *slab) -{ - folio_clear_active(slab_folio(slab)); -} - -static inline void __slab_clear_pfmemalloc(struct slab *slab) -{ - __folio_clear_active(slab_folio(slab)); -} - -static inline void *slab_address(const struct slab *slab) -{ - return folio_address(slab_folio(slab)); -} - -static inline int slab_nid(const struct slab *slab) -{ - return folio_nid(slab_folio(slab)); -} - -static inline pg_data_t *slab_pgdat(const struct slab *slab) -{ - return folio_pgdat(slab_folio(slab)); -} - -static inline struct slab *virt_to_slab(const void *addr) -{ - struct folio *folio = virt_to_folio(addr); - - if (!folio_test_slab(folio)) - return NULL; - - return folio_slab(folio); -} - -static inline int slab_order(const struct slab *slab) -{ - return folio_order((struct folio *)slab_folio(slab)); -} - -static inline size_t slab_size(const struct slab *slab) -{ - return PAGE_SIZE << slab_order(slab); -} - #ifdef CONFIG_SLOB /* * Common fields provided in kmem_cache by all slab allocators @@ -430,33 +245,15 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla } #ifdef CONFIG_MEMCG_KMEM -/* - * slab_objcgs - get the object cgroups vector associated with a slab - * @slab: a pointer to the slab struct - * - * Returns a pointer to the object cgroups vector associated with the slab, - * or NULL if no such vector has been associated yet. 
- */ -static inline struct obj_cgroup **slab_objcgs(struct slab *slab) -{ - unsigned long memcg_data = READ_ONCE(slab->memcg_data); - - VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), - slab_page(slab)); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab)); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); -} - -int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, - gfp_t gfp, bool new_slab); +int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, + gfp_t gfp, bool new_page); void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr); -static inline void memcg_free_slab_cgroups(struct slab *slab) +static inline void memcg_free_page_obj_cgroups(struct page *page) { - kfree(slab_objcgs(slab)); - slab->memcg_data = 0; + kfree(page_objcgs(page)); + page->memcg_data = 0; } static inline size_t obj_full_size(struct kmem_cache *s) @@ -501,7 +298,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { - struct slab *slab; + struct page *page; unsigned long off; size_t i; @@ -510,19 +307,19 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, for (i = 0; i < size; i++) { if (likely(p[i])) { - slab = virt_to_slab(p[i]); + page = virt_to_head_page(p[i]); - if (!slab_objcgs(slab) && - memcg_alloc_slab_cgroups(slab, s, flags, + if (!page_objcgs(page) && + memcg_alloc_page_obj_cgroups(page, s, flags, false)) { obj_cgroup_uncharge(objcg, obj_full_size(s)); continue; } - off = obj_to_index(s, slab, p[i]); + off = obj_to_index(s, page, p[i]); obj_cgroup_get(objcg); - slab_objcgs(slab)[off] = objcg; - mod_objcg_state(objcg, slab_pgdat(slab), + page_objcgs(page)[off] = objcg; + mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), obj_full_size(s)); } else { obj_cgroup_uncharge(objcg, obj_full_size(s)); @@ -537,7 +334,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, struct kmem_cache *s; struct obj_cgroup **objcgs; struct obj_cgroup *objcg; - struct slab *slab; + struct page *page; unsigned int off; int i; @@ -548,52 +345,43 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, if (unlikely(!p[i])) continue; - slab = virt_to_slab(p[i]); - /* we could be given a kmalloc_large() object, skip those */ - if (!slab) - continue; - - objcgs = slab_objcgs(slab); + page = virt_to_head_page(p[i]); + objcgs = page_objcgs_check(page); if (!objcgs) continue; if (!s_orig) - s = slab->slab_cache; + s = page->slab_cache; else s = s_orig; - off = obj_to_index(s, slab, p[i]); + off = obj_to_index(s, page, p[i]); objcg = objcgs[off]; if (!objcg) continue; objcgs[off] = NULL; obj_cgroup_uncharge(objcg, obj_full_size(s)); - mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), + mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), -obj_full_size(s)); obj_cgroup_put(objcg); } } #else /* CONFIG_MEMCG_KMEM */ -static inline struct obj_cgroup **slab_objcgs(struct slab *slab) -{ - return NULL; -} - static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr) { return NULL; } -static inline int memcg_alloc_slab_cgroups(struct slab *slab, +static inline int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, gfp_t gfp, - bool new_slab) + bool new_page) { return 0; } -static inline void memcg_free_slab_cgroups(struct slab *slab) +static inline void memcg_free_page_obj_cgroups(struct page *page) { } @@ -617,35 +405,35 @@ static inline void 
memcg_slab_free_hook(struct kmem_cache *s, } #endif /* CONFIG_MEMCG_KMEM */ -#ifndef CONFIG_SLOB static inline struct kmem_cache *virt_to_cache(const void *obj) { - struct slab *slab; + struct page *page; - slab = virt_to_slab(obj); - if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", + page = virt_to_head_page(obj); + if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n", __func__)) return NULL; - return slab->slab_cache; + return page->slab_cache; } -static __always_inline void account_slab(struct slab *slab, int order, - struct kmem_cache *s, gfp_t gfp) +static __always_inline void account_slab_page(struct page *page, int order, + struct kmem_cache *s, + gfp_t gfp) { if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT)) - memcg_alloc_slab_cgroups(slab, s, gfp, true); + memcg_alloc_page_obj_cgroups(page, s, gfp, true); - mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), PAGE_SIZE << order); } -static __always_inline void unaccount_slab(struct slab *slab, int order, - struct kmem_cache *s) +static __always_inline void unaccount_slab_page(struct page *page, int order, + struct kmem_cache *s) { if (memcg_kmem_enabled()) - memcg_free_slab_cgroups(slab); + memcg_free_page_obj_cgroups(page); - mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), -(PAGE_SIZE << order)); } @@ -664,7 +452,6 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) print_tracking(cachep, x); return cachep; } -#endif /* CONFIG_SLOB */ static inline size_t slab_ksize(const struct kmem_cache *s) { @@ -788,6 +575,11 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif +void *slab_start(struct seq_file *m, loff_t *pos); +void *slab_next(struct seq_file *m, void *p, loff_t *pos); +void slab_stop(struct seq_file *m, void *p); +int memcg_slab_show(struct seq_file *m, void *p); + #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) void dump_unreclaimable_slab(void); #else @@ -843,7 +635,7 @@ static inline void debugfs_slab_release(struct kmem_cache *s) { } #define KS_ADDRS_COUNT 16 struct kmem_obj_info { void *kp_ptr; - struct slab *kp_slab; + struct page *kp_page; void *kp_objp; unsigned long kp_data_offset; struct kmem_cache *kp_slab_cache; @@ -851,18 +643,7 @@ struct kmem_obj_info { void *kp_stack[KS_ADDRS_COUNT]; void *kp_free_stack[KS_ADDRS_COUNT]; }; -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); -#endif - -#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR -void __check_heap_object(const void *ptr, unsigned long n, - const struct slab *slab, bool to_user); -#else -static inline -void __check_heap_object(const void *ptr, unsigned long n, - const struct slab *slab, bool to_user) -{ -} +void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page); #endif #endif /* MM_SLAB_H */ diff --git a/mm/slab_common.c b/mm/slab_common.c index 23f2ab0713..ec2bb0beed 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -37,6 +37,14 @@ LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; +#ifdef CONFIG_HARDENED_USERCOPY +bool usercopy_fallback __ro_after_init = + IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK); +module_param(usercopy_fallback, bool, 0400); +MODULE_PARM_DESC(usercopy_fallback, + "WARN instead of reject usercopy whitelist violations"); +#endif + static LIST_HEAD(slab_caches_to_rcu_destroy); static void 
slab_caches_to_rcu_destroy_workfn(struct work_struct *work); static DECLARE_WORK(slab_caches_to_rcu_destroy_work, @@ -489,7 +497,9 @@ void slab_kmem_cache_release(struct kmem_cache *s) void kmem_cache_destroy(struct kmem_cache *s) { - if (unlikely(!s) || !kasan_check_byte(s)) + int err; + + if (unlikely(!s)) return; cpus_read_lock(); @@ -499,9 +509,12 @@ void kmem_cache_destroy(struct kmem_cache *s) if (s->refcount) goto out_unlock; - WARN(shutdown_cache(s), - "%s %s: Slab cache still has objects when called from %pS", - __func__, s->name, (void *)_RET_IP_); + err = shutdown_cache(s); + if (err) { + pr_err("%s %s: Slab cache still has objects\n", + __func__, s->name); + dump_stack(); + } out_unlock: mutex_unlock(&slab_mutex); cpus_read_unlock(); @@ -545,13 +558,13 @@ bool slab_is_available(void) */ bool kmem_valid_obj(void *object) { - struct folio *folio; + struct page *page; /* Some arches consider ZERO_SIZE_PTR to be a valid address. */ if (object < (void *)PAGE_SIZE || !virt_addr_valid(object)) return false; - folio = virt_to_folio(object); - return folio_test_slab(folio); + page = virt_to_head_page(object); + return PageSlab(page); } EXPORT_SYMBOL_GPL(kmem_valid_obj); @@ -574,18 +587,18 @@ void kmem_dump_obj(void *object) { char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc"; int i; - struct slab *slab; + struct page *page; unsigned long ptroffset; struct kmem_obj_info kp = { }; if (WARN_ON_ONCE(!virt_addr_valid(object))) return; - slab = virt_to_slab(object); - if (WARN_ON_ONCE(!slab)) { + page = virt_to_head_page(object); + if (WARN_ON_ONCE(!PageSlab(page))) { pr_cont(" non-slab memory.\n"); return; } - kmem_obj_info(&kp, object, slab); + kmem_obj_info(&kp, object, page); if (kp.kp_slab_cache) pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name); else @@ -819,7 +832,7 @@ void __init setup_kmalloc_cache_index_table(void) if (KMALLOC_MIN_SIZE >= 64) { /* - * The 96 byte sized cache is not used if the alignment + * The 96 byte size cache is not used if the alignment * is 64 byte. */ for (i = 64 + 8; i <= 96; i += 8) @@ -844,7 +857,7 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags) if (type == KMALLOC_RECLAIM) { flags |= SLAB_RECLAIM_ACCOUNT; } else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) { - if (mem_cgroup_kmem_disabled()) { + if (cgroup_memory_nokmem) { kmalloc_caches[type][idx] = kmalloc_caches[KMALLOC_NORMAL][idx]; return; } @@ -1039,18 +1052,18 @@ static void print_slabinfo_header(struct seq_file *m) seq_putc(m, '\n'); } -static void *slab_start(struct seq_file *m, loff_t *pos) +void *slab_start(struct seq_file *m, loff_t *pos) { mutex_lock(&slab_mutex); return seq_list_start(&slab_caches, *pos); } -static void *slab_next(struct seq_file *m, void *p, loff_t *pos) +void *slab_next(struct seq_file *m, void *p, loff_t *pos) { return seq_list_next(p, &slab_caches, pos); } -static void slab_stop(struct seq_file *m, void *p) +void slab_stop(struct seq_file *m, void *p) { mutex_unlock(&slab_mutex); } @@ -1118,6 +1131,17 @@ void dump_unreclaimable_slab(void) mutex_unlock(&slab_mutex); } +#if defined(CONFIG_MEMCG_KMEM) +int memcg_slab_show(struct seq_file *m, void *p) +{ + /* + * Deprecated. + * Please, take a look at tools/cgroup/slabinfo.py . 
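+ * The stub keeps the cgroup v1 memory.kmem.slabinfo file present while + * producing no output.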
+ */ + return 0; +} +#endif + /* * slabinfo_op - iterator that generates /proc/slabinfo * diff --git a/mm/slob.c b/mm/slob.c index 60c5842215..74d3f6e606 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -30,7 +30,7 @@ * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls * alloc_pages() directly, allocating compound pages so the page order * does not have to be separately tracked. - * These objects are detected in kfree() because folio_test_slab() + * These objects are detected in kfree() because PageSlab() * is false for them. * * SLAB is emulated on top of SLOB by simply calling constructors and @@ -105,21 +105,21 @@ static LIST_HEAD(free_slob_large); /* * slob_page_free: true for pages on free_slob_pages list. */ -static inline int slob_page_free(struct slab *slab) +static inline int slob_page_free(struct page *sp) { - return PageSlobFree(slab_page(slab)); + return PageSlobFree(sp); } -static void set_slob_page_free(struct slab *slab, struct list_head *list) +static void set_slob_page_free(struct page *sp, struct list_head *list) { - list_add(&slab->slab_list, list); - __SetPageSlobFree(slab_page(slab)); + list_add(&sp->slab_list, list); + __SetPageSlobFree(sp); } -static inline void clear_slob_page_free(struct slab *slab) +static inline void clear_slob_page_free(struct page *sp) { - list_del(&slab->slab_list); - __ClearPageSlobFree(slab_page(slab)); + list_del(&sp->slab_list); + __ClearPageSlobFree(sp); } #define SLOB_UNIT sizeof(slob_t) @@ -234,7 +234,7 @@ static void slob_free_pages(void *b, int order) * freelist, in this case @page_removed_from_list will be set to * true (set to false otherwise). */ -static void *slob_page_alloc(struct slab *sp, size_t size, int align, +static void *slob_page_alloc(struct page *sp, size_t size, int align, int align_offset, bool *page_removed_from_list) { slob_t *prev, *cur, *aligned = NULL; @@ -301,8 +301,7 @@ static void *slob_page_alloc(struct slab *sp, size_t size, int align, static void *slob_alloc(size_t size, gfp_t gfp, int align, int node, int align_offset) { - struct folio *folio; - struct slab *sp; + struct page *sp; struct list_head *slob_list; slob_t *b = NULL; unsigned long flags; @@ -324,7 +323,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node, * If there's a node specification, search for a partial * page with a matching node id in the freelist. */ - if (node != NUMA_NO_NODE && slab_nid(sp) != node) + if (node != NUMA_NO_NODE && page_to_nid(sp) != node) continue; #endif /* Enough room on this page? 
*/ @@ -359,9 +358,8 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node, b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node); if (!b) return NULL; - folio = virt_to_folio(b); - __folio_set_slab(folio); - sp = folio_slab(folio); + sp = virt_to_page(b); + __SetPageSlab(sp); spin_lock_irqsave(&slob_lock, flags); sp->units = SLOB_UNITS(PAGE_SIZE); @@ -383,7 +381,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node, */ static void slob_free(void *block, int size) { - struct slab *sp; + struct page *sp; slob_t *prev, *next, *b = (slob_t *)block; slobidx_t units; unsigned long flags; @@ -393,7 +391,7 @@ static void slob_free(void *block, int size) return; BUG_ON(!size); - sp = virt_to_slab(block); + sp = virt_to_page(block); units = SLOB_UNITS(size); spin_lock_irqsave(&slob_lock, flags); @@ -403,7 +401,8 @@ static void slob_free(void *block, int size) if (slob_page_free(sp)) clear_slob_page_free(sp); spin_unlock_irqrestore(&slob_lock, flags); - __folio_clear_slab(slab_folio(sp)); + __ClearPageSlab(sp); + page_mapcount_reset(sp); slob_free_pages(b, 0); return; } @@ -463,10 +462,10 @@ static void slob_free(void *block, int size) } #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page) { kpp->kp_ptr = object; - kpp->kp_slab = slab; + kpp->kp_page = page; } #endif @@ -545,7 +544,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller); void kfree(const void *block) { - struct folio *sp; + struct page *sp; trace_kfree(_RET_IP_, block); @@ -553,17 +552,16 @@ void kfree(const void *block) return; kmemleak_free(block); - sp = virt_to_folio(block); - if (folio_test_slab(sp)) { + sp = virt_to_page(block); + if (PageSlab(sp)) { int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); unsigned int *m = (unsigned int *)(block - align); slob_free(m, *m + align); } else { - unsigned int order = folio_order(sp); - - mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B, + unsigned int order = compound_order(sp); + mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); - __free_pages(folio_page(sp, 0), order); + __free_pages(sp, order); } } @@ -572,7 +570,7 @@ EXPORT_SYMBOL(kfree); /* can't use ksize for kmem_cache_alloc memory, only kmalloc */ size_t __ksize(const void *block) { - struct folio *folio; + struct page *sp; int align; unsigned int *m; @@ -580,9 +578,9 @@ size_t __ksize(const void *block) if (unlikely(block == ZERO_SIZE_PTR)) return 0; - folio = virt_to_folio(block); - if (unlikely(!folio_test_slab(folio))) - return folio_size(folio); + sp = virt_to_page(block); + if (unlikely(!PageSlab(sp))) + return page_size(sp); align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); m = (unsigned int *)(block - align); @@ -668,7 +666,6 @@ static void kmem_rcu_free(struct rcu_head *head) void kmem_cache_free(struct kmem_cache *c, void *b) { kmemleak_free_recursive(b, c->flags); - trace_kmem_cache_free(_RET_IP_, b, c->name); if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) { struct slob_rcu *slob_rcu; slob_rcu = b + (c->size - sizeof(struct slob_rcu)); @@ -677,6 +674,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b) } else { __kmem_cache_free(b, c->size); } + + trace_kmem_cache_free(_RET_IP_, b, c->name); } EXPORT_SYMBOL(kmem_cache_free); diff --git a/mm/slub.c b/mm/slub.c index 261474092e..ca6ba6bdf2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -48,7 +48,7 @@ * 1. slab_mutex (Global Mutex) * 2. 
node->list_lock (Spinlock) * 3. kmem_cache->cpu_slab->lock (Local lock) - * 4. slab_lock(slab) (Only on some arches or for debugging) + * 4. slab_lock(page) (Only on some arches or for debugging) * 5. object_map_lock (Only for debugging) * * slab_mutex @@ -64,19 +64,19 @@ * * The slab_lock is only used for debugging and on arches that do not * have the ability to do a cmpxchg_double. It only protects: - * A. slab->freelist -> List of free objects in a slab - * B. slab->inuse -> Number of objects in use - * C. slab->objects -> Number of objects in slab - * D. slab->frozen -> frozen state + * A. page->freelist -> List of free objects in a page + * B. page->inuse -> Number of objects in use + * C. page->objects -> Number of objects in page + * D. page->frozen -> frozen state * * Frozen slabs * * If a slab is frozen then it is exempt from list management. It is not * on any list except per cpu partial list. The processor that froze the - * slab is the one who can perform list operations on the slab. Other + * slab is the one who can perform list operations on the page. Other * processors may put objects onto the freelist but the processor that * froze the slab is the only one that can retrieve the objects from the - * slab's freelist. + * page's freelist. * * list_lock * @@ -135,7 +135,7 @@ * minimal so we rely on the page allocators per cpu caches for * fast frees and allocs. * - * slab->frozen The slab is frozen and exempt from list processing. + * page->frozen The slab is frozen and exempt from list processing. * This means that the slab is dedicated to a purpose * such as satisfying allocations for a specific * processor. Objects may be freed in the slab while @@ -250,7 +250,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) #define OO_SHIFT 16 #define OO_MASK ((1 << OO_SHIFT) - 1) -#define MAX_OBJS_PER_PAGE 32767 /* since slab.objects is u15 */ +#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ /* Internal SLUB flags */ /* Poison object */ @@ -354,7 +354,7 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) static void prefetch_freepointer(const struct kmem_cache *s, void *object) { - prefetchw(object + s->offset); + prefetch(object + s->offset); } static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) @@ -414,58 +414,31 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x) return x.x & OO_MASK; } -#ifdef CONFIG_SLUB_CPU_PARTIAL -static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) -{ - unsigned int nr_slabs; - - s->cpu_partial = nr_objects; - - /* - * We take the number of objects but actually limit the number of - * slabs on the per cpu partial list, in order to limit excessive - * growth of the list. For simplicity we assume that the slabs will - * be half-full.
- */ - nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo)); - s->cpu_partial_slabs = nr_slabs; -} -#else -static inline void -slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) -{ -} -#endif /* CONFIG_SLUB_CPU_PARTIAL */ - /* * Per slab locking using the pagelock */ -static __always_inline void __slab_lock(struct slab *slab) +static __always_inline void __slab_lock(struct page *page) { - struct page *page = slab_page(slab); - VM_BUG_ON_PAGE(PageTail(page), page); bit_spin_lock(PG_locked, &page->flags); } -static __always_inline void __slab_unlock(struct slab *slab) +static __always_inline void __slab_unlock(struct page *page) { - struct page *page = slab_page(slab); - VM_BUG_ON_PAGE(PageTail(page), page); __bit_spin_unlock(PG_locked, &page->flags); } -static __always_inline void slab_lock(struct slab *slab, unsigned long *flags) +static __always_inline void slab_lock(struct page *page, unsigned long *flags) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_save(*flags); - __slab_lock(slab); + __slab_lock(page); } -static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags) +static __always_inline void slab_unlock(struct page *page, unsigned long *flags) { - __slab_unlock(slab); + __slab_unlock(page); if (IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_restore(*flags); } @@ -475,7 +448,7 @@ static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags) * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different * so we disable interrupts as part of slab_[un]lock(). */ -static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, +static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, void *freelist_old, unsigned long counters_old, void *freelist_new, unsigned long counters_new, const char *n) @@ -485,7 +458,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&slab->freelist, &slab->counters, + if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, freelist_new, counters_new)) return true; @@ -495,15 +468,15 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab /* init to 0 to prevent spurious warnings */ unsigned long flags = 0; - slab_lock(slab, &flags); - if (slab->freelist == freelist_old && - slab->counters == counters_old) { - slab->freelist = freelist_new; - slab->counters = counters_new; - slab_unlock(slab, &flags); + slab_lock(page, &flags); + if (page->freelist == freelist_old && + page->counters == counters_old) { + page->freelist = freelist_new; + page->counters = counters_new; + slab_unlock(page, &flags); return true; } - slab_unlock(slab, &flags); + slab_unlock(page, &flags); } cpu_relax(); @@ -516,7 +489,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab return false; } -static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, +static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, void *freelist_old, unsigned long counters_old, void *freelist_new, unsigned long counters_new, const char *n) @@ -524,7 +497,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&slab->freelist, 
&slab->counters, + if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, freelist_new, counters_new)) return true; @@ -534,16 +507,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, unsigned long flags; local_irq_save(flags); - __slab_lock(slab); - if (slab->freelist == freelist_old && - slab->counters == counters_old) { - slab->freelist = freelist_new; - slab->counters = counters_new; - __slab_unlock(slab); + __slab_lock(page); + if (page->freelist == freelist_old && + page->counters == counters_old) { + page->freelist = freelist_new; + page->counters = counters_new; + __slab_unlock(page); local_irq_restore(flags); return true; } - __slab_unlock(slab); + __slab_unlock(page); local_irq_restore(flags); } @@ -562,14 +535,14 @@ static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; static DEFINE_RAW_SPINLOCK(object_map_lock); static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, - struct slab *slab) + struct page *page) { - void *addr = slab_address(slab); + void *addr = page_address(page); void *p; - bitmap_zero(obj_map, slab->objects); + bitmap_zero(obj_map, page->objects); - for (p = slab->freelist; p; p = get_freepointer(s, p)) + for (p = page->freelist; p; p = get_freepointer(s, p)) set_bit(__obj_to_index(s, addr, p), obj_map); } @@ -594,19 +567,19 @@ static inline bool slab_add_kunit_errors(void) { return false; } #endif /* - * Determine a map of objects in use in a slab. + * Determine a map of objects in use on a page. * - * Node listlock must be held to guarantee that the slab does + * Node listlock must be held to guarantee that the page does * not vanish from under us. */ -static unsigned long *get_map(struct kmem_cache *s, struct slab *slab) +static unsigned long *get_map(struct kmem_cache *s, struct page *page) __acquires(&object_map_lock) { VM_BUG_ON(!irqs_disabled()); raw_spin_lock(&object_map_lock); - __fill_map(object_map, s, slab); + __fill_map(object_map, s, page); return object_map; } @@ -667,17 +640,17 @@ static inline void metadata_access_disable(void) /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, - struct slab *slab, void *object) + struct page *page, void *object) { void *base; if (!object) return 1; - base = slab_address(slab); + base = page_address(page); object = kasan_reset_tag(object); object = restore_red_left(s, object); - if (object < base || object >= base + slab->objects * s->size || + if (object < base || object >= base + page->objects * s->size || (object - base) % s->size) { return 0; } @@ -788,13 +761,12 @@ void print_tracking(struct kmem_cache *s, void *object) print_track("Freed", get_track(s, object, TRACK_FREE), pr_time); } -static void print_slab_info(const struct slab *slab) +static void print_page_info(struct page *page) { - struct folio *folio = (struct folio *)slab_folio(slab); + pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n", + page, page->objects, page->inuse, page->freelist, + page->flags, &page->flags); - pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", - slab, slab->objects, slab->inuse, slab->freelist, - folio_flags(folio, 0)); } static void slab_bug(struct kmem_cache *s, char *fmt, ...) @@ -827,14 +799,28 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
va_end(args); } -static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p) +static bool freelist_corrupted(struct kmem_cache *s, struct page *page, + void **freelist, void *nextfree) +{ + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && + !check_valid_pointer(s, page, nextfree) && freelist) { + object_err(s, page, *freelist, "Freechain corrupt"); + *freelist = NULL; + slab_fix(s, "Isolate corrupted freechain"); + return true; + } + + return false; +} + +static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) { unsigned int off; /* Offset of last byte */ - u8 *addr = slab_address(slab); + u8 *addr = page_address(page); print_tracking(s, p); - print_slab_info(slab); + print_page_info(page); pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n", p, p - addr, get_freepointer(s, p)); @@ -866,32 +852,18 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p) dump_stack(); } -static void object_err(struct kmem_cache *s, struct slab *slab, +void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason) { if (slab_add_kunit_errors()) return; slab_bug(s, "%s", reason); - print_trailer(s, slab, object); + print_trailer(s, page, object); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } -static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, - void **freelist, void *nextfree) -{ - if ((s->flags & SLAB_CONSISTENCY_CHECKS) && - !check_valid_pointer(s, slab, nextfree) && freelist) { - object_err(s, slab, *freelist, "Freechain corrupt"); - *freelist = NULL; - slab_fix(s, "Isolate corrupted freechain"); - return true; - } - - return false; -} - -static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab, +static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...) { va_list args; @@ -904,7 +876,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab, vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); slab_bug(s, "%s", buf); - print_slab_info(slab); + print_page_info(page); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } @@ -932,13 +904,13 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data, memset(from, data, to - from); } -static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, +static int check_bytes_and_report(struct kmem_cache *s, struct page *page, u8 *object, char *what, u8 *start, unsigned int value, unsigned int bytes) { u8 *fault; u8 *end; - u8 *addr = slab_address(slab); + u8 *addr = page_address(page); metadata_access_enable(); fault = memchr_inv(kasan_reset_tag(start), value, bytes); @@ -957,7 +929,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", fault, end - 1, fault - addr, fault[0], value); - print_trailer(s, slab, object); + print_trailer(s, page, object); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); skip_bug_print: @@ -1003,7 +975,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, * may be used with merged slabcaches. 
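 * * For the check below: only the POISON_INUSE padding between the end of the * object metadata and size_from_object() is verified here; redzones and the * object body itself are covered by check_object().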
*/ -static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) +static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) { unsigned long off = get_info_end(s); /* The end of info */ @@ -1016,12 +988,12 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) if (size_from_object(s) == off) return 1; - return check_bytes_and_report(s, slab, p, "Object padding", + return check_bytes_and_report(s, page, p, "Object padding", p + off, POISON_INUSE, size_from_object(s) - off); } /* Check the pad bytes at the end of a slab page */ -static int slab_pad_check(struct kmem_cache *s, struct slab *slab) +static int slab_pad_check(struct kmem_cache *s, struct page *page) { u8 *start; u8 *fault; @@ -1033,8 +1005,8 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab) if (!(s->flags & SLAB_POISON)) return 1; - start = slab_address(slab); - length = slab_size(slab); + start = page_address(page); + length = page_size(page); end = start + length; remainder = length % s->size; if (!remainder) @@ -1049,7 +1021,7 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab) while (end > fault && end[-1] == POISON_INUSE) end--; - slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu", + slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu", fault, end - 1, fault - start); print_section(KERN_ERR, "Padding ", pad, remainder); @@ -1057,23 +1029,23 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab) return 0; } -static int check_object(struct kmem_cache *s, struct slab *slab, +static int check_object(struct kmem_cache *s, struct page *page, void *object, u8 val) { u8 *p = object; u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { - if (!check_bytes_and_report(s, slab, object, "Left Redzone", + if (!check_bytes_and_report(s, page, object, "Left Redzone", object - s->red_left_pad, val, s->red_left_pad)) return 0; - if (!check_bytes_and_report(s, slab, object, "Right Redzone", + if (!check_bytes_and_report(s, page, object, "Right Redzone", endobject, val, s->inuse - s->object_size)) return 0; } else { if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { - check_bytes_and_report(s, slab, p, "Alignment padding", + check_bytes_and_report(s, page, p, "Alignment padding", endobject, POISON_INUSE, s->inuse - s->object_size); } @@ -1081,15 +1053,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab, if (s->flags & SLAB_POISON) { if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && - (!check_bytes_and_report(s, slab, p, "Poison", p, + (!check_bytes_and_report(s, page, p, "Poison", p, POISON_FREE, s->object_size - 1) || - !check_bytes_and_report(s, slab, p, "End Poison", + !check_bytes_and_report(s, page, p, "End Poison", p + s->object_size - 1, POISON_END, 1))) return 0; /* * check_pad_bytes cleans up on its own. */ - check_pad_bytes(s, slab, p); + check_pad_bytes(s, page, p); } if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE) @@ -1100,8 +1072,8 @@ static int check_object(struct kmem_cache *s, struct slab *slab, return 1; /* Check free pointer validity */ - if (!check_valid_pointer(s, slab, get_freepointer(s, p))) { - object_err(s, slab, p, "Freepointer corrupt"); + if (!check_valid_pointer(s, page, get_freepointer(s, p))) { + object_err(s, page, p, "Freepointer corrupt"); /* * No choice but to zap it and thus lose the remainder * of the free objects in this slab. 
May cause @@ -1113,55 +1085,55 @@ static int check_object(struct kmem_cache *s, struct slab *slab, return 1; } -static int check_slab(struct kmem_cache *s, struct slab *slab) +static int check_slab(struct kmem_cache *s, struct page *page) { int maxobj; - if (!folio_test_slab(slab_folio(slab))) { - slab_err(s, slab, "Not a valid slab page"); + if (!PageSlab(page)) { + slab_err(s, page, "Not a valid slab page"); return 0; } - maxobj = order_objects(slab_order(slab), s->size); - if (slab->objects > maxobj) { - slab_err(s, slab, "objects %u > max %u", - slab->objects, maxobj); + maxobj = order_objects(compound_order(page), s->size); + if (page->objects > maxobj) { + slab_err(s, page, "objects %u > max %u", + page->objects, maxobj); return 0; } - if (slab->inuse > slab->objects) { - slab_err(s, slab, "inuse %u > max %u", - slab->inuse, slab->objects); + if (page->inuse > page->objects) { + slab_err(s, page, "inuse %u > max %u", + page->inuse, page->objects); return 0; } /* Slab_pad_check fixes things up after itself */ - slab_pad_check(s, slab); + slab_pad_check(s, page); return 1; } /* - * Determine if a certain object in a slab is on the freelist. Must hold the + * Determine if a certain object on a page is on the freelist. Must hold the * slab lock to guarantee that the chains are in a consistent state. */ -static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search) +static int on_freelist(struct kmem_cache *s, struct page *page, void *search) { int nr = 0; void *fp; void *object = NULL; int max_objects; - fp = slab->freelist; - while (fp && nr <= slab->objects) { + fp = page->freelist; + while (fp && nr <= page->objects) { if (fp == search) return 1; - if (!check_valid_pointer(s, slab, fp)) { + if (!check_valid_pointer(s, page, fp)) { if (object) { - object_err(s, slab, object, + object_err(s, page, object, "Freechain corrupt"); set_freepointer(s, object, NULL); } else { - slab_err(s, slab, "Freepointer corrupt"); - slab->freelist = NULL; - slab->inuse = slab->objects; + slab_err(s, page, "Freepointer corrupt"); + page->freelist = NULL; + page->inuse = page->objects; slab_fix(s, "Freelist cleared"); return 0; } @@ -1172,34 +1144,34 @@ static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search) nr++; } - max_objects = order_objects(slab_order(slab), s->size); + max_objects = order_objects(compound_order(page), s->size); if (max_objects > MAX_OBJS_PER_PAGE) max_objects = MAX_OBJS_PER_PAGE; - if (slab->objects != max_objects) { - slab_err(s, slab, "Wrong number of objects. Found %d but should be %d", - slab->objects, max_objects); - slab->objects = max_objects; + if (page->objects != max_objects) { + slab_err(s, page, "Wrong number of objects. Found %d but should be %d", + page->objects, max_objects); + page->objects = max_objects; slab_fix(s, "Number of objects adjusted"); } - if (slab->inuse != slab->objects - nr) { - slab_err(s, slab, "Wrong object count. Counter is %d but counted were %d", - slab->inuse, slab->objects - nr); - slab->inuse = slab->objects - nr; + if (page->inuse != page->objects - nr) { + slab_err(s, page, "Wrong object count. 
Counter is %d but counted were %d", + page->inuse, page->objects - nr); + page->inuse = page->objects - nr; slab_fix(s, "Object count adjusted"); } return search == NULL; } -static void trace(struct kmem_cache *s, struct slab *slab, void *object, +static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) { if (s->flags & SLAB_TRACE) { pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n", s->name, alloc ? "alloc" : "free", - object, slab->inuse, - slab->freelist); + object, page->inuse, + page->freelist); if (!alloc) print_section(KERN_INFO, "Object ", (void *)object, @@ -1213,22 +1185,22 @@ static void trace(struct kmem_cache *s, struct slab *slab, void *object, * Tracking of fully allocated slabs for debugging purposes. */ static void add_full(struct kmem_cache *s, - struct kmem_cache_node *n, struct slab *slab) + struct kmem_cache_node *n, struct page *page) { if (!(s->flags & SLAB_STORE_USER)) return; lockdep_assert_held(&n->list_lock); - list_add(&slab->slab_list, &n->full); + list_add(&page->slab_list, &n->full); } -static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab) +static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) { if (!(s->flags & SLAB_STORE_USER)) return; lockdep_assert_held(&n->list_lock); - list_del(&slab->slab_list); + list_del(&page->slab_list); } /* Tracking of the number of slabs for debugging purposes */ @@ -1268,7 +1240,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) } /* Object debug checks for alloc/free paths */ -static void setup_object_debug(struct kmem_cache *s, struct slab *slab, +static void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) { if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)) @@ -1279,89 +1251,89 @@ static void setup_object_debug(struct kmem_cache *s, struct slab *slab, } static -void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) +void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) { if (!kmem_cache_debug_flags(s, SLAB_POISON)) return; metadata_access_enable(); - memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page)); metadata_access_disable(); } static inline int alloc_consistency_checks(struct kmem_cache *s, - struct slab *slab, void *object) + struct page *page, void *object) { - if (!check_slab(s, slab)) + if (!check_slab(s, page)) return 0; - if (!check_valid_pointer(s, slab, object)) { - object_err(s, slab, object, "Freelist Pointer check fails"); + if (!check_valid_pointer(s, page, object)) { + object_err(s, page, object, "Freelist Pointer check fails"); return 0; } - if (!check_object(s, slab, object, SLUB_RED_INACTIVE)) + if (!check_object(s, page, object, SLUB_RED_INACTIVE)) return 0; return 1; } static noinline int alloc_debug_processing(struct kmem_cache *s, - struct slab *slab, + struct page *page, void *object, unsigned long addr) { if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!alloc_consistency_checks(s, slab, object)) + if (!alloc_consistency_checks(s, page, object)) goto bad; } /* Success perform special debug activities for allocs */ if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_ALLOC, addr); - trace(s, slab, object, 1); + trace(s, page, object, 1); init_object(s, object, SLUB_RED_ACTIVE); return 1; bad: - if (folio_test_slab(slab_folio(slab))) { + if (PageSlab(page)) { /* * If this is a slab page then lets do 
the best we can * to avoid issues in the future. Marking all objects * as used avoids touching the remaining objects. */ slab_fix(s, "Marking all objects used"); - slab->inuse = slab->objects; - slab->freelist = NULL; + page->inuse = page->objects; + page->freelist = NULL; } return 0; } static inline int free_consistency_checks(struct kmem_cache *s, - struct slab *slab, void *object, unsigned long addr) + struct page *page, void *object, unsigned long addr) { - if (!check_valid_pointer(s, slab, object)) { - slab_err(s, slab, "Invalid object pointer 0x%p", object); + if (!check_valid_pointer(s, page, object)) { + slab_err(s, page, "Invalid object pointer 0x%p", object); return 0; } - if (on_freelist(s, slab, object)) { - object_err(s, slab, object, "Object already free"); + if (on_freelist(s, page, object)) { + object_err(s, page, object, "Object already free"); return 0; } - if (!check_object(s, slab, object, SLUB_RED_ACTIVE)) + if (!check_object(s, page, object, SLUB_RED_ACTIVE)) return 0; - if (unlikely(s != slab->slab_cache)) { - if (!folio_test_slab(slab_folio(slab))) { - slab_err(s, slab, "Attempt to free object(0x%p) outside of slab", + if (unlikely(s != page->slab_cache)) { + if (!PageSlab(page)) { + slab_err(s, page, "Attempt to free object(0x%p) outside of slab", object); - } else if (!slab->slab_cache) { + } else if (!page->slab_cache) { pr_err("SLUB <none>: no slab for object 0x%p.\n", object); dump_stack(); } else - object_err(s, slab, object, + object_err(s, page, object, "page slab pointer corrupt."); return 0; } @@ -1370,21 +1342,21 @@ static inline int free_consistency_checks, /* Supports checking bulk free of a constructed freelist */ static noinline int free_debug_processing( - struct kmem_cache *s, struct slab *slab, + struct kmem_cache *s, struct page *page, void *head, void *tail, int bulk_cnt, unsigned long addr) { - struct kmem_cache_node *n = get_node(s, slab_nid(slab)); + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); void *object = head; int cnt = 0; unsigned long flags, flags2; int ret = 0; spin_lock_irqsave(&n->list_lock, flags); - slab_lock(slab, &flags2); + slab_lock(page, &flags2); if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!check_slab(s, slab)) + if (!check_slab(s, page)) goto out; } @@ -1392,13 +1364,13 @@ static noinline int free_debug_processing( cnt++; if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!free_consistency_checks(s, slab, object, addr)) + if (!free_consistency_checks(s, page, object, addr)) goto out; } if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); - trace(s, slab, object, 0); + trace(s, page, object, 0); /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ init_object(s, object, SLUB_RED_INACTIVE); @@ -1411,10 +1383,10 @@ static noinline int free_debug_processing( out: if (cnt != bulk_cnt) - slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n", + slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", bulk_cnt, cnt); - slab_unlock(slab, &flags2); + slab_unlock(page, &flags2); spin_unlock_irqrestore(&n->list_lock, flags); if (!ret) slab_fix(s, "Object at 0x%p not freed", object); @@ -1629,26 +1601,26 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, } #else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, - struct slab *slab, void *object) {} + struct page *page, void *object) {} static inline -void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {} +void setup_page_debug(struct kmem_cache *s,
struct page *page, void *addr) {} static inline int alloc_debug_processing(struct kmem_cache *s, - struct slab *slab, void *object, unsigned long addr) { return 0; } + struct page *page, void *object, unsigned long addr) { return 0; } static inline int free_debug_processing( - struct kmem_cache *s, struct slab *slab, + struct kmem_cache *s, struct page *page, void *head, void *tail, int bulk_cnt, unsigned long addr) { return 0; } -static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab) +static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } -static inline int check_object(struct kmem_cache *s, struct slab *slab, +static inline int check_object(struct kmem_cache *s, struct page *page, void *object, u8 val) { return 1; } static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, - struct slab *slab) {} + struct page *page) {} static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, - struct slab *slab) {} + struct page *page) {} slab_flags_t kmem_cache_flags(unsigned int object_size, slab_flags_t flags, const char *name) { @@ -1667,7 +1639,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} -static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, +static bool freelist_corrupted(struct kmem_cache *s, struct page *page, void **freelist, void *nextfree) { return false; @@ -1772,10 +1744,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, return *head != NULL; } -static void *setup_object(struct kmem_cache *s, struct slab *slab, +static void *setup_object(struct kmem_cache *s, struct page *page, void *object) { - setup_object_debug(s, slab, object); + setup_object_debug(s, page, object); object = kasan_init_slab_obj(s, object); if (unlikely(s->ctor)) { kasan_unpoison_object_data(s, object); @@ -1788,27 +1760,18 @@ static void *setup_object(struct kmem_cache *s, struct slab *slab, /* * Slab allocation and freeing */ -static inline struct slab *alloc_slab_page(struct kmem_cache *s, +static inline struct page *alloc_slab_page(struct kmem_cache *s, gfp_t flags, int node, struct kmem_cache_order_objects oo) { - struct folio *folio; - struct slab *slab; + struct page *page; unsigned int order = oo_order(oo); if (node == NUMA_NO_NODE) - folio = (struct folio *)alloc_pages(flags, order); + page = alloc_pages(flags, order); else - folio = (struct folio *)__alloc_pages_node(node, flags, order); + page = __alloc_pages_node(node, flags, order); - if (!folio) - return NULL; - - slab = folio_slab(folio); - __folio_set_slab(folio); - if (page_is_pfmemalloc(folio_page(folio, 0))) - slab_set_pfmemalloc(slab); - - return slab; + return page; } #ifdef CONFIG_SLAB_FREELIST_RANDOM @@ -1853,7 +1816,7 @@ static void __init init_freelist_randomization(void) } /* Get the next entry on the pre-computed freelist randomized */ -static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab, +static void *next_freelist_entry(struct kmem_cache *s, struct page *page, unsigned long *pos, void *start, unsigned long page_limit, unsigned long freelist_count) @@ -1875,32 +1838,32 @@ static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab, } /* Shuffle the single linked freelist based on a random pre-computed sequence */ -static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) +static bool shuffle_freelist(struct kmem_cache *s, struct page *page) { void *start; void 
*cur; void *next; unsigned long idx, pos, page_limit, freelist_count; - if (slab->objects < 2 || !s->random_seq) + if (page->objects < 2 || !s->random_seq) return false; freelist_count = oo_objects(s->oo); pos = get_random_int() % freelist_count; - page_limit = slab->objects * s->size; - start = fixup_red_left(s, slab_address(slab)); + page_limit = page->objects * s->size; + start = fixup_red_left(s, page_address(page)); /* First entry is used as the base of the freelist */ - cur = next_freelist_entry(s, slab, &pos, start, page_limit, + cur = next_freelist_entry(s, page, &pos, start, page_limit, freelist_count); - cur = setup_object(s, slab, cur); - slab->freelist = cur; + cur = setup_object(s, page, cur); + page->freelist = cur; - for (idx = 1; idx < slab->objects; idx++) { - next = next_freelist_entry(s, slab, &pos, start, page_limit, + for (idx = 1; idx < page->objects; idx++) { + next = next_freelist_entry(s, page, &pos, start, page_limit, freelist_count); - next = setup_object(s, slab, next); + next = setup_object(s, page, next); set_freepointer(s, cur, next); cur = next; } @@ -1914,15 +1877,15 @@ static inline int init_cache_random_seq(struct kmem_cache *s) return 0; } static inline void init_freelist_randomization(void) { } -static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) +static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page) { return false; } #endif /* CONFIG_SLAB_FREELIST_RANDOM */ -static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) +static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { - struct slab *slab; + struct page *page; struct kmem_cache_order_objects oo = s->oo; gfp_t alloc_gfp; void *start, *p, *next; @@ -1941,60 +1904,63 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL); - slab = alloc_slab_page(s, alloc_gfp, node, oo); - if (unlikely(!slab)) { + page = alloc_slab_page(s, alloc_gfp, node, oo); + if (unlikely(!page)) { oo = s->min; alloc_gfp = flags; /* * Allocation may have failed due to fragmentation. 
* Try a lower order alloc if possible */ - slab = alloc_slab_page(s, alloc_gfp, node, oo); - if (unlikely(!slab)) + page = alloc_slab_page(s, alloc_gfp, node, oo); + if (unlikely(!page)) goto out; stat(s, ORDER_FALLBACK); } - slab->objects = oo_objects(oo); + page->objects = oo_objects(oo); - account_slab(slab, oo_order(oo), s, flags); + account_slab_page(page, oo_order(oo), s, flags); - slab->slab_cache = s; + page->slab_cache = s; + __SetPageSlab(page); + if (page_is_pfmemalloc(page)) + SetPageSlabPfmemalloc(page); - kasan_poison_slab(slab); + kasan_poison_slab(page); - start = slab_address(slab); + start = page_address(page); - setup_slab_debug(s, slab, start); + setup_page_debug(s, page, start); - shuffle = shuffle_freelist(s, slab); + shuffle = shuffle_freelist(s, page); if (!shuffle) { start = fixup_red_left(s, start); - start = setup_object(s, slab, start); - slab->freelist = start; - for (idx = 0, p = start; idx < slab->objects - 1; idx++) { + start = setup_object(s, page, start); + page->freelist = start; + for (idx = 0, p = start; idx < page->objects - 1; idx++) { next = p + s->size; - next = setup_object(s, slab, next); + next = setup_object(s, page, next); set_freepointer(s, p, next); p = next; } set_freepointer(s, p, NULL); } - slab->inuse = slab->objects; - slab->frozen = 1; + page->inuse = page->objects; + page->frozen = 1; out: - if (!slab) + if (!page) return NULL; - inc_slabs_node(s, slab_nid(slab), slab->objects); + inc_slabs_node(s, page_to_nid(page), page->objects); - return slab; + return page; } -static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node) +static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) { if (unlikely(flags & GFP_SLAB_BUG_MASK)) flags = kmalloc_fix_flags(flags); @@ -2005,75 +1971,76 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node) flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); } -static void __free_slab(struct kmem_cache *s, struct slab *slab) +static void __free_slab(struct kmem_cache *s, struct page *page) { - struct folio *folio = slab_folio(slab); - int order = folio_order(folio); + int order = compound_order(page); int pages = 1 << order; if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { void *p; - slab_pad_check(s, slab); - for_each_object(p, s, slab_address(slab), slab->objects) - check_object(s, slab, p, SLUB_RED_INACTIVE); + slab_pad_check(s, page); + for_each_object(p, s, page_address(page), + page->objects) + check_object(s, page, p, SLUB_RED_INACTIVE); } - __slab_clear_pfmemalloc(slab); - __folio_clear_slab(folio); - folio->mapping = NULL; + __ClearPageSlabPfmemalloc(page); + __ClearPageSlab(page); + /* In union with page->mapping where page allocator expects NULL */ + page->slab_cache = NULL; if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; - unaccount_slab(slab, order, s); - __free_pages(folio_page(folio, 0), order); + unaccount_slab_page(page, order, s); + __free_pages(page, order); } static void rcu_free_slab(struct rcu_head *h) { - struct slab *slab = container_of(h, struct slab, rcu_head); + struct page *page = container_of(h, struct page, rcu_head); - __free_slab(slab->slab_cache, slab); + __free_slab(page->slab_cache, page); } -static void free_slab(struct kmem_cache *s, struct slab *slab) +static void free_slab(struct kmem_cache *s, struct page *page) { if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { - call_rcu(&slab->rcu_head, rcu_free_slab); + call_rcu(&page->rcu_head, rcu_free_slab); } else - __free_slab(s, slab); 
+ __free_slab(s, page); } -static void discard_slab(struct kmem_cache *s, struct slab *slab) +static void discard_slab(struct kmem_cache *s, struct page *page) { - dec_slabs_node(s, slab_nid(slab), slab->objects); - free_slab(s, slab); + dec_slabs_node(s, page_to_nid(page), page->objects); + free_slab(s, page); } /* * Management of partially allocated slabs. */ static inline void -__add_partial(struct kmem_cache_node *n, struct slab *slab, int tail) +__add_partial(struct kmem_cache_node *n, struct page *page, int tail) { n->nr_partial++; if (tail == DEACTIVATE_TO_TAIL) - list_add_tail(&slab->slab_list, &n->partial); + list_add_tail(&page->slab_list, &n->partial); else - list_add(&slab->slab_list, &n->partial); + list_add(&page->slab_list, &n->partial); } static inline void add_partial(struct kmem_cache_node *n, - struct slab *slab, int tail) + struct page *page, int tail) { lockdep_assert_held(&n->list_lock); - __add_partial(n, slab, tail); + __add_partial(n, page, tail); } static inline void remove_partial(struct kmem_cache_node *n, - struct slab *slab) + struct page *page) { lockdep_assert_held(&n->list_lock); - list_del(&slab->slab_list); + list_del(&page->slab_list); n->nr_partial--; } @@ -2084,12 +2051,12 @@ static inline void remove_partial(struct kmem_cache_node *n, * Returns a list of objects or NULL if it fails. */ static inline void *acquire_slab(struct kmem_cache *s, - struct kmem_cache_node *n, struct slab *slab, - int mode) + struct kmem_cache_node *n, struct page *page, + int mode, int *objects) { void *freelist; unsigned long counters; - struct slab new; + struct page new; lockdep_assert_held(&n->list_lock); @@ -2098,11 +2065,12 @@ static inline void *acquire_slab(struct kmem_cache *s, * The old freelist is the list of objects for the * per cpu allocation list. */ - freelist = slab->freelist; - counters = slab->counters; + freelist = page->freelist; + counters = page->counters; new.counters = counters; + *objects = new.objects - new.inuse; if (mode) { - new.inuse = slab->objects; + new.inuse = page->objects; new.freelist = NULL; } else { new.freelist = freelist; @@ -2111,35 +2079,36 @@ static inline void *acquire_slab(struct kmem_cache *s, VM_BUG_ON(new.frozen); new.frozen = 1; - if (!__cmpxchg_double_slab(s, slab, + if (!__cmpxchg_double_slab(s, page, freelist, counters, new.freelist, new.counters, "acquire_slab")) return NULL; - remove_partial(n, slab); + remove_partial(n, page); WARN_ON(!freelist); return freelist; } #ifdef CONFIG_SLUB_CPU_PARTIAL -static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain); +static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); #else -static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab, +static inline void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) { } #endif -static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags); +static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); /* * Try to allocate a partial slab from a specific node. */ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, - struct slab **ret_slab, gfp_t gfpflags) + struct page **ret_page, gfp_t gfpflags) { - struct slab *slab, *slab2; + struct page *page, *page2; void *object = NULL; + unsigned int available = 0; unsigned long flags; - unsigned int partial_slabs = 0; + int objects; /* * Racy check. 
If we mistakenly see no partial slabs then we @@ -2151,32 +2120,28 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, return NULL; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) { + list_for_each_entry_safe(page, page2, &n->partial, slab_list) { void *t; - if (!pfmemalloc_match(slab, gfpflags)) + if (!pfmemalloc_match(page, gfpflags)) continue; - t = acquire_slab(s, n, slab, object == NULL); + t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) break; + available += objects; if (!object) { - *ret_slab = slab; + *ret_page = page; stat(s, ALLOC_FROM_PARTIAL); object = t; } else { - put_cpu_partial(s, slab, 0); + put_cpu_partial(s, page, 0); stat(s, CPU_PARTIAL_NODE); - partial_slabs++; } -#ifdef CONFIG_SLUB_CPU_PARTIAL if (!kmem_cache_has_cpu_partial(s) - || partial_slabs > s->cpu_partial_slabs / 2) + || available > slub_cpu_partial(s) / 2) break; -#else - break; -#endif } spin_unlock_irqrestore(&n->list_lock, flags); @@ -2184,10 +2149,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, } /* - * Get a slab from somewhere. Search in increasing NUMA distances. + * Get a page from somewhere. Search in increasing NUMA distances. */ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, - struct slab **ret_slab) + struct page **ret_page) { #ifdef CONFIG_NUMA struct zonelist *zonelist; @@ -2229,7 +2194,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, if (n && cpuset_zone_allowed(zone, flags) && n->nr_partial > s->min_partial) { - object = get_partial_node(s, n, ret_slab, flags); + object = get_partial_node(s, n, ret_page, flags); if (object) { /* * Don't check read_mems_allowed_retry() @@ -2248,10 +2213,10 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, } /* - * Get a partial slab, lock it and return it. + * Get a partial page, lock it and return it. */ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, - struct slab **ret_slab) + struct page **ret_page) { void *object; int searchnode = node; @@ -2259,11 +2224,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, if (node == NUMA_NO_NODE) searchnode = numa_mem_id(); - object = get_partial_node(s, get_node(s, searchnode), ret_slab, flags); + object = get_partial_node(s, get_node(s, searchnode), ret_page, flags); if (object || node != NUMA_NO_NODE) return object; - return get_any_partial(s, flags, ret_slab); + return get_any_partial(s, flags, ret_page); } #ifdef CONFIG_PREEMPTION @@ -2340,25 +2305,25 @@ static void init_kmem_cache_cpus(struct kmem_cache *s) } /* - * Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist, + * Finishes removing the cpu slab. Merges cpu's freelist with page's freelist, * unfreezes the slabs and puts it on the proper list. * Assumes the slab has been already safely taken away from kmem_cache_cpu * by the caller. 
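The deactivation path this comment introduces starts by walking the per-cpu freelist once, recording the chain's tail and length so that the whole chain can later be spliced onto the page freelist with a single link update plus one inuse adjustment. A simplified sketch, assuming the next-free link sits in the object's first word (the real code reads it via get_freepointer() and screens each link through freelist_corrupted()):

    struct obj { struct obj *next; };

    /* Walk the freelist once; report its tail and how many objects it
     * holds. Both are needed before the splice can happen. */
    static struct obj *freelist_tail(struct obj *head, int *free_delta)
    {
            struct obj *tail = NULL;

            *free_delta = 0;
            for (struct obj *p = head; p; p = p->next) {
                    tail = p;
                    (*free_delta)++;
            }
            return tail;
    }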
*/ -static void deactivate_slab(struct kmem_cache *s, struct slab *slab, +static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist) { enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; - struct kmem_cache_node *n = get_node(s, slab_nid(slab)); + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); int lock = 0, free_delta = 0; enum slab_modes l = M_NONE, m = M_NONE; void *nextfree, *freelist_iter, *freelist_tail; int tail = DEACTIVATE_TO_HEAD; unsigned long flags = 0; - struct slab new; - struct slab old; + struct page new; + struct page old; - if (slab->freelist) { + if (page->freelist) { stat(s, DEACTIVATE_REMOTE_FREES); tail = DEACTIVATE_TO_TAIL; } @@ -2377,7 +2342,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab, * 'freelist_iter' is already corrupted. So isolate all objects * starting at 'freelist_iter' by skipping them. */ - if (freelist_corrupted(s, slab, &freelist_iter, nextfree)) + if (freelist_corrupted(s, page, &freelist_iter, nextfree)) break; freelist_tail = freelist_iter; @@ -2387,25 +2352,25 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab, } /* - * Stage two: Unfreeze the slab while splicing the per-cpu - * freelist to the head of slab's freelist. + * Stage two: Unfreeze the page while splicing the per-cpu + * freelist to the head of page's freelist. * - * Ensure that the slab is unfrozen while the list presence + * Ensure that the page is unfrozen while the list presence * reflects the actual number of objects during unfreeze. * * We setup the list membership and then perform a cmpxchg - * with the count. If there is a mismatch then the slab - * is not unfrozen but the slab is on the wrong list. + * with the count. If there is a mismatch then the page + * is not unfrozen but the page is on the wrong list. * * Then we restart the process which may have to remove - * the slab from the list that we just put it on again + * the page from the list that we just put it on again * because the number of objects in the slab may have * changed. 
*/ redo: - old.freelist = READ_ONCE(slab->freelist); - old.counters = READ_ONCE(slab->counters); + old.freelist = READ_ONCE(page->freelist); + old.counters = READ_ONCE(page->counters); VM_BUG_ON(!old.frozen); /* Determine target state of the slab */ @@ -2426,8 +2391,9 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab, if (!lock) { lock = 1; /* - * Taking the spinlock removes the possibility that - * acquire_slab() will see a slab that is frozen + * Taking the spinlock removes the possibility + * that acquire_slab() will see a slab page that + * is frozen */ spin_lock_irqsave(&n->list_lock, flags); } @@ -2446,18 +2412,18 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab, if (l != m) { if (l == M_PARTIAL) - remove_partial(n, slab); + remove_partial(n, page); else if (l == M_FULL) - remove_full(s, n, slab); + remove_full(s, n, page); if (m == M_PARTIAL) - add_partial(n, slab, tail); + add_partial(n, page, tail); else if (m == M_FULL) - add_full(s, n, slab); + add_full(s, n, page); } l = m; - if (!cmpxchg_double_slab(s, slab, + if (!cmpxchg_double_slab(s, page, old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")) @@ -2472,26 +2438,26 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab, stat(s, DEACTIVATE_FULL); else if (m == M_FREE) { stat(s, DEACTIVATE_EMPTY); - discard_slab(s, slab); + discard_slab(s, page); stat(s, FREE_SLAB); } } #ifdef CONFIG_SLUB_CPU_PARTIAL -static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab) +static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) { struct kmem_cache_node *n = NULL, *n2 = NULL; - struct slab *slab, *slab_to_discard = NULL; + struct page *page, *discard_page = NULL; unsigned long flags = 0; - while (partial_slab) { - struct slab new; - struct slab old; + while (partial_page) { + struct page new; + struct page old; - slab = partial_slab; - partial_slab = slab->next; + page = partial_page; + partial_page = page->next; - n2 = get_node(s, slab_nid(slab)); + n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) spin_unlock_irqrestore(&n->list_lock, flags); @@ -2502,8 +2468,8 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab) do { - old.freelist = slab->freelist; - old.counters = slab->counters; + old.freelist = page->freelist; + old.counters = page->counters; VM_BUG_ON(!old.frozen); new.counters = old.counters; @@ -2511,16 +2477,16 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab) new.frozen = 0; - } while (!__cmpxchg_double_slab(s, slab, + } while (!__cmpxchg_double_slab(s, page, old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")); if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) { - slab->next = slab_to_discard; - slab_to_discard = slab; + page->next = discard_page; + discard_page = page; } else { - add_partial(n, slab, DEACTIVATE_TO_TAIL); + add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } } @@ -2528,12 +2494,12 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab) if (n) spin_unlock_irqrestore(&n->list_lock, flags); - while (slab_to_discard) { - slab = slab_to_discard; - slab_to_discard = slab_to_discard->next; + while (discard_page) { + page = discard_page; + discard_page = discard_page->next; stat(s, DEACTIVATE_EMPTY); - discard_slab(s, slab); + discard_slab(s, page); stat(s, FREE_SLAB); } } @@ -2543,73 +2509,77 @@ static void __unfreeze_partials(struct 
kmem_cache *s, struct slab *partial_slab) */ static void unfreeze_partials(struct kmem_cache *s) { - struct slab *partial_slab; + struct page *partial_page; unsigned long flags; local_lock_irqsave(&s->cpu_slab->lock, flags); - partial_slab = this_cpu_read(s->cpu_slab->partial); + partial_page = this_cpu_read(s->cpu_slab->partial); this_cpu_write(s->cpu_slab->partial, NULL); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - if (partial_slab) - __unfreeze_partials(s, partial_slab); + if (partial_page) + __unfreeze_partials(s, partial_page); } static void unfreeze_partials_cpu(struct kmem_cache *s, struct kmem_cache_cpu *c) { - struct slab *partial_slab; + struct page *partial_page; - partial_slab = slub_percpu_partial(c); + partial_page = slub_percpu_partial(c); c->partial = NULL; - if (partial_slab) - __unfreeze_partials(s, partial_slab); + if (partial_page) + __unfreeze_partials(s, partial_page); } /* - * Put a slab that was just frozen (in __slab_free|get_partial_node) into a - * partial slab slot if available. + * Put a page that was just frozen (in __slab_free|get_partial_node) into a + * partial page slot if available. * * If we did not find a slot then simply move all the partials to the * per node partial list. */ -static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain) +static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) { - struct slab *oldslab; - struct slab *slab_to_unfreeze = NULL; + struct page *oldpage; + struct page *page_to_unfreeze = NULL; unsigned long flags; - int slabs = 0; + int pages = 0; + int pobjects = 0; local_lock_irqsave(&s->cpu_slab->lock, flags); - oldslab = this_cpu_read(s->cpu_slab->partial); + oldpage = this_cpu_read(s->cpu_slab->partial); - if (oldslab) { - if (drain && oldslab->slabs >= s->cpu_partial_slabs) { + if (oldpage) { + if (drain && oldpage->pobjects > slub_cpu_partial(s)) { /* * Partial array is full. Move the existing set to the * per node partial list. Postpone the actual unfreezing * outside of the critical section. 
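The drain decision this hunk restores keeps a running total of free objects at the head of the per-cpu partial chain. A sketch of the push-with-threshold bookkeeping, assuming each page records its own free-object count in free_objects before the call:

    struct ppage {
            struct ppage *next;
            int free_objects;       /* running total while chained */
    };

    /* Push a newly frozen page; if the chain is already over the limit,
     * detach it first and return it so the caller can unfreeze it onto
     * the node lists outside the critical section. */
    static struct ppage *push_partial(struct ppage **cpu_list,
                                      struct ppage *pg, int limit)
    {
            struct ppage *to_drain = NULL;

            if (*cpu_list && (*cpu_list)->free_objects > limit) {
                    to_drain = *cpu_list;
                    *cpu_list = NULL;
            }
            if (*cpu_list)
                    pg->free_objects += (*cpu_list)->free_objects;
            pg->next = *cpu_list;
            *cpu_list = pg;
            return to_drain;
    }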
*/ - slab_to_unfreeze = oldslab; - oldslab = NULL; + page_to_unfreeze = oldpage; + oldpage = NULL; } else { - slabs = oldslab->slabs; + pobjects = oldpage->pobjects; + pages = oldpage->pages; } } - slabs++; + pages++; + pobjects += page->objects - page->inuse; - slab->slabs = slabs; - slab->next = oldslab; + page->pages = pages; + page->pobjects = pobjects; + page->next = oldpage; - this_cpu_write(s->cpu_slab->partial, slab); + this_cpu_write(s->cpu_slab->partial, page); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - if (slab_to_unfreeze) { - __unfreeze_partials(s, slab_to_unfreeze); + if (page_to_unfreeze) { + __unfreeze_partials(s, page_to_unfreeze); stat(s, CPU_PARTIAL_DRAIN); } } @@ -2625,22 +2595,22 @@ static inline void unfreeze_partials_cpu(struct kmem_cache *s, static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { unsigned long flags; - struct slab *slab; + struct page *page; void *freelist; local_lock_irqsave(&s->cpu_slab->lock, flags); - slab = c->slab; + page = c->page; freelist = c->freelist; - c->slab = NULL; + c->page = NULL; c->freelist = NULL; c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - if (slab) { - deactivate_slab(s, slab, freelist); + if (page) { + deactivate_slab(s, page, freelist); stat(s, CPUSLAB_FLUSH); } } @@ -2649,14 +2619,14 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); void *freelist = c->freelist; - struct slab *slab = c->slab; + struct page *page = c->page; - c->slab = NULL; + c->page = NULL; c->freelist = NULL; c->tid = next_tid(c->tid); - if (slab) { - deactivate_slab(s, slab, freelist); + if (page) { + deactivate_slab(s, page, freelist); stat(s, CPUSLAB_FLUSH); } @@ -2685,7 +2655,7 @@ static void flush_cpu_slab(struct work_struct *w) s = sfw->s; c = this_cpu_ptr(s->cpu_slab); - if (c->slab) + if (c->page) flush_slab(s, c); unfreeze_partials(s); @@ -2695,7 +2665,7 @@ static bool has_cpu_slab(int cpu, struct kmem_cache *s) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); - return c->slab || slub_percpu_partial(c); + return c->page || slub_percpu_partial(c); } static DEFINE_MUTEX(flush_lock); @@ -2757,19 +2727,19 @@ static int slub_cpu_dead(unsigned int cpu) * Check if the objects in a per cpu structure fit numa * locality expectations. 
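The locality test itself is tiny; NUMA_NO_NODE (-1 in the kernel) acts as a wildcard that matches any node:

    /* Sketch of node_match(): a request bound to a node only accepts
     * pages resident on that node. */
    static int node_matches(int page_nid, int requested_nid)
    {
            return requested_nid == -1 /* NUMA_NO_NODE */ ||
                   page_nid == requested_nid;
    }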
*/ -static inline int node_match(struct slab *slab, int node) +static inline int node_match(struct page *page, int node) { #ifdef CONFIG_NUMA - if (node != NUMA_NO_NODE && slab_nid(slab) != node) + if (node != NUMA_NO_NODE && page_to_nid(page) != node) return 0; #endif return 1; } #ifdef CONFIG_SLUB_DEBUG -static int count_free(struct slab *slab) +static int count_free(struct page *page) { - return slab->objects - slab->inuse; + return page->objects - page->inuse; } static inline unsigned long node_nr_objs(struct kmem_cache_node *n) @@ -2780,15 +2750,15 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n) #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS) static unsigned long count_partial(struct kmem_cache_node *n, - int (*get_count)(struct slab *)) + int (*get_count)(struct page *)) { unsigned long flags; unsigned long x = 0; - struct slab *slab; + struct page *page; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(slab, &n->partial, slab_list) - x += get_count(slab); + list_for_each_entry(page, &n->partial, slab_list) + x += get_count(page); spin_unlock_irqrestore(&n->list_lock, flags); return x; } @@ -2831,41 +2801,54 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) #endif } -static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) +static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) { - if (unlikely(slab_test_pfmemalloc(slab))) + if (unlikely(PageSlabPfmemalloc(page))) return gfp_pfmemalloc_allowed(gfpflags); return true; } /* - * Check the slab->freelist and either transfer the freelist to the - * per cpu freelist or deactivate the slab. - * - * The slab is still frozen if the return value is not NULL. - * - * If this function returns NULL then the slab has been unfrozen. + * A variant of pfmemalloc_match() that tests page flags without asserting + * PageSlab. Intended for opportunistic checks before taking a lock and + * rechecking that nobody else freed the page under us. */ -static inline void *get_freelist(struct kmem_cache *s, struct slab *slab) +static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags) { - struct slab new; + if (unlikely(__PageSlabPfmemalloc(page))) + return gfp_pfmemalloc_allowed(gfpflags); + + return true; +} + +/* + * Check the page->freelist of a page and either transfer the freelist to the + * per cpu freelist or deactivate the page. + * + * The page is still frozen if the return value is not NULL. + * + * If this function returns NULL then the page has been unfrozen. 
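Stripped of the cmpxchg loop that follows, the net effect of get_freelist() is easy to state. A sketch over a toy page type, with field names mirroring struct page as used in this file:

    struct fake_page {
            void *freelist;
            unsigned int inuse, objects, frozen;
    };

    /* The CPU takes ownership of every remaining free object in one
     * step; the page stays frozen only if it had objects to hand over. */
    static void *take_freelist(struct fake_page *pg)
    {
            void *list = pg->freelist;

            pg->freelist = NULL;
            pg->inuse = pg->objects;
            pg->frozen = (list != NULL);
            return list;
    }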
+ */ +static inline void *get_freelist(struct kmem_cache *s, struct page *page) +{ + struct page new; unsigned long counters; void *freelist; lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); do { - freelist = slab->freelist; - counters = slab->counters; + freelist = page->freelist; + counters = page->counters; new.counters = counters; VM_BUG_ON(!new.frozen); - new.inuse = slab->objects; + new.inuse = page->objects; new.frozen = freelist != NULL; - } while (!__cmpxchg_double_slab(s, slab, + } while (!__cmpxchg_double_slab(s, page, freelist, counters, NULL, new.counters, "get_freelist")); @@ -2896,15 +2879,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, struct kmem_cache_cpu *c) { void *freelist; - struct slab *slab; + struct page *page; unsigned long flags; stat(s, ALLOC_SLOWPATH); -reread_slab: +reread_page: - slab = READ_ONCE(c->slab); - if (!slab) { + page = READ_ONCE(c->page); + if (!page) { /* * if the node is not online or has no normal memory, just * ignore the node constraint @@ -2916,7 +2899,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, } redo: - if (unlikely(!node_match(slab, node))) { + if (unlikely(!node_match(page, node))) { /* * same as above but node_match() being false already * implies node != NUMA_NO_NODE @@ -2935,23 +2918,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, * PFMEMALLOC but right now, we are losing the pfmemalloc * information when the page leaves the per-cpu allocator */ - if (unlikely(!pfmemalloc_match(slab, gfpflags))) + if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags))) goto deactivate_slab; - /* must check again c->slab in case we got preempted and it changed */ + /* must check again c->page in case we got preempted and it changed */ local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(slab != c->slab)) { + if (unlikely(page != c->page)) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_slab; + goto reread_page; } freelist = c->freelist; if (freelist) goto load_freelist; - freelist = get_freelist(s, slab); + freelist = get_freelist(s, page); if (!freelist) { - c->slab = NULL; + c->page = NULL; local_unlock_irqrestore(&s->cpu_slab->lock, flags); stat(s, DEACTIVATE_BYPASS); goto new_slab; @@ -2965,10 +2948,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, /* * freelist is pointing to the list of objects to be used. - * slab is pointing to the slab from which the objects are obtained. - * That slab must be frozen for per cpu allocations to work. + * page is pointing to the page from which the objects are obtained. + * That page must be frozen for per cpu allocations to work. 
*/ - VM_BUG_ON(!c->slab->frozen); + VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); @@ -2977,23 +2960,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, deactivate_slab: local_lock_irqsave(&s->cpu_slab->lock, flags); - if (slab != c->slab) { + if (page != c->page) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_slab; + goto reread_page; } freelist = c->freelist; - c->slab = NULL; + c->page = NULL; c->freelist = NULL; local_unlock_irqrestore(&s->cpu_slab->lock, flags); - deactivate_slab(s, slab, freelist); + deactivate_slab(s, page, freelist); new_slab: if (slub_percpu_partial(c)) { local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(c->slab)) { + if (unlikely(c->page)) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_slab; + goto reread_page; } if (unlikely(!slub_percpu_partial(c))) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); @@ -3001,8 +2984,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, goto new_objects; } - slab = c->slab = slub_percpu_partial(c); - slub_set_percpu_partial(c, slab); + page = c->page = slub_percpu_partial(c); + slub_set_percpu_partial(c, page); local_unlock_irqrestore(&s->cpu_slab->lock, flags); stat(s, CPU_PARTIAL_ALLOC); goto redo; @@ -3010,32 +2993,32 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, new_objects: - freelist = get_partial(s, gfpflags, node, &slab); + freelist = get_partial(s, gfpflags, node, &page); if (freelist) - goto check_new_slab; + goto check_new_page; slub_put_cpu_ptr(s->cpu_slab); - slab = new_slab(s, gfpflags, node); + page = new_slab(s, gfpflags, node); c = slub_get_cpu_ptr(s->cpu_slab); - if (unlikely(!slab)) { + if (unlikely(!page)) { slab_out_of_memory(s, gfpflags, node); return NULL; } /* - * No other reference to the slab yet so we can + * No other reference to the page yet so we can * muck around with it freely without cmpxchg */ - freelist = slab->freelist; - slab->freelist = NULL; + freelist = page->freelist; + page->freelist = NULL; stat(s, ALLOC_SLAB); -check_new_slab: +check_new_page: if (kmem_cache_debug(s)) { - if (!alloc_debug_processing(s, slab, freelist, addr)) { + if (!alloc_debug_processing(s, page, freelist, addr)) { /* Slab failed checks. Next slab needed */ goto new_slab; } else { @@ -3047,39 +3030,39 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, } } - if (unlikely(!pfmemalloc_match(slab, gfpflags))) + if (unlikely(!pfmemalloc_match(page, gfpflags))) /* * For !pfmemalloc_match() case we don't load freelist so that * we don't make further mismatched allocations easier. 
*/ goto return_single; -retry_load_slab: +retry_load_page: local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(c->slab)) { + if (unlikely(c->page)) { void *flush_freelist = c->freelist; - struct slab *flush_slab = c->slab; + struct page *flush_page = c->page; - c->slab = NULL; + c->page = NULL; c->freelist = NULL; c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - deactivate_slab(s, flush_slab, flush_freelist); + deactivate_slab(s, flush_page, flush_freelist); stat(s, CPUSLAB_FLUSH); - goto retry_load_slab; + goto retry_load_page; } - c->slab = slab; + c->page = page; goto load_freelist; return_single: - deactivate_slab(s, slab, get_freepointer(s, freelist)); + deactivate_slab(s, page, get_freepointer(s, freelist)); return freelist; } @@ -3136,7 +3119,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, { void *object; struct kmem_cache_cpu *c; - struct slab *slab; + struct page *page; unsigned long tid; struct obj_cgroup *objcg = NULL; bool init = false; @@ -3168,9 +3151,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, /* * Irqless object alloc/free algorithm used here depends on sequence * of fetching cpu_slab's data. tid should be fetched before anything - * on c to guarantee that object and slab associated with previous tid + * on c to guarantee that object and page associated with previous tid * won't be used with current tid. If we fetch tid first, object and - * slab could be one associated with next tid and our alloc/free + * page could be one associated with next tid and our alloc/free * request will be failed. In this case, we will retry. So, no problem. */ barrier(); @@ -3183,7 +3166,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, */ object = c->freelist; - slab = c->slab; + page = c->page; /* * We cannot use the lockless fastpath on PREEMPT_RT because if a * slowpath has taken the local_lock_irqsave(), it is not protected @@ -3192,7 +3175,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, * there is a suitable cpu freelist. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) || - unlikely(!object || !slab || !node_match(slab, node))) { + unlikely(!object || !page || !node_match(page, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); } else { void *next_object = get_freepointer_safe(s, object); @@ -3294,17 +3277,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace); * have a longer lifetime than the cpu slabs in most processing loads. * * So we still attempt to reduce cache line usage. Just take the slab - * lock and free the item. If there is no additional partial slab + * lock and free the item. If there is no additional partial page * handling required then we can return immediately. 
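The cheap case alluded to here is a pure linked-list splice: an entire [head, tail] chain of cnt objects goes in front of the page freelist in O(1), which is also what makes bulk free inexpensive. A sketch reusing the fake_page type from the earlier sketch, with set_fp() standing in for set_freepointer():

    static void set_fp(void *obj, void *next)
    {
            *(void **)obj = next;        /* next-free link in the object */
    }

    static void splice_free(struct fake_page *pg, void *head, void *tail,
                            int cnt)
    {
            set_fp(tail, pg->freelist);  /* tail now points at old head */
            pg->freelist = head;
            pg->inuse -= cnt;
    }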
*/ -static void __slab_free(struct kmem_cache *s, struct slab *slab, +static void __slab_free(struct kmem_cache *s, struct page *page, void *head, void *tail, int cnt, unsigned long addr) { void *prior; int was_frozen; - struct slab new; + struct page new; unsigned long counters; struct kmem_cache_node *n = NULL; unsigned long flags; @@ -3315,7 +3298,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, return; if (kmem_cache_debug(s) && - !free_debug_processing(s, slab, head, tail, cnt, addr)) + !free_debug_processing(s, page, head, tail, cnt, addr)) return; do { @@ -3323,8 +3306,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, spin_unlock_irqrestore(&n->list_lock, flags); n = NULL; } - prior = slab->freelist; - counters = slab->counters; + prior = page->freelist; + counters = page->counters; set_freepointer(s, tail, prior); new.counters = counters; was_frozen = new.frozen; @@ -3343,7 +3326,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, } else { /* Needs to be taken off a list */ - n = get_node(s, slab_nid(slab)); + n = get_node(s, page_to_nid(page)); /* * Speculatively acquire the list_lock. * If the cmpxchg does not succeed then we may @@ -3357,7 +3340,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, } } - } while (!cmpxchg_double_slab(s, slab, + } while (!cmpxchg_double_slab(s, page, prior, counters, head, new.counters, "__slab_free")); @@ -3372,10 +3355,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, stat(s, FREE_FROZEN); } else if (new.frozen) { /* - * If we just froze the slab then put it onto the + * If we just froze the page then put it onto the * per cpu partial list. */ - put_cpu_partial(s, slab, 1); + put_cpu_partial(s, page, 1); stat(s, CPU_PARTIAL_FREE); } @@ -3390,8 +3373,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, * then add it. */ if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) { - remove_full(s, n, slab); - add_partial(n, slab, DEACTIVATE_TO_TAIL); + remove_full(s, n, page); + add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } spin_unlock_irqrestore(&n->list_lock, flags); @@ -3402,16 +3385,16 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, /* * Slab on the partial list. */ - remove_partial(n, slab); + remove_partial(n, page); stat(s, FREE_REMOVE_PARTIAL); } else { /* Slab must be on the full list */ - remove_full(s, n, slab); + remove_full(s, n, page); } spin_unlock_irqrestore(&n->list_lock, flags); stat(s, FREE_SLAB); - discard_slab(s, slab); + discard_slab(s, page); } /* @@ -3426,11 +3409,11 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, * with all sorts of special processing. * * Bulk free of a freelist with several objects (all pointing to the - * same slab) possible by specifying head and tail ptr, plus objects + * same page) possible by specifying head and tail ptr, plus objects * count (cnt). Bulk free indicated by tail pointer being set. */ static __always_inline void do_slab_free(struct kmem_cache *s, - struct slab *slab, void *head, void *tail, + struct page *page, void *head, void *tail, int cnt, unsigned long addr) { void *tail_obj = tail ? 
: head; @@ -3453,7 +3436,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s, /* Same with comment on barrier() in slab_alloc_node() */ barrier(); - if (likely(slab == c->slab)) { + if (likely(page == c->page)) { #ifndef CONFIG_PREEMPT_RT void **freelist = READ_ONCE(c->freelist); @@ -3479,7 +3462,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s, local_lock(&s->cpu_slab->lock); c = this_cpu_ptr(s->cpu_slab); - if (unlikely(slab != c->slab)) { + if (unlikely(page != c->page)) { local_unlock(&s->cpu_slab->lock); goto redo; } @@ -3494,11 +3477,11 @@ static __always_inline void do_slab_free(struct kmem_cache *s, #endif stat(s, FREE_FASTPATH); } else - __slab_free(s, slab, head, tail_obj, cnt, addr); + __slab_free(s, page, head, tail_obj, cnt, addr); } -static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab, +static __always_inline void slab_free(struct kmem_cache *s, struct page *page, void *head, void *tail, int cnt, unsigned long addr) { @@ -3507,13 +3490,13 @@ static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab, * to remove objects, whose reuse must be delayed. */ if (slab_free_freelist_hook(s, &head, &tail, &cnt)) - do_slab_free(s, slab, head, tail, cnt, addr); + do_slab_free(s, page, head, tail, cnt, addr); } #ifdef CONFIG_KASAN_GENERIC void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) { - do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr); + do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr); } #endif @@ -3522,37 +3505,34 @@ void kmem_cache_free(struct kmem_cache *s, void *x) s = cache_from_obj(s, x); if (!s) return; + slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_); trace_kmem_cache_free(_RET_IP_, x, s->name); - slab_free(s, virt_to_slab(x), x, NULL, 1, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); struct detached_freelist { - struct slab *slab; + struct page *page; void *tail; void *freelist; int cnt; struct kmem_cache *s; }; -static inline void free_large_kmalloc(struct folio *folio, void *object) +static inline void free_nonslab_page(struct page *page, void *object) { - unsigned int order = folio_order(folio); - - if (WARN_ON_ONCE(order == 0)) - pr_warn_once("object pointer: 0x%p\n", object); + unsigned int order = compound_order(page); + VM_BUG_ON_PAGE(!PageCompound(page), page); kfree_hook(object); - mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B, - -(PAGE_SIZE << order)); - __free_pages(folio_page(folio, 0), order); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); + __free_pages(page, order); } /* * This function progressively scans the array with free objects (with * a limited look ahead) and extract objects belonging to the same - * slab. It builds a detached freelist directly within the given - * slab/objects. This can happen without any need for + * page. It builds a detached freelist directly within the given + * page/objects. This can happen without any need for * synchronization, because the objects are owned by running process. * The freelist is build up as a single linked list in the objects. 
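A sketch of that scan, with page_of() standing in for virt_to_head_page() (faked here by masking to a 4 KiB boundary) and set_fp() for set_freepointer(). The kernel version additionally bounds the search with a small lookahead and returns the index where the next round should resume; both refinements are omitted:

    #include <stddef.h>
    #include <stdint.h>

    struct dfl { void *page, *head, *tail; int cnt; };

    static void *page_of(void *obj)
    {
            return (void *)((uintptr_t)obj & ~(uintptr_t)4095);
    }

    static void set_fp(void *obj, void *next)
    {
            *(void **)obj = next;
    }

    /* Scan the array from the back, chain every object sharing a page
     * with the first live entry, and NULL out consumed slots so later
     * rounds skip them. Returns the number of objects chained. */
    static int build_dfl(void **p, size_t size, struct dfl *df)
    {
            df->head = df->tail = NULL;
            df->cnt = 0;

            while (size) {
                    void *obj = p[--size];

                    if (!obj)
                            continue;          /* already consumed */
                    if (!df->head) {
                            df->page = page_of(obj);
                            df->tail = obj;
                            set_fp(obj, NULL); /* terminates the chain */
                    } else if (page_of(obj) != df->page) {
                            continue;          /* keep for a later round */
                    } else {
                            set_fp(obj, df->head);
                    }
                    df->head = obj;
                    df->cnt++;
                    p[size] = NULL;            /* mark processed */
            }
            return df->cnt;
    }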
* The idea is, that this detached freelist can then be bulk @@ -3567,11 +3547,10 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, size_t first_skipped_index = 0; int lookahead = 3; void *object; - struct folio *folio; - struct slab *slab; + struct page *page; /* Always re-init detached_freelist */ - df->slab = NULL; + df->page = NULL; do { object = p[--size]; @@ -3581,19 +3560,17 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!object) return 0; - folio = virt_to_folio(object); + page = virt_to_head_page(object); if (!s) { /* Handle kalloc'ed objects */ - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, object); + if (unlikely(!PageSlab(page))) { + free_nonslab_page(page, object); p[size] = NULL; /* mark object processed */ return size; } /* Derive kmem_cache from object */ - slab = folio_slab(folio); - df->s = slab->slab_cache; + df->s = page->slab_cache; } else { - slab = folio_slab(folio); df->s = cache_from_obj(s, object); /* Support for memcg */ } @@ -3605,7 +3582,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, } /* Start new detached freelist */ - df->slab = slab; + df->page = page; set_freepointer(df->s, object, NULL); df->tail = object; df->freelist = object; @@ -3617,8 +3594,8 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!object) continue; /* Skip processed objects */ - /* df->slab is always set at this point */ - if (df->slab == virt_to_slab(object)) { + /* df->page is always set at this point */ + if (df->page == virt_to_head_page(object)) { /* Opportunity build freelist */ set_freepointer(df->s, object, df->freelist); df->freelist = object; @@ -3650,10 +3627,10 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) struct detached_freelist df; size = build_detached_freelist(s, size, p, &df); - if (!df.slab) + if (!df.page) continue; - slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_); + slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); @@ -3787,7 +3764,7 @@ static unsigned int slub_min_objects; * requested a higher minimum order then we start with that one instead of * the smallest order which will fit the object. */ -static inline unsigned int calc_slab_order(unsigned int size, +static inline unsigned int slab_order(unsigned int size, unsigned int min_objects, unsigned int max_order, unsigned int fract_leftover) { @@ -3851,7 +3828,7 @@ static inline int calculate_order(unsigned int size) fraction = 16; while (fraction >= 4) { - order = calc_slab_order(size, min_objects, + order = slab_order(size, min_objects, slub_max_order, fraction); if (order <= slub_max_order) return order; @@ -3864,14 +3841,14 @@ static inline int calculate_order(unsigned int size) * We were unable to place multiple objects in a slab. Now * lets see if we can place a single object there. */ - order = calc_slab_order(size, 1, slub_max_order, 1); + order = slab_order(size, 1, slub_max_order, 1); if (order <= slub_max_order) return order; /* * Doh this slab cannot be placed using slub_max_order. 
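The fit test behind this order search is simple modular arithmetic: an order is acceptable once the leftover after packing whole objects stays below slab_size / fract_leftover, with the fraction relaxed from 16 down to 4 before giving up on multi-object slabs. A worked example for an arbitrary 700-byte object on 4 KiB pages:

    #include <stdio.h>

    int main(void)
    {
            unsigned int size = 700, fraction = 16;

            for (unsigned int order = 0; order <= 3; order++) {
                    unsigned int slab = 4096u << order;
                    unsigned int rem = slab % size;

                    printf("order %u: %2u objects, %3u bytes wasted%s\n",
                           order, slab / size, rem,
                           rem <= slab / fraction ? "  <- acceptable" : "");
            }
            return 0;
    }

At order 0 the 596 wasted bytes exceed 4096/16 = 256, so the search moves to order 1, where 492 wasted bytes against a 512-byte budget pass the test.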
*/ - order = calc_slab_order(size, 1, MAX_ORDER, 1); + order = slab_order(size, 1, MAX_ORDER, 1); if (order < MAX_ORDER) return order; return -ENOSYS; @@ -3923,38 +3900,38 @@ static struct kmem_cache *kmem_cache_node; */ static void early_kmem_cache_node_alloc(int node) { - struct slab *slab; + struct page *page; struct kmem_cache_node *n; BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); - slab = new_slab(kmem_cache_node, GFP_NOWAIT, node); + page = new_slab(kmem_cache_node, GFP_NOWAIT, node); - BUG_ON(!slab); - if (slab_nid(slab) != node) { + BUG_ON(!page); + if (page_to_nid(page) != node) { pr_err("SLUB: Unable to allocate memory from node %d\n", node); pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n"); } - n = slab->freelist; + n = page->freelist; BUG_ON(!n); #ifdef CONFIG_SLUB_DEBUG init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); - slab->freelist = get_freepointer(kmem_cache_node, n); - slab->inuse = 1; - slab->frozen = 0; + page->freelist = get_freepointer(kmem_cache_node, n); + page->inuse = 1; + page->frozen = 0; kmem_cache_node->node[node] = n; init_kmem_cache_node(n); - inc_slabs_node(kmem_cache_node, node, slab->objects); + inc_slabs_node(kmem_cache_node, node, page->objects); /* * No locks need to be taken here as it has just been * initialized and there is no concurrent access. */ - __add_partial(n, slab, DEACTIVATE_TO_HEAD); + __add_partial(n, page, DEACTIVATE_TO_HEAD); } static void free_kmem_cache_nodes(struct kmem_cache *s) @@ -4012,8 +3989,6 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min) static void set_cpu_partial(struct kmem_cache *s) { #ifdef CONFIG_SLUB_CPU_PARTIAL - unsigned int nr_objects; - /* * cpu_partial determined the maximum number of objects kept in the * per cpu partial lists of a processor. @@ -4023,22 +3998,24 @@ static void set_cpu_partial(struct kmem_cache *s) * filled up again with minimal effort. The slab will never hit the * per node partial lists and therefore no locking will be required. * - * For backwards compatibility reasons, this is determined as number - * of objects, even though we now limit maximum number of pages, see - * slub_set_cpu_partial() + * This setting also determines + * + * A) The number of objects from per cpu partial slabs dumped to the + * per node list when we reach the limit. + * B) The number of objects in cpu partial slabs to extract from the + * per node list when we run out of per cpu objects. We only fetch + * 50% to keep some capacity around for frees. */ if (!kmem_cache_has_cpu_partial(s)) - nr_objects = 0; + slub_set_cpu_partial(s, 0); else if (s->size >= PAGE_SIZE) - nr_objects = 6; + slub_set_cpu_partial(s, 2); else if (s->size >= 1024) - nr_objects = 24; + slub_set_cpu_partial(s, 6); else if (s->size >= 256) - nr_objects = 52; + slub_set_cpu_partial(s, 13); else - nr_objects = 120; - - slub_set_cpu_partial(s, nr_objects); + slub_set_cpu_partial(s, 30); #endif } @@ -4212,7 +4189,7 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) #endif /* - * The larger the object size is, the more slabs we want on the partial + * The larger the object size is, the more pages we want on the partial * list to avoid pounding the page allocator excessively. 
*/ set_min_partial(s, ilog2(s->size) / 2); @@ -4240,20 +4217,20 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) return -EINVAL; } -static void list_slab_objects(struct kmem_cache *s, struct slab *slab, +static void list_slab_objects(struct kmem_cache *s, struct page *page, const char *text) { #ifdef CONFIG_SLUB_DEBUG - void *addr = slab_address(slab); + void *addr = page_address(page); unsigned long flags; unsigned long *map; void *p; - slab_err(s, slab, text, s->name); - slab_lock(slab, &flags); + slab_err(s, page, text, s->name); + slab_lock(page, &flags); - map = get_map(s, slab); - for_each_object(p, s, addr, slab->objects) { + map = get_map(s, page); + for_each_object(p, s, addr, page->objects) { if (!test_bit(__obj_to_index(s, addr, p), map)) { pr_err("Object 0x%p @offset=%tu\n", p, p - addr); @@ -4261,7 +4238,7 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab, } } put_map(map); - slab_unlock(slab, &flags); + slab_unlock(page, &flags); #endif } @@ -4273,23 +4250,23 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab, static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) { LIST_HEAD(discard); - struct slab *slab, *h; + struct page *page, *h; BUG_ON(irqs_disabled()); spin_lock_irq(&n->list_lock); - list_for_each_entry_safe(slab, h, &n->partial, slab_list) { - if (!slab->inuse) { - remove_partial(n, slab); - list_add(&slab->slab_list, &discard); + list_for_each_entry_safe(page, h, &n->partial, slab_list) { + if (!page->inuse) { + remove_partial(n, page); + list_add(&page->slab_list, &discard); } else { - list_slab_objects(s, slab, + list_slab_objects(s, page, "Objects remaining in %s on __kmem_cache_shutdown()"); } } spin_unlock_irq(&n->list_lock); - list_for_each_entry_safe(slab, h, &discard, slab_list) - discard_slab(s, slab); + list_for_each_entry_safe(page, h, &discard, slab_list) + discard_slab(s, page); } bool __kmem_cache_empty(struct kmem_cache *s) @@ -4322,32 +4299,31 @@ int __kmem_cache_shutdown(struct kmem_cache *s) } #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page) { void *base; int __maybe_unused i; unsigned int objnr; void *objp; void *objp0; - struct kmem_cache *s = slab->slab_cache; + struct kmem_cache *s = page->slab_cache; struct track __maybe_unused *trackp; kpp->kp_ptr = object; - kpp->kp_slab = slab; + kpp->kp_page = page; kpp->kp_slab_cache = s; - base = slab_address(slab); + base = page_address(page); objp0 = kasan_reset_tag(object); #ifdef CONFIG_SLUB_DEBUG objp = restore_red_left(s, objp0); #else objp = objp0; #endif - objnr = obj_to_index(s, slab, objp); + objnr = obj_to_index(s, page, objp); kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp); objp = base + s->size * objnr; kpp->kp_objp = objp; - if (WARN_ON_ONCE(objp < base || objp >= base + slab->objects * s->size - || (objp - base) % s->size) || + if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) || !(s->flags & SLAB_STORE_USER)) return; #ifdef CONFIG_SLUB_DEBUG @@ -4485,20 +4461,21 @@ EXPORT_SYMBOL(__kmalloc_node); * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. 
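kmem_obj_info() above recovers an object's index and the pointer's interior offset purely from address arithmetic against the slab base. A small sketch of that recovery; locate_object() and struct obj_info are hypothetical names, and the redzone adjustment (restore_red_left) is left out.

#include <stdint.h>
#include <stdio.h>

struct obj_info {
	unsigned int index;	/* object number within the slab */
	size_t offset;		/* byte offset of ptr inside that object */
	void *objp;		/* start of the enclosing object */
};

/* Recover object number and start address from an interior pointer. */
static int locate_object(void *base, size_t obj_size, unsigned int nr_objects,
			 void *ptr, struct obj_info *out)
{
	uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;

	if ((uintptr_t)ptr < (uintptr_t)base ||
	    delta >= (uintptr_t)obj_size * nr_objects)
		return -1;		/* not inside this slab */
	out->index = (unsigned int)(delta / obj_size);
	out->offset = (size_t)(delta % obj_size);
	out->objp = (char *)base + (size_t)out->index * obj_size;
	return 0;
}

int main(void)
{
	static char slab[8 * 64];	/* 8 objects of 64 bytes */
	struct obj_info info;

	if (locate_object(slab, 64, 8, slab + 200, &info) == 0)
		printf("object %u, offset %zu\n", info.index, info.offset);
	return 0;
}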
*/ -void __check_heap_object(const void *ptr, unsigned long n, - const struct slab *slab, bool to_user) +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user) { struct kmem_cache *s; unsigned int offset; + size_t object_size; bool is_kfence = is_kfence_address(ptr); ptr = kasan_reset_tag(ptr); /* Find object and usable object size. */ - s = slab->slab_cache; + s = page->slab_cache; /* Reject impossible pointers. */ - if (ptr < slab_address(slab)) + if (ptr < page_address(page)) usercopy_abort("SLUB object not in SLUB page?!", NULL, to_user, 0, n); @@ -4506,7 +4483,7 @@ void __check_heap_object(const void *ptr, unsigned long n, if (is_kfence) offset = ptr - kfence_object_start(ptr); else - offset = (ptr - slab_address(slab)) % s->size; + offset = (ptr - page_address(page)) % s->size; /* Adjust for redzone and reject if within the redzone. */ if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) { @@ -4522,30 +4499,44 @@ void __check_heap_object(const void *ptr, unsigned long n, n <= s->useroffset - offset + s->usersize) return; + /* + * If the copy is still within the allocated object, produce + * a warning instead of rejecting the copy. This is intended + * to be a temporary method to find any missing usercopy + * whitelists. + */ + object_size = slab_ksize(s); + if (usercopy_fallback && + offset <= object_size && n <= object_size - offset) { + usercopy_warn("SLUB object", s->name, to_user, offset, n); + return; + } + usercopy_abort("SLUB object", s->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ size_t __ksize(const void *object) { - struct folio *folio; + struct page *page; if (unlikely(object == ZERO_SIZE_PTR)) return 0; - folio = virt_to_folio(object); + page = virt_to_head_page(object); - if (unlikely(!folio_test_slab(folio))) - return folio_size(folio); + if (unlikely(!PageSlab(page))) { + WARN_ON(!PageCompound(page)); + return page_size(page); + } - return slab_ksize(folio_slab(folio)->slab_cache); + return slab_ksize(page->slab_cache); } EXPORT_SYMBOL(__ksize); void kfree(const void *x) { - struct folio *folio; - struct slab *slab; + struct page *page; void *object = (void *)x; trace_kfree(_RET_IP_, x); @@ -4553,13 +4544,12 @@ void kfree(const void *x) if (unlikely(ZERO_OR_NULL_PTR(x))) return; - folio = virt_to_folio(x); - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, object); + page = virt_to_head_page(x); + if (unlikely(!PageSlab(page))) { + free_nonslab_page(page, object); return; } - slab = folio_slab(folio); - slab_free(slab->slab_cache, slab, object, NULL, 1, _RET_IP_); + slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); } EXPORT_SYMBOL(kfree); @@ -4579,8 +4569,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) int node; int i; struct kmem_cache_node *n; - struct slab *slab; - struct slab *t; + struct page *page; + struct page *t; struct list_head discard; struct list_head promote[SHRINK_PROMOTE_MAX]; unsigned long flags; @@ -4597,22 +4587,22 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) * Build lists of slabs to discard or promote. * * Note that concurrent frees may occur while we hold the - * list_lock. slab->inuse here is the upper limit. + * list_lock. page->inuse here is the upper limit. 
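The usercopy check above reduces to interval arithmetic: an n-byte copy at a given offset inside the object must fall inside the cache's whitelisted [useroffset, useroffset + usersize) window. A hedged sketch of just that comparison; check_usercopy() and struct cache_policy are illustrative, and the kfence and redzone handling are omitted.

#include <stddef.h>
#include <stdio.h>

/* Whitelisted region inside each object, as in struct kmem_cache. */
struct cache_policy {
	size_t useroffset;
	size_t usersize;
};

/*
 * Return 0 if an n-byte copy at byte `offset` inside the object stays
 * within the whitelist window, -1 if it must be rejected.
 */
static int check_usercopy(const struct cache_policy *s, size_t offset, size_t n)
{
	if (s->usersize == 0)
		return -1;		/* nothing whitelisted */
	if (offset < s->useroffset)
		return -1;		/* starts before the window */
	if (n > s->usersize - (offset - s->useroffset))
		return -1;		/* runs past the window end */
	return 0;
}

int main(void)
{
	struct cache_policy s = { .useroffset = 16, .usersize = 32 };

	printf("copy at 16, len 32: %s\n",
	       check_usercopy(&s, 16, 32) ? "reject" : "ok");
	printf("copy at 40, len 16: %s\n",
	       check_usercopy(&s, 40, 16) ? "reject" : "ok");
	return 0;
}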
*/ - list_for_each_entry_safe(slab, t, &n->partial, slab_list) { - int free = slab->objects - slab->inuse; + list_for_each_entry_safe(page, t, &n->partial, slab_list) { + int free = page->objects - page->inuse; - /* Do not reread slab->inuse */ + /* Do not reread page->inuse */ barrier(); /* We do not keep full slabs on the list */ BUG_ON(free <= 0); - if (free == slab->objects) { - list_move(&slab->slab_list, &discard); + if (free == page->objects) { + list_move(&page->slab_list, &discard); n->nr_partial--; } else if (free <= SHRINK_PROMOTE_MAX) - list_move(&slab->slab_list, promote + free - 1); + list_move(&page->slab_list, promote + free - 1); } /* @@ -4625,8 +4615,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) spin_unlock_irqrestore(&n->list_lock, flags); /* Release empty slabs */ - list_for_each_entry_safe(slab, t, &discard, slab_list) - discard_slab(s, slab); + list_for_each_entry_safe(page, t, &discard, slab_list) + discard_slab(s, page); if (slabs_node(s, node)) ret = 1; @@ -4787,7 +4777,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) */ __flush_cpu_slab(s, smp_processor_id()); for_each_kmem_cache_node(s, node, n) { - struct slab *p; + struct page *p; list_for_each_entry(p, &n->partial, slab_list) p->slab_cache = s; @@ -4965,54 +4955,54 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller); #endif #ifdef CONFIG_SYSFS -static int count_inuse(struct slab *slab) +static int count_inuse(struct page *page) { - return slab->inuse; + return page->inuse; } -static int count_total(struct slab *slab) +static int count_total(struct page *page) { - return slab->objects; + return page->objects; } #endif #ifdef CONFIG_SLUB_DEBUG -static void validate_slab(struct kmem_cache *s, struct slab *slab, +static void validate_slab(struct kmem_cache *s, struct page *page, unsigned long *obj_map) { void *p; - void *addr = slab_address(slab); + void *addr = page_address(page); unsigned long flags; - slab_lock(slab, &flags); + slab_lock(page, &flags); - if (!check_slab(s, slab) || !on_freelist(s, slab, NULL)) + if (!check_slab(s, page) || !on_freelist(s, page, NULL)) goto unlock; /* Now we know that a valid freelist exists */ - __fill_map(obj_map, s, slab); - for_each_object(p, s, addr, slab->objects) { + __fill_map(obj_map, s, page); + for_each_object(p, s, addr, page->objects) { u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ? 
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE; - if (!check_object(s, slab, p, val)) + if (!check_object(s, page, p, val)) break; } unlock: - slab_unlock(slab, &flags); + slab_unlock(page, &flags); } static int validate_slab_node(struct kmem_cache *s, struct kmem_cache_node *n, unsigned long *obj_map) { unsigned long count = 0; - struct slab *slab; + struct page *page; unsigned long flags; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(slab, &n->partial, slab_list) { - validate_slab(s, slab, obj_map); + list_for_each_entry(page, &n->partial, slab_list) { + validate_slab(s, page, obj_map); count++; } if (count != n->nr_partial) { @@ -5024,8 +5014,8 @@ static int validate_slab_node(struct kmem_cache *s, if (!(s->flags & SLAB_STORE_USER)) goto out; - list_for_each_entry(slab, &n->full, slab_list) { - validate_slab(s, slab, obj_map); + list_for_each_entry(page, &n->full, slab_list) { + validate_slab(s, page, obj_map); count++; } if (count != atomic_long_read(&n->nr_slabs)) { @@ -5191,15 +5181,15 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, } static void process_slab(struct loc_track *t, struct kmem_cache *s, - struct slab *slab, enum track_item alloc, + struct page *page, enum track_item alloc, unsigned long *obj_map) { - void *addr = slab_address(slab); + void *addr = page_address(page); void *p; - __fill_map(obj_map, s, slab); + __fill_map(obj_map, s, page); - for_each_object(p, s, addr, slab->objects) + for_each_object(p, s, addr, page->objects) if (!test_bit(__obj_to_index(s, addr, p), obj_map)) add_location(t, s, get_track(s, p, alloc)); } @@ -5241,37 +5231,35 @@ static ssize_t show_slab_objects(struct kmem_cache *s, struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); int node; - struct slab *slab; + struct page *page; - slab = READ_ONCE(c->slab); - if (!slab) + page = READ_ONCE(c->page); + if (!page) continue; - node = slab_nid(slab); + node = page_to_nid(page); if (flags & SO_TOTAL) - x = slab->objects; + x = page->objects; else if (flags & SO_OBJECTS) - x = slab->inuse; + x = page->inuse; else x = 1; total += x; nodes[node] += x; -#ifdef CONFIG_SLUB_CPU_PARTIAL - slab = slub_percpu_partial_read_once(c); - if (slab) { - node = slab_nid(slab); + page = slub_percpu_partial_read_once(c); + if (page) { + node = page_to_nid(page); if (flags & SO_TOTAL) WARN_ON_ONCE(1); else if (flags & SO_OBJECTS) WARN_ON_ONCE(1); else - x = slab->slabs; + x = page->pages; total += x; nodes[node] += x; } -#endif } } @@ -5403,12 +5391,7 @@ SLAB_ATTR(min_partial); static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) { - unsigned int nr_partial = 0; -#ifdef CONFIG_SLUB_CPU_PARTIAL - nr_partial = s->cpu_partial; -#endif - - return sysfs_emit(buf, "%u\n", nr_partial); + return sysfs_emit(buf, "%u\n", slub_cpu_partial(s)); } static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, @@ -5470,37 +5453,32 @@ SLAB_ATTR_RO(objects_partial); static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) { int objects = 0; - int slabs = 0; - int cpu __maybe_unused; + int pages = 0; + int cpu; int len = 0; -#ifdef CONFIG_SLUB_CPU_PARTIAL for_each_online_cpu(cpu) { - struct slab *slab; + struct page *page; - slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); + page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); - if (slab) - slabs += slab->slabs; - } -#endif - - /* Approximate half-full slabs, see slub_set_cpu_partial() */ - objects = (slabs * oo_objects(s->oo)) / 2; - len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs); - -#if 
defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP) - for_each_online_cpu(cpu) { - struct slab *slab; - - slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); - if (slab) { - slabs = READ_ONCE(slab->slabs); - objects = (slabs * oo_objects(s->oo)) / 2; - len += sysfs_emit_at(buf, len, " C%d=%d(%d)", - cpu, objects, slabs); + if (page) { + pages += page->pages; + objects += page->pobjects; } } + + len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages); + +#ifdef CONFIG_SMP + for_each_online_cpu(cpu) { + struct page *page; + + page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); + if (page) + len += sysfs_emit_at(buf, len, " C%d=%d(%d)", + cpu, page->pobjects, page->pages); + } #endif len += sysfs_emit_at(buf, len, "\n"); @@ -6166,16 +6144,16 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep) for_each_kmem_cache_node(s, node, n) { unsigned long flags; - struct slab *slab; + struct page *page; if (!atomic_long_read(&n->nr_slabs)) continue; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(slab, &n->partial, slab_list) - process_slab(t, s, slab, alloc, obj_map); - list_for_each_entry(slab, &n->full, slab_list) - process_slab(t, s, slab, alloc, obj_map); + list_for_each_entry(page, &n->partial, slab_list) + process_slab(t, s, page, alloc, obj_map); + list_for_each_entry(page, &n->full, slab_list) + process_slab(t, s, page, alloc, obj_map); spin_unlock_irqrestore(&n->list_lock, flags); } diff --git a/mm/sparse.c b/mm/sparse.c index d21c6e5910..120bc8ea52 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata; static inline void __meminit sparse_buffer_free(unsigned long size) { WARN_ON(!sparsemap_buf || size == 0); - memblock_free(sparsemap_buf, size); + memblock_free_early(__pa(sparsemap_buf), size); } static void __init sparse_buffer_init(unsigned long size, int nid) @@ -722,7 +722,7 @@ static void free_map_bootmem(struct page *memmap) >> PAGE_SHIFT; for (i = 0; i < nr_pages; i++, page++) { - magic = page->index; + magic = (unsigned long) page->freelist; BUG_ON(magic == NODE_INFO); diff --git a/mm/swap.c b/mm/swap.c index bcf3ac288b..af3cad4e53 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -80,11 +80,10 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = { static void __page_cache_release(struct page *page) { if (PageLRU(page)) { - struct folio *folio = page_folio(page); struct lruvec *lruvec; unsigned long flags; - lruvec = folio_lruvec_lock_irqsave(folio, &flags); + lruvec = lock_page_lruvec_irqsave(page, &flags); del_page_from_lru_list(page, lruvec); __clear_page_lru_flags(page); unlock_page_lruvec_irqrestore(lruvec, flags); @@ -95,7 +94,7 @@ static void __page_cache_release(struct page *page) static void __put_single_page(struct page *page) { __page_cache_release(page); - mem_cgroup_uncharge(page_folio(page)); + mem_cgroup_uncharge(page); free_unref_page(page, 0); } @@ -135,28 +134,18 @@ EXPORT_SYMBOL(__put_page); * put_pages_list() - release a list of pages * @pages: list of pages threaded on page->lru * - * Release a list of pages which are strung together on page.lru. + * Release a list of pages which are strung together on page.lru. Currently + * used by read_cache_pages() and related error recovery code. 
*/ void put_pages_list(struct list_head *pages) { - struct page *page, *next; + while (!list_empty(pages)) { + struct page *victim; - list_for_each_entry_safe(page, next, pages, lru) { - if (!put_page_testzero(page)) { - list_del(&page->lru); - continue; - } - if (PageHead(page)) { - list_del(&page->lru); - __put_compound_page(page); - continue; - } - /* Cannot be PageLRU because it's passed to us using the lru */ - __ClearPageWaiters(page); + victim = lru_to_page(pages); + list_del(&victim->lru); + put_page(victim); } - - free_unref_page_list(pages); - INIT_LIST_HEAD(pages); } EXPORT_SYMBOL(put_pages_list); @@ -199,13 +188,12 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - struct folio *folio = page_folio(page); /* block memcg migration during page moving between lru */ if (!TestClearPageLRU(page)) continue; - lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); + lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); (*move_fn)(page, lruvec); SetPageLRU(page); @@ -218,13 +206,11 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec) { - struct folio *folio = page_folio(page); - - if (!folio_test_unevictable(folio)) { - lruvec_del_folio(lruvec, folio); - folio_clear_active(folio); - lruvec_add_folio_tail(lruvec, folio); - __count_vm_events(PGROTATED, folio_nr_pages(folio)); + if (!PageUnevictable(page)) { + del_page_from_lru_list(page, lruvec); + ClearPageActive(page); + add_page_to_lru_list_tail(page, lruvec); + __count_vm_events(PGROTATED, thp_nr_pages(page)); } } @@ -241,23 +227,23 @@ static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page) } /* - * Writeback is about to end against a folio which has been marked for - * immediate reclaim. If it still appears to be reclaimable, move it - * to the tail of the inactive list. + * Writeback is about to end against a page which has been marked for immediate + * reclaim. If it still appears to be reclaimable, move it to the tail of the + * inactive list. * - * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races. + * rotate_reclaimable_page() must disable IRQs, to prevent nasty races. 
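pagevec_lru_move_fn() above is an instance of the batching idiom used throughout this file: accumulate pages in a small fixed-size vector, then apply a move function to the whole batch under a single lock acquisition. A userspace model of the idiom follows; the pthread mutex stands in for the lruvec spinlock, move_to_tail() for pagevec_move_tail_fn(), and the TestClearPageLRU handshake is not modeled.

#include <pthread.h>
#include <stdio.h>

#define PVEC_SIZE 15		/* kernel pagevecs hold 15 entries */

struct pvec {
	unsigned int nr;
	void *items[PVEC_SIZE];
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Apply move_fn to every batched item under one lock acquisition. */
static void pvec_flush(struct pvec *pv, void (*move_fn)(void *))
{
	pthread_mutex_lock(&list_lock);
	for (unsigned int i = 0; i < pv->nr; i++)
		move_fn(pv->items[i]);
	pthread_mutex_unlock(&list_lock);
	pv->nr = 0;
}

/* Queue one item; flush when the vector fills up. */
static void pvec_add(struct pvec *pv, void *item, void (*move_fn)(void *))
{
	pv->items[pv->nr++] = item;
	if (pv->nr == PVEC_SIZE)
		pvec_flush(pv, move_fn);
}

static void move_to_tail(void *item)
{
	printf("rotating %p to the inactive tail\n", item);
}

int main(void)
{
	struct pvec pv = { 0 };
	int pages[20];

	for (int i = 0; i < 20; i++)
		pvec_add(&pv, &pages[i], move_to_tail);
	pvec_flush(&pv, move_to_tail);	/* drain the remainder */
	return 0;
}

The payoff is one lock round trip per 15 pages instead of one per page, which is why nearly every LRU mutation in this file funnels through a pagevec.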
*/ -void folio_rotate_reclaimable(struct folio *folio) +void rotate_reclaimable_page(struct page *page) { - if (!folio_test_locked(folio) && !folio_test_dirty(folio) && - !folio_test_unevictable(folio) && folio_test_lru(folio)) { + if (!PageLocked(page) && !PageDirty(page) && + !PageUnevictable(page) && PageLRU(page)) { struct pagevec *pvec; unsigned long flags; - folio_get(folio); + get_page(page); local_lock_irqsave(&lru_rotate.lock, flags); pvec = this_cpu_ptr(&lru_rotate.pvec); - if (pagevec_add_and_need_flush(pvec, &folio->page)) + if (pagevec_add_and_need_flush(pvec, page)) pagevec_lru_move_fn(pvec, pagevec_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } @@ -303,21 +289,21 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) } while ((lruvec = parent_lruvec(lruvec))); } -void lru_note_cost_folio(struct folio *folio) +void lru_note_cost_page(struct page *page) { - lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), - folio_nr_pages(folio)); + lru_note_cost(mem_cgroup_page_lruvec(page), + page_is_file_lru(page), thp_nr_pages(page)); } -static void __folio_activate(struct folio *folio, struct lruvec *lruvec) +static void __activate_page(struct page *page, struct lruvec *lruvec) { - if (!folio_test_active(folio) && !folio_test_unevictable(folio)) { - long nr_pages = folio_nr_pages(folio); + if (!PageActive(page) && !PageUnevictable(page)) { + int nr_pages = thp_nr_pages(page); - lruvec_del_folio(lruvec, folio); - folio_set_active(folio); - lruvec_add_folio(lruvec, folio); - trace_mm_lru_activate(folio); + del_page_from_lru_list(page, lruvec); + SetPageActive(page); + add_page_to_lru_list(page, lruvec); + trace_mm_lru_activate(page); __count_vm_events(PGACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, @@ -326,11 +312,6 @@ static void __folio_activate(struct folio *folio, struct lruvec *lruvec) } #ifdef CONFIG_SMP -static void __activate_page(struct page *page, struct lruvec *lruvec) -{ - return __folio_activate(page_folio(page), lruvec); -} - static void activate_page_drain(int cpu) { struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu); @@ -344,16 +325,16 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void folio_activate(struct folio *folio) +static void activate_page(struct page *page) { - if (folio_test_lru(folio) && !folio_test_active(folio) && - !folio_test_unevictable(folio)) { + page = compound_head(page); + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { struct pagevec *pvec; - folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.activate_page); - if (pagevec_add_and_need_flush(pvec, &folio->page)) + get_page(page); + if (pagevec_add_and_need_flush(pvec, page)) pagevec_lru_move_fn(pvec, __activate_page); local_unlock(&lru_pvecs.lock); } @@ -364,20 +345,21 @@ static inline void activate_page_drain(int cpu) { } -static void folio_activate(struct folio *folio) +static void activate_page(struct page *page) { struct lruvec *lruvec; - if (folio_test_clear_lru(folio)) { - lruvec = folio_lruvec_lock_irq(folio); - __folio_activate(folio, lruvec); + page = compound_head(page); + if (TestClearPageLRU(page)) { + lruvec = lock_page_lruvec_irq(page); + __activate_page(page, lruvec); unlock_page_lruvec_irq(lruvec); - folio_set_lru(folio); + SetPageLRU(page); } } #endif -static void __lru_cache_activate_folio(struct folio *folio) +static void __lru_cache_activate_page(struct page *page) 
{ struct pagevec *pvec; int i; @@ -398,8 +380,8 @@ static void __lru_cache_activate_folio(struct folio *folio) for (i = pagevec_count(pvec) - 1; i >= 0; i--) { struct page *pagevec_page = pvec->pages[i]; - if (pagevec_page == &folio->page) { - folio_set_active(folio); + if (pagevec_page == page) { + SetPageActive(page); break; } } @@ -417,59 +399,61 @@ static void __lru_cache_activate_folio(struct folio *folio) * When a newly allocated page is not yet visible, so safe for non-atomic ops, * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). */ -void folio_mark_accessed(struct folio *folio) +void mark_page_accessed(struct page *page) { - if (!folio_test_referenced(folio)) { - folio_set_referenced(folio); - } else if (folio_test_unevictable(folio)) { + page = compound_head(page); + + if (!PageReferenced(page)) { + SetPageReferenced(page); + } else if (PageUnevictable(page)) { /* * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, * this list is never rotated or maintained, so marking an * evictable page accessed has no effect. */ - } else if (!folio_test_active(folio)) { + } else if (!PageActive(page)) { /* * If the page is on the LRU, queue it for activation via * lru_pvecs.activate_page. Otherwise, assume the page is on a * pagevec, mark it active and it'll be moved to the active * LRU on the next drain. */ - if (folio_test_lru(folio)) - folio_activate(folio); + if (PageLRU(page)) + activate_page(page); else - __lru_cache_activate_folio(folio); - folio_clear_referenced(folio); - workingset_activation(folio); + __lru_cache_activate_page(page); + ClearPageReferenced(page); + workingset_activation(page); } - if (folio_test_idle(folio)) - folio_clear_idle(folio); + if (page_is_idle(page)) + clear_page_idle(page); } -EXPORT_SYMBOL(folio_mark_accessed); +EXPORT_SYMBOL(mark_page_accessed); /** - * folio_add_lru - Add a folio to an LRU list. - * @folio: The folio to be added to the LRU. + * lru_cache_add - add a page to a page list + * @page: the page to be added to the LRU. * - * Queue the folio for addition to the LRU. The decision on whether + * Queue the page for addition to the LRU via pagevec. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the - * pagevec is drained. This gives a chance for the caller of folio_add_lru() - * have the folio added to the active list using folio_mark_accessed(). + * pagevec is drained. This gives a chance for the caller of lru_cache_add() + * have the page added to the active list using mark_page_accessed(). */ -void folio_add_lru(struct folio *folio) +void lru_cache_add(struct page *page) { struct pagevec *pvec; - VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageLRU(page), page); - folio_get(folio); + get_page(page); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); - if (pagevec_add_and_need_flush(pvec, &folio->page)) + if (pagevec_add_and_need_flush(pvec, page)) __pagevec_lru_add(pvec); local_unlock(&lru_pvecs.lock); } -EXPORT_SYMBOL(folio_add_lru); +EXPORT_SYMBOL(lru_cache_add); /** * lru_cache_add_inactive_or_unevictable @@ -882,7 +866,7 @@ void lru_cache_disable(void) * all online CPUs so any calls of lru_cache_disabled wrapped by * local_lock or preemption disabled would be ordered by that. 
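mark_page_accessed() above restores the two-touch promotion policy: the first access only records a referenced bit, the second promotes the page to the active list and clears the bit. A toy state machine showing only that policy, with plain bools instead of atomic page flags and the pagevec deferral omitted:

#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	bool referenced;
	bool active;
	bool unevictable;
};

/* inactive,unreferenced -> inactive,referenced -> active,unreferenced */
static void touch(struct fake_page *p)
{
	if (!p->referenced) {
		p->referenced = true;		/* first touch */
	} else if (p->unevictable) {
		/* unevictable list is never rotated: nothing to do */
	} else if (!p->active) {
		p->active = true;		/* second touch: promote */
		p->referenced = false;
	}
}

int main(void)
{
	struct fake_page p = { 0 };

	for (int i = 1; i <= 3; i++) {
		touch(&p);
		printf("touch %d: referenced=%d active=%d\n",
		       i, p.referenced, p.active);
	}
	return 0;
}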
* The atomic operation doesn't need to have stronger ordering * requirements because that is enforced by the scheduling * guarantees. */ __lru_add_drain_all(true); @@ -904,12 +888,11 @@ void release_pages(struct page **pages, int nr) int i; LIST_HEAD(pages_to_free); struct lruvec *lruvec = NULL; - unsigned long flags = 0; + unsigned long flags; unsigned int lock_batch; for (i = 0; i < nr; i++) { struct page *page = pages[i]; - struct folio *folio = page_folio(page); /* * Make sure the IRQ-safe lock-holding time does not get @@ -921,7 +904,7 @@ void release_pages(struct page **pages, int nr) lruvec = NULL; } - page = &folio->page; + page = compound_head(page); if (is_huge_zero_page(page)) continue; @@ -960,7 +943,7 @@ void release_pages(struct page **pages, int nr) if (PageLRU(page)) { struct lruvec *prev_lruvec = lruvec; - lruvec = folio_lruvec_relock_irqsave(folio, lruvec, + lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); if (prev_lruvec != lruvec) lock_batch = 0; @@ -1002,18 +985,17 @@ void __pagevec_release(struct pagevec *pvec) } EXPORT_SYMBOL(__pagevec_release); -static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec) +static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) { - int was_unevictable = folio_test_clear_unevictable(folio); - long nr_pages = folio_nr_pages(folio); + int was_unevictable = TestClearPageUnevictable(page); + int nr_pages = thp_nr_pages(page); - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON_PAGE(PageLRU(page), page); /* - * A folio becomes evictable in two ways: + * Page becomes evictable in two ways: * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()]. - * 2) Before acquiring LRU lock to put the folio on the correct LRU - * and then + * 2) Before acquiring LRU lock to put the page to correct LRU and then * a) do PageLRU check with lock [check_move_unevictable_pages] * b) do PageLRU check before lock [clear_page_mlock] * * @@ -1022,36 +1004,35 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec) * * #0: __pagevec_lru_add_fn #1: clear_page_mlock * - * folio_set_lru() folio_test_clear_mlocked() + * SetPageLRU() TestClearPageMlocked() * smp_mb() // explicit ordering // above provides strict * // ordering - * folio_test_mlocked() folio_test_lru() + * PageMlocked() PageLRU() * * - * if '#1' does not observe setting of PG_lru by '#0' and - * fails isolation, the explicit barrier will make sure that - * folio_evictable check will put the folio on the correct - * LRU. Without smp_mb(), folio_set_lru() can be reordered - * after folio_test_mlocked() check and can make '#1' fail the - * isolation of the folio whose mlocked bit is cleared (#0 is - * also looking at the same folio) and the evictable folio will - * be stranded on an unevictable LRU. + * if '#1' does not observe setting of PG_lru by '#0' and fails + * isolation, the explicit barrier will make sure that page_evictable + * check will put the page in correct LRU. Without smp_mb(), SetPageLRU + * can be reordered after PageMlocked check and can make '#1' to fail + * the isolation of the page whose Mlocked bit is cleared (#0 is also + * looking at the same page) and the evictable page will be stranded + * in an unevictable LRU.
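release_pages() above bounds the irq-safe lock hold time by releasing the lruvec lock after every SWAP_CLUSTER_MAX (32) pages. A sketch of that lock-batching pattern; the mutex stands in for the irq-saving spinlock and the item loop for the real page teardown.

#include <pthread.h>
#include <stdio.h>

#define BATCH_LIMIT 32		/* kernel uses SWAP_CLUSTER_MAX here */

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

/* Process n items, never holding the lock across more than BATCH_LIMIT. */
static void release_batch(int *items, int n)
{
	int held = 0;		/* do we currently hold the lock? */
	int batch = 0;

	for (int i = 0; i < n; i++) {
		if (held && ++batch == BATCH_LIMIT) {
			pthread_mutex_unlock(&lru_lock);
			held = 0;	/* give waiters a chance */
		}
		if (!held) {
			pthread_mutex_lock(&lru_lock);
			held = 1;
			batch = 0;
		}
		items[i] = 0;	/* "free" the item under the lock */
	}
	if (held)
		pthread_mutex_unlock(&lru_lock);
}

int main(void)
{
	int items[100];

	release_batch(items, 100);
	printf("released 100 items in lock batches of %d\n", BATCH_LIMIT);
	return 0;
}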
*/ - folio_set_lru(folio); + SetPageLRU(page); smp_mb__after_atomic(); - if (folio_evictable(folio)) { + if (page_evictable(page)) { if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { - folio_clear_active(folio); - folio_set_unevictable(folio); + ClearPageActive(page); + SetPageUnevictable(page); if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } - lruvec_add_folio(lruvec, folio); - trace_mm_lru_insertion(folio); + add_page_to_lru_list(page, lruvec); + trace_mm_lru_insertion(page); } /* @@ -1065,10 +1046,10 @@ void __pagevec_lru_add(struct pagevec *pvec) unsigned long flags = 0; for (i = 0; i < pagevec_count(pvec); i++) { - struct folio *folio = page_folio(pvec->pages[i]); + struct page *page = pvec->pages[i]; - lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); - __pagevec_lru_add_fn(folio, lruvec); + lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); + __pagevec_lru_add_fn(page, lruvec); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); @@ -1077,24 +1058,24 @@ void __pagevec_lru_add(struct pagevec *pvec) } /** - * folio_batch_remove_exceptionals() - Prune non-folios from a batch. - * @fbatch: The batch to prune + * pagevec_remove_exceptionals - pagevec exceptionals pruning + * @pvec: The pagevec to prune * - * find_get_entries() fills a batch with both folios and shadow/swap/DAX - * entries. This function prunes all the non-folio entries from @fbatch - * without leaving holes, so that it can be passed on to folio-only batch - * operations. + * find_get_entries() fills both pages and XArray value entries (aka + * exceptional entries) into the pagevec. This function prunes all + * exceptionals from @pvec without leaving holes, so that it can be + * passed on to page-only pagevec operations. 
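The SetPageLRU()/smp_mb()/PageMlocked() comment above describes a store-buffering pattern: each side publishes its own flag with full ordering before reading the other side's, so at least one of the two racing paths is guaranteed to observe the other. Below is a schematic C11 model of that pattern only; it uses portable atomics rather than the kernel's page-flag primitives, and for brevity the two sides run sequentially instead of on real threads.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int page_lru;		/* models PG_lru */
static atomic_int page_mlocked = 1;	/* models PG_mlocked */

static void lru_add_side(void)
{
	atomic_store_explicit(&page_lru, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb() */
	if (atomic_load_explicit(&page_mlocked, memory_order_relaxed))
		printf("lru side: still mlocked, keep unevictable\n");
	else
		printf("lru side: evictable, put on normal LRU\n");
}

static void munlock_side(void)
{
	/* TestClearPageMlocked() is a fully ordered RMW in the kernel */
	atomic_exchange(&page_mlocked, 0);
	if (atomic_load_explicit(&page_lru, memory_order_relaxed))
		printf("munlock side: saw PG_lru, can isolate and fix up\n");
	else
		printf("munlock side: not on LRU yet, other side fixes up\n");
}

int main(void)
{
	munlock_side();
	lru_add_side();
	return 0;
}

Without the fence, both relaxed loads could miss both stores and the page would stay stranded on the unevictable list, which is exactly the failure mode the comment warns about.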
*/ -void folio_batch_remove_exceptionals(struct folio_batch *fbatch) +void pagevec_remove_exceptionals(struct pagevec *pvec) { - unsigned int i, j; + int i, j; - for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) { - struct folio *folio = fbatch->folios[i]; - if (!xa_is_value(folio)) - fbatch->folios[j++] = folio; + for (i = 0, j = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + if (!xa_is_value(page)) + pvec->pages[j++] = page; } - fbatch->nr = j; + pvec->nr = j; } /** diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 2b55318405..16f706c55d 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/swap_state.c b/mm/swap_state.c index 8d41042421..bc7cee6b2e 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -498,7 +498,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, mem_cgroup_swapin_uncharge_swap(entry); if (shadow) - workingset_refault(page_folio(page), shadow); + workingset_refault(page, shadow); /* Caller will initiate read into locked page */ lru_cache_add(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index bf0df7aa71..22d10f7138 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, unsigned char); static void free_swap_count_continuations(struct swap_info_struct *); -static DEFINE_SPINLOCK(swap_lock); +DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; atomic_long_t nr_swap_pages; /* @@ -71,7 +71,7 @@ static const char Unused_offset[] = "Unused swap offset entry "; * all active swap_info_structs * protected with swap_lock, and ordered by priority. 
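pagevec_remove_exceptionals() above (and truncate_exceptional_pvec_entries() later in this series) uses a classic two-pointer, in-place compaction to drop XArray value entries while preserving order. A sketch with an illustrative is_value_entry(), modeled on the XArray convention that value entries have bit 0 set:

#include <stdint.h>
#include <stdio.h>

/* Model of xa_is_value(): tagged entries have their low bit set. */
static int is_value_entry(void *p)
{
	return (uintptr_t)p & 1;
}

/*
 * Keep only real page pointers, preserving their order and leaving
 * no holes; returns the new element count.
 */
static unsigned int prune_exceptionals(void **slots, unsigned int nr)
{
	unsigned int i, j;

	for (i = 0, j = 0; i < nr; i++)
		if (!is_value_entry(slots[i]))
			slots[j++] = slots[i];
	return j;
}

int main(void)
{
	int a, b;
	void *slots[4] = { &a, (void *)(uintptr_t)0x3, &b,
			   (void *)(uintptr_t)0x5 };
	unsigned int nr = prune_exceptionals(slots, 4);

	printf("kept %u of 4 entries\n", nr);
	return 0;
}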
*/ -static PLIST_HEAD(swap_active_head); +PLIST_HEAD(swap_active_head); /* * all available (active, not full) swap_info_structs @@ -1601,30 +1601,31 @@ static bool page_swapped(struct page *page) return false; } -static int page_trans_huge_map_swapcount(struct page *page, +static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount, int *total_swapcount) { - int i, map_swapcount, _total_swapcount; + int i, map_swapcount, _total_mapcount, _total_swapcount; unsigned long offset = 0; struct swap_info_struct *si; struct swap_cluster_info *ci = NULL; unsigned char *map = NULL; - int swapcount = 0; + int mapcount, swapcount = 0; /* hugetlbfs shouldn't call it */ VM_BUG_ON_PAGE(PageHuge(page), page); if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) { + mapcount = page_trans_huge_mapcount(page, total_mapcount); if (PageSwapCache(page)) swapcount = page_swapcount(page); if (total_swapcount) *total_swapcount = swapcount; - return swapcount + page_trans_huge_mapcount(page); + return mapcount + swapcount; } page = compound_head(page); - _total_swapcount = map_swapcount = 0; + _total_mapcount = _total_swapcount = map_swapcount = 0; if (PageSwapCache(page)) { swp_entry_t entry; @@ -1638,7 +1639,8 @@ static int page_trans_huge_map_swapcount(struct page *page, if (map) ci = lock_cluster(si, offset); for (i = 0; i < HPAGE_PMD_NR; i++) { - int mapcount = atomic_read(&page[i]._mapcount) + 1; + mapcount = atomic_read(&page[i]._mapcount) + 1; + _total_mapcount += mapcount; if (map) { swapcount = swap_count(map[offset + i]); _total_swapcount += swapcount; @@ -1646,14 +1648,19 @@ static int page_trans_huge_map_swapcount(struct page *page, map_swapcount = max(map_swapcount, mapcount + swapcount); } unlock_cluster(ci); - - if (PageDoubleMap(page)) + if (PageDoubleMap(page)) { map_swapcount -= 1; - + _total_mapcount -= HPAGE_PMD_NR; + } + mapcount = compound_mapcount(page); + map_swapcount += mapcount; + _total_mapcount += mapcount; + if (total_mapcount) + *total_mapcount = _total_mapcount; if (total_swapcount) *total_swapcount = _total_swapcount; - return map_swapcount + compound_mapcount(page); + return map_swapcount; } /* @@ -1661,15 +1668,22 @@ static int page_trans_huge_map_swapcount(struct page *page, * to it. And as a side-effect, free up its swap: because the old content * on disk will never be read, and seeking back there to write new content * later would only waste time away from clustering. + * + * NOTE: total_map_swapcount should not be relied upon by the caller if + * reuse_swap_page() returns false, but it may be always overwritten + * (see the other implementation for CONFIG_SWAP=n). 
*/ -bool reuse_swap_page(struct page *page) +bool reuse_swap_page(struct page *page, int *total_map_swapcount) { - int count, total_swapcount; + int count, total_mapcount, total_swapcount; VM_BUG_ON_PAGE(!PageLocked(page), page); if (unlikely(PageKsm(page))) return false; - count = page_trans_huge_map_swapcount(page, &total_swapcount); + count = page_trans_huge_map_swapcount(page, &total_mapcount, + &total_swapcount); + if (total_map_swapcount) + *total_map_swapcount = total_mapcount + total_swapcount; if (count == 1 && PageSwapCache(page) && (likely(!PageTransCompound(page)) || /* The remaining swap count will be freed soon */ @@ -1903,14 +1917,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); get_page(page); + set_pte_at(vma->vm_mm, addr, pte, + pte_mkold(mk_pte(page, vma->vm_page_prot))); if (page == swapcache) { page_add_anon_rmap(page, vma, addr, false); } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, addr, false); lru_cache_add_inactive_or_unevictable(page, vma); } - set_pte_at(vma->vm_mm, addr, pte, - pte_mkold(mk_pte(page, vma->vm_page_prot))); swap_free(entry); out: pte_unmap_unlock(pte, ptl); @@ -1923,7 +1937,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, - unsigned int type) + unsigned int type, bool frontswap, + unsigned long *fs_pages_to_unuse) { struct page *page; swp_entry_t entry; @@ -1944,6 +1959,9 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, continue; offset = swp_offset(entry); + if (frontswap && !frontswap_test(si, offset)) + continue; + pte_unmap(pte); swap_map = &si->swap_map[offset]; page = lookup_swap_cache(entry, vma, addr); @@ -1975,6 +1993,11 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, try_to_free_swap(page); unlock_page(page); put_page(page); + + if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) { + ret = FRONTSWAP_PAGES_UNUSED; + goto out; + } try_next: pte = pte_offset_map(pmd, addr); } while (pte++, addr += PAGE_SIZE, addr != end); @@ -1987,7 +2010,8 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, - unsigned int type) + unsigned int type, bool frontswap, + unsigned long *fs_pages_to_unuse) { pmd_t *pmd; unsigned long next; @@ -1999,7 +2023,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; - ret = unuse_pte_range(vma, pmd, addr, next, type); + ret = unuse_pte_range(vma, pmd, addr, next, type, + frontswap, fs_pages_to_unuse); if (ret) return ret; } while (pmd++, addr = next, addr != end); @@ -2008,7 +2033,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, - unsigned int type) + unsigned int type, bool frontswap, + unsigned long *fs_pages_to_unuse) { pud_t *pud; unsigned long next; @@ -2019,7 +2045,8 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - ret = unuse_pmd_range(vma, pud, addr, next, type); + ret = unuse_pmd_range(vma, pud, addr, next, type, + frontswap, 
fs_pages_to_unuse); if (ret) return ret; } while (pud++, addr = next, addr != end); @@ -2028,7 +2055,8 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, - unsigned int type) + unsigned int type, bool frontswap, + unsigned long *fs_pages_to_unuse) { p4d_t *p4d; unsigned long next; @@ -2039,14 +2067,16 @@ static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; - ret = unuse_pud_range(vma, p4d, addr, next, type); + ret = unuse_pud_range(vma, p4d, addr, next, type, + frontswap, fs_pages_to_unuse); if (ret) return ret; } while (p4d++, addr = next, addr != end); return 0; } -static int unuse_vma(struct vm_area_struct *vma, unsigned int type) +static int unuse_vma(struct vm_area_struct *vma, unsigned int type, + bool frontswap, unsigned long *fs_pages_to_unuse) { pgd_t *pgd; unsigned long addr, end, next; @@ -2060,14 +2090,16 @@ static int unuse_vma(struct vm_area_struct *vma, unsigned int type) next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - ret = unuse_p4d_range(vma, pgd, addr, next, type); + ret = unuse_p4d_range(vma, pgd, addr, next, type, + frontswap, fs_pages_to_unuse); if (ret) return ret; } while (pgd++, addr = next, addr != end); return 0; } -static int unuse_mm(struct mm_struct *mm, unsigned int type) +static int unuse_mm(struct mm_struct *mm, unsigned int type, + bool frontswap, unsigned long *fs_pages_to_unuse) { struct vm_area_struct *vma; int ret = 0; @@ -2075,7 +2107,8 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type) mmap_read_lock(mm); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma->anon_vma) { - ret = unuse_vma(vma, type); + ret = unuse_vma(vma, type, frontswap, + fs_pages_to_unuse); if (ret) break; } @@ -2091,7 +2124,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type) * if there are no inuse entries after prev till end of the map. 
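find_next_to_unuse(), whose loop follows just below, is a resumable linear scan: skip free and SWAP_MAP_BAD slots, and offer to reschedule every LATENCY_LIMIT entries so walking a huge swap map stays preemptible. A userspace model of the scan; fake_cond_resched() is a stand-in, the constants mirror the kernel's values, and the frontswap filter and wraparound handling are omitted.

#include <stdio.h>

#define LATENCY_LIMIT 256	/* kernel yields every 256 slots */
#define SWAP_MAP_BAD 0x3f	/* kernel's "unusable slot" marker */

/* Pretend cond_resched(): in the kernel this may yield the CPU. */
static void fake_cond_resched(void)
{
}

/*
 * Find the next in-use swap slot after `prev`; returns 0 when no
 * further in-use slot exists, so the caller can loop until done.
 */
static unsigned int next_in_use(const unsigned char *swap_map,
				unsigned int max, unsigned int prev)
{
	unsigned int i;

	for (i = prev + 1; i < max; i++) {
		unsigned char count = swap_map[i];

		if (count && count != SWAP_MAP_BAD)
			return i;
		if ((i % LATENCY_LIMIT) == 0)
			fake_cond_resched();
	}
	return 0;
}

int main(void)
{
	unsigned char map[16] = { 0 };

	map[3] = 1;
	map[9] = SWAP_MAP_BAD;
	map[12] = 2;
	for (unsigned int i = 0; (i = next_in_use(map, 16, i)) != 0; )
		printf("slot %u is in use\n", i);
	return 0;
}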
*/ static unsigned int find_next_to_unuse(struct swap_info_struct *si, - unsigned int prev) + unsigned int prev, bool frontswap) { unsigned int i; unsigned char count; @@ -2105,7 +2138,8 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, for (i = prev + 1; i < si->max; i++) { count = READ_ONCE(si->swap_map[i]); if (count && swap_count(count) != SWAP_MAP_BAD) - break; + if (!frontswap || frontswap_test(si, i)) + break; if ((i % LATENCY_LIMIT) == 0) cond_resched(); } @@ -2116,7 +2150,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, return i; } -static int try_to_unuse(unsigned int type) +/* + * If the boolean frontswap is true, only unuse pages_to_unuse pages; + * pages_to_unuse==0 means all pages; ignored if frontswap is false + */ +int try_to_unuse(unsigned int type, bool frontswap, + unsigned long pages_to_unuse) { struct mm_struct *prev_mm; struct mm_struct *mm; @@ -2130,10 +2169,13 @@ static int try_to_unuse(unsigned int type) if (!READ_ONCE(si->inuse_pages)) return 0; + if (!frontswap) + pages_to_unuse = 0; + retry: - retval = shmem_unuse(type); + retval = shmem_unuse(type, frontswap, &pages_to_unuse); if (retval) - return retval; + goto out; prev_mm = &init_mm; mmget(prev_mm); @@ -2150,10 +2192,11 @@ static int try_to_unuse(unsigned int type) spin_unlock(&mmlist_lock); mmput(prev_mm); prev_mm = mm; - retval = unuse_mm(mm, type); + retval = unuse_mm(mm, type, frontswap, &pages_to_unuse); + if (retval) { mmput(prev_mm); - return retval; + goto out; } /* @@ -2170,7 +2213,7 @@ static int try_to_unuse(unsigned int type) i = 0; while (READ_ONCE(si->inuse_pages) && !signal_pending(current) && - (i = find_next_to_unuse(si, i)) != 0) { + (i = find_next_to_unuse(si, i, frontswap)) != 0) { entry = swp_entry(type, i); page = find_get_page(swap_address_space(entry), i); @@ -2188,6 +2231,14 @@ static int try_to_unuse(unsigned int type) try_to_free_swap(page); unlock_page(page); put_page(page); + + /* + * For frontswap, we just need to unuse pages_to_unuse, if + * it was specified. Need not check frontswap again here as + * we already zeroed out pages_to_unuse if not frontswap. + */ + if (pages_to_unuse && --pages_to_unuse == 0) + goto out; } /* @@ -2205,10 +2256,10 @@ static int try_to_unuse(unsigned int type) if (READ_ONCE(si->inuse_pages)) { if (!signal_pending(current)) goto retry; - return -EINTR; + retval = -EINTR; } - - return 0; +out: + return (retval == FRONTSWAP_PAGES_UNUSED) ? 
0 : retval; } /* @@ -2426,8 +2477,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, struct swap_cluster_info *cluster_info, unsigned long *frontswap_map) { - if (IS_ENABLED(CONFIG_FRONTSWAP)) - frontswap_init(p->type, frontswap_map); + frontswap_init(p->type, frontswap_map); spin_lock(&swap_lock); spin_lock(&p->lock); setup_swap_info(p, prio, swap_map, cluster_info); @@ -2540,7 +2590,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) disable_swap_slots_cache_lock(); set_current_oom_origin(); - err = try_to_unuse(p->type); + err = try_to_unuse(p->type, false, 0); /* force unuse all pages */ clear_current_oom_origin(); if (err) { @@ -2713,7 +2763,7 @@ static int swap_show(struct seq_file *swap, void *v) struct swap_info_struct *si = v; struct file *file; int len; - unsigned long bytes, inuse; + unsigned int bytes, inuse; if (si == SEQ_START_TOKEN) { seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n"); @@ -2725,7 +2775,7 @@ static int swap_show(struct seq_file *swap, void *v) file = si->swap_file; len = seq_file_path(swap, file, " \t\n\\"); - seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n", + seq_printf(swap, "%*s%s\t%u\t%s%u\t%s%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file_inode(file)->i_mode) ? "partition" : "file\t", @@ -3068,7 +3118,7 @@ static bool swap_discardable(struct swap_info_struct *si) { struct request_queue *q = bdev_get_queue(si->bdev); - if (!blk_queue_discard(q)) + if (!q || !blk_queue_discard(q)) return false; return true; @@ -3484,13 +3534,13 @@ struct swap_info_struct *page_swap_info(struct page *page) } /* - * out-of-line methods to avoid include hell. + * out-of-line __page_file_ methods to avoid include hell. */ -struct address_space *swapcache_mapping(struct folio *folio) +struct address_space *__page_file_mapping(struct page *page) { - return page_swap_info(&folio->page)->swap_file->f_mapping; + return page_swap_info(page)->swap_file->f_mapping; } -EXPORT_SYMBOL_GPL(swapcache_mapping); +EXPORT_SYMBOL_GPL(__page_file_mapping); pgoff_t __page_file_index(struct page *page) { diff --git a/mm/truncate.c b/mm/truncate.c index 9dbf0b75da..714eaf1982 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -22,6 +22,7 @@ #include /* grr. try_to_release_page, do_invalidatepage */ #include +#include #include #include "internal.h" @@ -44,22 +45,18 @@ static inline void __clear_shadow_entry(struct address_space *mapping, static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, void *entry) { - spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); __clear_shadow_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); - if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); - spin_unlock(&mapping->host->i_lock); } /* * Unconditionally remove exceptional entries. Usually called from truncate - * path. Note that the folio_batch may be altered by this function by removing - * exceptional entries similar to what folio_batch_remove_exceptionals() does. + * path. Note that the pagevec may be altered by this function by removing + * exceptional entries similar to what pagevec_remove_exceptionals does. 
*/ -static void truncate_folio_batch_exceptionals(struct address_space *mapping, - struct folio_batch *fbatch, pgoff_t *indices) +static void truncate_exceptional_pvec_entries(struct address_space *mapping, + struct pagevec *pvec, pgoff_t *indices) { int i, j; bool dax; @@ -68,25 +65,23 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping, if (shmem_mapping(mapping)) return; - for (j = 0; j < folio_batch_count(fbatch); j++) - if (xa_is_value(fbatch->folios[j])) + for (j = 0; j < pagevec_count(pvec); j++) + if (xa_is_value(pvec->pages[j])) break; - if (j == folio_batch_count(fbatch)) + if (j == pagevec_count(pvec)) return; dax = dax_mapping(mapping); - if (!dax) { - spin_lock(&mapping->host->i_lock); + if (!dax) xa_lock_irq(&mapping->i_pages); - } - for (i = j; i < folio_batch_count(fbatch); i++) { - struct folio *folio = fbatch->folios[i]; + for (i = j; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; pgoff_t index = indices[i]; - if (!xa_is_value(folio)) { - fbatch->folios[j++] = folio; + if (!xa_is_value(page)) { + pvec->pages[j++] = page; continue; } @@ -95,16 +90,12 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping, continue; } - __clear_shadow_entry(mapping, index, folio); + __clear_shadow_entry(mapping, index, page); } - if (!dax) { + if (!dax) xa_unlock_irq(&mapping->i_pages); - if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); - spin_unlock(&mapping->host->i_lock); - } - fbatch->nr = j; + pvec->nr = j; } /* @@ -176,21 +167,21 @@ void do_invalidatepage(struct page *page, unsigned int offset, * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ -static void truncate_cleanup_folio(struct folio *folio) +static void truncate_cleanup_page(struct page *page) { - if (folio_mapped(folio)) - unmap_mapping_folio(folio); + if (page_mapped(page)) + unmap_mapping_page(page); - if (folio_has_private(folio)) - do_invalidatepage(&folio->page, 0, folio_size(folio)); + if (page_has_private(page)) + do_invalidatepage(page, 0, thp_size(page)); /* * Some filesystems seem to re-dirty the page even after * the VM has canceled the dirty bit (eg ext3 journaling). * Hence dirty accounting check is placed after invalidation. */ - folio_cancel_dirty(folio); - folio_clear_mappedtodisk(folio); + cancel_dirty_page(page); + ClearPageMappedToDisk(page); } /* @@ -204,6 +195,7 @@ static void truncate_cleanup_folio(struct folio *folio) static int invalidate_complete_page(struct address_space *mapping, struct page *page) { + int ret; if (page->mapping != mapping) return 0; @@ -211,77 +203,28 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) if (page_has_private(page) && !try_to_release_page(page, 0)) return 0; - return remove_mapping(mapping, page); + ret = remove_mapping(mapping, page); + + return ret; } -int truncate_inode_folio(struct address_space *mapping, struct folio *folio) +int truncate_inode_page(struct address_space *mapping, struct page *page) { - if (folio->mapping != mapping) + VM_BUG_ON_PAGE(PageTail(page), page); + + if (page->mapping != mapping) return -EIO; - truncate_cleanup_folio(folio); - filemap_remove_folio(folio); + truncate_cleanup_page(page); + delete_from_page_cache(page); return 0; } -/* - * Handle partial folios. The folio may be entirely within the - * range if a split has raced with us. 
If not, we zero the part of the - * folio that's within the [start, end] range, and then split the folio if - * it's large. split_page_range() will discard pages which now lie beyond - * i_size, and we rely on the caller to discard pages which lie within a - * newly created hole. - * - * Returns false if splitting failed so the caller can avoid - * discarding the entire folio which is stubbornly unsplit. - */ -bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) -{ - loff_t pos = folio_pos(folio); - unsigned int offset, length; - - if (pos < start) - offset = start - pos; - else - offset = 0; - length = folio_size(folio); - if (pos + length <= (u64)end) - length = length - offset; - else - length = end + 1 - pos - offset; - - folio_wait_writeback(folio); - if (length == folio_size(folio)) { - truncate_inode_folio(folio->mapping, folio); - return true; - } - - /* - * We may be zeroing pages we're about to discard, but it avoids - * doing a complex calculation here, and then doing the zeroing - * anyway if the page split fails. - */ - folio_zero_range(folio, offset, length); - - if (folio_has_private(folio)) - do_invalidatepage(&folio->page, offset, length); - if (!folio_test_large(folio)) - return true; - if (split_huge_page(&folio->page) == 0) - return true; - if (folio_test_dirty(folio)) - return false; - truncate_inode_folio(folio->mapping, folio); - return true; -} - /* * Used to get rid of pages on hardware memory corruption. */ int generic_error_remove_page(struct address_space *mapping, struct page *page) { - VM_BUG_ON_PAGE(PageTail(page), page); - if (!mapping) return -EINVAL; /* @@ -290,7 +233,7 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page) */ if (!S_ISREG(mapping->host->i_mode)) return -EIO; - return truncate_inode_folio(mapping, page_folio(page)); + return truncate_inode_page(mapping, page); } EXPORT_SYMBOL(generic_error_remove_page); @@ -341,15 +284,19 @@ void truncate_inode_pages_range(struct address_space *mapping, { pgoff_t start; /* inclusive */ pgoff_t end; /* exclusive */ - struct folio_batch fbatch; + unsigned int partial_start; /* inclusive */ + unsigned int partial_end; /* exclusive */ + struct pagevec pvec; pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index; int i; - struct folio *folio; - bool same_folio; if (mapping_empty(mapping)) - return; + goto out; + + /* Offsets within partial pages */ + partial_start = lstart & (PAGE_SIZE - 1); + partial_end = (lend + 1) & (PAGE_SIZE - 1); /* * 'start' and 'end' always covers the range of pages to be fully @@ -368,49 +315,64 @@ void truncate_inode_pages_range(struct address_space *mapping, else end = (lend + 1) >> PAGE_SHIFT; - folio_batch_init(&fbatch); + pagevec_init(&pvec); index = start; while (index < end && find_lock_entries(mapping, index, end - 1, - &fbatch, indices)) { - index = indices[folio_batch_count(&fbatch) - 1] + 1; - truncate_folio_batch_exceptionals(mapping, &fbatch, indices); - for (i = 0; i < folio_batch_count(&fbatch); i++) - truncate_cleanup_folio(fbatch.folios[i]); - delete_from_page_cache_batch(mapping, &fbatch); - for (i = 0; i < folio_batch_count(&fbatch); i++) - folio_unlock(fbatch.folios[i]); - folio_batch_release(&fbatch); + &pvec, indices)) { + index = indices[pagevec_count(&pvec) - 1] + 1; + truncate_exceptional_pvec_entries(mapping, &pvec, indices); + for (i = 0; i < pagevec_count(&pvec); i++) + truncate_cleanup_page(pvec.pages[i]); + delete_from_page_cache_batch(mapping, &pvec); + for (i = 0; i < pagevec_count(&pvec); i++) + 
unlock_page(pvec.pages[i]); + pagevec_release(&pvec); cond_resched(); } - same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); - folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); - if (folio) { - same_folio = lend < folio_pos(folio) + folio_size(folio); - if (!truncate_inode_partial_folio(folio, lstart, lend)) { - start = folio->index + folio_nr_pages(folio); - if (same_folio) - end = folio->index; + if (partial_start) { + struct page *page = find_lock_page(mapping, start - 1); + if (page) { + unsigned int top = PAGE_SIZE; + if (start > end) { + /* Truncation within a single page */ + top = partial_end; + partial_end = 0; + } + wait_on_page_writeback(page); + zero_user_segment(page, partial_start, top); + cleancache_invalidate_page(mapping, page); + if (page_has_private(page)) + do_invalidatepage(page, partial_start, + top - partial_start); + unlock_page(page); + put_page(page); } - folio_unlock(folio); - folio_put(folio); - folio = NULL; } - - if (!same_folio) - folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT, - FGP_LOCK, 0); - if (folio) { - if (!truncate_inode_partial_folio(folio, lstart, lend)) - end = folio->index; - folio_unlock(folio); - folio_put(folio); + if (partial_end) { + struct page *page = find_lock_page(mapping, end); + if (page) { + wait_on_page_writeback(page); + zero_user_segment(page, 0, partial_end); + cleancache_invalidate_page(mapping, page); + if (page_has_private(page)) + do_invalidatepage(page, 0, + partial_end); + unlock_page(page); + put_page(page); + } } + /* + * If the truncation happened within a single page no pages + * will be released, just zeroed, so we can bail out now. + */ + if (start >= end) + goto out; index = start; - while (index < end) { + for ( ; ; ) { cond_resched(); - if (!find_get_entries(mapping, index, end - 1, &fbatch, + if (!find_get_entries(mapping, index, end - 1, &pvec, indices)) { /* If all gone from start onwards, we're done */ if (index == start) @@ -420,26 +382,28 @@ void truncate_inode_pages_range(struct address_space *mapping, continue; } - for (i = 0; i < folio_batch_count(&fbatch); i++) { - struct folio *folio = fbatch.folios[i]; + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; /* We rely upon deletion not changing page->index */ index = indices[i]; - if (xa_is_value(folio)) + if (xa_is_value(page)) continue; - folio_lock(folio); - VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); - folio_wait_writeback(folio); - truncate_inode_folio(mapping, folio); - folio_unlock(folio); - index = folio_index(folio) + folio_nr_pages(folio) - 1; + lock_page(page); + WARN_ON(page_to_index(page) != index); + wait_on_page_writeback(page); + truncate_inode_page(mapping, page); + unlock_page(page); } - truncate_folio_batch_exceptionals(mapping, &fbatch, indices); - folio_batch_release(&fbatch); + truncate_exceptional_pvec_entries(mapping, &pvec, indices); + pagevec_release(&pvec); index++; } + +out: + cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -493,6 +457,10 @@ void truncate_inode_pages_final(struct address_space *mapping) xa_unlock_irq(&mapping->i_pages); } + /* + * Cleancache needs notification even if there are no pages or shadow + * entries. 
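The partial_start/partial_end arithmetic restored above splits a byte range into whole pages to drop plus up to two boundary pages to zero. A worked model of just that arithmetic, assuming 4 KiB pages; the lend == -1 "to end of file" case and the single-page case that the code handles via `top` are ignored here.

#include <stdio.h>

#define FAKE_PAGE_SIZE 4096ul

/*
 * lstart and lend are byte offsets, lend inclusive.  Pages wholly
 * inside the range are dropped; boundary pages are partially zeroed.
 */
static void truncation_plan(unsigned long lstart, unsigned long lend)
{
	unsigned long partial_start = lstart & (FAKE_PAGE_SIZE - 1);
	unsigned long partial_end = (lend + 1) & (FAKE_PAGE_SIZE - 1);
	unsigned long start = (lstart + FAKE_PAGE_SIZE - 1) / FAKE_PAGE_SIZE;
	unsigned long end = (lend + 1) / FAKE_PAGE_SIZE;

	printf("bytes [%lu, %lu]: drop whole pages [%lu, %lu)\n",
	       lstart, lend, start, end);
	if (partial_start)	/* head page: zero from offset to page end */
		printf("  zero page %lu from offset %lu\n",
		       start - 1, partial_start);
	if (partial_end)	/* tail page: zero the first partial_end bytes */
		printf("  zero page %lu up to offset %lu\n", end, partial_end);
}

int main(void)
{
	truncation_plan(0, 4095);	/* one whole page, no partials */
	truncation_plan(1000, 10000);	/* partial head and tail pages */
	return 0;
}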
+ */ truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); @@ -501,16 +469,16 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_pagevec) { pgoff_t indices[PAGEVEC_SIZE]; - struct folio_batch fbatch; + struct pagevec pvec; pgoff_t index = start; unsigned long ret; unsigned long count = 0; int i; - folio_batch_init(&fbatch); - while (find_lock_entries(mapping, index, end, &fbatch, indices)) { - for (i = 0; i < folio_batch_count(&fbatch); i++) { - struct page *page = &fbatch.folios[i]->page; + pagevec_init(&pvec); + while (find_lock_entries(mapping, index, end, &pvec, indices)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; /* We rely upon deletion not changing page->index */ index = indices[i]; @@ -537,8 +505,8 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping, } count += ret; } - folio_batch_remove_exceptionals(&fbatch); - folio_batch_release(&fbatch); + pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); cond_resched(); index++; } @@ -590,43 +558,40 @@ void invalidate_mapping_pagevec(struct address_space *mapping, * shrink_page_list() has a temp ref on them, or because they're transiently * sitting in the lru_cache_add() pagevecs. */ -static int invalidate_complete_folio2(struct address_space *mapping, - struct folio *folio) +static int +invalidate_complete_page2(struct address_space *mapping, struct page *page) { - if (folio->mapping != mapping) + if (page->mapping != mapping) return 0; - if (folio_has_private(folio) && - !filemap_release_folio(folio, GFP_KERNEL)) + if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; - spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); - if (folio_test_dirty(folio)) + if (PageDirty(page)) goto failed; - BUG_ON(folio_has_private(folio)); - __filemap_remove_folio(folio, NULL); + BUG_ON(page_has_private(page)); + __delete_from_page_cache(page, NULL); xa_unlock_irq(&mapping->i_pages); - if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); - spin_unlock(&mapping->host->i_lock); - filemap_free_folio(mapping, folio); + if (mapping->a_ops->freepage) + mapping->a_ops->freepage(page); + + put_page(page); /* pagecache ref */ return 1; failed: xa_unlock_irq(&mapping->i_pages); - spin_unlock(&mapping->host->i_lock); return 0; } -static int do_launder_folio(struct address_space *mapping, struct folio *folio) +static int do_launder_page(struct address_space *mapping, struct page *page) { - if (!folio_test_dirty(folio)) + if (!PageDirty(page)) return 0; - if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL) + if (page->mapping != mapping || mapping->a_ops->launder_page == NULL) return 0; - return mapping->a_ops->launder_page(&folio->page); + return mapping->a_ops->launder_page(page); } /** @@ -644,7 +609,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end) { pgoff_t indices[PAGEVEC_SIZE]; - struct folio_batch fbatch; + struct pagevec pvec; pgoff_t index; int i; int ret = 0; @@ -652,27 +617,27 @@ int invalidate_inode_pages2_range(struct address_space *mapping, int did_range_unmap = 0; if (mapping_empty(mapping)) - return 0; + goto out; - folio_batch_init(&fbatch); + pagevec_init(&pvec); index = start; - while (find_get_entries(mapping, index, end, &fbatch, indices)) { - for (i = 0; i < folio_batch_count(&fbatch); i++) { - struct folio *folio = fbatch.folios[i]; + while 
(find_get_entries(mapping, index, end, &pvec, indices)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - /* We rely upon deletion not changing folio->index */ + /* We rely upon deletion not changing page->index */ index = indices[i]; - if (xa_is_value(folio)) { + if (xa_is_value(page)) { if (!invalidate_exceptional_entry2(mapping, - index, folio)) + index, page)) ret = -EBUSY; continue; } - if (!did_range_unmap && folio_mapped(folio)) { + if (!did_range_unmap && page_mapped(page)) { /* - * If folio is mapped, before taking its lock, + * If page is mapped, before taking its lock, * zap the rest of the file in one hit. */ unmap_mapping_pages(mapping, index, @@ -680,29 +645,29 @@ int invalidate_inode_pages2_range(struct address_space *mapping, did_range_unmap = 1; } - folio_lock(folio); - VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); - if (folio->mapping != mapping) { - folio_unlock(folio); + lock_page(page); + WARN_ON(page_to_index(page) != index); + if (page->mapping != mapping) { + unlock_page(page); continue; } - folio_wait_writeback(folio); + wait_on_page_writeback(page); - if (folio_mapped(folio)) - unmap_mapping_folio(folio); - BUG_ON(folio_mapped(folio)); + if (page_mapped(page)) + unmap_mapping_page(page); + BUG_ON(page_mapped(page)); - ret2 = do_launder_folio(mapping, folio); + ret2 = do_launder_page(mapping, page); if (ret2 == 0) { - if (!invalidate_complete_folio2(mapping, folio)) + if (!invalidate_complete_page2(mapping, page)) ret2 = -EBUSY; } if (ret2 < 0) ret = ret2; - folio_unlock(folio); + unlock_page(page); } - folio_batch_remove_exceptionals(&fbatch); - folio_batch_release(&fbatch); + pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); cond_resched(); index++; } @@ -716,6 +681,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, if (dax_mapping(mapping)) { unmap_mapping_pages(mapping, start, end - start + 1, false); } +out: + cleancache_invalidate_inode(mapping); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); diff --git a/mm/usercopy.c b/mm/usercopy.c index d0d268135d..b3de3c4eef 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -20,7 +20,6 @@ #include #include #include -#include "slab.h" /* * Checks if a given pointer and length is contained by the current @@ -224,7 +223,7 @@ static inline void check_page_span(const void *ptr, unsigned long n, static inline void check_heap_object(const void *ptr, unsigned long n, bool to_user) { - struct folio *folio; + struct page *page; if (!virt_addr_valid(ptr)) return; @@ -232,16 +231,16 @@ static inline void check_heap_object(const void *ptr, unsigned long n, /* * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the * highmem page or fallback to virt_to_page(). The following - * is effectively a highmem-aware virt_to_slab(). + * is effectively a highmem-aware virt_to_head_page(). */ - folio = page_folio(kmap_to_page((void *)ptr)); + page = compound_head(kmap_to_page((void *)ptr)); - if (folio_test_slab(folio)) { + if (PageSlab(page)) { /* Check slab allocator for flags and size. */ - __check_heap_object(ptr, n, folio_slab(folio), to_user); + __check_heap_object(ptr, n, page, to_user); } else { /* Verify object does not incorrectly span multiple pages. 
*/ - check_page_span(ptr, n, folio_page(folio, 0), to_user); + check_page_span(ptr, n, page, to_user); } } diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 0780c2a57f..7a90084155 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -69,9 +69,10 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, pgoff_t offset, max_off; _dst_pte = mk_pte(page, dst_vma->vm_page_prot); - _dst_pte = pte_mkdirty(_dst_pte); if (page_in_cache && !vm_shared) writable = false; + if (writable || !page_in_cache) + _dst_pte = pte_mkdirty(_dst_pte); if (writable) { if (wp_copy) _dst_pte = pte_mkuffd_wp(_dst_pte); @@ -163,7 +164,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, __SetPageUptodate(page); ret = -ENOMEM; - if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL)) goto out_release; ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, @@ -232,11 +233,6 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, goto out; } - if (PageHWPoison(page)) { - ret = -EIO; - goto out_release; - } - ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, page, false, wp_copy); if (ret) diff --git a/mm/util.c b/mm/util.c index 7e43369064..ea09dd33ab 100644 --- a/mm/util.c +++ b/mm/util.c @@ -549,10 +549,13 @@ EXPORT_SYMBOL(vm_mmap); * Uses kmalloc to get the memory but if the allocation fails then falls back * to the vmalloc allocator. Use kvfree for freeing the memory. * - * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier. + * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is * preferable to the vmalloc fallback, due to visible performance drawbacks. * + * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not + * fall back to vmalloc. + * * Return: pointer to the allocated memory of %NULL in case of failure */ void *kvmalloc_node(size_t size, gfp_t flags, int node) @@ -560,6 +563,13 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) gfp_t kmalloc_flags = flags; void *ret; + /* + * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) + * so the given set of flags has to be compatible. 
+ */ + if ((flags & GFP_KERNEL) != GFP_KERNEL) + return kmalloc_node(size, flags, node); + /* * We want to attempt a large physically contiguous block first because * it is less likely to fragment multiple larger blocks and therefore @@ -572,9 +582,6 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL)) kmalloc_flags |= __GFP_NORETRY; - - /* nofail semantic is implemented by the vmalloc fallback */ - kmalloc_flags &= ~__GFP_NOFAIL; } ret = kmalloc_node(size, kmalloc_flags, node); @@ -587,8 +594,10 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) return ret; /* Don't even allow crazy sizes */ - if (WARN_ON_ONCE(size > INT_MAX)) + if (unlikely(size > INT_MAX)) { + WARN_ON_ONCE(!(flags & __GFP_NOWARN)); return NULL; + } return __vmalloc_node(size, 1, flags, node, __builtin_return_address(0)); @@ -647,78 +656,81 @@ void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) } EXPORT_SYMBOL(kvrealloc); +static inline void *__page_rmapping(struct page *page) +{ + unsigned long mapping; + + mapping = (unsigned long)page->mapping; + mapping &= ~PAGE_MAPPING_FLAGS; + + return (void *)mapping; +} + /* Neutral page->mapping pointer to address_space or anon_vma or other */ void *page_rmapping(struct page *page) { - return folio_raw_mapping(page_folio(page)); + page = compound_head(page); + return __page_rmapping(page); } -/** - * folio_mapped - Is this folio mapped into userspace? - * @folio: The folio. - * - * Return: True if any page in this folio is referenced by user page tables. +/* + * Return true if this page is mapped into pagetables. + * For compound page it returns true if any subpage of compound page is mapped. */ -bool folio_mapped(struct folio *folio) +bool page_mapped(struct page *page) { - long i, nr; + int i; - if (!folio_test_large(folio)) - return atomic_read(&folio->_mapcount) >= 0; - if (atomic_read(folio_mapcount_ptr(folio)) >= 0) + if (likely(!PageCompound(page))) + return atomic_read(&page->_mapcount) >= 0; + page = compound_head(page); + if (atomic_read(compound_mapcount_ptr(page)) >= 0) return true; - if (folio_test_hugetlb(folio)) + if (PageHuge(page)) return false; - - nr = folio_nr_pages(folio); - for (i = 0; i < nr; i++) { - if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0) + for (i = 0; i < compound_nr(page); i++) { + if (atomic_read(&page[i]._mapcount) >= 0) return true; } return false; } -EXPORT_SYMBOL(folio_mapped); +EXPORT_SYMBOL(page_mapped); struct anon_vma *page_anon_vma(struct page *page) { - struct folio *folio = page_folio(page); - unsigned long mapping = (unsigned long)folio->mapping; + unsigned long mapping; + page = compound_head(page); + mapping = (unsigned long)page->mapping; if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) return NULL; - return (void *)(mapping - PAGE_MAPPING_ANON); + return __page_rmapping(page); } -/** - * folio_mapping - Find the mapping where this folio is stored. - * @folio: The folio. - * - * For folios which are in the page cache, return the mapping that this - * page belongs to. Folios in the swap cache return the swap mapping - * this page is stored in (which is different from the mapping for the - * swap file or swap device where the data is stored). - * - * You can call this for folios which aren't in the swap cache or page - * cache and it will return NULL. 
- */ -struct address_space *folio_mapping(struct folio *folio) +struct address_space *page_mapping(struct page *page) { struct address_space *mapping; + page = compound_head(page); + /* This happens if someone calls flush_dcache_page on slab page */ - if (unlikely(folio_test_slab(folio))) + if (unlikely(PageSlab(page))) return NULL; - if (unlikely(folio_test_swapcache(folio))) - return swap_address_space(folio_swap_entry(folio)); + if (unlikely(PageSwapCache(page))) { + swp_entry_t entry; - mapping = folio->mapping; + entry.val = page_private(page); + return swap_address_space(entry); + } + + mapping = page->mapping; if ((unsigned long)mapping & PAGE_MAPPING_ANON) return NULL; return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS); } -EXPORT_SYMBOL(folio_mapping); +EXPORT_SYMBOL(page_mapping); /* Slow path of page_mapcount() for compound pages */ int __page_mapcount(struct page *page) @@ -740,26 +752,13 @@ int __page_mapcount(struct page *page) } EXPORT_SYMBOL_GPL(__page_mapcount); -/** - * folio_copy - Copy the contents of one folio to another. - * @dst: Folio to copy to. - * @src: Folio to copy from. - * - * The bytes in the folio represented by @src are copied to @dst. - * Assumes the caller has validated that @dst is at least as large as @src. - * Can be called in atomic context for order-0 folios, but if the folio is - * larger, it may sleep. - */ -void folio_copy(struct folio *dst, struct folio *src) +void copy_huge_page(struct page *dst, struct page *src) { - long i = 0; - long nr = folio_nr_pages(src); + unsigned i, nr = compound_nr(src); - for (;;) { - copy_highpage(folio_page(dst, i), folio_page(src, i)); - if (++i == nr) - break; + for (i = 0; i < nr; i++) { cond_resched(); + copy_highpage(nth_page(dst, i), nth_page(src, i)); } } @@ -1082,14 +1081,3 @@ void page_offline_end(void) up_write(&page_offline_rwsem); } EXPORT_SYMBOL(page_offline_end); - -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO -void flush_dcache_folio(struct folio *folio) -{ - long i, nr = folio_nr_pages(folio); - - for (i = 0; i < nr; i++) - flush_dcache_page(folio_page(folio, i)); -} -EXPORT_SYMBOL(flush_dcache_folio); -#endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4165304d35..8375eecc55 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -39,7 +38,6 @@ #include #include #include -#include #include #include @@ -1197,14 +1195,18 @@ find_vmap_lowest_match(unsigned long size, { struct vmap_area *va; struct rb_node *node; + unsigned long length; /* Start from the root. */ node = free_vmap_area_root.rb_node; + /* Adjust the search size for alignment overhead. */ + length = size + align - 1; + while (node) { va = rb_entry(node, struct vmap_area, rb_node); - if (get_subtree_max_size(node->rb_left) >= size && + if (get_subtree_max_size(node->rb_left) >= length && vstart < va->va_start) { node = node->rb_left; } else { @@ -1214,9 +1216,9 @@ find_vmap_lowest_match(unsigned long size, /* * Does not make sense to go deeper towards the right * sub-tree if it does not have a free block that is - * equal or bigger to the requested search size. + * equal or bigger to the requested search length. */ - if (get_subtree_max_size(node->rb_right) >= size) { + if (get_subtree_max_size(node->rb_right) >= length) { node = node->rb_right; continue; } @@ -1224,23 +1226,15 @@ find_vmap_lowest_match(unsigned long size, /* * OK. We roll back and find the first right sub-tree, * that will satisfy the search criteria. 
It can happen - * due to "vstart" restriction or an alignment overhead - * that is bigger then PAGE_SIZE. + * only once due to "vstart" restriction. */ while ((node = rb_parent(node))) { va = rb_entry(node, struct vmap_area, rb_node); if (is_within_this_va(va, size, align, vstart)) return va; - if (get_subtree_max_size(node->rb_right) >= size && + if (get_subtree_max_size(node->rb_right) >= length && vstart <= va->va_start) { - /* - * Shift the vstart forward. Please note, we update it with - * parent's start address adding "1" because we do not want - * to enter same sub-tree after it has already been checked - * and no suitable free block found there. - */ - vstart = va->va_start + 1; node = node->rb_right; break; } @@ -1271,7 +1265,7 @@ find_vmap_lowest_linear_match(unsigned long size, } static void -find_vmap_lowest_match_check(unsigned long size, unsigned long align) +find_vmap_lowest_match_check(unsigned long size) { struct vmap_area *va_1, *va_2; unsigned long vstart; @@ -1280,8 +1274,8 @@ find_vmap_lowest_match_check(unsigned long size, unsigned long align) get_random_bytes(&rnd, sizeof(rnd)); vstart = VMALLOC_START + rnd; - va_1 = find_vmap_lowest_match(size, align, vstart); - va_2 = find_vmap_lowest_linear_match(size, align, vstart); + va_1 = find_vmap_lowest_match(size, 1, vstart); + va_2 = find_vmap_lowest_linear_match(size, 1, vstart); if (va_1 != va_2) pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n", @@ -1460,7 +1454,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align, return vend; #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK - find_vmap_lowest_match_check(size, align); + find_vmap_lowest_match_check(size); #endif return nva_start_addr; @@ -2278,22 +2272,15 @@ void __init vm_area_add_early(struct vm_struct *vm) */ void __init vm_area_register_early(struct vm_struct *vm, size_t align) { - unsigned long addr = ALIGN(VMALLOC_START, align); - struct vm_struct *cur, **p; + static size_t vm_init_off __initdata; + unsigned long addr; - BUG_ON(vmap_initialized); + addr = ALIGN(VMALLOC_START + vm_init_off, align); + vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; - for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) { - if ((unsigned long)cur->addr - addr >= vm->size) - break; - addr = ALIGN((unsigned long)cur->addr + cur->size, align); - } - - BUG_ON(addr > VMALLOC_END - vm->size); vm->addr = (void *)addr; - vm->next = *p; - *p = vm; - kasan_populate_early_vm_area_shadow(vm->addr, vm->size); + + vm_area_add_early(vm); } static void vmap_init_free_space(void) @@ -2625,13 +2612,12 @@ static void __vunmap(const void *addr, int deallocate_pages) if (deallocate_pages) { unsigned int page_order = vm_area_page_order(area); - int i, step = 1U << page_order; + int i; - for (i = 0; i < area->nr_pages; i += step) { + for (i = 0; i < area->nr_pages; i += 1U << page_order) { struct page *page = area->pages[i]; BUG_ON(!page); - mod_memcg_page_state(page, MEMCG_VMALLOC, -step); __free_pages(page, page_order); cond_resched(); } @@ -2757,13 +2743,6 @@ void *vmap(struct page **pages, unsigned int count, might_sleep(); - /* - * Your top guard is someone else's bottom guard. Not having a top - * guard compromises someone else's mappings too. - */ - if (WARN_ON_ONCE(flags & VM_NO_GUARD)) - flags &= ~VM_NO_GUARD; - if (count > totalram_pages()) return NULL; @@ -2846,9 +2825,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * to fails, fallback to a single page allocator that is * more permissive. 
*/ - if (!order) { - gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL; - + if (!order && nid != NUMA_NO_NODE) { while (nr_allocated < nr_pages) { unsigned int nr, nr_pages_request; @@ -2860,20 +2837,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, */ nr_pages_request = min(100U, nr_pages - nr_allocated); - /* memory allocation should consider mempolicy, we can't - * wrongly use nearest node when nid == NUMA_NO_NODE, - * otherwise memory may be allocated in only one node, - * but mempolcy want to alloc memory by interleaving. - */ - if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE) - nr = alloc_pages_bulk_array_mempolicy(bulk_gfp, - nr_pages_request, - pages + nr_allocated); - - else - nr = alloc_pages_bulk_array_node(bulk_gfp, nid, - nr_pages_request, - pages + nr_allocated); + nr = alloc_pages_bulk_array_node(gfp, nid, + nr_pages_request, pages + nr_allocated); nr_allocated += nr; cond_resched(); @@ -2885,7 +2850,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, if (nr != nr_pages_request) break; } - } else + } else if (order) /* * Compound pages required for remap_vmalloc_page if * high-order pages. @@ -2895,9 +2860,6 @@ vm_area_alloc_pages(gfp_t gfp, int nid, /* High-order pages or fallback path if "bulk" fails. */ while (nr_allocated < nr_pages) { - if (fatal_signal_pending(current)) - break; - if (nid == NUMA_NO_NODE) page = alloc_pages(gfp, order); else @@ -2925,15 +2887,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, int node) { const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; - const gfp_t orig_gfp_mask = gfp_mask; - bool nofail = gfp_mask & __GFP_NOFAIL; unsigned long addr = (unsigned long)area->addr; unsigned long size = get_vm_area_size(area); unsigned long array_size; unsigned int nr_small_pages = size >> PAGE_SHIFT; unsigned int page_order; - unsigned int flags; - int ret; array_size = (unsigned long)nr_small_pages * sizeof(struct page *); gfp_mask |= __GFP_NOWARN; @@ -2949,7 +2907,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } if (!area->pages) { - warn_alloc(orig_gfp_mask, NULL, + warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, failed to allocated page array size %lu", nr_small_pages * PAGE_SIZE, array_size); free_vm_area(area); @@ -2963,48 +2921,21 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, page_order, nr_small_pages, area->pages); atomic_long_add(area->nr_pages, &nr_vmalloc_pages); - if (gfp_mask & __GFP_ACCOUNT) { - int i, step = 1U << page_order; - - for (i = 0; i < area->nr_pages; i += step) - mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, - step); - } /* * If not enough pages were obtained to accomplish an * allocation request, free them via __vfree() if any. 
*/ if (area->nr_pages != nr_small_pages) { - warn_alloc(orig_gfp_mask, NULL, + warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, page order %u, failed to allocate pages", area->nr_pages * PAGE_SIZE, page_order); goto fail; } - /* - * page tables allocations ignore external gfp mask, enforce it - * by the scope API - */ - if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) - flags = memalloc_nofs_save(); - else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) - flags = memalloc_noio_save(); - - do { - ret = vmap_pages_range(addr, addr + size, prot, area->pages, - page_shift); - if (nofail && (ret < 0)) - schedule_timeout_uninterruptible(1); - } while (nofail && (ret < 0)); - - if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) - memalloc_nofs_restore(flags); - else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) - memalloc_noio_restore(flags); - - if (ret < 0) { - warn_alloc(orig_gfp_mask, NULL, + if (vmap_pages_range(addr, addr + size, prot, area->pages, + page_shift) < 0) { + warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, failed to map pages", area->nr_pages * PAGE_SIZE); goto fail; @@ -3030,18 +2961,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, * @caller: caller's return address * * Allocate enough pages to cover @size from the page level - * allocator with @gfp_mask flags. Please note that the full set of gfp - * flags are not supported. GFP_KERNEL, GFP_NOFS and GFP_NOIO are all - * supported. - * Zone modifiers are not supported. From the reclaim modifiers - * __GFP_DIRECT_RECLAIM is required (aka GFP_NOWAIT is not supported) - * and only __GFP_NOFAIL is supported (i.e. __GFP_NORETRY and - * __GFP_RETRY_MAYFAIL are not supported). - * - * __GFP_NOWARN can be used to suppress failures messages. - * - * Map them into contiguous kernel virtual space, using a pagetable - * protection of @prot. + * allocator with @gfp_mask flags. Map them into contiguous + * kernel virtual space, using a pagetable protection of @prot. * * Return: the address of the area or %NULL on failure */ @@ -3093,14 +3014,9 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, VM_UNINITIALIZED | vm_flags, start, end, node, gfp_mask, caller); if (!area) { - bool nofail = gfp_mask & __GFP_NOFAIL; warn_alloc(gfp_mask, NULL, - "vmalloc error: size %lu, vm_struct allocation failed%s", - real_size, (nofail) ? ". Retrying." : ""); - if (nofail) { - schedule_timeout_uninterruptible(1); - goto again; - } + "vmalloc error: size %lu, vm_struct allocation failed", + real_size); goto fail; } @@ -3941,7 +3857,6 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) { if (IS_ENABLED(CONFIG_NUMA)) { unsigned int nr, *counters = m->private; - unsigned int step = 1U << vm_area_page_order(v); if (!counters) return; @@ -3953,8 +3868,9 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) memset(counters, 0, nr_node_ids * sizeof(unsigned int)); - for (nr = 0; nr < v->nr_pages; nr += step) - counters[page_to_nid(v->pages[nr])] += step; + for (nr = 0; nr < v->nr_pages; nr++) + counters[page_to_nid(v->pages[nr])]++; + for_each_node_state(nr, N_HIGH_MEMORY) if (counters[nr]) seq_printf(m, " N%u=%u", nr, counters[nr]); @@ -3990,7 +3906,7 @@ static int s_show(struct seq_file *m, void *p) (void *)va->va_start, (void *)va->va_end, va->va_end - va->va_start); - goto final; + return 0; } v = va->vm; @@ -4031,7 +3947,6 @@ static int s_show(struct seq_file *m, void *p) /* * As a final step, dump "unpurged" areas. 
*/ -final: if (list_is_last(&va->list, &vmap_area_list)) show_purge_info(m); diff --git a/mm/vmpressure.c b/mm/vmpressure.c index b52644771c..76518e4166 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -308,7 +308,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, * asserted for a second in which subsequent * pressure events can occur. */ - WRITE_ONCE(memcg->socket_pressure, jiffies + HZ); + memcg->socket_pressure = jiffies + HZ; } } } diff --git a/mm/vmscan.c b/mm/vmscan.c index 59b14e0d69..74296c2d1f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -687,21 +687,6 @@ void unregister_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(unregister_shrinker); -/** - * synchronize_shrinkers - Wait for all running shrinkers to complete. - * - * This is equivalent to calling unregister_shrink() and register_shrinker(), - * but atomically and with less overhead. This is useful to guarantee that all - * shrinker invocations have seen an update, before freeing memory, similar to - * rcu. - */ -void synchronize_shrinkers(void) -{ - down_write(&shrinker_rwsem); - up_write(&shrinker_rwsem); -} -EXPORT_SYMBOL(synchronize_shrinkers); - #define SHRINK_BATCH 128 static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, @@ -951,7 +936,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, return freed; } -static void drop_slab_node(int nid) +void drop_slab_node(int nid) { unsigned long freed; int shift = 0; @@ -1021,134 +1006,6 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } -static bool skip_throttle_noprogress(pg_data_t *pgdat) -{ - int reclaimable = 0, write_pending = 0; - int i; - - /* - * If kswapd is disabled, reschedule if necessary but do not - * throttle as the system is likely near OOM. - */ - if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) - return true; - - /* - * If there are a lot of dirty/writeback pages then do not - * throttle as throttling will occur when the pages cycle - * towards the end of the LRU if still under writeback. - */ - for (i = 0; i < MAX_NR_ZONES; i++) { - struct zone *zone = pgdat->node_zones + i; - - if (!populated_zone(zone)) - continue; - - reclaimable += zone_reclaimable_pages(zone); - write_pending += zone_page_state_snapshot(zone, - NR_ZONE_WRITE_PENDING); - } - if (2 * write_pending <= reclaimable) - return true; - - return false; -} - -void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) -{ - wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; - long timeout, ret; - DEFINE_WAIT(wait); - - /* - * Do not throttle IO workers, kthreads other than kswapd or - * workqueues. They may be required for reclaim to make - * forward progress (e.g. journalling workqueues or kthreads). - */ - if (!current_is_kswapd() && - current->flags & (PF_IO_WORKER|PF_KTHREAD)) { - cond_resched(); - return; - } - - /* - * These figures are pulled out of thin air. - * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many - * parallel reclaimers which is a short-lived event so the timeout is - * short. Failing to make progress or waiting on writeback are - * potentially long-lived events so use a longer timeout. This is shaky - * logic as a failure to make progress could be due to anything from - * writeback to a slow device to excessive references pages at the tail - * of the inactive LRU. 
- */ - switch(reason) { - case VMSCAN_THROTTLE_WRITEBACK: - timeout = HZ/10; - - if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { - WRITE_ONCE(pgdat->nr_reclaim_start, - node_page_state(pgdat, NR_THROTTLED_WRITTEN)); - } - - break; - case VMSCAN_THROTTLE_CONGESTED: - fallthrough; - case VMSCAN_THROTTLE_NOPROGRESS: - if (skip_throttle_noprogress(pgdat)) { - cond_resched(); - return; - } - - timeout = 1; - - break; - case VMSCAN_THROTTLE_ISOLATED: - timeout = HZ/50; - break; - default: - WARN_ON_ONCE(1); - timeout = HZ; - break; - } - - prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - ret = schedule_timeout(timeout); - finish_wait(wqh, &wait); - - if (reason == VMSCAN_THROTTLE_WRITEBACK) - atomic_dec(&pgdat->nr_writeback_throttled); - - trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout), - jiffies_to_usecs(timeout - ret), - reason); -} - -/* - * Account for pages written if tasks are throttled waiting on dirty - * pages to clean. If enough pages have been cleaned since throttling - * started then wakeup the throttled tasks. - */ -void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio, - int nr_throttled) -{ - unsigned long nr_written; - - node_stat_add_folio(folio, NR_THROTTLED_WRITTEN); - - /* - * This is an inaccurate read as the per-cpu deltas may not - * be synchronised. However, given that the system is - * writeback throttled, it is not worth taking the penalty - * of getting an accurate count. At worst, the throttle - * timeout guarantees forward progress. - */ - nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) - - READ_ONCE(pgdat->nr_reclaim_start); - - if (nr_written > SWAP_CLUSTER_MAX * nr_throttled) - wake_up(&pgdat->reclaim_wait[VMSCAN_THROTTLE_WRITEBACK]); -} - /* possible outcome of pageout() */ typedef enum { /* failed to write page out, page is locked */ @@ -1248,8 +1105,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); - if (!PageSwapCache(page)) - spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); /* * The non racy check for a busy page. @@ -1318,9 +1173,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, shadow = workingset_eviction(page, target_memcg); __delete_from_page_cache(page, shadow); xa_unlock_irq(&mapping->i_pages); - if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); - spin_unlock(&mapping->host->i_lock); if (freepage != NULL) freepage(page); @@ -1330,8 +1182,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, cannot_free: xa_unlock_irq(&mapping->i_pages); - if (!PageSwapCache(page)) - spin_unlock(&mapping->host->i_lock); return 0; } @@ -1487,6 +1337,7 @@ static unsigned int demote_page_list(struct list_head *demote_pages, { int target_nid = next_demotion_node(pgdat->node_id); unsigned int nr_succeeded; + int err; if (list_empty(demote_pages)) return 0; @@ -1495,7 +1346,7 @@ static unsigned int demote_page_list(struct list_head *demote_pages, return 0; /* Demotion ignores all cpuset and mempolicy settings */ - migrate_pages(demote_pages, alloc_demote_page, NULL, + err = migrate_pages(demote_pages, alloc_demote_page, NULL, target_nid, MIGRATE_ASYNC, MR_DEMOTION, &nr_succeeded); @@ -1561,8 +1412,9 @@ static unsigned int shrink_page_list(struct list_head *page_list, /* * The number of dirty pages determines if a node is marked - * reclaim_congested. 
kswapd will stall and start writing - * pages if the tail of the LRU is all dirty unqueued pages. + * reclaim_congested which affects wait_iff_congested. kswapd + * will stall and start writing pages if the tail of the LRU + * is all dirty unqueued pages. */ page_check_dirty_writeback(page, &dirty, &writeback); if (dirty || writeback) @@ -2238,7 +2090,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, */ int isolate_lru_page(struct page *page) { - struct folio *folio = page_folio(page); int ret = -EBUSY; VM_BUG_ON_PAGE(!page_count(page), page); @@ -2248,7 +2099,7 @@ int isolate_lru_page(struct page *page) struct lruvec *lruvec; get_page(page); - lruvec = folio_lruvec_lock_irq(folio); + lruvec = lock_page_lruvec_irq(page); del_page_from_lru_list(page, lruvec); unlock_page_lruvec_irq(lruvec); ret = 0; @@ -2268,7 +2119,6 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, struct scan_control *sc) { unsigned long inactive, isolated; - bool too_many; if (current_is_kswapd()) return 0; @@ -2292,13 +2142,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) inactive >>= 3; - too_many = isolated > inactive; - - /* Wake up tasks throttled due to too_many_isolated. */ - if (!too_many) - wake_throttle_isolated(pgdat); - - return too_many; + return isolated > inactive; } /* @@ -2355,7 +2199,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec, * All pages were isolated from the same lruvec (and isolation * inhibits memcg migration). */ - VM_BUG_ON_PAGE(!folio_matches_lruvec(page_folio(page), lruvec), page); + VM_BUG_ON_PAGE(!page_matches_lruvec(page, lruvec), page); add_page_to_lru_list(page, lruvec); nr_pages = thp_nr_pages(page); nr_moved += nr_pages; @@ -2407,8 +2251,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, return 0; /* wait a bit for the reclaimer. */ + msleep(100); stalled = true; - reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED); /* We are about to die and free our memory. Return now. */ if (fatal_signal_pending(current)) @@ -3336,19 +3180,19 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) * If kswapd scans pages marked for immediate * reclaim and under writeback (nr_immediate), it * implies that pages are cycling through the LRU - * faster than they are written so forcibly stall - * until some pages complete writeback. + * faster than they are written so also forcibly stall. */ if (sc->nr.immediate) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + congestion_wait(BLK_RW_ASYNC, HZ/10); } /* - * Tag a node/memcg as congested if all the dirty pages were marked - * for writeback and immediate reclaim (counted in nr.congested). + * Tag a node/memcg as congested if all the dirty pages + * scanned were backed by a congested BDI and + * wait_iff_congested will stall. * * Legacy memcg will stall in page writeback so avoid forcibly - * stalling in reclaim_throttle(). + * stalling in wait_iff_congested(). */ if ((current_is_kswapd() || (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) && @@ -3356,15 +3200,15 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); /* - * Stall direct reclaim for IO completions if the lruvec is - * node is congested. Allow kswapd to continue until it + * Stall direct reclaim for IO completions if underlying BDIs + * and node is congested. 
Allow kswapd to continue until it * starts encountering unqueued dirty pages or cycling through * the LRU too quickly. */ if (!current_is_kswapd() && current_may_throttle() && !sc->hibernation_mode && test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED); + wait_iff_congested(BLK_RW_ASYNC, HZ/10); if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc)) @@ -3412,36 +3256,6 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); } -static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) -{ - /* - * If reclaim is making progress greater than 12% efficiency then - * wake all the NOPROGRESS throttled tasks. - */ - if (sc->nr_reclaimed > (sc->nr_scanned >> 3)) { - wait_queue_head_t *wqh; - - wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_NOPROGRESS]; - if (waitqueue_active(wqh)) - wake_up(wqh); - - return; - } - - /* - * Do not throttle kswapd or cgroup reclaim on NOPROGRESS as it will - * throttle on VMSCAN_THROTTLE_WRITEBACK if there are too many pages - * under writeback and marked for immediate reclaim at the tail of the - * LRU. - */ - if (current_is_kswapd() || cgroup_reclaim(sc)) - return; - - /* Throttle if making no progress at high prioities. */ - if (sc->priority == 1 && !sc->nr_reclaimed) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS); -} - /* * This is the direct reclaim path, for page-allocating processes. We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -3458,7 +3272,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) unsigned long nr_soft_scanned; gfp_t orig_mask; pg_data_t *last_pgdat = NULL; - pg_data_t *first_pgdat = NULL; /* * If the number of buffer_heads in the machine exceeds the maximum @@ -3522,9 +3335,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) /* need some check for avoid more shrink_zone() */ } - if (!first_pgdat) - first_pgdat = zone->zone_pgdat; - /* See comment about same check for global reclaim above */ if (zone->zone_pgdat == last_pgdat) continue; @@ -3532,9 +3342,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) shrink_node(zone->zone_pgdat, sc); } - if (first_pgdat) - consider_reclaim_throttle(first_pgdat, sc); - /* * Restore to original mask to avoid the impact on the caller if we * promoted it to __GFP_HIGHMEM. 
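
Editor's note: the vmscan hunks above drop reclaim_throttle() and return direct reclaim to the older behaviour, where a reclaimer simply stalls (msleep(100), wait_iff_congested) once too_many_isolated() reports that more pages are isolated from an LRU than remain inactive on it. A minimal userspace model of that restored heuristic follows, assuming nothing beyond the logic visible in the hunk; the __GFP_IO/__GFP_FS constants are illustrative stand-ins, not the kernel's definitions.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's gfp flags (values are arbitrary). */
#define __GFP_IO (1u << 0)
#define __GFP_FS (1u << 1)

/*
 * Model of the restored too_many_isolated() check: a direct reclaimer
 * stalls when more pages are isolated than remain inactive. Callers
 * without __GFP_IO/__GFP_FS (e.g. filesystem writeback paths) must not
 * end up blocked behind ordinary direct reclaimers, which could
 * deadlock, so full-GFP callers are throttled earlier, at one eighth
 * of the inactive count.
 */
static bool too_many_isolated(unsigned long inactive,
                              unsigned long isolated,
                              unsigned int gfp_mask)
{
    if ((gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
        inactive >>= 3;
    return isolated > inactive;
}

int main(void)
{
    /* 1000 inactive pages, 200 already isolated from the LRU. */
    printf("NOIO caller throttled: %d\n",
           too_many_isolated(1000, 200, 0));                   /* 0: 200 <= 1000 */
    printf("full-GFP caller throttled: %d\n",
           too_many_isolated(1000, 200, __GFP_IO | __GFP_FS)); /* 1: 200 > 125 */
    return 0;
}

The shift is the whole throttling policy: it trades a fixed sleep for a cheap comparison, which is exactly what the removed reclaim_throttle() machinery had replaced with wait queues and timeouts.
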
@@ -4479,7 +4286,6 @@ static int kswapd(void *p) WRITE_ONCE(pgdat->kswapd_order, 0); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); - atomic_set(&pgdat->nr_writeback_throttled, 0); for ( ; ; ) { bool ret; @@ -4859,7 +4665,6 @@ void check_move_unevictable_pages(struct pagevec *pvec) for (i = 0; i < pvec->nr; i++) { struct page *page = pvec->pages[i]; - struct folio *folio = page_folio(page); int nr_pages; if (PageTransTail(page)) @@ -4872,7 +4677,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) if (!TestClearPageLRU(page)) continue; - lruvec = folio_lruvec_relock_irq(folio, lruvec); + lruvec = relock_page_lruvec_irq(page, lruvec); if (page_evictable(page) && PageUnevictable(page)) { del_page_from_lru_list(page, lruvec); ClearPageUnevictable(page); diff --git a/mm/vmstat.c b/mm/vmstat.c index 4057372745..8ce2620344 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -165,34 +165,6 @@ atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp; EXPORT_SYMBOL(vm_zone_stat); EXPORT_SYMBOL(vm_node_stat); -#ifdef CONFIG_NUMA -static void fold_vm_zone_numa_events(struct zone *zone) -{ - unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; - int cpu; - enum numa_stat_item item; - - for_each_online_cpu(cpu) { - struct per_cpu_zonestat *pzstats; - - pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); - for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) - zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); - } - - for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) - zone_numa_event_add(zone_numa_events[item], zone, item); -} - -void fold_vm_numa_events(void) -{ - struct zone *zone; - - for_each_populated_zone(zone) - fold_vm_zone_numa_events(zone); -} -#endif - #ifdef CONFIG_SMP int calculate_pressure_threshold(struct zone *zone) @@ -799,6 +771,34 @@ static int fold_diff(int *zone_diff, int *node_diff) return changes; } +#ifdef CONFIG_NUMA +static void fold_vm_zone_numa_events(struct zone *zone) +{ + unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; + int cpu; + enum numa_stat_item item; + + for_each_online_cpu(cpu) { + struct per_cpu_zonestat *pzstats; + + pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); + for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) + zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); + } + + for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) + zone_numa_event_add(zone_numa_events[item], zone, item); +} + +void fold_vm_numa_events(void) +{ + struct zone *zone; + + for_each_populated_zone(zone) + fold_vm_zone_numa_events(zone); +} +#endif + /* * Update the zone counters for the current cpu. * @@ -1070,13 +1070,8 @@ static void fill_contig_page_info(struct zone *zone, for (order = 0; order < MAX_ORDER; order++) { unsigned long blocks; - /* - * Count number of free blocks. - * - * Access to nr_free is lockless as nr_free is used only for - * diagnostic purposes. Use data_race to avoid KCSAN warning. 
- */ - blocks = data_race(zone->free_area[order].nr_free); + /* Count number of free blocks */ + blocks = zone->free_area[order].nr_free; info->free_blocks_total += blocks; /* Count free base pages */ @@ -1230,7 +1225,6 @@ const char * const vmstat_text[] = { "nr_vmscan_immediate_reclaim", "nr_dirtied", "nr_written", - "nr_throttled_written", "nr_kernel_misc_reclaimable", "nr_foll_pin_acquired", "nr_foll_pin_released", @@ -1353,9 +1347,6 @@ const char * const vmstat_text[] = { "thp_split_page_failed", "thp_deferred_split_page", "thp_split_pmd", - "thp_scan_exceed_none_pte", - "thp_scan_exceed_swap_pte", - "thp_scan_exceed_share_pte", #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD "thp_split_pud", #endif @@ -1454,11 +1445,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); for (order = 0; order < MAX_ORDER; ++order) - /* - * Access to nr_free is lockless as nr_free is used only for - * printing purposes. Use data_race to avoid KCSAN warning. - */ - seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free)); + seq_printf(m, "%6lu ", zone->free_area[order].nr_free); seq_putc(m, '\n'); } @@ -1669,7 +1656,6 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, } seq_printf(m, "\n pages free %lu" - "\n boost %lu" "\n min %lu" "\n low %lu" "\n high %lu" @@ -1678,7 +1664,6 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n managed %lu" "\n cma %lu", zone_page_state(zone, NR_FREE_PAGES), - zone->watermark_boost, min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), @@ -2194,7 +2179,7 @@ static void extfrag_show_print(struct seq_file *m, for (order = 0; order < MAX_ORDER; ++order) { fill_contig_page_info(zone, order, &info); index = __fragmentation_index(order, &info); - seq_printf(m, "%2d.%03d ", index / 1000, index % 1000); + seq_printf(m, "%d.%03d ", index / 1000, index % 1000); } seq_putc(m, '\n'); diff --git a/mm/workingset.c b/mm/workingset.c index 8c03afe1d6..d5b81e4f4c 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -273,17 +273,17 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) } /** - * workingset_refault - Evaluate the refault of a previously evicted folio. - * @folio: The freshly allocated replacement folio. - * @shadow: Shadow entry of the evicted folio. + * workingset_refault - evaluate the refault of a previously evicted page + * @page: the freshly allocated replacement page + * @shadow: shadow entry of the evicted page * * Calculates and evaluates the refault distance of the previously - * evicted folio in the context of the node and the memcg whose memory + * evicted page in the context of the node and the memcg whose memory * pressure caused the eviction. */ -void workingset_refault(struct folio *folio, void *shadow) +void workingset_refault(struct page *page, void *shadow) { - bool file = folio_is_file_lru(folio); + bool file = page_is_file_lru(page); struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; @@ -295,17 +295,16 @@ void workingset_refault(struct folio *folio, void *shadow) unsigned long refault; bool workingset; int memcgid; - long nr; unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); rcu_read_lock(); /* * Look up the memcg associated with the stored ID. It might - * have been deleted since the folio's eviction. + * have been deleted since the page's eviction. 
* * Note that in rare events the ID could have been recycled - * for a new cgroup that refaults a shared folio. This is + * for a new cgroup that refaults a shared page. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging @@ -341,18 +340,17 @@ void workingset_refault(struct folio *folio, void *shadow) refault_distance = (refault - eviction) & EVICTION_MASK; /* - * The activation decision for this folio is made at the level + * The activation decision for this page is made at the level * where the eviction occurred, as that is where the LRU order - * during folio reclaim is being determined. + * during page reclaim is being determined. * - * However, the cgroup that will own the folio is the one that + * However, the cgroup that will own the page is the one that * is actually experiencing the refault event. */ - nr = folio_nr_pages(folio); - memcg = folio_memcg(folio); + memcg = page_memcg(page); lruvec = mem_cgroup_lruvec(memcg, pgdat); - mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); + inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); mem_cgroup_flush_stats(); /* @@ -378,16 +376,16 @@ void workingset_refault(struct folio *folio, void *shadow) if (refault_distance > workingset_size) goto out; - folio_set_active(folio); - workingset_age_nonresident(lruvec, nr); - mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); + SetPageActive(page); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); + inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); - /* Folio was active prior to eviction */ + /* Page was active prior to eviction */ if (workingset) { - folio_set_workingset(folio); + SetPageWorkingset(page); /* XXX: Move to lru_cache_add() when it supports new vs putback */ - lru_note_cost_folio(folio); - mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); + lru_note_cost_page(page); + inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); } out: rcu_read_unlock(); @@ -395,11 +393,12 @@ void workingset_refault(struct folio *folio, void *shadow) /** * workingset_activation - note a page activation - * @folio: Folio that is being activated. + * @page: page that is being activated */ -void workingset_activation(struct folio *folio) +void workingset_activation(struct page *page) { struct mem_cgroup *memcg; + struct lruvec *lruvec; rcu_read_lock(); /* @@ -409,10 +408,11 @@ void workingset_activation(struct folio *folio) * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. 
*/ - memcg = folio_memcg_rcu(folio); + memcg = page_memcg_rcu(page); if (!mem_cgroup_disabled() && !memcg) goto out; - workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); + lruvec = mem_cgroup_page_lruvec(page); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); out: rcu_read_unlock(); } @@ -543,13 +543,6 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, goto out; } - if (!spin_trylock(&mapping->host->i_lock)) { - xa_unlock(&mapping->i_pages); - spin_unlock_irq(lru_lock); - ret = LRU_RETRY; - goto out; - } - list_lru_isolate(lru, item); __dec_lruvec_kmem_state(node, WORKINGSET_NODES); @@ -569,9 +562,6 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, out_invalid: xa_unlock_irq(&mapping->i_pages); - if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); - spin_unlock(&mapping->host->i_lock); ret = LRU_REMOVED_RETRY; out: cond_resched(); diff --git a/mm/zpool.c b/mm/zpool.c index 68facc1934..6d9ed48141 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -24,11 +24,16 @@ struct zpool { const struct zpool_ops *ops; bool evictable; bool can_sleep_mapped; + + struct list_head list; }; static LIST_HEAD(drivers_head); static DEFINE_SPINLOCK(drivers_lock); +static LIST_HEAD(pools_head); +static DEFINE_SPINLOCK(pools_lock); + /** * zpool_register_driver() - register a zpool implementation. * @driver: driver to register @@ -190,6 +195,10 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, pr_debug("created pool type %s\n", type); + spin_lock(&pools_lock); + list_add(&zpool->list, &pools_head); + spin_unlock(&pools_lock); + return zpool; } @@ -208,6 +217,9 @@ void zpool_destroy_pool(struct zpool *zpool) { pr_debug("destroying pool type %s\n", zpool->driver->type); + spin_lock(&pools_lock); + list_del(&zpool->list); + spin_unlock(&pools_lock); zpool->driver->destroy(zpool->pool); zpool_put_driver(zpool->driver); kfree(zpool); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 9152fbde33..b897ce3b39 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -17,10 +17,10 @@ * * Usage of struct page fields: * page->private: points to zspage - * page->index: links together all component pages of a zspage + * page->freelist(index): links together all component pages of a zspage * For the huge page, this is always 0, so we use this field * to store handle. - * page->page_type: first object offset in a subpage of zspage + * page->units: first object offset in a subpage of zspage * * Usage of struct page flags: * PG_private: identifies the first component page @@ -30,14 +30,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -/* - * lock ordering: - * page_lock - * pool->migrate_lock - * class->lock - * zspage->lock - */ - #include #include #include @@ -65,7 +57,6 @@ #include #include #include -#include #define ZSPAGE_MAGIC 0x58 @@ -109,6 +100,15 @@ #define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT) +/* + * Memory for allocating for handle keeps object position by + * encoding and the encoded value has a room + * in least bit(ie, look at obj_to_location). + * We use the bit to synchronize between object access by + * user and migration. + */ +#define HANDLE_PIN_BIT 0 + /* * Head in allocated object should have OBJ_ALLOCATED_TAG * to identify the object was allocated or not. 
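
Editor's note: the zsmalloc hunks above restore the pre-folio object encoding described in the reinstated header comment: page->freelist chains the component pages of a zspage, page->units holds the first object offset, and an object's location is packed into a single unsigned long as (PFN, index), with OBJ_TAG_BITS low bits reserved for tags such as OBJ_ALLOCATED_TAG and the handle's least significant bit doubling as HANDLE_PIN_BIT. A compilable sketch of just the pack/unpack step, using made-up bit widths rather than the kernel's _PFN_BITS arithmetic:

#include <assert.h>
#include <stdio.h>

/*
 * Sketch of the zsmalloc object encoding: PFN and object index share
 * one word, shifted up by OBJ_TAG_BITS so the low bits stay free for
 * tags. The widths below are chosen for a 64-bit userspace demo and
 * are not the kernel's values.
 */
#define OBJ_TAG_BITS    1
#define OBJ_INDEX_BITS  12
#define OBJ_INDEX_MASK  ((1ul << OBJ_INDEX_BITS) - 1)

static unsigned long location_to_obj(unsigned long pfn, unsigned int obj_idx)
{
    return ((pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK))
            << OBJ_TAG_BITS;
}

static void obj_to_location(unsigned long obj, unsigned long *pfn,
                            unsigned int *obj_idx)
{
    obj >>= OBJ_TAG_BITS;            /* strip the tag bits first */
    *pfn = obj >> OBJ_INDEX_BITS;
    *obj_idx = obj & OBJ_INDEX_MASK;
}

int main(void)
{
    unsigned long pfn;
    unsigned int idx;

    obj_to_location(location_to_obj(0x12345, 42), &pfn, &idx);
    assert(pfn == 0x12345 && idx == 42);
    printf("pfn=%#lx idx=%u\n", pfn, idx);
    return 0;
}
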
@@ -121,7 +121,6 @@ #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS) #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) -#define HUGE_BITS 1 #define FULLNESS_BITS 2 #define CLASS_BITS 8 #define ISOLATED_BITS 3 @@ -159,7 +158,7 @@ enum fullness_group { NR_ZS_FULLNESS, }; -enum class_stat_type { +enum zs_stat_type { CLASS_EMPTY, CLASS_ALMOST_EMPTY, CLASS_ALMOST_FULL, @@ -214,6 +213,22 @@ struct size_class { struct zs_size_stat stats; }; +/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ +static void SetPageHugeObject(struct page *page) +{ + SetPageOwnerPriv1(page); +} + +static void ClearPageHugeObject(struct page *page) +{ + ClearPageOwnerPriv1(page); +} + +static int PageHugeObject(struct page *page) +{ + return PageOwnerPriv1(page); +} + /* * Placed within free objects to form a singly linked list. * For every zspage, zspage->freeobj gives head of this list. @@ -254,14 +269,15 @@ struct zs_pool { #ifdef CONFIG_COMPACTION struct inode *inode; struct work_struct free_work; + /* A wait queue for when migration races with async_free_zspage() */ + struct wait_queue_head migration_wait; + atomic_long_t isolated_pages; + bool destroying; #endif - /* protect page/zspage migration */ - rwlock_t migrate_lock; }; struct zspage { struct { - unsigned int huge:HUGE_BITS; unsigned int fullness:FULLNESS_BITS; unsigned int class:CLASS_BITS + 1; unsigned int isolated:ISOLATED_BITS; @@ -277,32 +293,17 @@ struct zspage { }; struct mapping_area { - local_lock_t lock; char *vm_buf; /* copy buffer for objects that span pages */ char *vm_addr; /* address of kmap_atomic()'ed pages */ enum zs_mapmode vm_mm; /* mapping mode */ }; -/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ -static void SetZsHugePage(struct zspage *zspage) -{ - zspage->huge = 1; -} - -static bool ZsHugePage(struct zspage *zspage) -{ - return zspage->huge; -} - #ifdef CONFIG_COMPACTION static int zs_register_migration(struct zs_pool *pool); static void zs_unregister_migration(struct zs_pool *pool); static void migrate_lock_init(struct zspage *zspage); static void migrate_read_lock(struct zspage *zspage); static void migrate_read_unlock(struct zspage *zspage); -static void migrate_write_lock(struct zspage *zspage); -static void migrate_write_lock_nested(struct zspage *zspage); -static void migrate_write_unlock(struct zspage *zspage); static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); @@ -314,9 +315,6 @@ static void zs_unregister_migration(struct zs_pool *pool) {} static void migrate_lock_init(struct zspage *zspage) {} static void migrate_read_lock(struct zspage *zspage) {} static void migrate_read_unlock(struct zspage *zspage) {} -static void migrate_write_lock(struct zspage *zspage) {} -static void migrate_write_lock_nested(struct zspage *zspage) {} -static void migrate_write_unlock(struct zspage *zspage) {} static void kick_deferred_free(struct zs_pool *pool) {} static void init_deferred_free(struct zs_pool *pool) {} static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} @@ -368,10 +366,14 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) kmem_cache_free(pool->zspage_cachep, zspage); } -/* class->lock(which owns the handle) synchronizes races */ static void record_obj(unsigned long handle, unsigned long obj) { - *(unsigned long *)handle = obj; + /* + * lsb of @obj represents handle lock while other bits + 
* represent object value the handle is pointing so + * updating shouldn't do store tearing. + */ + WRITE_ONCE(*(unsigned long *)handle, obj); } /* zpool driver */ @@ -453,9 +455,12 @@ MODULE_ALIAS("zpool-zsmalloc"); #endif /* CONFIG_ZPOOL */ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ -static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { - .lock = INIT_LOCAL_LOCK(lock), -}; +static DEFINE_PER_CPU(struct mapping_area, zs_map_area); + +static bool is_zspage_isolated(struct zspage *zspage) +{ + return zspage->isolated; +} static __maybe_unused int is_first_page(struct page *page) { @@ -484,12 +489,12 @@ static inline struct page *get_first_page(struct zspage *zspage) static inline int get_first_obj_offset(struct page *page) { - return page->page_type; + return page->units; } static inline void set_first_obj_offset(struct page *page, int offset) { - page->page_type = offset; + page->units = offset; } static inline unsigned int get_freeobj(struct zspage *zspage) @@ -512,12 +517,6 @@ static void get_zspage_mapping(struct zspage *zspage, *class_idx = zspage->class; } -static struct size_class *zspage_class(struct zs_pool *pool, - struct zspage *zspage) -{ - return pool->size_class[zspage->class]; -} - static void set_zspage_mapping(struct zspage *zspage, unsigned int class_idx, enum fullness_group fullness) @@ -544,21 +543,21 @@ static int get_size_class_index(int size) return min_t(int, ZS_SIZE_CLASSES - 1, idx); } -/* type can be of enum type class_stat_type or fullness_group */ -static inline void class_stat_inc(struct size_class *class, +/* type can be of enum type zs_stat_type or fullness_group */ +static inline void zs_stat_inc(struct size_class *class, int type, unsigned long cnt) { class->stats.objs[type] += cnt; } -/* type can be of enum type class_stat_type or fullness_group */ -static inline void class_stat_dec(struct size_class *class, +/* type can be of enum type zs_stat_type or fullness_group */ +static inline void zs_stat_dec(struct size_class *class, int type, unsigned long cnt) { class->stats.objs[type] -= cnt; } -/* type can be of enum type class_stat_type or fullness_group */ +/* type can be of enum type zs_stat_type or fullness_group */ static inline unsigned long zs_stat_get(struct size_class *class, int type) { @@ -720,7 +719,7 @@ static void insert_zspage(struct size_class *class, { struct zspage *head; - class_stat_inc(class, fullness, 1); + zs_stat_inc(class, fullness, 1); head = list_first_entry_or_null(&class->fullness_list[fullness], struct zspage, list); /* @@ -742,9 +741,10 @@ static void remove_zspage(struct size_class *class, enum fullness_group fullness) { VM_BUG_ON(list_empty(&class->fullness_list[fullness])); + VM_BUG_ON(is_zspage_isolated(zspage)); list_del_init(&zspage->list); - class_stat_dec(class, fullness, 1); + zs_stat_dec(class, fullness, 1); } /* @@ -767,9 +767,13 @@ static enum fullness_group fix_fullness_group(struct size_class *class, if (newfg == currfg) goto out; - remove_zspage(class, zspage, currfg); - insert_zspage(class, zspage, newfg); + if (!is_zspage_isolated(zspage)) { + remove_zspage(class, zspage, currfg); + insert_zspage(class, zspage, newfg); + } + set_zspage_mapping(zspage, class_idx, newfg); + out: return newfg; } @@ -820,12 +824,10 @@ static struct zspage *get_zspage(struct page *page) static struct page *get_next_page(struct page *page) { - struct zspage *zspage = get_zspage(page); - - if (unlikely(ZsHugePage(zspage))) + if (unlikely(PageHugeObject(page))) return NULL; - return (struct page 
*)page->index; + return page->freelist; } /** @@ -842,12 +844,6 @@ static void obj_to_location(unsigned long obj, struct page **page, *obj_idx = (obj & OBJ_INDEX_MASK); } -static void obj_to_page(unsigned long obj, struct page **page) -{ - obj >>= OBJ_TAG_BITS; - *page = pfn_to_page(obj >> OBJ_INDEX_BITS); -} - /** * location_to_obj - get obj value encoded from (, ) * @page: page object resides in zspage @@ -869,22 +865,33 @@ static unsigned long handle_to_obj(unsigned long handle) return *(unsigned long *)handle; } -static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle) +static unsigned long obj_to_head(struct page *page, void *obj) { - unsigned long handle; - struct zspage *zspage = get_zspage(page); - - if (unlikely(ZsHugePage(zspage))) { + if (unlikely(PageHugeObject(page))) { VM_BUG_ON_PAGE(!is_first_page(page), page); - handle = page->index; + return page->index; } else - handle = *(unsigned long *)obj; + return *(unsigned long *)obj; +} - if (!(handle & OBJ_ALLOCATED_TAG)) - return false; +static inline int testpin_tag(unsigned long handle) +{ + return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); +} - *phandle = handle & ~OBJ_ALLOCATED_TAG; - return true; +static inline int trypin_tag(unsigned long handle) +{ + return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); +} + +static void pin_tag(unsigned long handle) __acquires(bitlock) +{ + bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); +} + +static void unpin_tag(unsigned long handle) __releases(bitlock) +{ + bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); } static void reset_page(struct page *page) @@ -893,7 +900,8 @@ static void reset_page(struct page *page) ClearPagePrivate(page); set_page_private(page, 0); page_mapcount_reset(page); - page->index = 0; + ClearPageHugeObject(page); + page->freelist = NULL; } static int trylock_zspage(struct zspage *zspage) @@ -944,7 +952,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class, cache_free_zspage(pool, zspage); - class_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage); + zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage); atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated); } @@ -955,11 +963,6 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class, VM_BUG_ON(get_zspage_inuse(zspage)); VM_BUG_ON(list_empty(&zspage->list)); - /* - * Since zs_free couldn't be sleepable, this function cannot call - * lock_page. The page locks trylock_zspage got will be released - * by __free_zspage. - */ if (!trylock_zspage(zspage)) { kick_deferred_free(pool); return; @@ -1024,7 +1027,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage, /* * Allocate individual pages and link them together as: - * 1. all pages are linked together using page->index + * 1. all pages are linked together using page->freelist * 2. each sub-page point to zspage using page->private * * we set PG_private to identify the first page (i.e. 
no other sub-page @@ -1033,15 +1036,15 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage, for (i = 0; i < nr_pages; i++) { page = pages[i]; set_page_private(page, (unsigned long)zspage); - page->index = 0; + page->freelist = NULL; if (i == 0) { zspage->first_page = page; SetPagePrivate(page); if (unlikely(class->objs_per_zspage == 1 && class->pages_per_zspage == 1)) - SetZsHugePage(zspage); + SetPageHugeObject(page); } else { - prev_page->index = (unsigned long)page; + prev_page->freelist = page; } prev_page = page; } @@ -1243,6 +1246,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, unsigned long obj, off; unsigned int obj_idx; + unsigned int class_idx; + enum fullness_group fg; struct size_class *class; struct mapping_area *area; struct page *pages[2]; @@ -1255,26 +1260,21 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, */ BUG_ON(in_interrupt()); - /* It guarantees it can get zspage from handle safely */ - read_lock(&pool->migrate_lock); + /* From now on, migration cannot move the object */ + pin_tag(handle); + obj = handle_to_obj(handle); obj_to_location(obj, &page, &obj_idx); zspage = get_zspage(page); - /* - * migration cannot move any zpages in this zspage. Here, class->lock - * is too heavy since callers would take some time until they calls - * zs_unmap_object API so delegate the locking from class to zspage - * which is smaller granularity. - */ + /* migration cannot move any subpage in this zspage */ migrate_read_lock(zspage); - read_unlock(&pool->migrate_lock); - class = zspage_class(pool, zspage); + get_zspage_mapping(zspage, &class_idx, &fg); + class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; - local_lock(&zs_map_area.lock); - area = this_cpu_ptr(&zs_map_area); + area = &get_cpu_var(zs_map_area); area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ @@ -1290,7 +1290,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, ret = __zs_map_object(area, pages, off, class->size); out: - if (likely(!ZsHugePage(zspage))) + if (likely(!PageHugeObject(page))) ret += ZS_HANDLE_SIZE; return ret; @@ -1304,13 +1304,16 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) unsigned long obj, off; unsigned int obj_idx; + unsigned int class_idx; + enum fullness_group fg; struct size_class *class; struct mapping_area *area; obj = handle_to_obj(handle); obj_to_location(obj, &page, &obj_idx); zspage = get_zspage(page); - class = zspage_class(pool, zspage); + get_zspage_mapping(zspage, &class_idx, &fg); + class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; area = this_cpu_ptr(&zs_map_area); @@ -1325,9 +1328,10 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) __zs_unmap_object(area, pages, off, class->size); } - local_unlock(&zs_map_area.lock); + put_cpu_var(zs_map_area); migrate_read_unlock(zspage); + unpin_tag(handle); } EXPORT_SYMBOL_GPL(zs_unmap_object); @@ -1350,19 +1354,17 @@ size_t zs_huge_class_size(struct zs_pool *pool) } EXPORT_SYMBOL_GPL(zs_huge_class_size); -static unsigned long obj_malloc(struct zs_pool *pool, +static unsigned long obj_malloc(struct size_class *class, struct zspage *zspage, unsigned long handle) { int i, nr_page, offset; unsigned long obj; struct link_free *link; - struct size_class *class; struct page *m_page; unsigned long m_offset; void *vaddr; - class = pool->size_class[zspage->class]; handle |= OBJ_ALLOCATED_TAG; obj = 
get_freeobj(zspage); @@ -1377,7 +1379,7 @@ static unsigned long obj_malloc(struct zs_pool *pool, vaddr = kmap_atomic(m_page); link = (struct link_free *)vaddr + m_offset / sizeof(*link); set_freeobj(zspage, link->next >> OBJ_TAG_BITS); - if (likely(!ZsHugePage(zspage))) + if (likely(!PageHugeObject(m_page))) /* record handle in the header of allocated chunk */ link->handle = handle; else @@ -1386,6 +1388,7 @@ static unsigned long obj_malloc(struct zs_pool *pool, kunmap_atomic(vaddr); mod_zspage_inuse(zspage, 1); + zs_stat_inc(class, OBJ_USED, 1); obj = location_to_obj(m_page, obj); @@ -1421,15 +1424,13 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) size += ZS_HANDLE_SIZE; class = pool->size_class[get_size_class_index(size)]; - /* class->lock effectively protects the zpage migration */ spin_lock(&class->lock); zspage = find_get_zspage(class); if (likely(zspage)) { - obj = obj_malloc(pool, zspage, handle); + obj = obj_malloc(class, zspage, handle); /* Now move the zspage to another fullness group, if required */ fix_fullness_group(class, zspage); record_obj(handle, obj); - class_stat_inc(class, OBJ_USED, 1); spin_unlock(&class->lock); return handle; @@ -1444,15 +1445,14 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) } spin_lock(&class->lock); - obj = obj_malloc(pool, zspage, handle); + obj = obj_malloc(class, zspage, handle); newfg = get_fullness_group(class, zspage); insert_zspage(class, zspage, newfg); set_zspage_mapping(zspage, class->index, newfg); record_obj(handle, obj); atomic_long_add(class->pages_per_zspage, &pool->pages_allocated); - class_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage); - class_stat_inc(class, OBJ_USED, 1); + zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage); /* We completely set up zspage so mark them as movable */ SetZsPageMovable(pool, zspage); @@ -1462,7 +1462,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) } EXPORT_SYMBOL_GPL(zs_malloc); -static void obj_free(int class_size, unsigned long obj) +static void obj_free(struct size_class *class, unsigned long obj) { struct link_free *link; struct zspage *zspage; @@ -1472,20 +1472,18 @@ static void obj_free(int class_size, unsigned long obj) void *vaddr; obj_to_location(obj, &f_page, &f_objidx); - f_offset = (class_size * f_objidx) & ~PAGE_MASK; + f_offset = (class->size * f_objidx) & ~PAGE_MASK; zspage = get_zspage(f_page); vaddr = kmap_atomic(f_page); /* Insert this object in containing zspage's freelist */ link = (struct link_free *)(vaddr + f_offset); - if (likely(!ZsHugePage(zspage))) - link->next = get_freeobj(zspage) << OBJ_TAG_BITS; - else - f_page->index = 0; + link->next = get_freeobj(zspage) << OBJ_TAG_BITS; kunmap_atomic(vaddr); set_freeobj(zspage, f_objidx); mod_zspage_inuse(zspage, -1); + zs_stat_dec(class, OBJ_USED, 1); } void zs_free(struct zs_pool *pool, unsigned long handle) @@ -1493,33 +1491,42 @@ void zs_free(struct zs_pool *pool, unsigned long handle) struct zspage *zspage; struct page *f_page; unsigned long obj; + unsigned int f_objidx; + int class_idx; struct size_class *class; enum fullness_group fullness; + bool isolated; if (unlikely(!handle)) return; - /* - * The pool->migrate_lock protects the race with zpage's migration - * so it's safe to get the page from handle. 
- */ - read_lock(&pool->migrate_lock); + pin_tag(handle); obj = handle_to_obj(handle); - obj_to_page(obj, &f_page); + obj_to_location(obj, &f_page, &f_objidx); zspage = get_zspage(f_page); - class = zspage_class(pool, zspage); + + migrate_read_lock(zspage); + + get_zspage_mapping(zspage, &class_idx, &fullness); + class = pool->size_class[class_idx]; + spin_lock(&class->lock); - read_unlock(&pool->migrate_lock); - - obj_free(class->size, obj); - class_stat_dec(class, OBJ_USED, 1); + obj_free(class, obj); fullness = fix_fullness_group(class, zspage); - if (fullness != ZS_EMPTY) + if (fullness != ZS_EMPTY) { + migrate_read_unlock(zspage); goto out; + } - free_zspage(pool, class, zspage); + isolated = is_zspage_isolated(zspage); + migrate_read_unlock(zspage); + /* If zspage is isolated, zs_page_putback will free the zspage */ + if (likely(!isolated)) + free_zspage(pool, class, zspage); out: + spin_unlock(&class->lock); + unpin_tag(handle); cache_free_handle(pool, handle); } EXPORT_SYMBOL_GPL(zs_free); @@ -1594,6 +1601,7 @@ static void zs_object_copy(struct size_class *class, unsigned long dst, static unsigned long find_alloced_obj(struct size_class *class, struct page *page, int *obj_idx) { + unsigned long head; int offset = 0; int index = *obj_idx; unsigned long handle = 0; @@ -1603,8 +1611,13 @@ static unsigned long find_alloced_obj(struct size_class *class, offset += class->size * index; while (offset < PAGE_SIZE) { - if (obj_allocated(page, addr + offset, &handle)) - break; + head = obj_to_head(page, addr + offset); + if (head & OBJ_ALLOCATED_TAG) { + handle = head & ~OBJ_ALLOCATED_TAG; + if (trypin_tag(handle)) + break; + handle = 0; + } offset += class->size; index++; @@ -1650,16 +1663,25 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, /* Stop if there is no more space */ if (zspage_full(class, get_zspage(d_page))) { + unpin_tag(handle); ret = -ENOMEM; break; } used_obj = handle_to_obj(handle); - free_obj = obj_malloc(pool, get_zspage(d_page), handle); + free_obj = obj_malloc(class, get_zspage(d_page), handle); zs_object_copy(class, free_obj, used_obj); obj_idx++; + /* + * record_obj updates handle's value to free_obj and it will + * invalidate the lock bit (i.e., HANDLE_PIN_BIT) of the handle, which + * breaks synchronization using pin_tag() (e.g., in zs_free) so + * let's keep the lock bit.
+ */ + free_obj |= BIT(HANDLE_PIN_BIT); record_obj(handle, free_obj); - obj_free(class->size, used_obj); + unpin_tag(handle); + obj_free(class, used_obj); } /* Remember last position in this iteration */ @@ -1684,6 +1706,7 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source) zspage = list_first_entry_or_null(&class->fullness_list[fg[i]], struct zspage, list); if (zspage) { + VM_BUG_ON(is_zspage_isolated(zspage)); remove_zspage(class, zspage, fg[i]); return zspage; } @@ -1704,6 +1727,8 @@ static enum fullness_group putback_zspage(struct size_class *class, { enum fullness_group fullness; + VM_BUG_ON(is_zspage_isolated(zspage)); + fullness = get_fullness_group(class, zspage); insert_zspage(class, zspage, fullness); set_zspage_mapping(zspage, class->index, fullness); @@ -1772,11 +1797,6 @@ static void migrate_write_lock(struct zspage *zspage) write_lock(&zspage->lock); } -static void migrate_write_lock_nested(struct zspage *zspage) -{ - write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING); -} - static void migrate_write_unlock(struct zspage *zspage) { write_unlock(&zspage->lock); @@ -1790,10 +1810,35 @@ static void inc_zspage_isolation(struct zspage *zspage) static void dec_zspage_isolation(struct zspage *zspage) { - VM_BUG_ON(zspage->isolated == 0); zspage->isolated--; } +static void putback_zspage_deferred(struct zs_pool *pool, + struct size_class *class, + struct zspage *zspage) +{ + enum fullness_group fg; + + fg = putback_zspage(class, zspage); + if (fg == ZS_EMPTY) + schedule_work(&pool->free_work); + +} + +static inline void zs_pool_dec_isolated(struct zs_pool *pool) +{ + VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); + atomic_long_dec(&pool->isolated_pages); + /* + * Checking pool->destroying must happen after atomic_long_dec() + * for pool->isolated_pages above. Paired with the smp_mb() in + * zs_unregister_migration(). + */ + smp_mb__after_atomic(); + if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) + wake_up_all(&pool->migration_wait); +} + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1812,14 +1857,19 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage, create_page_chain(class, zspage, pages); set_first_obj_offset(newpage, get_first_obj_offset(oldpage)); - if (unlikely(ZsHugePage(zspage))) + if (unlikely(PageHugeObject(oldpage))) newpage->index = oldpage->index; __SetPageMovable(newpage, page_mapping(oldpage)); } static bool zs_page_isolate(struct page *page, isolate_mode_t mode) { + struct zs_pool *pool; + struct size_class *class; + int class_idx; + enum fullness_group fullness; struct zspage *zspage; + struct address_space *mapping; /* * Page is locked so zspage couldn't be destroyed. For detail, look at @@ -1829,9 +1879,41 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) VM_BUG_ON_PAGE(PageIsolated(page), page); zspage = get_zspage(page); - migrate_write_lock(zspage); + + /* + * Without class lock, fullness could be stale while class_idx is okay + * because class_idx is constant unless page is freed so we should get + * fullness again under class lock. 
+ */ + get_zspage_mapping(zspage, &class_idx, &fullness); + mapping = page_mapping(page); + pool = mapping->private_data; + class = pool->size_class[class_idx]; + + spin_lock(&class->lock); + if (get_zspage_inuse(zspage) == 0) { + spin_unlock(&class->lock); + return false; + } + + /* zspage is isolated for object migration */ + if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { + spin_unlock(&class->lock); + return false; + } + + /* + * If this is first time isolation for the zspage, isolate zspage from + * size_class to prevent further object allocation from the zspage. + */ + if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { + get_zspage_mapping(zspage, &class_idx, &fullness); + atomic_long_inc(&pool->isolated_pages); + remove_zspage(class, zspage, fullness); + } + inc_zspage_isolation(zspage); - migrate_write_unlock(zspage); + spin_unlock(&class->lock); return true; } @@ -1841,13 +1923,16 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, { struct zs_pool *pool; struct size_class *class; + int class_idx; + enum fullness_group fullness; struct zspage *zspage; struct page *dummy; void *s_addr, *d_addr, *addr; - int offset; - unsigned long handle; + int offset, pos; + unsigned long handle, head; unsigned long old_obj, new_obj; unsigned int obj_idx; + int ret = -EAGAIN; /* * We cannot support the _NO_COPY case here, because copy needs to @@ -1860,25 +1945,35 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); - pool = mapping->private_data; - - /* - * The pool migrate_lock protects the race between zpage migration - * and zs_free. - */ - write_lock(&pool->migrate_lock); zspage = get_zspage(page); - class = zspage_class(pool, zspage); - /* - * the class lock protects zpage alloc/free in the zspage. - */ - spin_lock(&class->lock); - /* the migrate_write_lock protects zpage access via zs_map_object */ + /* Concurrent compactor cannot migrate any subpage in zspage */ migrate_write_lock(zspage); - + get_zspage_mapping(zspage, &class_idx, &fullness); + pool = mapping->private_data; + class = pool->size_class[class_idx]; offset = get_first_obj_offset(page); + + spin_lock(&class->lock); + if (!get_zspage_inuse(zspage)) { + /* + * Set "offset" to end of the page so that every loops + * skips unnecessary object scanning. + */ + offset = PAGE_SIZE; + } + + pos = offset; s_addr = kmap_atomic(page); + while (pos < PAGE_SIZE) { + head = obj_to_head(page, s_addr + pos); + if (head & OBJ_ALLOCATED_TAG) { + handle = head & ~OBJ_ALLOCATED_TAG; + if (!trypin_tag(handle)) + goto unpin_objects; + } + pos += class->size; + } /* * Here, any user cannot access all objects in the zspage so let's move. 
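/*
 * The surrounding hunks revert zsmalloc from the pool->migrate_lock scheme
 * back to per-object "pin tags": the low bit of the word a handle points to
 * doubles as a bit spinlock (HANDLE_PIN_BIT) that holds off migration while
 * the object is in use, which is why the loop above pins every allocated
 * object before the page is copied. Below is a minimal userspace analogue
 * of that idiom, using C11 atomics in place of the kernel's bit_spin_lock()
 * family; the names mirror the diff, but this is an illustrative sketch,
 * not the kernel implementation.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define HANDLE_PIN_BIT	0	/* bit index used as the lock, as in the diff */
#define OBJ_TAG_BITS	1	/* low bits of the handle word reserved for tags */

static bool trypin_tag(atomic_ulong *handle)
{
	/* test_and_set_bit semantics: succeed only if the bit was clear */
	unsigned long old = atomic_fetch_or(handle, 1UL << HANDLE_PIN_BIT);

	return !(old & (1UL << HANDLE_PIN_BIT));
}

static void pin_tag(atomic_ulong *handle)
{
	while (!trypin_tag(handle))
		;	/* spin, as bit_spin_lock() does */
}

static void unpin_tag(atomic_ulong *handle)
{
	atomic_fetch_and(handle, ~(1UL << HANDLE_PIN_BIT));
}

int main(void)
{
	/* the encoded object location is shifted left, so bit 0 is free */
	atomic_ulong handle = 0x2aUL << OBJ_TAG_BITS;

	pin_tag(&handle);	/* migration can no longer relocate the object */
	printf("obj %#lx\n", atomic_load(&handle) >> OBJ_TAG_BITS);
	unpin_tag(&handle);
	return 0;
}
/*
 * This is also why migrate_zspage() above ORs BIT(HANDLE_PIN_BIT) back into
 * free_obj before record_obj(): record_obj() overwrites the whole handle
 * word, so dropping the bit would silently release the lock a concurrent
 * zs_free() is spinning on.
 */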
@@ -1887,30 +1982,42 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, memcpy(d_addr, s_addr, PAGE_SIZE); kunmap_atomic(d_addr); - for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE; + for (addr = s_addr + offset; addr < s_addr + pos; addr += class->size) { - if (obj_allocated(page, addr, &handle)) { + head = obj_to_head(page, addr); + if (head & OBJ_ALLOCATED_TAG) { + handle = head & ~OBJ_ALLOCATED_TAG; + BUG_ON(!testpin_tag(handle)); old_obj = handle_to_obj(handle); obj_to_location(old_obj, &dummy, &obj_idx); new_obj = (unsigned long)location_to_obj(newpage, obj_idx); + new_obj |= BIT(HANDLE_PIN_BIT); record_obj(handle, new_obj); } } - kunmap_atomic(s_addr); replace_sub_page(class, zspage, newpage, page); - /* - * Since we complete the data copy and set up new zspage structure, - * it's okay to release migration_lock. - */ - write_unlock(&pool->migrate_lock); - spin_unlock(&class->lock); - dec_zspage_isolation(zspage); - migrate_write_unlock(zspage); - get_page(newpage); + + dec_zspage_isolation(zspage); + + /* + * Page migration is done so let's putback isolated zspage to + * the list if @page is final isolated subpage in the zspage. + */ + if (!is_zspage_isolated(zspage)) { + /* + * We cannot race with zs_destroy_pool() here because we wait + * for isolation to hit zero before we start destroying. + * Also, we ensure that everyone can see pool->destroying before + * we start waiting. + */ + putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); + } + if (page_zone(newpage) != page_zone(page)) { dec_zone_page_state(page, NR_ZSPAGES); inc_zone_page_state(newpage, NR_ZSPAGES); @@ -1918,21 +2025,55 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, reset_page(page); put_page(page); + page = newpage; - return MIGRATEPAGE_SUCCESS; + ret = MIGRATEPAGE_SUCCESS; +unpin_objects: + for (addr = s_addr + offset; addr < s_addr + pos; + addr += class->size) { + head = obj_to_head(page, addr); + if (head & OBJ_ALLOCATED_TAG) { + handle = head & ~OBJ_ALLOCATED_TAG; + BUG_ON(!testpin_tag(handle)); + unpin_tag(handle); + } + } + kunmap_atomic(s_addr); + spin_unlock(&class->lock); + migrate_write_unlock(zspage); + + return ret; } static void zs_page_putback(struct page *page) { + struct zs_pool *pool; + struct size_class *class; + int class_idx; + enum fullness_group fg; + struct address_space *mapping; struct zspage *zspage; VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); zspage = get_zspage(page); - migrate_write_lock(zspage); + get_zspage_mapping(zspage, &class_idx, &fg); + mapping = page_mapping(page); + pool = mapping->private_data; + class = pool->size_class[class_idx]; + + spin_lock(&class->lock); dec_zspage_isolation(zspage); - migrate_write_unlock(zspage); + if (!is_zspage_isolated(zspage)) { + /* + * Due to page_lock, we cannot free zspage immediately + * so let's defer. + */ + putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); + } + spin_unlock(&class->lock); } static const struct address_space_operations zsmalloc_aops = { @@ -1954,8 +2095,36 @@ static int zs_register_migration(struct zs_pool *pool) return 0; } +static bool pool_isolated_are_drained(struct zs_pool *pool) +{ + return atomic_long_read(&pool->isolated_pages) == 0; +} + +/* Function for resolving migration */ +static void wait_for_isolated_drain(struct zs_pool *pool) +{ + + /* + * We're in the process of destroying the pool, so there are no + * active allocations. 
zs_page_isolate() fails for completely free + * zspages, so we need only wait for the zs_pool's isolated + * count to hit zero. + */ + wait_event(pool->migration_wait, + pool_isolated_are_drained(pool)); +} + static void zs_unregister_migration(struct zs_pool *pool) { + pool->destroying = true; + /* + * We need a memory barrier here to ensure global visibility of + * pool->destroying. Thus pool->isolated pages will either be 0 in which + * case we don't care, or it will be > 0 and pool->destroying will + * ensure that we wake up once isolation hits 0. + */ + smp_mb(); + wait_for_isolated_drain(pool); /* This can block */ flush_work(&pool->free_work); iput(pool->inode); } @@ -1985,6 +2154,7 @@ static void async_free_zspage(struct work_struct *work) spin_unlock(&class->lock); } + list_for_each_entry_safe(zspage, tmp, &free_pages, list) { list_del(&zspage->list); lock_zspage(zspage); @@ -2048,13 +2218,8 @@ static unsigned long __zs_compact(struct zs_pool *pool, struct zspage *dst_zspage = NULL; unsigned long pages_freed = 0; - /* protect the race between zpage migration and zs_free */ - write_lock(&pool->migrate_lock); - /* protect zpage allocation/free */ spin_lock(&class->lock); while ((src_zspage = isolate_zspage(class, true))) { - /* protect someone accessing the zspage(i.e., zs_map_object) */ - migrate_write_lock(src_zspage); if (!zs_can_compact(class)) break; @@ -2063,8 +2228,6 @@ static unsigned long __zs_compact(struct zs_pool *pool, cc.s_page = get_first_page(src_zspage); while ((dst_zspage = isolate_zspage(class, false))) { - migrate_write_lock_nested(dst_zspage); - cc.d_page = get_first_page(dst_zspage); /* * If there is no more space in dst_page, resched @@ -2074,10 +2237,6 @@ static unsigned long __zs_compact(struct zs_pool *pool, break; putback_zspage(class, dst_zspage); - migrate_write_unlock(dst_zspage); - dst_zspage = NULL; - if (rwlock_is_contended(&pool->migrate_lock)) - break; } /* Stop if we couldn't find slot */ @@ -2085,28 +2244,19 @@ static unsigned long __zs_compact(struct zs_pool *pool, break; putback_zspage(class, dst_zspage); - migrate_write_unlock(dst_zspage); - if (putback_zspage(class, src_zspage) == ZS_EMPTY) { - migrate_write_unlock(src_zspage); free_zspage(pool, class, src_zspage); pages_freed += class->pages_per_zspage; - } else - migrate_write_unlock(src_zspage); + } spin_unlock(&class->lock); - write_unlock(&pool->migrate_lock); cond_resched(); - write_lock(&pool->migrate_lock); spin_lock(&class->lock); } - if (src_zspage) { + if (src_zspage) putback_zspage(class, src_zspage); - migrate_write_unlock(src_zspage); - } spin_unlock(&class->lock); - write_unlock(&pool->migrate_lock); return pages_freed; } @@ -2212,12 +2362,15 @@ struct zs_pool *zs_create_pool(const char *name) return NULL; init_deferred_free(pool); - rwlock_init(&pool->migrate_lock); pool->name = kstrdup(name, GFP_KERNEL); if (!pool->name) goto err; +#ifdef CONFIG_COMPACTION + init_waitqueue_head(&pool->migration_wait); +#endif + if (create_cache(pool)) goto err; diff --git a/mm/zswap.c b/mm/zswap.c index d6a463c9cf..4719af5f27 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1394,7 +1394,7 @@ static void zswap_frontswap_init(unsigned type) zswap_trees[type] = tree; } -static const struct frontswap_ops zswap_frontswap_ops = { +static struct frontswap_ops zswap_frontswap_ops = { .store = zswap_frontswap_store, .load = zswap_frontswap_load, .invalidate_page = zswap_frontswap_invalidate_page, @@ -1479,9 +1479,7 @@ static int __init init_zswap(void) if (!shrink_wq) goto hp_fail; - ret = 
frontswap_register_ops(&zswap_frontswap_ops); - if (ret) - goto destroy_wq; + frontswap_register_ops(&zswap_frontswap_ops); if (zswap_debugfs_init()) pr_warn("debugfs initialization failed\n"); @@ -1490,8 +1488,6 @@ return 0; -destroy_wq: - destroy_workqueue(shrink_wq); hp_fail: cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE); dstmem_fail: diff --git a/net/802/hippi.c b/net/802/hippi.c index 1997b7dd26..f80b33a8f7 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -65,7 +65,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, hip->le.src_addr_type = 2; /* 12 bit SC address */ memcpy(hip->le.src_switch_addr, dev->dev_addr + 3, 3); - memset_startat(&hip->le, 0, reserved); + memset(&hip->le.reserved, 0, 16); hip->snap.dsap = HIPPI_EXTENDED_SAP; hip->snap.ssap = HIPPI_EXTENDED_SAP; @@ -121,7 +121,7 @@ int hippi_mac_addr(struct net_device *dev, void *p) struct sockaddr *addr = p; if (netif_running(dev)) return -EBUSY; - dev_addr_set(dev, addr->sa_data); + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } EXPORT_SYMBOL(hippi_mac_addr); diff --git a/net/802/p8022.c b/net/802/p8022.c index 79c2317311..a658562705 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -23,7 +23,7 @@ #include <net/p8022.h> static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, - const unsigned char *dest) + unsigned char *dest) { llc_build_and_send_ui_pkt(dl->sap, skb, dest, dl->sap->laddr.lsap); return 0; } diff --git a/net/802/psnap.c b/net/802/psnap.c index 1406bfdbda..4492e8d7ad 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -79,7 +79,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, * Put a SNAP header on a frame and pass to 802.2 */ static int snap_request(struct datalink_proto *dl, - struct sk_buff *skb, const u8 *dest) + struct sk_buff *skb, u8 *dest) { memcpy(skb_push(skb, 5), dl->type, 5); llc_build_and_send_ui_pkt(snap_sap, skb, dest, snap_sap->laddr.lsap); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 788076b002..abaa5d96de 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -319,8 +319,8 @@ static void vlan_transfer_features(struct net_device *dev, { struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); - netif_set_gso_max_size(vlandev, dev->gso_max_size); - netif_set_gso_max_segs(vlandev, dev->gso_max_segs); + vlandev->gso_max_size = dev->gso_max_size; + vlandev->gso_max_segs = dev->gso_max_segs; if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5eaf388755..1a705a4ef7 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -129,7 +129,6 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); -void vlan_dev_free_egress_priority(const struct net_device *dev); int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result, size_t size); @@ -140,6 +139,7 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); +void vlan_dev_uninit(struct net_device *dev); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_core.c
b/net/8021q/vlan_core.c index acf8c791f3..59bc13b5f1 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -476,9 +476,10 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, type = vhdr->h_vlan_encapsulated_proto; + rcu_read_lock(); ptype = gro_find_receive_by_type(type); if (!ptype) - goto out; + goto out_unlock; flush = 0; @@ -500,6 +501,8 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, ipv6_gro_receive, inet_gro_receive, head, skb); +out_unlock: + rcu_read_unlock(); out: skb_gro_flush_final(skb, pp, flush); @@ -513,12 +516,14 @@ static int vlan_gro_complete(struct sk_buff *skb, int nhoff) struct packet_offload *ptype; int err = -ENOENT; + rcu_read_lock(); ptype = gro_find_complete_by_type(type); if (ptype) err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, ipv6_gro_complete, inet_gro_complete, skb, nhoff + sizeof(*vhdr)); + rcu_read_unlock(); return err; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index d1902828a1..8602885c8a 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -250,7 +250,7 @@ bool vlan_dev_inherit_address(struct net_device *dev, if (dev->addr_assign_type != NET_ADDR_STOLEN) return false; - eth_hw_addr_set(dev, real_dev->dev_addr); + ether_addr_copy(dev->dev_addr, real_dev->dev_addr); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return true; } @@ -349,7 +349,7 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) dev_uc_del(real_dev, dev->dev_addr); out: - eth_hw_addr_set(dev, addr->sa_data); + ether_addr_copy(dev->dev_addr, addr->sa_data); return 0; } @@ -573,8 +573,8 @@ static int vlan_dev_init(struct net_device *dev) NETIF_F_ALL_FCOE; dev->features |= dev->hw_features | NETIF_F_LLTX; - netif_set_gso_max_size(dev, real_dev->gso_max_size); - netif_set_gso_max_segs(dev, real_dev->gso_max_segs); + dev->gso_max_size = real_dev->gso_max_size; + dev->gso_max_segs = real_dev->gso_max_segs; if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); @@ -586,7 +586,7 @@ static int vlan_dev_init(struct net_device *dev) dev->dev_id = real_dev->dev_id; if (is_zero_ether_addr(dev->dev_addr)) { - eth_hw_addr_set(dev, real_dev->dev_addr); + ether_addr_copy(dev->dev_addr, real_dev->dev_addr); dev->addr_assign_type = NET_ADDR_STOLEN; } if (is_zero_ether_addr(dev->broadcast)) @@ -616,13 +616,13 @@ static int vlan_dev_init(struct net_device *dev) return -ENOMEM; /* Get vlan's reference to real_dev */ - dev_hold_track(real_dev, &vlan->dev_tracker, GFP_KERNEL); + dev_hold(real_dev); return 0; } /* Note: this function might be called multiple times for the same device. 
*/ -void vlan_dev_free_egress_priority(const struct net_device *dev) +void vlan_dev_uninit(struct net_device *dev) { struct vlan_priority_tci_mapping *pm; struct vlan_dev_priv *vlan = vlan_dev_priv(dev); @@ -636,16 +636,6 @@ void vlan_dev_free_egress_priority(const struct net_device *dev) } } -static void vlan_dev_uninit(struct net_device *dev) -{ - struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - - vlan_dev_free_egress_priority(dev); - - /* Get rid of the vlan's reference to real_dev */ - dev_put_track(vlan->real_dev, &vlan->dev_tracker); -} - static netdev_features_t vlan_dev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -856,6 +846,9 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; + + /* Get rid of the vlan's reference to real_dev */ + dev_put(vlan->real_dev); } void vlan_setup(struct net_device *dev) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 53b1955b02..0db85aeb11 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -183,11 +183,10 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; err = vlan_changelink(dev, tb, data, extack); + if (!err) + err = register_vlan_dev(dev, extack); if (err) - return err; - err = register_vlan_dev(dev, extack); - if (err) - vlan_dev_free_egress_priority(dev); + vlan_dev_uninit(dev); return err; } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 08bf6c839e..ec87dea237 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -252,7 +252,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) stats = dev_get_stats(vlandev, &temp); seq_printf(seq, - "%s VID: %d REORDER_HDR: %i dev->priv_flags: %llx\n", + "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", vlandev->name, vlan->vlan_id, (int)(vlan->flags & 1), vlandev->priv_flags); diff --git a/net/9p/Kconfig b/net/9p/Kconfig index deabbd376c..64468c4979 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -15,13 +15,6 @@ menuconfig NET_9P if NET_9P -config NET_9P_FD - default NET_9P - tristate "9P FD Transport" - help - This builds support for transports over TCP, Unix sockets and - file descriptors.
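/*
 * The Kconfig and Makefile hunks here fold the FD transport (TCP, Unix
 * sockets, file descriptors) back into the core 9pnet module instead of
 * building it as a separate 9pnet_fd.ko. Either way, a 9P transport
 * announces itself to the client core through the p9_trans_module
 * interface from include/net/9p/transport.h. A condensed sketch of that
 * registration follows; "demo" and the stub callbacks are placeholders,
 * not the real trans_fd implementation.
 */
#include <linux/module.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>

static int demo_create(struct p9_client *client, const char *addr, char *args)
{
	return -ENOSYS;	/* parse addr/args and set up the connection here */
}

static void demo_close(struct p9_client *client)
{
}

static int demo_request(struct p9_client *client, struct p9_req_t *req)
{
	return -ENOSYS;	/* queue req->tc for transmission here */
}

static int demo_cancel(struct p9_client *client, struct p9_req_t *req)
{
	return 1;	/* not cancelled; 0 would mean it never went out */
}

static struct p9_trans_module p9_demo_trans = {
	.name = "demo",		/* selected at mount time with trans=demo */
	.maxsize = 4096,	/* largest msize this transport accepts */
	.def = 0,		/* not the default transport */
	.create = demo_create,
	.close = demo_close,
	.request = demo_request,
	.cancel = demo_cancel,
	.owner = THIS_MODULE,
};

static int __init demo_trans_init(void)
{
	v9fs_register_trans(&p9_demo_trans);
	return 0;
}

static void __exit demo_trans_exit(void)
{
	v9fs_unregister_trans(&p9_demo_trans);
}

module_init(demo_trans_init);
module_exit(demo_trans_exit);
MODULE_LICENSE("GPL");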
- config NET_9P_VIRTIO depends on VIRTIO tristate "9P Virtio Transport" diff --git a/net/9p/Makefile b/net/9p/Makefile index 1df9b344c3..aa0a5641e5 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_NET_9P) := 9pnet.o -obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o @@ -10,10 +9,8 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o client.o \ error.o \ protocol.o \ - trans_common.o \ - -9pnet_fd-objs := \ trans_fd.o \ + trans_common.o \ 9pnet_virtio-objs := \ trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index 8bba0d9cf9..7973267ec8 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * net/9p/clnt.c + * * 9P Client * * Copyright (C) 2008 by Eric Van Hensbergen @@ -30,9 +32,10 @@ #define DEFAULT_MSIZE (128 * 1024) -/* Client Option Parsing (code inspired by NFS code) - * - a little lazy - parse all client options - */ +/* + * Client Option Parsing (code inspired by NFS code) + * - a little lazy - parse all client options + */ enum { Opt_msize, @@ -86,18 +89,20 @@ int p9_show_client_options(struct seq_file *m, struct p9_client *clnt) } EXPORT_SYMBOL(p9_show_client_options); -/* Some error codes are taken directly from the server replies, +/* + * Some error codes are taken directly from the server replies, * make sure they are valid. */ static int safe_errno(int err) { - if (err > 0 || err < -MAX_ERRNO) { + if ((err > 0) || (err < -MAX_ERRNO)) { p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err); return -EPROTO; } return err; } + /* Interpret mount option for protocol version */ static int get_protocol_version(char *s) { @@ -112,9 +117,8 @@ static int get_protocol_version(char *s) } else if (!strcmp(s, "9p2000.L")) { version = p9_proto_2000L; p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); - } else { + } else pr_info("Unknown protocol version %s\n", s); - } return version; } @@ -143,13 +147,15 @@ static int parse_opts(char *opts, struct p9_client *clnt) return 0; tmp_options = kstrdup(opts, GFP_KERNEL); - if (!tmp_options) + if (!tmp_options) { + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; + } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token, r; - if (!*p) continue; token = match_token(p, tokens, args); @@ -181,7 +187,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) v9fs_put_trans(clnt->trans_mod); clnt->trans_mod = v9fs_get_trans_by_name(s); - if (!clnt->trans_mod) { + if (clnt->trans_mod == NULL) { pr_info("Could not find request transport: %s\n", s); ret = -EINVAL; @@ -373,7 +379,6 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r) static void p9_req_free(struct kref *ref) { struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount); - p9_fcall_fini(&r->tc); p9_fcall_fini(&r->rc); kmem_cache_free(p9_req_cache, r); @@ -418,7 +423,8 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) { p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag); - /* This barrier is needed to make sure any change made to req before + /* + * This barrier is needed to make sure any change made to req before * the status change is visible to another thread */ smp_wmb(); @@ -440,12 +446,12 @@ EXPORT_SYMBOL(p9_client_cb); */ int -p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, - int16_t *tag, int rewind) 
+p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, + int rewind) { - s8 r_type; - s16 r_tag; - s32 r_size; + int8_t r_type; + int16_t r_tag; + int32_t r_size; int offset = pdu->offset; int err; @@ -493,7 +499,7 @@ EXPORT_SYMBOL(p9_parse_header); static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { - s8 type; + int8_t type; int err; int ecode; @@ -504,7 +510,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) req->rc.size); return -EIO; } - /* dump the response from server + /* + * dump the response from server * This should be after check errors which populates pdu_fcall. */ trace_9p_protocol_dump(c, &req->rc); @@ -517,7 +524,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!p9_is_proto_dotl(c)) { char *ename; - err = p9pdu_readf(&req->rc, c->proto_version, "s?d", &ename, &ecode); if (err) @@ -568,11 +574,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, { int err; int ecode; - s8 type; + int8_t type; char *ename = NULL; err = p9_parse_header(&req->rc, NULL, &type, NULL, 0); - /* dump the response from server + /* + * dump the response from server * This should be after parse_header which populates pdu_fcall. */ trace_9p_protocol_dump(c, &req->rc); @@ -600,7 +607,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, if (len > inline_len) { /* We have error in external buffer */ if (!copy_from_iter_full(ename + inline_len, - len - inline_len, uidata)) { + len - inline_len, uidata)) { err = -EFAULT; goto out_err; } @@ -652,7 +659,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) { struct p9_req_t *req; - s16 oldtag; + int16_t oldtag; int err; err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1); @@ -665,7 +672,8 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); - /* if we haven't received a response for oldreq, + /* + * if we haven't received a response for oldreq, * remove it from the list */ if (oldreq->status == REQ_STATUS_SENT) { @@ -691,7 +699,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, return ERR_PTR(-EIO); /* if status is begin_disconnected we allow only clunk request */ - if (c->status == BeginDisconnect && type != P9_TCLUNK) + if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); req = p9_tag_alloc(c, type, req_size); @@ -739,9 +747,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (signal_pending(current)) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); - } else { + } else sigpending = 0; - } err = c->trans_mod->request(c, req); if (err < 0) { @@ -755,13 +762,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) /* Wait for the response */ err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD); - /* Make sure our req is coherent with regard to updates in other + /* + * Make sure our req is coherent with regard to updates in other * threads - echoes to wmb() in the callback */ smp_rmb(); - if (err == -ERESTARTSYS && c->status == Connected && - type == P9_TFLUSH) { + if ((err == -ERESTARTSYS) && (c->status == Connected) + && (type == P9_TFLUSH)) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); goto again; @@ -771,7 +779,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if (err == -ERESTARTSYS && c->status == Connected) { + if ((err == -ERESTARTSYS) && (c->status == Connected)) { p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -826,7 +834,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, struct p9_req_t *req; va_start(ap, fmt); - /* We allocate an inline protocol data of only 4k bytes. + /* + * We allocate an inline protocol data of only 4k bytes. * The actual content is passed in zero-copy fashion. */ req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); @@ -837,9 +846,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, if (signal_pending(current)) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); - } else { + } else sigpending = 0; - } err = c->trans_mod->zc_request(c, req, uidata, uodata, inlen, olen, in_hdrlen); @@ -853,7 +861,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if (err == -ERESTARTSYS && c->status == Connected) { + if ((err == -ERESTARTSYS) && (c->status == Connected)) { p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -889,11 +897,11 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) struct p9_fid *fid; p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); - fid = kmalloc(sizeof(*fid), GFP_KERNEL); + fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); if (!fid) return NULL; - memset(&fid->qid, 0, sizeof(fid->qid)); + memset(&fid->qid, 0, sizeof(struct p9_qid)); fid->mode = -1; fid->uid = current_fsuid(); fid->clnt = clnt; @@ -941,15 +949,15 @@ static int p9_client_version(struct p9_client *c) switch (c->proto_version) { case p9_proto_2000L: req = p9_client_rpc(c, P9_TVERSION, "ds", - c->msize, "9P2000.L"); + c->msize, "9P2000.L"); break; case p9_proto_2000u: req = p9_client_rpc(c, P9_TVERSION, "ds", - c->msize, "9P2000.u"); + c->msize, "9P2000.u"); break; case p9_proto_legacy: req = p9_client_rpc(c, P9_TVERSION, "ds", - c->msize, "9P2000"); + c->msize, "9P2000"); break; default: return -EINVAL; } @@ -966,13 +974,13 @@ static int p9_client_version(struct p9_client *c) } p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!strncmp(version, "9P2000.L", 8)) { + if (!strncmp(version, "9P2000.L", 8)) c->proto_version = p9_proto_2000L; - } else if (!strncmp(version, "9P2000.u", 8)) { + else if (!strncmp(version, "9P2000.u", 8)) c->proto_version = p9_proto_2000u; - } else if (!strncmp(version, "9P2000", 6)) { + else if (!strncmp(version, "9P2000", 6)) c->proto_version = p9_proto_legacy; - } else { + else { p9_debug(P9_DEBUG_ERROR, "server returned an unknown version: %s\n", version); err = -EREMOTEIO; goto error; @@ -1002,7 +1010,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) char *client_id; err = 0; - clnt = kmalloc(sizeof(*clnt), GFP_KERNEL); + clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); if (!clnt) return ERR_PTR(-ENOMEM); @@ -1024,7 +1032,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); - if (!clnt->trans_mod) { + if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; p9_debug(P9_DEBUG_ERROR, "No transport defined or default transport\n"); @@ -1038,13 +1046,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto
put_trans; - if (clnt->msize > clnt->trans_mod->maxsize) { + if (clnt->msize > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize; - pr_info("Limiting 'msize' to %d as this is the maximum " - "supported by transport %s\n", - clnt->msize, clnt->trans_mod->name - ); - } if (clnt->msize < 4096) { p9_debug(P9_DEBUG_ERROR, @@ -1117,14 +1120,14 @@ void p9_client_begin_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - const char *uname, kuid_t n_uname, - const char *aname) + const char *uname, kuid_t n_uname, const char *aname) { int err = 0; struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; + p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); @@ -1135,7 +1138,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, fid->uid = n_uname; req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, - afid ? afid->fid : P9_NOFID, uname, aname, n_uname); + afid ? afid->fid : P9_NOFID, uname, aname, n_uname); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1149,7 +1152,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, (unsigned long long)qid.path, qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -1164,14 +1167,14 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, EXPORT_SYMBOL(p9_client_attach); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, - const unsigned char * const *wnames, int clone) + const unsigned char * const *wnames, int clone) { int err; struct p9_client *clnt; struct p9_fid *fid; struct p9_qid *wqids; struct p9_req_t *req; - u16 nwqids, count; + uint16_t nwqids, count; err = 0; wqids = NULL; @@ -1184,14 +1187,14 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, } fid->uid = oldfid->uid; - } else { + } else fid = oldfid; - } + p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, - nwname, wnames); + nwname, wnames); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1214,9 +1217,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, for (count = 0; count < nwqids; count++) p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, - wqids[count].path, - wqids[count].version); + count, wqids[count].type, + (unsigned long long)wqids[count].path, + wqids[count].version); if (nwname) memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); @@ -1232,7 +1235,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, fid = NULL; error: - if (fid && fid != oldfid) + if (fid && (fid != oldfid)) p9_fid_destroy(fid); return ERR_PTR(err); @@ -1249,7 +1252,7 @@ int p9_client_open(struct p9_fid *fid, int mode) clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", - p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); + p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); err = 0; if (fid->mode != -1) @@ -1271,8 +1274,8 @@ int p9_client_open(struct p9_fid *fid, int mode) } p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", - p9_is_proto_dotl(clnt) ? 
"RLOPEN" : "ROPEN", qid.type, - qid.path, qid.version, iounit); + p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, + (unsigned long long)qid.path, qid.version, iounit); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; @@ -1285,8 +1288,8 @@ int p9_client_open(struct p9_fid *fid, int mode) } EXPORT_SYMBOL(p9_client_open); -int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, - u32 mode, kgid_t gid, struct p9_qid *qid) +int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, + kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; @@ -1294,16 +1297,16 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, int iounit; p9_debug(P9_DEBUG_9P, - ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", - ofid->fid, name, flags, mode, - from_kgid(&init_user_ns, gid)); + ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", + ofid->fid, name, flags, mode, + from_kgid(&init_user_ns, gid)); clnt = ofid->clnt; if (ofid->mode != -1) return -EINVAL; req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags, - mode, gid); + mode, gid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1316,7 +1319,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, } p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", - qid->type, qid->path, qid->version, iounit); + qid->type, + (unsigned long long)qid->path, + qid->version, iounit); memmove(&ofid->qid, qid, sizeof(struct p9_qid)); ofid->mode = mode; @@ -1339,7 +1344,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, int iounit; p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", - fid->fid, name, perm, mode); + fid->fid, name, perm, mode); err = 0; clnt = fid->clnt; @@ -1347,7 +1352,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, return -EINVAL; req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm, - mode, extension); + mode, extension); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1360,7 +1365,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, } p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; @@ -1374,18 +1381,18 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, EXPORT_SYMBOL(p9_client_fcreate); int p9_client_symlink(struct p9_fid *dfid, const char *name, - const char *symtgt, kgid_t gid, struct p9_qid *qid) + const char *symtgt, kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", - dfid->fid, name, symtgt); + dfid->fid, name, symtgt); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt, - gid); + gid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1398,7 +1405,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name, } p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", - qid->type, qid->path, qid->version); + qid->type, (unsigned long long)qid->path, qid->version); free_and_error: p9_tag_remove(clnt, req); @@ -1413,10 +1420,10 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> 
TLINK dfid %d oldfid %d newname %s\n", - dfid->fid, oldfid->fid, newname); + dfid->fid, oldfid->fid, newname); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, - newname); + newname); if (IS_ERR(req)) return PTR_ERR(req); @@ -1433,7 +1440,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", - fid->fid, datasync); + fid->fid, datasync); err = 0; clnt = fid->clnt; @@ -1469,8 +1476,8 @@ int p9_client_clunk(struct p9_fid *fid) return 0; again: - p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", - fid->fid, retries); + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, + retries); err = 0; clnt = fid->clnt; @@ -1484,16 +1491,16 @@ int p9_client_clunk(struct p9_fid *fid) p9_tag_remove(clnt, req); error: - /* Fid is not valid even after a failed clunk + /* + * Fid is not valid even after a failed clunk * If interrupted, retry once then give up and * leak fid until umount. */ if (err == -ERESTARTSYS) { if (retries++ == 0) goto again; - } else { + } else p9_fid_destroy(fid); - } return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -1533,7 +1540,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) struct p9_client *clnt; p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", - dfid->fid, name, flags); + dfid->fid, name, flags); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); @@ -1579,8 +1586,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, char *dataptr; *err = 0; - p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %zu\n", - fid->fid, offset, iov_iter_count(to)); + p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (unsigned long long) offset, (int)iov_iter_count(to)); rsize = fid->iounit; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) @@ -1646,13 +1653,13 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) *err = 0; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", - fid->fid, offset, iov_iter_count(from)); + fid->fid, (unsigned long long) offset, + iov_iter_count(from)); while (iov_iter_count(from)) { int count = iov_iter_count(from); int rsize = fid->iounit; - - if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) + if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; if (count < rsize) @@ -1665,7 +1672,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) fid->fid, offset, rsize); } else { req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, - offset, rsize, from); + offset, rsize, from); } if (IS_ERR(req)) { *err = PTR_ERR(req); @@ -1698,13 +1705,12 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) { int err; struct p9_client *clnt; - struct p9_wstat *ret; + struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL); struct p9_req_t *req; u16 ignored; p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); - ret = kmalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); @@ -1725,17 +1731,17 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) } p9_debug(P9_DEBUG_9P, - "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" - "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" - "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" - "<<< uid=%d gid=%d n_muid=%d\n", - ret->size, ret->type, ret->dev, ret->qid.type, ret->qid.path, - ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, - ret->name, ret->uid, 
ret->gid, ret->muid, ret->extension, - from_kuid(&init_user_ns, ret->n_uid), - from_kgid(&init_user_ns, ret->n_gid), - from_kuid(&init_user_ns, ret->n_muid)); + "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + "<<< uid=%d gid=%d n_muid=%d\n", + ret->size, ret->type, ret->dev, ret->qid.type, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, + from_kuid(&init_user_ns, ret->n_uid), + from_kgid(&init_user_ns, ret->n_gid), + from_kuid(&init_user_ns, ret->n_muid)); p9_tag_remove(clnt, req); return ret; @@ -1747,17 +1753,17 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) EXPORT_SYMBOL(p9_client_stat); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, - u64 request_mask) + u64 request_mask) { int err; struct p9_client *clnt; - struct p9_stat_dotl *ret; + struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl), + GFP_KERNEL); struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", - fid->fid, request_mask); + fid->fid, request_mask); - ret = kmalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); @@ -1777,27 +1783,26 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, goto error; } - p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n" - "<<< qid=%x.%llx.%x\n" - "<<< st_mode=%8.8x st_nlink=%llu\n" - "<<< st_uid=%d st_gid=%d\n" - "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n" - "<<< st_atime_sec=%lld st_atime_nsec=%lld\n" - "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n" - "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n" - "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" - "<<< st_gen=%lld st_data_version=%lld\n", - ret->st_result_mask, - ret->qid.type, ret->qid.path, ret->qid.version, - ret->st_mode, ret->st_nlink, - from_kuid(&init_user_ns, ret->st_uid), - from_kgid(&init_user_ns, ret->st_gid), - ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks, - ret->st_atime_sec, ret->st_atime_nsec, - ret->st_mtime_sec, ret->st_mtime_nsec, - ret->st_ctime_sec, ret->st_ctime_nsec, - ret->st_btime_sec, ret->st_btime_nsec, - ret->st_gen, ret->st_data_version); + p9_debug(P9_DEBUG_9P, + "<<< RGETATTR st_result_mask=%lld\n" + "<<< qid=%x.%llx.%x\n" + "<<< st_mode=%8.8x st_nlink=%llu\n" + "<<< st_uid=%d st_gid=%d\n" + "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n" + "<<< st_atime_sec=%lld st_atime_nsec=%lld\n" + "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n" + "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n" + "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" + "<<< st_gen=%lld st_data_version=%lld\n", + ret->st_result_mask, ret->qid.type, ret->qid.path, + ret->qid.version, ret->st_mode, ret->st_nlink, + from_kuid(&init_user_ns, ret->st_uid), + from_kgid(&init_user_ns, ret->st_gid), + ret->st_rdev, ret->st_size, ret->st_blksize, + ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, + ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, + ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, + ret->st_gen, ret->st_data_version); p9_tag_remove(clnt, req); return ret; @@ -1816,7 +1821,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version) /* size[2] type[2] dev[4] qid[13] */ /* mode[4] atime[4] mtime[4] length[8]*/ /* name[s] uid[s] gid[s] muid[s] */ - ret = 2 + 4 + 13 + 4 + 4 + 4 + 8 + 2 + 2 + 2 + 2; + ret = 
2+4+13+4+4+4+8+2+2+2+2; if (wst->name) ret += strlen(wst->name); @@ -1827,10 +1832,9 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version) if (wst->muid) ret += strlen(wst->muid); - if (proto_version == p9_proto_2000u || - proto_version == p9_proto_2000L) { - /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ - ret += 2 + 4 + 4 + 4; + if ((proto_version == p9_proto_2000u) || + (proto_version == p9_proto_2000L)) { + ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ if (wst->extension) ret += strlen(wst->extension); } @@ -1847,23 +1851,21 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; wst->size = p9_client_statsize(wst, clnt->proto_version); - p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", - fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); p9_debug(P9_DEBUG_9P, - " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" - " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" - " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" - " uid=%d gid=%d n_muid=%d\n", - wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, - wst->mode, wst->atime, wst->mtime, wst->length, - wst->name, wst->uid, wst->gid, wst->muid, wst->extension, - from_kuid(&init_user_ns, wst->n_uid), - from_kgid(&init_user_ns, wst->n_gid), - from_kuid(&init_user_ns, wst->n_muid)); + " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + " uid=%d gid=%d n_muid=%d\n", + wst->size, wst->type, wst->dev, wst->qid.type, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, + from_kuid(&init_user_ns, wst->n_uid), + from_kgid(&init_user_ns, wst->n_gid), + from_kuid(&init_user_ns, wst->n_muid)); - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", - fid->fid, wst->size + 2, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1886,15 +1888,15 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = 0; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); - p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n", - p9attr->valid, p9attr->mode, - from_kuid(&init_user_ns, p9attr->uid), - from_kgid(&init_user_ns, p9attr->gid), - p9attr->size); - p9_debug(P9_DEBUG_9P, " atime_sec=%lld atime_nsec=%lld\n", - p9attr->atime_sec, p9attr->atime_nsec); - p9_debug(P9_DEBUG_9P, " mtime_sec=%lld mtime_nsec=%lld\n", - p9attr->mtime_sec, p9attr->mtime_nsec); + p9_debug(P9_DEBUG_9P, + " valid=%x mode=%x uid=%d gid=%d size=%lld\n" + " atime_sec=%lld atime_nsec=%lld\n" + " mtime_sec=%lld mtime_nsec=%lld\n", + p9attr->valid, p9attr->mode, + from_kuid(&init_user_ns, p9attr->uid), + from_kgid(&init_user_ns, p9attr->gid), + p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, + p9attr->mtime_sec, p9attr->mtime_nsec); req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr); @@ -1935,10 +1937,12 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) goto error; } - p9_debug(P9_DEBUG_9P, - "<<< RSTATFS fid %d type 0x%x bsize %u blocks %llu bfree %llu bavail %llu files %llu ffree %llu fsid %llu namelen %u\n", - fid->fid, sb->type, sb->bsize, sb->blocks, sb->bfree, - sb->bavail, sb->files, sb->ffree, sb->fsid, sb->namelen); + p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " + "blocks 
%llu bfree %llu bavail %llu files %llu ffree %llu " + "fsid %llu namelen %ld\n", + fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, + sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree, + sb->fsid, (long int)sb->namelen); p9_tag_remove(clnt, req); error: @@ -1957,10 +1961,10 @@ int p9_client_rename(struct p9_fid *fid, clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", - fid->fid, newdirfid->fid, name); + fid->fid, newdirfid->fid, name); req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, - newdirfid->fid, name); + newdirfid->fid, name); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1984,9 +1988,9 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, err = 0; clnt = olddirfid->clnt; - p9_debug(P9_DEBUG_9P, - ">>> TRENAMEAT olddirfid %d old name %s newdirfid %d new name %s\n", - olddirfid->fid, old_name, newdirfid->fid, new_name); + p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" + " newdirfid %d new name %s\n", olddirfid->fid, old_name, + newdirfid->fid, new_name); req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, old_name, newdirfid->fid, new_name); @@ -1996,7 +2000,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, } p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", - newdirfid->fid, new_name); + newdirfid->fid, new_name); p9_tag_remove(clnt, req); error: @@ -2004,10 +2008,11 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, } EXPORT_SYMBOL(p9_client_renameat); -/* An xattrwalk without @attr_name gives the fid for the lisxattr namespace +/* + * An xattrwalk without @attr_name gives the fid for the lisxattr namespace */ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, - const char *attr_name, u64 *attr_size) + const char *attr_name, u64 *attr_size) { int err; struct p9_req_t *req; @@ -2022,11 +2027,11 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, goto error; } p9_debug(P9_DEBUG_9P, - ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", - file_fid->fid, attr_fid->fid, attr_name); + ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", + file_fid->fid, attr_fid->fid, attr_name); req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds", - file_fid->fid, attr_fid->fid, attr_name); + file_fid->fid, attr_fid->fid, attr_name); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -2039,13 +2044,13 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } p9_tag_remove(clnt, req); p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", - attr_fid->fid, *attr_size); + attr_fid->fid, *attr_size); return attr_fid; clunk_fid: p9_client_clunk(attr_fid); attr_fid = NULL; error: - if (attr_fid && attr_fid != file_fid) + if (attr_fid && (attr_fid != file_fid)) p9_fid_destroy(attr_fid); return ERR_PTR(err); @@ -2053,19 +2058,19 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, EXPORT_SYMBOL_GPL(p9_client_xattrwalk); int p9_client_xattrcreate(struct p9_fid *fid, const char *name, - u64 attr_size, int flags) + u64 attr_size, int flags) { int err; struct p9_req_t *req; struct p9_client *clnt; p9_debug(P9_DEBUG_9P, - ">>> TXATTRCREATE fid %d name %s size %llu flag %d\n", - fid->fid, name, attr_size, flags); + ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", + fid->fid, name, (long long)attr_size, flags); err = 0; clnt = fid->clnt; req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd", - fid->fid, name, attr_size, flags); + fid->fid, name, attr_size, flags); if (IS_ERR(req)) { err = PTR_ERR(req); 
goto error; @@ -2089,13 +2094,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) iov_iter_kvec(&to, READ, &kv, 1, count); p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", - fid->fid, offset, count); + fid->fid, (unsigned long long) offset, count); err = 0; clnt = fid->clnt; rsize = fid->iounit; - if (!rsize || rsize > clnt->msize - P9_READDIRHDRSZ) + if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) rsize = clnt->msize - P9_READDIRHDRSZ; if (count < rsize) @@ -2103,7 +2108,8 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { - /* response header len is 11 + /* + * response header len is 11 * PDU Header(7) + IO Size (4) */ req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0, @@ -2144,7 +2150,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) EXPORT_SYMBOL(p9_client_readdir); int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, - dev_t rdev, kgid_t gid, struct p9_qid *qid) + dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2152,11 +2158,10 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, err = 0; clnt = fid->clnt; - p9_debug(P9_DEBUG_9P, - ">>> TMKNOD fid %d name %s mode %d major %d minor %d\n", - fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); + p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode, - MAJOR(rdev), MINOR(rdev), gid); + MAJOR(rdev), MINOR(rdev), gid); if (IS_ERR(req)) return PTR_ERR(req); @@ -2165,17 +2170,18 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, trace_9p_protocol_dump(clnt, &req->rc); goto error; } - p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", - qid->type, qid->path, qid->version); + p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); error: p9_tag_remove(clnt, req); return err; + } EXPORT_SYMBOL(p9_client_mknod_dotl); int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, - kgid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2185,8 +2191,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", fid->fid, name, mode, from_kgid(&init_user_ns, gid)); - req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", - fid->fid, name, mode, gid); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode, + gid); if (IS_ERR(req)) return PTR_ERR(req); @@ -2196,11 +2202,12 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, goto error; } p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, - qid->path, qid->version); + (unsigned long long)qid->path, qid->version); error: p9_tag_remove(clnt, req); return err; + } EXPORT_SYMBOL(p9_client_mkdir_dotl); @@ -2212,14 +2219,14 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = 0; clnt = fid->clnt; - p9_debug(P9_DEBUG_9P, - ">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n", - fid->fid, flock->type, flock->flags, flock->start, - flock->length, flock->proc_id, flock->client_id); + p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d 
type %i flags %d " + "start %lld length %lld proc_id %d client_id %s\n", + fid->fid, flock->type, flock->flags, flock->start, + flock->length, flock->proc_id, flock->client_id); req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type, - flock->flags, flock->start, flock->length, - flock->proc_id, flock->client_id); + flock->flags, flock->start, flock->length, + flock->proc_id, flock->client_id); if (IS_ERR(req)) return PTR_ERR(req); @@ -2233,6 +2240,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) error: p9_tag_remove(clnt, req); return err; + } EXPORT_SYMBOL(p9_client_lock_dotl); @@ -2244,14 +2252,12 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) err = 0; clnt = fid->clnt; - p9_debug(P9_DEBUG_9P, - ">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n", - fid->fid, glock->type, glock->start, glock->length, - glock->proc_id, glock->client_id); + p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " + "length %lld proc_id %d client_id %s\n", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); - req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, - glock->type, glock->start, glock->length, - glock->proc_id, glock->client_id); + req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); if (IS_ERR(req)) return PTR_ERR(req); @@ -2263,10 +2269,9 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) trace_9p_protocol_dump(clnt, &req->rc); goto error; } - p9_debug(P9_DEBUG_9P, - "<<< RGETLOCK type %i start %lld length %lld proc_id %d client_id %s\n", - glock->type, glock->start, glock->length, - glock->proc_id, glock->client_id); + p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " + "proc_id %d client_id %s\n", glock->type, glock->start, + glock->length, glock->proc_id, glock->client_id); error: p9_tag_remove(clnt, req); return err; diff --git a/net/9p/error.c b/net/9p/error.c index 8da744494b..61c18daf30 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * linux/fs/9p/error.c + * * Error string handling * * Plan 9 uses error strings, Unix uses error numbers. These functions @@ -183,7 +185,7 @@ int p9_error_init(void) INIT_HLIST_HEAD(&hash_errmap[bucket]); /* load initial error map into hash table */ - for (c = errmap; c->name; c++) { + for (c = errmap; c->name != NULL; c++) { c->namelen = strlen(c->name); bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; INIT_HLIST_NODE(&c->list); diff --git a/net/9p/mod.c b/net/9p/mod.c index 55576c1866..5126566850 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * net/9p/9p.c + * * 9P entry point * * Copyright (C) 2007 by Latchesar Ionkov @@ -10,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -23,13 +24,13 @@ #include #ifdef CONFIG_NET_9P_DEBUG -unsigned int p9_debug_level; /* feature-rific global debug level */ +unsigned int p9_debug_level = 0; /* feature-rific global debug level */ EXPORT_SYMBOL(p9_debug_level); module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); void _p9_debug(enum p9_debug_flags level, const char *func, - const char *fmt, ...) + const char *fmt, ...) 
{ struct va_format vaf; va_list args; @@ -52,7 +53,10 @@ void _p9_debug(enum p9_debug_flags level, const char *func, EXPORT_SYMBOL(_p9_debug); #endif -/* Dynamic Transport Registration Routines */ +/* + * Dynamic Transport Registration Routines + * + */ static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); @@ -83,7 +87,12 @@ void v9fs_unregister_trans(struct p9_trans_module *m) } EXPORT_SYMBOL(v9fs_unregister_trans); -static struct p9_trans_module *_p9_get_trans_by_name(const char *s) +/** + * v9fs_get_trans_by_name - get transport with the matching name + * @s: string identifying transport + * + */ +struct p9_trans_module *v9fs_get_trans_by_name(char *s) { struct p9_trans_module *t, *found = NULL; @@ -97,36 +106,10 @@ static struct p9_trans_module *_p9_get_trans_by_name(const char *s) } spin_unlock(&v9fs_trans_lock); - - return found; -} - -/** - * v9fs_get_trans_by_name - get transport with the matching name - * @s: string identifying transport - * - */ -struct p9_trans_module *v9fs_get_trans_by_name(const char *s) -{ - struct p9_trans_module *found = NULL; - - found = _p9_get_trans_by_name(s); - -#ifdef CONFIG_MODULES - if (!found) { - request_module("9p-%s", s); - found = _p9_get_trans_by_name(s); - } -#endif - return found; } EXPORT_SYMBOL(v9fs_get_trans_by_name); -static const char * const v9fs_default_transports[] = { - "virtio", "tcp", "fd", "unix", "xen", "rdma", -}; - /** * v9fs_get_default_trans - get the default transport * @@ -135,7 +118,6 @@ static const char * const v9fs_default_transports[] = { struct p9_trans_module *v9fs_get_default_trans(void) { struct p9_trans_module *t, *found = NULL; - int i; spin_lock(&v9fs_trans_lock); @@ -153,10 +135,6 @@ struct p9_trans_module *v9fs_get_default_trans(void) } spin_unlock(&v9fs_trans_lock); - - for (i = 0; !found && i < ARRAY_SIZE(v9fs_default_transports); i++) - found = v9fs_get_trans_by_name(v9fs_default_transports[i]); - return found; } EXPORT_SYMBOL(v9fs_get_default_trans); @@ -186,6 +164,7 @@ static int __init init_p9(void) p9_error_init(); pr_info("Installing 9P2000 support\n"); + p9_trans_fd_init(); return ret; } @@ -199,6 +178,7 @@ static void __exit exit_p9(void) { pr_info("Unloading 9P2000 support\n"); + p9_trans_fd_exit(); p9_client_exit(); } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 3754c33e29..03593eb240 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * net/9p/protocol.c + * * 9P Protocol Support Code * * Copyright (C) 2008 by Eric Van Hensbergen @@ -44,7 +46,6 @@ EXPORT_SYMBOL(p9stat_free); size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { size_t len = min(pdu->size - pdu->offset, size); - memcpy(data, &pdu->sdata[pdu->offset], len); pdu->offset += len; return size - len; @@ -53,7 +54,6 @@ size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) { size_t len = min(pdu->capacity - pdu->size, size); - memcpy(&pdu->sdata[pdu->size], data, len); pdu->size += len; return size - len; @@ -64,7 +64,6 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size) { size_t len = min(pdu->capacity - pdu->size, size); struct iov_iter i = *from; - if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i)) len = 0; @@ -72,25 +71,26 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size) return size - len; } -/* b - int8_t - * w - int16_t - * d - int32_t - * q - int64_t - * s - string - * u - numeric uid - 
* g - numeric gid - * S - stat - * Q - qid - * D - data blob (int32_t size followed by void *, results are not freed) - * T - array of strings (int16_t count, followed by strings) - * R - array of qids (int16_t count, followed by qids) - * A - stat for 9p2000.L (p9_stat_dotl) - * ? - if optional = 1, continue parsing - */ +/* + b - int8_t + w - int16_t + d - int32_t + q - int64_t + s - string + u - numeric uid + g - numeric gid + S - stat + Q - qid + D - data blob (int32_t size followed by void *, results are not freed) + T - array of strings (int16_t count, followed by strings) + R - array of qids (int16_t count, followed by qids) + A - stat for 9p2000.L (p9_stat_dotl) + ? - if optional = 1, continue parsing +*/ static int p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, - va_list ap) + va_list ap) { const char *ptr; int errcode = 0; diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 6d719c3033..6835f91cfd 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* + * net/9p/protocol.h + * * 9P Protocol Support Code * * Copyright (C) 2008 by Eric Van Hensbergen @@ -9,7 +11,7 @@ */ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, - va_list ap); + va_list ap); int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index c827f69455..6ea5ea548c 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -1,7 +1,15 @@ -// SPDX-License-Identifier: LGPL-2.1 /* * Copyright IBM Corporation, 2010 * Author Venkateswararao Jujjuri + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * */ #include diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h index 32134db6ab..c43babb3f6 100644 --- a/net/9p/trans_common.h +++ b/net/9p/trans_common.h @@ -1,7 +1,15 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright IBM Corporation, 2010 * Author Venkateswararao Jujjuri + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * */ -void p9_release_pages(struct page **pages, int nr_pages); +void p9_release_pages(struct page **, int); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 8f8f95e39b..007bbcc680 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * linux/fs/9p/trans_fd.c + * * Fd transport layer. Includes deprecated socket layer. 
* * Copyright (C) 2006 by Russ Cox @@ -1090,7 +1092,6 @@ static struct p9_trans_module p9_tcp_trans = { .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; -MODULE_ALIAS_9P("tcp"); static struct p9_trans_module p9_unix_trans = { .name = "unix", @@ -1104,7 +1105,6 @@ static struct p9_trans_module p9_unix_trans = { .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; -MODULE_ALIAS_9P("unix"); static struct p9_trans_module p9_fd_trans = { .name = "fd", @@ -1118,7 +1118,6 @@ static struct p9_trans_module p9_fd_trans = { .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; -MODULE_ALIAS_9P("fd"); /** * p9_poll_workfn - poll worker thread @@ -1152,7 +1151,7 @@ static void p9_poll_workfn(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "finish\n"); } -static int __init p9_trans_fd_init(void) +int p9_trans_fd_init(void) { v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); @@ -1161,17 +1160,10 @@ static int __init p9_trans_fd_init(void) return 0; } -static void __exit p9_trans_fd_exit(void) +void p9_trans_fd_exit(void) { flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); } - -module_init(p9_trans_fd_init); -module_exit(p9_trans_fd_exit); - -MODULE_AUTHOR("Eric Van Hensbergen "); -MODULE_DESCRIPTION("Filedescriptor Transport for 9P"); -MODULE_LICENSE("GPL"); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 88e5638266..af0a8a6cd3 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * linux/fs/9p/trans_rdma.c + * * RDMA transport layer based on the trans_fd.c implementation. * * Copyright (C) 2008 by Tom Tucker @@ -765,7 +767,6 @@ static void __exit p9_trans_rdma_exit(void) module_init(p9_trans_rdma_init); module_exit(p9_trans_rdma_exit); -MODULE_ALIAS_9P("rdma"); MODULE_AUTHOR("Tom Tucker "); MODULE_DESCRIPTION("RDMA Transport for 9P"); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index b24a4fb0f0..490a4c9003 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -648,7 +648,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) * @args: args passed from sys_mount() for per-transport options (unused) * * This sets up a transport channel for 9p communication. Right now - * we only match the first available channel, but eventually we could look up + * we only match the first available channel, but eventually we couldlook up * alternate channels by matching devname versus a virtio_config entry. * We use a simple reference count mechanism to ensure that only a single * mount has a channel open at a time. @@ -721,7 +721,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_unlock(&virtio_9p_lock); - virtio_reset_device(vdev); + vdev->config->reset(vdev); vdev->config->del_vqs(vdev); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); @@ -794,7 +794,6 @@ static void __exit p9_virtio_cleanup(void) module_init(p9_virtio_init); module_exit(p9_virtio_cleanup); -MODULE_ALIAS_9P("virtio"); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_AUTHOR("Eric Van Hensbergen "); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index eb9fb55280..432ac5a16f 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -1,10 +1,33 @@ -// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/trans_xen * * Xen transport layer. 
* * Copyright (C) 2017 by Stefano Stabellini + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. */ #include @@ -281,9 +304,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } - free_pages((unsigned long)priv->rings[i].data.in, - priv->rings[i].intf->ring_order - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(priv->rings[i].data.in, + 1UL << (priv->rings[i].intf->ring_order + + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); free_page((unsigned long)priv->rings[i].intf); @@ -322,8 +345,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (ret < 0) goto out; ring->ref = ret; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - order - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT), + GFP_KERNEL | __GFP_ZERO); if (!bytes) { ret = -ENOMEM; goto out; @@ -354,9 +377,7 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (bytes) { for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); - free_pages((unsigned long)bytes, - ring->intf->ring_order - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(ring->ref, 0, 0); free_page((unsigned long)ring->intf); @@ -529,7 +550,6 @@ static int p9_trans_xen_init(void) return rc; } module_init(p9_trans_xen_init); -MODULE_ALIAS_9P("xen"); static void p9_trans_xen_exit(void) { @@ -538,7 +558,6 @@ static void p9_trans_xen_exit(void) } module_exit(p9_trans_xen_exit); -MODULE_ALIAS("xen:9pfs"); MODULE_AUTHOR("Stefano Stabellini "); MODULE_DESCRIPTION("Xen Transport for 9P"); MODULE_LICENSE("GPL"); diff --git a/net/Kconfig b/net/Kconfig index 8a1f9d0287..fb13460c6d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID config NET_RX_BUSY_POLL bool - default y if !PREEMPT_RT + default y config BQL bool @@ -455,9 +455,4 @@ config ETHTOOL_NETLINK netlink. It provides better extensibility and some new features, e.g. notification messages. 
-config NETDEV_ADDR_LIST_TEST - tristate "Unit tests for device address list" - default KUNIT_ALL_TESTS - depends on KUNIT - endif # if NET diff --git a/net/atm/br2684.c b/net/atm/br2684.c index f666f2f98b..dd2a8dabed 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -577,12 +577,10 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) pr_debug("vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, brvcc); if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { unsigned char *esi = atmvcc->dev->esi; - const u8 one = 1; - if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5]) - dev_addr_set(net_dev, esi); + memcpy(net_dev->dev_addr, esi, net_dev->addr_len); else - dev_addr_mod(net_dev, 2, &one, 1); + net_dev->dev_addr[2] = 1; } list_add(&brvcc->brvccs, &brdev->brvccs); write_unlock_irq(&devs_lock); diff --git a/net/atm/lec.c b/net/atm/lec.c index 6257bf12e5..7226c784db 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -340,12 +340,12 @@ static int lec_close(struct net_device *dev) static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { - static const u8 zero_addr[ETH_ALEN] = {}; unsigned long flags; struct net_device *dev = (struct net_device *)vcc->proto_data; struct lec_priv *priv = netdev_priv(dev); struct atmlec_msg *mesg; struct lec_arp_table *entry; + int i; char *tmp; /* FIXME */ WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); @@ -355,10 +355,12 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); switch (mesg->type) { case l_set_mac_addr: - eth_hw_addr_set(dev, mesg->content.normal.mac_addr); + for (i = 0; i < 6; i++) + dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; break; case l_del_mac_addr: - eth_hw_addr_set(dev, zero_addr); + for (i = 0; i < 6; i++) + dev->dev_addr[i] = 0; break; case l_addr_delete: lec_addr_delete(priv, mesg->content.normal.atm_addr, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d53cbb4e25..ea3431ac46 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -91,10 +91,8 @@ static void ax25_kill_by_device(struct net_device *dev) spin_unlock_bh(&ax25_list_lock); lock_sock(sk); s->ax25_dev = NULL; - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); - ax25_disconnect(s, ENETUNREACH); release_sock(sk); + ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); sock_put(sk); /* The entry could have been deleted from the @@ -209,7 +207,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, * Find an AX.25 control block given both ends. It will only pick up * floating AX.25 control blocks or non Raw socket bound control blocks. 
*/ -ax25_cb *ax25_find_cb(const ax25_address *src_addr, ax25_address *dest_addr, +ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr, ax25_digi *digi, struct net_device *dev) { ax25_cb *s; @@ -360,25 +358,21 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; + if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) + return -ENODEV; + if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) return -EINVAL; - ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); - if (!ax25_dev) - return -ENODEV; - digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = ax25_ctl.digi_addr[k]; - ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); - if (!ax25) { - ax25_dev_put(ax25_dev); + if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) return -ENOTCONN; - } switch (ax25_ctl.cmd) { case AX25_KILL: @@ -445,7 +439,6 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) } out_put: - ax25_dev_put(ax25_dev); ax25_cb_put(ax25); return ret; @@ -1117,10 +1110,8 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } } - if (ax25_dev) { + if (ax25_dev != NULL) ax25_fillin_cb(ax25, ax25_dev); - dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); - } done: ax25_cb_add(ax25); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index d2a244e1c2..4ac2e08476 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -35,9 +35,8 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) spin_lock_bh(&ax25_dev_lock); for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) - if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { + if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; - ax25_dev_hold(ax25_dev); } spin_unlock_bh(&ax25_dev_lock); @@ -57,10 +56,9 @@ void ax25_dev_device_up(struct net_device *dev) return; } - refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; - dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + dev_hold(dev); ax25_dev->forward = NULL; ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE; @@ -86,7 +84,6 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; spin_unlock_bh(&ax25_dev_lock); - ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -116,10 +113,9 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; - dev_put_track(dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); + dev_put(dev); + kfree(ax25_dev); return; } @@ -127,10 +123,9 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; - dev_put_track(dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); + dev_put(dev); + kfree(ax25_dev); return; } @@ -138,7 +133,6 @@ void ax25_dev_device_down(struct net_device *dev) } spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; - ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) @@ -150,32 +144,20 @@ int ax25_fwd_ioctl(unsigned int cmd, struct 
ax25_fwd_struct *fwd) switch (cmd) { case SIOCAX25ADDFWD: - fwd_dev = ax25_addr_ax25dev(&fwd->port_to); - if (!fwd_dev) { - ax25_dev_put(ax25_dev); + if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL) return -EINVAL; - } - if (ax25_dev->forward) { - ax25_dev_put(fwd_dev); - ax25_dev_put(ax25_dev); + if (ax25_dev->forward != NULL) return -EINVAL; - } ax25_dev->forward = fwd_dev->dev; - ax25_dev_put(fwd_dev); - ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: - if (!ax25_dev->forward) { - ax25_dev_put(ax25_dev); + if (ax25_dev->forward == NULL) return -EINVAL; - } ax25_dev->forward = NULL; - ax25_dev_put(ax25_dev); break; default: - ax25_dev_put(ax25_dev); return -EINVAL; } @@ -206,7 +188,7 @@ void __exit ax25_dev_free(void) ax25_dev = ax25_dev_list; while (ax25_dev != NULL) { s = ax25_dev; - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + dev_put(ax25_dev->dev); ax25_dev = ax25_dev->next; kfree(s); } diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 979bc4b828..b4083f30af 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -98,7 +98,7 @@ void ax25_linkfail_release(struct ax25_linkfail *lf) EXPORT_SYMBOL(ax25_linkfail_release); -int ax25_listen_register(const ax25_address *callsign, struct net_device *dev) +int ax25_listen_register(ax25_address *callsign, struct net_device *dev) { struct listen_struct *listen; @@ -121,7 +121,7 @@ int ax25_listen_register(const ax25_address *callsign, struct net_device *dev) EXPORT_SYMBOL(ax25_listen_register); -void ax25_listen_release(const ax25_address *callsign, struct net_device *dev) +void ax25_listen_release(ax25_address *callsign, struct net_device *dev) { struct listen_struct *s, *listen; @@ -171,7 +171,7 @@ int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *) return res; } -int ax25_listen_mine(const ax25_address *callsign, struct net_device *dev) +int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) { struct listen_struct *listen; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 1cac25aca6..cd6afe895d 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -181,7 +181,7 @@ static int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type, i } static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, - const ax25_address *dev_addr, struct packet_type *ptype) + ax25_address *dev_addr, struct packet_type *ptype) { ax25_address src, dest, *next_digi = NULL; int type = 0, mine = 0, dama; @@ -447,5 +447,5 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb_pull(skb, AX25_KISS_HEADER_LEN); /* Remove the KISS byte */ - return ax25_rcv(skb, dev, (const ax25_address *)dev->dev_addr, ptype); + return ax25_rcv(skb, dev, (ax25_address *)dev->dev_addr, ptype); } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 3db76d2470..22f2f66c6e 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(ax25_frag_lock); -ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev) +ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *ax25; diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 9751207f77..d0b2e094bd 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -75,11 +75,9 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_dev 
*ax25_dev; int i; - if (route->digi_count > AX25_MAX_DIGIS) + if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) return -EINVAL; - - ax25_dev = ax25_addr_ax25dev(&route->port_addr); - if (!ax25_dev) + if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; write_lock_bh(&ax25_route_lock); @@ -93,7 +91,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -104,7 +101,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; @@ -112,7 +108,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); return -ENOMEM; } @@ -125,7 +120,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); - ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -138,7 +132,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); return 0; } @@ -180,7 +173,6 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); return 0; } @@ -223,7 +215,6 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) out: write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); return err; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 2ed9496fc4..17687848da 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -254,7 +254,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv, * Return: backbone gateway if found or NULL otherwise */ static struct batadv_bla_backbone_gw * -batadv_backbone_hash_find(struct batadv_priv *bat_priv, const u8 *addr, +batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; @@ -336,7 +336,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) 
*/ -static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, +static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, unsigned short vid, int claimtype) { struct sk_buff *skb; @@ -488,7 +488,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) * Return: the (possibly created) backbone gateway or NULL on error */ static struct batadv_bla_backbone_gw * -batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, +batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; @@ -926,7 +926,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, */ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - const u8 *backbone_addr, const u8 *claim_addr, + u8 *backbone_addr, u8 *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -964,7 +964,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, */ static bool batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - const u8 *backbone_addr, const u8 *claim_addr, + u8 *backbone_addr, u8 *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -2130,7 +2130,7 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_hard_iface *primary_if, struct batadv_bla_claim *claim) { - const u8 *primary_addr = primary_if->net_dev->dev_addr; + u8 *primary_addr = primary_if->net_dev->dev_addr; u16 backbone_crc; bool is_own; void *hdr; @@ -2298,7 +2298,7 @@ batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_hard_iface *primary_if, struct batadv_bla_backbone_gw *backbone_gw) { - const u8 *primary_addr = primary_if->net_dev->dev_addr; + u8 *primary_addr = primary_if->net_dev->dev_addr; u16 backbone_crc; bool is_own; int msecs; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 8a2b78f9c4..35fadb9248 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -149,22 +149,25 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) struct net *net = dev_net(net_dev); struct net_device *parent_dev; struct net *parent_net; + int iflink; bool ret; /* check if this is a batman-adv mesh interface */ if (batadv_softif_is_valid(net_dev)) return true; - /* no more parents..stop recursion */ - if (dev_get_iflink(net_dev) == 0 || - dev_get_iflink(net_dev) == net_dev->ifindex) + iflink = dev_get_iflink(net_dev); + if (iflink == 0) return false; parent_net = batadv_getlink_net(net_dev, net); + /* iflink to itself, most likely physical device */ + if (net == parent_net && iflink == net_dev->ifindex) + return false; + /* recurse over the parent device */ - parent_dev = __dev_get_by_index((struct net *)parent_net, - dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, iflink); /* if we got a NULL parent_dev there is something broken.. 
*/ if (!parent_dev) { pr_err("Cannot find parent device\n"); @@ -214,14 +217,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) struct net_device *real_netdev = NULL; struct net *real_net; struct net *net; - int ifindex; + int iflink; ASSERT_RTNL(); if (!netdev) return NULL; - if (netdev->ifindex == dev_get_iflink(netdev)) { + iflink = dev_get_iflink(netdev); + if (iflink == 0) { dev_hold(netdev); return netdev; } @@ -231,9 +235,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) goto out; net = dev_net(hard_iface->soft_iface); - ifindex = dev_get_iflink(netdev); real_net = batadv_getlink_net(netdev, net); - real_netdev = dev_get_by_index(real_net, ifindex); + + /* iflink to itself, most likely physical device */ + if (net == real_net && netdev->ifindex == iflink) { + real_netdev = netdev; + dev_hold(real_netdev); + goto out; + } + + real_netdev = dev_get_by_index(real_net, iflink); out: batadv_hardif_put(hard_iface); diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index f4004cf0ff..6e3419beca 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -89,7 +89,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) rcu_read_lock(); do { upper = netdev_master_upper_dev_get_rcu(upper); - } while (upper && !netif_is_bridge_master(upper)); + } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); dev_hold(upper); rcu_read_unlock(); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 974d726fab..0a7f1d36a6 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -58,9 +58,13 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, */ int __init batadv_nc_init(void) { + int ret; + /* Register our packet type */ - return batadv_recv_handler_register(BATADV_CODED, - batadv_nc_recv_coded_packet); + ret = batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet); + + return ret; } /** diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 83f31494ea..970d0d7ccc 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -747,8 +747,7 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_hard_iface *primary_if = NULL; bool ret = false; - const u8 *orig_addr; - u8 orig_ttvn; + u8 *orig_addr, orig_ttvn; if (batadv_is_my_client(bat_priv, dst_addr, vid)) { primary_if = batadv_primary_if_get_selected(bat_priv); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2dbbe6c196..6ab28b509d 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -134,7 +134,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) return -EADDRNOTAVAIL; ether_addr_copy(old_addr, dev->dev_addr); - eth_hw_addr_set(dev, addr->sa_data); + ether_addr_copy(dev->dev_addr, addr->sa_data); /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 93730d30af..56b9fe97b3 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -631,9 +631,9 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node = NULL; const struct batadv_icmp_tp_packet *icmp; struct batadv_tp_vars *tp_vars; - const unsigned char *dev_addr; size_t packet_len, mss; 
u32 rtt, recv_ack, cwnd; + unsigned char *dev_addr; packet_len = BATADV_TP_PLEN; mss = BATADV_TP_PLEN; @@ -890,7 +890,7 @@ static int batadv_tp_send(void *arg) batadv_tp_vars_put(tp_vars); - return 0; + do_exit(0); } /** diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 0cb58eb040..992773376e 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -587,8 +587,8 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, * @tvlv_value: tvlv content * @tvlv_value_len: tvlv content length */ -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src, - const u8 *dst, u8 type, u8 version, +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len) { struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index 4cf8af00fc..54f2a35653 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -42,8 +42,8 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, u8 *src, u8 *dst, void *tvlv_buff, u16 tvlv_buff_len); -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src, - const u8 *dst, u8 type, u8 version, +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len); #endif /* _NET_BATMAN_ADV_TVLV_H_ */ diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 133d7ea063..fd164a2485 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -663,7 +663,6 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev) { struct net_device *netdev; - bdaddr_t addr; int err; netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)), @@ -673,8 +672,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev) return -ENOMEM; netdev->addr_assign_type = NET_ADDR_PERM; - baswap(&addr, &chan->src); - __dev_addr_set(netdev, &addr, sizeof(addr)); + baswap((void *)netdev->dev_addr, &chan->src); netdev->netdev_ops = &netdev_ops; SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev); diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a52bba8500..cc0995301f 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -14,8 +14,7 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \ - ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o hci_codec.o \ - eir.o hci_sync.o + ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o bluetooth-$(CONFIG_BT_BREDR) += sco.o bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o diff --git a/net/bluetooth/aosp.c b/net/bluetooth/aosp.c index 432ae3aac9..a1b7762335 100644 --- a/net/bluetooth/aosp.c +++ b/net/bluetooth/aosp.c @@ -8,43 +8,9 @@ #include "aosp.h" -/* Command complete parameters of LE_Get_Vendor_Capabilities_Command - * The parameters grow over time. The base version that declares the - * version_supported field is v0.95. 
Refer to - * https://cs.android.com/android/platform/superproject/+/master:system/ - * bt/gd/hci/controller.cc;l=452?q=le_get_vendor_capabilities_handler - */ -struct aosp_rp_le_get_vendor_capa { - /* v0.95: 15 octets */ - __u8 status; - __u8 max_advt_instances; - __u8 offloaded_resolution_of_private_address; - __le16 total_scan_results_storage; - __u8 max_irk_list_sz; - __u8 filtering_support; - __u8 max_filter; - __u8 activity_energy_info_support; - __le16 version_supported; - __le16 total_num_of_advt_tracked; - __u8 extended_scan_support; - __u8 debug_logging_supported; - /* v0.96: 16 octets */ - __u8 le_address_generation_offloading_support; - /* v0.98: 21 octets */ - __le32 a2dp_source_offload_capability_mask; - __u8 bluetooth_quality_report_support; - /* v1.00: 25 octets */ - __le32 dynamic_audio_buffer_support; -} __packed; - -#define VENDOR_CAPA_BASE_SIZE 15 -#define VENDOR_CAPA_0_98_SIZE 21 - void aosp_do_open(struct hci_dev *hdev) { struct sk_buff *skb; - struct aosp_rp_le_get_vendor_capa *rp; - u16 version_supported; if (!hdev->aosp_capable) return; @@ -54,54 +20,9 @@ void aosp_do_open(struct hci_dev *hdev) /* LE Get Vendor Capabilities Command */ skb = __hci_cmd_sync(hdev, hci_opcode_pack(0x3f, 0x153), 0, NULL, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "AOSP get vendor capabilities (%ld)", - PTR_ERR(skb)); + if (IS_ERR(skb)) return; - } - /* A basic length check */ - if (skb->len < VENDOR_CAPA_BASE_SIZE) - goto length_error; - - rp = (struct aosp_rp_le_get_vendor_capa *)skb->data; - - version_supported = le16_to_cpu(rp->version_supported); - /* AOSP displays the verion number like v0.98, v1.00, etc. */ - bt_dev_info(hdev, "AOSP extensions version v%u.%02u", - version_supported >> 8, version_supported & 0xff); - - /* Do not support very old versions. */ - if (version_supported < 95) { - bt_dev_warn(hdev, "AOSP capabilities version %u too old", - version_supported); - goto done; - } - - if (version_supported < 98) { - bt_dev_warn(hdev, "AOSP quality report is not supported"); - goto done; - } - - if (skb->len < VENDOR_CAPA_0_98_SIZE) - goto length_error; - - /* The bluetooth_quality_report_support is defined at version - * v0.98. Refer to - * https://cs.android.com/android/platform/superproject/+/ - * master:system/bt/gd/hci/controller.cc;l=477 - */ - if (rp->bluetooth_quality_report_support) { - hdev->aosp_quality_report = true; - bt_dev_info(hdev, "AOSP quality report is supported"); - } - - goto done; - -length_error: - bt_dev_err(hdev, "AOSP capabilities length %d too short", skb->len); - -done: kfree_skb(skb); } @@ -112,90 +33,3 @@ void aosp_do_close(struct hci_dev *hdev) bt_dev_dbg(hdev, "Cleanup of AOSP extension"); } - -/* BQR command */ -#define BQR_OPCODE hci_opcode_pack(0x3f, 0x015e) - -/* BQR report action */ -#define REPORT_ACTION_ADD 0x00 -#define REPORT_ACTION_DELETE 0x01 -#define REPORT_ACTION_CLEAR 0x02 - -/* BQR event masks */ -#define QUALITY_MONITORING BIT(0) -#define APPRAOCHING_LSTO BIT(1) -#define A2DP_AUDIO_CHOPPY BIT(2) -#define SCO_VOICE_CHOPPY BIT(3) - -#define DEFAULT_BQR_EVENT_MASK (QUALITY_MONITORING | APPRAOCHING_LSTO | \ - A2DP_AUDIO_CHOPPY | SCO_VOICE_CHOPPY) - -/* Reporting at milliseconds so as not to stress the controller too much. 
- * Range: 0 ~ 65535 ms - */ -#define DEFALUT_REPORT_INTERVAL_MS 5000 - -struct aosp_bqr_cp { - __u8 report_action; - __u32 event_mask; - __u16 min_report_interval; -} __packed; - -static int enable_quality_report(struct hci_dev *hdev) -{ - struct sk_buff *skb; - struct aosp_bqr_cp cp; - - cp.report_action = REPORT_ACTION_ADD; - cp.event_mask = DEFAULT_BQR_EVENT_MASK; - cp.min_report_interval = DEFALUT_REPORT_INTERVAL_MS; - - skb = __hci_cmd_sync(hdev, BQR_OPCODE, sizeof(cp), &cp, - HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "Enabling Android BQR failed (%ld)", - PTR_ERR(skb)); - return PTR_ERR(skb); - } - - kfree_skb(skb); - return 0; -} - -static int disable_quality_report(struct hci_dev *hdev) -{ - struct sk_buff *skb; - struct aosp_bqr_cp cp = { 0 }; - - cp.report_action = REPORT_ACTION_CLEAR; - - skb = __hci_cmd_sync(hdev, BQR_OPCODE, sizeof(cp), &cp, - HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "Disabling Android BQR failed (%ld)", - PTR_ERR(skb)); - return PTR_ERR(skb); - } - - kfree_skb(skb); - return 0; -} - -bool aosp_has_quality_report(struct hci_dev *hdev) -{ - return hdev->aosp_quality_report; -} - -int aosp_set_quality_report(struct hci_dev *hdev, bool enable) -{ - if (!aosp_has_quality_report(hdev)) - return -EOPNOTSUPP; - - bt_dev_dbg(hdev, "quality report enable %d", enable); - - /* Enable or disable the quality report feature. */ - if (enable) - return enable_quality_report(hdev); - else - return disable_quality_report(hdev); -} diff --git a/net/bluetooth/aosp.h b/net/bluetooth/aosp.h index 2fd8886d51..328fc6d39f 100644 --- a/net/bluetooth/aosp.h +++ b/net/bluetooth/aosp.h @@ -8,22 +8,9 @@ void aosp_do_open(struct hci_dev *hdev); void aosp_do_close(struct hci_dev *hdev); -bool aosp_has_quality_report(struct hci_dev *hdev); -int aosp_set_quality_report(struct hci_dev *hdev, bool enable); - #else static inline void aosp_do_open(struct hci_dev *hdev) {} static inline void aosp_do_close(struct hci_dev *hdev) {} -static inline bool aosp_has_quality_report(struct hci_dev *hdev) -{ - return false; -} - -static inline int aosp_set_quality_report(struct hci_dev *hdev, bool enable) -{ - return -EOPNOTSUPP; -} - #endif diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 40baa6b732..72f47b3727 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -535,7 +535,7 @@ static int bnep_session(void *arg) up_write(&bnep_session_sem); free_netdev(dev); - module_put_and_kthread_exit(0); + module_put_and_exit(0); return 0; } @@ -594,7 +594,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) * ie. eh.h_dest is our local address. */ memcpy(s->eh.h_dest, &src, ETH_ALEN); memcpy(s->eh.h_source, &dst, ETH_ALEN); - eth_hw_addr_set(dev, s->eh.h_dest); + memcpy(dev->dev_addr, s->eh.h_dest, ETH_ALEN); s->dev = dev; s->sock = sock; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 57d509d77c..d515571b2a 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -24,7 +24,6 @@ SOFTWARE IS DISCLAIMED. 
*/ -#include #include #include diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 90d130588a..83eb84e8e6 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -323,7 +323,7 @@ static int cmtp_session(void *arg) up_write(&cmtp_session_sem); kfree(session); - module_put_and_kthread_exit(0); + module_put_and_exit(0); return 0; } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 04ebe901e8..2b5059a56c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -108,7 +108,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) break; } - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); } static void hci_conn_cleanup(struct hci_conn *conn) @@ -307,133 +307,13 @@ static bool find_next_esco_param(struct hci_conn *conn, return conn->attempt <= size; } -static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle) -{ - struct hci_dev *hdev = conn->hdev; - struct hci_cp_enhanced_setup_sync_conn cp; - const struct sco_param *param; - - bt_dev_dbg(hdev, "hcon %p", conn); - - /* for offload use case, codec needs to configured before opening SCO */ - if (conn->codec.data_path) - hci_req_configure_datapath(hdev, &conn->codec); - - conn->state = BT_CONNECT; - conn->out = true; - - conn->attempt++; - - memset(&cp, 0x00, sizeof(cp)); - - cp.handle = cpu_to_le16(handle); - - cp.tx_bandwidth = cpu_to_le32(0x00001f40); - cp.rx_bandwidth = cpu_to_le32(0x00001f40); - - switch (conn->codec.id) { - case BT_CODEC_MSBC: - if (!find_next_esco_param(conn, esco_param_msbc, - ARRAY_SIZE(esco_param_msbc))) - return false; - - param = &esco_param_msbc[conn->attempt - 1]; - cp.tx_coding_format.id = 0x05; - cp.rx_coding_format.id = 0x05; - cp.tx_codec_frame_size = __cpu_to_le16(60); - cp.rx_codec_frame_size = __cpu_to_le16(60); - cp.in_bandwidth = __cpu_to_le32(32000); - cp.out_bandwidth = __cpu_to_le32(32000); - cp.in_coding_format.id = 0x04; - cp.out_coding_format.id = 0x04; - cp.in_coded_data_size = __cpu_to_le16(16); - cp.out_coded_data_size = __cpu_to_le16(16); - cp.in_pcm_data_format = 2; - cp.out_pcm_data_format = 2; - cp.in_pcm_sample_payload_msb_pos = 0; - cp.out_pcm_sample_payload_msb_pos = 0; - cp.in_data_path = conn->codec.data_path; - cp.out_data_path = conn->codec.data_path; - cp.in_transport_unit_size = 1; - cp.out_transport_unit_size = 1; - break; - - case BT_CODEC_TRANSPARENT: - if (!find_next_esco_param(conn, esco_param_msbc, - ARRAY_SIZE(esco_param_msbc))) - return false; - param = &esco_param_msbc[conn->attempt - 1]; - cp.tx_coding_format.id = 0x03; - cp.rx_coding_format.id = 0x03; - cp.tx_codec_frame_size = __cpu_to_le16(60); - cp.rx_codec_frame_size = __cpu_to_le16(60); - cp.in_bandwidth = __cpu_to_le32(0x1f40); - cp.out_bandwidth = __cpu_to_le32(0x1f40); - cp.in_coding_format.id = 0x03; - cp.out_coding_format.id = 0x03; - cp.in_coded_data_size = __cpu_to_le16(16); - cp.out_coded_data_size = __cpu_to_le16(16); - cp.in_pcm_data_format = 2; - cp.out_pcm_data_format = 2; - cp.in_pcm_sample_payload_msb_pos = 0; - cp.out_pcm_sample_payload_msb_pos = 0; - cp.in_data_path = conn->codec.data_path; - cp.out_data_path = conn->codec.data_path; - cp.in_transport_unit_size = 1; - cp.out_transport_unit_size = 1; - break; - - case BT_CODEC_CVSD: - if (lmp_esco_capable(conn->link)) { - if (!find_next_esco_param(conn, esco_param_cvsd, - ARRAY_SIZE(esco_param_cvsd))) - return false; - param = &esco_param_cvsd[conn->attempt - 1]; - } else { - if (conn->attempt > ARRAY_SIZE(sco_param_cvsd)) - return 
false; - param = &sco_param_cvsd[conn->attempt - 1]; - } - cp.tx_coding_format.id = 2; - cp.rx_coding_format.id = 2; - cp.tx_codec_frame_size = __cpu_to_le16(60); - cp.rx_codec_frame_size = __cpu_to_le16(60); - cp.in_bandwidth = __cpu_to_le32(16000); - cp.out_bandwidth = __cpu_to_le32(16000); - cp.in_coding_format.id = 4; - cp.out_coding_format.id = 4; - cp.in_coded_data_size = __cpu_to_le16(16); - cp.out_coded_data_size = __cpu_to_le16(16); - cp.in_pcm_data_format = 2; - cp.out_pcm_data_format = 2; - cp.in_pcm_sample_payload_msb_pos = 0; - cp.out_pcm_sample_payload_msb_pos = 0; - cp.in_data_path = conn->codec.data_path; - cp.out_data_path = conn->codec.data_path; - cp.in_transport_unit_size = 16; - cp.out_transport_unit_size = 16; - break; - default: - return false; - } - - cp.retrans_effort = param->retrans_effort; - cp.pkt_type = __cpu_to_le16(param->pkt_type); - cp.max_latency = __cpu_to_le16(param->max_latency); - - if (hci_send_cmd(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0) - return false; - - return true; -} - -static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle) +bool hci_setup_sync(struct hci_conn *conn, __u16 handle) { struct hci_dev *hdev = conn->hdev; struct hci_cp_setup_sync_conn cp; const struct sco_param *param; - bt_dev_dbg(hdev, "hcon %p", conn); + BT_DBG("hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; @@ -479,14 +359,6 @@ static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle) return true; } -bool hci_setup_sync(struct hci_conn *conn, __u16 handle) -{ - if (enhanced_sco_capable(conn->hdev)) - return hci_enhanced_setup_sync_conn(conn, handle); - - return hci_setup_sync_conn(conn, handle); -} - u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier) { @@ -900,56 +772,288 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) hci_conn_del(conn); + /* The suspend notifier is waiting for all devices to disconnect and an + * LE connect cancel will result in an hci_le_conn_failed. Once the last + * connection is deleted, we should also wake the suspend queue to + * complete suspend operations. + */ + if (list_empty(&hdev->conn_hash.list) && + test_and_clear_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } + /* Since we may have temporarily stopped the background scanning in * favor of connection establishment, we should restart it. */ - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); - /* Enable advertising in case this was a failed connection + /* Re-enable advertising in case this was a failed connection * attempt as a peripheral. 
*/ - hci_enable_advertising(hdev); + hci_req_reenable_advertising(hdev); } -static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) +static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct hci_conn *conn = data; + struct hci_conn *conn; hci_dev_lock(hdev); - if (!err) { + conn = hci_lookup_le_connect(hdev); + + if (hdev->adv_instance_cnt) + hci_req_resume_adv_instances(hdev); + + if (!status) { hci_connect_le_scan_cleanup(conn); goto done; } - bt_dev_err(hdev, "request failed to create LE connection: err %d", err); + bt_dev_err(hdev, "request failed to create LE connection: " + "status 0x%2.2x", status); if (!conn) goto done; - hci_le_conn_failed(conn, err); + hci_le_conn_failed(conn, status); done: hci_dev_unlock(hdev); } -static int hci_connect_le_sync(struct hci_dev *hdev, void *data) +static bool conn_use_rpa(struct hci_conn *conn) { - struct hci_conn *conn = data; + struct hci_dev *hdev = conn->hdev; - bt_dev_dbg(hdev, "conn %p", conn); + return hci_dev_test_flag(hdev, HCI_PRIVACY); +} - return hci_le_create_conn_sync(hdev, conn); +static void set_ext_conn_params(struct hci_conn *conn, + struct hci_cp_le_ext_conn_param *p) +{ + struct hci_dev *hdev = conn->hdev; + + memset(p, 0, sizeof(*p)); + + p->scan_interval = cpu_to_le16(hdev->le_scan_int_connect); + p->scan_window = cpu_to_le16(hdev->le_scan_window_connect); + p->conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); + p->conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); + p->conn_latency = cpu_to_le16(conn->le_conn_latency); + p->supervision_timeout = cpu_to_le16(conn->le_supv_timeout); + p->min_ce_len = cpu_to_le16(0x0000); + p->max_ce_len = cpu_to_le16(0x0000); +} + +static void hci_req_add_le_create_conn(struct hci_request *req, + struct hci_conn *conn, + bdaddr_t *direct_rpa) +{ + struct hci_dev *hdev = conn->hdev; + u8 own_addr_type; + + /* If a direct address was provided, we use it instead of the current + * address. + */ + if (direct_rpa) { + if (bacmp(&req->hdev->random_addr, direct_rpa)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + direct_rpa); + + /* direct address is always RPA */ + own_addr_type = ADDR_LE_DEV_RANDOM; + } else { + /* Update random address, but set require_privacy to false so + * that we never connect with a non-resolvable address.
+ */ + if (hci_update_random_address(req, false, conn_use_rpa(conn), + &own_addr_type)) + return; + } + + if (use_ext_conn(hdev)) { + struct hci_cp_le_ext_create_conn *cp; + struct hci_cp_le_ext_conn_param *p; + u8 data[sizeof(*cp) + sizeof(*p) * 3]; + u32 plen; + + cp = (void *) data; + p = (void *) cp->data; + + memset(cp, 0, sizeof(*cp)); + + bacpy(&cp->peer_addr, &conn->dst); + cp->peer_addr_type = conn->dst_type; + cp->own_addr_type = own_addr_type; + + plen = sizeof(*cp); + + if (scan_1m(hdev)) { + cp->phys |= LE_SCAN_PHY_1M; + set_ext_conn_params(conn, p); + + p++; + plen += sizeof(*p); + } + + if (scan_2m(hdev)) { + cp->phys |= LE_SCAN_PHY_2M; + set_ext_conn_params(conn, p); + + p++; + plen += sizeof(*p); + } + + if (scan_coded(hdev)) { + cp->phys |= LE_SCAN_PHY_CODED; + set_ext_conn_params(conn, p); + + plen += sizeof(*p); + } + + hci_req_add(req, HCI_OP_LE_EXT_CREATE_CONN, plen, data); + + } else { + struct hci_cp_le_create_conn cp; + + memset(&cp, 0, sizeof(cp)); + + cp.scan_interval = cpu_to_le16(hdev->le_scan_int_connect); + cp.scan_window = cpu_to_le16(hdev->le_scan_window_connect); + + bacpy(&cp.peer_addr, &conn->dst); + cp.peer_addr_type = conn->dst_type; + cp.own_address_type = own_addr_type; + cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); + cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); + cp.conn_latency = cpu_to_le16(conn->le_conn_latency); + cp.supervision_timeout = cpu_to_le16(conn->le_supv_timeout); + cp.min_ce_len = cpu_to_le16(0x0000); + cp.max_ce_len = cpu_to_le16(0x0000); + + hci_req_add(req, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp); + } + + conn->state = BT_CONNECT; + clear_bit(HCI_CONN_SCANNING, &conn->flags); +} + +static void hci_req_directed_advertising(struct hci_request *req, + struct hci_conn *conn) +{ + struct hci_dev *hdev = req->hdev; + u8 own_addr_type; + u8 enable; + + if (ext_adv_capable(hdev)) { + struct hci_cp_le_set_ext_adv_params cp; + bdaddr_t random_addr; + + /* Set require_privacy to false so that the remote device has a + * chance of identifying us. + */ + if (hci_get_random_address(hdev, false, conn_use_rpa(conn), NULL, + &own_addr_type, &random_addr) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + + cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_DIRECT_IND); + cp.own_addr_type = own_addr_type; + cp.channel_map = hdev->le_adv_channel_map; + cp.tx_power = HCI_TX_POWER_INVALID; + cp.primary_phy = HCI_ADV_PHY_1M; + cp.secondary_phy = HCI_ADV_PHY_1M; + cp.handle = 0; /* Use instance 0 for directed adv */ + cp.own_addr_type = own_addr_type; + cp.peer_addr_type = conn->dst_type; + bacpy(&cp.peer_addr, &conn->dst); + + /* As per Core Spec 5.2 Vol 2, Part E, Sec 7.8.53, the + * advertising_event_property LE_LEGACY_ADV_DIRECT_IND + * does not support advertising data; when the advertising set already + * contains some, the controller shall return error code 'Invalid + * HCI Command Parameters' (0x12). + * So it is required to remove the adv set for handle 0x00, since we use + * instance 0 for directed adv.
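The extended create-connection hunk above packs one hci_cp_le_ext_conn_param block per enabled PHY immediately behind the header, growing plen as it goes. A hypothetical helper (not in the patch) that computes the same payload length, shown only to make the arithmetic explicit:

static u32 ext_create_conn_plen(struct hci_dev *hdev)
{
	/* header plus one parameter block per PHY enabled in cp->phys */
	u32 plen = sizeof(struct hci_cp_le_ext_create_conn);

	if (scan_1m(hdev))
		plen += sizeof(struct hci_cp_le_ext_conn_param);
	if (scan_2m(hdev))
		plen += sizeof(struct hci_cp_le_ext_conn_param);
	if (scan_coded(hdev))
		plen += sizeof(struct hci_cp_le_ext_conn_param);

	return plen;
}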
+ */ + __hci_req_remove_ext_adv_instance(req, cp.handle); + + hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp); + + if (own_addr_type == ADDR_LE_DEV_RANDOM && + bacmp(&random_addr, BDADDR_ANY) && + bacmp(&random_addr, &hdev->random_addr)) { + struct hci_cp_le_set_adv_set_rand_addr cp; + + memset(&cp, 0, sizeof(cp)); + + cp.handle = 0; + bacpy(&cp.bdaddr, &random_addr); + + hci_req_add(req, + HCI_OP_LE_SET_ADV_SET_RAND_ADDR, + sizeof(cp), &cp); + } + + __hci_req_enable_ext_advertising(req, 0x00); + } else { + struct hci_cp_le_set_adv_param cp; + + /* Clear the HCI_LE_ADV bit temporarily so that + * hci_update_random_address() knows that it's safe to go ahead + * and write a new random address. The flag will be set back on + * as soon as the SET_ADV_ENABLE HCI command completes. + */ + hci_dev_clear_flag(hdev, HCI_LE_ADV); + + /* Set require_privacy to false so that the remote device has a + * chance of identifying us. + */ + if (hci_update_random_address(req, false, conn_use_rpa(conn), + &own_addr_type) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + + /* Some controllers might reject the command if the intervals are + * not within range for undirected advertising. + * BCM20702A0 is known to be affected by this. + */ + cp.min_interval = cpu_to_le16(0x0020); + cp.max_interval = cpu_to_le16(0x0020); + + cp.type = LE_ADV_DIRECT_IND; + cp.own_address_type = own_addr_type; + cp.direct_addr_type = conn->dst_type; + bacpy(&cp.direct_addr, &conn->dst); + cp.channel_map = hdev->le_adv_channel_map; + + hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); + + enable = 0x01; + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + } + + conn->state = BT_CONNECT; +} + struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, - u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role) + u8 dst_type, u8 sec_level, u16 conn_timeout, + u8 role, bdaddr_t *direct_rpa) { + struct hci_conn_params *params; struct hci_conn *conn; struct smp_irk *irk; + struct hci_request req; int err; + /* This ensures that address resolution will not be disabled while + * le_scan is being disabled, if it is followed by le_create_conn + */ + bool rpa_le_conn = true; + /* Let's make sure that le is enabled.*/ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { if (lmp_le_capable(hdev)) @@ -974,24 +1078,19 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EBUSY); } - /* Check if the destination address has been resolved by the controller - * since if it did then the identity address shall be used. + /* When given an identity address with existing identity + * resolving key, the connection needs to be established + * to a resolvable random address. + * + * Storing the resolvable random address is required here + * to handle connection failures. The address will later + * be resolved back into the original identity address + * from the connect request.
- */ - irk = hci_find_irk_by_addr(hdev, dst, dst_type); - if (irk && bacmp(&irk->rpa, BDADDR_ANY)) { - dst = &irk->rpa; - dst_type = ADDR_LE_DEV_RANDOM; - } + irk = hci_find_irk_by_addr(hdev, dst, dst_type); + if (irk && bacmp(&irk->rpa, BDADDR_ANY)) { + dst = &irk->rpa; + dst_type = ADDR_LE_DEV_RANDOM; } if (conn) { @@ -1008,13 +1107,68 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; - conn->state = BT_CONNECT; - clear_bit(HCI_CONN_SCANNING, &conn->flags); + hci_req_init(&req, hdev); - err = hci_cmd_sync_queue(hdev, hci_connect_le_sync, conn, - create_le_conn_complete); + /* Disable advertising if we're active. For central role + * connections most controllers will refuse to connect if + * advertising is enabled, and for peripheral role connections we + * anyway have to disable it in order to start directed + * advertising. Any registered advertisements will be + * re-enabled after the connection attempt is finished. + */ + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_pause_adv_instances(&req); + + /* If requested to connect as peripheral use directed advertising */ + if (conn->role == HCI_ROLE_SLAVE) { + /* If we're active scanning most controllers are unable + * to initiate advertising. Simply reject the attempt. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && + hdev->le_scan_type == LE_SCAN_ACTIVE) { + hci_req_purge(&req); + hci_conn_del(conn); + return ERR_PTR(-EBUSY); + } + + hci_req_directed_advertising(&req, conn); + goto create_conn; + } + + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); + if (params) { + conn->le_conn_min_interval = params->conn_min_interval; + conn->le_conn_max_interval = params->conn_max_interval; + conn->le_conn_latency = params->conn_latency; + conn->le_supv_timeout = params->supervision_timeout; + } else { + conn->le_conn_min_interval = hdev->le_conn_min_interval; + conn->le_conn_max_interval = hdev->le_conn_max_interval; + conn->le_conn_latency = hdev->le_conn_latency; + conn->le_supv_timeout = hdev->le_supv_timeout; + } + + /* If controller is scanning, we stop it since some controllers are + * not able to scan and connect at the same time. Also set the + * HCI_LE_SCAN_INTERRUPTED flag so that the command complete + * handler for scan disabling knows to set the correct discovery + * state. 
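For reference, a minimal caller sketch for the hci_connect_le() signature restored above; the argument values are illustrative (upstream callers such as the L2CAP layer pass their own security level, timeout, and role):

	struct hci_conn *conn;

	conn = hci_connect_le(hdev, dst, ADDR_LE_DEV_PUBLIC,
			      BT_SECURITY_LOW, HCI_LE_CONN_TIMEOUT,
			      HCI_ROLE_MASTER, NULL /* direct_rpa */);
	if (IS_ERR(conn))
		return PTR_ERR(conn);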
+ */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + hci_req_add_le_scan_disable(&req, rpa_le_conn); + hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); + } + + hci_req_add_le_create_conn(&req, conn, direct_rpa); + +create_conn: + err = hci_req_run(&req, create_le_conn_complete); if (err) { hci_conn_del(conn); + + if (hdev->adv_instance_cnt) + hci_req_resume_adv_instances(hdev); + return ERR_PTR(err); } @@ -1124,7 +1278,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, conn->conn_timeout = conn_timeout; conn->conn_reason = conn_reason; - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); done: hci_conn_hold(conn); @@ -1165,7 +1319,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, } struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u16 setting, struct bt_codec *codec) + __u16 setting) { struct hci_conn *acl; struct hci_conn *sco; @@ -1190,7 +1344,6 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, hci_conn_hold(sco); sco->setting = setting; - sco->codec = *codec; if (acl->state == BT_CONNECTED && (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2b7bd3655b..53f1b08017 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -45,7 +45,6 @@ #include "leds.h" #include "msft.h" #include "aosp.h" -#include "hci_codec.h" static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); @@ -62,6 +61,947 @@ DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); +/* ---- HCI debugfs entries ---- */ + +static ssize_t dut_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + struct sk_buff *skb; + bool enable; + int err; + + if (!test_bit(HCI_UP, &hdev->flags)) + return -ENETDOWN; + + err = kstrtobool_from_user(user_buf, count, &enable); + if (err) + return err; + + if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) + return -EALREADY; + + hci_req_sync_lock(hdev); + if (enable) + skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL, + HCI_CMD_TIMEOUT); + else + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, + HCI_CMD_TIMEOUT); + hci_req_sync_unlock(hdev); + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + kfree_skb(skb); + + hci_dev_change_flag(hdev, HCI_DUT_MODE); + + return count; +} + +static const struct file_operations dut_mode_fops = { + .open = simple_open, + .read = dut_mode_read, + .write = dut_mode_write, + .llseek = default_llseek, +}; + +static ssize_t vendor_diag_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 
'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + bool enable; + int err; + + err = kstrtobool_from_user(user_buf, count, &enable); + if (err) + return err; + + /* When the diagnostic flags are not persistent and the transport + * is not active or in user channel operation, then there is no need + * for the vendor callback. Instead just store the desired value and + * the setting will be programmed when the controller gets powered on. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + (!test_bit(HCI_RUNNING, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) + goto done; + + hci_req_sync_lock(hdev); + err = hdev->set_diag(hdev, enable); + hci_req_sync_unlock(hdev); + + if (err < 0) + return err; + +done: + if (enable) + hci_dev_set_flag(hdev, HCI_VENDOR_DIAG); + else + hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG); + + return count; +} + +static const struct file_operations vendor_diag_fops = { + .open = simple_open, + .read = vendor_diag_read, + .write = vendor_diag_write, + .llseek = default_llseek, +}; + +static void hci_debugfs_create_basic(struct hci_dev *hdev) +{ + debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, + &dut_mode_fops); + + if (hdev->set_diag) + debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev, + &vendor_diag_fops); +} + +static int hci_reset_req(struct hci_request *req, unsigned long opt) +{ + BT_DBG("%s %ld", req->hdev->name, opt); + + /* Reset device */ + set_bit(HCI_RESET, &req->hdev->flags); + hci_req_add(req, HCI_OP_RESET, 0, NULL); + return 0; +} + +static void bredr_init(struct hci_request *req) +{ + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; + + /* Read Local Supported Features */ + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + + /* Read Local Version */ + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + + /* Read BD Address */ + hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); +} + +static void amp_init1(struct hci_request *req) +{ + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; + + /* Read Local Version */ + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + + /* Read Local Supported Commands */ + hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + + /* Read Local AMP Info */ + hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); + + /* Read Data Blk size */ + hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); + + /* Read Flow Control Mode */ + hci_req_add(req, HCI_OP_READ_FLOW_CONTROL_MODE, 0, NULL); + + /* Read Location Data */ + hci_req_add(req, HCI_OP_READ_LOCATION_DATA, 0, NULL); +} + +static int amp_init2(struct hci_request *req) +{ + /* Read Local Supported Features. Not all AMP controllers + * support this so it's placed conditionally in the second + * stage init. 
+ */ + if (req->hdev->commands[14] & 0x20) + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + + return 0; +} + +static int hci_init1_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + BT_DBG("%s %ld", hdev->name, opt); + + /* Reset */ + if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) + hci_reset_req(req, 0); + + switch (hdev->dev_type) { + case HCI_PRIMARY: + bredr_init(req); + break; + case HCI_AMP: + amp_init1(req); + break; + default: + bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type); + break; + } + + return 0; +} + +static void bredr_setup(struct hci_request *req) +{ + __le16 param; + __u8 flt_type; + + /* Read Buffer Size (ACL mtu, max pkt, etc.) */ + hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL); + + /* Read Class of Device */ + hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); + + /* Read Local Name */ + hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL); + + /* Read Voice Setting */ + hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL); + + /* Read Number of Supported IAC */ + hci_req_add(req, HCI_OP_READ_NUM_SUPPORTED_IAC, 0, NULL); + + /* Read Current IAC LAP */ + hci_req_add(req, HCI_OP_READ_CURRENT_IAC_LAP, 0, NULL); + + /* Clear Event Filters */ + flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type); + + /* Connection accept timeout ~20 secs */ + param = cpu_to_le16(0x7d00); + hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); +} + +static void le_setup(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + /* Read LE Buffer Size */ + hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + + /* Read LE Local Supported Features */ + hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + + /* Read LE Supported States */ + hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); + + /* LE-only controllers have LE implicitly enabled */ + if (!lmp_bredr_capable(hdev)) + hci_dev_set_flag(hdev, HCI_LE_ENABLED); +} + +static void hci_setup_event_mask(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + /* The second byte is 0xff instead of 0x9f (two reserved bits + * disabled) since a Broadcom 1.2 dongle doesn't respond to the + * command otherwise. + */ + u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; + + /* CSR 1.1 dongles do not accept any bitfield so don't try to set + * any event mask for pre 1.2 devices. + */ + if (hdev->hci_ver < BLUETOOTH_VER_1_2) + return; + + if (lmp_bredr_capable(hdev)) { + events[4] |= 0x01; /* Flow Specification Complete */ + } else { + /* Use a different default for LE-only devices */ + memset(events, 0, sizeof(events)); + events[1] |= 0x20; /* Command Complete */ + events[1] |= 0x40; /* Command Status */ + events[1] |= 0x80; /* Hardware Error */ + + /* If the controller supports the Disconnect command, enable + * the corresponding event. In addition enable packet flow + * control related events. + */ + if (hdev->commands[0] & 0x20) { + events[0] |= 0x10; /* Disconnection Complete */ + events[2] |= 0x04; /* Number of Completed Packets */ + events[3] |= 0x02; /* Data Buffer Overflow */ + } + + /* If the controller supports the Read Remote Version + * Information command, enable the corresponding event.
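The event mask built above is a plain 8-byte little-endian bitfield, and the hunk sets each byte and bit literally. A hypothetical helper (not in the patch) spelling out the byte/bit arithmetic those literals encode:

static inline void hci_event_mask_set(u8 events[8], unsigned int bit)
{
	/* e.g. bit 4 -> events[0] |= 0x10, matching the
	 * "Disconnection Complete" line above
	 */
	events[bit / 8] |= 1 << (bit % 8);
}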
+ */ + if (hdev->commands[2] & 0x80) + events[1] |= 0x08; /* Read Remote Version Information + * Complete + */ + + if (hdev->le_features[0] & HCI_LE_ENCRYPTION) { + events[0] |= 0x80; /* Encryption Change */ + events[5] |= 0x80; /* Encryption Key Refresh Complete */ + } + } + + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) + events[4] |= 0x02; /* Inquiry Result with RSSI */ + + if (lmp_ext_feat_capable(hdev)) + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + + if (lmp_esco_capable(hdev)) { + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ + } + + if (lmp_sniffsubr_capable(hdev)) + events[5] |= 0x20; /* Sniff Subrating */ + + if (lmp_pause_enc_capable(hdev)) + events[5] |= 0x80; /* Encryption Key Refresh Complete */ + + if (lmp_ext_inq_capable(hdev)) + events[5] |= 0x40; /* Extended Inquiry Result */ + + if (lmp_no_flush_capable(hdev)) + events[7] |= 0x01; /* Enhanced Flush Complete */ + + if (lmp_lsto_capable(hdev)) + events[6] |= 0x80; /* Link Supervision Timeout Changed */ + + if (lmp_ssp_capable(hdev)) { + events[6] |= 0x01; /* IO Capability Request */ + events[6] |= 0x02; /* IO Capability Response */ + events[6] |= 0x04; /* User Confirmation Request */ + events[6] |= 0x08; /* User Passkey Request */ + events[6] |= 0x10; /* Remote OOB Data Request */ + events[6] |= 0x20; /* Simple Pairing Complete */ + events[7] |= 0x04; /* User Passkey Notification */ + events[7] |= 0x08; /* Keypress Notification */ + events[7] |= 0x10; /* Remote Host Supported + * Features Notification + */ + } + + if (lmp_le_capable(hdev)) + events[7] |= 0x20; /* LE Meta-Event */ + + hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); +} + +static int hci_init2_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + if (hdev->dev_type == HCI_AMP) + return amp_init2(req); + + if (lmp_bredr_capable(hdev)) + bredr_setup(req); + else + hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); + + if (lmp_le_capable(hdev)) + le_setup(req); + + /* All Bluetooth 1.2 and later controllers should support the + * HCI command for reading the local supported commands. + * + * Unfortunately some controllers indicate Bluetooth 1.2 support, + * but do not have support for this command. If that is the case, + * the driver can quirk the behavior and skip reading the local + * supported commands. + */ + if (hdev->hci_ver > BLUETOOTH_VER_1_1 && + !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) + hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + + if (lmp_ssp_capable(hdev)) { + /* When SSP is available, then the host features page + * should also be available as well. However some + * controllers list the max_page as 0 as long as SSP + * has not been enabled. To achieve proper debugging + * output, force the minimum max_page to 1 at least. + */ + hdev->max_page = 0x01; + + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { + u8 mode = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, + sizeof(mode), &mode); + } else { + struct hci_cp_write_eir cp; + + memset(hdev->eir, 0, sizeof(hdev->eir)); + memset(&cp, 0, sizeof(cp)); + + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + } + } + + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) { + u8 mode; + + /* If Extended Inquiry Result events are supported, then + * they are clearly preferred over Inquiry Result with RSSI + * events. 
+ */ + mode = lmp_ext_inq_capable(hdev) ? 0x02 : 0x01; + + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); + } + + if (lmp_inq_tx_pwr_capable(hdev)) + hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); + + if (lmp_ext_feat_capable(hdev)) { + struct hci_cp_read_local_ext_features cp; + + cp.page = 0x01; + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); + } + + if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) { + u8 enable = 1; + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), + &enable); + } + + return 0; +} + +static void hci_setup_link_policy(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_def_link_policy cp; + u16 link_policy = 0; + + if (lmp_rswitch_capable(hdev)) + link_policy |= HCI_LP_RSWITCH; + if (lmp_hold_capable(hdev)) + link_policy |= HCI_LP_HOLD; + if (lmp_sniff_capable(hdev)) + link_policy |= HCI_LP_SNIFF; + if (lmp_park_capable(hdev)) + link_policy |= HCI_LP_PARK; + + cp.policy = cpu_to_le16(link_policy); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); +} + +static void hci_set_le_support(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_le_host_supported cp; + + /* LE-only devices do not support explicit enablement */ + if (!lmp_bredr_capable(hdev)) + return; + + memset(&cp, 0, sizeof(cp)); + + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + cp.le = 0x01; + cp.simul = 0x00; + } + + if (cp.le != lmp_host_le_capable(hdev)) + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), + &cp); +} + +static void hci_set_event_mask_page_2(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + bool changed = false; + + /* If Connectionless Peripheral Broadcast central role is supported + * enable all necessary events for it. + */ + if (lmp_cpb_central_capable(hdev)) { + events[1] |= 0x40; /* Triggered Clock Capture */ + events[1] |= 0x80; /* Synchronization Train Complete */ + events[2] |= 0x10; /* Peripheral Page Response Timeout */ + events[2] |= 0x20; /* CPB Channel Map Change */ + changed = true; + } + + /* If Connectionless Peripheral Broadcast peripheral role is supported + * enable all necessary events for it. + */ + if (lmp_cpb_peripheral_capable(hdev)) { + events[2] |= 0x01; /* Synchronization Train Received */ + events[2] |= 0x02; /* CPB Receive */ + events[2] |= 0x04; /* CPB Timeout */ + events[2] |= 0x08; /* Truncated Page Complete */ + changed = true; + } + + /* Enable Authenticated Payload Timeout Expired event if supported */ + if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) { + events[2] |= 0x80; + changed = true; + } + + /* Some Broadcom based controllers indicate support for Set Event + * Mask Page 2 command, but then actually do not support it. Since + * the default value is all bits set to zero, the command is only + * required if the event mask has to be changed. In case no change + * to the event mask is needed, skip this command. 
+ */ + if (changed) + hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, + sizeof(events), events); +} + +static int hci_init3_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + u8 p; + + hci_setup_event_mask(req); + + if (hdev->commands[6] & 0x20 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { + struct hci_cp_read_stored_link_key cp; + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.read_all = 0x01; + hci_req_add(req, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp); + } + + if (hdev->commands[5] & 0x10) + hci_setup_link_policy(req); + + if (hdev->commands[8] & 0x01) + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + + if (hdev->commands[18] & 0x04 && + !test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) + hci_req_add(req, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL); + + /* Some older Broadcom based Bluetooth 1.2 controllers do not + * support the Read Page Scan Type command. Check support for + * this command in the bit mask of supported commands. + */ + if (hdev->commands[13] & 0x01) + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); + + if (lmp_le_capable(hdev)) { + u8 events[8]; + + memset(events, 0, sizeof(events)); + + if (hdev->le_features[0] & HCI_LE_ENCRYPTION) + events[0] |= 0x10; /* LE Long Term Key Request */ + + /* If controller supports the Connection Parameters Request + * Link Layer Procedure, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_CONN_PARAM_REQ_PROC) + events[0] |= 0x20; /* LE Remote Connection + * Parameter Request + */ + + /* If the controller supports the Data Length Extension + * feature, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) + events[0] |= 0x40; /* LE Data Length Change */ + + /* If the controller supports LL Privacy feature, enable + * the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_LL_PRIVACY) + events[1] |= 0x02; /* LE Enhanced Connection + * Complete + */ + + /* If the controller supports Extended Scanner Filter + * Policies, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY) + events[1] |= 0x04; /* LE Direct Advertising + * Report + */ + + /* If the controller supports Channel Selection Algorithm #2 + * feature, enable the corresponding event. + */ + if (hdev->le_features[1] & HCI_LE_CHAN_SEL_ALG2) + events[2] |= 0x08; /* LE Channel Selection + * Algorithm + */ + + /* If the controller supports the LE Set Scan Enable command, + * enable the corresponding advertising report event. + */ + if (hdev->commands[26] & 0x08) + events[0] |= 0x02; /* LE Advertising Report */ + + /* If the controller supports the LE Create Connection + * command, enable the corresponding event. + */ + if (hdev->commands[26] & 0x10) + events[0] |= 0x01; /* LE Connection Complete */ + + /* If the controller supports the LE Connection Update + * command, enable the corresponding event. + */ + if (hdev->commands[27] & 0x04) + events[0] |= 0x04; /* LE Connection Update + * Complete + */ + + /* If the controller supports the LE Read Remote Used Features + * command, enable the corresponding event. + */ + if (hdev->commands[27] & 0x20) + events[0] |= 0x08; /* LE Read Remote Used + * Features Complete + */ + + /* If the controller supports the LE Read Local P-256 + * Public Key command, enable the corresponding event. 
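The hdev->commands[] checks throughout these init stages follow the Core Spec "Supported Commands" bitmap: one octet index plus a bit mask per HCI command. A hypothetical predicate (not in the patch) making the convention explicit:

static inline bool hci_cmd_supported(struct hci_dev *hdev,
				     unsigned int octet, u8 mask)
{
	/* e.g. octet 34, mask 0x02 corresponds to LE Read Local P-256
	 * Public Key, mirroring the commands[34] & 0x02 test nearby
	 */
	return (hdev->commands[octet] & mask) != 0;
}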
+ */ + if (hdev->commands[34] & 0x02) + events[0] |= 0x80; /* LE Read Local P-256 + * Public Key Complete + */ + + /* If the controller supports the LE Generate DHKey + * command, enable the corresponding event. + */ + if (hdev->commands[34] & 0x04) + events[1] |= 0x01; /* LE Generate DHKey Complete */ + + /* If the controller supports the LE Set Default PHY or + * LE Set PHY commands, enable the corresponding event. + */ + if (hdev->commands[35] & (0x20 | 0x40)) + events[1] |= 0x08; /* LE PHY Update Complete */ + + /* If the controller supports LE Set Extended Scan Parameters + * and LE Set Extended Scan Enable commands, enable the + * corresponding event. + */ + if (use_ext_scan(hdev)) + events[1] |= 0x10; /* LE Extended Advertising + * Report + */ + + /* If the controller supports the LE Extended Advertising + * command, enable the corresponding event. + */ + if (ext_adv_capable(hdev)) + events[2] |= 0x02; /* LE Advertising Set + * Terminated + */ + + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events), + events); + + /* Read LE Advertising Channel TX Power */ + if ((hdev->commands[25] & 0x40) && !ext_adv_capable(hdev)) { + /* HCI TS spec forbids mixing of legacy and extended + * advertising commands wherein READ_ADV_TX_POWER is + * also included. So do not call it if extended adv + * is supported otherwise controller will return + * COMMAND_DISALLOWED for extended commands. + */ + hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + } + + if ((hdev->commands[38] & 0x80) && + !test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks)) { + /* Read LE Min/Max Tx Power*/ + hci_req_add(req, HCI_OP_LE_READ_TRANSMIT_POWER, + 0, NULL); + } + + if (hdev->commands[26] & 0x40) { + /* Read LE Accept List Size */ + hci_req_add(req, HCI_OP_LE_READ_ACCEPT_LIST_SIZE, + 0, NULL); + } + + if (hdev->commands[26] & 0x80) { + /* Clear LE Accept List */ + hci_req_add(req, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL); + } + + if (hdev->commands[34] & 0x40) { + /* Read LE Resolving List Size */ + hci_req_add(req, HCI_OP_LE_READ_RESOLV_LIST_SIZE, + 0, NULL); + } + + if (hdev->commands[34] & 0x20) { + /* Clear LE Resolving List */ + hci_req_add(req, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL); + } + + if (hdev->commands[35] & 0x04) { + __le16 rpa_timeout = cpu_to_le16(hdev->rpa_timeout); + + /* Set RPA timeout */ + hci_req_add(req, HCI_OP_LE_SET_RPA_TIMEOUT, 2, + &rpa_timeout); + } + + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + /* Read LE Maximum Data Length */ + hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); + + /* Read LE Suggested Default Data Length */ + hci_req_add(req, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL); + } + + if (ext_adv_capable(hdev)) { + /* Read LE Number of Supported Advertising Sets */ + hci_req_add(req, HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, + 0, NULL); + } + + hci_set_le_support(req); + } + + /* Read features beyond page 1 if available */ + for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + struct hci_cp_read_local_ext_features cp; + + cp.page = p; + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); + } + + return 0; +} + +static int hci_init4_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the command is marked + * as supported send it. 
If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. + * + * Some controllers indicate that they support handling deleting + * stored link keys, but they don't. The quirk lets a driver + * just disable this command. + */ + if (hdev->commands[6] & 0x80 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { + struct hci_cp_delete_stored_link_key cp; + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 0x01; + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, + sizeof(cp), &cp); + } + + /* Set event mask page 2 if the HCI command for it is supported */ + if (hdev->commands[22] & 0x04) + hci_set_event_mask_page_2(req); + + /* Read local codec list if the HCI command is supported */ + if (hdev->commands[29] & 0x20) + hci_req_add(req, HCI_OP_READ_LOCAL_CODECS, 0, NULL); + + /* Read local pairing options if the HCI command is supported */ + if (hdev->commands[41] & 0x08) + hci_req_add(req, HCI_OP_READ_LOCAL_PAIRING_OPTS, 0, NULL); + + /* Get MWS transport configuration if the HCI command is supported */ + if (hdev->commands[30] & 0x08) + hci_req_add(req, HCI_OP_GET_MWS_TRANSPORT_CONFIG, 0, NULL); + + /* Check for Synchronization Train support */ + if (lmp_sync_train_capable(hdev)) + hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); + + /* Enable Secure Connections if supported and configured */ + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && + bredr_sc_enabled(hdev)) { + u8 support = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } + + /* Set erroneous data reporting if supported to the wideband speech + * setting value + */ + if (hdev->commands[18] & 0x08 && + !test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) { + bool enabled = hci_dev_test_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + + if (enabled != + (hdev->err_data_reporting == ERR_DATA_REPORTING_ENABLED)) { + struct hci_cp_write_def_err_data_reporting cp; + + cp.err_data_reporting = enabled ? + ERR_DATA_REPORTING_ENABLED : + ERR_DATA_REPORTING_DISABLED; + + hci_req_add(req, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING, + sizeof(cp), &cp); + } + } + + /* Set Suggested Default Data Length to maximum if supported */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + struct hci_cp_le_write_def_data_len cp; + + cp.tx_len = cpu_to_le16(hdev->le_max_tx_len); + cp.tx_time = cpu_to_le16(hdev->le_max_tx_time); + hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp); + } + + /* Set Default PHY parameters if command is supported */ + if (hdev->commands[35] & 0x20) { + struct hci_cp_le_set_default_phy cp; + + cp.all_phys = 0x00; + cp.tx_phys = hdev->le_tx_def_phys; + cp.rx_phys = hdev->le_rx_def_phys; + + hci_req_add(req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp); + } + + return 0; +} + +static int __hci_init(struct hci_dev *hdev) +{ + int err; + + err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT, NULL); + if (err < 0) + return err; + + if (hci_dev_test_flag(hdev, HCI_SETUP)) + hci_debugfs_create_basic(hdev); + + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT, NULL); + if (err < 0) + return err; + + /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode + * BR/EDR/LE type controllers. AMP controllers only need the + * first two stages of init. 
+ */ + if (hdev->dev_type != HCI_PRIMARY) + return 0; + + err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT, NULL); + if (err < 0) + return err; + + err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT, NULL); + if (err < 0) + return err; + + /* This function is only called when the controller is actually in + * configured state. When the controller is marked as unconfigured, + * this initialization procedure is not run. + * + * It means that it is possible that a controller runs through its + * setup phase and then discovers missing settings. If that is the + * case, then this function will not be called. It then will only + * be called during the config phase. + * + * So only when in setup phase or config phase, create the debugfs + * entries and register the SMP channels. + */ + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) + return 0; + + hci_debugfs_create_common(hdev); + + if (lmp_bredr_capable(hdev)) + hci_debugfs_create_bredr(hdev); + + if (lmp_le_capable(hdev)) + hci_debugfs_create_le(hdev); + + return 0; +} + +static int hci_init0_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + BT_DBG("%s %ld", hdev->name, opt); + + /* Reset */ + if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) + hci_reset_req(req, 0); + + /* Read Local Version */ + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + + /* Read BD Address */ + if (hdev->set_bdaddr) + hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); + + return 0; +} + +static int __hci_unconf_init(struct hci_dev *hdev) +{ + int err; + + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + return 0; + + err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT, NULL); + if (err < 0) + return err; + + if (hci_dev_test_flag(hdev, HCI_SETUP)) + hci_debugfs_create_basic(hdev); + + return 0; +} + static int hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; @@ -157,7 +1097,7 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) switch (state) { case DISCOVERY_STOPPED: - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); if (old_state != DISCOVERY_STARTING) mgmt_discovering(hdev, 0); @@ -471,6 +1411,32 @@ int hci_inquiry(void __user *arg) return err; } +/** + * hci_dev_get_bd_addr_from_property - Get the Bluetooth Device Address + * (BD_ADDR) for a HCI device from + * a firmware node property. + * @hdev: The HCI device + * + * Search the firmware node for 'local-bd-address'. + * + * All-zero BD addresses are rejected, because those could be properties + * that exist in the firmware tables, but were not updated by the firmware. For + * example, the DTS could define 'local-bd-address', with zero BD addresses. 
+ */ +static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) +{ + struct fwnode_handle *fwnode = dev_fwnode(hdev->dev.parent); + bdaddr_t ba; + int ret; + + ret = fwnode_property_read_u8_array(fwnode, "local-bd-address", + (u8 *)&ba, sizeof(ba)); + if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) + return; + + bacpy(&hdev->public_addr, &ba); +} + static int hci_dev_do_open(struct hci_dev *hdev) { int ret = 0; @@ -479,8 +1445,205 @@ static int hci_dev_do_open(struct hci_dev *hdev) hci_req_sync_lock(hdev); - ret = hci_dev_open_sync(hdev); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + ret = -ENODEV; + goto done; + } + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) { + /* Check for rfkill but allow the HCI setup stage to + * proceed (which in itself doesn't cause any RF activity). + */ + if (hci_dev_test_flag(hdev, HCI_RFKILLED)) { + ret = -ERFKILL; + goto done; + } + + /* Check for valid public address or a configured static + * random address, but let the HCI setup proceed to + * be able to determine if there is a public address + * or not. + * + * In case of user channel usage, it is not important + * if a public address or static random address is + * available. + * + * This check is only valid for BR/EDR controllers + * since AMP controllers do not have an address. + */ + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hdev->dev_type == HCI_PRIMARY && + !bacmp(&hdev->bdaddr, BDADDR_ANY) && + !bacmp(&hdev->static_addr, BDADDR_ANY)) { + ret = -EADDRNOTAVAIL; + goto done; + } + } + + if (test_bit(HCI_UP, &hdev->flags)) { + ret = -EALREADY; + goto done; + } + + if (hdev->open(hdev)) { + ret = -EIO; + goto done; + } + + set_bit(HCI_RUNNING, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_OPEN); + + atomic_set(&hdev->cmd_cnt, 1); + set_bit(HCI_INIT, &hdev->flags); + + if (hci_dev_test_flag(hdev, HCI_SETUP) || + test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) { + bool invalid_bdaddr; + + hci_sock_dev_event(hdev, HCI_DEV_SETUP); + + if (hdev->setup) + ret = hdev->setup(hdev); + + /* The transport driver can set the quirk to mark the + * BD_ADDR invalid before creating the HCI device or in + * its setup callback. + */ + invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, + &hdev->quirks); + + if (ret) + goto setup_failed; + + if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) { + if (!bacmp(&hdev->public_addr, BDADDR_ANY)) + hci_dev_get_bd_addr_from_property(hdev); + + if (bacmp(&hdev->public_addr, BDADDR_ANY) && + hdev->set_bdaddr) { + ret = hdev->set_bdaddr(hdev, + &hdev->public_addr); + + /* If setting of the BD_ADDR from the device + * property succeeds, then treat the address + * as valid even if the invalid BD_ADDR + * quirk indicates otherwise. + */ + if (!ret) + invalid_bdaddr = false; + } + } + +setup_failed: + /* The transport driver can set these quirks before + * creating the HCI device or in its setup callback. + * + * For the invalid BD_ADDR quirk it is possible that + * it becomes a valid address if the bootloader does + * provide it (see above). + * + * In case any of them is set, the controller has to + * start up as unconfigured. + */ + if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || + invalid_bdaddr) + hci_dev_set_flag(hdev, HCI_UNCONFIGURED); + + /* For an unconfigured controller it is required to + * read at least the version information provided by + * the Read Local Version Information command. 
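hci_dev_get_bd_addr_from_property() is only consulted when the transport driver opts in via HCI_QUIRK_USE_BDADDR_PROPERTY, as the hci_dev_do_open() hunk that follows shows. A driver-side sketch (hypothetical driver code, not part of this patch):

	/* in a transport driver's probe path, before hci_register_dev():
	 * mark the controller address invalid and ask the core to fetch
	 * 'local-bd-address' from the firmware node instead
	 */
	set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
	set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);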
+ * + * If the set_bdaddr driver callback is provided, then + * also the original Bluetooth public device address + * will be read using the Read BD Address command. + */ + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) + ret = __hci_unconf_init(hdev); + } + + if (hci_dev_test_flag(hdev, HCI_CONFIG)) { + /* If public address change is configured, ensure that + * the address gets programmed. If the driver does not + * support changing the public address, fail the power + * on procedure. + */ + if (bacmp(&hdev->public_addr, BDADDR_ANY) && + hdev->set_bdaddr) + ret = hdev->set_bdaddr(hdev, &hdev->public_addr); + else + ret = -EADDRNOTAVAIL; + } + + if (!ret) { + if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { + ret = __hci_init(hdev); + if (!ret && hdev->post_init) + ret = hdev->post_init(hdev); + } + } + + /* If the HCI Reset command is clearing all diagnostic settings, + * then they need to be reprogrammed after the init procedure + * completed. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) + ret = hdev->set_diag(hdev, true); + + msft_do_open(hdev); + aosp_do_open(hdev); + + clear_bit(HCI_INIT, &hdev->flags); + + if (!ret) { + hci_dev_hold(hdev); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); + hci_adv_instances_set_rpa_expired(hdev, true); + set_bit(HCI_UP, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_UP); + hci_leds_update_powered(hdev, true); + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG) && + !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_dev_test_flag(hdev, HCI_MGMT) && + hdev->dev_type == HCI_PRIMARY) { + ret = __hci_req_hci_power_on(hdev); + mgmt_power_on(hdev, ret); + } + } else { + /* Init failed, cleanup */ + flush_work(&hdev->tx_work); + + /* Since hci_rx_work() is possible to awake new cmd_work + * it should be flushed first to avoid unexpected call of + * hci_cmd_work() + */ + flush_work(&hdev->rx_work); + flush_work(&hdev->cmd_work); + + skb_queue_purge(&hdev->cmd_q); + skb_queue_purge(&hdev->rx_q); + + if (hdev->flush) + hdev->flush(hdev); + + if (hdev->sent_cmd) { + kfree_skb(hdev->sent_cmd); + hdev->sent_cmd = NULL; + } + + clear_bit(HCI_RUNNING, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_CLOSE); + + hdev->close(hdev); + hdev->flags &= BIT(HCI_RAW); + } + +done: hci_req_sync_unlock(hdev); return ret; } @@ -542,18 +1705,154 @@ int hci_dev_open(__u16 dev) return err; } +/* This function requires the caller holds hdev->lock */ +static void hci_pend_le_actions_clear(struct hci_dev *hdev) +{ + struct hci_conn_params *p; + + list_for_each_entry(p, &hdev->le_conn_params, list) { + if (p->conn) { + hci_conn_drop(p->conn); + hci_conn_put(p->conn); + p->conn = NULL; + } + list_del_init(&p->action); + } + + BT_DBG("All LE pending actions cleared"); +} + int hci_dev_do_close(struct hci_dev *hdev) { - int err; + bool auto_off; + int err = 0; BT_DBG("%s %p", hdev->name, hdev); + cancel_delayed_work(&hdev->power_off); + cancel_delayed_work(&hdev->ncmd_timer); + + hci_request_cancel_all(hdev); hci_req_sync_lock(hdev); - err = hci_dev_close_sync(hdev); + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + test_bit(HCI_UP, &hdev->flags)) { + /* Execute vendor specific shutdown routine */ + if (hdev->shutdown) + err = hdev->shutdown(hdev); + } + + if 
(!test_and_clear_bit(HCI_UP, &hdev->flags)) { + cancel_delayed_work_sync(&hdev->cmd_timer); + hci_req_sync_unlock(hdev); + return err; + } + + hci_leds_update_powered(hdev, false); + + /* Flush RX and TX works */ + flush_work(&hdev->tx_work); + flush_work(&hdev->rx_work); + + if (hdev->discov_timeout > 0) { + hdev->discov_timeout = 0; + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + } + + if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) + cancel_delayed_work(&hdev->service_cache); + + if (hci_dev_test_flag(hdev, HCI_MGMT)) { + struct adv_info *adv_instance; + + cancel_delayed_work_sync(&hdev->rpa_expired); + + list_for_each_entry(adv_instance, &hdev->adv_instances, list) + cancel_delayed_work_sync(&adv_instance->rpa_expired_cb); + } + + /* Avoid potential lockdep warnings from the *_flush() calls by + * ensuring the workqueue is empty up front. + */ + drain_workqueue(hdev->workqueue); + + hci_dev_lock(hdev); + + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + + auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); + + if (!auto_off && hdev->dev_type == HCI_PRIMARY && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_dev_test_flag(hdev, HCI_MGMT)) + __mgmt_power_off(hdev); + + hci_inquiry_cache_flush(hdev); + hci_pend_le_actions_clear(hdev); + hci_conn_hash_flush(hdev); + hci_dev_unlock(hdev); + + smp_unregister(hdev); + + hci_sock_dev_event(hdev, HCI_DEV_DOWN); + + aosp_do_close(hdev); + msft_do_close(hdev); + + if (hdev->flush) + hdev->flush(hdev); + + /* Reset device */ + skb_queue_purge(&hdev->cmd_q); + atomic_set(&hdev->cmd_cnt, 1); + if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && + !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + set_bit(HCI_INIT, &hdev->flags); + __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT, NULL); + clear_bit(HCI_INIT, &hdev->flags); + } + + /* flush cmd work */ + flush_work(&hdev->cmd_work); + + /* Drop queues */ + skb_queue_purge(&hdev->rx_q); + skb_queue_purge(&hdev->cmd_q); + skb_queue_purge(&hdev->raw_q); + + /* Drop last sent command */ + if (hdev->sent_cmd) { + cancel_delayed_work_sync(&hdev->cmd_timer); + kfree_skb(hdev->sent_cmd); + hdev->sent_cmd = NULL; + } + + clear_bit(HCI_RUNNING, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_CLOSE); + + if (test_and_clear_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks)) + wake_up(&hdev->suspend_wait_q); + + /* After this point our queues are empty + * and no tasks are scheduled. 
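The SUSPEND_POWERING_DOWN wake-up above is one half of the suspend handshake this revert restores; the waiter side blocks until every bit in hdev->suspend_tasks is clear. A condensed sketch of both sides, assembled from the helpers reintroduced elsewhere in this patch:

	/* waiter side (suspend notifier path) */
	set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks);
	ret = hci_suspend_wait_event(hdev);	/* -ETIMEDOUT after SUSPEND_NOTIFIER_TIMEOUT */

	/* completer side (power-off path) */
	if (test_and_clear_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks))
		wake_up(&hdev->suspend_wait_q);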
*/ + hdev->close(hdev); + + /* Clear flags */ + hdev->flags &= BIT(HCI_RAW); + hci_dev_clear_volatile_flags(hdev); + + /* Controller radio is available but is currently powered down */ + hdev->amp_status = AMP_STATUS_POWERED_DOWN; + + memset(hdev->eir, 0, sizeof(hdev->eir)); + memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); + bacpy(&hdev->random_addr, BDADDR_ANY); hci_req_sync_unlock(hdev); + hci_dev_put(hdev); return err; } @@ -609,7 +1908,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; - ret = hci_reset_sync(hdev); + ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT, NULL); hci_req_sync_unlock(hdev); return ret; @@ -672,7 +1971,7 @@ int hci_dev_reset_stat(__u16 dev) return ret; } -static void hci_update_passive_scan_state(struct hci_dev *hdev, u8 scan) +static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) { bool conn_changed, discov_changed; @@ -773,7 +2072,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) * get correctly modified as this was a non-mgmt change. */ if (!err) - hci_update_passive_scan_state(hdev, dr.dev_opt); + hci_update_scan_state(hdev, dr.dev_opt); break; case HCISETLINKPOL: @@ -955,7 +2254,9 @@ static void hci_power_on(struct work_struct *work) hci_dev_test_flag(hdev, HCI_MGMT) && hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { cancel_delayed_work(&hdev->power_off); - err = hci_powered_update_sync(hdev); + hci_req_sync_lock(hdev); + err = __hci_req_hci_power_on(hdev); + hci_req_sync_unlock(hdev); mgmt_power_on(hdev, err); return; } @@ -1780,60 +3081,6 @@ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, return 0; } -/* This function requires the caller holds hdev->lock */ -u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance) -{ - u32 flags; - struct adv_info *adv; - - if (instance == 0x00) { - /* Instance 0 always manages the "Tx Power" and "Flags" - * fields - */ - flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; - - /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting - * corresponds to the "connectable" instance flag. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) - flags |= MGMT_ADV_FLAG_CONNECTABLE; - - if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) - flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; - else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) - flags |= MGMT_ADV_FLAG_DISCOV; - - return flags; - } - - adv = hci_find_adv_instance(hdev, instance); - - /* Return 0 when we got an invalid instance identifier. */ - if (!adv) - return 0; - - return adv->flags; -} - -bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance) -{ - struct adv_info *adv; - - /* Instance 0x00 always set local name */ - if (instance == 0x00) - return true; - - adv = hci_find_adv_instance(hdev, instance); - if (!adv) - return false; - - if (adv->flags & MGMT_ADV_FLAG_APPEARANCE || - adv->flags & MGMT_ADV_FLAG_LOCAL_NAME) - return true; - - return adv->scan_rsp_len ? 
true : false; -} - /* This function requires the caller holds hdev->lock */ void hci_adv_monitors_clear(struct hci_dev *hdev) { @@ -1916,7 +3163,7 @@ bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_NONE: - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); bt_dev_dbg(hdev, "%s add monitor status %d", hdev->name, *err); /* Message was not forwarded to controller - not an error */ return false; @@ -1980,7 +3227,7 @@ bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err) pending = hci_remove_adv_monitor(hdev, monitor, handle, err); if (!*err && !pending) - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); bt_dev_dbg(hdev, "%s remove monitor handle %d, status %d, %spending", hdev->name, handle, *err, pending ? "" : "not "); @@ -2012,7 +3259,7 @@ bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err) } if (update) - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); bt_dev_dbg(hdev, "%s remove all monitors status %d, %spending", hdev->name, *err, pending ? "" : "not "); @@ -2153,7 +3400,7 @@ int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr, bacpy(&entry->bdaddr, bdaddr); entry->bdaddr_type = type; - bitmap_from_u64(entry->flags, flags); + entry->current_flags = flags; list_add(&entry->list, list); @@ -2241,6 +3488,15 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, { struct hci_conn_params *param; + switch (addr_type) { + case ADDR_LE_DEV_PUBLIC_RESOLVED: + addr_type = ADDR_LE_DEV_PUBLIC; + break; + case ADDR_LE_DEV_RANDOM_RESOLVED: + addr_type = ADDR_LE_DEV_RANDOM; + break; + } + list_for_each_entry(param, list, action) { if (bacmp(¶m->addr, addr) == 0 && param->addr_type == addr_type) @@ -2306,7 +3562,7 @@ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) hci_conn_params_free(params); - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); BT_DBG("addr %pMR (type %u)", addr, addr_type); } @@ -2374,6 +3630,61 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, } } +static void hci_suspend_clear_tasks(struct hci_dev *hdev) +{ + int i; + + for (i = 0; i < __SUSPEND_NUM_TASKS; i++) + clear_bit(i, hdev->suspend_tasks); + + wake_up(&hdev->suspend_wait_q); +} + +static int hci_suspend_wait_event(struct hci_dev *hdev) +{ +#define WAKE_COND \ + (find_first_bit(hdev->suspend_tasks, __SUSPEND_NUM_TASKS) == \ + __SUSPEND_NUM_TASKS) + + int i; + int ret = wait_event_timeout(hdev->suspend_wait_q, + WAKE_COND, SUSPEND_NOTIFIER_TIMEOUT); + + if (ret == 0) { + bt_dev_err(hdev, "Timed out waiting for suspend events"); + for (i = 0; i < __SUSPEND_NUM_TASKS; ++i) { + if (test_bit(i, hdev->suspend_tasks)) + bt_dev_err(hdev, "Suspend timeout bit: %d", i); + clear_bit(i, hdev->suspend_tasks); + } + + ret = -ETIMEDOUT; + } else { + ret = 0; + } + + return ret; +} + +static void hci_prepare_suspend(struct work_struct *work) +{ + struct hci_dev *hdev = + container_of(work, struct hci_dev, suspend_prepare); + + hci_dev_lock(hdev); + hci_req_prepare_suspend(hdev, hdev->suspend_state_next); + hci_dev_unlock(hdev); +} + +static int hci_change_suspend_state(struct hci_dev *hdev, + enum suspended_state next) +{ + hdev->suspend_state_next = next; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + return hci_suspend_wait_event(hdev); +} + static void 
hci_clear_wake_reason(struct hci_dev *hdev) { hci_dev_lock(hdev); @@ -2391,12 +3702,55 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, struct hci_dev *hdev = container_of(nb, struct hci_dev, suspend_notifier); int ret = 0; + u8 state = BT_RUNNING; - if (action == PM_SUSPEND_PREPARE) - ret = hci_suspend_dev(hdev); - else if (action == PM_POST_SUSPEND) - ret = hci_resume_dev(hdev); + /* If powering down, wait for completion. */ + if (mgmt_powering_down(hdev)) { + set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks); + ret = hci_suspend_wait_event(hdev); + if (ret) + goto done; + } + /* Suspend notifier should only act on events when powered. */ + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) + goto done; + + if (action == PM_SUSPEND_PREPARE) { + /* Suspend consists of two actions: + * - First, disconnect everything and make the controller not + * connectable (disabling scanning) + * - Second, program event filter/accept list and enable scan + */ + ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT); + if (!ret) + state = BT_SUSPEND_DISCONNECT; + + /* Only configure accept list if disconnect succeeded and wake + * isn't being prevented. + */ + if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) { + ret = hci_change_suspend_state(hdev, + BT_SUSPEND_CONFIGURE_WAKE); + if (!ret) + state = BT_SUSPEND_CONFIGURE_WAKE; + } + + hci_clear_wake_reason(hdev); + mgmt_suspending(hdev, state); + + } else if (action == PM_POST_SUSPEND) { + ret = hci_change_suspend_state(hdev, BT_RUNNING); + + mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr, + hdev->wake_addr_type); + } + +done: + /* We always allow suspend even if suspend preparation failed and + * attempt to recover in resume. + */ if (ret) bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d", action, ret); @@ -2504,14 +3858,12 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) INIT_LIST_HEAD(&hdev->adv_instances); INIT_LIST_HEAD(&hdev->blocked_keys); - INIT_LIST_HEAD(&hdev->local_codecs); INIT_WORK(&hdev->rx_work, hci_rx_work); INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->error_reset, hci_error_reset); - - hci_cmd_sync_init(hdev); + INIT_WORK(&hdev->suspend_prepare, hci_prepare_suspend); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); @@ -2520,6 +3872,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) skb_queue_head_init(&hdev->raw_q); init_waitqueue_head(&hdev->req_wait_q); + init_waitqueue_head(&hdev->suspend_wait_q); INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout); @@ -2629,12 +3982,6 @@ int hci_register_dev(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); - /* Mark Remote Wakeup connection flag as supported if driver has wakeup - * callback. 
- */ - if (hdev->wakeup) - set_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, hdev->conn_flags); - hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -2648,7 +3995,6 @@ int hci_register_dev(struct hci_dev *hdev) queue_work(hdev->req_workqueue, &hdev->power_on); idr_init(&hdev->adv_monitors_idr); - msft_register(hdev); return id; @@ -2676,12 +4022,11 @@ void hci_unregister_dev(struct hci_dev *hdev) cancel_work_sync(&hdev->power_on); - hci_cmd_sync_clear(hdev); - - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) + if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + hci_suspend_clear_tasks(hdev); unregister_pm_notifier(&hdev->suspend_notifier); - - msft_unregister(hdev); + cancel_work_sync(&hdev->suspend_prepare); + } hci_dev_do_close(hdev); @@ -2745,56 +4090,16 @@ EXPORT_SYMBOL(hci_release_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { - int ret; - - bt_dev_dbg(hdev, ""); - - /* Suspend should only act on when powered. */ - if (!hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_UNREGISTER)) - return 0; - - /* If powering down don't attempt to suspend */ - if (mgmt_powering_down(hdev)) - return 0; - - hci_req_sync_lock(hdev); - ret = hci_suspend_sync(hdev); - hci_req_sync_unlock(hdev); - - hci_clear_wake_reason(hdev); - mgmt_suspending(hdev, hdev->suspend_state); - hci_sock_dev_event(hdev, HCI_DEV_SUSPEND); - return ret; + return 0; } EXPORT_SYMBOL(hci_suspend_dev); /* Resume HCI device */ int hci_resume_dev(struct hci_dev *hdev) { - int ret; - - bt_dev_dbg(hdev, ""); - - /* Resume should only act on when powered. */ - if (!hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_UNREGISTER)) - return 0; - - /* If powering down don't attempt to resume */ - if (mgmt_powering_down(hdev)) - return 0; - - hci_req_sync_lock(hdev); - ret = hci_resume_sync(hdev); - hci_req_sync_unlock(hdev); - - mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr, - hdev->wake_addr_type); - hci_sock_dev_event(hdev, HCI_DEV_RESUME); - return ret; + return 0; } EXPORT_SYMBOL(hci_resume_dev); @@ -2912,7 +4217,7 @@ int hci_unregister_cb(struct hci_cb *cb) } EXPORT_SYMBOL(hci_unregister_cb); -static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { int err; @@ -2935,17 +4240,14 @@ static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) if (!test_bit(HCI_RUNNING, &hdev->flags)) { kfree_skb(skb); - return -EINVAL; + return; } err = hdev->send(hdev, skb); if (err < 0) { bt_dev_err(hdev, "sending frame failed (%d)", err); kfree_skb(skb); - return err; } - - return 0; } /* Send HCI command */ @@ -3022,6 +4324,25 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; } +/* Send HCI command and wait for command complete event */ +struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + struct sk_buff *skb; + + if (!test_bit(HCI_UP, &hdev->flags)) + return ERR_PTR(-ENETDOWN); + + bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); + + hci_req_sync_lock(hdev); + skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout); + hci_req_sync_unlock(hdev); + + return skb; +} +EXPORT_SYMBOL(hci_cmd_sync); + /* Send ACL data */ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) { @@ -3852,15 +5173,10 @@ static void hci_cmd_work(struct work_struct *work) hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); if (hdev->sent_cmd) { - int res; if 
(hci_req_status_pend(hdev)) hci_dev_set_flag(hdev, HCI_CMD_PENDING); atomic_dec(&hdev->cmd_cnt); - - res = hci_send_frame(hdev, skb); - if (res < 0) - __hci_cmd_sync_cancel(hdev, -res); - + hci_send_frame(hdev, skb); if (test_bit(HCI_RESET, &hdev->flags)) cancel_delayed_work(&hdev->cmd_timer); else diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 902b40a90b..841393389f 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -27,7 +27,6 @@ #include #include "smp.h" -#include "hci_request.h" #include "hci_debugfs.h" #define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \ @@ -1251,125 +1250,3 @@ void hci_debugfs_create_conn(struct hci_conn *conn) snprintf(name, sizeof(name), "%u", conn->handle); conn->debugfs = debugfs_create_dir(name, hdev->debugfs); } - -static ssize_t dut_mode_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - struct sk_buff *skb; - bool enable; - int err; - - if (!test_bit(HCI_UP, &hdev->flags)) - return -ENETDOWN; - - err = kstrtobool_from_user(user_buf, count, &enable); - if (err) - return err; - - if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) - return -EALREADY; - - hci_req_sync_lock(hdev); - if (enable) - skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL, - HCI_CMD_TIMEOUT); - else - skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, - HCI_CMD_TIMEOUT); - hci_req_sync_unlock(hdev); - - if (IS_ERR(skb)) - return PTR_ERR(skb); - - kfree_skb(skb); - - hci_dev_change_flag(hdev, HCI_DUT_MODE); - - return count; -} - -static const struct file_operations dut_mode_fops = { - .open = simple_open, - .read = dut_mode_read, - .write = dut_mode_write, - .llseek = default_llseek, -}; - -static ssize_t vendor_diag_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - bool enable; - int err; - - err = kstrtobool_from_user(user_buf, count, &enable); - if (err) - return err; - - /* When the diagnostic flags are not persistent and the transport - * is not active or in user channel operation, then there is no need - * for the vendor callback. Instead just store the desired value and - * the setting will be programmed when the controller gets powered on. 
- */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && - (!test_bit(HCI_RUNNING, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) - goto done; - - hci_req_sync_lock(hdev); - err = hdev->set_diag(hdev, enable); - hci_req_sync_unlock(hdev); - - if (err < 0) - return err; - -done: - if (enable) - hci_dev_set_flag(hdev, HCI_VENDOR_DIAG); - else - hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG); - - return count; -} - -static const struct file_operations vendor_diag_fops = { - .open = simple_open, - .read = vendor_diag_read, - .write = vendor_diag_write, - .llseek = default_llseek, -}; - -void hci_debugfs_create_basic(struct hci_dev *hdev) -{ - debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, - &dut_mode_fops); - - if (hdev->set_diag) - debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev, - &vendor_diag_fops); -} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h index 9a8a7c93bb..4444dc8ced 100644 --- a/net/bluetooth/hci_debugfs.h +++ b/net/bluetooth/hci_debugfs.h @@ -26,7 +26,6 @@ void hci_debugfs_create_common(struct hci_dev *hdev); void hci_debugfs_create_bredr(struct hci_dev *hdev); void hci_debugfs_create_le(struct hci_dev *hdev); void hci_debugfs_create_conn(struct hci_conn *conn); -void hci_debugfs_create_basic(struct hci_dev *hdev); #else @@ -46,8 +45,4 @@ static inline void hci_debugfs_create_conn(struct hci_conn *conn) { } -static inline void hci_debugfs_create_basic(struct hci_dev *hdev) -{ -} - #endif diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fc30f4c03d..868a22df32 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -36,7 +36,6 @@ #include "amp.h" #include "smp.h" #include "msft.h" -#include "eir.h" #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -45,48 +44,12 @@ /* Handle HCI Event packets */ -static void *hci_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb, - u8 ev, size_t len) +static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb, + u8 *new_status) { - void *data; + __u8 status = *((__u8 *) skb->data); - data = skb_pull_data(skb, len); - if (!data) - bt_dev_err(hdev, "Malformed Event: 0x%2.2x", ev); - - return data; -} - -static void *hci_cc_skb_pull(struct hci_dev *hdev, struct sk_buff *skb, - u16 op, size_t len) -{ - void *data; - - data = skb_pull_data(skb, len); - if (!data) - bt_dev_err(hdev, "Malformed Command Complete: 0x%4.4x", op); - - return data; -} - -static void *hci_le_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb, - u8 ev, size_t len) -{ - void *data; - - data = skb_pull_data(skb, len); - if (!data) - bt_dev_err(hdev, "Malformed LE Event: 0x%2.2x", ev); - - return data; -} - -static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_status *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); /* It is possible that we receive Inquiry Complete event right * before we receive Inquiry Cancel Command Complete event, in @@ -95,13 +58,15 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, * we actually achieve what Inquiry Cancel wants to achieve, * which is to end the last Inquiry session. 
*/ - if (rp->status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) { + if (status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) { bt_dev_warn(hdev, "Ignoring error of Inquiry Cancel command"); - rp->status = 0x00; + status = 0x00; } - if (rp->status) - return rp->status; + *new_status = status; + + if (status) + return; clear_bit(HCI_INQUIRY, &hdev->flags); smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */ @@ -117,62 +82,49 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); hci_conn_check_pending(hdev); - - return rp->status; } -static u8 hci_cc_periodic_inq(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; hci_dev_set_flag(hdev, HCI_PERIODIC_INQ); - - return rp->status; } -static u8 hci_cc_exit_periodic_inq(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); hci_conn_check_pending(hdev); - - return rp->status; } -static u8 hci_cc_remote_name_req_cancel(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_remote_name_req_cancel(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - return rp->status; + BT_DBG("%s", hdev->name); } -static u8 hci_cc_role_discovery(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_role_discovery *rp = data; + struct hci_rp_role_discovery *rp = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hci_dev_lock(hdev); @@ -181,20 +133,17 @@ static u8 hci_cc_role_discovery(struct hci_dev *hdev, void *data, conn->role = rp->role; hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_link_policy(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_link_policy(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_link_policy *rp = data; + struct hci_rp_read_link_policy *rp = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hci_dev_lock(hdev); @@ -203,25 +152,22 @@ static u8 hci_cc_read_link_policy(struct hci_dev *hdev, void *data, conn->link_policy = __le16_to_cpu(rp->policy); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_write_link_policy(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_write_link_policy *rp = data; + struct hci_rp_write_link_policy *rp = (void *) skb->data; struct hci_conn *conn; void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", 
rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LINK_POLICY); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); @@ -230,55 +176,49 @@ static u8 hci_cc_write_link_policy(struct hci_dev *hdev, void *data, conn->link_policy = get_unaligned_le16(sent + 2); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_def_link_policy(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_def_link_policy(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_def_link_policy *rp = data; + struct hci_rp_read_def_link_policy *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->link_policy = __le16_to_cpu(rp->policy); - - return rp->status; } -static u8 hci_cc_write_def_link_policy(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_def_link_policy(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_LINK_POLICY); if (!sent) - return rp->status; + return; hdev->link_policy = get_unaligned_le16(sent); - - return rp->status; } -static u8 hci_cc_reset(struct hci_dev *hdev, void *data, struct sk_buff *skb) +static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); clear_bit(HCI_RESET, &hdev->flags); - if (rp->status) - return rp->status; + if (status) + return; /* Reset all non-persistent flags */ hci_dev_clear_volatile_flags(hdev); @@ -300,104 +240,91 @@ static u8 hci_cc_reset(struct hci_dev *hdev, void *data, struct sk_buff *skb) hci_bdaddr_list_clear(&hdev->le_accept_list); hci_bdaddr_list_clear(&hdev->le_resolv_list); - - return rp->status; } -static u8 hci_cc_read_stored_link_key(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_stored_link_key *rp = data; + struct hci_rp_read_stored_link_key *rp = (void *)skb->data; struct hci_cp_read_stored_link_key *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); sent = hci_sent_cmd_data(hdev, HCI_OP_READ_STORED_LINK_KEY); if (!sent) - return rp->status; + return; if (!rp->status && sent->read_all == 0x01) { - hdev->stored_max_keys = le16_to_cpu(rp->max_keys); - hdev->stored_num_keys = le16_to_cpu(rp->num_keys); + hdev->stored_max_keys = rp->max_keys; + hdev->stored_num_keys = rp->num_keys; } - - return rp->status; } -static u8 hci_cc_delete_stored_link_key(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_delete_stored_link_key *rp = data; + struct hci_rp_delete_stored_link_key *rp = (void *)skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (rp->num_keys <= 
hdev->stored_num_keys) - hdev->stored_num_keys -= le16_to_cpu(rp->num_keys); + hdev->stored_num_keys -= rp->num_keys; else hdev->stored_num_keys = 0; - - return rp->status; } -static u8 hci_cc_write_local_name(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); if (hci_dev_test_flag(hdev, HCI_MGMT)) - mgmt_set_local_name_complete(hdev, sent, rp->status); - else if (!rp->status) + mgmt_set_local_name_complete(hdev, sent, status); + else if (!status) memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_local_name(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_local_name *rp = data; + struct hci_rp_read_local_name *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG)) memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH); - - return rp->status; } -static u8 hci_cc_write_auth_enable(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_AUTH_ENABLE); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); - if (!rp->status) { + if (!status) { __u8 param = *((__u8 *) sent); if (param == AUTH_ENABLED) @@ -407,28 +334,25 @@ static u8 hci_cc_write_auth_enable(struct hci_dev *hdev, void *data, } if (hci_dev_test_flag(hdev, HCI_MGMT)) - mgmt_auth_enable_complete(hdev, rp->status); + mgmt_auth_enable_complete(hdev, status); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_write_encrypt_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); __u8 param; void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_ENCRYPT_MODE); if (!sent) - return rp->status; + return; param = *((__u8 *) sent); @@ -436,28 +360,25 @@ static u8 hci_cc_write_encrypt_mode(struct hci_dev *hdev, void *data, set_bit(HCI_ENCRYPT, &hdev->flags); else clear_bit(HCI_ENCRYPT, &hdev->flags); - - return rp->status; } -static u8 hci_cc_write_scan_enable(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); __u8 param; void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); sent = hci_sent_cmd_data(hdev, 
HCI_OP_WRITE_SCAN_ENABLE); if (!sent) - return rp->status; + return; param = *((__u8 *) sent); hci_dev_lock(hdev); - if (rp->status) { + if (status) { hdev->discov_timeout = 0; goto done; } @@ -474,25 +395,22 @@ static u8 hci_cc_write_scan_enable(struct hci_dev *hdev, void *data, done: hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_set_event_filter(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_set_event_filter(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *)skb->data); struct hci_cp_set_event_filter *cp; void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_SET_EVENT_FLT); if (!sent) - return rp->status; + return; cp = (struct hci_cp_set_event_filter *)sent; @@ -500,149 +418,135 @@ static u8 hci_cc_set_event_filter(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_EVENT_FILTER_CONFIGURED); else hci_dev_set_flag(hdev, HCI_EVENT_FILTER_CONFIGURED); - - return rp->status; } -static u8 hci_cc_read_class_of_dev(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_class_of_dev *rp = data; + struct hci_rp_read_class_of_dev *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; memcpy(hdev->dev_class, rp->dev_class, 3); - bt_dev_dbg(hdev, "class 0x%.2x%.2x%.2x", hdev->dev_class[2], - hdev->dev_class[1], hdev->dev_class[0]); - - return rp->status; + BT_DBG("%s class 0x%.2x%.2x%.2x", hdev->name, + hdev->dev_class[2], hdev->dev_class[1], hdev->dev_class[0]); } -static u8 hci_cc_write_class_of_dev(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_CLASS_OF_DEV); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); - if (!rp->status) + if (status == 0) memcpy(hdev->dev_class, sent, 3); if (hci_dev_test_flag(hdev, HCI_MGMT)) - mgmt_set_class_of_dev_complete(hdev, sent, rp->status); + mgmt_set_class_of_dev_complete(hdev, sent, status); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_voice_setting(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_voice_setting *rp = data; + struct hci_rp_read_voice_setting *rp = (void *) skb->data; __u16 setting; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; setting = __le16_to_cpu(rp->voice_setting); if (hdev->voice_setting == setting) - return rp->status; + return; hdev->voice_setting = setting; - bt_dev_dbg(hdev, "voice setting 0x%4.4x", setting); + BT_DBG("%s voice setting 0x%4.4x", hdev->name, setting); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - - return rp->status; } -static u8 hci_cc_write_voice_setting(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void 
hci_cc_write_voice_setting(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); __u16 setting; void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_VOICE_SETTING); if (!sent) - return rp->status; + return; setting = get_unaligned_le16(sent); if (hdev->voice_setting == setting) - return rp->status; + return; hdev->voice_setting = setting; - bt_dev_dbg(hdev, "voice setting 0x%4.4x", setting); + BT_DBG("%s voice setting 0x%4.4x", hdev->name, setting); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - - return rp->status; } -static u8 hci_cc_read_num_supported_iac(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_num_supported_iac(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_num_supported_iac *rp = data; + struct hci_rp_read_num_supported_iac *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->num_iac = rp->num_iac; - bt_dev_dbg(hdev, "num iac %d", hdev->num_iac); - - return rp->status; + BT_DBG("%s num iac %d", hdev->name, hdev->num_iac); } -static u8 hci_cc_write_ssp_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); struct hci_cp_write_ssp_mode *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_MODE); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); - if (!rp->status) { + if (!status) { if (sent->mode) hdev->features[1][0] |= LMP_HOST_SSP; else hdev->features[1][0] &= ~LMP_HOST_SSP; } - if (!rp->status) { + if (hci_dev_test_flag(hdev, HCI_MGMT)) + mgmt_ssp_enable_complete(hdev, sent->mode, status); + else if (!status) { if (sent->mode) hci_dev_set_flag(hdev, HCI_SSP_ENABLED); else @@ -650,32 +554,29 @@ static u8 hci_cc_write_ssp_mode(struct hci_dev *hdev, void *data, } hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_write_sc_support(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + u8 status = *((u8 *) skb->data); struct hci_cp_write_sc_support *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SC_SUPPORT); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); - if (!rp->status) { + if (!status) { if (sent->support) hdev->features[1][0] |= LMP_HOST_SC; else hdev->features[1][0] &= ~LMP_HOST_SC; } - if (!hci_dev_test_flag(hdev, HCI_MGMT) && !rp->status) { + if (!hci_dev_test_flag(hdev, HCI_MGMT) && !status) { if (sent->support) hci_dev_set_flag(hdev, HCI_SC_ENABLED); else @@ -683,19 +584,16 @@ static u8 hci_cc_write_sc_support(struct hci_dev *hdev, void *data, } hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_local_version(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_local_version *rp = data; + struct 
hci_rp_read_local_version *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG)) { @@ -705,37 +603,33 @@ static u8 hci_cc_read_local_version(struct hci_dev *hdev, void *data, hdev->manufacturer = __le16_to_cpu(rp->manufacturer); hdev->lmp_subver = __le16_to_cpu(rp->lmp_subver); } - - return rp->status; } -static u8 hci_cc_read_local_commands(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_commands(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_local_commands *rp = data; + struct hci_rp_read_local_commands *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG)) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); - - return rp->status; } -static u8 hci_cc_read_auth_payload_timeout(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_auth_payload_timeout(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_auth_payload_to *rp = data; + struct hci_rp_read_auth_payload_to *rp = (void *)skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hci_dev_lock(hdev); @@ -744,25 +638,23 @@ static u8 hci_cc_read_auth_payload_timeout(struct hci_dev *hdev, void *data, conn->auth_payload_timeout = __le16_to_cpu(rp->timeout); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_write_auth_payload_to *rp = data; + struct hci_rp_write_auth_payload_to *rp = (void *)skb->data; struct hci_conn *conn; void *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); @@ -771,19 +663,17 @@ static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data, conn->auth_payload_timeout = get_unaligned_le16(sent + 2); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_local_features(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_features(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_local_features *rp = data; + struct hci_rp_read_local_features *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; memcpy(hdev->features, rp->features, 8); @@ -823,53 +713,46 @@ static u8 hci_cc_read_local_features(struct hci_dev *hdev, void *data, if (hdev->features[0][5] & LMP_EDR_3S_ESCO) hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5); - - return rp->status; } -static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_ext_features(struct hci_dev *hdev, + struct sk_buff *skb) { - struct 
hci_rp_read_local_ext_features *rp = data; + struct hci_rp_read_local_ext_features *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (hdev->max_page < rp->max_page) hdev->max_page = rp->max_page; if (rp->page < HCI_MAX_PAGES) memcpy(hdev->features[rp->page], rp->features, 8); - - return rp->status; } -static u8 hci_cc_read_flow_control_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_flow_control_mode *rp = data; + struct hci_rp_read_flow_control_mode *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->flow_ctl_mode = rp->mode; - - return rp->status; } -static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_buffer_size *rp = data; + struct hci_rp_read_buffer_size *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->acl_mtu = __le16_to_cpu(rp->acl_mtu); hdev->sco_mtu = rp->sco_mtu; @@ -886,130 +769,115 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, BT_DBG("%s acl mtu %d:%d sco mtu %d:%d", hdev->name, hdev->acl_mtu, hdev->acl_pkts, hdev->sco_mtu, hdev->sco_pkts); - - return rp->status; } -static u8 hci_cc_read_bd_addr(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_bd_addr *rp = data; + struct hci_rp_read_bd_addr *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (test_bit(HCI_INIT, &hdev->flags)) bacpy(&hdev->bdaddr, &rp->bdaddr); if (hci_dev_test_flag(hdev, HCI_SETUP)) bacpy(&hdev->setup_addr, &rp->bdaddr); - - return rp->status; } -static u8 hci_cc_read_local_pairing_opts(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_pairing_opts(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_local_pairing_opts *rp = data; + struct hci_rp_read_local_pairing_opts *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG)) { hdev->pairing_opts = rp->pairing_opts; hdev->max_enc_key_size = rp->max_key_size; } - - return rp->status; } -static u8 hci_cc_read_page_scan_activity(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_page_scan_activity *rp = data; + struct hci_rp_read_page_scan_activity *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (test_bit(HCI_INIT, &hdev->flags)) { hdev->page_scan_interval = __le16_to_cpu(rp->interval); hdev->page_scan_window = __le16_to_cpu(rp->window); } - - return 
rp->status; } -static u8 hci_cc_write_page_scan_activity(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + u8 status = *((u8 *) skb->data); struct hci_cp_write_page_scan_activity *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY); if (!sent) - return rp->status; + return; hdev->page_scan_interval = __le16_to_cpu(sent->interval); hdev->page_scan_window = __le16_to_cpu(sent->window); - - return rp->status; } -static u8 hci_cc_read_page_scan_type(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_page_scan_type *rp = data; + struct hci_rp_read_page_scan_type *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; if (test_bit(HCI_INIT, &hdev->flags)) hdev->page_scan_type = rp->type; - - return rp->status; } -static u8 hci_cc_write_page_scan_type(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + u8 status = *((u8 *) skb->data); u8 *type; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE); if (type) hdev->page_scan_type = *type; - - return rp->status; } -static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_data_block_size(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_data_block_size *rp = data; + struct hci_rp_read_data_block_size *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->block_mtu = __le16_to_cpu(rp->max_acl_len); hdev->block_len = __le16_to_cpu(rp->block_len); @@ -1019,21 +887,21 @@ static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data, BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, hdev->block_cnt, hdev->block_len); - - return rp->status; } -static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_clock(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_clock *rp = data; + struct hci_rp_read_clock *rp = (void *) skb->data; struct hci_cp_read_clock *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s", hdev->name); + + if (skb->len < sizeof(*rp)) + return; if (rp->status) - return rp->status; + return; hci_dev_lock(hdev); @@ -1054,18 +922,17 @@ static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data, unlock: hci_dev_unlock(hdev); - return rp->status; } -static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_amp_info(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_local_amp_info *rp = data; + struct hci_rp_read_local_amp_info *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", 
rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->amp_status = rp->amp_status; hdev->amp_total_bw = __le32_to_cpu(rp->total_bw); @@ -1077,68 +944,59 @@ static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data, hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size); hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); - - return rp->status; } -static u8 hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_inq_rsp_tx_power *rp = data; + struct hci_rp_read_inq_rsp_tx_power *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->inq_tx_power = rp->tx_power; - - return rp->status; } -static u8 hci_cc_read_def_err_data_reporting(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_def_err_data_reporting *rp = data; + struct hci_rp_read_def_err_data_reporting *rp = (void *)skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->err_data_reporting = rp->err_data_reporting; - - return rp->status; } -static u8 hci_cc_write_def_err_data_reporting(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *)skb->data); struct hci_cp_write_def_err_data_reporting *cp; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING); if (!cp) - return rp->status; + return; hdev->err_data_reporting = cp->err_data_reporting; - - return rp->status; } -static u8 hci_cc_pin_code_reply(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_pin_code_reply *rp = data; + struct hci_rp_pin_code_reply *rp = (void *) skb->data; struct hci_cp_pin_code_reply *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); hci_dev_lock(hdev); @@ -1158,15 +1016,13 @@ static u8 hci_cc_pin_code_reply(struct hci_dev *hdev, void *data, unlock: hci_dev_unlock(hdev); - return rp->status; } -static u8 hci_cc_pin_code_neg_reply(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_pin_code_neg_reply *rp = data; + struct hci_rp_pin_code_neg_reply *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); hci_dev_lock(hdev); @@ -1175,19 +1031,17 @@ static u8 hci_cc_pin_code_neg_reply(struct hci_dev *hdev, void *data, rp->status); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_le_read_buffer_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, + struct 
sk_buff *skb) { - struct hci_rp_le_read_buffer_size *rp = data; + struct hci_rp_le_read_buffer_size *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->le_mtu = __le16_to_cpu(rp->le_mtu); hdev->le_pkts = rp->le_max_pkt; @@ -1195,46 +1049,39 @@ static u8 hci_cc_le_read_buffer_size(struct hci_dev *hdev, void *data, hdev->le_cnt = hdev->le_pkts; BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); - - return rp->status; } -static u8 hci_cc_le_read_local_features(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_local_features(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_local_features *rp = data; + struct hci_rp_le_read_local_features *rp = (void *) skb->data; BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; memcpy(hdev->le_features, rp->features, 8); - - return rp->status; } -static u8 hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_adv_tx_power *rp = data; + struct hci_rp_le_read_adv_tx_power *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->adv_tx_power = rp->tx_power; - - return rp->status; } -static u8 hci_cc_user_confirm_reply(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_user_confirm_reply *rp = data; + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); hci_dev_lock(hdev); @@ -1243,16 +1090,14 @@ static u8 hci_cc_user_confirm_reply(struct hci_dev *hdev, void *data, rp->status); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_user_confirm_reply *rp = data; + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); hci_dev_lock(hdev); @@ -1261,16 +1106,13 @@ static u8 hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, void *data, ACL_LINK, 0, rp->status); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_user_passkey_reply(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_user_passkey_reply(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_user_confirm_reply *rp = data; + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); hci_dev_lock(hdev); @@ -1279,16 +1121,14 @@ static u8 hci_cc_user_passkey_reply(struct hci_dev *hdev, void *data, 0, rp->status); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_user_passkey_neg_reply(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_user_passkey_neg_reply(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_user_confirm_reply *rp = data; + struct hci_rp_user_confirm_reply *rp = 
(void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); hci_dev_lock(hdev); @@ -1297,44 +1137,37 @@ static u8 hci_cc_user_passkey_neg_reply(struct hci_dev *hdev, void *data, ACL_LINK, 0, rp->status); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_local_oob_data(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_oob_data(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_local_oob_data *rp = data; + struct hci_rp_read_local_oob_data *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - return rp->status; + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); } -static u8 hci_cc_read_local_oob_ext_data(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_local_oob_ext_data(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_read_local_oob_ext_data *rp = data; + struct hci_rp_read_local_oob_ext_data *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - return rp->status; + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); } -static u8 hci_cc_le_set_random_addr(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); bdaddr_t *sent; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_RANDOM_ADDR); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); @@ -1347,24 +1180,21 @@ static u8 hci_cc_le_set_random_addr(struct hci_dev *hdev, void *data, } hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_le_set_default_phy(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_default_phy(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); struct hci_cp_le_set_default_phy *cp; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_DEFAULT_PHY); if (!cp) - return rp->status; + return; hci_dev_lock(hdev); @@ -1372,21 +1202,17 @@ static u8 hci_cc_le_set_default_phy(struct hci_dev *hdev, void *data, hdev->le_rx_def_phys = cp->rx_phys; hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); struct hci_cp_le_set_adv_set_rand_addr *cp; struct adv_info *adv; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR); /* Update only in case of an adv instance, since handle 0x00 shall be using * the HCI_OP_LE_SET_RANDOM_ADDR instead of * non-extended advertising. 
*/ if (!cp || !cp->handle) - return rp->status; + return; hci_dev_lock(hdev); @@ -1410,126 +1236,34 @@ static u8 hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, void *data, } hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_le_remove_adv_set(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_transmit_power(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; - u8 *instance; - int err; + struct hci_rp_le_read_transmit_power *rp = (void *)skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; - - instance = hci_sent_cmd_data(hdev, HCI_OP_LE_REMOVE_ADV_SET); - if (!instance) - return rp->status; - - hci_dev_lock(hdev); - - err = hci_remove_adv_instance(hdev, *instance); - if (!err) - mgmt_advertising_removed(hci_skb_sk(hdev->sent_cmd), hdev, - *instance); - - hci_dev_unlock(hdev); - - return rp->status; -} - -static u8 hci_cc_le_clear_adv_sets(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_status *rp = data; - struct adv_info *adv, *n; - int err; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - if (!hci_sent_cmd_data(hdev, HCI_OP_LE_CLEAR_ADV_SETS)) - return rp->status; - - hci_dev_lock(hdev); - - list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) { - u8 instance = adv->instance; - - err = hci_remove_adv_instance(hdev, instance); - if (!err) - mgmt_advertising_removed(hci_skb_sk(hdev->sent_cmd), - hdev, instance); - } - - hci_dev_unlock(hdev); - - return rp->status; -} - -static u8 hci_cc_le_read_transmit_power(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_le_read_transmit_power *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; + return; hdev->min_le_tx_power = rp->min_le_tx_power; hdev->max_le_tx_power = rp->max_le_tx_power; - - return rp->status; } -static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; - struct hci_cp_le_set_privacy_mode *cp; - struct hci_conn_params *params; + __u8 *sent, status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; - - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PRIVACY_MODE); - if (!cp) - return rp->status; - - hci_dev_lock(hdev); - - params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type); - if (params) - params->privacy_mode = cp->mode; - - hci_dev_unlock(hdev); - - return rp->status; -} - -static u8 hci_cc_le_set_adv_enable(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_status *rp = data; - __u8 *sent; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_ENABLE); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); @@ -1551,26 +1285,24 @@ static u8 hci_cc_le_set_adv_enable(struct hci_dev *hdev, void *data, } hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_set_ext_adv_enable *cp; 
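/* Layout note on the declarations below: the Set Extended Advertising
 * Enable command is a fixed hci_cp_le_set_ext_adv_enable header followed
 * by one or more hci_cp_ext_adv_set entries (one per advertising set);
 * 'set' is pointed at the first of those entries via
 * set = (void *)cp->data once the sent command has been looked up.
 */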
struct hci_cp_ext_adv_set *set; + __u8 status = *((__u8 *) skb->data); struct adv_info *adv = NULL, *n; - struct hci_ev_status *rp = data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE); if (!cp) - return rp->status; + return; set = (void *)cp->data; @@ -1617,48 +1349,44 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, unlock: hci_dev_unlock(hdev); - return rp->status; } -static u8 hci_cc_le_set_scan_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_cp_le_set_scan_param *cp; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_PARAM); if (!cp) - return rp->status; + return; hci_dev_lock(hdev); hdev->le_scan_type = cp->type; hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_le_set_ext_scan_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_ext_scan_param(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_set_ext_scan_params *cp; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); struct hci_cp_le_scan_phy_params *phy_param; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS); if (!cp) - return rp->status; + return; phy_param = (void *)cp->data; @@ -1667,8 +1395,6 @@ static u8 hci_cc_le_set_ext_scan_param(struct hci_dev *hdev, void *data, hdev->le_scan_type = phy_param->type; hci_dev_unlock(hdev); - - return rp->status; } static bool has_pending_adv_report(struct hci_dev *hdev) @@ -1738,10 +1464,16 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) /* The HCI_LE_SCAN_INTERRUPTED flag indicates that we * interrupted scanning due to a connect request. Mark - * therefore discovery as stopped. + * therefore discovery as stopped. If this was not + * because of a connect request, advertising might have + * been disabled because of active scanning, so + * re-enable it again if necessary. 
*/ if (hci_dev_test_and_clear_flag(hdev, HCI_LE_SCAN_INTERRUPTED)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && + hdev->discovery.state == DISCOVERY_FINDING) + hci_req_reenable_advertising(hdev); break; @@ -1754,273 +1486,244 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) hci_dev_unlock(hdev); } -static u8 hci_cc_le_set_scan_enable(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_set_scan_enable *cp; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_ENABLE); if (!cp) - return rp->status; + return; le_set_scan_enable_complete(hdev, cp->enable); - - return rp->status; } -static u8 hci_cc_le_set_ext_scan_enable(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_ext_scan_enable(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_set_ext_scan_enable *cp; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_SCAN_ENABLE); if (!cp) - return rp->status; + return; le_set_scan_enable_complete(hdev, cp->enable); - - return rp->status; } -static u8 hci_cc_le_read_num_adv_sets(struct hci_dev *hdev, void *data, +static void hci_cc_le_read_num_adv_sets(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_le_read_num_supported_adv_sets *rp = data; + struct hci_rp_le_read_num_supported_adv_sets *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x No of Adv sets %u", rp->status, - rp->num_of_sets); + BT_DBG("%s status 0x%2.2x No of Adv sets %u", hdev->name, rp->status, + rp->num_of_sets); if (rp->status) - return rp->status; + return; hdev->le_num_of_adv_sets = rp->num_of_sets; - - return rp->status; } -static u8 hci_cc_le_read_accept_list_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_accept_list_size(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_accept_list_size *rp = data; + struct hci_rp_le_read_accept_list_size *rp = (void *)skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x size %u", rp->status, rp->size); + BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size); if (rp->status) - return rp->status; + return; hdev->le_accept_list_size = rp->size; - - return rp->status; } -static u8 hci_cc_le_clear_accept_list(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_clear_accept_list(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; hci_bdaddr_list_clear(&hdev->le_accept_list); - - return rp->status; } -static u8 hci_cc_le_add_to_accept_list(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_add_to_accept_list *sent; - struct hci_ev_status *rp = data; + 
__u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST); if (!sent) - return rp->status; + return; hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr, sent->bdaddr_type); - - return rp->status; } -static u8 hci_cc_le_del_from_accept_list(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_del_from_accept_list *sent; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_ACCEPT_LIST); if (!sent) - return rp->status; + return; hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr, sent->bdaddr_type); - - return rp->status; } -static u8 hci_cc_le_read_supported_states(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_supported_states(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_supported_states *rp = data; + struct hci_rp_le_read_supported_states *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; memcpy(hdev->le_states, rp->le_states, 8); - - return rp->status; } -static u8 hci_cc_le_read_def_data_len(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_def_data_len *rp = data; + struct hci_rp_le_read_def_data_len *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->le_def_tx_len = le16_to_cpu(rp->tx_len); hdev->le_def_tx_time = le16_to_cpu(rp->tx_time); - - return rp->status; } -static u8 hci_cc_le_write_def_data_len(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_write_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_write_def_data_len *sent; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN); if (!sent) - return rp->status; + return; hdev->le_def_tx_len = le16_to_cpu(sent->tx_len); hdev->le_def_tx_time = le16_to_cpu(sent->tx_time); - - return rp->status; } -static u8 hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_add_to_resolv_list *sent; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_RESOLV_LIST); if (!sent) - return rp->status; + return; hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr, sent->bdaddr_type, 
sent->peer_irk, sent->local_irk); - - return rp->status; } -static u8 hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_le_del_from_resolv_list *sent; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_RESOLV_LIST); if (!sent) - return rp->status; + return; hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr, sent->bdaddr_type); - - return rp->status; } -static u8 hci_cc_le_clear_resolv_list(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; + __u8 status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; hci_bdaddr_list_clear(&hdev->le_resolv_list); - - return rp->status; } -static u8 hci_cc_le_read_resolv_list_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_resolv_list_size(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_resolv_list_size *rp = data; + struct hci_rp_le_read_resolv_list_size *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x size %u", rp->status, rp->size); + BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size); if (rp->status) - return rp->status; + return; hdev->le_resolv_list_size = rp->size; - - return rp->status; } -static u8 hci_cc_le_set_addr_resolution_enable(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_set_addr_resolution_enable(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_ev_status *rp = data; - __u8 *sent; + __u8 *sent, status = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); @@ -2030,42 +1733,38 @@ static u8 hci_cc_le_set_addr_resolution_enable(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_le_read_max_data_len(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_le_read_max_data_len(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_max_data_len *rp = data; + struct hci_rp_le_read_max_data_len *rp = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hdev->le_max_tx_len = le16_to_cpu(rp->tx_len); hdev->le_max_tx_time = le16_to_cpu(rp->tx_time); hdev->le_max_rx_len = le16_to_cpu(rp->rx_len); hdev->le_max_rx_time = le16_to_cpu(rp->rx_time); - - return rp->status; } -static u8 hci_cc_write_le_host_supported(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_le_host_supported(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_cp_write_le_host_supported *sent; - struct hci_ev_status *rp = data; + __u8 status = *((__u8 
*) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); @@ -2084,47 +1783,41 @@ static u8 hci_cc_write_le_host_supported(struct hci_dev *hdev, void *data, hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_cp_le_set_adv_param *cp; - struct hci_ev_status *rp = data; + u8 status = *((u8 *) skb->data); - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_PARAM); if (!cp) - return rp->status; + return; hci_dev_lock(hdev); hdev->adv_addr_type = cp->own_address_type; hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_set_ext_adv_param(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_le_set_ext_adv_params *rp = data; + struct hci_rp_le_set_ext_adv_params *rp = (void *) skb->data; struct hci_cp_le_set_ext_adv_params *cp; struct adv_info *adv_instance; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS); if (!cp) - return rp->status; + return; hci_dev_lock(hdev); hdev->adv_addr_type = cp->own_addr_type; @@ -2140,20 +1833,17 @@ static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, hci_req_update_adv_data(hdev, cp->handle); hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_rssi *rp = data; + struct hci_rp_read_rssi *rp = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; hci_dev_lock(hdev); @@ -2162,25 +1852,22 @@ static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data, conn->rssi = rp->rssi; hci_dev_unlock(hdev); - - return rp->status; } -static u8 hci_cc_read_tx_power(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_cp_read_tx_power *sent; - struct hci_rp_read_tx_power *rp = data; + struct hci_rp_read_tx_power *rp = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - return rp->status; + return; sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); if (!sent) - return rp->status; + return; hci_dev_lock(hdev); @@ -2199,30 +1886,26 @@ static u8 hci_cc_read_tx_power(struct hci_dev *hdev, void *data, unlock: hci_dev_unlock(hdev); - return rp->status; } -static u8 hci_cc_write_ssp_debug_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_cc_write_ssp_debug_mode(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + u8 
status = *((u8 *) skb->data); u8 *mode; - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (rp->status) - return rp->status; + if (status) + return; mode = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE); if (mode) hdev->ssp_debug_mode = *mode; - - return rp->status; } static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (status) { hci_conn_check_pending(hdev); @@ -2237,7 +1920,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) struct hci_cp_create_conn *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); cp = hci_sent_cmd_data(hdev, HCI_OP_CREATE_CONN); if (!cp) @@ -2247,7 +1930,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); - bt_dev_dbg(hdev, "bdaddr %pMR hcon %p", &cp->bdaddr, conn); + BT_DBG("%s bdaddr %pMR hcon %p", hdev->name, &cp->bdaddr, conn); if (status) { if (conn && conn->state == BT_CONNECT) { @@ -2276,7 +1959,7 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) struct hci_conn *acl, *sco; __u16 handle; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2287,7 +1970,7 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) handle = __le16_to_cpu(cp->handle); - bt_dev_dbg(hdev, "handle 0x%4.4x", handle); + BT_DBG("%s handle 0x%4.4x", hdev->name, handle); hci_dev_lock(hdev); @@ -2310,7 +1993,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) struct hci_cp_auth_requested *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2337,7 +2020,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) struct hci_cp_set_conn_encrypt *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2404,12 +2087,6 @@ static bool hci_resolve_next_name(struct hci_dev *hdev) if (list_empty(&discov->resolve)) return false; - /* We should stop if we already spent too much time resolving names. */ - if (time_after(jiffies, discov->name_resolve_timeout)) { - bt_dev_warn_ratelimited(hdev, "Name resolve takes too long."); - return false; - } - e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED); if (!e) return false; @@ -2456,10 +2133,13 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, return; list_del(&e->list); - - e->name_state = name ? 
NAME_KNOWN : NAME_NOT_KNOWN; - mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, e->data.rssi, - name, name_len); + if (name) { + e->name_state = NAME_KNOWN; + mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, + e->data.rssi, name, name_len); + } else { + e->name_state = NAME_NOT_KNOWN; + } if (hci_resolve_next_name(hdev)) return; @@ -2473,7 +2153,7 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) struct hci_cp_remote_name_req *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); /* If successful, wait for the name req complete event before * checking for the need to do authentication */ @@ -2516,7 +2196,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) struct hci_cp_read_remote_features *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2543,7 +2223,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) struct hci_cp_read_remote_ext_features *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2571,7 +2251,7 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) struct hci_conn *acl, *sco; __u16 handle; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2582,42 +2262,7 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) handle = __le16_to_cpu(cp->handle); - bt_dev_dbg(hdev, "handle 0x%4.4x", handle); - - hci_dev_lock(hdev); - - acl = hci_conn_hash_lookup_handle(hdev, handle); - if (acl) { - sco = acl->link; - if (sco) { - sco->state = BT_CLOSED; - - hci_connect_cfm(sco, status); - hci_conn_del(sco); - } - } - - hci_dev_unlock(hdev); -} - -static void hci_cs_enhanced_setup_sync_conn(struct hci_dev *hdev, __u8 status) -{ - struct hci_cp_enhanced_setup_sync_conn *cp; - struct hci_conn *acl, *sco; - __u16 handle; - - bt_dev_dbg(hdev, "status 0x%2.2x", status); - - if (!status) - return; - - cp = hci_sent_cmd_data(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN); - if (!cp) - return; - - handle = __le16_to_cpu(cp->handle); - - bt_dev_dbg(hdev, "handle 0x%4.4x", handle); + BT_DBG("%s handle 0x%4.4x", hdev->name, handle); hci_dev_lock(hdev); @@ -2640,7 +2285,7 @@ static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status) struct hci_cp_sniff_mode *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2667,7 +2312,7 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) struct hci_cp_exit_sniff_mode *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2692,16 +2337,9 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) { struct hci_cp_disconnect *cp; - struct hci_conn_params *params; struct hci_conn *conn; - bool mgmt_conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); - - /* Wait for HCI_EV_DISCONN_COMPLETE if status 0x00 and not suspended; - * otherwise clean up the connection immediately.
- */ - if (!status && !hdev->suspended) + if (!status) return; cp = hci_sent_cmd_data(hdev, HCI_OP_DISCONNECT); @@ -2711,85 +2349,26 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (!conn) - goto unlock; - - if (status) { + if (conn) { mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); - if (conn->type == LE_LINK && conn->role == HCI_ROLE_SLAVE) { + if (conn->type == LE_LINK) { hdev->cur_adv_instance = conn->adv_instance; - hci_enable_advertising(hdev); + hci_req_reenable_advertising(hdev); } - goto done; + /* If the disconnection failed for any reason, the upper layer + * does not retry to disconnect in the current implementation. + * Hence, we need to do some basic cleanup here and re-enable + * advertising if necessary. + */ + hci_conn_del(conn); } - mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags); - - if (conn->type == ACL_LINK) { - if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) - hci_remove_link_key(hdev, &conn->dst); - } - - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - switch (params->auto_connect) { - case HCI_AUTO_CONN_LINK_LOSS: - if (cp->reason != HCI_ERROR_CONNECTION_TIMEOUT) - break; - fallthrough; - - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - list_del_init(&params->action); - list_add(&params->action, &hdev->pend_le_conns); - break; - - default: - break; - } - } - - mgmt_device_disconnected(hdev, &conn->dst, conn->type, conn->dst_type, - cp->reason, mgmt_conn); - - hci_disconn_cfm(conn, cp->reason); - -done: - /* If the disconnection failed for any reason, the upper layer - * does not retry to disconnect in the current implementation. - * Hence, we need to do some basic cleanup here and re-enable - * advertising if necessary. - */ - hci_conn_del(conn); -unlock: hci_dev_unlock(hdev); } -static u8 ev_bdaddr_type(struct hci_dev *hdev, u8 type, bool *resolved) -{ - /* When using controller based address resolution, then the new - * address types 0x02 and 0x03 are used. These types need to be - * converted back into either public address or random address type - */ - switch (type) { - case ADDR_LE_DEV_PUBLIC_RESOLVED: - if (resolved) - *resolved = true; - return ADDR_LE_DEV_PUBLIC; - case ADDR_LE_DEV_RANDOM_RESOLVED: - if (resolved) - *resolved = true; - return ADDR_LE_DEV_RANDOM; - } - - if (resolved) - *resolved = false; - return type; -} - static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr, u8 peer_addr_type, u8 own_address_type, u8 filter_policy) @@ -2801,7 +2380,21 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr, if (!conn) return; - own_address_type = ev_bdaddr_type(hdev, own_address_type, NULL); + /* When using controller based address resolution, then the new + * address types 0x02 and 0x03 are used. These types need to be + * converted back into either public address or random address type + */ + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { + switch (own_address_type) { + case ADDR_LE_DEV_PUBLIC_RESOLVED: + own_address_type = ADDR_LE_DEV_PUBLIC; + break; + case ADDR_LE_DEV_RANDOM_RESOLVED: + own_address_type = ADDR_LE_DEV_RANDOM; + break; + } + } /* Store the initiator and responder address information which * is needed for SMP.
These values will not change during the @@ -2831,7 +2424,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) { struct hci_cp_le_create_conn *cp; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); /* All connection failure handling is taken care of by the * hci_le_conn_failed function which is triggered by the HCI @@ -2856,7 +2449,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status) { struct hci_cp_le_ext_create_conn *cp; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); /* All connection failure handling is taken care of by the * hci_le_conn_failed function which is triggered by the HCI @@ -2882,7 +2475,7 @@ static void hci_cs_le_read_remote_features(struct hci_dev *hdev, u8 status) struct hci_cp_le_read_remote_features *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2909,7 +2502,7 @@ static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) struct hci_cp_le_start_enc *cp; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); if (!status) return; @@ -2957,14 +2550,13 @@ static void hci_cs_switch_role(struct hci_dev *hdev, u8 status) hci_dev_unlock(hdev); } -static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_status *ev = data; + __u8 status = *((__u8 *) skb->data); struct discovery_state *discov = &hdev->discovery; struct inquiry_entry *e; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); hci_conn_check_pending(hdev); @@ -3000,7 +2592,6 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, if (e && hci_resolve_name(hdev, e) == 0) { e->name_state = NAME_PENDING; hci_discovery_set_state(hdev, DISCOVERY_RESOLVING); - discov->name_resolve_timeout = jiffies + NAME_RESOLVE_DURATION; } else { /* When BR/EDR inquiry is active and no LE scanning is in * progress, then change discovery state to indicate completion. 
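A recurring pattern on the restored (+) side of this patch is that each event handler re-derives its parameters straight from the raw event buffer: the first byte carries a count (or status), fixed-size records follow, and the handler must validate skb->len by hand before walking the payload. The plain-C sketch below illustrates that length check under stated assumptions: the buffer layout mirrors the inquiry-result events handled in the next hunk, while the function name and record type are hypothetical and not part of the patch.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical record type standing in for struct inquiry_info. */
struct record {
	uint8_t bdaddr[6];
	uint8_t pscan_rep_mode;
};

/* Byte 0 carries num_rsp; num_rsp fixed-size records follow. Returns the
 * number of records that may safely be read, or 0 on a malformed event,
 * mirroring the "!num_rsp || skb->len < num_rsp * sizeof(*info) + 1"
 * test used by the restored handlers. */
static size_t parse_count(const uint8_t *data, size_t len)
{
	size_t num_rsp;

	if (len < 1)
		return 0;

	num_rsp = data[0];
	if (!num_rsp || len < num_rsp * sizeof(struct record) + 1)
		return 0;

	return num_rsp;
}

int main(void)
{
	uint8_t ev[1 + sizeof(struct record)] = { 1 };	/* one record */

	printf("records: %zu\n", parse_count(ev, sizeof(ev)));	/* 1 */
	printf("records: %zu\n", parse_count(ev, 3));		/* 0: truncated */
	return 0;
}

The point of the check is that num_rsp comes from the wire and cannot be trusted; without the multiplication against the record size, a short packet would let the loop read past the end of the buffer.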
@@ -3018,20 +2609,15 @@ hci_dev_unlock(hdev); } -static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata, - struct sk_buff *skb) +static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_inquiry_result *ev = edata; struct inquiry_data data; - int i; + struct inquiry_info *info = (void *) (skb->data + 1); + int num_rsp = *((__u8 *) skb->data); - if (!hci_ev_skb_pull(hdev, skb, HCI_EV_INQUIRY_RESULT, - flex_array_size(ev, info, ev->num))) - return; + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - bt_dev_dbg(hdev, "num %d", ev->num); - - if (!ev->num) + if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1) return; if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) @@ -3039,8 +2625,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata, hci_dev_lock(hdev); - for (i = 0; i < ev->num; i++) { - struct inquiry_info *info = &ev->info[i]; + for (; num_rsp; num_rsp--, info++) { u32 flags; bacpy(&data.bdaddr, &info->bdaddr); @@ -3062,13 +2647,12 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata, hci_dev_unlock(hdev); } -static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_conn_complete *ev = data; + struct hci_ev_conn_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -3187,16 +2771,16 @@ static void hci_reject_conn(struct hci_dev *hdev, bdaddr_t *bdaddr) hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp); } -static void hci_conn_request_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_conn_request *ev = data; + struct hci_ev_conn_request *ev = (void *) skb->data; int mask = hdev->link_mode; struct inquiry_entry *ie; struct hci_conn *conn; __u8 flags = 0; - bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type); + BT_DBG("%s bdaddr %pMR type 0x%x", hdev->name, &ev->bdaddr, + ev->link_type); mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type, &flags); @@ -3298,16 +2882,15 @@ static u8 hci_to_mgmt_reason(u8 err) } } -static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_disconn_complete *ev = data; + struct hci_ev_disconn_complete *ev = (void *) skb->data; u8 reason; struct hci_conn_params *params; struct hci_conn *conn; bool mgmt_connected; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -3352,7 +2935,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, case HCI_AUTO_CONN_ALWAYS: list_del_init(&params->action); list_add(&params->action, &hdev->pend_le_conns); - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); break; default: @@ -3362,6 +2945,14 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, hci_disconn_cfm(conn, ev->reason); + /* The suspend notifier is waiting for all devices to disconnect, so + * clear the bit from pending tasks and inform the wait queue.
+ */ + if (list_empty(&hdev->conn_hash.list) && + test_and_clear_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } + /* Re-enable advertising if necessary, since it might * have been disabled by the connection. From the * HCI_LE_Set_Advertise_Enable command description in @@ -3372,9 +2963,9 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, * or until a connection is created or until the Advertising * is timed out due to Directed Advertising." */ - if (conn->type == LE_LINK && conn->role == HCI_ROLE_SLAVE) { + if (conn->type == LE_LINK) { hdev->cur_adv_instance = conn->adv_instance; - hci_enable_advertising(hdev); + hci_req_reenable_advertising(hdev); } hci_conn_del(conn); @@ -3383,13 +2974,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_auth_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_auth_complete *ev = data; + struct hci_ev_auth_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -3454,13 +3044,12 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_remote_name_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_remote_name *ev = data; + struct hci_ev_remote_name *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s", hdev->name); hci_conn_check_pending(hdev); @@ -3538,13 +3127,12 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status, hci_dev_unlock(hdev); } -static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_encrypt_change *ev = data; + struct hci_ev_encrypt_change *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -3653,13 +3241,13 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_change_link_key_complete_evt(struct hci_dev *hdev, void *data, +static void hci_change_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_change_link_key_complete *ev = data; + struct hci_ev_change_link_key_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -3676,13 +3264,13 @@ static void hci_change_link_key_complete_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_remote_features_evt(struct hci_dev *hdev, void *data, +static void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_remote_features *ev = data; + struct hci_ev_remote_features *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -3740,227 +3328,366 @@ static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd) } } -#define HCI_CC_VL(_op, _func, 
_min, _max) \ -{ \ - .op = _op, \ - .func = _func, \ - .min_len = _min, \ - .max_len = _max, \ -} - -#define HCI_CC(_op, _func, _len) \ - HCI_CC_VL(_op, _func, _len, _len) - -#define HCI_CC_STATUS(_op, _func) \ - HCI_CC(_op, _func, sizeof(struct hci_ev_status)) - -static const struct hci_cc { - u16 op; - u8 (*func)(struct hci_dev *hdev, void *data, struct sk_buff *skb); - u16 min_len; - u16 max_len; -} hci_cc_table[] = { - HCI_CC_STATUS(HCI_OP_INQUIRY_CANCEL, hci_cc_inquiry_cancel), - HCI_CC_STATUS(HCI_OP_PERIODIC_INQ, hci_cc_periodic_inq), - HCI_CC_STATUS(HCI_OP_EXIT_PERIODIC_INQ, hci_cc_exit_periodic_inq), - HCI_CC_STATUS(HCI_OP_REMOTE_NAME_REQ_CANCEL, - hci_cc_remote_name_req_cancel), - HCI_CC(HCI_OP_ROLE_DISCOVERY, hci_cc_role_discovery, - sizeof(struct hci_rp_role_discovery)), - HCI_CC(HCI_OP_READ_LINK_POLICY, hci_cc_read_link_policy, - sizeof(struct hci_rp_read_link_policy)), - HCI_CC(HCI_OP_WRITE_LINK_POLICY, hci_cc_write_link_policy, - sizeof(struct hci_rp_write_link_policy)), - HCI_CC(HCI_OP_READ_DEF_LINK_POLICY, hci_cc_read_def_link_policy, - sizeof(struct hci_rp_read_def_link_policy)), - HCI_CC_STATUS(HCI_OP_WRITE_DEF_LINK_POLICY, - hci_cc_write_def_link_policy), - HCI_CC_STATUS(HCI_OP_RESET, hci_cc_reset), - HCI_CC(HCI_OP_READ_STORED_LINK_KEY, hci_cc_read_stored_link_key, - sizeof(struct hci_rp_read_stored_link_key)), - HCI_CC(HCI_OP_DELETE_STORED_LINK_KEY, hci_cc_delete_stored_link_key, - sizeof(struct hci_rp_delete_stored_link_key)), - HCI_CC_STATUS(HCI_OP_WRITE_LOCAL_NAME, hci_cc_write_local_name), - HCI_CC(HCI_OP_READ_LOCAL_NAME, hci_cc_read_local_name, - sizeof(struct hci_rp_read_local_name)), - HCI_CC_STATUS(HCI_OP_WRITE_AUTH_ENABLE, hci_cc_write_auth_enable), - HCI_CC_STATUS(HCI_OP_WRITE_ENCRYPT_MODE, hci_cc_write_encrypt_mode), - HCI_CC_STATUS(HCI_OP_WRITE_SCAN_ENABLE, hci_cc_write_scan_enable), - HCI_CC_STATUS(HCI_OP_SET_EVENT_FLT, hci_cc_set_event_filter), - HCI_CC(HCI_OP_READ_CLASS_OF_DEV, hci_cc_read_class_of_dev, - sizeof(struct hci_rp_read_class_of_dev)), - HCI_CC_STATUS(HCI_OP_WRITE_CLASS_OF_DEV, hci_cc_write_class_of_dev), - HCI_CC(HCI_OP_READ_VOICE_SETTING, hci_cc_read_voice_setting, - sizeof(struct hci_rp_read_voice_setting)), - HCI_CC_STATUS(HCI_OP_WRITE_VOICE_SETTING, hci_cc_write_voice_setting), - HCI_CC(HCI_OP_READ_NUM_SUPPORTED_IAC, hci_cc_read_num_supported_iac, - sizeof(struct hci_rp_read_num_supported_iac)), - HCI_CC_STATUS(HCI_OP_WRITE_SSP_MODE, hci_cc_write_ssp_mode), - HCI_CC_STATUS(HCI_OP_WRITE_SC_SUPPORT, hci_cc_write_sc_support), - HCI_CC(HCI_OP_READ_AUTH_PAYLOAD_TO, hci_cc_read_auth_payload_timeout, - sizeof(struct hci_rp_read_auth_payload_to)), - HCI_CC(HCI_OP_WRITE_AUTH_PAYLOAD_TO, hci_cc_write_auth_payload_timeout, - sizeof(struct hci_rp_write_auth_payload_to)), - HCI_CC(HCI_OP_READ_LOCAL_VERSION, hci_cc_read_local_version, - sizeof(struct hci_rp_read_local_version)), - HCI_CC(HCI_OP_READ_LOCAL_COMMANDS, hci_cc_read_local_commands, - sizeof(struct hci_rp_read_local_commands)), - HCI_CC(HCI_OP_READ_LOCAL_FEATURES, hci_cc_read_local_features, - sizeof(struct hci_rp_read_local_features)), - HCI_CC(HCI_OP_READ_LOCAL_EXT_FEATURES, hci_cc_read_local_ext_features, - sizeof(struct hci_rp_read_local_ext_features)), - HCI_CC(HCI_OP_READ_BUFFER_SIZE, hci_cc_read_buffer_size, - sizeof(struct hci_rp_read_buffer_size)), - HCI_CC(HCI_OP_READ_BD_ADDR, hci_cc_read_bd_addr, - sizeof(struct hci_rp_read_bd_addr)), - HCI_CC(HCI_OP_READ_LOCAL_PAIRING_OPTS, hci_cc_read_local_pairing_opts, - sizeof(struct hci_rp_read_local_pairing_opts)), - 
HCI_CC(HCI_OP_READ_PAGE_SCAN_ACTIVITY, hci_cc_read_page_scan_activity, - sizeof(struct hci_rp_read_page_scan_activity)), - HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, - hci_cc_write_page_scan_activity), - HCI_CC(HCI_OP_READ_PAGE_SCAN_TYPE, hci_cc_read_page_scan_type, - sizeof(struct hci_rp_read_page_scan_type)), - HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_TYPE, hci_cc_write_page_scan_type), - HCI_CC(HCI_OP_READ_DATA_BLOCK_SIZE, hci_cc_read_data_block_size, - sizeof(struct hci_rp_read_data_block_size)), - HCI_CC(HCI_OP_READ_FLOW_CONTROL_MODE, hci_cc_read_flow_control_mode, - sizeof(struct hci_rp_read_flow_control_mode)), - HCI_CC(HCI_OP_READ_LOCAL_AMP_INFO, hci_cc_read_local_amp_info, - sizeof(struct hci_rp_read_local_amp_info)), - HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock, - sizeof(struct hci_rp_read_clock)), - HCI_CC(HCI_OP_READ_INQ_RSP_TX_POWER, hci_cc_read_inq_rsp_tx_power, - sizeof(struct hci_rp_read_inq_rsp_tx_power)), - HCI_CC(HCI_OP_READ_DEF_ERR_DATA_REPORTING, - hci_cc_read_def_err_data_reporting, - sizeof(struct hci_rp_read_def_err_data_reporting)), - HCI_CC_STATUS(HCI_OP_WRITE_DEF_ERR_DATA_REPORTING, - hci_cc_write_def_err_data_reporting), - HCI_CC(HCI_OP_PIN_CODE_REPLY, hci_cc_pin_code_reply, - sizeof(struct hci_rp_pin_code_reply)), - HCI_CC(HCI_OP_PIN_CODE_NEG_REPLY, hci_cc_pin_code_neg_reply, - sizeof(struct hci_rp_pin_code_neg_reply)), - HCI_CC(HCI_OP_READ_LOCAL_OOB_DATA, hci_cc_read_local_oob_data, - sizeof(struct hci_rp_read_local_oob_data)), - HCI_CC(HCI_OP_READ_LOCAL_OOB_EXT_DATA, hci_cc_read_local_oob_ext_data, - sizeof(struct hci_rp_read_local_oob_ext_data)), - HCI_CC(HCI_OP_LE_READ_BUFFER_SIZE, hci_cc_le_read_buffer_size, - sizeof(struct hci_rp_le_read_buffer_size)), - HCI_CC(HCI_OP_LE_READ_LOCAL_FEATURES, hci_cc_le_read_local_features, - sizeof(struct hci_rp_le_read_local_features)), - HCI_CC(HCI_OP_LE_READ_ADV_TX_POWER, hci_cc_le_read_adv_tx_power, - sizeof(struct hci_rp_le_read_adv_tx_power)), - HCI_CC(HCI_OP_USER_CONFIRM_REPLY, hci_cc_user_confirm_reply, - sizeof(struct hci_rp_user_confirm_reply)), - HCI_CC(HCI_OP_USER_CONFIRM_NEG_REPLY, hci_cc_user_confirm_neg_reply, - sizeof(struct hci_rp_user_confirm_reply)), - HCI_CC(HCI_OP_USER_PASSKEY_REPLY, hci_cc_user_passkey_reply, - sizeof(struct hci_rp_user_confirm_reply)), - HCI_CC(HCI_OP_USER_PASSKEY_NEG_REPLY, hci_cc_user_passkey_neg_reply, - sizeof(struct hci_rp_user_confirm_reply)), - HCI_CC_STATUS(HCI_OP_LE_SET_RANDOM_ADDR, hci_cc_le_set_random_addr), - HCI_CC_STATUS(HCI_OP_LE_SET_ADV_ENABLE, hci_cc_le_set_adv_enable), - HCI_CC_STATUS(HCI_OP_LE_SET_SCAN_PARAM, hci_cc_le_set_scan_param), - HCI_CC_STATUS(HCI_OP_LE_SET_SCAN_ENABLE, hci_cc_le_set_scan_enable), - HCI_CC(HCI_OP_LE_READ_ACCEPT_LIST_SIZE, - hci_cc_le_read_accept_list_size, - sizeof(struct hci_rp_le_read_accept_list_size)), - HCI_CC_STATUS(HCI_OP_LE_CLEAR_ACCEPT_LIST, hci_cc_le_clear_accept_list), - HCI_CC_STATUS(HCI_OP_LE_ADD_TO_ACCEPT_LIST, - hci_cc_le_add_to_accept_list), - HCI_CC_STATUS(HCI_OP_LE_DEL_FROM_ACCEPT_LIST, - hci_cc_le_del_from_accept_list), - HCI_CC(HCI_OP_LE_READ_SUPPORTED_STATES, hci_cc_le_read_supported_states, - sizeof(struct hci_rp_le_read_supported_states)), - HCI_CC(HCI_OP_LE_READ_DEF_DATA_LEN, hci_cc_le_read_def_data_len, - sizeof(struct hci_rp_le_read_def_data_len)), - HCI_CC_STATUS(HCI_OP_LE_WRITE_DEF_DATA_LEN, - hci_cc_le_write_def_data_len), - HCI_CC_STATUS(HCI_OP_LE_ADD_TO_RESOLV_LIST, - hci_cc_le_add_to_resolv_list), - HCI_CC_STATUS(HCI_OP_LE_DEL_FROM_RESOLV_LIST, - hci_cc_le_del_from_resolv_list), - 
HCI_CC_STATUS(HCI_OP_LE_CLEAR_RESOLV_LIST, - hci_cc_le_clear_resolv_list), - HCI_CC(HCI_OP_LE_READ_RESOLV_LIST_SIZE, hci_cc_le_read_resolv_list_size, - sizeof(struct hci_rp_le_read_resolv_list_size)), - HCI_CC_STATUS(HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, - hci_cc_le_set_addr_resolution_enable), - HCI_CC(HCI_OP_LE_READ_MAX_DATA_LEN, hci_cc_le_read_max_data_len, - sizeof(struct hci_rp_le_read_max_data_len)), - HCI_CC_STATUS(HCI_OP_WRITE_LE_HOST_SUPPORTED, - hci_cc_write_le_host_supported), - HCI_CC_STATUS(HCI_OP_LE_SET_ADV_PARAM, hci_cc_set_adv_param), - HCI_CC(HCI_OP_READ_RSSI, hci_cc_read_rssi, - sizeof(struct hci_rp_read_rssi)), - HCI_CC(HCI_OP_READ_TX_POWER, hci_cc_read_tx_power, - sizeof(struct hci_rp_read_tx_power)), - HCI_CC_STATUS(HCI_OP_WRITE_SSP_DEBUG_MODE, hci_cc_write_ssp_debug_mode), - HCI_CC_STATUS(HCI_OP_LE_SET_EXT_SCAN_PARAMS, - hci_cc_le_set_ext_scan_param), - HCI_CC_STATUS(HCI_OP_LE_SET_EXT_SCAN_ENABLE, - hci_cc_le_set_ext_scan_enable), - HCI_CC_STATUS(HCI_OP_LE_SET_DEFAULT_PHY, hci_cc_le_set_default_phy), - HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, - hci_cc_le_read_num_adv_sets, - sizeof(struct hci_rp_le_read_num_supported_adv_sets)), - HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param, - sizeof(struct hci_rp_le_set_ext_adv_params)), - HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE, - hci_cc_le_set_ext_adv_enable), - HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR, - hci_cc_le_set_adv_set_random_addr), - HCI_CC_STATUS(HCI_OP_LE_REMOVE_ADV_SET, hci_cc_le_remove_adv_set), - HCI_CC_STATUS(HCI_OP_LE_CLEAR_ADV_SETS, hci_cc_le_clear_adv_sets), - HCI_CC(HCI_OP_LE_READ_TRANSMIT_POWER, hci_cc_le_read_transmit_power, - sizeof(struct hci_rp_le_read_transmit_power)), - HCI_CC_STATUS(HCI_OP_LE_SET_PRIVACY_MODE, hci_cc_le_set_privacy_mode) -}; - -static u8 hci_cc_func(struct hci_dev *hdev, const struct hci_cc *cc, - struct sk_buff *skb) -{ - void *data; - - if (skb->len < cc->min_len) { - bt_dev_err(hdev, "unexpected cc 0x%4.4x length: %u < %u", - cc->op, skb->len, cc->min_len); - return HCI_ERROR_UNSPECIFIED; - } - - /* Just warn if the length is over max_len; it may still be possible to - * partially parse the cc, so leave it to the callback to decide if that is - * acceptable.
- */ - if (skb->len > cc->max_len) - bt_dev_warn(hdev, "unexpected cc 0x%4.4x length: %u > %u", - cc->op, skb->len, cc->max_len); - - data = hci_cc_skb_pull(hdev, skb, cc->op, cc->min_len); - if (!data) - return HCI_ERROR_UNSPECIFIED; - - return cc->func(hdev, data, skb); -} - -static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb, u16 *opcode, u8 *status, +static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, + u16 *opcode, u8 *status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb) { - struct hci_ev_cmd_complete *ev = data; - int i; + struct hci_ev_cmd_complete *ev = (void *) skb->data; *opcode = __le16_to_cpu(ev->opcode); + *status = skb->data[sizeof(*ev)]; - bt_dev_dbg(hdev, "opcode 0x%4.4x", *opcode); + skb_pull(skb, sizeof(*ev)); - for (i = 0; i < ARRAY_SIZE(hci_cc_table); i++) { - if (hci_cc_table[i].op == *opcode) { - *status = hci_cc_func(hdev, &hci_cc_table[i], skb); - break; - } + switch (*opcode) { + case HCI_OP_INQUIRY_CANCEL: + hci_cc_inquiry_cancel(hdev, skb, status); + break; + + case HCI_OP_PERIODIC_INQ: + hci_cc_periodic_inq(hdev, skb); + break; + + case HCI_OP_EXIT_PERIODIC_INQ: + hci_cc_exit_periodic_inq(hdev, skb); + break; + + case HCI_OP_REMOTE_NAME_REQ_CANCEL: + hci_cc_remote_name_req_cancel(hdev, skb); + break; + + case HCI_OP_ROLE_DISCOVERY: + hci_cc_role_discovery(hdev, skb); + break; + + case HCI_OP_READ_LINK_POLICY: + hci_cc_read_link_policy(hdev, skb); + break; + + case HCI_OP_WRITE_LINK_POLICY: + hci_cc_write_link_policy(hdev, skb); + break; + + case HCI_OP_READ_DEF_LINK_POLICY: + hci_cc_read_def_link_policy(hdev, skb); + break; + + case HCI_OP_WRITE_DEF_LINK_POLICY: + hci_cc_write_def_link_policy(hdev, skb); + break; + + case HCI_OP_RESET: + hci_cc_reset(hdev, skb); + break; + + case HCI_OP_READ_STORED_LINK_KEY: + hci_cc_read_stored_link_key(hdev, skb); + break; + + case HCI_OP_DELETE_STORED_LINK_KEY: + hci_cc_delete_stored_link_key(hdev, skb); + break; + + case HCI_OP_WRITE_LOCAL_NAME: + hci_cc_write_local_name(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_NAME: + hci_cc_read_local_name(hdev, skb); + break; + + case HCI_OP_WRITE_AUTH_ENABLE: + hci_cc_write_auth_enable(hdev, skb); + break; + + case HCI_OP_WRITE_ENCRYPT_MODE: + hci_cc_write_encrypt_mode(hdev, skb); + break; + + case HCI_OP_WRITE_SCAN_ENABLE: + hci_cc_write_scan_enable(hdev, skb); + break; + + case HCI_OP_SET_EVENT_FLT: + hci_cc_set_event_filter(hdev, skb); + break; + + case HCI_OP_READ_CLASS_OF_DEV: + hci_cc_read_class_of_dev(hdev, skb); + break; + + case HCI_OP_WRITE_CLASS_OF_DEV: + hci_cc_write_class_of_dev(hdev, skb); + break; + + case HCI_OP_READ_VOICE_SETTING: + hci_cc_read_voice_setting(hdev, skb); + break; + + case HCI_OP_WRITE_VOICE_SETTING: + hci_cc_write_voice_setting(hdev, skb); + break; + + case HCI_OP_READ_NUM_SUPPORTED_IAC: + hci_cc_read_num_supported_iac(hdev, skb); + break; + + case HCI_OP_WRITE_SSP_MODE: + hci_cc_write_ssp_mode(hdev, skb); + break; + + case HCI_OP_WRITE_SC_SUPPORT: + hci_cc_write_sc_support(hdev, skb); + break; + + case HCI_OP_READ_AUTH_PAYLOAD_TO: + hci_cc_read_auth_payload_timeout(hdev, skb); + break; + + case HCI_OP_WRITE_AUTH_PAYLOAD_TO: + hci_cc_write_auth_payload_timeout(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_VERSION: + hci_cc_read_local_version(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_COMMANDS: + hci_cc_read_local_commands(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_FEATURES: + hci_cc_read_local_features(hdev, skb); + break; + + case 
HCI_OP_READ_LOCAL_EXT_FEATURES: + hci_cc_read_local_ext_features(hdev, skb); + break; + + case HCI_OP_READ_BUFFER_SIZE: + hci_cc_read_buffer_size(hdev, skb); + break; + + case HCI_OP_READ_BD_ADDR: + hci_cc_read_bd_addr(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_PAIRING_OPTS: + hci_cc_read_local_pairing_opts(hdev, skb); + break; + + case HCI_OP_READ_PAGE_SCAN_ACTIVITY: + hci_cc_read_page_scan_activity(hdev, skb); + break; + + case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY: + hci_cc_write_page_scan_activity(hdev, skb); + break; + + case HCI_OP_READ_PAGE_SCAN_TYPE: + hci_cc_read_page_scan_type(hdev, skb); + break; + + case HCI_OP_WRITE_PAGE_SCAN_TYPE: + hci_cc_write_page_scan_type(hdev, skb); + break; + + case HCI_OP_READ_DATA_BLOCK_SIZE: + hci_cc_read_data_block_size(hdev, skb); + break; + + case HCI_OP_READ_FLOW_CONTROL_MODE: + hci_cc_read_flow_control_mode(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_AMP_INFO: + hci_cc_read_local_amp_info(hdev, skb); + break; + + case HCI_OP_READ_CLOCK: + hci_cc_read_clock(hdev, skb); + break; + + case HCI_OP_READ_INQ_RSP_TX_POWER: + hci_cc_read_inq_rsp_tx_power(hdev, skb); + break; + + case HCI_OP_READ_DEF_ERR_DATA_REPORTING: + hci_cc_read_def_err_data_reporting(hdev, skb); + break; + + case HCI_OP_WRITE_DEF_ERR_DATA_REPORTING: + hci_cc_write_def_err_data_reporting(hdev, skb); + break; + + case HCI_OP_PIN_CODE_REPLY: + hci_cc_pin_code_reply(hdev, skb); + break; + + case HCI_OP_PIN_CODE_NEG_REPLY: + hci_cc_pin_code_neg_reply(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_OOB_DATA: + hci_cc_read_local_oob_data(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_OOB_EXT_DATA: + hci_cc_read_local_oob_ext_data(hdev, skb); + break; + + case HCI_OP_LE_READ_BUFFER_SIZE: + hci_cc_le_read_buffer_size(hdev, skb); + break; + + case HCI_OP_LE_READ_LOCAL_FEATURES: + hci_cc_le_read_local_features(hdev, skb); + break; + + case HCI_OP_LE_READ_ADV_TX_POWER: + hci_cc_le_read_adv_tx_power(hdev, skb); + break; + + case HCI_OP_USER_CONFIRM_REPLY: + hci_cc_user_confirm_reply(hdev, skb); + break; + + case HCI_OP_USER_CONFIRM_NEG_REPLY: + hci_cc_user_confirm_neg_reply(hdev, skb); + break; + + case HCI_OP_USER_PASSKEY_REPLY: + hci_cc_user_passkey_reply(hdev, skb); + break; + + case HCI_OP_USER_PASSKEY_NEG_REPLY: + hci_cc_user_passkey_neg_reply(hdev, skb); + break; + + case HCI_OP_LE_SET_RANDOM_ADDR: + hci_cc_le_set_random_addr(hdev, skb); + break; + + case HCI_OP_LE_SET_ADV_ENABLE: + hci_cc_le_set_adv_enable(hdev, skb); + break; + + case HCI_OP_LE_SET_SCAN_PARAM: + hci_cc_le_set_scan_param(hdev, skb); + break; + + case HCI_OP_LE_SET_SCAN_ENABLE: + hci_cc_le_set_scan_enable(hdev, skb); + break; + + case HCI_OP_LE_READ_ACCEPT_LIST_SIZE: + hci_cc_le_read_accept_list_size(hdev, skb); + break; + + case HCI_OP_LE_CLEAR_ACCEPT_LIST: + hci_cc_le_clear_accept_list(hdev, skb); + break; + + case HCI_OP_LE_ADD_TO_ACCEPT_LIST: + hci_cc_le_add_to_accept_list(hdev, skb); + break; + + case HCI_OP_LE_DEL_FROM_ACCEPT_LIST: + hci_cc_le_del_from_accept_list(hdev, skb); + break; + + case HCI_OP_LE_READ_SUPPORTED_STATES: + hci_cc_le_read_supported_states(hdev, skb); + break; + + case HCI_OP_LE_READ_DEF_DATA_LEN: + hci_cc_le_read_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_WRITE_DEF_DATA_LEN: + hci_cc_le_write_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_ADD_TO_RESOLV_LIST: + hci_cc_le_add_to_resolv_list(hdev, skb); + break; + + case HCI_OP_LE_DEL_FROM_RESOLV_LIST: + hci_cc_le_del_from_resolv_list(hdev, skb); + break; + + case HCI_OP_LE_CLEAR_RESOLV_LIST: + hci_cc_le_clear_resolv_list(hdev, 
skb); + break; + + case HCI_OP_LE_READ_RESOLV_LIST_SIZE: + hci_cc_le_read_resolv_list_size(hdev, skb); + break; + + case HCI_OP_LE_SET_ADDR_RESOLV_ENABLE: + hci_cc_le_set_addr_resolution_enable(hdev, skb); + break; + + case HCI_OP_LE_READ_MAX_DATA_LEN: + hci_cc_le_read_max_data_len(hdev, skb); + break; + + case HCI_OP_WRITE_LE_HOST_SUPPORTED: + hci_cc_write_le_host_supported(hdev, skb); + break; + + case HCI_OP_LE_SET_ADV_PARAM: + hci_cc_set_adv_param(hdev, skb); + break; + + case HCI_OP_READ_RSSI: + hci_cc_read_rssi(hdev, skb); + break; + + case HCI_OP_READ_TX_POWER: + hci_cc_read_tx_power(hdev, skb); + break; + + case HCI_OP_WRITE_SSP_DEBUG_MODE: + hci_cc_write_ssp_debug_mode(hdev, skb); + break; + + case HCI_OP_LE_SET_EXT_SCAN_PARAMS: + hci_cc_le_set_ext_scan_param(hdev, skb); + break; + + case HCI_OP_LE_SET_EXT_SCAN_ENABLE: + hci_cc_le_set_ext_scan_enable(hdev, skb); + break; + + case HCI_OP_LE_SET_DEFAULT_PHY: + hci_cc_le_set_default_phy(hdev, skb); + break; + + case HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS: + hci_cc_le_read_num_adv_sets(hdev, skb); + break; + + case HCI_OP_LE_SET_EXT_ADV_PARAMS: + hci_cc_set_ext_adv_param(hdev, skb); + break; + + case HCI_OP_LE_SET_EXT_ADV_ENABLE: + hci_cc_le_set_ext_adv_enable(hdev, skb); + break; + + case HCI_OP_LE_SET_ADV_SET_RAND_ADDR: + hci_cc_le_set_adv_set_random_addr(hdev, skb); + break; + + case HCI_OP_LE_READ_TRANSMIT_POWER: + hci_cc_le_read_transmit_power(hdev, skb); + break; + + default: + BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode); + break; } handle_cmd_cnt_and_timer(hdev, ev->ncmd); @@ -3978,56 +3705,90 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, queue_work(hdev->workqueue, &hdev->cmd_work); } -#define HCI_CS(_op, _func) \ -{ \ - .op = _op, \ - .func = _func, \ -} - -static const struct hci_cs { - u16 op; - void (*func)(struct hci_dev *hdev, __u8 status); -} hci_cs_table[] = { - HCI_CS(HCI_OP_INQUIRY, hci_cs_inquiry), - HCI_CS(HCI_OP_CREATE_CONN, hci_cs_create_conn), - HCI_CS(HCI_OP_DISCONNECT, hci_cs_disconnect), - HCI_CS(HCI_OP_ADD_SCO, hci_cs_add_sco), - HCI_CS(HCI_OP_AUTH_REQUESTED, hci_cs_auth_requested), - HCI_CS(HCI_OP_SET_CONN_ENCRYPT, hci_cs_set_conn_encrypt), - HCI_CS(HCI_OP_REMOTE_NAME_REQ, hci_cs_remote_name_req), - HCI_CS(HCI_OP_READ_REMOTE_FEATURES, hci_cs_read_remote_features), - HCI_CS(HCI_OP_READ_REMOTE_EXT_FEATURES, - hci_cs_read_remote_ext_features), - HCI_CS(HCI_OP_SETUP_SYNC_CONN, hci_cs_setup_sync_conn), - HCI_CS(HCI_OP_ENHANCED_SETUP_SYNC_CONN, - hci_cs_enhanced_setup_sync_conn), - HCI_CS(HCI_OP_SNIFF_MODE, hci_cs_sniff_mode), - HCI_CS(HCI_OP_EXIT_SNIFF_MODE, hci_cs_exit_sniff_mode), - HCI_CS(HCI_OP_SWITCH_ROLE, hci_cs_switch_role), - HCI_CS(HCI_OP_LE_CREATE_CONN, hci_cs_le_create_conn), - HCI_CS(HCI_OP_LE_READ_REMOTE_FEATURES, hci_cs_le_read_remote_features), - HCI_CS(HCI_OP_LE_START_ENC, hci_cs_le_start_enc), - HCI_CS(HCI_OP_LE_EXT_CREATE_CONN, hci_cs_le_ext_create_conn) -}; - -static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb, u16 *opcode, u8 *status, +static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, + u16 *opcode, u8 *status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb) { - struct hci_ev_cmd_status *ev = data; - int i; + struct hci_ev_cmd_status *ev = (void *) skb->data; + + skb_pull(skb, sizeof(*ev)); *opcode = __le16_to_cpu(ev->opcode); *status = ev->status; - bt_dev_dbg(hdev, "opcode 0x%4.4x", *opcode); + switch (*opcode) { + case HCI_OP_INQUIRY: + hci_cs_inquiry(hdev, ev->status); + 
break; - for (i = 0; i < ARRAY_SIZE(hci_cs_table); i++) { - if (hci_cs_table[i].op == *opcode) { - hci_cs_table[i].func(hdev, ev->status); - break; - } + case HCI_OP_CREATE_CONN: + hci_cs_create_conn(hdev, ev->status); + break; + + case HCI_OP_DISCONNECT: + hci_cs_disconnect(hdev, ev->status); + break; + + case HCI_OP_ADD_SCO: + hci_cs_add_sco(hdev, ev->status); + break; + + case HCI_OP_AUTH_REQUESTED: + hci_cs_auth_requested(hdev, ev->status); + break; + + case HCI_OP_SET_CONN_ENCRYPT: + hci_cs_set_conn_encrypt(hdev, ev->status); + break; + + case HCI_OP_REMOTE_NAME_REQ: + hci_cs_remote_name_req(hdev, ev->status); + break; + + case HCI_OP_READ_REMOTE_FEATURES: + hci_cs_read_remote_features(hdev, ev->status); + break; + + case HCI_OP_READ_REMOTE_EXT_FEATURES: + hci_cs_read_remote_ext_features(hdev, ev->status); + break; + + case HCI_OP_SETUP_SYNC_CONN: + hci_cs_setup_sync_conn(hdev, ev->status); + break; + + case HCI_OP_SNIFF_MODE: + hci_cs_sniff_mode(hdev, ev->status); + break; + + case HCI_OP_EXIT_SNIFF_MODE: + hci_cs_exit_sniff_mode(hdev, ev->status); + break; + + case HCI_OP_SWITCH_ROLE: + hci_cs_switch_role(hdev, ev->status); + break; + + case HCI_OP_LE_CREATE_CONN: + hci_cs_le_create_conn(hdev, ev->status); + break; + + case HCI_OP_LE_READ_REMOTE_FEATURES: + hci_cs_le_read_remote_features(hdev, ev->status); + break; + + case HCI_OP_LE_START_ENC: + hci_cs_le_start_enc(hdev, ev->status); + break; + + case HCI_OP_LE_EXT_CREATE_CONN: + hci_cs_le_ext_create_conn(hdev, ev->status); + break; + + default: + BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode); + break; } handle_cmd_cnt_and_timer(hdev, ev->ncmd); @@ -4038,39 +3799,36 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, * (since for this kind of command there will not be a command * complete event).
*/ - if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) { + if (ev->status || + (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->hci.req_event)) hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); - if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { - bt_dev_err(hdev, "unexpected event for opcode 0x%4.4x", - *opcode); - return; - } + + if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { + bt_dev_err(hdev, + "unexpected event for opcode 0x%4.4x", *opcode); + return; } if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); } -static void hci_hardware_error_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_hardware_error_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_hardware_error *ev = data; - - bt_dev_dbg(hdev, "code 0x%2.2x", ev->code); + struct hci_ev_hardware_error *ev = (void *) skb->data; hdev->hw_error_code = ev->code; queue_work(hdev->req_workqueue, &hdev->error_reset); } -static void hci_role_change_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_role_change *ev = data; + struct hci_ev_role_change *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -4087,24 +3845,25 @@ static void hci_role_change_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_num_comp_pkts *ev = data; + struct hci_ev_num_comp_pkts *ev = (void *) skb->data; int i; - if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_PKTS, - flex_array_size(ev, handles, ev->num))) - return; - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); return; } - bt_dev_dbg(hdev, "num %d", ev->num); + if (skb->len < sizeof(*ev) || + skb->len < struct_size(ev, handles, ev->num_hndl)) { + BT_DBG("%s bad parameters", hdev->name); + return; + } - for (i = 0; i < ev->num; i++) { + BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); + + for (i = 0; i < ev->num_hndl; i++) { struct hci_comp_pkts_info *info = &ev->handles[i]; struct hci_conn *conn; __u16 handle, count; @@ -4174,24 +3933,24 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev, return NULL; } -static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_num_comp_blocks *ev = data; + struct hci_ev_num_comp_blocks *ev = (void *) skb->data; int i; - if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_BLOCKS, - flex_array_size(ev, handles, ev->num_hndl))) - return; - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) { - bt_dev_err(hdev, "wrong event for mode %d", - hdev->flow_ctl_mode); + bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); return; } - bt_dev_dbg(hdev, "num_blocks %d num_hndl %d", ev->num_blocks, - ev->num_hndl); + if (skb->len < sizeof(*ev) || + skb->len < struct_size(ev, handles, ev->num_hndl)) { + BT_DBG("%s bad parameters", hdev->name); + return; + } + + BT_DBG("%s num_blocks %d num_hndl %d", hdev->name, ev->num_blocks, + ev->num_hndl); for (i = 0; i < ev->num_hndl; i++) 
{ struct hci_comp_blocks_info *info = &ev->handles[i]; @@ -4225,13 +3984,12 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data, queue_work(hdev->workqueue, &hdev->tx_work); } -static void hci_mode_change_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_mode_change *ev = data; + struct hci_ev_mode_change *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -4254,13 +4012,12 @@ static void hci_mode_change_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_pin_code_request_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_pin_code_req *ev = data; + struct hci_ev_pin_code_req *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -4325,15 +4082,14 @@ static void conn_set_key(struct hci_conn *conn, u8 key_type, u8 pin_len) } } -static void hci_link_key_request_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_link_key_req *ev = data; + struct hci_ev_link_key_req *ev = (void *) skb->data; struct hci_cp_link_key_reply cp; struct hci_conn *conn; struct link_key *key; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; @@ -4342,11 +4098,13 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, void *data, key = hci_find_link_key(hdev, &ev->bdaddr); if (!key) { - bt_dev_dbg(hdev, "link key not found for %pMR", &ev->bdaddr); + BT_DBG("%s link key not found for %pMR", hdev->name, + &ev->bdaddr); goto not_found; } - bt_dev_dbg(hdev, "found key type %u for %pMR", key->type, &ev->bdaddr); + BT_DBG("%s found key type %u for %pMR", hdev->name, key->type, + &ev->bdaddr); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (conn) { @@ -4355,14 +4113,15 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, void *data, if ((key->type == HCI_LK_UNAUTH_COMBINATION_P192 || key->type == HCI_LK_UNAUTH_COMBINATION_P256) && conn->auth_type != 0xff && (conn->auth_type & 0x01)) { - bt_dev_dbg(hdev, "ignoring unauthenticated key"); + BT_DBG("%s ignoring unauthenticated key", hdev->name); goto not_found; } if (key->type == HCI_LK_COMBINATION && key->pin_len < 16 && (conn->pending_sec_level == BT_SECURITY_HIGH || conn->pending_sec_level == BT_SECURITY_FIPS)) { - bt_dev_dbg(hdev, "ignoring key unauthenticated for high security"); + BT_DBG("%s ignoring key unauthenticated for high security", + hdev->name); goto not_found; } @@ -4383,16 +4142,15 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_link_key_notify *ev = data; + struct hci_ev_link_key_notify *ev = (void *) skb->data; struct hci_conn *conn; struct link_key *key; bool persistent; u8 pin_len = 0; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -4444,13 +4202,12 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void 
hci_clock_offset_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_clock_offset *ev = data; + struct hci_ev_clock_offset *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -4468,13 +4225,12 @@ static void hci_clock_offset_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_pkt_type_change_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_pkt_type_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_pkt_type_change *ev = data; + struct hci_ev_pkt_type_change *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -4485,13 +4241,12 @@ static void hci_pkt_type_change_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_pscan_rep_mode_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_pscan_rep_mode *ev = data; + struct hci_ev_pscan_rep_mode *ev = (void *) skb->data; struct inquiry_entry *ie; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -4504,16 +4259,15 @@ static void hci_pscan_rep_mode_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, +static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_inquiry_result_rssi *ev = edata; struct inquiry_data data; - int i; + int num_rsp = *((__u8 *) skb->data); - bt_dev_dbg(hdev, "num_rsp %d", ev->num); + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - if (!ev->num) + if (!num_rsp) return; if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) @@ -4521,22 +4275,16 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, hci_dev_lock(hdev); - if (skb->len == array_size(ev->num, - sizeof(struct inquiry_info_rssi_pscan))) { - struct inquiry_info_rssi_pscan *info; + if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { + struct inquiry_info_with_rssi_and_pscan_mode *info; + info = (void *) (skb->data + 1); - for (i = 0; i < ev->num; i++) { + if (skb->len < num_rsp * sizeof(*info) + 1) + goto unlock; + + for (; num_rsp; num_rsp--, info++) { u32 flags; - info = hci_ev_skb_pull(hdev, skb, - HCI_EV_INQUIRY_RESULT_WITH_RSSI, - sizeof(*info)); - if (!info) { - bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", - HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; - } - bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; data.pscan_period_mode = info->pscan_period_mode; @@ -4552,22 +4300,15 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, info->dev_class, info->rssi, flags, NULL, 0, NULL, 0); } - } else if (skb->len == array_size(ev->num, - sizeof(struct inquiry_info_rssi))) { - struct inquiry_info_rssi *info; + } else { + struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); - for (i = 0; i < ev->num; i++) { + if (skb->len < num_rsp * sizeof(*info) + 1) + goto unlock; + + for (; num_rsp; num_rsp--, info++) { u32 flags; - info = hci_ev_skb_pull(hdev, skb, - HCI_EV_INQUIRY_RESULT_WITH_RSSI, - sizeof(*info)); - if (!info) { - bt_dev_err(hdev, "Malformed 
HCI Event: 0x%2.2x", - HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; - } - bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; data.pscan_period_mode = info->pscan_period_mode; @@ -4583,21 +4324,19 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, info->dev_class, info->rssi, flags, NULL, 0, NULL, 0); } - } else { - bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", - HCI_EV_INQUIRY_RESULT_WITH_RSSI); } +unlock: hci_dev_unlock(hdev); } -static void hci_remote_ext_features_evt(struct hci_dev *hdev, void *data, +static void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_remote_ext_features *ev = data; + struct hci_ev_remote_ext_features *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -4655,13 +4394,13 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, +static void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_sync_conn_complete *ev = data; + struct hci_ev_sync_conn_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -4731,18 +4470,16 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, } bt_dev_dbg(hdev, "SCO connected with air mode: %02x", ev->air_mode); - /* Notify only in case of SCO over HCI transport data path which - * is zero and non-zero value shall be non-HCI transport data path - */ - if (conn->codec.data_path == 0 && hdev->notify) { - switch (ev->air_mode) { - case 0x02: + + switch (ev->air_mode) { + case 0x02: + if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_CVSD); - break; - case 0x03: + break; + case 0x03: + if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_TRANSP); - break; - } + break; } hci_connect_cfm(conn, ev->status); @@ -4770,21 +4507,17 @@ static inline size_t eir_get_length(u8 *eir, size_t eir_len) return eir_len; } -static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, void *edata, +static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_ext_inquiry_result *ev = edata; struct inquiry_data data; + struct extended_inquiry_info *info = (void *) (skb->data + 1); + int num_rsp = *((__u8 *) skb->data); size_t eir_len; - int i; - if (!hci_ev_skb_pull(hdev, skb, HCI_EV_EXTENDED_INQUIRY_RESULT, - flex_array_size(ev, info, ev->num))) - return; + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - bt_dev_dbg(hdev, "num %d", ev->num); - - if (!ev->num) + if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1) return; if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) @@ -4792,8 +4525,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, void *edata, hci_dev_lock(hdev); - for (i = 0; i < ev->num; i++) { - struct extended_inquiry_info *info = &ev->info[i]; + for (; num_rsp; num_rsp--, info++) { u32 flags; bool name_known; @@ -4825,14 +4557,14 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, void *edata, hci_dev_unlock(hdev); } -static void hci_key_refresh_complete_evt(struct hci_dev *hdev, void *data, +static void hci_key_refresh_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_key_refresh_complete *ev = data; + struct 
hci_ev_key_refresh_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x handle 0x%4.4x", ev->status, - __le16_to_cpu(ev->handle)); + BT_DBG("%s status 0x%2.2x handle 0x%4.4x", hdev->name, ev->status, + __le16_to_cpu(ev->handle)); hci_dev_lock(hdev); @@ -4935,13 +4667,12 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) return 0x01; } -static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_io_capa_request *ev = data; + struct hci_ev_io_capa_request *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -5005,13 +4736,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_io_capa_reply_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_io_capa_reply *ev = data; + struct hci_ev_io_capa_reply *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -5026,14 +4756,14 @@ static void hci_io_capa_reply_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_user_confirm_request_evt(struct hci_dev *hdev, void *data, +static void hci_user_confirm_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_user_confirm_req *ev = data; + struct hci_ev_user_confirm_req *ev = (void *) skb->data; int loc_mitm, rem_mitm, confirm_hint = 0; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -5054,7 +4784,7 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, void *data, */ if (conn->pending_sec_level > BT_SECURITY_MEDIUM && conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) { - bt_dev_dbg(hdev, "Rejecting request: remote device can't provide MITM"); + BT_DBG("Rejecting request: remote device can't provide MITM"); hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY, sizeof(ev->bdaddr), &ev->bdaddr); goto unlock; @@ -5073,7 +4803,7 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, void *data, if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && (loc_mitm || rem_mitm)) { - bt_dev_dbg(hdev, "Confirming auto-accept as acceptor"); + BT_DBG("Confirming auto-accept as acceptor"); confirm_hint = 1; goto confirm; } @@ -5111,24 +4841,24 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_user_passkey_request_evt(struct hci_dev *hdev, void *data, +static void hci_user_passkey_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_user_passkey_req *ev = data; + struct hci_ev_user_passkey_req *ev = (void *) skb->data; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0); } -static void hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, +static void hci_user_passkey_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_user_passkey_notify *ev = data; + struct hci_ev_user_passkey_notify *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) @@ -5143,13 +4873,12 @@ static void 
hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, conn->passkey_entered); } -static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_keypress_notify *ev = data; + struct hci_ev_keypress_notify *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) @@ -5182,13 +4911,13 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, conn->passkey_entered); } -static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, +static void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_simple_pair_complete *ev = data; + struct hci_ev_simple_pair_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -5213,14 +4942,14 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_remote_host_features_evt(struct hci_dev *hdev, void *data, +static void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_remote_host_features *ev = data; + struct hci_ev_remote_host_features *ev = (void *) skb->data; struct inquiry_entry *ie; struct hci_conn *conn; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -5235,13 +4964,13 @@ static void hci_remote_host_features_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, void *edata, +static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_remote_oob_data_request *ev = edata; + struct hci_ev_remote_oob_data_request *ev = (void *) skb->data; struct oob_data *data; - bt_dev_dbg(hdev, ""); + BT_DBG("%s", hdev->name); hci_dev_lock(hdev); @@ -5290,13 +5019,14 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, void *edata, } #if IS_ENABLED(CONFIG_BT_HS) -static void hci_chan_selected_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_chan_selected_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_channel_selected *ev = data; + struct hci_ev_channel_selected *ev = (void *)skb->data; struct hci_conn *hcon; - bt_dev_dbg(hdev, "handle 0x%2.2x", ev->phy_handle); + BT_DBG("%s handle 0x%2.2x", hdev->name, ev->phy_handle); + + skb_pull(skb, sizeof(*ev)); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); if (!hcon) @@ -5305,14 +5035,14 @@ static void hci_chan_selected_evt(struct hci_dev *hdev, void *data, amp_read_loc_assoc_final_data(hdev, hcon); } -static void hci_phy_link_complete_evt(struct hci_dev *hdev, void *data, +static void hci_phy_link_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_phy_link_complete *ev = data; + struct hci_ev_phy_link_complete *ev = (void *) skb->data; struct hci_conn *hcon, *bredr_hcon; - bt_dev_dbg(hdev, "handle 0x%2.2x status 0x%2.2x", ev->phy_handle, - ev->status); + BT_DBG("%s handle 0x%2.2x status 0x%2.2x", hdev->name, ev->phy_handle, + ev->status); hci_dev_lock(hdev); @@ -5346,16 +5076,16 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_loglink_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void 
hci_loglink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_logical_link_complete *ev = data; + struct hci_ev_logical_link_complete *ev = (void *) skb->data; struct hci_conn *hcon; struct hci_chan *hchan; struct amp_mgr *mgr; - bt_dev_dbg(hdev, "log_handle 0x%4.4x phy_handle 0x%2.2x status 0x%2.2x", - le16_to_cpu(ev->handle), ev->phy_handle, ev->status); + BT_DBG("%s log_handle 0x%4.4x phy_handle 0x%2.2x status 0x%2.2x", + hdev->name, le16_to_cpu(ev->handle), ev->phy_handle, + ev->status); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); if (!hcon) @@ -5385,14 +5115,14 @@ static void hci_loglink_complete_evt(struct hci_dev *hdev, void *data, } } -static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, void *data, +static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_disconn_logical_link_complete *ev = data; + struct hci_ev_disconn_logical_link_complete *ev = (void *) skb->data; struct hci_chan *hchan; - bt_dev_dbg(hdev, "handle 0x%4.4x status 0x%2.2x", - le16_to_cpu(ev->handle), ev->status); + BT_DBG("%s log handle 0x%4.4x status 0x%2.2x", hdev->name, + le16_to_cpu(ev->handle), ev->status); if (ev->status) return; @@ -5409,13 +5139,13 @@ static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data, +static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_disconn_phy_link_complete *ev = data; + struct hci_ev_disconn_phy_link_complete *ev = (void *) skb->data; struct hci_conn *hcon; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); if (ev->status) return; @@ -5554,7 +5284,22 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->dst_type = irk->addr_type; } - conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL); + /* When using controller based address resolution, then the new + * address types 0x02 and 0x03 are used. 
These types need to be + * converted back into either public address or random address type + */ + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { + switch (conn->dst_type) { + case ADDR_LE_DEV_PUBLIC_RESOLVED: + conn->dst_type = ADDR_LE_DEV_PUBLIC; + break; + case ADDR_LE_DEV_RANDOM_RESOLVED: + conn->dst_type = ADDR_LE_DEV_RANDOM; + break; + } + } if (status) { hci_le_conn_failed(conn, status); @@ -5629,16 +5374,15 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, } unlock: - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); hci_dev_unlock(hdev); } -static void hci_le_conn_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_conn_complete *ev = data; + struct hci_ev_le_conn_complete *ev = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, NULL, ev->role, le16_to_cpu(ev->handle), @@ -5647,43 +5391,35 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, void *data, le16_to_cpu(ev->supervision_timeout)); } -static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, void *data, +static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_enh_conn_complete *ev = data; + struct hci_ev_le_enh_conn_complete *ev = (void *) skb->data; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, &ev->local_rpa, ev->role, le16_to_cpu(ev->handle), le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); + + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) + hci_req_disable_address_resolution(hdev); } -static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_evt_le_ext_adv_set_term *ev = data; + struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data; struct hci_conn *conn; - struct adv_info *adv, *n; + struct adv_info *adv; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); adv = hci_find_adv_instance(hdev, ev->handle); - /* The Bluetooth Core 5.3 specification clearly states that this event - * shall not be sent when the Host disables the advertising set. So in - * case of HCI_ERROR_CANCELLED_BY_HOST, just ignore the event. - * - * When the Host disables an advertising set, all cleanup is done via - * its command callback and not needed to be duplicated here. 
- */ - if (ev->status == HCI_ERROR_CANCELLED_BY_HOST) { - bt_dev_warn_ratelimited(hdev, "Unexpected advertising set terminated event"); - return; - } - if (ev->status) { if (!adv) return; @@ -5692,13 +5428,6 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data, hci_remove_adv_instance(hdev, ev->handle); mgmt_advertising_removed(NULL, hdev, ev->handle); - list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) { - if (adv->enabled) - return; - } - - /* We are no longer advertising, clear HCI_LE_ADV */ - hci_dev_clear_flag(hdev, HCI_LE_ADV); return; } @@ -5726,13 +5455,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data, } } -static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data, +static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_conn_update_complete *ev = data; + struct hci_ev_le_conn_update_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); if (ev->status) return; @@ -5752,8 +5481,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data, /* This function requires the caller holds hdev->lock */ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, - u8 addr_type, bool addr_resolved, - u8 adv_type) + u8 addr_type, u8 adv_type, + bdaddr_t *direct_rpa) { struct hci_conn *conn; struct hci_conn_params *params; @@ -5762,9 +5491,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, if (adv_type != LE_ADV_IND && adv_type != LE_ADV_DIRECT_IND) return NULL; - /* Ignore if the device is blocked or hdev is suspended */ - if (hci_bdaddr_list_lookup(&hdev->reject_list, addr, addr_type) || - hdev->suspended) + /* Ignore if the device is blocked */ + if (hci_bdaddr_list_lookup(&hdev->reject_list, addr, addr_type)) return NULL; /* Most controller will fail if we try to create new connections @@ -5806,9 +5534,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, } } - conn = hci_connect_le(hdev, addr, addr_type, addr_resolved, - BT_SECURITY_LOW, hdev->def_le_autoconnect_timeout, - HCI_ROLE_MASTER); + conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW, + hdev->def_le_autoconnect_timeout, HCI_ROLE_MASTER, + direct_rpa); if (!IS_ERR(conn)) { /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned * by higher layer that tried to connect, if no then @@ -5849,7 +5577,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; struct hci_conn *conn; - bool match, bdaddr_resolved; + bool match; u32 flags; u8 *ptr; @@ -5893,9 +5621,6 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * controller address. */ if (direct_addr) { - direct_addr_type = ev_bdaddr_type(hdev, direct_addr_type, - &bdaddr_resolved); - /* Only resolvable random addresses are valid for these * kind of reports and others can be ignored. */ @@ -5923,15 +5648,13 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, bdaddr_type = irk->addr_type; } - bdaddr_type = ev_bdaddr_type(hdev, bdaddr_type, &bdaddr_resolved); - /* Check if we have been requested to connect to this device. * * direct_addr is set only for directed advertising reports (it is NULL * for advertising reports) and is already verified to be RPA above. 
*/ - conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, - type); + conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type, + direct_addr); if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) { /* Store report for later inclusion by * mgmt_device_connected @@ -6048,38 +5771,33 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, clear_pending_adv_report(hdev); } -static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_advertising_report *ev = data; - - if (!ev->num) - return; + u8 num_reports = skb->data[0]; + void *ptr = &skb->data[1]; hci_dev_lock(hdev); - while (ev->num--) { - struct hci_ev_le_advertising_info *info; + while (num_reports--) { + struct hci_ev_le_advertising_info *ev = ptr; s8 rssi; - info = hci_le_ev_skb_pull(hdev, skb, - HCI_EV_LE_ADVERTISING_REPORT, - sizeof(*info)); - if (!info) + if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) { + bt_dev_err(hdev, "Malicious advertising data."); break; + } - if (!hci_le_ev_skb_pull(hdev, skb, HCI_EV_LE_ADVERTISING_REPORT, - info->length + 1)) - break; - - if (info->length <= HCI_MAX_AD_LENGTH) { - rssi = info->data[info->length]; - process_adv_report(hdev, info->type, &info->bdaddr, - info->bdaddr_type, NULL, 0, rssi, - info->data, info->length, false); + if (ev->length <= HCI_MAX_AD_LENGTH && + ev->data + ev->length <= skb_tail_pointer(skb)) { + rssi = ev->data[ev->length]; + process_adv_report(hdev, ev->evt_type, &ev->bdaddr, + ev->bdaddr_type, NULL, 0, rssi, + ev->data, ev->length, false); } else { bt_dev_err(hdev, "Dropping invalid advertising data"); } + + ptr += sizeof(*ev) + ev->length + 1; } hci_dev_unlock(hdev); @@ -6129,50 +5847,40 @@ static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type) return LE_ADV_INVALID; } -static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_ext_adv_report *ev = data; - - if (!ev->num) - return; + u8 num_reports = skb->data[0]; + void *ptr = &skb->data[1]; hci_dev_lock(hdev); - while (ev->num--) { - struct hci_ev_le_ext_adv_info *info; + while (num_reports--) { + struct hci_ev_le_ext_adv_report *ev = ptr; u8 legacy_evt_type; u16 evt_type; - info = hci_le_ev_skb_pull(hdev, skb, HCI_EV_LE_EXT_ADV_REPORT, - sizeof(*info)); - if (!info) - break; - - if (!hci_le_ev_skb_pull(hdev, skb, HCI_EV_LE_EXT_ADV_REPORT, - info->length)) - break; - - evt_type = __le16_to_cpu(info->type); + evt_type = __le16_to_cpu(ev->evt_type); legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); if (legacy_evt_type != LE_ADV_INVALID) { - process_adv_report(hdev, legacy_evt_type, &info->bdaddr, - info->bdaddr_type, NULL, 0, - info->rssi, info->data, info->length, + process_adv_report(hdev, legacy_evt_type, &ev->bdaddr, + ev->bdaddr_type, NULL, 0, ev->rssi, + ev->data, ev->length, !(evt_type & LE_EXT_ADV_LEGACY_PDU)); } + + ptr += sizeof(*ev) + ev->length; } hci_dev_unlock(hdev); } -static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data, +static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_remote_feat_complete *ev = data; + struct hci_ev_le_remote_feat_complete *ev = (void *)skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 
0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); @@ -6208,16 +5916,15 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -static void hci_le_ltk_request_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_ltk_req *ev = data; + struct hci_ev_le_ltk_req *ev = (void *) skb->data; struct hci_cp_le_ltk_reply cp; struct hci_cp_le_ltk_neg_reply neg; struct hci_conn *conn; struct smp_ltk *ltk; - bt_dev_dbg(hdev, "handle 0x%4.4x", __le16_to_cpu(ev->handle)); + BT_DBG("%s handle 0x%4.4x", hdev->name, __le16_to_cpu(ev->handle)); hci_dev_lock(hdev); @@ -6285,16 +5992,14 @@ static void send_conn_param_neg_reply(struct hci_dev *hdev, u16 handle, &cp); } -static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, +static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_remote_conn_param_req *ev = data; + struct hci_ev_le_remote_conn_param_req *ev = (void *) skb->data; struct hci_cp_le_conn_param_req_reply cp; struct hci_conn *hcon; u16 handle, min, max, latency, timeout; - bt_dev_dbg(hdev, "handle 0x%4.4x", __le16_to_cpu(ev->handle)); - handle = le16_to_cpu(ev->handle); min = le16_to_cpu(ev->interval_min); max = le16_to_cpu(ev->interval_max); @@ -6345,40 +6050,32 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_REPLY, sizeof(cp), &cp); } -static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, +static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_direct_adv_report *ev = data; - int i; + u8 num_reports = skb->data[0]; + struct hci_ev_le_direct_adv_info *ev = (void *)&skb->data[1]; - if (!hci_le_ev_skb_pull(hdev, skb, HCI_EV_LE_DIRECT_ADV_REPORT, - flex_array_size(ev, info, ev->num))) - return; - - if (!ev->num) + if (!num_reports || skb->len < num_reports * sizeof(*ev) + 1) return; hci_dev_lock(hdev); - for (i = 0; i < ev->num; i++) { - struct hci_ev_le_direct_adv_info *info = &ev->info[i]; - - process_adv_report(hdev, info->type, &info->bdaddr, - info->bdaddr_type, &info->direct_addr, - info->direct_addr_type, info->rssi, NULL, 0, + for (; num_reports; num_reports--, ev++) + process_adv_report(hdev, ev->evt_type, &ev->bdaddr, + ev->bdaddr_type, &ev->direct_addr, + ev->direct_addr_type, ev->rssi, NULL, 0, false); - } hci_dev_unlock(hdev); } -static void hci_le_phy_update_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) +static void hci_le_phy_update_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_phy_update_complete *ev = data; + struct hci_ev_le_phy_update_complete *ev = (void *) skb->data; struct hci_conn *conn; - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); if (ev->status) return; @@ -6396,113 +6093,60 @@ static void hci_le_phy_update_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } -#define HCI_LE_EV_VL(_op, _func, _min_len, _max_len) \ -[_op] = { \ - .func = _func, \ - .min_len = _min_len, \ - .max_len = _max_len, \ -} - -#define HCI_LE_EV(_op, _func, _len) \ - HCI_LE_EV_VL(_op, _func, _len, _len) - -#define HCI_LE_EV_STATUS(_op, _func) \ - HCI_LE_EV(_op, _func, sizeof(struct hci_ev_status)) - -/* Entries in this table shall have their position according to the subevent - * opcode they handle so the use of the macros 
above is recommended since it does - attempt to initialize at its proper index using Designated Initializers that - way events without a callback function can be omitted. - */ -static const struct hci_le_ev { - void (*func)(struct hci_dev *hdev, void *data, struct sk_buff *skb); - u16 min_len; - u16 max_len; -} hci_le_ev_table[U8_MAX + 1] = { - /* [0x01 = HCI_EV_LE_CONN_COMPLETE] */ - HCI_LE_EV(HCI_EV_LE_CONN_COMPLETE, hci_le_conn_complete_evt, - sizeof(struct hci_ev_le_conn_complete)), - /* [0x02 = HCI_EV_LE_ADVERTISING_REPORT] */ - HCI_LE_EV_VL(HCI_EV_LE_ADVERTISING_REPORT, hci_le_adv_report_evt, - sizeof(struct hci_ev_le_advertising_report), - HCI_MAX_EVENT_SIZE), - /* [0x03 = HCI_EV_LE_CONN_UPDATE_COMPLETE] */ - HCI_LE_EV(HCI_EV_LE_CONN_UPDATE_COMPLETE, - hci_le_conn_update_complete_evt, - sizeof(struct hci_ev_le_conn_update_complete)), - /* [0x04 = HCI_EV_LE_REMOTE_FEAT_COMPLETE] */ - HCI_LE_EV(HCI_EV_LE_REMOTE_FEAT_COMPLETE, - hci_le_remote_feat_complete_evt, - sizeof(struct hci_ev_le_remote_feat_complete)), - /* [0x05 = HCI_EV_LE_LTK_REQ] */ - HCI_LE_EV(HCI_EV_LE_LTK_REQ, hci_le_ltk_request_evt, - sizeof(struct hci_ev_le_ltk_req)), - /* [0x06 = HCI_EV_LE_REMOTE_CONN_PARAM_REQ] */ - HCI_LE_EV(HCI_EV_LE_REMOTE_CONN_PARAM_REQ, - hci_le_remote_conn_param_req_evt, - sizeof(struct hci_ev_le_remote_conn_param_req)), - /* [0x0a = HCI_EV_LE_ENHANCED_CONN_COMPLETE] */ - HCI_LE_EV(HCI_EV_LE_ENHANCED_CONN_COMPLETE, - hci_le_enh_conn_complete_evt, - sizeof(struct hci_ev_le_enh_conn_complete)), - /* [0x0b = HCI_EV_LE_DIRECT_ADV_REPORT] */ - HCI_LE_EV_VL(HCI_EV_LE_DIRECT_ADV_REPORT, hci_le_direct_adv_report_evt, - sizeof(struct hci_ev_le_direct_adv_report), - HCI_MAX_EVENT_SIZE), - /* [0x0c = HCI_EV_LE_PHY_UPDATE_COMPLETE] */ - HCI_LE_EV(HCI_EV_LE_PHY_UPDATE_COMPLETE, hci_le_phy_update_evt, - sizeof(struct hci_ev_le_phy_update_complete)), - /* [0x0d = HCI_EV_LE_EXT_ADV_REPORT] */ - HCI_LE_EV_VL(HCI_EV_LE_EXT_ADV_REPORT, hci_le_ext_adv_report_evt, - sizeof(struct hci_ev_le_ext_adv_report), - HCI_MAX_EVENT_SIZE), - /* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */ - HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt, - sizeof(struct hci_evt_le_ext_adv_set_term)), -}; -static void hci_le_meta_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb, u16 *opcode, u8 *status, - hci_req_complete_t *req_complete, - hci_req_complete_skb_t *req_complete_skb) +static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_meta *ev = data; - const struct hci_le_ev *subev; + struct hci_ev_le_meta *le_ev = (void *) skb->data; - bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent); + skb_pull(skb, sizeof(*le_ev)); - /* Only match event if command OGF is for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 && - hci_skb_event(hdev->sent_cmd) == ev->subevent) { - *opcode = hci_skb_opcode(hdev->sent_cmd); - hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, - req_complete_skb); + switch (le_ev->subevent) { + case HCI_EV_LE_CONN_COMPLETE: + hci_le_conn_complete_evt(hdev, skb); + break; + + case HCI_EV_LE_CONN_UPDATE_COMPLETE: + hci_le_conn_update_complete_evt(hdev, skb); + break; + + case HCI_EV_LE_ADVERTISING_REPORT: + hci_le_adv_report_evt(hdev, skb); + break; + + case HCI_EV_LE_REMOTE_FEAT_COMPLETE: + hci_le_remote_feat_complete_evt(hdev, skb); + break; + + case HCI_EV_LE_LTK_REQ: + hci_le_ltk_request_evt(hdev, skb); + break; + + case HCI_EV_LE_REMOTE_CONN_PARAM_REQ: + hci_le_remote_conn_param_req_evt(hdev, skb); + break; + + case 
HCI_EV_LE_DIRECT_ADV_REPORT: + hci_le_direct_adv_report_evt(hdev, skb); + break; + + case HCI_EV_LE_PHY_UPDATE_COMPLETE: + hci_le_phy_update_evt(hdev, skb); + break; + + case HCI_EV_LE_EXT_ADV_REPORT: + hci_le_ext_adv_report_evt(hdev, skb); + break; + + case HCI_EV_LE_ENHANCED_CONN_COMPLETE: + hci_le_enh_conn_complete_evt(hdev, skb); + break; + + case HCI_EV_LE_EXT_ADV_SET_TERM: + hci_le_ext_adv_term_evt(hdev, skb); + break; + + default: + break; } - - subev = &hci_le_ev_table[ev->subevent]; - if (!subev->func) - return; - - if (skb->len < subev->min_len) { - bt_dev_err(hdev, "unexpected subevent 0x%2.2x length: %u < %u", - ev->subevent, skb->len, subev->min_len); - return; - } - - /* Just warn if the length is over the max_len size; it may still be - * possible to partially parse the event, so leave it to the callback to - * decide if that is acceptable. - */ - if (skb->len > subev->max_len) - bt_dev_warn(hdev, "unexpected subevent 0x%2.2x length: %u > %u", - ev->subevent, skb->len, subev->max_len); - - data = hci_le_ev_skb_pull(hdev, skb, ev->subevent, subev->min_len); - if (!data) - return; - - subev->func(hdev, data, skb); } static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, @@ -6514,9 +6158,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, if (!skb) return false; - hdr = hci_ev_skb_pull(hdev, skb, event, sizeof(*hdr)); - if (!hdr) + if (skb->len < sizeof(*hdr)) { + bt_dev_err(hdev, "too short HCI event"); return false; + } + + hdr = (void *) skb->data; + skb_pull(skb, HCI_EVENT_HDR_SIZE); if (event) { if (hdr->evt != event) @@ -6536,9 +6184,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return false; } - ev = hci_cc_skb_pull(hdev, skb, opcode, sizeof(*ev)); - if (!ev) + if (skb->len < sizeof(*ev)) { + bt_dev_err(hdev, "too short cmd_complete event"); return false; + } + + ev = (void *) skb->data; + skb_pull(skb, sizeof(*ev)); if (opcode != __le16_to_cpu(ev->opcode)) { BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode, @@ -6554,7 +6206,7 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, { struct hci_ev_le_advertising_info *adv; struct hci_ev_le_direct_adv_info *direct_adv; - struct hci_ev_le_ext_adv_info *ext_adv; + struct hci_ev_le_ext_adv_report *ext_adv; const struct hci_ev_conn_complete *conn_complete = (void *)skb->data; const struct hci_ev_conn_request *conn_request = (void *)skb->data; @@ -6618,252 +6270,25 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, hci_dev_unlock(hdev); } -#define HCI_EV_VL(_op, _func, _min_len, _max_len) \ -[_op] = { \ - .req = false, \ - .func = _func, \ - .min_len = _min_len, \ - .max_len = _max_len, \ -} - -#define HCI_EV(_op, _func, _len) \ - HCI_EV_VL(_op, _func, _len, _len) - -#define HCI_EV_STATUS(_op, _func) \ - HCI_EV(_op, _func, sizeof(struct hci_ev_status)) - -#define HCI_EV_REQ_VL(_op, _func, _min_len, _max_len) \ -[_op] = { \ - .req = true, \ - .func_req = _func, \ - .min_len = _min_len, \ - .max_len = _max_len, \ -} - -#define HCI_EV_REQ(_op, _func, _len) \ - HCI_EV_REQ_VL(_op, _func, _len, _len) - -/* Entries in this table shall have their position according to the event opcode - * they handle so the use of the macros above is recommended since it does attempt - * to initialize at its proper index using Designated Initializers that way - * events without a callback function don't need to be entered. 
- */ -static const struct hci_ev { - bool req; - union { - void (*func)(struct hci_dev *hdev, void *data, - struct sk_buff *skb); - void (*func_req)(struct hci_dev *hdev, void *data, - struct sk_buff *skb, u16 *opcode, u8 *status, - hci_req_complete_t *req_complete, - hci_req_complete_skb_t *req_complete_skb); - }; - u16 min_len; - u16 max_len; -} hci_ev_table[U8_MAX + 1] = { - /* [0x01 = HCI_EV_INQUIRY_COMPLETE] */ - HCI_EV_STATUS(HCI_EV_INQUIRY_COMPLETE, hci_inquiry_complete_evt), - /* [0x02 = HCI_EV_INQUIRY_RESULT] */ - HCI_EV_VL(HCI_EV_INQUIRY_RESULT, hci_inquiry_result_evt, - sizeof(struct hci_ev_inquiry_result), HCI_MAX_EVENT_SIZE), - /* [0x03 = HCI_EV_CONN_COMPLETE] */ - HCI_EV(HCI_EV_CONN_COMPLETE, hci_conn_complete_evt, - sizeof(struct hci_ev_conn_complete)), - /* [0x04 = HCI_EV_CONN_REQUEST] */ - HCI_EV(HCI_EV_CONN_REQUEST, hci_conn_request_evt, - sizeof(struct hci_ev_conn_request)), - /* [0x05 = HCI_EV_DISCONN_COMPLETE] */ - HCI_EV(HCI_EV_DISCONN_COMPLETE, hci_disconn_complete_evt, - sizeof(struct hci_ev_disconn_complete)), - /* [0x06 = HCI_EV_AUTH_COMPLETE] */ - HCI_EV(HCI_EV_AUTH_COMPLETE, hci_auth_complete_evt, - sizeof(struct hci_ev_auth_complete)), - /* [0x07 = HCI_EV_REMOTE_NAME] */ - HCI_EV(HCI_EV_REMOTE_NAME, hci_remote_name_evt, - sizeof(struct hci_ev_remote_name)), - /* [0x08 = HCI_EV_ENCRYPT_CHANGE] */ - HCI_EV(HCI_EV_ENCRYPT_CHANGE, hci_encrypt_change_evt, - sizeof(struct hci_ev_encrypt_change)), - /* [0x09 = HCI_EV_CHANGE_LINK_KEY_COMPLETE] */ - HCI_EV(HCI_EV_CHANGE_LINK_KEY_COMPLETE, - hci_change_link_key_complete_evt, - sizeof(struct hci_ev_change_link_key_complete)), - /* [0x0b = HCI_EV_REMOTE_FEATURES] */ - HCI_EV(HCI_EV_REMOTE_FEATURES, hci_remote_features_evt, - sizeof(struct hci_ev_remote_features)), - /* [0x0e = HCI_EV_CMD_COMPLETE] */ - HCI_EV_REQ_VL(HCI_EV_CMD_COMPLETE, hci_cmd_complete_evt, - sizeof(struct hci_ev_cmd_complete), HCI_MAX_EVENT_SIZE), - /* [0x0f = HCI_EV_CMD_STATUS] */ - HCI_EV_REQ(HCI_EV_CMD_STATUS, hci_cmd_status_evt, - sizeof(struct hci_ev_cmd_status)), - /* [0x10 = HCI_EV_HARDWARE_ERROR] */ - HCI_EV(HCI_EV_HARDWARE_ERROR, hci_hardware_error_evt, - sizeof(struct hci_ev_hardware_error)), - /* [0x12 = HCI_EV_ROLE_CHANGE] */ - HCI_EV(HCI_EV_ROLE_CHANGE, hci_role_change_evt, - sizeof(struct hci_ev_role_change)), - /* [0x13 = HCI_EV_NUM_COMP_PKTS] */ - HCI_EV_VL(HCI_EV_NUM_COMP_PKTS, hci_num_comp_pkts_evt, - sizeof(struct hci_ev_num_comp_pkts), HCI_MAX_EVENT_SIZE), - /* [0x14 = HCI_EV_MODE_CHANGE] */ - HCI_EV(HCI_EV_MODE_CHANGE, hci_mode_change_evt, - sizeof(struct hci_ev_mode_change)), - /* [0x16 = HCI_EV_PIN_CODE_REQ] */ - HCI_EV(HCI_EV_PIN_CODE_REQ, hci_pin_code_request_evt, - sizeof(struct hci_ev_pin_code_req)), - /* [0x17 = HCI_EV_LINK_KEY_REQ] */ - HCI_EV(HCI_EV_LINK_KEY_REQ, hci_link_key_request_evt, - sizeof(struct hci_ev_link_key_req)), - /* [0x18 = HCI_EV_LINK_KEY_NOTIFY] */ - HCI_EV(HCI_EV_LINK_KEY_NOTIFY, hci_link_key_notify_evt, - sizeof(struct hci_ev_link_key_notify)), - /* [0x1c = HCI_EV_CLOCK_OFFSET] */ - HCI_EV(HCI_EV_CLOCK_OFFSET, hci_clock_offset_evt, - sizeof(struct hci_ev_clock_offset)), - /* [0x1d = HCI_EV_PKT_TYPE_CHANGE] */ - HCI_EV(HCI_EV_PKT_TYPE_CHANGE, hci_pkt_type_change_evt, - sizeof(struct hci_ev_pkt_type_change)), - /* [0x20 = HCI_EV_PSCAN_REP_MODE] */ - HCI_EV(HCI_EV_PSCAN_REP_MODE, hci_pscan_rep_mode_evt, - sizeof(struct hci_ev_pscan_rep_mode)), - /* [0x22 = HCI_EV_INQUIRY_RESULT_WITH_RSSI] */ - HCI_EV_VL(HCI_EV_INQUIRY_RESULT_WITH_RSSI, - hci_inquiry_result_with_rssi_evt, - sizeof(struct 
hci_ev_inquiry_result_rssi), - HCI_MAX_EVENT_SIZE), - /* [0x23 = HCI_EV_REMOTE_EXT_FEATURES] */ - HCI_EV(HCI_EV_REMOTE_EXT_FEATURES, hci_remote_ext_features_evt, - sizeof(struct hci_ev_remote_ext_features)), - /* [0x2c = HCI_EV_SYNC_CONN_COMPLETE] */ - HCI_EV(HCI_EV_SYNC_CONN_COMPLETE, hci_sync_conn_complete_evt, - sizeof(struct hci_ev_sync_conn_complete)), - /* [0x2d = HCI_EV_EXTENDED_INQUIRY_RESULT] */ - HCI_EV_VL(HCI_EV_EXTENDED_INQUIRY_RESULT, - hci_extended_inquiry_result_evt, - sizeof(struct hci_ev_ext_inquiry_result), HCI_MAX_EVENT_SIZE), - /* [0x30 = HCI_EV_KEY_REFRESH_COMPLETE] */ - HCI_EV(HCI_EV_KEY_REFRESH_COMPLETE, hci_key_refresh_complete_evt, - sizeof(struct hci_ev_key_refresh_complete)), - /* [0x31 = HCI_EV_IO_CAPA_REQUEST] */ - HCI_EV(HCI_EV_IO_CAPA_REQUEST, hci_io_capa_request_evt, - sizeof(struct hci_ev_io_capa_request)), - /* [0x32 = HCI_EV_IO_CAPA_REPLY] */ - HCI_EV(HCI_EV_IO_CAPA_REPLY, hci_io_capa_reply_evt, - sizeof(struct hci_ev_io_capa_reply)), - /* [0x33 = HCI_EV_USER_CONFIRM_REQUEST] */ - HCI_EV(HCI_EV_USER_CONFIRM_REQUEST, hci_user_confirm_request_evt, - sizeof(struct hci_ev_user_confirm_req)), - /* [0x34 = HCI_EV_USER_PASSKEY_REQUEST] */ - HCI_EV(HCI_EV_USER_PASSKEY_REQUEST, hci_user_passkey_request_evt, - sizeof(struct hci_ev_user_passkey_req)), - /* [0x35 = HCI_EV_REMOTE_OOB_DATA_REQUEST] */ - HCI_EV(HCI_EV_REMOTE_OOB_DATA_REQUEST, hci_remote_oob_data_request_evt, - sizeof(struct hci_ev_remote_oob_data_request)), - /* [0x36 = HCI_EV_SIMPLE_PAIR_COMPLETE] */ - HCI_EV(HCI_EV_SIMPLE_PAIR_COMPLETE, hci_simple_pair_complete_evt, - sizeof(struct hci_ev_simple_pair_complete)), - /* [0x3b = HCI_EV_USER_PASSKEY_NOTIFY] */ - HCI_EV(HCI_EV_USER_PASSKEY_NOTIFY, hci_user_passkey_notify_evt, - sizeof(struct hci_ev_user_passkey_notify)), - /* [0x3c = HCI_EV_KEYPRESS_NOTIFY] */ - HCI_EV(HCI_EV_KEYPRESS_NOTIFY, hci_keypress_notify_evt, - sizeof(struct hci_ev_keypress_notify)), - /* [0x3d = HCI_EV_REMOTE_HOST_FEATURES] */ - HCI_EV(HCI_EV_REMOTE_HOST_FEATURES, hci_remote_host_features_evt, - sizeof(struct hci_ev_remote_host_features)), - /* [0x3e = HCI_EV_LE_META] */ - HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt, - sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE), -#if IS_ENABLED(CONFIG_BT_HS) - /* [0x40 = HCI_EV_PHY_LINK_COMPLETE] */ - HCI_EV(HCI_EV_PHY_LINK_COMPLETE, hci_phy_link_complete_evt, - sizeof(struct hci_ev_phy_link_complete)), - /* [0x41 = HCI_EV_CHANNEL_SELECTED] */ - HCI_EV(HCI_EV_CHANNEL_SELECTED, hci_chan_selected_evt, - sizeof(struct hci_ev_channel_selected)), - /* [0x46 = HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE] */ - HCI_EV(HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE, - hci_disconn_loglink_complete_evt, - sizeof(struct hci_ev_disconn_logical_link_complete)), - /* [0x45 = HCI_EV_LOGICAL_LINK_COMPLETE] */ - HCI_EV(HCI_EV_LOGICAL_LINK_COMPLETE, hci_loglink_complete_evt, - sizeof(struct hci_ev_logical_link_complete)), - /* [0x42 = HCI_EV_DISCONN_PHY_LINK_COMPLETE] */ - HCI_EV(HCI_EV_DISCONN_PHY_LINK_COMPLETE, - hci_disconn_phylink_complete_evt, - sizeof(struct hci_ev_disconn_phy_link_complete)), -#endif - /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */ - HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, - sizeof(struct hci_ev_num_comp_blocks)), - /* [0xff = HCI_EV_VENDOR] */ - HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0), -}; - -static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb, - u16 *opcode, u8 *status, - hci_req_complete_t *req_complete, - hci_req_complete_skb_t *req_complete_skb) -{ - const struct hci_ev *ev = &hci_ev_table[event]; - void 
*data; - - if (!ev->func) - return; - - if (skb->len < ev->min_len) { - bt_dev_err(hdev, "unexpected event 0x%2.2x length: %u < %u", - event, skb->len, ev->min_len); - return; - } - - /* Just warn if the length is over the max_len size; it may still be - * possible to partially parse the event, so leave it to the callback to - * decide if that is acceptable. - */ - if (skb->len > ev->max_len) - bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u", - event, skb->len, ev->max_len); - - data = hci_ev_skb_pull(hdev, skb, event, ev->min_len); - if (!data) - return; - - if (ev->req) - ev->func_req(hdev, data, skb, opcode, status, req_complete, - req_complete_skb); - else - ev->func(hdev, data, skb); -} - void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (void *) skb->data; hci_req_complete_t req_complete = NULL; hci_req_complete_skb_t req_complete_skb = NULL; struct sk_buff *orig_skb = NULL; - u8 status = 0, event, req_evt = 0; + u8 status = 0, event = hdr->evt, req_evt = 0; u16 opcode = HCI_OP_NOP; - if (skb->len < sizeof(*hdr)) { - bt_dev_err(hdev, "Malformed HCI Event"); - goto done; - } - - event = hdr->evt; if (!event) { - bt_dev_warn(hdev, "Received unexpected HCI Event 0x%2.2x", - event); + bt_dev_warn(hdev, "Received unexpected HCI Event 00000000"); goto done; } - /* Only match event if command OGF is not for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 && - hci_skb_event(hdev->sent_cmd) == event) { - hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd), - status, &req_complete, &req_complete_skb); + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) { + struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; + opcode = __le16_to_cpu(cmd_hdr->opcode); + hci_req_cmd_complete(hdev, opcode, status, &req_complete, + &req_complete_skb); req_evt = event; } @@ -6881,10 +6306,191 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) /* Store wake reason if we're suspended */ hci_store_wake_reason(hdev, event, skb); - bt_dev_dbg(hdev, "event 0x%2.2x", event); + switch (event) { + case HCI_EV_INQUIRY_COMPLETE: + hci_inquiry_complete_evt(hdev, skb); + break; - hci_event_func(hdev, event, skb, &opcode, &status, &req_complete, - &req_complete_skb); + case HCI_EV_INQUIRY_RESULT: + hci_inquiry_result_evt(hdev, skb); + break; + + case HCI_EV_CONN_COMPLETE: + hci_conn_complete_evt(hdev, skb); + break; + + case HCI_EV_CONN_REQUEST: + hci_conn_request_evt(hdev, skb); + break; + + case HCI_EV_DISCONN_COMPLETE: + hci_disconn_complete_evt(hdev, skb); + break; + + case HCI_EV_AUTH_COMPLETE: + hci_auth_complete_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_NAME: + hci_remote_name_evt(hdev, skb); + break; + + case HCI_EV_ENCRYPT_CHANGE: + hci_encrypt_change_evt(hdev, skb); + break; + + case HCI_EV_CHANGE_LINK_KEY_COMPLETE: + hci_change_link_key_complete_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_FEATURES: + hci_remote_features_evt(hdev, skb); + break; + + case HCI_EV_CMD_COMPLETE: + hci_cmd_complete_evt(hdev, skb, &opcode, &status, + &req_complete, &req_complete_skb); + break; + + case HCI_EV_CMD_STATUS: + hci_cmd_status_evt(hdev, skb, &opcode, &status, &req_complete, + &req_complete_skb); + break; + + case HCI_EV_HARDWARE_ERROR: + hci_hardware_error_evt(hdev, skb); + break; + + case HCI_EV_ROLE_CHANGE: + hci_role_change_evt(hdev, skb); + break; + + case HCI_EV_NUM_COMP_PKTS: + hci_num_comp_pkts_evt(hdev, skb); + break; + + case HCI_EV_MODE_CHANGE: + hci_mode_change_evt(hdev, skb); + 
break; + + case HCI_EV_PIN_CODE_REQ: + hci_pin_code_request_evt(hdev, skb); + break; + + case HCI_EV_LINK_KEY_REQ: + hci_link_key_request_evt(hdev, skb); + break; + + case HCI_EV_LINK_KEY_NOTIFY: + hci_link_key_notify_evt(hdev, skb); + break; + + case HCI_EV_CLOCK_OFFSET: + hci_clock_offset_evt(hdev, skb); + break; + + case HCI_EV_PKT_TYPE_CHANGE: + hci_pkt_type_change_evt(hdev, skb); + break; + + case HCI_EV_PSCAN_REP_MODE: + hci_pscan_rep_mode_evt(hdev, skb); + break; + + case HCI_EV_INQUIRY_RESULT_WITH_RSSI: + hci_inquiry_result_with_rssi_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_EXT_FEATURES: + hci_remote_ext_features_evt(hdev, skb); + break; + + case HCI_EV_SYNC_CONN_COMPLETE: + hci_sync_conn_complete_evt(hdev, skb); + break; + + case HCI_EV_EXTENDED_INQUIRY_RESULT: + hci_extended_inquiry_result_evt(hdev, skb); + break; + + case HCI_EV_KEY_REFRESH_COMPLETE: + hci_key_refresh_complete_evt(hdev, skb); + break; + + case HCI_EV_IO_CAPA_REQUEST: + hci_io_capa_request_evt(hdev, skb); + break; + + case HCI_EV_IO_CAPA_REPLY: + hci_io_capa_reply_evt(hdev, skb); + break; + + case HCI_EV_USER_CONFIRM_REQUEST: + hci_user_confirm_request_evt(hdev, skb); + break; + + case HCI_EV_USER_PASSKEY_REQUEST: + hci_user_passkey_request_evt(hdev, skb); + break; + + case HCI_EV_USER_PASSKEY_NOTIFY: + hci_user_passkey_notify_evt(hdev, skb); + break; + + case HCI_EV_KEYPRESS_NOTIFY: + hci_keypress_notify_evt(hdev, skb); + break; + + case HCI_EV_SIMPLE_PAIR_COMPLETE: + hci_simple_pair_complete_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_HOST_FEATURES: + hci_remote_host_features_evt(hdev, skb); + break; + + case HCI_EV_LE_META: + hci_le_meta_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_OOB_DATA_REQUEST: + hci_remote_oob_data_request_evt(hdev, skb); + break; + +#if IS_ENABLED(CONFIG_BT_HS) + case HCI_EV_CHANNEL_SELECTED: + hci_chan_selected_evt(hdev, skb); + break; + + case HCI_EV_PHY_LINK_COMPLETE: + hci_phy_link_complete_evt(hdev, skb); + break; + + case HCI_EV_LOGICAL_LINK_COMPLETE: + hci_loglink_complete_evt(hdev, skb); + break; + + case HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE: + hci_disconn_loglink_complete_evt(hdev, skb); + break; + + case HCI_EV_DISCONN_PHY_LINK_COMPLETE: + hci_disconn_phylink_complete_evt(hdev, skb); + break; +#endif + + case HCI_EV_NUM_COMP_BLOCKS: + hci_num_comp_blocks_evt(hdev, skb); + break; + + case HCI_EV_VENDOR: + msft_vendor_evt(hdev, skb); + break; + + default: + BT_DBG("%s event 0x%2.2x", hdev->name, event); + break; + } if (req_complete) { req_complete(hdev, status, opcode); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 42c8047a98..1d34d330af 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -30,7 +30,10 @@ #include "smp.h" #include "hci_request.h" #include "msft.h" -#include "eir.h" + +#define HCI_REQ_DONE 0 +#define HCI_REQ_PEND 1 +#define HCI_REQ_CANCELED 2 void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { @@ -97,8 +100,8 @@ int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) return req_run(req, NULL, complete); } -void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, + struct sk_buff *skb) { bt_dev_dbg(hdev, "result 0x%2.2x", result); @@ -111,6 +114,81 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, } } +void hci_req_sync_cancel(struct hci_dev *hdev, int err) +{ + bt_dev_dbg(hdev, "err 0x%2.2x", err); + + if 
(hdev->req_status == HCI_REQ_PEND) { + hdev->req_result = err; + hdev->req_status = HCI_REQ_CANCELED; + wake_up_interruptible(&hdev->req_wait_q); + } +} + +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout) +{ + struct hci_request req; + struct sk_buff *skb; + int err = 0; + + bt_dev_dbg(hdev, ""); + + hci_req_init(&req, hdev); + + hci_req_add_ev(&req, opcode, plen, param, event); + + hdev->req_status = HCI_REQ_PEND; + + err = hci_req_run_skb(&req, hci_req_sync_complete); + if (err < 0) + return ERR_PTR(err); + + err = wait_event_interruptible_timeout(hdev->req_wait_q, + hdev->req_status != HCI_REQ_PEND, timeout); + + if (err == -ERESTARTSYS) + return ERR_PTR(-EINTR); + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + skb = hdev->req_skb; + hdev->req_skb = NULL; + + bt_dev_dbg(hdev, "end: err %d", err); + + if (err < 0) { + kfree_skb(skb); + return ERR_PTR(err); + } + + if (!skb) + return ERR_PTR(-ENODATA); + + return skb; +} +EXPORT_SYMBOL(__hci_cmd_sync_ev); + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); +} +EXPORT_SYMBOL(__hci_cmd_sync); + /* Execute request and wait for completion. */ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, unsigned long opt), @@ -357,6 +435,82 @@ static bool __hci_update_interleaved_scan(struct hci_dev *hdev) return false; } +/* This function controls the background scanning based on hdev->pend_le_conns + * list. If there are pending LE connections we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. + */ +static void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG) || + hci_dev_test_flag(hdev, HCI_AUTO_OFF) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure that proper values are set for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + bt_dev_dbg(hdev, "ADV monitoring is %s", + hci_is_adv_monitoring(hdev) ? "on" : "off"); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports) && + !hci_is_adv_monitoring(hdev)) { + /* If there are no pending LE connections or devices + * to be scanned for or no ADV monitors, we should stop the + * background scanning. + */ + + /* If controller is not scanning we are done. 
*/ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + + hci_req_add_le_scan_disable(req, false); + + bt_dev_dbg(hdev, "stopping background scanning"); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. + */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + if (hci_lookup_le_connect(hdev)) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req, false); + + hci_req_add_le_passive_scan(req); + bt_dev_dbg(hdev, "starting background scanning"); + } +} + void __hci_req_update_name(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -367,6 +521,164 @@ void __hci_req_update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } +#define PNP_INFO_SVCLASS_ID 0x1200 + +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = (uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 6) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); + } + + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; +} + +static void create_eir(struct hci_dev *hdev, u8 *data) +{ + u8 *ptr = data; + size_t name_len; + + name_len = strlen(hdev->dev_name); + + if (name_len > 0) { + /* EIR Data type */ + if (name_len > 48) { + name_len = 48; + ptr[1] = EIR_NAME_SHORT; + } else + ptr[1] = EIR_NAME_COMPLETE; + + /* EIR Data length */ + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + ptr += (name_len + 2); + } + + if 
(hdev->inq_tx_power != HCI_TX_POWER_INVALID) { + ptr[0] = 2; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8) hdev->inq_tx_power; + + ptr += 3; + } + + if (hdev->devid_source > 0) { + ptr[0] = 9; + ptr[1] = EIR_DEVICE_ID; + + put_unaligned_le16(hdev->devid_source, ptr + 2); + put_unaligned_le16(hdev->devid_vendor, ptr + 4); + put_unaligned_le16(hdev->devid_product, ptr + 6); + put_unaligned_le16(hdev->devid_version, ptr + 8); + + ptr += 10; + } + + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); +} + void __hci_req_update_eir(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -386,7 +698,7 @@ void __hci_req_update_eir(struct hci_request *req) memset(&cp, 0, sizeof(cp)); - eir_create(hdev, cp.data); + create_eir(hdev, cp.data); if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) return; @@ -405,6 +717,9 @@ void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) return; } + if (hdev->suspended) + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_enable cp; @@ -421,7 +736,9 @@ void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) } /* Disable address resolution */ - if (hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) { + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) { __u8 enable = 0x00; hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); @@ -440,7 +757,8 @@ static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp); - if (use_ll_privacy(req->hdev)) { + if (use_ll_privacy(req->hdev) && + hci_dev_test_flag(req->hdev, HCI_ENABLE_LL_PRIVACY)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type); @@ -481,8 +799,8 @@ static int add_to_accept_list(struct hci_request *req, } /* During suspend, only wakeable devices can be in accept list */ - if (hdev->suspended && - !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags)) + if (hdev->suspended && !hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP, + params->current_flags)) return 0; *num_entries += 1; @@ -493,7 +811,8 @@ static int add_to_accept_list(struct hci_request *req, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp); - if (use_ll_privacy(hdev)) { + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(hdev, ¶ms->addr, @@ -532,7 +851,8 @@ static u8 update_accept_list(struct hci_request *req) */ bool allow_rpa = hdev->suspended; - if (use_ll_privacy(hdev)) + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) allow_rpa = true; /* Go through the current accept list programmed into the @@ -621,7 +941,9 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, return; } - if (use_ll_privacy(hdev) && addr_resolv) { + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && + addr_resolv) { u8 enable = 0x01; hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); @@ -778,6 +1100,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req) if (hdev->suspended) { window = hdev->le_scan_window_suspend; interval = hdev->le_scan_int_suspend; + + 
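/* Setting this suspend task bit arms a handshake: suspend_req_complete()
+	 * below clears the bit again and wakes hdev->suspend_wait_q, so the
+	 * suspend flow knows the controller has acted on the scan change.
+	 */
+	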
set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); } else if (hci_is_le_conn_scanning(hdev)) { window = hdev->le_scan_window_connect; interval = hdev->le_scan_int_connect; @@ -810,6 +1134,78 @@ void hci_req_add_le_passive_scan(struct hci_request *req) addr_resolv); } +static bool adv_instance_is_scannable(struct hci_dev *hdev, u8 instance) +{ + struct adv_info *adv_instance; + + /* Instance 0x00 always set local name */ + if (instance == 0x00) + return true; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return false; + + if (adv_instance->flags & MGMT_ADV_FLAG_APPEARANCE || + adv_instance->flags & MGMT_ADV_FLAG_LOCAL_NAME) + return true; + + return adv_instance->scan_rsp_len ? true : false; +} + +static void hci_req_clear_event_filter(struct hci_request *req) +{ + struct hci_cp_set_event_filter f; + + if (!hci_dev_test_flag(req->hdev, HCI_BREDR_ENABLED)) + return; + + if (hci_dev_test_flag(req->hdev, HCI_EVENT_FILTER_CONFIGURED)) { + memset(&f, 0, sizeof(f)); + f.flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f); + } +} + +static void hci_req_set_event_filter(struct hci_request *req) +{ + struct bdaddr_list_with_flags *b; + struct hci_cp_set_event_filter f; + struct hci_dev *hdev = req->hdev; + u8 scan = SCAN_DISABLED; + bool scanning = test_bit(HCI_PSCAN, &hdev->flags); + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return; + + /* Always clear event filter when starting */ + hci_req_clear_event_filter(req); + + list_for_each_entry(b, &hdev->accept_list, list) { + if (!hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP, + b->current_flags)) + continue; + + memset(&f, 0, sizeof(f)); + bacpy(&f.addr_conn_flt.bdaddr, &b->bdaddr); + f.flt_type = HCI_FLT_CONN_SETUP; + f.cond_type = HCI_CONN_SETUP_ALLOW_BDADDR; + f.addr_conn_flt.auto_accept = HCI_CONN_SETUP_AUTO_ON; + + bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr); + hci_req_add(req, HCI_OP_SET_EVENT_FLT, sizeof(f), &f); + scan = SCAN_PAGE; + } + + if (scan && !scanning) { + set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + } else if (!scan && scanning) { + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + } +} + static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -818,9 +1214,235 @@ static void cancel_adv_timeout(struct hci_dev *hdev) } } +/* This function requires the caller holds hdev->lock */ +void __hci_req_pause_adv_instances(struct hci_request *req) +{ + bt_dev_dbg(req->hdev, "Pausing advertising instances"); + + /* Call to disable any advertisements active on the controller. + * This will succeed even if no advertisements are configured. 
+ */ + __hci_req_disable_advertising(req); + + /* If we are using software rotation, pause the loop */ + if (!ext_adv_capable(req->hdev)) + cancel_adv_timeout(req->hdev); +} + +/* This function requires the caller holds hdev->lock */ +static void __hci_req_resume_adv_instances(struct hci_request *req) +{ + struct adv_info *adv; + + bt_dev_dbg(req->hdev, "Resuming advertising instances"); + + if (ext_adv_capable(req->hdev)) { + /* Call for each tracked instance to be re-enabled */ + list_for_each_entry(adv, &req->hdev->adv_instances, list) { + __hci_req_enable_ext_advertising(req, + adv->instance); + } + + } else { + /* Schedule for most recent instance to be restarted and begin + * the software rotation loop + */ + __hci_req_schedule_adv_instance(req, + req->hdev->cur_adv_instance, + true); + } +} + +/* This function requires the caller holds hdev->lock */ +int hci_req_resume_adv_instances(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_req_resume_adv_instances(&req); + + return hci_req_run(&req, NULL); +} + +static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, + status); + if (test_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || + test_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); + clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + wake_up(&hdev->suspend_wait_q); + } + + if (test_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks)) { + clear_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks); + wake_up(&hdev->suspend_wait_q); + } +} + +static void hci_req_add_set_adv_filter_enable(struct hci_request *req, + bool enable) +{ + struct hci_dev *hdev = req->hdev; + + switch (hci_get_adv_monitor_offload_ext(hdev)) { + case HCI_ADV_MONITOR_EXT_MSFT: + msft_req_add_set_filter_enable(req, enable); + break; + default: + return; + } + + /* No need to block when enabling since it's on resume path */ + if (hdev->suspended && !enable) + set_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks); +} + +/* Call with hci_dev_lock */ +void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) +{ + int old_state; + struct hci_conn *conn; + struct hci_request req; + u8 page_scan; + int disconnect_counter; + + if (next == hdev->suspend_state) { + bt_dev_dbg(hdev, "Same state before and after: %d", next); + goto done; + } + + hdev->suspend_state = next; + hci_req_init(&req, hdev); + + if (next == BT_SUSPEND_DISCONNECT) { + /* Mark device as suspended */ + hdev->suspended = true; + + /* Pause discovery if not already stopped */ + old_state = hdev->discovery.state; + if (old_state != DISCOVERY_STOPPED) { + set_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + + hdev->discovery_paused = true; + hdev->discovery_old_state = old_state; + + /* Stop directed advertising */ + old_state = hci_dev_test_flag(hdev, HCI_ADVERTISING); + if (old_state) { + set_bit(SUSPEND_PAUSE_ADVERTISING, hdev->suspend_tasks); + cancel_delayed_work(&hdev->discov_off); + queue_delayed_work(hdev->req_workqueue, + &hdev->discov_off, 0); + } + + /* Pause other advertisements */ + if (hdev->adv_instance_cnt) + __hci_req_pause_adv_instances(&req); + + hdev->advertising_paused = true; + hdev->advertising_old_state = old_state; + + /* Disable page scan if enabled */ + if (test_bit(HCI_PSCAN, &hdev->flags)) { + page_scan = 
SCAN_DISABLED; + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, + &page_scan); + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + } + + /* Disable LE passive scan if enabled */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + cancel_interleave_scan(hdev); + hci_req_add_le_scan_disable(&req, false); + } + + /* Disable advertisement filters */ + hci_req_add_set_adv_filter_enable(&req, false); + + /* Prevent disconnects from causing scanning to be re-enabled */ + hdev->scanning_paused = true; + + /* Run commands before disconnecting */ + hci_req_run(&req, suspend_req_complete); + + disconnect_counter = 0; + /* Soft disconnect everything (power off) */ + list_for_each_entry(conn, &hdev->conn_hash.list, list) { + hci_disconnect(conn, HCI_ERROR_REMOTE_POWER_OFF); + disconnect_counter++; + } + + if (disconnect_counter > 0) { + bt_dev_dbg(hdev, + "Had %d disconnects. Will wait on them", + disconnect_counter); + set_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks); + } + } else if (next == BT_SUSPEND_CONFIGURE_WAKE) { + /* Unpause to take care of updating scanning params */ + hdev->scanning_paused = false; + /* Enable event filter for paired devices */ + hci_req_set_event_filter(&req); + /* Enable passive scan at lower duty cycle */ + __hci_update_background_scan(&req); + /* Pause scan changes again. */ + hdev->scanning_paused = true; + hci_req_run(&req, suspend_req_complete); + } else { + hdev->suspended = false; + hdev->scanning_paused = false; + + /* Clear any event filters and restore scan state */ + hci_req_clear_event_filter(&req); + __hci_req_update_scan(&req); + + /* Reset passive/background scanning to normal */ + __hci_update_background_scan(&req); + /* Enable all of the advertisement filters */ + hci_req_add_set_adv_filter_enable(&req, true); + + /* Unpause directed advertising */ + hdev->advertising_paused = false; + if (hdev->advertising_old_state) { + set_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks); + hci_dev_set_flag(hdev, HCI_ADVERTISING); + queue_work(hdev->req_workqueue, + &hdev->discoverable_update); + hdev->advertising_old_state = 0; + } + + /* Resume other advertisements */ + if (hdev->adv_instance_cnt) + __hci_req_resume_adv_instances(&req); + + /* Unpause discovery */ + hdev->discovery_paused = false; + if (hdev->discovery_old_state != DISCOVERY_STOPPED && + hdev->discovery_old_state != DISCOVERY_STOPPING) { + set_bit(SUSPEND_UNPAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + + hci_req_run(&req, suspend_req_complete); + } + + hdev->suspend_state = next; + +done: + clear_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + wake_up(&hdev->suspend_wait_q); +} + static bool adv_cur_instance_is_scannable(struct hci_dev *hdev) { - return hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance); + return adv_instance_is_scannable(hdev, hdev->cur_adv_instance); } void __hci_req_disable_advertising(struct hci_request *req) @@ -835,6 +1457,40 @@ void __hci_req_disable_advertising(struct hci_request *req) } } +static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) +{ + u32 flags; + struct adv_info *adv_instance; + + if (instance == 0x00) { + /* Instance 0 always manages the "Tx Power" and "Flags" + * fields + */ + flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; + + /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting + * corresponds to the "connectable" instance flag. 
+ */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) + flags |= MGMT_ADV_FLAG_CONNECTABLE; + + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) + flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; + else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) + flags |= MGMT_ADV_FLAG_DISCOV; + + return flags; + } + + adv_instance = hci_find_adv_instance(hdev, instance); + + /* Return 0 when we got an invalid instance identifier. */ + if (!adv_instance) + return 0; + + return adv_instance->flags; +} + static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags) { /* If privacy is not enabled don't use RPA */ @@ -899,15 +1555,15 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) void __hci_req_enable_advertising(struct hci_request *req) { struct hci_dev *hdev = req->hdev; - struct adv_info *adv; + struct adv_info *adv_instance; struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; u16 adv_min_interval, adv_max_interval; u32 flags; - flags = hci_adv_instance_flags(hdev, hdev->cur_adv_instance); - adv = hci_find_adv_instance(hdev, hdev->cur_adv_instance); + flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); + adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. @@ -939,9 +1595,9 @@ void __hci_req_enable_advertising(struct hci_request *req) memset(&cp, 0, sizeof(cp)); - if (adv) { - adv_min_interval = adv->min_interval; - adv_max_interval = adv->max_interval; + if (adv_instance) { + adv_min_interval = adv_instance->min_interval; + adv_max_interval = adv_instance->max_interval; } else { adv_min_interval = hdev->le_adv_min_interval; adv_max_interval = hdev->le_adv_max_interval; @@ -972,6 +1628,85 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +{ + size_t short_len; + size_t complete_len; + + /* no space left for name (+ NULL + type + len) */ + if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + return ad_len; + + /* use complete name if present and fits */ + complete_len = strlen(hdev->dev_name); + if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len + 1); + + /* use short name if present */ + short_len = strlen(hdev->short_name); + if (short_len) + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->short_name, short_len + 1); + + /* use shortened full name if present, we already know that name + * is longer then HCI_MAX_SHORT_NAME_LENGTH + */ + if (complete_len) { + u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; + + memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); + name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; + + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name, + sizeof(name)); + } + + return ad_len; +} + +static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +{ + return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance); +} + +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + u8 scan_rsp_len = 0; + + if (hdev->appearance) + scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); + + return append_local_name(hdev, ptr, scan_rsp_len); +} + +static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, + u8 *ptr) +{ + struct adv_info 
*adv_instance; + u32 instance_flags; + u8 scan_rsp_len = 0; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + + instance_flags = adv_instance->flags; + + if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) + scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); + + memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data, + adv_instance->scan_rsp_len); + + scan_rsp_len += adv_instance->scan_rsp_len; + + if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) + scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); + + return scan_rsp_len; +} + void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; @@ -988,7 +1723,11 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) memset(&pdu, 0, sizeof(pdu)); - len = eir_create_scan_rsp(hdev, instance, pdu.data); + if (instance) + len = create_instance_scan_rsp_data(hdev, instance, + pdu.data); + else + len = create_default_scan_rsp_data(hdev, pdu.data); if (hdev->scan_rsp_data_len == len && !memcmp(pdu.data, hdev->scan_rsp_data, len)) @@ -1009,7 +1748,11 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - len = eir_create_scan_rsp(hdev, instance, cp.data); + if (instance) + len = create_instance_scan_rsp_data(hdev, instance, + cp.data); + else + len = create_default_scan_rsp_data(hdev, cp.data); if (hdev->scan_rsp_data_len == len && !memcmp(cp.data, hdev->scan_rsp_data, len)) @@ -1024,6 +1767,95 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) } } +static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +{ + struct adv_info *adv_instance = NULL; + u8 ad_len = 0, flags = 0; + u32 instance_flags; + + /* Return 0 when the current instance identifier is invalid. */ + if (instance) { + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + } + + instance_flags = get_adv_instance_flags(hdev, instance); + + /* If instance already has the flags set skip adding it once + * again. + */ + if (adv_instance && eir_get_data(adv_instance->adv_data, + adv_instance->adv_data_len, EIR_FLAGS, + NULL)) + goto skip_flags; + + /* The Add Advertising command allows userspace to set both the general + * and limited discoverable flags. + */ + if (instance_flags & MGMT_ADV_FLAG_DISCOV) + flags |= LE_AD_GENERAL; + + if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) + flags |= LE_AD_LIMITED; + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { + /* If a discovery flag wasn't provided, simply use the global + * settings. + */ + if (!flags) + flags |= mgmt_get_adv_discov_flags(hdev); + + /* If flags would still be empty, then there is no need to + * include the "Flags" AD field". 
+ */ + if (flags) { + ptr[0] = 0x02; + ptr[1] = EIR_FLAGS; + ptr[2] = flags; + + ad_len += 3; + ptr += 3; + } + } + +skip_flags: + if (adv_instance) { + memcpy(ptr, adv_instance->adv_data, + adv_instance->adv_data_len); + ad_len += adv_instance->adv_data_len; + ptr += adv_instance->adv_data_len; + } + + if (instance_flags & MGMT_ADV_FLAG_TX_POWER) { + s8 adv_tx_power; + + if (ext_adv_capable(hdev)) { + if (adv_instance) + adv_tx_power = adv_instance->tx_power; + else + adv_tx_power = hdev->adv_tx_power; + } else { + adv_tx_power = hdev->adv_tx_power; + } + + /* Provide Tx Power only if we can provide a valid value for it */ + if (adv_tx_power != HCI_TX_POWER_INVALID) { + ptr[0] = 0x02; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8)adv_tx_power; + + ad_len += 3; + ptr += 3; + } + } + + return ad_len; +} + void __hci_req_update_adv_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; @@ -1040,7 +1872,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance) memset(&pdu, 0, sizeof(pdu)); - len = eir_create_adv_data(hdev, instance, pdu.data); + len = create_instance_adv_data(hdev, instance, pdu.data); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && @@ -1062,7 +1894,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - len = eir_create_adv_data(hdev, instance, cp.data); + len = create_instance_adv_data(hdev, instance, cp.data); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && @@ -1099,7 +1931,8 @@ void hci_req_disable_address_resolution(struct hci_dev *hdev) struct hci_request req; __u8 enable = 0x00; - if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) + if (!use_ll_privacy(hdev) && + !hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) return; hci_req_init(&req, hdev); @@ -1242,7 +2075,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, /* If Controller supports LL Privacy use own address type is * 0x03 */ - if (use_ll_privacy(hdev)) + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; @@ -1349,7 +2183,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) adv_instance = NULL; } - flags = hci_adv_instance_flags(hdev, instance); + flags = get_adv_instance_flags(hdev, instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. 
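(Aside: a minimal standalone sketch -- not part of the patch -- of the
AD/EIR element layout that create_instance_adv_data() and
append_local_name() above rely on; the 0x01 type code and the flag bits
are the standard Bluetooth assigned numbers:)

#include <stdint.h>

/* Every element is [len][type][payload...], with len counting the type
 * byte plus the payload -- exactly what ptr[0] = 0x02, ptr[1] = EIR_FLAGS,
 * ptr[2] = flags encodes above.
 */
static uint8_t build_flags_ad(uint8_t *buf)
{
	buf[0] = 0x02;		/* length: type byte + 1 payload byte */
	buf[1] = 0x01;		/* AD type: Flags (EIR_FLAGS) */
	buf[2] = 0x02 | 0x04;	/* LE General Discoverable | BR/EDR Not Supported */

	return 3;		/* bytes written, advanced like ad_len above */
}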
@@ -1389,7 +2223,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND); - } else if (hci_adv_instance_is_scannable(hdev, instance) || + } else if (adv_instance_is_scannable(hdev, instance) || (flags & MGMT_ADV_PARAM_SCAN_RSP)) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND); @@ -1420,8 +2254,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp); - if ((own_addr_type == ADDR_LE_DEV_RANDOM || - own_addr_type == ADDR_LE_DEV_RANDOM_RESOLVED) && + if (own_addr_type == ADDR_LE_DEV_RANDOM && bacmp(&random_addr, BDADDR_ANY)) { struct hci_cp_le_set_adv_set_rand_addr cp; @@ -1485,7 +2318,7 @@ int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance) /* Set duration per instance since controller is responsible for * scheduling it. */ - if (adv_instance && adv_instance->duration) { + if (adv_instance && adv_instance->timeout) { u16 duration = adv_instance->timeout * MSEC_PER_SEC; /* Time = N * 10 ms */ @@ -1710,7 +2543,8 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, /* If Controller supports LL Privacy use own address type is * 0x03 */ - if (use_ll_privacy(hdev)) + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; @@ -1850,6 +2684,47 @@ static void scan_update_work(struct work_struct *work) hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } +static int connectable_update(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + hci_dev_lock(hdev); + + __hci_req_update_scan(req); + + /* If BR/EDR is not enabled and we disable advertising as a + * by-product of disabling connectable, we need to update the + * advertising flags. 
+ */ + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + __hci_req_update_adv_data(req, hdev->cur_adv_instance); + + /* Update the advertising parameters if necessary */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !list_empty(&hdev->adv_instances)) { + if (ext_adv_capable(hdev)) + __hci_req_start_ext_adv(req, hdev->cur_adv_instance); + else + __hci_req_enable_advertising(req); + } + + __hci_update_background_scan(req); + + hci_dev_unlock(hdev); + + return 0; +} + +static void connectable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + connectable_update); + u8 status; + + hci_req_sync(hdev, connectable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_connectable_complete(hdev, status); +} + static u8 get_service_classes(struct hci_dev *hdev) { struct bt_uuid *uuid; @@ -1953,6 +2828,16 @@ static int discoverable_update(struct hci_request *req, unsigned long opt) return 0; } +static void discoverable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discoverable_update); + u8 status; + + hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_discoverable_complete(hdev, status); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -2046,6 +2931,35 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } +static int update_bg_scan(struct hci_request *req, unsigned long opt) +{ + hci_dev_lock(req->hdev); + __hci_update_background_scan(req); + hci_dev_unlock(req->hdev); + return 0; +} + +static void bg_scan_update(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + bg_scan_update); + struct hci_conn *conn; + u8 status; + int err; + + err = hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT, &status); + if (!err) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + hci_le_conn_failed(conn, status); + + hci_dev_unlock(hdev); +} + static int le_scan_disable(struct hci_request *req, unsigned long opt) { hci_req_add_le_scan_disable(req, false); @@ -2413,53 +3327,6 @@ bool hci_req_stop_discovery(struct hci_request *req) return ret; } -static void config_data_path_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - bt_dev_dbg(hdev, "status %u", status); -} - -int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec) -{ - struct hci_request req; - int err; - __u8 vnd_len, *vnd_data = NULL; - struct hci_op_configure_data_path *cmd = NULL; - - hci_req_init(&req, hdev); - - err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, - &vnd_data); - if (err < 0) - goto error; - - cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL); - if (!cmd) { - err = -ENOMEM; - goto error; - } - - err = hdev->get_data_path_id(hdev, &cmd->data_path_id); - if (err < 0) - goto error; - - cmd->vnd_len = vnd_len; - memcpy(cmd->vnd_data, vnd_data, vnd_len); - - cmd->direction = 0x00; - hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd); - - cmd->direction = 0x01; - hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd); - - err = hci_req_run(&req, config_data_path_complete); -error: - - kfree(cmd); - kfree(vnd_data); - return err; -} - static int stop_discovery(struct hci_request *req, unsigned long opt) { hci_dev_lock(req->hdev); @@ -2632,7 +3499,10 @@ int __hci_req_hci_power_on(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev) { 
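+	/* Pair each deferred-work item with its handler; they are all queued
+	 * on hdev->req_workqueue, and hci_request_cancel_all() below tears
+	 * each of them down again.
+	 */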
INIT_WORK(&hdev->discov_update, discov_update); + INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_WORK(&hdev->scan_update, scan_update_work); + INIT_WORK(&hdev->connectable_update, connectable_update_work); + INIT_WORK(&hdev->discoverable_update, discoverable_update_work); INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); @@ -2642,10 +3512,13 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { - __hci_cmd_sync_cancel(hdev, ENODEV); + hci_req_sync_cancel(hdev, ENODEV); cancel_work_sync(&hdev->discov_update); + cancel_work_sync(&hdev->bg_scan_update); cancel_work_sync(&hdev->scan_update); + cancel_work_sync(&hdev->connectable_update); + cancel_work_sync(&hdev->discoverable_update); cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 7f8df258e2..39ee8a1808 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -22,17 +22,9 @@ #include -#define HCI_REQ_DONE 0 -#define HCI_REQ_PEND 1 -#define HCI_REQ_CANCELED 2 - #define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) #define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) -#define HCI_REQ_DONE 0 -#define HCI_REQ_PEND 1 -#define HCI_REQ_CANCELED 2 - struct hci_request { struct hci_dev *hdev; struct sk_buff_head cmd_q; @@ -48,8 +40,6 @@ void hci_req_purge(struct hci_request *req); bool hci_req_status_pend(struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); -void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, const void *param); void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, @@ -64,6 +54,7 @@ int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status); +void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); @@ -80,6 +71,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req); void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); void hci_req_disable_address_resolution(struct hci_dev *hdev); +void __hci_req_pause_adv_instances(struct hci_request *req); +int hci_req_resume_adv_instances(struct hci_dev *hdev); void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); @@ -108,8 +101,6 @@ void __hci_req_update_class(struct hci_request *req); /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); -int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec); - static inline void hci_req_update_scan(struct hci_dev *hdev) { queue_work(hdev->req_workqueue, &hdev->scan_update); @@ -124,5 +115,33 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); +static inline void 
hci_update_background_scan(struct hci_dev *hdev) +{ + queue_work(hdev->req_workqueue, &hdev->bg_scan_update); +} + void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); + +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len); + +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, + u8 *data, u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 33b3c0ffc3..f1128c2134 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -57,7 +57,6 @@ struct hci_pinfo { unsigned long flags; __u32 cookie; char comm[TASK_COMM_LEN]; - __u16 mtu; }; static struct hci_dev *hci_hdev_from_sock(struct sock *sk) @@ -889,6 +888,10 @@ static int hci_sock_release(struct socket *sock) } sock_orphan(sk); + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + release_sock(sk); sock_put(sk); return 0; @@ -1371,10 +1374,6 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, break; } - /* Default MTU to HCI_MAX_FRAME_SIZE if not set */ - if (!hci_pi(sk)->mtu) - hci_pi(sk)->mtu = HCI_MAX_FRAME_SIZE; - sk->sk_state = BT_BOUND; done: @@ -1507,8 +1506,9 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, } static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, - struct sk_buff *skb) + struct msghdr *msg, size_t msglen) { + void *buf; u8 *cp; struct mgmt_hdr *hdr; u16 opcode, index, len; @@ -1517,31 +1517,40 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, bool var_len, no_hdev; int err; - BT_DBG("got %d bytes", skb->len); + BT_DBG("got %zu bytes", msglen); - if (skb->len < sizeof(*hdr)) + if (msglen < sizeof(*hdr)) return -EINVAL; - hdr = (void *)skb->data; + buf = kmalloc(msglen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (memcpy_from_msg(buf, msg, msglen)) { + err = -EFAULT; + goto done; + } + + hdr = buf; opcode = __le16_to_cpu(hdr->opcode); index = __le16_to_cpu(hdr->index); len = __le16_to_cpu(hdr->len); - if (len != skb->len - sizeof(*hdr)) { + if (len != msglen - sizeof(*hdr)) { err = -EINVAL; goto done; } if (chan->channel == HCI_CHANNEL_CONTROL) { - struct sk_buff *cmd; + struct sk_buff *skb; /* Send event to monitor */ - cmd = create_monitor_ctrl_command(sk, index, opcode, len, - skb->data + sizeof(*hdr)); - if (cmd) { - hci_send_to_channel(HCI_CHANNEL_MONITOR, cmd, + skb = create_monitor_ctrl_command(sk, index, opcode, len, + buf + sizeof(*hdr)); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); - kfree_skb(cmd); + kfree_skb(skb); } } @@ -1606,25 +1615,26 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, if (hdev && chan->hdev_init) chan->hdev_init(sk, hdev); - cp = skb->data + sizeof(*hdr); + cp = buf + sizeof(*hdr); err = handler->func(sk, hdev, cp, len); if (err < 0) goto done; - err = skb->len; + err = msglen; done: if (hdev) hci_dev_put(hdev); + kfree(buf); return err; } -static int hci_logging_frame(struct sock *sk, struct sk_buff *skb, - unsigned int flags) +static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) 
{ struct hci_mon_hdr *hdr; + struct sk_buff *skb; struct hci_dev *hdev; u16 index; int err; @@ -1633,13 +1643,24 @@ static int hci_logging_frame(struct sock *sk, struct sk_buff *skb, * the priority byte, the ident length byte and at least one string * terminator NUL byte. Anything shorter are invalid packets. */ - if (skb->len < sizeof(*hdr) + 3) + if (len < sizeof(*hdr) + 3) return -EINVAL; + skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return err; + + if (memcpy_from_msg(skb_put(skb, len), msg, len)) { + err = -EFAULT; + goto drop; + } + hdr = (void *)skb->data; - if (__le16_to_cpu(hdr->len) != skb->len - sizeof(*hdr)) - return -EINVAL; + if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) { + err = -EINVAL; + goto drop; + } if (__le16_to_cpu(hdr->opcode) == 0x0000) { __u8 priority = skb->data[sizeof(*hdr)]; @@ -1658,20 +1679,25 @@ static int hci_logging_frame(struct sock *sk, struct sk_buff *skb, * The message follows the ident string (if present) and * must be NUL terminated. Otherwise it is not a valid packet. */ - if (priority > 7 || skb->data[skb->len - 1] != 0x00 || - ident_len > skb->len - sizeof(*hdr) - 3 || - skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) - return -EINVAL; + if (priority > 7 || skb->data[len - 1] != 0x00 || + ident_len > len - sizeof(*hdr) - 3 || + skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) { + err = -EINVAL; + goto drop; + } } else { - return -EINVAL; + err = -EINVAL; + goto drop; } index = __le16_to_cpu(hdr->index); if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); - if (!hdev) - return -ENODEV; + if (!hdev) { + err = -ENODEV; + goto drop; + } } else { hdev = NULL; } @@ -1679,11 +1705,13 @@ static int hci_logging_frame(struct sock *sk, struct sk_buff *skb, hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); - err = skb->len; + err = len; if (hdev) hci_dev_put(hdev); +drop: + kfree_skb(skb); return err; } @@ -1695,23 +1723,19 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, struct hci_dev *hdev; struct sk_buff *skb; int err; - const unsigned int flags = msg->msg_flags; BT_DBG("sock %p sk %p", sock, sk); - if (flags & MSG_OOB) + if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - if (flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL | MSG_ERRQUEUE | MSG_CMSG_COMPAT)) + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE| + MSG_CMSG_COMPAT)) return -EINVAL; - if (len < 4 || len > hci_pi(sk)->mtu) + if (len < 4 || len > HCI_MAX_FRAME_SIZE) return -EINVAL; - skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); - if (IS_ERR(skb)) - return PTR_ERR(skb); - lock_sock(sk); switch (hci_pi(sk)->channel) { @@ -1720,30 +1744,39 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, break; case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; - goto drop; + goto done; case HCI_CHANNEL_LOGGING: - err = hci_logging_frame(sk, skb, flags); - goto drop; + err = hci_logging_frame(sk, msg, len); + goto done; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); if (chan) - err = hci_mgmt_cmd(chan, sk, skb); + err = hci_mgmt_cmd(chan, sk, msg, len); else err = -EINVAL; mutex_unlock(&mgmt_chan_list_lock); - goto drop; + goto done; } hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); - goto drop; + goto done; } if (!test_bit(HCI_UP, &hdev->flags)) { err = -ENETDOWN; + goto done; + } + + skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + goto done; 
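+
+	/* Pull the payload out of the user iovec straight into the freshly
+	 * allocated skb; if the copy faults, the skb is dropped rather than
+	 * forwarded half-filled to the driver.
+	 */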
+ + if (memcpy_from_msg(skb_put(skb, len), msg, len)) { + err = -EFAULT; goto drop; } @@ -1824,8 +1857,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto done; } -static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int len) +static int hci_sock_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int len) { struct hci_ufilter uf = { .opcode = 0 }; struct sock *sk = sock->sk; @@ -1833,6 +1866,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { @@ -1907,57 +1943,8 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, return err; } -static int hci_sock_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int len) -{ - struct sock *sk = sock->sk; - int err = 0; - u16 opt; - - BT_DBG("sk %p, opt %d", sk, optname); - - if (level == SOL_HCI) - return hci_sock_setsockopt_old(sock, level, optname, optval, - len); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - lock_sock(sk); - - switch (optname) { - case BT_SNDMTU: - case BT_RCVMTU: - switch (hci_pi(sk)->channel) { - /* Don't allow changing MTU for channels that are meant for HCI - * traffic only. - */ - case HCI_CHANNEL_RAW: - case HCI_CHANNEL_USER: - err = -ENOPROTOOPT; - goto done; - } - - if (copy_from_sockptr(&opt, optval, sizeof(opt))) { - err = -EFAULT; - break; - } - - hci_pi(sk)->mtu = opt; - break; - - default: - err = -ENOPROTOOPT; - break; - } - -done: - release_sock(sk); - return err; -} - -static int hci_sock_getsockopt_old(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) +static int hci_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) { struct hci_ufilter uf; struct sock *sk = sock->sk; @@ -1965,6 +1952,9 @@ static int hci_sock_getsockopt_old(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + if (get_user(len, optlen)) return -EFAULT; @@ -2022,45 +2012,6 @@ static int hci_sock_getsockopt_old(struct socket *sock, int level, int optname, return err; } -static int hci_sock_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sk %p, opt %d", sk, optname); - - if (level == SOL_HCI) - return hci_sock_getsockopt_old(sock, level, optname, optval, - optlen); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - lock_sock(sk); - - switch (optname) { - case BT_SNDMTU: - case BT_RCVMTU: - if (put_user(hci_pi(sk)->mtu, (u16 __user *)optval)) - err = -EFAULT; - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static void hci_sock_destruct(struct sock *sk) -{ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); -} - static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, @@ -2114,7 +2065,6 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; sk->sk_state = BT_OPEN; - sk->sk_destruct = hci_sock_destruct; bt_sock_link(&hci_sk_list, sk); return 0; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 
5940744a8c..80848dfc01 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg) l2cap_unregister_user(session->conn, &session->user); hidp_session_put(session); - module_put_and_kthread_exit(0); + module_put_and_exit(0); return 0; } diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 369ed92dac..595fb3c9d6 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include #include #include diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e817ff0607..77ba68209d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7902,10 +7902,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, dst_type = ADDR_LE_DEV_RANDOM; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - hcon = hci_connect_le(hdev, dst, dst_type, false, + hcon = hci_connect_le(hdev, dst, dst_type, chan->sec_level, HCI_LE_CONN_TIMEOUT, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, NULL); else hcon = hci_connect_le_scan(hdev, dst, dst_type, chan->sec_level, diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ca8f07f354..d2c6785205 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -29,7 +29,6 @@ #include #include -#include #include #include diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 37087cf7dc..f09f0a78eb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,8 +38,6 @@ #include "mgmt_util.h" #include "mgmt_config.h" #include "msft.h" -#include "eir.h" -#include "aosp.h" #define MGMT_VERSION 1 #define MGMT_REVISION 21 @@ -277,39 +275,10 @@ static const u8 mgmt_status_table[] = { MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */ }; -static u8 mgmt_errno_status(int err) +static u8 mgmt_status(u8 hci_status) { - switch (err) { - case 0: - return MGMT_STATUS_SUCCESS; - case -EPERM: - return MGMT_STATUS_REJECTED; - case -EINVAL: - return MGMT_STATUS_INVALID_PARAMS; - case -EOPNOTSUPP: - return MGMT_STATUS_NOT_SUPPORTED; - case -EBUSY: - return MGMT_STATUS_BUSY; - case -ETIMEDOUT: - return MGMT_STATUS_AUTH_FAILED; - case -ENOMEM: - return MGMT_STATUS_NO_RESOURCES; - case -EISCONN: - return MGMT_STATUS_ALREADY_CONNECTED; - case -ENOTCONN: - return MGMT_STATUS_DISCONNECTED; - } - - return MGMT_STATUS_FAILED; -} - -static u8 mgmt_status(int err) -{ - if (err < 0) - return mgmt_errno_status(err); - - if (err < ARRAY_SIZE(mgmt_status_table)) - return mgmt_status_table[err]; + if (hci_status < ARRAY_SIZE(mgmt_status_table)) + return mgmt_status_table[hci_status]; return MGMT_STATUS_FAILED; } @@ -335,12 +304,6 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, HCI_SOCK_TRUSTED, skip_sk); } -static int mgmt_event_skb(struct sk_buff *skb, struct sock *skip_sk) -{ - return mgmt_send_event_skb(HCI_CHANNEL_CONTROL, skb, HCI_SOCK_TRUSTED, - skip_sk); -} - static u8 le_addr_type(u8 mgmt_addr_type) { if (mgmt_addr_type == BDADDR_LE_PUBLIC) @@ -846,7 +809,12 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_SECURE_CONN; settings |= MGMT_SETTING_PRIVACY; settings |= MGMT_SETTING_STATIC_ADDRESS; - settings |= MGMT_SETTING_ADVERTISING; + + /* When the experimental feature for LL Privacy support is + * enabled, then advertising is no longer supported. 
+ */ + if (!hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + settings |= MGMT_SETTING_ADVERTISING; } if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || @@ -934,6 +902,13 @@ static struct mgmt_pending_cmd *pending_find(u16 opcode, struct hci_dev *hdev) return mgmt_pending_find(HCI_CHANNEL_CONTROL, opcode, hdev); } +static struct mgmt_pending_cmd *pending_find_data(u16 opcode, + struct hci_dev *hdev, + const void *data) +{ + return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data); +} + u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; @@ -975,41 +950,32 @@ bool mgmt_get_connectable(struct hci_dev *hdev) return hci_dev_test_flag(hdev, HCI_CONNECTABLE); } -static int service_cache_sync(struct hci_dev *hdev, void *data) -{ - hci_update_eir_sync(hdev); - hci_update_class_sync(hdev); - - return 0; -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, service_cache.work); + struct hci_request req; if (!hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) return; - hci_cmd_sync_queue(hdev, service_cache_sync, NULL, NULL); -} + hci_req_init(&req, hdev); -static int rpa_expired_sync(struct hci_dev *hdev, void *data) -{ - /* The generation of a new RPA and programming it into the - * controller happens in the hci_req_enable_advertising() - * function. - */ - if (ext_adv_capable(hdev)) - return hci_start_ext_adv_sync(hdev, hdev->cur_adv_instance); - else - return hci_enable_advertising_sync(hdev); + hci_dev_lock(hdev); + + __hci_req_update_eir(&req); + __hci_req_update_class(&req); + + hci_dev_unlock(hdev); + + hci_req_run(&req, NULL); } static void rpa_expired(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rpa_expired.work); + struct hci_request req; bt_dev_dbg(hdev, ""); @@ -1018,7 +984,16 @@ static void rpa_expired(struct work_struct *work) if (!hci_dev_test_flag(hdev, HCI_ADVERTISING)) return; - hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); + /* The generation of a new RPA and programming it into the + * controller happens in the hci_req_enable_advertising() + * function. + */ + hci_req_init(&req, hdev); + if (ext_adv_capable(hdev)) + __hci_req_start_ext_adv(&req, hdev->cur_adv_instance); + else + __hci_req_enable_advertising(&req); + hci_req_run(&req, NULL); } static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) @@ -1155,6 +1130,16 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) sizeof(settings)); } +static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + bt_dev_dbg(hdev, "status 0x%02x", status); + + if (hci_conn_count(hdev) == 0) { + cancel_delayed_work(&hdev->power_off); + queue_work(hdev->req_workqueue, &hdev->power_off.work); + } +} + void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { struct mgmt_ev_advertising_added ev; @@ -1182,77 +1167,38 @@ static void cancel_adv_timeout(struct hci_dev *hdev) } } -/* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_dev *hdev) +static int clean_up_hci_state(struct hci_dev *hdev) { - struct hci_conn_params *p; + struct hci_request req; + struct hci_conn *conn; + bool discov_stopped; + int err; - list_for_each_entry(p, &hdev->le_conn_params, list) { - /* Needed for AUTO_OFF case where might not "really" - * have been powered off. 
- */ - list_del_init(&p->action); + hci_req_init(&req, hdev); - switch (p->auto_connect) { - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - list_add(&p->action, &hdev->pend_le_conns); - break; - case HCI_AUTO_CONN_REPORT: - list_add(&p->action, &hdev->pend_le_reports); - break; - default: - break; - } - } -} - -static int new_settings(struct hci_dev *hdev, struct sock *skip) -{ - __le32 ev = cpu_to_le32(get_current_settings(hdev)); - - return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, - sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); -} - -static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; - - bt_dev_dbg(hdev, "err %d", err); - - if (!err) { - if (cp->val) { - hci_dev_lock(hdev); - restart_le_actions(hdev); - hci_update_passive_scan(hdev); - hci_dev_unlock(hdev); - } - - send_settings_rsp(cmd->sk, cmd->opcode, hdev); - - /* Only call new_setting for power on as power off is deferred - * to hdev->power_off work which does call hci_dev_do_close. - */ - if (cp->val) - new_settings(hdev, cmd->sk); - } else { - mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, - mgmt_status(err)); + if (test_bit(HCI_ISCAN, &hdev->flags) || + test_bit(HCI_PSCAN, &hdev->flags)) { + u8 scan = 0x00; + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - mgmt_pending_free(cmd); -} + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, false); -static int set_powered_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(&req); - BT_DBG("%s", hdev->name); + discov_stopped = hci_req_stop_discovery(&req); - return hci_set_powered_sync(hdev, cp->val); + list_for_each_entry(conn, &hdev->conn_hash.list, list) { + /* 0x15 == Terminated due to Power Off */ + __hci_abort_conn(&req, conn, 0x15); + } + + err = hci_req_run(&req, clean_up_hci_complete); + if (!err && discov_stopped) + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + + return err; } static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, @@ -1281,20 +1227,43 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_POWERED, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } - err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, - mgmt_set_powered_complete); + if (cp->val) { + queue_work(hdev->req_workqueue, &hdev->power_on); + err = 0; + } else { + /* Disconnect connections, stop scans, etc */ + err = clean_up_hci_state(hdev); + if (!err) + queue_delayed_work(hdev->req_workqueue, &hdev->power_off, + HCI_POWER_OFF_TIMEOUT); + + /* ENODATA means there were no HCI commands queued */ + if (err == -ENODATA) { + cancel_delayed_work(&hdev->power_off); + queue_work(hdev->req_workqueue, &hdev->power_off.work); + err = 0; + } + } failed: hci_dev_unlock(hdev); return err; } +static int new_settings(struct hci_dev *hdev, struct sock *skip) +{ + __le32 ev = cpu_to_le32(get_current_settings(hdev)); + + return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); +} + int mgmt_new_settings(struct hci_dev *hdev) { return new_settings(hdev, NULL); @@ -1376,20 +1345,23 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void mgmt_set_discoverable_complete(struct 
hci_dev *hdev, void *data, - int err) +void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status) { - struct mgmt_pending_cmd *cmd = data; + struct mgmt_pending_cmd *cmd; - bt_dev_dbg(hdev, "err %d", err); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); - if (err) { - u8 mgmt_err = mgmt_status(err); + cmd = pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); + if (!cmd) + goto unlock; + + if (status) { + u8 mgmt_err = mgmt_status(status); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - goto done; + goto remove_cmd; } if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && @@ -1401,18 +1373,13 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev); new_settings(hdev, cmd->sk); -done: - mgmt_pending_free(cmd); +remove_cmd: + mgmt_pending_remove(cmd); + +unlock: hci_dev_unlock(hdev); } -static int set_discoverable_sync(struct hci_dev *hdev, void *data) -{ - BT_DBG("%s", hdev->name); - - return hci_update_discoverable_sync(hdev); -} - static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -1511,7 +1478,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1535,34 +1502,39 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, else hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - err = hci_cmd_sync_queue(hdev, set_discoverable_sync, cmd, - mgmt_set_discoverable_complete); + queue_work(hdev->req_workqueue, &hdev->discoverable_update); + err = 0; failed: hci_dev_unlock(hdev); return err; } -static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, - int err) +void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status) { - struct mgmt_pending_cmd *cmd = data; + struct mgmt_pending_cmd *cmd; - bt_dev_dbg(hdev, "err %d", err); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); - if (err) { - u8 mgmt_err = mgmt_status(err); + cmd = pending_find(MGMT_OP_SET_CONNECTABLE, hdev); + if (!cmd) + goto unlock; + + if (status) { + u8 mgmt_err = mgmt_status(status); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); - goto done; + goto remove_cmd; } send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); new_settings(hdev, cmd->sk); -done: - mgmt_pending_free(cmd); +remove_cmd: + mgmt_pending_remove(cmd); + +unlock: hci_dev_unlock(hdev); } @@ -1588,20 +1560,13 @@ static int set_connectable_update_settings(struct hci_dev *hdev, if (changed) { hci_req_update_scan(hdev); - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); return new_settings(hdev, sk); } return 0; } -static int set_connectable_sync(struct hci_dev *hdev, void *data) -{ - BT_DBG("%s", hdev->name); - - return hci_update_connectable_sync(hdev); -} - static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -1634,7 +1599,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1651,8 +1616,8 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void 
*data, hci_dev_clear_flag(hdev, HCI_CONNECTABLE); } - err = hci_cmd_sync_queue(hdev, set_connectable_sync, cmd, - mgmt_set_connectable_complete); + queue_work(hdev->req_workqueue, &hdev->connectable_update); + err = 0; failed: hci_dev_unlock(hdev); @@ -1687,7 +1652,12 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data, /* In limited privacy mode the change of bondable mode * may affect the local advertising address. */ - hci_update_discoverable(hdev); + if (hdev_is_powered(hdev) && + hci_dev_test_flag(hdev, HCI_ADVERTISING) && + hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && + hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) + queue_work(hdev->req_workqueue, + &hdev->discoverable_update); err = new_settings(hdev, sk); } @@ -1766,69 +1736,6 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) -{ - struct cmd_lookup match = { NULL, hdev }; - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; - u8 enable = cp->val; - bool changed; - - if (err) { - u8 mgmt_err = mgmt_status(err); - - if (enable && hci_dev_test_and_clear_flag(hdev, - HCI_SSP_ENABLED)) { - hci_dev_clear_flag(hdev, HCI_HS_ENABLED); - new_settings(hdev, NULL); - } - - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, - &mgmt_err); - return; - } - - if (enable) { - changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); - } else { - changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); - - if (!changed) - changed = hci_dev_test_and_clear_flag(hdev, - HCI_HS_ENABLED); - else - hci_dev_clear_flag(hdev, HCI_HS_ENABLED); - } - - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); - - if (changed) - new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); - - hci_update_eir_sync(hdev); -} - -static int set_ssp_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; - bool changed = false; - int err; - - if (cp->val) - changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); - - err = hci_write_ssp_mode_sync(hdev, cp->val); - - if (!err && changed) - hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); - - return err; -} - static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; @@ -1890,18 +1797,19 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) } cmd = mgmt_pending_add(sk, MGMT_OP_SET_SSP, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, set_ssp_sync, cmd, - set_ssp_complete); + goto failed; + } + if (!cp->val && hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, + sizeof(cp->val), &cp->val); + + err = hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &cp->val); if (err < 0) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_FAILED); - - if (cmd) - mgmt_pending_remove(cmd); + mgmt_pending_remove(cmd); + goto failed; } failed: @@ -1970,17 +1878,18 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return err; } -static void set_le_complete(struct hci_dev *hdev, void *data, int err) +static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; - u8 status = mgmt_status(err); - bt_dev_dbg(hdev, "err %d", err); + hci_dev_lock(hdev); if (status) { + u8 mgmt_err = mgmt_status(status); + 
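/* Answer every command still pending on MGMT_OP_SET_LE with the
+	 * translated error so none of the mgmt sockets is left waiting.
+	 */
+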
mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, - &status); - return; + &mgmt_err); + goto unlock; } mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); @@ -1989,54 +1898,39 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err) if (match.sk) sock_put(match.sk); -} - -static int set_le_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; - u8 val = !!cp->val; - int err; - - if (!val) { - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - hci_disable_advertising_sync(hdev); - - if (ext_adv_capable(hdev)) - hci_remove_ext_adv_instance_sync(hdev, 0, cmd->sk); - } else { - hci_dev_set_flag(hdev, HCI_LE_ENABLED); - } - - err = hci_write_le_host_supported_sync(hdev, val, 0); /* Make sure the controller has a good default for * advertising data. Restrict the update to when LE * has actually been enabled. During power on, the * update in powered_update_hci will take care of it. */ - if (!err && hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + struct hci_request req; + hci_req_init(&req, hdev); if (ext_adv_capable(hdev)) { - int status; + int err; - status = hci_setup_ext_adv_instance_sync(hdev, 0x00); - if (!status) - hci_update_scan_rsp_data_sync(hdev, 0x00); + err = __hci_req_setup_ext_adv_instance(&req, 0x00); + if (!err) + __hci_req_update_scan_rsp_data(&req, 0x00); } else { - hci_update_adv_data_sync(hdev, 0x00); - hci_update_scan_rsp_data_sync(hdev, 0x00); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); } - - hci_update_passive_scan(hdev); + hci_req_run(&req, NULL); + hci_update_background_scan(hdev); } - return err; +unlock: + hci_dev_unlock(hdev); } static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; + struct hci_cp_write_le_host_supported hci_cp; struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; u8 val, enabled; @@ -2106,20 +2000,33 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) } cmd = mgmt_pending_add(sk, MGMT_OP_SET_LE, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, set_le_sync, cmd, - set_le_complete); - - if (err < 0) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_FAILED); - - if (cmd) - mgmt_pending_remove(cmd); + goto unlock; } + hci_req_init(&req, hdev); + + memset(&hci_cp, 0, sizeof(hci_cp)); + + if (val) { + hci_cp.le = val; + hci_cp.simul = 0x00; + } else { + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(&req); + + if (ext_adv_capable(hdev)) + __hci_req_clear_ext_adv_sets(&req); + } + + hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp), + &hci_cp); + + err = hci_req_run(&req, le_enable_complete); + if (err < 0) + mgmt_pending_remove(cmd); + unlock: hci_dev_unlock(hdev); return err; @@ -2167,33 +2074,37 @@ static u8 get_uuid_size(const u8 *uuid) return 16; } -static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err) +static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status) { - struct mgmt_pending_cmd *cmd = data; + struct mgmt_pending_cmd *cmd; - bt_dev_dbg(hdev, "err %d", err); + hci_dev_lock(hdev); + + cmd = pending_find(mgmt_op, hdev); + if (!cmd) + goto unlock; mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err), hdev->dev_class, 3); + mgmt_status(status), hdev->dev_class, 3); - mgmt_pending_free(cmd); + 
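set_le() now builds HCI_OP_WRITE_LE_HOST_SUPPORTED itself: the parameter block is two octets, and the second (simultaneous LE and BR/EDR) octet is deliberately left at 0x00, matching the memset plus "hci_cp.simul = 0x00" above. A sketch of that layout, assuming the standard two-octet command format:

#include <stdint.h>
#include <stdio.h>

struct le_host_supported {
        uint8_t le;     /* 0x01 = LE supported by host */
        uint8_t simul;  /* deprecated, always sent as 0x00 */
};

int main(void)
{
        struct le_host_supported cp = { .le = 0x01, .simul = 0x00 };

        printf("param: %02x %02x\n", cp.le, cp.simul);
        return 0;
}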
mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); } -static int add_uuid_sync(struct hci_dev *hdev, void *data) +static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - int err; + bt_dev_dbg(hdev, "status 0x%02x", status); - err = hci_update_class_sync(hdev); - if (err) - return err; - - return hci_update_eir_sync(hdev); + mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status); } static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; struct mgmt_pending_cmd *cmd; + struct hci_request req; struct bt_uuid *uuid; int err; @@ -2219,17 +2130,28 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) list_add_tail(&uuid->list, &hdev->uuids); - cmd = mgmt_pending_new(sk, MGMT_OP_ADD_UUID, hdev, data, len); + hci_req_init(&req, hdev); + + __hci_req_update_class(&req); + __hci_req_update_eir(&req); + + err = hci_req_run(&req, add_uuid_complete); + if (err < 0) { + if (err != -ENODATA) + goto failed; + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, + hdev->dev_class, 3); + goto failed; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } - err = hci_cmd_sync_queue(hdev, add_uuid_sync, cmd, mgmt_class_complete); - if (err < 0) { - mgmt_pending_free(cmd); - goto failed; - } + err = 0; failed: hci_dev_unlock(hdev); @@ -2250,15 +2172,11 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; } -static int remove_uuid_sync(struct hci_dev *hdev, void *data) +static void remove_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - int err; + bt_dev_dbg(hdev, "status 0x%02x", status); - err = hci_update_class_sync(hdev); - if (err) - return err; - - return hci_update_eir_sync(hdev); + mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status); } static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, @@ -2268,6 +2186,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_pending_cmd *cmd; struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + struct hci_request req; int err, found; bt_dev_dbg(hdev, "sock %p", sk); @@ -2311,35 +2230,39 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, } update_class: - cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_UUID, hdev, data, len); + hci_req_init(&req, hdev); + + __hci_req_update_class(&req); + __hci_req_update_eir(&req); + + err = hci_req_run(&req, remove_uuid_complete); + if (err < 0) { + if (err != -ENODATA) + goto unlock; + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, + hdev->dev_class, 3); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len); if (!cmd) { err = -ENOMEM; goto unlock; } - err = hci_cmd_sync_queue(hdev, remove_uuid_sync, cmd, - mgmt_class_complete); - if (err < 0) - mgmt_pending_free(cmd); + err = 0; unlock: hci_dev_unlock(hdev); return err; } -static int set_class_sync(struct hci_dev *hdev, void *data) +static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - int err = 0; + bt_dev_dbg(hdev, "status 0x%02x", status); - if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) { - cancel_delayed_work_sync(&hdev->service_cache); - err = hci_update_eir_sync(hdev); - } - - if (err) - return err; - - return hci_update_class_sync(hdev); + mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status); } static int set_dev_class(struct sock *sk, struct 
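The hci_req_run() error handling in add_uuid() and remove_uuid() treats -ENODATA specially: an empty request means class and EIR were already current, so the command is completed immediately with success instead of being failed. A compilable model of that convention; run_request() here is a stub standing in for hci_req_run():

#include <errno.h>
#include <stdio.h>

static int run_request(int queued_cmds)
{
        return queued_cmds ? 0 : -ENODATA;
}

static int update_class(int queued_cmds)
{
        int err = run_request(queued_cmds);

        if (err < 0) {
                if (err != -ENODATA)
                        return err;     /* real failure */
                return 0;               /* nothing to do: complete now */
        }
        return 1;                       /* in flight: wait for callback */
}

int main(void)
{
        printf("empty req -> %d, queued req -> %d\n",
               update_class(0), update_class(1));
        return 0;
}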
hci_dev *hdev, void *data, @@ -2347,6 +2270,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_dev_class *cp = data; struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; bt_dev_dbg(hdev, "sock %p", sk); @@ -2378,16 +2302,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len); + hci_req_init(&req, hdev); + + if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) { + hci_dev_unlock(hdev); + cancel_delayed_work_sync(&hdev->service_cache); + hci_dev_lock(hdev); + __hci_req_update_eir(&req); + } + + __hci_req_update_class(&req); + + err = hci_req_run(&req, set_class_complete); + if (err < 0) { + if (err != -ENODATA) + goto unlock; + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, + hdev->dev_class, 3); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len); if (!cmd) { err = -ENOMEM; goto unlock; } - err = hci_cmd_sync_queue(hdev, set_class_sync, cmd, - mgmt_class_complete); - if (err < 0) - mgmt_pending_free(cmd); + err = 0; unlock: hci_dev_unlock(hdev); @@ -3285,70 +3227,65 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } -static int adv_expire_sync(struct hci_dev *hdev, u32 flags) +static void adv_expire(struct hci_dev *hdev, u32 flags) { struct adv_info *adv_instance; + struct hci_request req; + int err; adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); if (!adv_instance) - return 0; + return; /* stop if current instance doesn't need to be changed */ if (!(adv_instance->flags & flags)) - return 0; + return; cancel_adv_timeout(hdev); adv_instance = hci_get_next_instance(hdev, adv_instance->instance); if (!adv_instance) - return 0; + return; - hci_schedule_adv_instance_sync(hdev, adv_instance->instance, true); + hci_req_init(&req, hdev); + err = __hci_req_schedule_adv_instance(&req, adv_instance->instance, + true); + if (err) + return; - return 0; + hci_req_run(&req, NULL); } -static int name_changed_sync(struct hci_dev *hdev, void *data) +static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - return adv_expire_sync(hdev, MGMT_ADV_FLAG_LOCAL_NAME); -} + struct mgmt_cp_set_local_name *cp; + struct mgmt_pending_cmd *cmd; -static void set_name_complete(struct hci_dev *hdev, void *data, int err) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_set_local_name *cp = cmd->param; - u8 status = mgmt_status(err); + bt_dev_dbg(hdev, "status 0x%02x", status); - bt_dev_dbg(hdev, "err %d", err); + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); + if (!cmd) + goto unlock; + + cp = cmd->param; if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, - status); + mgmt_status(status)); } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, cp, sizeof(*cp)); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - hci_cmd_sync_queue(hdev, name_changed_sync, NULL, NULL); + adv_expire(hdev, MGMT_ADV_FLAG_LOCAL_NAME); } mgmt_pending_remove(cmd); -} -static int set_name_sync(struct hci_dev *hdev, void *data) -{ - if (lmp_bredr_capable(hdev)) { - hci_update_name_sync(hdev); - hci_update_eir_sync(hdev); - } - - /* The name is stored in the scan response data and so - * no need to update the advertising data here. 
- */ - if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING)) - hci_update_scan_rsp_data_sync(hdev, hdev->cur_adv_instance); - - return 0; +unlock: + hci_dev_unlock(hdev); } static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, @@ -3356,6 +3293,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_local_name *cp = data; struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; bt_dev_dbg(hdev, "sock %p", sk); @@ -3391,34 +3329,35 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, } cmd = mgmt_pending_add(sk, MGMT_OP_SET_LOCAL_NAME, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, set_name_sync, cmd, - set_name_complete); - - if (err < 0) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, - MGMT_STATUS_FAILED); - - if (cmd) - mgmt_pending_remove(cmd); - goto failed; } memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); + hci_req_init(&req, hdev); + + if (lmp_bredr_capable(hdev)) { + __hci_req_update_name(&req); + __hci_req_update_eir(&req); + } + + /* The name is stored in the scan response data and so + * no need to update the advertising data here. + */ + if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance); + + err = hci_req_run(&req, set_name_complete); + if (err < 0) + mgmt_pending_remove(cmd); + failed: hci_dev_unlock(hdev); return err; } -static int appearance_changed_sync(struct hci_dev *hdev, void *data) -{ - return adv_expire_sync(hdev, MGMT_ADV_FLAG_APPEARANCE); -} - static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3440,8 +3379,7 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, hdev->appearance = appearance; if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - hci_cmd_sync_queue(hdev, appearance_changed_sync, NULL, - NULL); + adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); ext_info_changed(hdev, sk); } @@ -3487,26 +3425,23 @@ int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip) sizeof(ev), skip); } -static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err) +static void set_default_phy_complete(struct hci_dev *hdev, u8 status, + u16 opcode, struct sk_buff *skb) { - struct mgmt_pending_cmd *cmd = data; - struct sk_buff *skb = cmd->skb; - u8 status = mgmt_status(err); + struct mgmt_pending_cmd *cmd; - if (!status) { - if (!skb) - status = MGMT_STATUS_FAILED; - else if (IS_ERR(skb)) - status = mgmt_status(PTR_ERR(skb)); - else - status = mgmt_status(skb->data[0]); - } + bt_dev_dbg(hdev, "status 0x%02x", status); - bt_dev_dbg(hdev, "status %d", status); + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev); + if (!cmd) + goto unlock; if (status) { mgmt_cmd_status(cmd->sk, hdev->id, - MGMT_OP_SET_PHY_CONFIGURATION, status); + MGMT_OP_SET_PHY_CONFIGURATION, + mgmt_status(status)); } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, 0, @@ -3515,56 +3450,19 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err) mgmt_phy_configuration_changed(hdev, cmd->sk); } - if (skb && !IS_ERR(skb)) - kfree_skb(skb); - mgmt_pending_remove(cmd); -} -static int set_default_phy_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_set_phy_configuration *cp = cmd->param; - struct hci_cp_le_set_default_phy cp_phy; 
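adv_expire(), shared by the local-name and appearance paths above, reschedules the current advertising instance only when that instance actually carries the field that changed. The test is a plain flag intersection; the values below are stand-ins for the MGMT_ADV_FLAG_* constants.

#include <stdint.h>
#include <stdio.h>

#define ADV_FLAG_LOCAL_NAME (1u << 0)
#define ADV_FLAG_APPEARANCE (1u << 1)

static int needs_expire(uint32_t instance_flags, uint32_t changed)
{
        /* Skip the reschedule when the instance does not advertise
         * the datum that changed ("stop if current instance doesn't
         * need to be changed" above). */
        return (instance_flags & changed) != 0;
}

int main(void)
{
        printf("%d %d\n",
               needs_expire(ADV_FLAG_LOCAL_NAME, ADV_FLAG_APPEARANCE), /* 0 */
               needs_expire(ADV_FLAG_APPEARANCE, ADV_FLAG_APPEARANCE)); /* 1 */
        return 0;
}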
- u32 selected_phys = __le32_to_cpu(cp->selected_phys); - - memset(&cp_phy, 0, sizeof(cp_phy)); - - if (!(selected_phys & MGMT_PHY_LE_TX_MASK)) - cp_phy.all_phys |= 0x01; - - if (!(selected_phys & MGMT_PHY_LE_RX_MASK)) - cp_phy.all_phys |= 0x02; - - if (selected_phys & MGMT_PHY_LE_1M_TX) - cp_phy.tx_phys |= HCI_LE_SET_PHY_1M; - - if (selected_phys & MGMT_PHY_LE_2M_TX) - cp_phy.tx_phys |= HCI_LE_SET_PHY_2M; - - if (selected_phys & MGMT_PHY_LE_CODED_TX) - cp_phy.tx_phys |= HCI_LE_SET_PHY_CODED; - - if (selected_phys & MGMT_PHY_LE_1M_RX) - cp_phy.rx_phys |= HCI_LE_SET_PHY_1M; - - if (selected_phys & MGMT_PHY_LE_2M_RX) - cp_phy.rx_phys |= HCI_LE_SET_PHY_2M; - - if (selected_phys & MGMT_PHY_LE_CODED_RX) - cp_phy.rx_phys |= HCI_LE_SET_PHY_CODED; - - cmd->skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_DEFAULT_PHY, - sizeof(cp_phy), &cp_phy, HCI_CMD_TIMEOUT); - - return 0; +unlock: + hci_dev_unlock(hdev); } static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_phy_configuration *cp = data; + struct hci_cp_le_set_default_phy cp_phy; struct mgmt_pending_cmd *cmd; + struct hci_request req; u32 selected_phys, configurable_phys, supported_phys, unconfigure_phys; u16 pkt_type = (HCI_DH1 | HCI_DM1); bool changed = false; @@ -3668,21 +3566,45 @@ static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev, cmd = mgmt_pending_add(sk, MGMT_OP_SET_PHY_CONFIGURATION, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, set_default_phy_sync, cmd, - set_default_phy_complete); - - if (err < 0) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_PHY_CONFIGURATION, - MGMT_STATUS_FAILED); - - if (cmd) - mgmt_pending_remove(cmd); + goto unlock; } + hci_req_init(&req, hdev); + + memset(&cp_phy, 0, sizeof(cp_phy)); + + if (!(selected_phys & MGMT_PHY_LE_TX_MASK)) + cp_phy.all_phys |= 0x01; + + if (!(selected_phys & MGMT_PHY_LE_RX_MASK)) + cp_phy.all_phys |= 0x02; + + if (selected_phys & MGMT_PHY_LE_1M_TX) + cp_phy.tx_phys |= HCI_LE_SET_PHY_1M; + + if (selected_phys & MGMT_PHY_LE_2M_TX) + cp_phy.tx_phys |= HCI_LE_SET_PHY_2M; + + if (selected_phys & MGMT_PHY_LE_CODED_TX) + cp_phy.tx_phys |= HCI_LE_SET_PHY_CODED; + + if (selected_phys & MGMT_PHY_LE_1M_RX) + cp_phy.rx_phys |= HCI_LE_SET_PHY_1M; + + if (selected_phys & MGMT_PHY_LE_2M_RX) + cp_phy.rx_phys |= HCI_LE_SET_PHY_2M; + + if (selected_phys & MGMT_PHY_LE_CODED_RX) + cp_phy.rx_phys |= HCI_LE_SET_PHY_CODED; + + hci_req_add(&req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp_phy), &cp_phy); + + err = hci_req_run_skb(&req, set_default_phy_complete); + if (err < 0) + mgmt_pending_remove(cmd); + unlock: hci_dev_unlock(hdev); @@ -3869,20 +3791,8 @@ static const u8 debug_uuid[16] = { }; #endif -/* 330859bc-7506-492d-9370-9a6f0614037f */ -static const u8 quality_report_uuid[16] = { - 0x7f, 0x03, 0x14, 0x06, 0x6f, 0x9a, 0x70, 0x93, - 0x2d, 0x49, 0x06, 0x75, 0xbc, 0x59, 0x08, 0x33, -}; - -/* a6695ace-ee7f-4fb9-881a-5fac66c629af */ -static const u8 offload_codecs_uuid[16] = { - 0xaf, 0x29, 0xc6, 0x66, 0xac, 0x5f, 0x1a, 0x88, - 0xb9, 0x4f, 0x7f, 0xee, 0xce, 0x5a, 0x69, 0xa6, -}; - /* 671b10b5-42c0-4696-9227-eb28d1b049d6 */ -static const u8 le_simultaneous_roles_uuid[16] = { +static const u8 simult_central_periph_uuid[16] = { 0xd6, 0x49, 0xb0, 0xd1, 0x28, 0xeb, 0x27, 0x92, 0x96, 0x46, 0xc0, 0x42, 0xb5, 0x10, 0x1b, 0x67, }; @@ -3896,7 +3806,7 @@ static const u8 rpa_resolution_uuid[16] = { static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 
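The selected_phys translation moved inline into set_phy_configuration() is pure bit mapping: an empty TX or RX selection sets the corresponding "no preference" bit in all_phys, and each selected PHY sets its HCI_LE_SET_PHY_* bit. A stand-alone rendering with locally defined bit values; the real MGMT_PHY_* constants sit at different bit positions, so these are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define PHY_LE_1M_TX    (1u << 0)
#define PHY_LE_1M_RX    (1u << 1)
#define PHY_LE_2M_TX    (1u << 2)
#define PHY_LE_2M_RX    (1u << 3)
#define PHY_LE_CODED_TX (1u << 4)
#define PHY_LE_CODED_RX (1u << 5)
#define PHY_LE_TX_MASK  (PHY_LE_1M_TX | PHY_LE_2M_TX | PHY_LE_CODED_TX)
#define PHY_LE_RX_MASK  (PHY_LE_1M_RX | PHY_LE_2M_RX | PHY_LE_CODED_RX)

#define SET_PHY_1M      0x01
#define SET_PHY_2M      0x02
#define SET_PHY_CODED   0x04

struct le_default_phy {
        uint8_t all_phys;       /* bit 0: no TX pref, bit 1: no RX pref */
        uint8_t tx_phys;
        uint8_t rx_phys;
};

static struct le_default_phy translate(uint32_t sel)
{
        struct le_default_phy cp = { 0 };

        if (!(sel & PHY_LE_TX_MASK))
                cp.all_phys |= 0x01;
        if (!(sel & PHY_LE_RX_MASK))
                cp.all_phys |= 0x02;
        if (sel & PHY_LE_1M_TX)
                cp.tx_phys |= SET_PHY_1M;
        if (sel & PHY_LE_2M_TX)
                cp.tx_phys |= SET_PHY_2M;
        if (sel & PHY_LE_CODED_TX)
                cp.tx_phys |= SET_PHY_CODED;
        if (sel & PHY_LE_1M_RX)
                cp.rx_phys |= SET_PHY_1M;
        if (sel & PHY_LE_2M_RX)
                cp.rx_phys |= SET_PHY_2M;
        if (sel & PHY_LE_CODED_RX)
                cp.rx_phys |= SET_PHY_CODED;
        return cp;
}

int main(void)
{
        struct le_default_phy cp = translate(PHY_LE_2M_TX | PHY_LE_2M_RX);

        printf("all=%02x tx=%02x rx=%02x\n",
               cp.all_phys, cp.tx_phys, cp.rx_phys);
        return 0;
}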
data_len) { - char buf[102]; /* Enough space for 5 features: 2 + 20 * 5 */ + char buf[62]; /* Enough space for 3 features */ struct mgmt_rp_read_exp_features_info *rp = (void *)buf; u16 idx = 0; u32 flags; @@ -3915,18 +3825,21 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, } #endif - if (hdev && hci_dev_le_state_simultaneous(hdev)) { - if (hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)) + if (hdev) { + if (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && + (hdev->le_states[4] & 0x08) && /* Central */ + (hdev->le_states[4] & 0x40) && /* Peripheral */ + (hdev->le_states[3] & 0x10)) /* Simultaneous */ flags = BIT(0); else flags = 0; - memcpy(rp->features[idx].uuid, le_simultaneous_roles_uuid, 16); + memcpy(rp->features[idx].uuid, simult_central_periph_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } - if (hdev && ll_privacy_capable(hdev)) { + if (hdev && use_ll_privacy(hdev)) { if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) flags = BIT(0) | BIT(1); else @@ -3937,29 +3850,6 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, idx++; } - if (hdev && (aosp_has_quality_report(hdev) || - hdev->set_quality_report)) { - if (hci_dev_test_flag(hdev, HCI_QUALITY_REPORT)) - flags = BIT(0); - else - flags = 0; - - memcpy(rp->features[idx].uuid, quality_report_uuid, 16); - rp->features[idx].flags = cpu_to_le32(flags); - idx++; - } - - if (hdev && hdev->get_data_path_id) { - if (hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) - flags = BIT(0); - else - flags = 0; - - memcpy(rp->features[idx].uuid, offload_codecs_uuid, 16); - rp->features[idx].flags = cpu_to_le32(flags); - idx++; - } - rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable @@ -3981,30 +3871,26 @@ static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev, memcpy(ev.uuid, rpa_resolution_uuid, 16); ev.flags = cpu_to_le32((enabled ? BIT(0) : 0) | BIT(1)); - if (enabled && privacy_mode_capable(hdev)) - set_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags); - else - clear_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags); - return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_EXP_FEATURE_EVENTS, skip); } -static int exp_feature_changed(struct hci_dev *hdev, const u8 *uuid, - bool enabled, struct sock *skip) +#ifdef CONFIG_BT_FEATURE_DEBUG +static int exp_debug_feature_changed(bool enabled, struct sock *skip) { struct mgmt_ev_exp_feature_changed ev; memset(&ev, 0, sizeof(ev)); - memcpy(ev.uuid, uuid, 16); + memcpy(ev.uuid, debug_uuid, 16); ev.flags = cpu_to_le32(enabled ? 
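The buf[] sizing comments are easy to verify: the reply is a 2-byte feature count followed by 20-byte entries (16-byte UUID plus 32-bit flags), so three features need 2 + 3 * 20 = 62 bytes, and the removed five-feature variant needed 102, exactly as its comment said. A quick check:

#include <stdint.h>
#include <stdio.h>

struct exp_feature {
        uint8_t  uuid[16];
        uint32_t flags;
} __attribute__((packed));

int main(void)
{
        printf("3 features: %zu bytes\n",
               sizeof(uint16_t) + 3 * sizeof(struct exp_feature)); /* 62 */
        printf("5 features: %zu bytes\n",
               sizeof(uint16_t) + 5 * sizeof(struct exp_feature)); /* 102 */
        return 0;
}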
BIT(0) : 0); - return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev, + return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, NULL, &ev, sizeof(ev), HCI_MGMT_EXP_FEATURE_EVENTS, skip); } +#endif #define EXP_FEAT(_uuid, _set_func) \ { \ @@ -4028,7 +3914,7 @@ static int set_zero_key_func(struct sock *sk, struct hci_dev *hdev, bt_dbg_set(false); if (changed) - exp_feature_changed(NULL, ZERO_KEY, false, sk); + exp_debug_feature_changed(false, sk); } #endif @@ -4038,8 +3924,7 @@ static int set_zero_key_func(struct sock *sk, struct hci_dev *hdev, changed = hci_dev_test_and_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); if (changed) - exp_feature_changed(hdev, rpa_resolution_uuid, false, - sk); + exp_ll_privacy_feature_changed(false, hdev, sk); } hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); @@ -4090,7 +3975,7 @@ static int set_debug_func(struct sock *sk, struct hci_dev *hdev, &rp, sizeof(rp)); if (changed) - exp_feature_changed(hdev, debug_uuid, val, sk); + exp_debug_feature_changed(val, sk); return err; } @@ -4161,196 +4046,6 @@ static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev, return err; } -static int set_quality_report_func(struct sock *sk, struct hci_dev *hdev, - struct mgmt_cp_set_exp_feature *cp, - u16 data_len) -{ - struct mgmt_rp_set_exp_feature rp; - bool val, changed; - int err; - - /* Command requires to use a valid controller index */ - if (!hdev) - return mgmt_cmd_status(sk, MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_INDEX); - - /* Parameters are limited to a single octet */ - if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); - - /* Only boolean on/off is supported */ - if (cp->param[0] != 0x00 && cp->param[0] != 0x01) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); - - hci_req_sync_lock(hdev); - - val = !!cp->param[0]; - changed = (val != hci_dev_test_flag(hdev, HCI_QUALITY_REPORT)); - - if (!aosp_has_quality_report(hdev) && !hdev->set_quality_report) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_NOT_SUPPORTED); - goto unlock_quality_report; - } - - if (changed) { - if (hdev->set_quality_report) - err = hdev->set_quality_report(hdev, val); - else - err = aosp_set_quality_report(hdev, val); - - if (err) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_FAILED); - goto unlock_quality_report; - } - - if (val) - hci_dev_set_flag(hdev, HCI_QUALITY_REPORT); - else - hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); - } - - bt_dev_dbg(hdev, "quality report enable %d changed %d", val, changed); - - memcpy(rp.uuid, quality_report_uuid, 16); - rp.flags = cpu_to_le32(val ? 
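The UUID tables in this area (debug_uuid, rpa_resolution_uuid, and the removed ones) store each 128-bit UUID byte-reversed, i.e. in Bluetooth's little-endian wire order. Printing the array back to front recovers the canonical string quoted in the source comment; using the quality-report UUID shown earlier as the example:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Byte-reversed form of 330859bc-7506-492d-9370-9a6f0614037f,
         * copied from the table above. */
        static const uint8_t quality_report_uuid[16] = {
                0x7f, 0x03, 0x14, 0x06, 0x6f, 0x9a, 0x70, 0x93,
                0x2d, 0x49, 0x06, 0x75, 0xbc, 0x59, 0x08, 0x33,
        };
        int i;

        for (i = 15; i >= 0; i--)
                printf("%02x", quality_report_uuid[i]);
        printf("\n");   /* 330859bc7506492d93709a6f0614037f */
        return 0;
}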
BIT(0) : 0); - hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); - - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, 0, - &rp, sizeof(rp)); - - if (changed) - exp_feature_changed(hdev, quality_report_uuid, val, sk); - -unlock_quality_report: - hci_req_sync_unlock(hdev); - return err; -} - -static int set_offload_codec_func(struct sock *sk, struct hci_dev *hdev, - struct mgmt_cp_set_exp_feature *cp, - u16 data_len) -{ - bool val, changed; - int err; - struct mgmt_rp_set_exp_feature rp; - - /* Command requires to use a valid controller index */ - if (!hdev) - return mgmt_cmd_status(sk, MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_INDEX); - - /* Parameters are limited to a single octet */ - if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); - - /* Only boolean on/off is supported */ - if (cp->param[0] != 0x00 && cp->param[0] != 0x01) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); - - val = !!cp->param[0]; - changed = (val != hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)); - - if (!hdev->get_data_path_id) { - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_NOT_SUPPORTED); - } - - if (changed) { - if (val) - hci_dev_set_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED); - else - hci_dev_clear_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED); - } - - bt_dev_info(hdev, "offload codecs enable %d changed %d", - val, changed); - - memcpy(rp.uuid, offload_codecs_uuid, 16); - rp.flags = cpu_to_le32(val ? BIT(0) : 0); - hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); - err = mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, 0, - &rp, sizeof(rp)); - - if (changed) - exp_feature_changed(hdev, offload_codecs_uuid, val, sk); - - return err; -} - -static int set_le_simultaneous_roles_func(struct sock *sk, struct hci_dev *hdev, - struct mgmt_cp_set_exp_feature *cp, - u16 data_len) -{ - bool val, changed; - int err; - struct mgmt_rp_set_exp_feature rp; - - /* Command requires to use a valid controller index */ - if (!hdev) - return mgmt_cmd_status(sk, MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_INDEX); - - /* Parameters are limited to a single octet */ - if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); - - /* Only boolean on/off is supported */ - if (cp->param[0] != 0x00 && cp->param[0] != 0x01) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); - - val = !!cp->param[0]; - changed = (val != hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)); - - if (!hci_dev_le_state_simultaneous(hdev)) { - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_NOT_SUPPORTED); - } - - if (changed) { - if (val) - hci_dev_set_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES); - else - hci_dev_clear_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES); - } - - bt_dev_info(hdev, "LE simultaneous roles enable %d changed %d", - val, changed); - - memcpy(rp.uuid, le_simultaneous_roles_uuid, 16); - rp.flags = cpu_to_le32(val ? 
BIT(0) : 0); - hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); - err = mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, 0, - &rp, sizeof(rp)); - - if (changed) - exp_feature_changed(hdev, le_simultaneous_roles_uuid, val, sk); - - return err; -} - static const struct mgmt_exp_feature { const u8 *uuid; int (*set_func)(struct sock *sk, struct hci_dev *hdev, @@ -4361,9 +4056,6 @@ static const struct mgmt_exp_feature { EXP_FEAT(debug_uuid, set_debug_func), #endif EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func), - EXP_FEAT(quality_report_uuid, set_quality_report_func), - EXP_FEAT(offload_codecs_uuid, set_offload_codec_func), - EXP_FEAT(le_simultaneous_roles_uuid, set_le_simultaneous_roles_func), /* end with a null feature */ EXP_FEAT(NULL, NULL) @@ -4387,6 +4079,8 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_NOT_SUPPORTED); } +#define SUPPORTED_DEVICE_FLAGS() ((1U << HCI_CONN_FLAG_MAX) - 1) + static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -4394,7 +4088,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_rp_get_device_flags rp; struct bdaddr_list_with_flags *br_params; struct hci_conn_params *params; - u32 supported_flags; + u32 supported_flags = SUPPORTED_DEVICE_FLAGS(); u32 current_flags = 0; u8 status = MGMT_STATUS_INVALID_PARAMS; @@ -4403,9 +4097,6 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - bitmap_to_arr32(&supported_flags, hdev->conn_flags, - __HCI_CONN_NUM_FLAGS); - memset(&rp, 0, sizeof(rp)); if (cp->addr.type == BDADDR_BREDR) { @@ -4415,8 +4106,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, if (!br_params) goto done; - bitmap_to_arr32(¤t_flags, br_params->flags, - __HCI_CONN_NUM_FLAGS); + current_flags = br_params->current_flags; } else { params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); @@ -4424,8 +4114,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, if (!params) goto done; - bitmap_to_arr32(¤t_flags, params->flags, - __HCI_CONN_NUM_FLAGS); + current_flags = params->current_flags; } bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); @@ -4463,16 +4152,13 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, struct bdaddr_list_with_flags *br_params; struct hci_conn_params *params; u8 status = MGMT_STATUS_INVALID_PARAMS; - u32 supported_flags; + u32 supported_flags = SUPPORTED_DEVICE_FLAGS(); u32 current_flags = __le32_to_cpu(cp->current_flags); bt_dev_dbg(hdev, "Set device flags %pMR (type 0x%x) = 0x%x", &cp->addr.bdaddr, cp->addr.type, __le32_to_cpu(current_flags)); - bitmap_to_arr32(&supported_flags, hdev->conn_flags, - __HCI_CONN_NUM_FLAGS); - if ((supported_flags | current_flags) != supported_flags) { bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)", current_flags, supported_flags); @@ -4487,7 +4173,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, cp->addr.type); if (br_params) { - bitmap_from_u64(br_params->flags, current_flags); + br_params->current_flags = current_flags; status = MGMT_STATUS_SUCCESS; } else { bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)", @@ -4497,15 +4183,8 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); if (params) { - bitmap_from_u64(params->flags, 
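SUPPORTED_DEVICE_FLAGS(), reintroduced above, is the usual dense-mask idiom: with HCI_CONN_FLAG_MAX flag bits defined, (1U << MAX) - 1 sets exactly bits 0..MAX-1, and set_device_flags() rejects any request whose union with the mask differs from the mask. A worked example; the HCI_CONN_FLAG_MAX value used here is an assumption for the demo, not taken from the hunk:

#include <stdio.h>

#define HCI_CONN_FLAG_MAX 1     /* assumed number of defined flag bits */
#define SUPPORTED_DEVICE_FLAGS() ((1U << HCI_CONN_FLAG_MAX) - 1)

int main(void)
{
        unsigned int supported = SUPPORTED_DEVICE_FLAGS();
        unsigned int requested = 0x2;   /* an undefined bit */

        /* The validity test used above: any bit outside the mask makes
         * the union differ from the mask itself. */
        printf("mask=0x%x, bad=%d\n", supported,
               (supported | requested) != supported);
        return 0;
}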
current_flags); + params->current_flags = current_flags; status = MGMT_STATUS_SUCCESS; - - /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY - * has been set. - */ - if (test_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, - params->flags)) - hci_update_passive_scan(hdev); } else { bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", &cp->addr.bdaddr, @@ -4627,7 +4306,7 @@ int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status) hdev->adv_monitors_cnt++; if (monitor->state == ADV_MONITOR_STATE_NOT_REGISTERED) monitor->state = ADV_MONITOR_STATE_REGISTERED; - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); } err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, @@ -4853,7 +4532,7 @@ int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status) rp.monitor_handle = cp->monitor_handle; if (!status) - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); @@ -4932,33 +4611,28 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, status); } -static void read_local_oob_data_complete(struct hci_dev *hdev, void *data, int err) +static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, + u16 opcode, struct sk_buff *skb) { struct mgmt_rp_read_local_oob_data mgmt_rp; size_t rp_size = sizeof(mgmt_rp); - struct mgmt_pending_cmd *cmd = data; - struct sk_buff *skb = cmd->skb; - u8 status = mgmt_status(err); + struct mgmt_pending_cmd *cmd; - if (!status) { - if (!skb) - status = MGMT_STATUS_FAILED; - else if (IS_ERR(skb)) - status = mgmt_status(PTR_ERR(skb)); - else - status = mgmt_status(skb->data[0]); - } + bt_dev_dbg(hdev, "status %u", status); - bt_dev_dbg(hdev, "status %d", status); + cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev); + if (!cmd) + return; - if (status) { - mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, status); + if (status || !skb) { + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, + status ? 
mgmt_status(status) : MGMT_STATUS_FAILED); goto remove; } memset(&mgmt_rp, 0, sizeof(mgmt_rp)); - if (!bredr_sc_enabled(hdev)) { + if (opcode == HCI_OP_READ_LOCAL_OOB_DATA) { struct hci_rp_read_local_oob_data *rp = (void *) skb->data; if (skb->len < sizeof(*rp)) { @@ -4993,31 +4667,14 @@ static void read_local_oob_data_complete(struct hci_dev *hdev, void *data, int e MGMT_STATUS_SUCCESS, &mgmt_rp, rp_size); remove: - if (skb && !IS_ERR(skb)) - kfree_skb(skb); - - mgmt_pending_free(cmd); -} - -static int read_local_oob_data_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - - if (bredr_sc_enabled(hdev)) - cmd->skb = hci_read_local_oob_data_sync(hdev, true, cmd->sk); - else - cmd->skb = hci_read_local_oob_data_sync(hdev, false, cmd->sk); - - if (IS_ERR(cmd->skb)) - return PTR_ERR(cmd->skb); - else - return 0; + mgmt_pending_remove(cmd); } static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; bt_dev_dbg(hdev, "sock %p", sk); @@ -5042,21 +4699,23 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, goto unlock; } - cmd = mgmt_pending_new(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0); - if (!cmd) + cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0); + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, read_local_oob_data_sync, cmd, - read_local_oob_data_complete); - - if (err < 0) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_FAILED); - - if (cmd) - mgmt_pending_free(cmd); + goto unlock; } + hci_req_init(&req, hdev); + + if (bredr_sc_enabled(hdev)) + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_EXT_DATA, 0, NULL); + else + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL); + + err = hci_req_run_skb(&req, read_local_oob_data_complete); + if (err < 0) + mgmt_pending_remove(cmd); + unlock: hci_dev_unlock(hdev); return err; @@ -5228,6 +4887,13 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); + + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_UNPAUSE_DISCOVERY, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, @@ -5257,25 +4923,6 @@ static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, return true; } -static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) -{ - struct mgmt_pending_cmd *cmd = data; - - bt_dev_dbg(hdev, "err %d", err); - - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), - cmd->param, 1); - mgmt_pending_free(cmd); - - hci_discovery_set_state(hdev, err ? 
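read_local_oob_data_complete() now keys off the HCI opcode to decide which reply layout arrived. The MGMT reply has room for both the P-192 and P-256 hash/randomizer pairs; on a legacy (non-SC) controller presumably only the first half is filled and sent, which the size arithmetic below makes concrete. Field names follow the MGMT spec, and the actual trimming happens outside the quoted hunk, so treat this as a sketch:

#include <stdint.h>
#include <stdio.h>

struct rp_read_local_oob_data {
        uint8_t hash192[16];
        uint8_t rand192[16];
        uint8_t hash256[16];
        uint8_t rand256[16];
} __attribute__((packed));

int main(void)
{
        size_t rp_size = sizeof(struct rp_read_local_oob_data);

        printf("SC-capable reply: %zu bytes\n", rp_size);      /* 64 */
        printf("legacy reply:     %zu bytes\n", rp_size / 2);  /* 32 */
        return 0;
}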
DISCOVERY_STOPPED: - DISCOVERY_FINDING); -} - -static int start_discovery_sync(struct hci_dev *hdev, void *data) -{ - return hci_start_discovery_sync(hdev); -} - static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, u16 op, void *data, u16 len) { @@ -5327,20 +4974,17 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, else hdev->discovery.limited = false; - cmd = mgmt_pending_new(sk, op, hdev, data, len); + cmd = mgmt_pending_add(sk, op, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } - err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd, - start_discovery_complete); - if (err < 0) { - mgmt_pending_free(cmd); - goto failed; - } + cmd->cmd_complete = generic_cmd_complete; hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); @@ -5362,6 +5006,13 @@ static int start_limited_discovery(struct sock *sk, struct hci_dev *hdev, data, len); } +static int service_discovery_cmd_complete(struct mgmt_pending_cmd *cmd, + u8 status) +{ + return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, 1); +} + static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -5430,13 +5081,15 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_START_SERVICE_DISCOVERY, + cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } + cmd->cmd_complete = service_discovery_cmd_complete; + /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ @@ -5460,14 +5113,9 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, } } - err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd, - start_discovery_complete); - if (err < 0) { - mgmt_pending_free(cmd); - goto failed; - } - hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); @@ -5489,25 +5137,12 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); -} -static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) -{ - struct mgmt_pending_cmd *cmd = data; - - bt_dev_dbg(hdev, "err %d", err); - - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), - cmd->param, 1); - mgmt_pending_free(cmd); - - if (!err) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); -} - -static int stop_discovery_sync(struct hci_dev *hdev, void *data) -{ - return hci_stop_discovery_sync(hdev); + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, @@ -5535,20 +5170,17 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - cmd = mgmt_pending_new(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto unlock; } - err = hci_cmd_sync_queue(hdev, stop_discovery_sync, cmd, - stop_discovery_complete); - if (err < 0) { - mgmt_pending_free(cmd); - goto unlock; - } + cmd->cmd_complete = generic_cmd_complete; hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, 
&hdev->discov_update); + err = 0; unlock: hci_dev_unlock(hdev); @@ -5669,15 +5301,11 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static int set_device_id_sync(struct hci_dev *hdev, void *data) -{ - return hci_update_eir_sync(hdev); -} - static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_device_id *cp = data; + struct hci_request req; int err; __u16 source; @@ -5699,32 +5327,38 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); - hci_cmd_sync_queue(hdev, set_device_id_sync, NULL, NULL); + hci_req_init(&req, hdev); + __hci_req_update_eir(&req); + hci_req_run(&req, NULL); hci_dev_unlock(hdev); return err; } -static void enable_advertising_instance(struct hci_dev *hdev, int err) +static void enable_advertising_instance(struct hci_dev *hdev, u8 status, + u16 opcode) { - if (err) - bt_dev_err(hdev, "failed to re-configure advertising %d", err); - else - bt_dev_dbg(hdev, "status %d", err); + bt_dev_dbg(hdev, "status %u", status); } -static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) +static void set_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct cmd_lookup match = { NULL, hdev }; + struct hci_request req; u8 instance; struct adv_info *adv_instance; - u8 status = mgmt_status(err); + int err; + + hci_dev_lock(hdev); if (status) { + u8 mgmt_err = mgmt_status(status); + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, - cmd_status_rsp, &status); - return; + cmd_status_rsp, &mgmt_err); + goto unlock; } if (hci_dev_test_flag(hdev, HCI_LE_ADV)) @@ -5740,60 +5374,46 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) if (match.sk) sock_put(match.sk); + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused advertising"); + wake_up(&hdev->suspend_wait_q); + } else if (test_and_clear_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused advertising"); + wake_up(&hdev->suspend_wait_q); + } + /* If "Set Advertising" was just disabled and instance advertising was * set up earlier, then re-enable multi-instance advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || list_empty(&hdev->adv_instances)) - return; + goto unlock; instance = hdev->cur_adv_instance; if (!instance) { adv_instance = list_first_entry_or_null(&hdev->adv_instances, struct adv_info, list); if (!adv_instance) - return; + goto unlock; instance = adv_instance->instance; } - err = hci_schedule_adv_instance_sync(hdev, instance, true); + hci_req_init(&req, hdev); - enable_advertising_instance(hdev, err); -} + err = __hci_req_schedule_adv_instance(&req, instance, true); -static int set_adv_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; - u8 val = !!cp->val; + if (!err) + err = hci_req_run(&req, enable_advertising_instance); - if (cp->val == 0x02) - hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); - else - hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + if (err) + bt_dev_err(hdev, "failed to re-configure advertising"); - cancel_adv_timeout(hdev); - - if (val) { - /* Switch to instance "0" for the Set Advertising setting. - * We cannot use update_[adv|scan_rsp]_data() here as the - * HCI_ADVERTISING flag is not yet set. 
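In set_advertising_complete(), a cur_adv_instance of 0 means "no current instance", so re-enabling multi-instance advertising falls back to the first configured instance. A minimal decoder of that convention, with a plain array standing in for the kernel's adv_instances list:

#include <stdio.h>

static int pick_instance(int cur, const int *instances, int n)
{
        if (cur)
                return cur;             /* keep the current instance */
        return n ? instances[0] : 0;    /* 0: nothing to re-enable */
}

int main(void)
{
        int insts[] = { 2, 5 };

        printf("%d %d\n",
               pick_instance(0, insts, 2),      /* 2: fall back to first */
               pick_instance(5, insts, 2));     /* 5: keep current */
        return 0;
}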
- */ - hdev->cur_adv_instance = 0x00; - - if (ext_adv_capable(hdev)) { - hci_start_ext_adv_sync(hdev, 0x00); - } else { - hci_update_adv_data_sync(hdev, 0x00); - hci_update_scan_rsp_data_sync(hdev, 0x00); - hci_enable_advertising_sync(hdev); - } - } else { - hci_disable_advertising_sync(hdev); - } - - return 0; +unlock: + hci_dev_unlock(hdev); } static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, @@ -5801,6 +5421,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; + struct hci_request req; u8 val, status; int err; @@ -5811,6 +5432,13 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, status); + /* Enabling the experimental LL Privacy support disables support for + * advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -5866,13 +5494,40 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, } cmd = mgmt_pending_add(sk, MGMT_OP_SET_ADVERTISING, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, set_adv_sync, cmd, - set_advertising_complete); + goto unlock; + } - if (err < 0 && cmd) + hci_req_init(&req, hdev); + + if (cp->val == 0x02) + hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + else + hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); + + cancel_adv_timeout(hdev); + + if (val) { + /* Switch to instance "0" for the Set Advertising setting. + * We cannot use update_[adv|scan_rsp]_data() here as the + * HCI_ADVERTISING flag is not yet set. + */ + hdev->cur_adv_instance = 0x00; + + if (ext_adv_capable(hdev)) { + __hci_req_start_ext_adv(&req, 0x00); + } else { + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); + __hci_req_enable_advertising(&req); + } + } else { + __hci_req_disable_advertising(&req); + } + + err = hci_req_run(&req, set_advertising_complete); + if (err < 0) mgmt_pending_remove(cmd); unlock: @@ -5965,23 +5620,38 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, * loaded.
*/ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && - hdev->discovery.state == DISCOVERY_STOPPED) - hci_update_passive_scan(hdev); + hdev->discovery.state == DISCOVERY_STOPPED) { + struct hci_request req; + + hci_req_init(&req, hdev); + + hci_req_add_le_scan_disable(&req, false); + hci_req_add_le_passive_scan(&req); + + hci_req_run(&req, NULL); + } hci_dev_unlock(hdev); return err; } -static void fast_connectable_complete(struct hci_dev *hdev, void *data, int err) +static void fast_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { - struct mgmt_pending_cmd *cmd = data; + struct mgmt_pending_cmd *cmd; - bt_dev_dbg(hdev, "err %d", err); + bt_dev_dbg(hdev, "status 0x%02x", status); - if (err) { + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); + if (!cmd) + goto unlock; + + if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - mgmt_status(err)); + mgmt_status(status)); } else { struct mgmt_mode *cp = cmd->param; @@ -5994,15 +5664,10 @@ static void fast_connectable_complete(struct hci_dev *hdev, void *data, int err) new_settings(hdev, cmd->sk); } - mgmt_pending_free(cmd); -} + mgmt_pending_remove(cmd); -static int write_fast_connectable_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; - - return hci_write_fast_connectable_sync(hdev, cp->val); +unlock: + hci_dev_unlock(hdev); } static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, @@ -6010,49 +5675,58 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || hdev->hci_ver < BLUETOOTH_VER_1_2) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_FAST_CONNECTABLE, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_FAST_CONNECTABLE, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); + if (pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_BUSY); + goto unlock; + } + if (!!cp->val == hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) { - err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); goto unlock; } if (!hdev_is_powered(hdev)) { hci_dev_change_flag(hdev, HCI_FAST_CONNECTABLE); - err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); new_settings(hdev, sk); goto unlock; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev, data, - len); - if (!cmd) + cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev, + data, len); + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, write_fast_connectable_sync, cmd, - fast_connectable_complete); + goto unlock; + } + hci_req_init(&req, hdev); + + __hci_req_write_fast_connectable(&req, cp->val); + + err = hci_req_run(&req, fast_connectable_complete); if (err < 0) { - mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_FAILED); - - if (cmd) - mgmt_pending_free(cmd); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_FAILED); + 
mgmt_pending_remove(cmd); } unlock: @@ -6061,14 +5735,20 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, return err; } -static void set_bredr_complete(struct hci_dev *hdev, void *data, int err) +static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct mgmt_pending_cmd *cmd = data; + struct mgmt_pending_cmd *cmd; - bt_dev_dbg(hdev, "err %d", err); + bt_dev_dbg(hdev, "status 0x%02x", status); - if (err) { - u8 mgmt_err = mgmt_status(err); + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_SET_BREDR, hdev); + if (!cmd) + goto unlock; + + if (status) { + u8 mgmt_err = mgmt_status(status); /* We need to restore the flag if related HCI commands * failed. @@ -6081,31 +5761,17 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err) new_settings(hdev, cmd->sk); } - mgmt_pending_free(cmd); -} + mgmt_pending_remove(cmd); -static int set_bredr_sync(struct hci_dev *hdev, void *data) -{ - int status; - - status = hci_write_fast_connectable_sync(hdev, false); - - if (!status) - status = hci_update_scan_sync(hdev); - - /* Since only the advertising data flags will change, there - * is no need to update the scan response data. - */ - if (!status) - status = hci_update_adv_data_sync(hdev, hdev->cur_adv_instance); - - return status; +unlock: + hci_dev_unlock(hdev); } static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; bt_dev_dbg(hdev, "sock %p", sk); @@ -6177,19 +5843,15 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) } } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_BREDR, hdev, data, len); - if (!cmd) + if (pending_find(MGMT_OP_SET_BREDR, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_BUSY); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_BREDR, hdev, data, len); + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, set_bredr_sync, cmd, - set_bredr_complete); - - if (err < 0) { - mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, - MGMT_STATUS_FAILED); - if (cmd) - mgmt_pending_free(cmd); - goto unlock; } @@ -6198,23 +5860,42 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); + hci_req_init(&req, hdev); + + __hci_req_write_fast_connectable(&req, false); + __hci_req_update_scan(&req); + + /* Since only the advertising data flags will change, there + * is no need to update the scan response data. 
+ */ + __hci_req_update_adv_data(&req, hdev->cur_adv_instance); + + err = hci_req_run(&req, set_bredr_complete); + if (err < 0) + mgmt_pending_remove(cmd); + unlock: hci_dev_unlock(hdev); return err; } -static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) +static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - struct mgmt_pending_cmd *cmd = data; + struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; - bt_dev_dbg(hdev, "err %d", err); + bt_dev_dbg(hdev, "status %u", status); - if (err) { - u8 mgmt_err = mgmt_status(err); + hci_dev_lock(hdev); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); - goto done; + cmd = pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + if (!cmd) + goto unlock; + + if (status) { + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + goto remove; } cp = cmd->param; @@ -6234,23 +5915,13 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) break; } - send_settings_rsp(cmd->sk, cmd->opcode, hdev); + send_settings_rsp(cmd->sk, MGMT_OP_SET_SECURE_CONN, hdev); new_settings(hdev, cmd->sk); -done: - mgmt_pending_free(cmd); -} - -static int set_secure_conn_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; - u8 val = !!cp->val; - - /* Force write of val */ - hci_dev_set_flag(hdev, HCI_SC_ENABLED); - - return hci_write_sc_support_sync(hdev, val); +remove: + mgmt_pending_remove(cmd); +unlock: + hci_dev_unlock(hdev); } static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, @@ -6258,6 +5929,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; + struct hci_request req; u8 val; int err; @@ -6276,7 +5948,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_INVALID_PARAMS); + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -6307,6 +5979,12 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } + if (pending_find(MGMT_OP_SET_SECURE_CONN, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_BUSY); + goto failed; + } + val = !!cp->val; if (val == hci_dev_test_flag(hdev, HCI_SC_ENABLED) && @@ -6315,18 +5993,18 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_SECURE_CONN, hdev, data, len); - if (!cmd) + cmd = mgmt_pending_add(sk, MGMT_OP_SET_SECURE_CONN, hdev, data, len); + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, set_secure_conn_sync, cmd, - set_secure_conn_complete); + goto failed; + } + hci_req_init(&req, hdev); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + err = hci_req_run(&req, sc_enable_complete); if (err < 0) { - mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, - MGMT_STATUS_FAILED); - if (cmd) - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); + goto failed; } failed: @@ -6640,19 +6318,14 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } -static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) +static int conn_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - struct mgmt_pending_cmd *cmd = data; struct hci_conn *conn = cmd->user_data; - struct mgmt_cp_get_conn_info *cp = cmd->param; struct 
mgmt_rp_get_conn_info rp; - u8 status; + int err; - bt_dev_dbg(hdev, "err %d", err); + memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); - memcpy(&rp.addr, &cp->addr.bdaddr, sizeof(rp.addr)); - - status = mgmt_status(err); if (status == MGMT_STATUS_SUCCESS) { rp.rssi = conn->rssi; rp.tx_power = conn->tx_power; @@ -6663,60 +6336,69 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) rp.max_tx_power = HCI_TX_POWER_INVALID; } - mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, - &rp, sizeof(rp)); + err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, + status, &rp, sizeof(rp)); - if (conn) { - hci_conn_drop(conn); - hci_conn_put(conn); - } - - mgmt_pending_free(cmd); -} - -static int get_conn_info_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_get_conn_info *cp = cmd->param; - struct hci_conn *conn; - int err; - __le16 handle; - - /* Make sure we are still connected */ - if (cp->addr.type == BDADDR_BREDR) - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, - &cp->addr.bdaddr); - else - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); - - if (!conn || conn != cmd->user_data || conn->state != BT_CONNECTED) { - if (cmd->user_data) { - hci_conn_drop(cmd->user_data); - hci_conn_put(cmd->user_data); - cmd->user_data = NULL; - } - return MGMT_STATUS_NOT_CONNECTED; - } - - handle = cpu_to_le16(conn->handle); - - /* Refresh RSSI each time */ - err = hci_read_rssi_sync(hdev, handle); - - /* For LE links TX power does not change thus we don't need to - * query for it once value is known. - */ - if (!err && (!bdaddr_type_is_le(cp->addr.type) || - conn->tx_power == HCI_TX_POWER_INVALID)) - err = hci_read_tx_power_sync(hdev, handle, 0x00); - - /* Max TX power needs to be read only once per connection */ - if (!err && conn->max_tx_power == HCI_TX_POWER_INVALID) - err = hci_read_tx_power_sync(hdev, handle, 0x01); + hci_conn_drop(conn); + hci_conn_put(conn); return err; } +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, + u16 opcode) +{ + struct hci_cp_read_rssi *cp; + struct mgmt_pending_cmd *cmd; + struct hci_conn *conn; + u16 handle; + u8 status; + + bt_dev_dbg(hdev, "status 0x%02x", hci_status); + + hci_dev_lock(hdev); + + /* Commands sent in request are either Read RSSI or Read Transmit Power + * Level so we check which one was last sent to retrieve connection + * handle. Both commands have handle as first parameter so it's safe to + * cast data on the same command struct. + * + * First command sent is always Read RSSI and we fail only if it fails. + * In other case we simply override error to indicate success as we + * already remembered if TX power value is actually valid. 
+ */ + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI); + if (!cp) { + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); + status = MGMT_STATUS_SUCCESS; + } else { + status = mgmt_status(hci_status); + } + + if (!cp) { + bt_dev_err(hdev, "invalid sent_cmd in conn_info response"); + goto unlock; + } + + handle = __le16_to_cpu(cp->handle); + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) { + bt_dev_err(hdev, "unknown handle (%u) in conn_info response", + handle); + goto unlock; + } + + cmd = pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, status); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -6759,6 +6441,12 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + if (pending_find_data(MGMT_OP_GET_CONN_INFO, hdev, conn)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); + goto unlock; + } + /* To avoid client trying to guess when to poll again for information we * calculate conn info age as random value between min/max set in hdev. */ @@ -6772,28 +6460,49 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, if (time_after(jiffies, conn->conn_info_timestamp + msecs_to_jiffies(conn_info_age)) || !conn->conn_info_timestamp) { + struct hci_request req; + struct hci_cp_read_tx_power req_txp_cp; + struct hci_cp_read_rssi req_rssi_cp; struct mgmt_pending_cmd *cmd; - cmd = mgmt_pending_new(sk, MGMT_OP_GET_CONN_INFO, hdev, data, - len); - if (!cmd) + hci_req_init(&req, hdev); + req_rssi_cp.handle = cpu_to_le16(conn->handle); + hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp), + &req_rssi_cp); + + /* For LE links TX power does not change thus we don't need to + * query for it once value is known. 
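The comment restored above leans on both command structs beginning with the connection handle: whether the last command sent was Read RSSI or Read Transmit Power Level, the handler can read the handle through the Read RSSI layout. A self-contained demonstration of that common-prefix trick, with layouts mirroring the HCI command parameters:

#include <stdint.h>
#include <stdio.h>

struct cp_read_rssi {
        uint16_t handle;
} __attribute__((packed));

struct cp_read_tx_power {
        uint16_t handle;
        uint8_t  type;
} __attribute__((packed));

int main(void)
{
        struct cp_read_tx_power txp = { .handle = 0x0042, .type = 0x01 };
        /* Both packed layouts start with the 16-bit handle, so the
         * prefix read works regardless of which command was sent. */
        struct cp_read_rssi *as_rssi = (struct cp_read_rssi *)&txp;

        printf("handle via common prefix: 0x%04x\n", as_rssi->handle);
        return 0;
}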
+ */ + if (!bdaddr_type_is_le(cp->addr.type) || + conn->tx_power == HCI_TX_POWER_INVALID) { + req_txp_cp.handle = cpu_to_le16(conn->handle); + req_txp_cp.type = 0x00; + hci_req_add(&req, HCI_OP_READ_TX_POWER, + sizeof(req_txp_cp), &req_txp_cp); + } + + /* Max TX power needs to be read only once per connection */ + if (conn->max_tx_power == HCI_TX_POWER_INVALID) { + req_txp_cp.handle = cpu_to_le16(conn->handle); + req_txp_cp.type = 0x01; + hci_req_add(&req, HCI_OP_READ_TX_POWER, + sizeof(req_txp_cp), &req_txp_cp); + } + + err = hci_req_run(&req, conn_info_refresh_complete); + if (err < 0) + goto unlock; + + cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev, + data, len); + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, get_conn_info_sync, - cmd, get_conn_info_complete); - - if (err < 0) { - mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, - MGMT_STATUS_FAILED, &rp, sizeof(rp)); - - if (cmd) - mgmt_pending_free(cmd); - goto unlock; } hci_conn_hold(conn); cmd->user_data = hci_conn_get(conn); + cmd->cmd_complete = conn_info_cmd_complete; conn->conn_info_timestamp = jiffies; } else { @@ -6811,76 +6520,82 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) +static int clock_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_get_clock_info *cp = cmd->param; - struct mgmt_rp_get_clock_info rp; struct hci_conn *conn = cmd->user_data; - u8 status = mgmt_status(err); - - bt_dev_dbg(hdev, "err %d", err); + struct mgmt_rp_get_clock_info rp; + struct hci_dev *hdev; + int err; memset(&rp, 0, sizeof(rp)); - bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); - rp.addr.type = cp->addr.type; + memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); - if (err) + if (status) goto complete; - rp.local_clock = cpu_to_le32(hdev->clock); + hdev = hci_dev_get(cmd->index); + if (hdev) { + rp.local_clock = cpu_to_le32(hdev->clock); + hci_dev_put(hdev); + } if (conn) { rp.piconet_clock = cpu_to_le32(conn->clock); rp.accuracy = cpu_to_le16(conn->clock_accuracy); - hci_conn_drop(conn); - hci_conn_put(conn); } complete: - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, - sizeof(rp)); - - mgmt_pending_free(cmd); -} - -static int get_clock_info_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_get_clock_info *cp = cmd->param; - struct hci_cp_read_clock hci_cp; - struct hci_conn *conn = cmd->user_data; - int err; - - memset(&hci_cp, 0, sizeof(hci_cp)); - err = hci_read_clock_sync(hdev, &hci_cp); + err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + sizeof(rp)); if (conn) { - /* Make sure connection still exists */ - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, - &cp->addr.bdaddr); - - if (conn && conn == cmd->user_data && - conn->state == BT_CONNECTED) { - hci_cp.handle = cpu_to_le16(conn->handle); - hci_cp.which = 0x01; /* Piconet clock */ - err = hci_read_clock_sync(hdev, &hci_cp); - } else if (cmd->user_data) { - hci_conn_drop(cmd->user_data); - hci_conn_put(cmd->user_data); - cmd->user_data = NULL; - } + hci_conn_drop(conn); + hci_conn_put(conn); } return err; } +static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct hci_cp_read_clock *hci_cp; + struct mgmt_pending_cmd *cmd; + struct hci_conn *conn; + + bt_dev_dbg(hdev, "status %u", status); + + hci_dev_lock(hdev); + + hci_cp = 
hci_sent_cmd_data(hdev, HCI_OP_READ_CLOCK); + if (!hci_cp) + goto unlock; + + if (hci_cp->which) { + u16 handle = __le16_to_cpu(hci_cp->handle); + conn = hci_conn_hash_lookup_handle(hdev, handle); + } else { + conn = NULL; + } + + cmd = pending_find_data(MGMT_OP_GET_CLOCK_INFO, hdev, conn); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, - u16 len) + u16 len) { struct mgmt_cp_get_clock_info *cp = data; struct mgmt_rp_get_clock_info rp; + struct hci_cp_read_clock hci_cp; struct mgmt_pending_cmd *cmd; + struct hci_request req; struct hci_conn *conn; int err; @@ -6918,25 +6633,31 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, conn = NULL; } - cmd = mgmt_pending_new(sk, MGMT_OP_GET_CLOCK_INFO, hdev, data, len); - if (!cmd) + cmd = mgmt_pending_add(sk, MGMT_OP_GET_CLOCK_INFO, hdev, data, len); + if (!cmd) { err = -ENOMEM; - else - err = hci_cmd_sync_queue(hdev, get_clock_info_sync, cmd, - get_clock_info_complete); - - if (err < 0) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, - MGMT_STATUS_FAILED, &rp, sizeof(rp)); - - if (cmd) - mgmt_pending_free(cmd); - - } else if (conn) { - hci_conn_hold(conn); - cmd->user_data = hci_conn_get(conn); + goto unlock; } + cmd->cmd_complete = clock_info_cmd_complete; + + hci_req_init(&req, hdev); + + memset(&hci_cp, 0, sizeof(hci_cp)); + hci_req_add(&req, HCI_OP_READ_CLOCK, sizeof(hci_cp), &hci_cp); + + if (conn) { + hci_conn_hold(conn); + cmd->user_data = hci_conn_get(conn); + + hci_cp.handle = cpu_to_le16(conn->handle); + hci_cp.which = 0x01; /* Piconet clock */ + hci_req_add(&req, HCI_OP_READ_CLOCK, sizeof(hci_cp), &hci_cp); + } + + err = hci_req_run(&req, get_clock_info_complete); + if (err < 0) + mgmt_pending_remove(cmd); unlock: hci_dev_unlock(hdev); @@ -7017,11 +6738,6 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } -static int add_device_sync(struct hci_dev *hdev, void *data) -{ - return hci_update_passive_scan_sync(hdev); -} - static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -7030,7 +6746,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, struct hci_conn_params *params; int err; u32 current_flags = 0; - u32 supported_flags; bt_dev_dbg(hdev, "sock %p", sk); @@ -7102,20 +6817,15 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (params) - bitmap_to_arr32(¤t_flags, params->flags, - __HCI_CONN_NUM_FLAGS); + current_flags = params->current_flags; } - err = hci_cmd_sync_queue(hdev, add_device_sync, NULL, NULL); - if (err < 0) - goto unlock; + hci_update_background_scan(hdev); added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - bitmap_to_arr32(&supported_flags, hdev->conn_flags, - __HCI_CONN_NUM_FLAGS); device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type, - supported_flags, current_flags); + SUPPORTED_DEVICE_FLAGS(), current_flags); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_SUCCESS, &cp->addr, @@ -7137,11 +6847,6 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } -static int remove_device_sync(struct hci_dev *hdev, void *data) -{ - return hci_update_passive_scan_sync(hdev); 
-} - static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -7221,6 +6926,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, list_del(&params->action); list_del(&params->list); kfree(params); + hci_update_background_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { @@ -7257,9 +6963,9 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, } bt_dev_dbg(hdev, "All LE connection parameters were removed"); - } - hci_cmd_sync_queue(hdev, remove_device_sync, NULL, NULL); + hci_update_background_scan(hdev); + } complete: err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, @@ -7463,28 +7169,22 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev, return err; } -static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data, - int err) +static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, + u16 opcode, struct sk_buff *skb) { const struct mgmt_cp_read_local_oob_ext_data *mgmt_cp; struct mgmt_rp_read_local_oob_ext_data *mgmt_rp; u8 *h192, *r192, *h256, *r256; - struct mgmt_pending_cmd *cmd = data; - struct sk_buff *skb = cmd->skb; - u8 status = mgmt_status(err); + struct mgmt_pending_cmd *cmd; u16 eir_len; - - if (!status) { - if (!skb) - status = MGMT_STATUS_FAILED; - else if (IS_ERR(skb)) - status = mgmt_status(PTR_ERR(skb)); - else - status = mgmt_status(skb->data[0]); - } + int err; bt_dev_dbg(hdev, "status %u", status); + cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev); + if (!cmd) + return; + mgmt_cp = cmd->param; if (status) { @@ -7495,7 +7195,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data, r192 = NULL; h256 = NULL; r256 = NULL; - } else if (!bredr_sc_enabled(hdev)) { + } else if (opcode == HCI_OP_READ_LOCAL_OOB_DATA) { struct hci_rp_read_local_oob_data *rp; if (skb->len != sizeof(*rp)) { @@ -7576,9 +7276,6 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data, mgmt_rp, sizeof(*mgmt_rp) + eir_len, HCI_MGMT_OOB_DATA_EVENTS, cmd->sk); done: - if (skb && !IS_ERR(skb)) - kfree_skb(skb); - kfree(mgmt_rp); mgmt_pending_remove(cmd); } @@ -7587,6 +7284,7 @@ static int read_local_ssp_oob_req(struct hci_dev *hdev, struct sock *sk, struct mgmt_cp_read_local_oob_ext_data *cp) { struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev, @@ -7594,9 +7292,14 @@ static int read_local_ssp_oob_req(struct hci_dev *hdev, struct sock *sk, if (!cmd) return -ENOMEM; - err = hci_cmd_sync_queue(hdev, read_local_oob_data_sync, cmd, - read_local_oob_ext_data_complete); + hci_req_init(&req, hdev); + if (bredr_sc_enabled(hdev)) + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_EXT_DATA, 0, NULL); + else + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL); + + err = hci_req_run_skb(&req, read_local_oob_ext_data_complete); if (err < 0) { mgmt_pending_remove(cmd); return err; } @@ -7648,11 +7351,6 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, if (!rp) return -ENOMEM; - if (!status && !lmp_ssp_capable(hdev)) { - status = MGMT_STATUS_NOT_SUPPORTED; - eir_len = 0; - } - if (status) goto complete; @@ -7820,6 +7518,13 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, MGMT_STATUS_REJECTED); + /* Enabling the experimental LL Privacy support disables support for + * advertising.
+ */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, + MGMT_STATUS_NOT_SUPPORTED); + hci_dev_lock(hdev); rp_len = sizeof(*rp) + hdev->adv_instance_cnt; @@ -7857,7 +7562,7 @@ static u8 calculate_name_len(struct hci_dev *hdev) { u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; - return eir_append_local_name(hdev, buf, 0); + return append_local_name(hdev, buf, 0); } static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, @@ -7976,66 +7681,58 @@ static bool adv_busy(struct hci_dev *hdev) pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev)); } -static void add_adv_complete(struct hci_dev *hdev, struct sock *sk, u8 instance, - int err) +static void add_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { - struct adv_info *adv, *n; + struct mgmt_pending_cmd *cmd; + struct mgmt_cp_add_advertising *cp; + struct mgmt_rp_add_advertising rp; + struct adv_info *adv_instance, *n; + u8 instance; - bt_dev_dbg(hdev, "err %d", err); + bt_dev_dbg(hdev, "status %u", status); hci_dev_lock(hdev); - list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) { - u8 instance; + cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); + if (!cmd) + cmd = pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev); - if (!adv->pending) + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { + if (!adv_instance->pending) continue; - if (!err) { - adv->pending = false; + if (!status) { + adv_instance->pending = false; continue; } - instance = adv->instance; + instance = adv_instance->instance; if (hdev->cur_adv_instance == instance) cancel_adv_timeout(hdev); hci_remove_adv_instance(hdev, instance); - mgmt_advertising_removed(sk, hdev, instance); + mgmt_advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); } - hci_dev_unlock(hdev); -} - -static void add_advertising_complete(struct hci_dev *hdev, void *data, int err) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_add_advertising *cp = cmd->param; - struct mgmt_rp_add_advertising rp; - - memset(&rp, 0, sizeof(rp)); + if (!cmd) + goto unlock; + cp = cmd->param; rp.instance = cp->instance; - if (err) + if (status) mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err)); + mgmt_status(status)); else mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err), &rp, sizeof(rp)); + mgmt_status(status), &rp, sizeof(rp)); - add_adv_complete(hdev, cmd->sk, cp->instance, err); + mgmt_pending_remove(cmd); - mgmt_pending_free(cmd); -} - -static int add_advertising_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_add_advertising *cp = cmd->param; - - return hci_schedule_adv_instance_sync(hdev, cp->instance, true); +unlock: + hci_dev_unlock(hdev); } static int add_advertising(struct sock *sk, struct hci_dev *hdev, @@ -8051,6 +7748,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, struct adv_info *next_instance; int err; struct mgmt_pending_cmd *cmd; + struct hci_request req; bt_dev_dbg(hdev, "sock %p", sk); @@ -8059,6 +7757,13 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, status); + /* Enabling the experimental LL Privacy support disables support for + * advertising.
+ */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_NOT_SUPPORTED); + if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -8151,19 +7856,25 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, /* We're good to go, update advertising data, parameters, and start * advertising. */ - cmd = mgmt_pending_new(sk, MGMT_OP_ADD_ADVERTISING, hdev, data, + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_ADVERTISING, hdev, data, data_len); if (!cmd) { err = -ENOMEM; goto unlock; } - cp->instance = schedule_instance; + hci_req_init(&req, hdev); - err = hci_cmd_sync_queue(hdev, add_advertising_sync, cmd, - add_advertising_complete); - if (err < 0) - mgmt_pending_free(cmd); + err = __hci_req_schedule_adv_instance(&req, schedule_instance, true); + + if (!err) + err = hci_req_run(&req, add_advertising_complete); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -8171,25 +7882,30 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return err; } -static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, - int err) +static void add_ext_adv_params_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_add_ext_adv_params *cp = cmd->param; + struct mgmt_pending_cmd *cmd; + struct mgmt_cp_add_ext_adv_params *cp; struct mgmt_rp_add_ext_adv_params rp; - struct adv_info *adv; + struct adv_info *adv_instance; u32 flags; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); - adv = hci_find_adv_instance(hdev, cp->instance); - if (!adv) + cmd = pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev); + if (!cmd) + goto unlock; + + cp = cmd->param; + adv_instance = hci_find_adv_instance(hdev, cp->instance); + if (!adv_instance) goto unlock; rp.instance = cp->instance; - rp.tx_power = adv->tx_power; + rp.tx_power = adv_instance->tx_power; /* While we're at it, inform userspace of the available space for this * advertisement, given the flags that will be used. 
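The hunks above convert the advertising and connection-info commands from the hci_cmd_sync_queue() flow back to the hci_request batching API: commands are queued with hci_req_add() and submitted together by hci_req_run(), whose completion callback fires once the controller has answered. A minimal sketch of that pattern follows; read_rssi_example() and its completion handler are hypothetical names, while the hci_req_*() helpers, struct hci_cp_read_rssi and HCI_OP_READ_RSSI are the real interfaces used throughout this patch.

/* Sketch only: illustrates the hci_request flow this revert reinstates. */
static void read_rssi_example_complete(struct hci_dev *hdev, u8 status,
				       u16 opcode)
{
	/* Runs once every queued command has completed (or one has
	 * failed); callers typically recover their context here, e.g.
	 * via hci_sent_cmd_data() and a pending mgmt command lookup.
	 */
	bt_dev_dbg(hdev, "status 0x%02x opcode 0x%04x", status, opcode);
}

static int read_rssi_example(struct hci_dev *hdev, u16 handle)
{
	struct hci_cp_read_rssi cp;
	struct hci_request req;

	hci_req_init(&req, hdev);	/* bind the request to hdev */

	cp.handle = cpu_to_le16(handle);
	hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(cp), &cp);

	/* Submits the whole batch; the callback fires asynchronously. */
	return hci_req_run(&req, read_rssi_example_complete);
}

Because completion is asynchronous, every hci_req_run() in the hunks above is paired with a mgmt_pending_add() so that the completion handler can locate and resolve the originating mgmt request.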
@@ -8198,44 +7914,39 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); - if (err) { + if (status) { /* If this advertisement was previously advertising and we * failed to update it, we signal that it has been removed and * delete its structure */ - if (!adv->pending) + if (!adv_instance->pending) mgmt_advertising_removed(cmd->sk, hdev, cp->instance); hci_remove_adv_instance(hdev, cp->instance); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err)); + mgmt_status(status)); + } else { mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err), &rp, sizeof(rp)); + mgmt_status(status), &rp, sizeof(rp)); } unlock: if (cmd) - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); hci_dev_unlock(hdev); } -static int add_ext_adv_params_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_add_ext_adv_params *cp = cmd->param; - - return hci_setup_ext_adv_instance_sync(hdev, cp->instance); -} - static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_add_ext_adv_params *cp = data; struct mgmt_rp_add_ext_adv_params rp; struct mgmt_pending_cmd *cmd = NULL; + struct adv_info *adv_instance; + struct hci_request req; u32 flags, min_interval, max_interval; u16 timeout, duration; u8 status; @@ -8317,18 +8028,29 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, /* Submit request for advertising params if ext adv available */ if (ext_adv_capable(hdev)) { - cmd = mgmt_pending_new(sk, MGMT_OP_ADD_EXT_ADV_PARAMS, hdev, - data, data_len); + hci_req_init(&req, hdev); + adv_instance = hci_find_adv_instance(hdev, cp->instance); + + /* Updating parameters of an active instance will return a + * Command Disallowed error, so we must first disable the + * instance if it is active. 
+ */ + if (!adv_instance->pending) + __hci_req_disable_ext_adv_instance(&req, cp->instance); + + __hci_req_setup_ext_adv_instance(&req, cp->instance); + + err = hci_req_run(&req, add_ext_adv_params_complete); + + if (!err) + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_EXT_ADV_PARAMS, + hdev, data, data_len); if (!cmd) { err = -ENOMEM; hci_remove_adv_instance(hdev, cp->instance); goto unlock; } - err = hci_cmd_sync_queue(hdev, add_ext_adv_params_sync, cmd, - add_ext_adv_params_complete); - if (err < 0) - mgmt_pending_free(cmd); } else { rp.instance = cp->instance; rp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; @@ -8345,49 +8067,6 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, return err; } -static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_add_ext_adv_data *cp = cmd->param; - struct mgmt_rp_add_advertising rp; - - add_adv_complete(hdev, cmd->sk, cp->instance, err); - - memset(&rp, 0, sizeof(rp)); - - rp.instance = cp->instance; - - if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err)); - else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err), &rp, sizeof(rp)); - - mgmt_pending_free(cmd); -} - -static int add_ext_adv_data_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_add_ext_adv_data *cp = cmd->param; - int err; - - if (ext_adv_capable(hdev)) { - err = hci_update_adv_data_sync(hdev, cp->instance); - if (err) - return err; - - err = hci_update_scan_rsp_data_sync(hdev, cp->instance); - if (err) - return err; - - return hci_enable_ext_advertising_sync(hdev, cp->instance); - } - - return hci_schedule_adv_instance_sync(hdev, cp->instance, true); -} - static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -8398,6 +8077,7 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, struct adv_info *adv_instance; int err = 0; struct mgmt_pending_cmd *cmd; + struct hci_request req; BT_DBG("%s", hdev->name); @@ -8439,52 +8119,78 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, cp->data, cp->scan_rsp_len, cp->data + cp->adv_data_len); - /* If using software rotation, determine next instance to use */ - if (hdev->cur_adv_instance == cp->instance) { - /* If the currently advertised instance is being changed - * then cancel the current advertising and schedule the - * next instance. If there is only one instance then the - * overridden advertising data will be visible right - * away - */ - cancel_adv_timeout(hdev); - - next_instance = hci_get_next_instance(hdev, cp->instance); - if (next_instance) - schedule_instance = next_instance->instance; - } else if (!hdev->adv_instance_timeout) { - /* Immediately advertise the new instance if no other - * instance is currently being advertised. - */ - schedule_instance = cp->instance; - } - - /* If the HCI_ADVERTISING flag is set or there is no instance to - * be advertised then we have no HCI communication to make. - * Simply return. + /* We're good to go, update advertising data, parameters, and start + * advertising. 
*/ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || !schedule_instance) { - if (adv_instance->pending) { - mgmt_advertising_added(sk, hdev, cp->instance); - adv_instance->pending = false; + + hci_req_init(&req, hdev); + + hci_req_add(&req, HCI_OP_READ_LOCAL_NAME, 0, NULL); + + if (ext_adv_capable(hdev)) { + __hci_req_update_adv_data(&req, cp->instance); + __hci_req_update_scan_rsp_data(&req, cp->instance); + __hci_req_enable_ext_advertising(&req, cp->instance); + + } else { + /* If using software rotation, determine next instance to use */ + + if (hdev->cur_adv_instance == cp->instance) { + /* If the currently advertised instance is being changed + * then cancel the current advertising and schedule the + * next instance. If there is only one instance then the + * overridden advertising data will be visible right + * away + */ + cancel_adv_timeout(hdev); + + next_instance = hci_get_next_instance(hdev, + cp->instance); + if (next_instance) + schedule_instance = next_instance->instance; + } else if (!hdev->adv_instance_timeout) { + /* Immediately advertise the new instance if no other + * instance is currently being advertised. + */ + schedule_instance = cp->instance; } - rp.instance = cp->instance; - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, - MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); - goto unlock; + + /* If the HCI_ADVERTISING flag is set or there is no instance to + * be advertised then we have no HCI communication to make. + * Simply return. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !schedule_instance) { + if (adv_instance->pending) { + mgmt_advertising_added(sk, hdev, cp->instance); + adv_instance->pending = false; + } + rp.instance = cp->instance; + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_SUCCESS, &rp, + sizeof(rp)); + goto unlock; + } + + err = __hci_req_schedule_adv_instance(&req, schedule_instance, + true); } - cmd = mgmt_pending_new(sk, MGMT_OP_ADD_EXT_ADV_DATA, hdev, data, + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_EXT_ADV_DATA, hdev, data, data_len); if (!cmd) { err = -ENOMEM; goto clear_new_instance; } - err = hci_cmd_sync_queue(hdev, add_ext_adv_data_sync, cmd, - add_ext_adv_data_complete); + if (!err) + err = hci_req_run(&req, add_advertising_complete); + if (err < 0) { - mgmt_pending_free(cmd); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); goto clear_new_instance; } @@ -8507,53 +8213,54 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void remove_advertising_complete(struct hci_dev *hdev, void *data, - int err) +static void remove_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_remove_advertising *cp = cmd->param; + struct mgmt_pending_cmd *cmd; + struct mgmt_cp_remove_advertising *cp; struct mgmt_rp_remove_advertising rp; - bt_dev_dbg(hdev, "err %d", err); + bt_dev_dbg(hdev, "status %u", status); - memset(&rp, 0, sizeof(rp)); + hci_dev_lock(hdev); + + /* A failure status here only means that we failed to disable + * advertising. Otherwise, the advertising instance has been removed, + * so report success. 
+ */ + cmd = pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev); + if (!cmd) + goto unlock; + + cp = cmd->param; rp.instance = cp->instance; - if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(err)); - else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, - MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, MGMT_STATUS_SUCCESS, + &rp, sizeof(rp)); + mgmt_pending_remove(cmd); - mgmt_pending_free(cmd); -} - -static int remove_advertising_sync(struct hci_dev *hdev, void *data) -{ - struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_remove_advertising *cp = cmd->param; - int err; - - err = hci_remove_advertising_sync(hdev, cmd->sk, cp->instance, true); - if (err) - return err; - - if (list_empty(&hdev->adv_instances)) - err = hci_disable_advertising_sync(hdev); - - return err; +unlock: + hci_dev_unlock(hdev); } static int remove_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_remove_advertising *cp = data; + struct mgmt_rp_remove_advertising rp; struct mgmt_pending_cmd *cmd; + struct hci_request req; int err; bt_dev_dbg(hdev, "sock %p", sk); + /* Enabling the experimental LL Privacy support disables support for + * advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_NOT_SUPPORTED); + hci_dev_lock(hdev); if (cp->instance && !hci_find_adv_instance(hdev, cp->instance)) { @@ -8577,17 +8284,44 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADVERTISING, hdev, data, + hci_req_init(&req, hdev); + + /* If we use extended advertising, instance is disabled and removed */ + if (ext_adv_capable(hdev)) { + __hci_req_disable_ext_adv_instance(&req, cp->instance); + __hci_req_remove_ext_adv_instance(&req, cp->instance); + } + + hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true); + + if (list_empty(&hdev->adv_instances)) + __hci_req_disable_advertising(&req); + + /* If no HCI commands have been collected so far or the HCI_ADVERTISING + * flag is set or the device isn't powered then we have no HCI + * communication to make. Simply return. + */ + if (skb_queue_empty(&req.cmd_q) || + !hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + hci_req_purge(&req); + rp.instance = cp->instance; + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_ADVERTISING, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADVERTISING, hdev, data, data_len); if (!cmd) { err = -ENOMEM; goto unlock; } - err = hci_cmd_sync_queue(hdev, remove_advertising_sync, cmd, - remove_advertising_complete); + err = hci_req_run(&req, remove_advertising_complete); if (err < 0) - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); unlock: hci_dev_unlock(hdev); @@ -8829,6 +8563,31 @@ void mgmt_index_removed(struct hci_dev *hdev) HCI_MGMT_EXT_INDEX_EVENTS); } +/* This function requires the caller holds hdev->lock */ +static void restart_le_actions(struct hci_dev *hdev) +{ + struct hci_conn_params *p; + + list_for_each_entry(p, &hdev->le_conn_params, list) { + /* Needed for AUTO_OFF case where the device might not "really" + * have been powered off.
+ */ + list_del_init(&p->action); + + switch (p->auto_connect) { + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + list_add(&p->action, &hdev->pend_le_conns); + break; + case HCI_AUTO_CONN_REPORT: + list_add(&p->action, &hdev->pend_le_reports); + break; + default: + break; + } + } +} + void mgmt_power_on(struct hci_dev *hdev, int err) { struct cmd_lookup match = { NULL, hdev }; @@ -8839,7 +8598,7 @@ void mgmt_power_on(struct hci_dev *hdev, int err) if (!err) { restart_le_actions(hdev); - hci_update_passive_scan(hdev); + hci_update_background_scan(hdev); } mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -9054,19 +8813,11 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u8 *name, u8 name_len) { - struct sk_buff *skb; - struct mgmt_ev_device_connected *ev; + char buf[512]; + struct mgmt_ev_device_connected *ev = (void *) buf; u16 eir_len = 0; u32 flags = 0; - if (conn->le_adv_data_len > 0) - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, - conn->le_adv_data_len); - else - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, - 2 + name_len + 5); - - ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type); @@ -9080,26 +8831,24 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, * adding any BR/EDR data to the LE adv. */ if (conn->le_adv_data_len > 0) { - skb_put_data(skb, conn->le_adv_data, conn->le_adv_data_len); + memcpy(&ev->eir[eir_len], + conn->le_adv_data, conn->le_adv_data_len); eir_len = conn->le_adv_data_len; } else { - if (name_len > 0) { + if (name_len > 0) eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name, name_len); - skb_put(skb, eir_len); - } - if (memcmp(conn->dev_class, "\0\0\0", 3) != 0) { + if (memcmp(conn->dev_class, "\0\0\0", 3) != 0) eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, conn->dev_class, 3); - skb_put(skb, 5); - } } ev->eir_len = cpu_to_le16(eir_len); - mgmt_event_skb(skb, NULL); + mgmt_event(MGMT_EV_DEVICE_CONNECTED, hdev, buf, + sizeof(*ev) + eir_len, NULL); } static void disconnect_rsp(struct mgmt_pending_cmd *cmd, void *data) @@ -9405,6 +9154,74 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) sock_put(match.sk); } +static void clear_eir(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_eir cp; + + if (!lmp_ext_inq_capable(hdev)) + return; + + memset(hdev->eir, 0, sizeof(hdev->eir)); + + memset(&cp, 0, sizeof(cp)); + + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); +} + +void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) +{ + struct cmd_lookup match = { NULL, hdev }; + struct hci_request req; + bool changed = false; + + if (status) { + u8 mgmt_err = mgmt_status(status); + + if (enable && hci_dev_test_and_clear_flag(hdev, + HCI_SSP_ENABLED)) { + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); + new_settings(hdev, NULL); + } + + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, + &mgmt_err); + return; + } + + if (enable) { + changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); + } else { + changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); + if (!changed) + changed = hci_dev_test_and_clear_flag(hdev, + HCI_HS_ENABLED); + else + hci_dev_clear_flag(hdev, HCI_HS_ENABLED); + } + + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); + + if (changed) + new_settings(hdev, match.sk); + + if (match.sk) 
+ sock_put(match.sk); + + hci_req_init(&req, hdev); + + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { + if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) + hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE, + sizeof(enable), &enable); + __hci_req_update_eir(&req); + } else { + clear_eir(&req); + } + + hci_req_run(&req, NULL); +} + static void sk_lookup(struct mgmt_pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -9593,8 +9410,9 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { - struct sk_buff *skb; - struct mgmt_ev_device_found *ev; + char buf[512]; + struct mgmt_ev_device_found *ev = (void *)buf; + size_t ev_size; /* Don't send events for a non-kernel initiated discovery. With * LE one exception is if we have pend_le_reports > 0 in which @@ -9629,13 +9447,13 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, } } - /* Allocate skb. The 5 extra bytes are for the potential CoD field */ - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, - sizeof(*ev) + eir_len + scan_rsp_len + 5); - if (!skb) + /* Make sure that the buffer is big enough. The 5 extra bytes + * are for the potential CoD field. + */ + if (sizeof(*ev) + eir_len + scan_rsp_len + 5 > sizeof(buf)) return; - ev = skb_put(skb, sizeof(*ev)); + memset(buf, 0, sizeof(buf)); /* In case of device discovery with BR/EDR devices (pre 1.2), the * RSSI value was reported as 0 when not available. This behavior @@ -9656,57 +9474,44 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, if (eir_len > 0) /* Copy EIR or advertising data into event */ - skb_put_data(skb, eir, eir_len); + memcpy(ev->eir, eir, eir_len); - if (dev_class && !eir_get_data(eir, eir_len, EIR_CLASS_OF_DEV, NULL)) { - u8 eir_cod[5]; - - eir_len += eir_append_data(eir_cod, 0, EIR_CLASS_OF_DEV, - dev_class, 3); - skb_put_data(skb, eir_cod, sizeof(eir_cod)); - } + if (dev_class && !eir_get_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, + NULL)) + eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, + dev_class, 3); if (scan_rsp_len > 0) /* Append scan response data to event */ - skb_put_data(skb, scan_rsp, scan_rsp_len); + memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len); ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); + ev_size = sizeof(*ev) + eir_len + scan_rsp_len; - mgmt_event_skb(skb, NULL); + mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, ev_size, NULL); } void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len) { - struct sk_buff *skb; struct mgmt_ev_device_found *ev; + char buf[sizeof(*ev) + HCI_MAX_NAME_LENGTH + 2]; u16 eir_len; - u32 flags; - if (name_len) - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 2 + name_len); - else - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 0); + ev = (struct mgmt_ev_device_found *) buf; + + memset(buf, 0, sizeof(buf)); - ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); ev->addr.type = link_to_bdaddr(link_type, addr_type); ev->rssi = rssi; - if (name) { - eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name, - name_len); - flags = 0; - skb_put(skb, eir_len); - } else { - eir_len = 0; - flags = MGMT_DEV_FOUND_NAME_REQUEST_FAILED; - } + eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name, + name_len); ev->eir_len = cpu_to_le16(eir_len); - ev->flags = cpu_to_le32(flags); - mgmt_event_skb(skb, NULL); + 
mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, sizeof(*ev) + eir_len, NULL); } void mgmt_discovering(struct hci_dev *hdev, u8 discovering) diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index edee60bbc7..0d0a6d77b9 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -56,70 +56,38 @@ static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, return skb; } -struct sk_buff *mgmt_alloc_skb(struct hci_dev *hdev, u16 opcode, - unsigned int size) -{ - struct sk_buff *skb; - - skb = alloc_skb(sizeof(struct mgmt_hdr) + size, GFP_KERNEL); - if (!skb) - return skb; - - skb_reserve(skb, sizeof(struct mgmt_hdr)); - bt_cb(skb)->mgmt.hdev = hdev; - bt_cb(skb)->mgmt.opcode = opcode; - - return skb; -} - -int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag, - struct sock *skip_sk) -{ - struct hci_dev *hdev; - struct mgmt_hdr *hdr; - int len = skb->len; - - if (!skb) - return -EINVAL; - - hdev = bt_cb(skb)->mgmt.hdev; - - /* Time stamp */ - __net_timestamp(skb); - - /* Send just the data, without headers, to the monitor */ - if (channel == HCI_CHANNEL_CONTROL) - hci_send_monitor_ctrl_event(hdev, bt_cb(skb)->mgmt.opcode, - skb->data, skb->len, - skb_get_ktime(skb), flag, skip_sk); - - hdr = skb_push(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(bt_cb(skb)->mgmt.opcode); - if (hdev) - hdr->index = cpu_to_le16(hdev->id); - else - hdr->index = cpu_to_le16(MGMT_INDEX_NONE); - hdr->len = cpu_to_le16(len); - - hci_send_to_channel(channel, skb, flag, skip_sk); - - kfree_skb(skb); - return 0; -} - int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk) { struct sk_buff *skb; + struct mgmt_hdr *hdr; - skb = mgmt_alloc_skb(hdev, event, data_len); + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL); if (!skb) return -ENOMEM; + hdr = skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(event); + if (hdev) + hdr->index = cpu_to_le16(hdev->id); + else + hdr->index = cpu_to_le16(MGMT_INDEX_NONE); + hdr->len = cpu_to_le16(data_len); + if (data) skb_put_data(skb, data, data_len); - return mgmt_send_event_skb(channel, skb, flag, skip_sk); + /* Time stamp */ + __net_timestamp(skb); + + hci_send_to_channel(channel, skb, flag, skip_sk); + + if (channel == HCI_CHANNEL_CONTROL) + hci_send_monitor_ctrl_event(hdev, event, data, data_len, + skb_get_ktime(skb), flag, skip_sk); + + kfree_skb(skb); + return 0; } int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) @@ -259,7 +227,7 @@ void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, } } -struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, +struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, struct hci_dev *hdev, void *data, u16 len) { @@ -283,19 +251,6 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, cmd->sk = sk; sock_hold(sk); - return cmd; -} - -struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, - struct hci_dev *hdev, - void *data, u16 len) -{ - struct mgmt_pending_cmd *cmd; - - cmd = mgmt_pending_new(sk, opcode, hdev, data, len); - if (!cmd) - return NULL; - list_add(&cmd->list, &hdev->mgmt_pending); return cmd; diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index 98e40395a3..6559f18921 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -27,15 +27,10 @@ struct mgmt_pending_cmd { void *param; size_t param_len; struct sock *sk; - struct sk_buff *skb; void *user_data; int 
(*cmd_complete)(struct mgmt_pending_cmd *cmd, u8 status); }; -struct sk_buff *mgmt_alloc_skb(struct hci_dev *hdev, u16 opcode, - unsigned int size); -int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag, - struct sock *skip_sk); int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk); int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status); @@ -54,8 +49,5 @@ void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, struct hci_dev *hdev, void *data, u16 len); -struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, - struct hci_dev *hdev, - void *data, u16 len); void mgmt_pending_free(struct mgmt_pending_cmd *cmd); void mgmt_pending_remove(struct mgmt_pending_cmd *cmd); diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 6a943634b3..b4bfae41e8 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -93,15 +93,12 @@ struct msft_data { struct list_head handle_map; __u16 pending_add_handle; __u16 pending_remove_handle; - __u8 resuming; - __u8 suspending; + __u8 reregistering; __u8 filter_enabled; }; static int __msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor); -static int __msft_remove_monitor(struct hci_dev *hdev, - struct adv_monitor *monitor, u16 handle); bool msft_monitor_supported(struct hci_dev *hdev) { @@ -156,7 +153,8 @@ static bool read_supported_features(struct hci_dev *hdev, return false; } -static void reregister_monitor(struct hci_dev *hdev, int handle) +/* This function requires the caller holds hdev->lock */ +static void reregister_monitor_on_restart(struct hci_dev *hdev, int handle) { struct adv_monitor *monitor; struct msft_data *msft = hdev->msft_data; @@ -165,9 +163,9 @@ static void reregister_monitor(struct hci_dev *hdev, int handle) while (1) { monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); if (!monitor) { - /* All monitors have been resumed */ - msft->resuming = false; - hci_update_passive_scan(hdev); + /* All monitors have been reregistered */ + msft->reregistering = false; + hci_update_background_scan(hdev); return; } @@ -184,6 +182,102 @@ static void reregister_monitor(struct hci_dev *hdev, int handle) } } +void msft_do_open(struct hci_dev *hdev) +{ + struct msft_data *msft; + + if (hdev->msft_opcode == HCI_OP_NOP) + return; + + bt_dev_dbg(hdev, "Initialize MSFT extension"); + + msft = kzalloc(sizeof(*msft), GFP_KERNEL); + if (!msft) + return; + + if (!read_supported_features(hdev, msft)) { + kfree(msft); + return; + } + + INIT_LIST_HEAD(&msft->handle_map); + hdev->msft_data = msft; + + if (msft_monitor_supported(hdev)) { + msft->reregistering = true; + msft_set_filter_enable(hdev, true); + reregister_monitor_on_restart(hdev, 0); + } +} + +void msft_do_close(struct hci_dev *hdev) +{ + struct msft_data *msft = hdev->msft_data; + struct msft_monitor_advertisement_handle_data *handle_data, *tmp; + struct adv_monitor *monitor; + + if (!msft) + return; + + bt_dev_dbg(hdev, "Cleanup of MSFT extension"); + + hdev->msft_data = NULL; + + list_for_each_entry_safe(handle_data, tmp, &msft->handle_map, list) { + monitor = idr_find(&hdev->adv_monitors_idr, + handle_data->mgmt_handle); + + if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED) + monitor->state = ADV_MONITOR_STATE_REGISTERED; + + list_del(&handle_data->list); + kfree(handle_data); + } + + kfree(msft->evt_prefix); + kfree(msft); +} + +void 
msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct msft_data *msft = hdev->msft_data; + u8 event; + + if (!msft) + return; + + /* When the extension has defined an event prefix, check that it + * matches, and otherwise just return. + */ + if (msft->evt_prefix_len > 0) { + if (skb->len < msft->evt_prefix_len) + return; + + if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len)) + return; + + skb_pull(skb, msft->evt_prefix_len); + } + + /* Every event starts at least with an event code and the rest of + * the data is variable and depends on the event code. + */ + if (skb->len < 1) + return; + + event = *skb->data; + skb_pull(skb, 1); + + bt_dev_dbg(hdev, "MSFT vendor event %u", event); +} + +__u64 msft_get_features(struct hci_dev *hdev) +{ + struct msft_data *msft = hdev->msft_data; + + return msft ? msft->features : 0; +} + /* is_mgmt = true matches the handle exposed to userspace via mgmt. * is_mgmt = false matches the handle used by the msft controller. * This function requires the caller holds hdev->lock @@ -249,9 +343,14 @@ static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev, if (status && monitor) hci_free_adv_monitor(hdev, monitor); + /* If in restart/reregister sequence, keep registering. */ + if (msft->reregistering) + reregister_monitor_on_restart(hdev, + msft->pending_add_handle + 1); + hci_dev_unlock(hdev); - if (!msft->resuming) + if (!msft->reregistering) hci_add_adv_patterns_monitor_complete(hdev, status); } @@ -284,14 +383,7 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, if (handle_data) { monitor = idr_find(&hdev->adv_monitors_idr, handle_data->mgmt_handle); - - if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED) - monitor->state = ADV_MONITOR_STATE_REGISTERED; - - /* Do not free the monitor if it is being removed due to - * suspend. It will be re-monitored on resume. 
- */ - if (monitor && !msft->suspending) + if (monitor) hci_free_adv_monitor(hdev, monitor); list_del(&handle_data->list); @@ -316,318 +408,7 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, hci_dev_unlock(hdev); done: - if (!msft->suspending) - hci_remove_adv_monitor_complete(hdev, status); -} - -static int msft_remove_monitor_sync(struct hci_dev *hdev, - struct adv_monitor *monitor) -{ - struct msft_cp_le_cancel_monitor_advertisement cp; - struct msft_monitor_advertisement_handle_data *handle_data; - struct sk_buff *skb; - u8 status; - - handle_data = msft_find_handle_data(hdev, monitor->handle, true); - - /* If no matched handle, just remove without telling controller */ - if (!handle_data) - return -ENOENT; - - cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT; - cp.handle = handle_data->msft_handle; - - skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp, - HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - status = skb->data[0]; - skb_pull(skb, 1); - - msft_le_cancel_monitor_advertisement_cb(hdev, status, hdev->msft_opcode, - skb); - - return status; -} - -/* This function requires the caller holds hci_req_sync_lock */ -int msft_suspend_sync(struct hci_dev *hdev) -{ - struct msft_data *msft = hdev->msft_data; - struct adv_monitor *monitor; - int handle = 0; - - if (!msft || !msft_monitor_supported(hdev)) - return 0; - - msft->suspending = true; - - while (1) { - monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); - if (!monitor) - break; - - msft_remove_monitor_sync(hdev, monitor); - - handle++; - } - - /* All monitors have been removed */ - msft->suspending = false; - - return 0; -} - -static bool msft_monitor_rssi_valid(struct adv_monitor *monitor) -{ - struct adv_rssi_thresholds *r = &monitor->rssi; - - if (r->high_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN || - r->high_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX || - r->low_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN || - r->low_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX) - return false; - - /* High_threshold_timeout is not supported, - * once high_threshold is reached, events are immediately reported. 
- */ - if (r->high_threshold_timeout != 0) - return false; - - if (r->low_threshold_timeout > MSFT_RSSI_LOW_TIMEOUT_MAX) - return false; - - /* Sampling period from 0x00 to 0xFF are all allowed */ - return true; -} - -static bool msft_monitor_pattern_valid(struct adv_monitor *monitor) -{ - return msft_monitor_rssi_valid(monitor); - /* No additional check needed for pattern-based monitor */ -} - -static int msft_add_monitor_sync(struct hci_dev *hdev, - struct adv_monitor *monitor) -{ - struct msft_cp_le_monitor_advertisement *cp; - struct msft_le_monitor_advertisement_pattern_data *pattern_data; - struct msft_le_monitor_advertisement_pattern *pattern; - struct adv_pattern *entry; - size_t total_size = sizeof(*cp) + sizeof(*pattern_data); - ptrdiff_t offset = 0; - u8 pattern_count = 0; - struct sk_buff *skb; - u8 status; - - if (!msft_monitor_pattern_valid(monitor)) - return -EINVAL; - - list_for_each_entry(entry, &monitor->patterns, list) { - pattern_count++; - total_size += sizeof(*pattern) + entry->length; - } - - cp = kmalloc(total_size, GFP_KERNEL); - if (!cp) - return -ENOMEM; - - cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT; - cp->rssi_high = monitor->rssi.high_threshold; - cp->rssi_low = monitor->rssi.low_threshold; - cp->rssi_low_interval = (u8)monitor->rssi.low_threshold_timeout; - cp->rssi_sampling_period = monitor->rssi.sampling_period; - - cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN; - - pattern_data = (void *)cp->data; - pattern_data->count = pattern_count; - - list_for_each_entry(entry, &monitor->patterns, list) { - pattern = (void *)(pattern_data->data + offset); - /* the length also includes data_type and offset */ - pattern->length = entry->length + 2; - pattern->data_type = entry->ad_type; - pattern->start_byte = entry->offset; - memcpy(pattern->pattern, entry->value, entry->length); - offset += sizeof(*pattern) + entry->length; - } - - skb = __hci_cmd_sync(hdev, hdev->msft_opcode, total_size, cp, - HCI_CMD_TIMEOUT); - kfree(cp); - - if (IS_ERR(skb)) - return PTR_ERR(skb); - - status = skb->data[0]; - skb_pull(skb, 1); - - msft_le_monitor_advertisement_cb(hdev, status, hdev->msft_opcode, skb); - - return status; -} - -/* This function requires the caller holds hci_req_sync_lock */ -int msft_resume_sync(struct hci_dev *hdev) -{ - struct msft_data *msft = hdev->msft_data; - struct adv_monitor *monitor; - int handle = 0; - - if (!msft || !msft_monitor_supported(hdev)) - return 0; - - msft->resuming = true; - - while (1) { - monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); - if (!monitor) - break; - - msft_add_monitor_sync(hdev, monitor); - - handle++; - } - - /* All monitors have been resumed */ - msft->resuming = false; - - return 0; -} - -void msft_do_open(struct hci_dev *hdev) -{ - struct msft_data *msft = hdev->msft_data; - - if (hdev->msft_opcode == HCI_OP_NOP) - return; - - if (!msft) { - bt_dev_err(hdev, "MSFT extension not registered"); - return; - } - - bt_dev_dbg(hdev, "Initialize MSFT extension"); - - /* Reset existing MSFT data before re-reading */ - kfree(msft->evt_prefix); - msft->evt_prefix = NULL; - msft->evt_prefix_len = 0; - msft->features = 0; - - if (!read_supported_features(hdev, msft)) { - hdev->msft_data = NULL; - kfree(msft); - return; - } - - if (msft_monitor_supported(hdev)) { - msft->resuming = true; - msft_set_filter_enable(hdev, true); - /* Monitors get removed on power off, so we need to explicitly - * tell the controller to re-monitor. 
- */ - reregister_monitor(hdev, 0); - } -} - -void msft_do_close(struct hci_dev *hdev) -{ - struct msft_data *msft = hdev->msft_data; - struct msft_monitor_advertisement_handle_data *handle_data, *tmp; - struct adv_monitor *monitor; - - if (!msft) - return; - - bt_dev_dbg(hdev, "Cleanup of MSFT extension"); - - /* The controller will silently remove all monitors on power off. - * Therefore, remove handle_data mapping and reset monitor state. - */ - list_for_each_entry_safe(handle_data, tmp, &msft->handle_map, list) { - monitor = idr_find(&hdev->adv_monitors_idr, - handle_data->mgmt_handle); - - if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED) - monitor->state = ADV_MONITOR_STATE_REGISTERED; - - list_del(&handle_data->list); - kfree(handle_data); - } -} - -void msft_register(struct hci_dev *hdev) -{ - struct msft_data *msft = NULL; - - bt_dev_dbg(hdev, "Register MSFT extension"); - - msft = kzalloc(sizeof(*msft), GFP_KERNEL); - if (!msft) { - bt_dev_err(hdev, "Failed to register MSFT extension"); - return; - } - - INIT_LIST_HEAD(&msft->handle_map); - hdev->msft_data = msft; -} - -void msft_unregister(struct hci_dev *hdev) -{ - struct msft_data *msft = hdev->msft_data; - - if (!msft) - return; - - bt_dev_dbg(hdev, "Unregister MSFT extension"); - - hdev->msft_data = NULL; - - kfree(msft->evt_prefix); - kfree(msft); -} - -void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) -{ - struct msft_data *msft = hdev->msft_data; - u8 event; - - if (!msft) - return; - - /* When the extension has defined an event prefix, check that it - * matches, and otherwise just return. - */ - if (msft->evt_prefix_len > 0) { - if (skb->len < msft->evt_prefix_len) - return; - - if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len)) - return; - - skb_pull(skb, msft->evt_prefix_len); - } - - /* Every event starts at least with an event code and the rest of - * the data is variable and depends on the event code. - */ - if (skb->len < 1) - return; - - event = *skb->data; - skb_pull(skb, 1); - - bt_dev_dbg(hdev, "MSFT vendor event %u", event); -} - -__u64 msft_get_features(struct hci_dev *hdev) -{ - struct msft_data *msft = hdev->msft_data; - - return msft ? msft->features : 0; + hci_remove_adv_monitor_complete(hdev, status); } static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev, @@ -664,6 +445,35 @@ static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev, hci_dev_unlock(hdev); } +static bool msft_monitor_rssi_valid(struct adv_monitor *monitor) +{ + struct adv_rssi_thresholds *r = &monitor->rssi; + + if (r->high_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN || + r->high_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX || + r->low_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN || + r->low_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX) + return false; + + /* High_threshold_timeout is not supported, + * once high_threshold is reached, events are immediately reported. 
+ */ + if (r->high_threshold_timeout != 0) + return false; + + if (r->low_threshold_timeout > MSFT_RSSI_LOW_TIMEOUT_MAX) + return false; + + /* Sampling periods from 0x00 to 0xFF are all allowed */ + return true; +} + +static bool msft_monitor_pattern_valid(struct adv_monitor *monitor) +{ + return msft_monitor_rssi_valid(monitor); + /* No additional check needed for pattern-based monitor */ +} + /* This function requires the caller holds hdev->lock */ static int __msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor) @@ -731,15 +541,15 @@ int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor) if (!msft) return -EOPNOTSUPP; - if (msft->resuming || msft->suspending) + if (msft->reregistering) return -EBUSY; return __msft_add_monitor_pattern(hdev, monitor); } /* This function requires the caller holds hdev->lock */ -static int __msft_remove_monitor(struct hci_dev *hdev, - struct adv_monitor *monitor, u16 handle) +int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, + u16 handle) { struct msft_cp_le_cancel_monitor_advertisement cp; struct msft_monitor_advertisement_handle_data *handle_data; @@ -747,6 +557,12 @@ static int __msft_remove_monitor(struct hci_dev *hdev, struct msft_data *msft = hdev->msft_data; int err = 0; + if (!msft) + return -EOPNOTSUPP; + + if (msft->reregistering) + return -EBUSY; + handle_data = msft_find_handle_data(hdev, monitor->handle, true); /* If no matched handle, just remove without telling controller */ @@ -766,21 +582,6 @@ static int __msft_remove_monitor(struct hci_dev *hdev, return err; } -/* This function requires the caller holds hdev->lock */ -int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, - u16 handle) -{ - struct msft_data *msft = hdev->msft_data; - - if (!msft) - return -EOPNOTSUPP; - - if (msft->resuming || msft->suspending) - return -EBUSY; - - return __msft_remove_monitor(hdev, monitor, handle); -} - void msft_req_add_set_filter_enable(struct hci_request *req, bool enable) { struct hci_dev *hdev = req->hdev; diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index afcaf7d3b1..6e56d94b88 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -13,19 +13,15 @@ #if IS_ENABLED(CONFIG_BT_MSFTEXT) bool msft_monitor_supported(struct hci_dev *hdev); -void msft_register(struct hci_dev *hdev); -void msft_unregister(struct hci_dev *hdev); void msft_do_open(struct hci_dev *hdev); void msft_do_close(struct hci_dev *hdev); -void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb); +void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb); __u64 msft_get_features(struct hci_dev *hdev); int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor); int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, u16 handle); void msft_req_add_set_filter_enable(struct hci_request *req, bool enable); int msft_set_filter_enable(struct hci_dev *hdev, bool enable); -int msft_suspend_sync(struct hci_dev *hdev); -int msft_resume_sync(struct hci_dev *hdev); bool msft_curve_validity(struct hci_dev *hdev); #else @@ -35,12 +31,9 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev) return false; } -static inline void msft_register(struct hci_dev *hdev) {} -static inline void msft_unregister(struct hci_dev *hdev) {} static inline void msft_do_open(struct hci_dev *hdev) {} static inline void msft_do_close(struct hci_dev *hdev) {} -static inline void msft_vendor_evt(struct hci_dev *hdev, void *data, - struct
sk_buff *skb) {} +static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {} static inline __u64 msft_get_features(struct hci_dev *hdev) { return 0; } static inline int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor) @@ -62,16 +55,6 @@ static inline int msft_set_filter_enable(struct hci_dev *hdev, bool enable) return -EOPNOTSUPP; } -static inline int msft_suspend_sync(struct hci_dev *hdev) -{ - return -EOPNOTSUPP; -} - -static inline int msft_resume_sync(struct hci_dev *hdev) -{ - return -EOPNOTSUPP; -} - static inline bool msft_curve_validity(struct hci_dev *hdev) { return false; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 7324764384..f2bacb464c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -549,58 +549,22 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel) return dlc; } -static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag) +int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) { - int len = frag->len; + int len = skb->len; + + if (d->state != BT_CONNECTED) + return -ENOTCONN; BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); if (len > d->mtu) return -EINVAL; - rfcomm_make_uih(frag, d->addr); - __skb_queue_tail(&d->tx_queue, frag); + rfcomm_make_uih(skb, d->addr); + skb_queue_tail(&d->tx_queue, skb); - return len; -} - -int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) -{ - unsigned long flags; - struct sk_buff *frag, *next; - int len; - - if (d->state != BT_CONNECTED) - return -ENOTCONN; - - frag = skb_shinfo(skb)->frag_list; - skb_shinfo(skb)->frag_list = NULL; - - /* Queue all fragments atomically. */ - spin_lock_irqsave(&d->tx_queue.lock, flags); - - len = rfcomm_dlc_send_frag(d, skb); - if (len < 0 || !frag) - goto unlock; - - for (; frag; frag = next) { - int ret; - - next = frag->next; - - ret = rfcomm_dlc_send_frag(d, frag); - if (ret < 0) { - kfree_skb(frag); - goto unlock; - } - - len += ret; - } - -unlock: - spin_unlock_irqrestore(&d->tx_queue.lock, flags); - - if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags)) + if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) rfcomm_schedule(); return len; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 4bf4ea6cbb..2c95bb58f9 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -575,20 +575,46 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); - - release_sock(sk); - if (sent) - return sent; + goto done; - skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, - RFCOMM_SKB_TAIL_RESERVE); - if (IS_ERR(skb)) - return PTR_ERR(skb); + while (len) { + size_t size = min_t(size_t, len, d->mtu); + int err; - sent = rfcomm_dlc_send(d, skb); - if (sent < 0) - kfree_skb(skb); + skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) { + if (sent == 0) + sent = err; + break; + } + skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err) { + kfree_skb(skb); + if (sent == 0) + sent = err; + break; + } + + skb->priority = sk->sk_priority; + + err = rfcomm_dlc_send(d, skb); + if (err < 0) { + kfree_skb(skb); + if (sent == 0) + sent = err; + break; + } + + sent += size; + len -= size; + } + +done: + release_sock(sk); return sent; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 
index 8eabf41b29..6e047e178c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -69,7 +69,6 @@ struct sco_pinfo {
 	__u32		flags;
 	__u16		setting;
 	__u8		cmsg_mask;
-	struct bt_codec codec;
 	struct sco_conn	*conn;
 };

@@ -253,7 +252,7 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
 		return -EOPNOTSUPP;

 	hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
-			       sco_pi(sk)->setting, &sco_pi(sk)->codec);
+			       sco_pi(sk)->setting);
 	if (IS_ERR(hcon))
 		return PTR_ERR(hcon);

@@ -281,10 +280,12 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
 	return err;
 }

-static int sco_send_frame(struct sock *sk, struct sk_buff *skb)
+static int sco_send_frame(struct sock *sk, void *buf, int len,
+			  unsigned int msg_flags)
 {
 	struct sco_conn *conn = sco_pi(sk)->conn;
-	int len = skb->len;
+	struct sk_buff *skb;
+	int err;

 	/* Check outgoing MTU */
 	if (len > conn->mtu)
@@ -292,6 +293,11 @@ static int sco_send_frame(struct sock *sk, struct sk_buff *skb)

 	BT_DBG("sk %p len %d", sk, len);

+	skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return err;
+
+	memcpy(skb_put(skb, len), buf, len);
 	hci_send_sco(conn->hcon, skb);

 	return len;
@@ -435,7 +441,6 @@ static void __sco_sock_close(struct sock *sk)
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
 	}
-
 }

 /* Must be called on unlocked socket. */
@@ -496,10 +501,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
 	sk->sk_state = BT_OPEN;

 	sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
-	sco_pi(sk)->codec.id = BT_CODEC_CVSD;
-	sco_pi(sk)->codec.cid = 0xffff;
-	sco_pi(sk)->codec.vid = 0xffff;
-	sco_pi(sk)->codec.data_path = 0x00;

 	bt_sock_link(&sco_sk_list, sk);
 	return sk;
@@ -721,7 +722,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
			    size_t len)
 {
 	struct sock *sk = sock->sk;
-	struct sk_buff *skb;
+	void *buf;
 	int err;

 	BT_DBG("sock %p, sk %p", sock, sk);
@@ -733,21 +734,24 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;

-	skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0);
-	if (IS_ERR(skb))
-		return PTR_ERR(skb);
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (memcpy_from_msg(buf, msg, len)) {
+		kfree(buf);
+		return -EFAULT;
+	}

 	lock_sock(sk);

 	if (sk->sk_state == BT_CONNECTED)
-		err = sco_send_frame(sk, skb);
+		err = sco_send_frame(sk, buf, len, msg->msg_flags);
 	else
 		err = -ENOTCONN;

 	release_sock(sk);
-
-	if (err < 0)
-		kfree_skb(skb);
+	kfree(buf);
 	return err;
 }

@@ -829,9 +833,6 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
 	int len, err = 0;
 	struct bt_voice voice;
 	u32 opt;
-	struct bt_codecs *codecs;
-	struct hci_dev *hdev;
-	__u8 buffer[255];

 	BT_DBG("sk %p", sk);

@@ -879,16 +880,6 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
 		}

 		sco_pi(sk)->setting = voice.setting;
-		hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src,
-				     BDADDR_BREDR);
-		if (!hdev) {
-			err = -EBADFD;
-			break;
-		}
-		if (enhanced_sco_capable(hdev) &&
-		    voice.setting == BT_VOICE_TRANSPARENT)
-			sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT;
-		hci_dev_put(hdev);
 		break;

 	case BT_PKT_STATUS:
@@ -903,57 +894,6 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
			sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS;
 		break;

-	case BT_CODEC:
-		if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
-		    sk->sk_state != BT_CONNECT2) {
-			err = -EINVAL;
-			break;
-		}
-
-		hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src,
-				     BDADDR_BREDR);
-		if (!hdev) {
-			err = -EBADFD;
-			break;
-		}
-
-		if (!hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) {
-			hci_dev_put(hdev);
-			err = -EOPNOTSUPP;
-			break;
-		}
-
-		if (!hdev->get_data_path_id) {
-			hci_dev_put(hdev);
-			err = -EOPNOTSUPP;
-			break;
-		}
-
-		if (optlen < sizeof(struct bt_codecs) ||
-		    optlen > sizeof(buffer)) {
-			hci_dev_put(hdev);
-			err = -EINVAL;
-			break;
-		}
-
-		if (copy_from_sockptr(buffer, optval, optlen)) {
-			hci_dev_put(hdev);
-			err = -EFAULT;
-			break;
-		}
-
-		codecs = (void *)buffer;
-
-		if (codecs->num_codecs > 1) {
-			hci_dev_put(hdev);
-			err = -EINVAL;
-			break;
-		}
-
-		sco_pi(sk)->codec = codecs->codecs[0];
-		hci_dev_put(hdev);
-		break;
-
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -1032,12 +972,6 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
 	struct bt_voice voice;
 	u32 phys;
 	int pkt_status;
-	int buf_len;
-	struct codec_list *c;
-	u8 num_codecs, i, __user *ptr;
-	struct hci_dev *hdev;
-	struct hci_codec_caps *caps;
-	struct bt_codec codec;

 	BT_DBG("sk %p", sk);

@@ -1102,101 +1036,6 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
			err = -EFAULT;

 		break;

-	case BT_CODEC:
-		num_codecs = 0;
-		buf_len = 0;
-
-		hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
-		if (!hdev) {
-			err = -EBADFD;
-			break;
-		}
-
-		if (!hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) {
-			hci_dev_put(hdev);
-			err = -EOPNOTSUPP;
-			break;
-		}
-
-		if (!hdev->get_data_path_id) {
-			hci_dev_put(hdev);
-			err = -EOPNOTSUPP;
-			break;
-		}
-
-		/* find total buffer size required to copy codec + caps */
-		hci_dev_lock(hdev);
-		list_for_each_entry(c, &hdev->local_codecs, list) {
-			if (c->transport != HCI_TRANSPORT_SCO_ESCO)
-				continue;
-			num_codecs++;
-			for (i = 0, caps = c->caps; i < c->num_caps; i++) {
-				buf_len += 1 + caps->len;
-				caps = (void *)&caps->data[caps->len];
-			}
-			buf_len += sizeof(struct bt_codec);
-		}
-		hci_dev_unlock(hdev);
-
-		buf_len += sizeof(struct bt_codecs);
-		if (buf_len > len) {
-			hci_dev_put(hdev);
-			err = -ENOBUFS;
-			break;
-		}
-		ptr = optval;
-
-		if (put_user(num_codecs, ptr)) {
-			hci_dev_put(hdev);
-			err = -EFAULT;
-			break;
-		}
-		ptr += sizeof(num_codecs);
-
-		/* Iterate all the codecs supported over SCO and populate
-		 * codec data
-		 */
-		hci_dev_lock(hdev);
-		list_for_each_entry(c, &hdev->local_codecs, list) {
-			if (c->transport != HCI_TRANSPORT_SCO_ESCO)
-				continue;
-
-			codec.id = c->id;
-			codec.cid = c->cid;
-			codec.vid = c->vid;
-			err = hdev->get_data_path_id(hdev, &codec.data_path);
-			if (err < 0)
-				break;
-			codec.num_caps = c->num_caps;
-			if (copy_to_user(ptr, &codec, sizeof(codec))) {
-				err = -EFAULT;
-				break;
-			}
-			ptr += sizeof(codec);
-
-			/* find codec capabilities data length */
-			len = 0;
-			for (i = 0, caps = c->caps; i < c->num_caps; i++) {
-				len += 1 + caps->len;
-				caps = (void *)&caps->data[caps->len];
-			}
-
-			/* copy codec capabilities data */
-			if (len && copy_to_user(ptr, c->caps, len)) {
-				err = -EFAULT;
-				break;
-			}
-			ptr += len;
-		}
-
-		if (!err && put_user(buf_len, optlen))
-			err = -EFAULT;
-
-		hci_dev_unlock(hdev);
-		hci_dev_put(hdev);
-
-		break;
-
 	default:
 		err = -ENOPROTOOPT;
 		break;
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
index 1ebe270bde..1c0a98d8c2 100644
--- a/net/bpf/Makefile
+++ b/net/bpf/Makefile
@@ -1,5 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BPF_SYSCALL)	:= test_run.o
-ifeq ($(CONFIG_BPF_JIT),y)
-obj-$(CONFIG_BPF_SYSCALL)	+= bpf_dummy_struct_ops.o
-endif
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 46dd957559..b5f4ef3535 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2017 Facebook */
 #include
-#include
 #include
 #include
 #include
@@ -242,11 +241,9 @@ BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
 BTF_SET_END(test_sk_kfunc_ids)

-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
 {
-	if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
-		return true;
-	return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
+	return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
 }

 static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
@@ -358,9 +355,13 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 		return -EINVAL;

 	if (ctx_size_in) {
-		info.ctx = memdup_user(ctx_in, ctx_size_in);
-		if (IS_ERR(info.ctx))
-			return PTR_ERR(info.ctx);
+		info.ctx = kzalloc(ctx_size_in, GFP_USER);
+		if (!info.ctx)
+			return -ENOMEM;
+		if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
 	} else {
 		info.ctx = NULL;
 	}
@@ -388,6 +389,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
	    copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
 		err = -EFAULT;

+out:
 	kfree(info.ctx);
 	return err;
 }
@@ -481,7 +483,11 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 		return -EINVAL;

 	/* priority is allowed */
-	/* ingress_ifindex is allowed */
+
+	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
+			   offsetof(struct __sk_buff, ifindex)))
+		return -EINVAL;
+
 	/* ifindex is allowed */

 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
@@ -505,18 +511,11 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 	/* gso_size is allowed */

 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
-			   offsetof(struct __sk_buff, hwtstamp)))
-		return -EINVAL;
-
-	/* hwtstamp is allowed */
-
-	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp),
			   sizeof(struct __sk_buff)))
 		return -EINVAL;

 	skb->mark = __skb->mark;
 	skb->priority = __skb->priority;
-	skb->skb_iif = __skb->ingress_ifindex;
 	skb->tstamp = __skb->tstamp;
 	memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);

@@ -533,7 +532,6 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 		return -EINVAL;
 	skb_shinfo(skb)->gso_segs = __skb->gso_segs;
 	skb_shinfo(skb)->gso_size = __skb->gso_size;
-	skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;

 	return 0;
 }
@@ -547,13 +545,11 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)

 	__skb->mark = skb->mark;
 	__skb->priority = skb->priority;
-	__skb->ingress_ifindex = skb->skb_iif;
 	__skb->ifindex = skb->dev->ifindex;
 	__skb->tstamp = skb->tstamp;
 	memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
 	__skb->wire_len = cb->pkt_len;
 	__skb->gso_segs = skb_shinfo(skb)->gso_segs;
-	__skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp;
 }

 static struct proto bpf_dummy_proto = {
@@ -805,8 +801,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 	if (ret)
 		goto free_data;

-	if (repeat > 1)
-		bpf_prog_change_xdp(NULL, prog);
+	bpf_prog_change_xdp(NULL, prog);
 	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
 	/* We convert the xdp_buff back to an xdp_md before checking the return
 	 * code so the reference count of any held netdevice will be decremented
@@ -827,8 +822,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
				     sizeof(struct xdp_md));

 out:
-	if (repeat > 1)
-		bpf_prog_change_xdp(prog, NULL);
+	bpf_prog_change_xdp(prog, NULL);
 free_data:
 	kfree(data);
 free_ctx:
@@ -1047,9 +1041,13 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 		return -EINVAL;

 	if (ctx_size_in) {
-		ctx = memdup_user(ctx_in, ctx_size_in);
-		if (IS_ERR(ctx))
-			return PTR_ERR(ctx);
+		ctx = kzalloc(ctx_size_in, GFP_USER);
+		if (!ctx)
+			return -ENOMEM;
+		if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
 	}

 	rcu_read_lock_trace();
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1fac72cc61..d3a32c6813 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -36,7 +36,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 	bool changed_addr;
 	int err;

-	if (netif_is_bridge_master(dev)) {
+	if (dev->priv_flags & IFF_EBRIDGE) {
 		err = br_vlan_bridge_event(dev, event, ptr);
 		if (err)
 			return notifier_from_errno(err);
@@ -349,7 +349,7 @@ static void __net_exit br_net_exit(struct net *net)

 	rtnl_lock();
 	for_each_netdev(net, dev)
-		if (netif_is_bridge_master(dev))
+		if (dev->priv_flags & IFF_EBRIDGE)
 			br_dev_delete(dev, &list);

 	unregister_netdevice_many(&list);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ccda68bd4..46812b6597 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -32,6 +32,10 @@ static const struct rhashtable_params br_fdb_rht_params = {
 };

 static struct kmem_cache *br_fdb_cache __read_mostly;
+static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+		      const unsigned char *addr, u16 vid);
+static void fdb_notify(struct net_bridge *br,
+		       const struct net_bridge_fdb_entry *, int, bool);

 int __init br_fdb_init(void)
@@ -83,128 +87,6 @@ static void fdb_rcu_free(struct rcu_head *head)
 	kmem_cache_free(br_fdb_cache, ent);
 }

-static int fdb_to_nud(const struct net_bridge *br,
-		      const struct net_bridge_fdb_entry *fdb)
-{
-	if (test_bit(BR_FDB_LOCAL, &fdb->flags))
-		return NUD_PERMANENT;
-	else if (test_bit(BR_FDB_STATIC, &fdb->flags))
-		return NUD_NOARP;
-	else if (has_expired(br, fdb))
-		return NUD_STALE;
-	else
-		return NUD_REACHABLE;
-}
-
-static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
-			 const struct net_bridge_fdb_entry *fdb,
-			 u32 portid, u32 seq, int type, unsigned int flags)
-{
-	const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
-	unsigned long now = jiffies;
-	struct nda_cacheinfo ci;
-	struct nlmsghdr *nlh;
-	struct ndmsg *ndm;
-
-	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
-	if (nlh == NULL)
-		return -EMSGSIZE;
-
-	ndm = nlmsg_data(nlh);
-	ndm->ndm_family	 = AF_BRIDGE;
-	ndm->ndm_pad1    = 0;
-	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags	 = 0;
-	ndm->ndm_type	 = 0;
-	ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
-	ndm->ndm_state   = fdb_to_nud(br, fdb);
-
-	if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
-		ndm->ndm_flags |= NTF_OFFLOADED;
-	if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
-		ndm->ndm_flags |= NTF_EXT_LEARNED;
-	if (test_bit(BR_FDB_STICKY, &fdb->flags))
-		ndm->ndm_flags |= NTF_STICKY;
-
-	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
-		goto nla_put_failure;
-	if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
-		goto nla_put_failure;
-	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
-	ci.ndm_confirmed = 0;
-	ci.ndm_updated	 = jiffies_to_clock_t(now - fdb->updated);
-	ci.ndm_refcnt	 = 0;
-	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
-		goto nla_put_failure;
-
-	if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
-					&fdb->key.vlan_id))
-		goto nla_put_failure;
-
-	if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
-		struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
-		u8 notify_bits = FDB_NOTIFY_BIT;
-
-		if (!nest)
-			goto nla_put_failure;
-		if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
-			notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
-
-		if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
-			nla_nest_cancel(skb, nest);
-			goto nla_put_failure;
-		}
-
-		nla_nest_end(skb, nest);
-	}
-
-	nlmsg_end(skb, nlh);
-	return 0;
-
-nla_put_failure:
-	nlmsg_cancel(skb, nlh);
-	return -EMSGSIZE;
-}
-
-static inline size_t fdb_nlmsg_size(void)
-{
-	return NLMSG_ALIGN(sizeof(struct ndmsg))
-		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
-		+ nla_total_size(sizeof(u32)) /* NDA_MASTER */
-		+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
-		+ nla_total_size(sizeof(struct nda_cacheinfo))
-		+ nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
-		+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
-}
-
-static void fdb_notify(struct net_bridge *br,
-		       const struct net_bridge_fdb_entry *fdb, int type,
-		       bool swdev_notify)
-{
-	struct net *net = dev_net(br->dev);
-	struct sk_buff *skb;
-	int err = -ENOBUFS;
-
-	if (swdev_notify)
-		br_switchdev_fdb_notify(br, fdb, type);
-
-	skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
-	if (skb == NULL)
-		goto errout;
-
-	err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
-	if (err < 0) {
-		/* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
-		WARN_ON(err == -EMSGSIZE);
-		kfree_skb(skb);
-		goto errout;
-	}
-	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
-	return;
-errout:
-	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
-}
-
 static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
						  const unsigned char *addr,
						  __u16 vid)
@@ -375,66 +257,6 @@ void br_fdb_find_delete_local(struct net_bridge *br,
 	spin_unlock_bh(&br->hash_lock);
 }

-static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
-					       struct net_bridge_port *source,
-					       const unsigned char *addr,
-					       __u16 vid,
-					       unsigned long flags)
-{
-	struct net_bridge_fdb_entry *fdb;
-	int err;
-
-	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
-	if (!fdb)
-		return NULL;
-
-	memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
-	WRITE_ONCE(fdb->dst, source);
-	fdb->key.vlan_id = vid;
-	fdb->flags = flags;
-	fdb->updated = fdb->used = jiffies;
-	err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode,
-					    br_fdb_rht_params);
-	if (err) {
-		kmem_cache_free(br_fdb_cache, fdb);
-		return NULL;
-	}
-
-	hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
-
-	return fdb;
-}
-
-static int fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
-			 const unsigned char *addr, u16 vid)
-{
-	struct net_bridge_fdb_entry *fdb;
-
-	if (!is_valid_ether_addr(addr))
-		return -EINVAL;
-
-	fdb = br_fdb_find(br, addr, vid);
-	if (fdb) {
-		/* it is okay to have multiple ports with same
-		 * address, just use the first one.
-		 */
-		if (test_bit(BR_FDB_LOCAL, &fdb->flags))
-			return 0;
-		br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
-			source ? source->dev->name : br->dev->name, addr, vid);
-		fdb_delete(br, fdb, true);
-	}
-
-	fdb = fdb_create(br, source, addr, vid,
-			 BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
-	if (!fdb)
-		return -ENOMEM;
-
-	fdb_add_hw_addr(br, addr);
-	fdb_notify(br, fdb, RTM_NEWNEIGH, true);
-	return 0;
-}
-
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 {
 	struct net_bridge_vlan_group *vg;
@@ -461,7 +283,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)

 insert:
 	/* insert new address, may fail if invalid address or dup. */
-	fdb_add_local(br, p, newaddr, 0);
+	fdb_insert(br, p, newaddr, 0);

 	if (!vg || !vg->num_vlans)
 		goto done;
@@ -471,7 +293,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
	 * from under us.
	 */
 	list_for_each_entry(v, &vg->vlan_list, vlist)
-		fdb_add_local(br, p, newaddr, v->vid);
+		fdb_insert(br, p, newaddr, v->vid);

 done:
 	spin_unlock_bh(&br->hash_lock);
@@ -491,7 +313,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
	    !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
 		fdb_delete_local(br, NULL, f);

-	fdb_add_local(br, NULL, newaddr, 0);
+	fdb_insert(br, NULL, newaddr, 0);
 	vg = br_vlan_group(br);
 	if (!vg || !vg->num_vlans)
 		goto out;
@@ -506,7 +328,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 		if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
		    !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
 			fdb_delete_local(br, NULL, f);
-		fdb_add_local(br, NULL, newaddr, v->vid);
+		fdb_insert(br, NULL, newaddr, v->vid);
 	}
 out:
 	spin_unlock_bh(&br->hash_lock);
@@ -681,14 +503,71 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 	return num;
 }

+static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
+					       struct net_bridge_port *source,
+					       const unsigned char *addr,
+					       __u16 vid,
+					       unsigned long flags)
+{
+	struct net_bridge_fdb_entry *fdb;
+
+	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
+	if (fdb) {
+		memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
+		WRITE_ONCE(fdb->dst, source);
+		fdb->key.vlan_id = vid;
+		fdb->flags = flags;
+		fdb->updated = fdb->used = jiffies;
+		if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
+						  &fdb->rhnode,
+						  br_fdb_rht_params)) {
+			kmem_cache_free(br_fdb_cache, fdb);
+			fdb = NULL;
+		} else {
+			hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
+		}
+	}
+	return fdb;
+}
+
+static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+		      const unsigned char *addr, u16 vid)
+{
+	struct net_bridge_fdb_entry *fdb;
+
+	if (!is_valid_ether_addr(addr))
+		return -EINVAL;
+
+	fdb = br_fdb_find(br, addr, vid);
+	if (fdb) {
+		/* it is okay to have multiple ports with same
+		 * address, just use the first one.
+		 */
+		if (test_bit(BR_FDB_LOCAL, &fdb->flags))
+			return 0;
+		br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
+		       source ? source->dev->name : br->dev->name, addr, vid);
+		fdb_delete(br, fdb, true);
+	}
+
+	fdb = fdb_create(br, source, addr, vid,
+			 BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
+	if (!fdb)
+		return -ENOMEM;
+
+	fdb_add_hw_addr(br, addr);
+	fdb_notify(br, fdb, RTM_NEWNEIGH, true);
+	return 0;
+}
+
 /* Add entry for local address of interface */
-int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
-		     const unsigned char *addr, u16 vid)
+int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+		  const unsigned char *addr, u16 vid)
 {
 	int ret;

 	spin_lock_bh(&br->hash_lock);
-	ret = fdb_add_local(br, source, addr, vid);
+	ret = fdb_insert(br, source, addr, vid);
 	spin_unlock_bh(&br->hash_lock);
 	return ret;
 }
@@ -759,6 +638,182 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 	}
 }

+static int fdb_to_nud(const struct net_bridge *br,
+		      const struct net_bridge_fdb_entry *fdb)
+{
+	if (test_bit(BR_FDB_LOCAL, &fdb->flags))
+		return NUD_PERMANENT;
+	else if (test_bit(BR_FDB_STATIC, &fdb->flags))
+		return NUD_NOARP;
+	else if (has_expired(br, fdb))
+		return NUD_STALE;
+	else
+		return NUD_REACHABLE;
+}
+
+static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
+			 const struct net_bridge_fdb_entry *fdb,
+			 u32 portid, u32 seq, int type, unsigned int flags)
+{
+	const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+	unsigned long now = jiffies;
+	struct nda_cacheinfo ci;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family	 = AF_BRIDGE;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags	 = 0;
+	ndm->ndm_type	 = 0;
+	ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
+	ndm->ndm_state   = fdb_to_nud(br, fdb);
+
+	if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
+		ndm->ndm_flags |= NTF_OFFLOADED;
+	if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
+		ndm->ndm_flags |= NTF_EXT_LEARNED;
+	if (test_bit(BR_FDB_STICKY, &fdb->flags))
+		ndm->ndm_flags |= NTF_STICKY;
+
+	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
+		goto nla_put_failure;
+	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
+	ci.ndm_confirmed = 0;
+	ci.ndm_updated	 = jiffies_to_clock_t(now - fdb->updated);
+	ci.ndm_refcnt	 = 0;
+	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
+		goto nla_put_failure;
+
+	if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
+					&fdb->key.vlan_id))
+		goto nla_put_failure;
+
+	if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+		struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
+		u8 notify_bits = FDB_NOTIFY_BIT;
+
+		if (!nest)
+			goto nla_put_failure;
+		if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
+			notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
+
+		if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
+			nla_nest_cancel(skb, nest);
+			goto nla_put_failure;
+		}
+
+		nla_nest_end(skb, nest);
+	}
+
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static inline size_t fdb_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ndmsg))
+		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+		+ nla_total_size(sizeof(u32)) /* NDA_MASTER */
+		+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
+		+ nla_total_size(sizeof(struct nda_cacheinfo))
+		+ nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
+		+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
+}
+
+static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
+			     const struct net_bridge_fdb_entry *fdb,
+			     unsigned long action, const void *ctx)
+{
+	const struct net_bridge_port *p = READ_ONCE(fdb->dst);
+	struct switchdev_notifier_fdb_info item;
+	int err;
+
+	item.addr = fdb->key.addr.addr;
+	item.vid = fdb->key.vlan_id;
+	item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+	item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+	item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
+	item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
+	item.info.ctx = ctx;
+
+	err = nb->notifier_call(nb, action, &item);
+	return notifier_to_errno(err);
+}
+
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
+		  struct notifier_block *nb)
+{
+	struct net_bridge_fdb_entry *fdb;
+	struct net_bridge *br;
+	unsigned long action;
+	int err = 0;
+
+	if (!nb)
+		return 0;
+
+	if (!netif_is_bridge_master(br_dev))
+		return -EINVAL;
+
+	br = netdev_priv(br_dev);
+
+	if (adding)
+		action = SWITCHDEV_FDB_ADD_TO_DEVICE;
+	else
+		action = SWITCHDEV_FDB_DEL_TO_DEVICE;
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
+		err = br_fdb_replay_one(br, nb, fdb, action, ctx);
+		if (err)
+			break;
+	}
+
+	rcu_read_unlock();
+
+	return err;
+}
+
+static void fdb_notify(struct net_bridge *br,
+		       const struct net_bridge_fdb_entry *fdb, int type,
+		       bool swdev_notify)
+{
+	struct net *net = dev_net(br->dev);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	if (swdev_notify)
+		br_switchdev_fdb_notify(br, fdb, type);
+
+	skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	return;
+errout:
+	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+}
+
 /* Dump information about entries, in response to GETNEIGH */
 int br_fdb_dump(struct sk_buff *skb,
		struct netlink_callback *cb,
@@ -770,7 +825,7 @@ int br_fdb_dump(struct sk_buff *skb,
 	struct net_bridge_fdb_entry *f;
 	int err = 0;

-	if (!netif_is_bridge_master(dev))
+	if (!(dev->priv_flags & IFF_EBRIDGE))
 		return err;

 	if (!filter_dev) {
@@ -1021,7 +1076,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 	}

-	if (netif_is_bridge_master(dev)) {
+	if (dev->priv_flags & IFF_EBRIDGE) {
 		br = netdev_priv(dev);
 		vg = br_vlan_group(br);
 	} else {
@@ -1118,7 +1173,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 	struct net_bridge *br;
 	int err;

-	if (netif_is_bridge_master(dev)) {
+	if (dev->priv_flags & IFF_EBRIDGE) {
 		br = netdev_priv(dev);
 		vg = br_vlan_group(br);
 	} else {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 55f47cadb1..4a02f8bb27 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -274,7 +274,7 @@ static void destroy_nbp(struct net_bridge_port *p)

 	p->br = NULL;
 	p->dev = NULL;
-	dev_put_track(dev, &p->dev_tracker);
+	dev_put(dev);

 	kobject_put(&p->kobj);
 }
@@ -397,10 +397,10 @@ static int find_portno(struct net_bridge *br)
 	if (!inuse)
 		return -ENOMEM;

-	__set_bit(0, inuse);	/* zero is reserved */
-	list_for_each_entry(p, &br->port_list, list)
-		__set_bit(p->port_no, inuse);
-
+	set_bit(0, inuse);	/* zero is reserved */
+	list_for_each_entry(p, &br->port_list, list) {
+		set_bit(p->port_no, inuse);
+	}
 	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
 	bitmap_free(inuse);

@@ -423,7 +423,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 		return ERR_PTR(-ENOMEM);

 	p->br = br;
-	dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL);
+	dev_hold(dev);
 	p->dev = dev;
 	p->path_cost = port_cost(dev);
 	p->priority = 0x8000 >> BR_PORT_BITS;
@@ -434,7 +434,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	br_stp_port_timer_init(p);
 	err = br_multicast_add_port(p);
 	if (err) {
-		dev_put_track(dev, &p->dev_tracker);
+		dev_put(dev);
 		kfree(p);
 		p = ERR_PTR(err);
 	}
@@ -471,7 +471,7 @@ int br_del_bridge(struct net *net, const char *name)
 	if (dev == NULL)
 		ret = -ENXIO;	/* Could not find device */

-	else if (!netif_is_bridge_master(dev)) {
+	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
 		/* Attempt to delete non bridge device! */
 		ret = -EPERM;
 	}
@@ -525,8 +525,8 @@ static void br_set_gso_limits(struct net_bridge *br)
 		gso_max_size = min(gso_max_size, p->dev->gso_max_size);
 		gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
 	}
-	netif_set_gso_max_size(br->dev, gso_max_size);
-	netif_set_gso_max_segs(br->dev, gso_max_segs);
+	br->dev->gso_max_size = gso_max_size;
+	br->dev->gso_max_segs = gso_max_segs;
 }

 /*
@@ -615,7 +615,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	err = dev_set_allmulti(dev, 1);
 	if (err) {
 		br_multicast_del_port(p);
-		dev_put_track(dev, &p->dev_tracker);
 		kfree(p);	/* kobject not yet init'd, manually free */
 		goto err1;
 	}
@@ -671,7 +670,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	else
 		netdev_set_rx_headroom(dev, br_hr);

-	if (br_fdb_add_local(br, p, dev->dev_addr, 0))
+	if (br_fdb_insert(br, p, dev->dev_addr, 0))
 		netdev_err(dev, "failed insert local address bridge forwarding table\n");

 	if (br->dev->addr_assign_type != NET_ADDR_SET) {
@@ -725,10 +724,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
 	br_multicast_del_port(p);
-	dev_put_track(dev, &p->dev_tracker);
 	kobject_put(&p->kobj);
 	dev_set_allmulti(dev, -1);
 err1:
+	dev_put(dev);
 	return err;
 }

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index f213ed1083..9922497e59 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -8,7 +8,6 @@
 */
 #include
-#include
 #include
 #include
 #include
@@ -27,7 +26,7 @@ static int get_bridge_ifindices(struct net *net, int *indices, int num)
 	for_each_netdev_rcu(net, dev) {
 		if (i >= num)
 			break;
-		if (netif_is_bridge_master(dev))
+		if (dev->priv_flags & IFF_EBRIDGE)
 			indices[i++] = dev->ifindex;
 	}
 	rcu_read_unlock();
@@ -103,56 +102,37 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
 	return ret;
 }

-#define BR_UARGS_MAX 4
-static int br_dev_read_uargs(unsigned long *args, size_t nr_args,
-			     void __user **argp, void __user *data)
-{
-	int ret;
-
-	if (nr_args < 2 || nr_args > BR_UARGS_MAX)
-		return -EINVAL;
-
-	if (in_compat_syscall()) {
-		unsigned int cargs[BR_UARGS_MAX];
-		int i;
-
-		ret = copy_from_user(cargs, data, nr_args * sizeof(*cargs));
-		if (ret)
-			goto fault;
-
-		for (i = 0; i < nr_args; ++i)
-			args[i] = cargs[i];
-
-		*argp = compat_ptr(args[1]);
-	} else {
-		ret = copy_from_user(args, data, nr_args * sizeof(*args));
-		if (ret)
-			goto fault;
-		*argp = (void __user *)args[1];
-	}
-
-	return 0;
-fault:
-	return -EFAULT;
-}
-
 /*
 * Legacy ioctl's through SIOCDEVPRIVATE
 * This interface is deprecated because it was too difficult
 * to do the translation for 32/64bit ioctl compatibility.
 */
-int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq,
-			  void __user *data, int cmd)
+int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *p = NULL;
 	unsigned long args[4];
 	void __user *argp;
-	int ret;
+	int ret = -EOPNOTSUPP;

-	ret = br_dev_read_uargs(args, ARRAY_SIZE(args), &argp, data);
-	if (ret)
-		return ret;
+	if (in_compat_syscall()) {
+		unsigned int cargs[4];
+
+		if (copy_from_user(cargs, data, sizeof(cargs)))
+			return -EFAULT;
+
+		args[0] = cargs[0];
+		args[1] = cargs[1];
+		args[2] = cargs[2];
+		args[3] = cargs[3];
+
+		argp = compat_ptr(args[1]);
+	} else {
+		if (copy_from_user(args, data, sizeof(args)))
+			return -EFAULT;
+
+		argp = (void __user *)args[1];
+	}

 	switch (args[0]) {
 	case BRCTL_ADD_IF:
@@ -321,9 +301,6 @@ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq,

 	case BRCTL_GET_FDB_ENTRIES:
 		return get_fdb_entries(br, argp, args[2], args[3]);
-
-	default:
-		ret = -EOPNOTSUPP;
 	}

 	if (!ret) {
@@ -336,15 +313,12 @@ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq,
 	return ret;
 }

-static int old_deviceless(struct net *net, void __user *data)
+static int old_deviceless(struct net *net, void __user *uarg)
 {
 	unsigned long args[3];
-	void __user *argp;
-	int ret;

-	ret = br_dev_read_uargs(args, ARRAY_SIZE(args), &argp, data);
-	if (ret)
-		return ret;
+	if (copy_from_user(args, uarg, sizeof(args)))
+		return -EFAULT;

 	switch (args[0]) {
 	case BRCTL_GET_VERSION:
@@ -363,7 +337,7 @@ static int old_deviceless(struct net *net, void __user *data)

 		args[2] = get_bridge_ifindices(net, indices, args[2]);

-		ret = copy_to_user(argp, indices,
+		ret = copy_to_user((void __user *)args[1], indices,
				   array_size(args[2], sizeof(int))) ?
			-EFAULT : args[2];
@@ -379,7 +353,7 @@ static int old_deviceless(struct net *net, void __user *data)
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;

-		if (copy_from_user(buf, argp, IFNAMSIZ))
+		if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ))
 			return -EFAULT;

 		buf[IFNAMSIZ-1] = 0;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 4556d91395..0281453f77 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -422,7 +422,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	cb->seq = net->dev_base_seq;

 	for_each_netdev_rcu(net, dev) {
-		if (netif_is_bridge_master(dev)) {
+		if (dev->priv_flags & IFF_EBRIDGE) {
 			struct net_bridge *br = netdev_priv(dev);
 			struct br_port_msg *bpm;

@@ -552,16 +552,252 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
 	return nlmsg_size;
 }

+struct br_mdb_complete_info {
+	struct net_bridge_port *port;
+	struct br_ip ip;
+};
+
+static void br_mdb_complete(struct net_device *dev, int err, void *priv)
+{
+	struct br_mdb_complete_info *data = priv;
+	struct net_bridge_port_group __rcu **pp;
+	struct net_bridge_port_group *p;
+	struct net_bridge_mdb_entry *mp;
+	struct net_bridge_port *port = data->port;
+	struct net_bridge *br = port->br;
+
+	if (err)
+		goto err;
+
+	spin_lock_bh(&br->multicast_lock);
+	mp = br_mdb_ip_get(br, &data->ip);
+	if (!mp)
+		goto out;
+	for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
+		if (p->key.port != port)
+			continue;
+		p->flags |= MDB_PG_FLAGS_OFFLOAD;
+	}
+out:
+	spin_unlock_bh(&br->multicast_lock);
+err:
+	kfree(priv);
+}
+
+static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
+				      const struct net_bridge_mdb_entry *mp)
+{
+	if (mp->addr.proto == htons(ETH_P_IP))
+		ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (mp->addr.proto == htons(ETH_P_IPV6))
+		ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
+#endif
+	else
+		ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
+
+	mdb->vid = mp->addr.vid;
+}
+
+static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
+			     const struct switchdev_obj_port_mdb *mdb,
+			     unsigned long action, const void *ctx,
+			     struct netlink_ext_ack *extack)
+{
+	struct switchdev_notifier_port_obj_info obj_info = {
+		.info = {
+			.dev = dev,
+			.extack = extack,
+			.ctx = ctx,
+		},
+		.obj = &mdb->obj,
+	};
+	int err;
+
+	err = nb->notifier_call(nb, action, &obj_info);
+	return notifier_to_errno(err);
+}
+
+static int br_mdb_queue_one(struct list_head *mdb_list,
+			    enum switchdev_obj_id id,
+			    const struct net_bridge_mdb_entry *mp,
+			    struct net_device *orig_dev)
+{
+	struct switchdev_obj_port_mdb *mdb;
+
+	mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
+	if (!mdb)
+		return -ENOMEM;
+
+	mdb->obj.id = id;
+	mdb->obj.orig_dev = orig_dev;
+	br_switchdev_mdb_populate(mdb, mp);
+	list_add_tail(&mdb->obj.list, mdb_list);
+
+	return 0;
+}
+
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  const void *ctx, bool adding, struct notifier_block *nb,
+		  struct netlink_ext_ack *extack)
+{
+	const struct net_bridge_mdb_entry *mp;
+	struct switchdev_obj *obj, *tmp;
+	struct net_bridge *br;
+	unsigned long action;
+	LIST_HEAD(mdb_list);
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	if (!nb)
+		return 0;
+
+	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	br = netdev_priv(br_dev);
+
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+		return 0;
+
+	/* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
+	 * because the write-side protection is br->multicast_lock. But we
+	 * need to emulate the [ blocking ] calling context of a regular
+	 * switchdev event, so since both br->multicast_lock and RCU read side
+	 * critical sections are atomic, we have no choice but to pick the RCU
+	 * read side lock, queue up all our events, leave the critical section
+	 * and notify switchdev from blocking context.
+	 */
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+		struct net_bridge_port_group __rcu * const *pp;
+		const struct net_bridge_port_group *p;
+
+		if (mp->host_joined) {
+			err = br_mdb_queue_one(&mdb_list,
+					       SWITCHDEV_OBJ_ID_HOST_MDB,
+					       mp, br_dev);
+			if (err) {
+				rcu_read_unlock();
+				goto out_free_mdb;
+			}
+		}
+
+		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+		     pp = &p->next) {
+			if (p->key.port->dev != dev)
+				continue;
+
+			err = br_mdb_queue_one(&mdb_list,
+					       SWITCHDEV_OBJ_ID_PORT_MDB,
+					       mp, dev);
+			if (err) {
+				rcu_read_unlock();
+				goto out_free_mdb;
+			}
+		}
+	}
+
+	rcu_read_unlock();
+
+	if (adding)
+		action = SWITCHDEV_PORT_OBJ_ADD;
+	else
+		action = SWITCHDEV_PORT_OBJ_DEL;
+
+	list_for_each_entry(obj, &mdb_list, list) {
+		err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+					action, ctx, extack);
+		if (err)
+			goto out_free_mdb;
+	}
+
+out_free_mdb:
+	list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
+		list_del(&obj->list);
+		kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
+	}
+
+	return err;
+}
+
+static void br_mdb_switchdev_host_port(struct net_device *dev,
+				       struct net_device *lower_dev,
+				       struct net_bridge_mdb_entry *mp,
+				       int type)
+{
+	struct switchdev_obj_port_mdb mdb = {
+		.obj = {
+			.id = SWITCHDEV_OBJ_ID_HOST_MDB,
+			.flags = SWITCHDEV_F_DEFER,
+			.orig_dev = dev,
+		},
+	};
+
+	br_switchdev_mdb_populate(&mdb, mp);
+
+	switch (type) {
+	case RTM_NEWMDB:
+		switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
+		break;
+	case RTM_DELMDB:
+		switchdev_port_obj_del(lower_dev, &mdb.obj);
+		break;
+	}
+}
+
+static void br_mdb_switchdev_host(struct net_device *dev,
+				  struct net_bridge_mdb_entry *mp, int type)
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(dev, lower_dev, iter)
+		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
+}
+
 void br_mdb_notify(struct net_device *dev,
		   struct net_bridge_mdb_entry *mp,
		   struct net_bridge_port_group *pg,
		   int type)
 {
+	struct br_mdb_complete_info *complete_info;
+	struct switchdev_obj_port_mdb mdb = {
+		.obj = {
+			.id = SWITCHDEV_OBJ_ID_PORT_MDB,
+			.flags = SWITCHDEV_F_DEFER,
+		},
+	};
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;

-	br_switchdev_mdb_notify(dev, mp, pg, type);
+	if (pg) {
+		br_switchdev_mdb_populate(&mdb, mp);
+
+		mdb.obj.orig_dev = pg->key.port->dev;
+		switch (type) {
+		case RTM_NEWMDB:
+			complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
+			if (!complete_info)
+				break;
+			complete_info->port = pg->key.port;
+			complete_info->ip = mp->addr;
+			mdb.obj.complete_priv = complete_info;
+			mdb.obj.complete = br_mdb_complete;
+			if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
+				kfree(complete_info);
+			break;
+		case RTM_DELMDB:
+			switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
+			break;
+		}
+	} else {
+		br_mdb_switchdev_host(dev, mp, type);
+	}

 	skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC);
 	if (!skb)
@@ -780,7 +1016,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -ENODEV;
 	}

-	if (!netif_is_bridge_master(dev)) {
+	if (!(dev->priv_flags & IFF_EBRIDGE)) {
 		NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
 		return -EOPNOTSUPP;
 	}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 4fd882686b..68c0d0f928 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -967,7 +967,7 @@ static int brnf_device_event(struct notifier_block *unused, unsigned long event,
 	struct net *net;
 	int ret;

-	if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev))
+	if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
 		return NOTIFY_DONE;

 	ASSERT_RTNL();
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 2ff83d8423..e365cf82f0 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -106,7 +106,7 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev,
 		p = br_port_get_check_rcu(dev);
 		if (p)
 			vg = nbp_vlan_group_rcu(p);
-	} else if (netif_is_bridge_master(dev)) {
+	} else if (dev->priv_flags & IFF_EBRIDGE) {
 		br = netdev_priv(dev);
 		vg = br_vlan_group_rcu(br);
 	}
@@ -1050,7 +1050,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
 	p = br_port_get_rtnl(dev);
 	/* We want to accept dev as bridge itself as well */
-	if (!p && !netif_is_bridge_master(dev))
+	if (!p && !(dev->priv_flags & IFF_EBRIDGE))
 		return -EINVAL;

 	err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2661dda1a9..bd218c2b2c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -346,7 +346,6 @@ struct net_bridge_mdb_entry {
 struct net_bridge_port {
 	struct net_bridge		*br;
 	struct net_device		*dev;
-	netdevice_tracker		dev_tracker;
 	struct list_head		list;

 	unsigned long			flags;
@@ -770,8 +769,8 @@ struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
 int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
 int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
		   unsigned long off);
-int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
-		     const unsigned char *addr, u16 vid);
+int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+		  const unsigned char *addr, u16 vid);
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
		   const unsigned char *addr, u16 vid, unsigned long flags);

@@ -795,6 +794,8 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
			      bool swdev_notify);
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
			  const unsigned char *addr, u16 vid, bool offloaded);
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
+		  struct notifier_block *nb);

 /* br_forward.c */
 enum br_pkt_type {
@@ -959,6 +960,9 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
				      struct netlink_ext_ack *extack);
 bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on);

+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  const void *ctx, bool adding, struct notifier_block *nb,
+		  struct netlink_ext_ack *extack);
 int br_rports_fill_info(struct sk_buff *skb,
			const struct net_bridge_mcast *brmctx);
 int br_multicast_dump_querier_state(struct sk_buff *skb,
@@ -1398,6 +1402,14 @@ static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
 	return false;
 }

+static inline int br_mdb_replay(struct net_device *br_dev,
+				struct net_device *dev, const void *ctx,
+				bool adding, struct notifier_block *nb,
+				struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline bool
 br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
			       const struct net_bridge_mcast *brmctx2)
@@ -1455,6 +1467,9 @@ void br_vlan_notify(const struct net_bridge *br,
		    const struct net_bridge_port *p,
		    u16 vid, u16 vid_range,
		    int cmd);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+		   const void *ctx, bool adding, struct notifier_block *nb,
+		   struct netlink_ext_ack *extack);
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
			     const struct net_bridge_vlan *range_end);

@@ -1701,11 +1716,13 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
 	return true;
 }

-static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
+static inline int br_vlan_replay(struct net_device *br_dev,
+				 struct net_device *dev, const void *ctx,
+				 bool adding, struct notifier_block *nb,
+				 struct netlink_ext_ack *extack)
 {
-	return 0;
+	return -EOPNOTSUPP;
 }
-
 #endif

 /* br_vlan_options.c */
@@ -1980,10 +1997,6 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
			       struct netlink_ext_ack *extack);
 void br_switchdev_fdb_notify(struct net_bridge *br,
			     const struct net_bridge_fdb_entry *fdb, int type);
-void br_switchdev_mdb_notify(struct net_device *dev,
-			     struct net_bridge_mdb_entry *mp,
-			     struct net_bridge_port_group *pg,
-			     int type);
 int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
			       struct netlink_ext_ack *extack);
 int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
@@ -2070,13 +2083,6 @@ br_switchdev_fdb_notify(struct net_bridge *br,
 {
 }

-static inline void br_switchdev_mdb_notify(struct net_device *dev,
-					   struct net_bridge_mdb_entry *mp,
-					   struct net_bridge_port_group *pg,
-					   int type)
-{
-}
-
 static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
 {
 }
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 75204d36d7..ba55851fe1 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -233,7 +233,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 	memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN);
 	memcpy(br->bridge_id.addr, addr, ETH_ALEN);
-	eth_hw_addr_set(br->dev, addr);
+	memcpy(br->dev->dev_addr, addr, ETH_ALEN);

 	list_for_each_entry(p, &br->port_list, list) {
 		if (ether_addr_equal(p->designated_bridge.addr, oldaddr))
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index f8fbaaa7c5..6bf518d78f 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -4,7 +4,6 @@
 #include
 #include
 #include
-#include
 #include

 #include "br_private.h"
@@ -123,38 +122,28 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 	return 0;
 }

-static void br_switchdev_fdb_populate(struct net_bridge *br,
-				      struct switchdev_notifier_fdb_info *item,
-				      const struct net_bridge_fdb_entry *fdb,
-				      const void *ctx)
-{
-	const struct net_bridge_port *p = READ_ONCE(fdb->dst);
-
-	item->addr = fdb->key.addr.addr;
-	item->vid = fdb->key.vlan_id;
-	item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
-	item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
-	item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
-	item->info.dev = (!p || item->is_local) ? br->dev : p->dev;
-	item->info.ctx = ctx;
-}
-
 void br_switchdev_fdb_notify(struct net_bridge *br,
			     const struct net_bridge_fdb_entry *fdb, int type)
 {
-	struct switchdev_notifier_fdb_info item;
-
-	br_switchdev_fdb_populate(br, &item, fdb, NULL);
+	const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+	struct switchdev_notifier_fdb_info info = {
+		.addr = fdb->key.addr.addr,
+		.vid = fdb->key.vlan_id,
+		.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags),
+		.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
+		.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
+	};
+	struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;

 	switch (type) {
 	case RTM_DELNEIGH:
 		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
-					 item.info.dev, &item.info, NULL);
+					 dev, &info.info, NULL);
 		break;
 	case RTM_NEWNEIGH:
 		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
-					 item.info.dev, &item.info, NULL);
+					 dev, &info.info, NULL);
 		break;
 	}
 }
@@ -281,397 +270,6 @@ static void nbp_switchdev_del(struct net_bridge_port *p)
 	}
 }

-static int
-br_switchdev_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
-			    const struct net_bridge_fdb_entry *fdb,
-			    unsigned long action, const void *ctx)
-{
-	struct switchdev_notifier_fdb_info item;
-	int err;
-
-	br_switchdev_fdb_populate(br, &item, fdb, ctx);
-
-	err = nb->notifier_call(nb, action, &item);
-	return notifier_to_errno(err);
-}
-
-static int
-br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx,
-			bool adding, struct notifier_block *nb)
-{
-	struct net_bridge_fdb_entry *fdb;
-	struct net_bridge *br;
-	unsigned long action;
-	int err = 0;
-
-	if (!nb)
-		return 0;
-
-	if (!netif_is_bridge_master(br_dev))
-		return -EINVAL;
-
-	br = netdev_priv(br_dev);
-
-	if (adding)
-		action = SWITCHDEV_FDB_ADD_TO_DEVICE;
-	else
-		action = SWITCHDEV_FDB_DEL_TO_DEVICE;
-
-	rcu_read_lock();
-
-	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
-		err = br_switchdev_fdb_replay_one(br, nb, fdb, action, ctx);
-		if (err)
-			break;
-	}
-
-	rcu_read_unlock();
-
-	return err;
-}
-
-static int
-br_switchdev_vlan_replay_one(struct notifier_block *nb,
-			     struct net_device *dev,
-			     struct switchdev_obj_port_vlan *vlan,
-			     const void *ctx, unsigned long action,
-			     struct netlink_ext_ack *extack)
-{
-	struct switchdev_notifier_port_obj_info obj_info = {
-		.info = {
-			.dev = dev,
-			.extack = extack,
-			.ctx = ctx,
-		},
-		.obj = &vlan->obj,
-	};
-	int err;
-
-	err = nb->notifier_call(nb, action, &obj_info);
-	return notifier_to_errno(err);
-}
-
-static int br_switchdev_vlan_replay(struct net_device *br_dev,
-				    struct net_device *dev,
-				    const void *ctx, bool adding,
-				    struct notifier_block *nb,
-				    struct netlink_ext_ack *extack)
-{
-	struct net_bridge_vlan_group *vg;
-	struct net_bridge_vlan *v;
-	struct net_bridge_port *p;
-	struct net_bridge *br;
-	unsigned long action;
-	int err = 0;
-	u16 pvid;
-
-	ASSERT_RTNL();
-
-	if (!nb)
-		return 0;
-
-	if (!netif_is_bridge_master(br_dev))
-		return -EINVAL;
-
-	if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
-		return -EINVAL;
-
-	if (netif_is_bridge_master(dev)) {
-		br = netdev_priv(dev);
-		vg = br_vlan_group(br);
-		p = NULL;
-	} else {
-		p = br_port_get_rtnl(dev);
-		if (WARN_ON(!p))
-			return -EINVAL;
-		vg = nbp_vlan_group(p);
-		br = p->br;
-	}
-
-	if (!vg)
-		return 0;
-
-	if (adding)
-		action = SWITCHDEV_PORT_OBJ_ADD;
-	else
-		action = SWITCHDEV_PORT_OBJ_DEL;
-
-	pvid = br_get_pvid(vg);
-
-	list_for_each_entry(v, &vg->vlan_list, vlist) {
-		struct switchdev_obj_port_vlan vlan = {
-			.obj.orig_dev = dev,
-			.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-			.flags = br_vlan_flags(v, pvid),
-			.vid = v->vid,
-		};
-
-		if (!br_vlan_should_use(v))
-			continue;
-
-		err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
-						   action, extack);
-		if (err)
-			return err;
-	}
-
-	return err;
-}
-
-#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-struct br_switchdev_mdb_complete_info {
-	struct net_bridge_port *port;
-	struct br_ip ip;
-};
-
-static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *priv)
-{
-	struct br_switchdev_mdb_complete_info *data = priv;
-	struct net_bridge_port_group __rcu **pp;
-	struct net_bridge_port_group *p;
-	struct net_bridge_mdb_entry *mp;
-	struct net_bridge_port *port = data->port;
-	struct net_bridge *br = port->br;
-
-	if (err)
-		goto err;
-
-	spin_lock_bh(&br->multicast_lock);
-	mp = br_mdb_ip_get(br, &data->ip);
-	if (!mp)
-		goto out;
-	for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
-	     pp = &p->next) {
-		if (p->key.port != port)
-			continue;
-		p->flags |= MDB_PG_FLAGS_OFFLOAD;
-	}
-out:
-	spin_unlock_bh(&br->multicast_lock);
-err:
-	kfree(priv);
-}
-
-static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
-				      const struct net_bridge_mdb_entry *mp)
-{
-	if (mp->addr.proto == htons(ETH_P_IP))
-		ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
-#if IS_ENABLED(CONFIG_IPV6)
-	else if (mp->addr.proto == htons(ETH_P_IPV6))
-		ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
-#endif
-	else
-		ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
-
-	mdb->vid = mp->addr.vid;
-}
-
-static void br_switchdev_host_mdb_one(struct net_device *dev,
-				      struct net_device *lower_dev,
-				      struct net_bridge_mdb_entry *mp,
-				      int type)
-{
-	struct switchdev_obj_port_mdb mdb = {
-		.obj = {
-			.id = SWITCHDEV_OBJ_ID_HOST_MDB,
-			.flags = SWITCHDEV_F_DEFER,
-			.orig_dev = dev,
-		},
-	};
-
-	br_switchdev_mdb_populate(&mdb, mp);
-
-	switch (type) {
-	case RTM_NEWMDB:
-		switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
-		break;
-	case RTM_DELMDB:
-		switchdev_port_obj_del(lower_dev, &mdb.obj);
-		break;
-	}
-}
-
-static void br_switchdev_host_mdb(struct net_device *dev,
-				  struct net_bridge_mdb_entry *mp, int type)
-{
-	struct net_device *lower_dev;
-	struct list_head *iter;
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter)
-		br_switchdev_host_mdb_one(dev, lower_dev, mp, type);
-}
-
-static int
-br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
-			    const struct switchdev_obj_port_mdb *mdb,
-			    unsigned long action, const void *ctx,
-			    struct netlink_ext_ack *extack)
-{
-	struct switchdev_notifier_port_obj_info obj_info = {
-		.info = {
-			.dev = dev,
-			.extack = extack,
-			.ctx = ctx,
-		},
-		.obj = &mdb->obj,
-	};
-	int err;
-
-	err = nb->notifier_call(nb, action, &obj_info);
-	return notifier_to_errno(err);
-}
-
-static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
-				      enum switchdev_obj_id id,
-				      const struct net_bridge_mdb_entry *mp,
-				      struct net_device *orig_dev)
-{
-	struct switchdev_obj_port_mdb *mdb;
-
-	mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
-	if (!mdb)
-		return -ENOMEM;
-
-	mdb->obj.id = id;
-	mdb->obj.orig_dev = orig_dev;
-	br_switchdev_mdb_populate(mdb, mp);
-	list_add_tail(&mdb->obj.list, mdb_list);
-
-	return 0;
-}
-
-void br_switchdev_mdb_notify(struct net_device *dev,
-			     struct net_bridge_mdb_entry *mp,
-			     struct net_bridge_port_group *pg,
-			     int type)
-{
-	struct br_switchdev_mdb_complete_info *complete_info;
-	struct switchdev_obj_port_mdb mdb = {
-		.obj = {
-			.id = SWITCHDEV_OBJ_ID_PORT_MDB,
-			.flags = SWITCHDEV_F_DEFER,
-		},
-	};
-
-	if (!pg)
-		return br_switchdev_host_mdb(dev, mp, type);
-
-	br_switchdev_mdb_populate(&mdb, mp);
-
-	mdb.obj.orig_dev = pg->key.port->dev;
-	switch (type) {
-	case RTM_NEWMDB:
-		complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
-		if (!complete_info)
-			break;
-		complete_info->port = pg->key.port;
-		complete_info->ip = mp->addr;
-		mdb.obj.complete_priv = complete_info;
-		mdb.obj.complete = br_switchdev_mdb_complete;
-		if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
-			kfree(complete_info);
-		break;
-	case RTM_DELMDB:
-		switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
-		break;
-	}
-}
-#endif
-
-static int
-br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-			const void *ctx, bool adding, struct notifier_block *nb,
-			struct netlink_ext_ack *extack)
-{
-#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-	const struct net_bridge_mdb_entry *mp;
-	struct switchdev_obj *obj, *tmp;
-	struct net_bridge *br;
-	unsigned long action;
-	LIST_HEAD(mdb_list);
-	int err = 0;
-
-	ASSERT_RTNL();
-
-	if (!nb)
-		return 0;
-
-	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
-		return -EINVAL;
-
-	br = netdev_priv(br_dev);
-
-	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
-		return 0;
-
-	/* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
-	 * because the write-side protection is br->multicast_lock. But we
-	 * need to emulate the [ blocking ] calling context of a regular
-	 * switchdev event, so since both br->multicast_lock and RCU read side
-	 * critical sections are atomic, we have no choice but to pick the RCU
-	 * read side lock, queue up all our events, leave the critical section
-	 * and notify switchdev from blocking context.
-	 */
-	rcu_read_lock();
-
-	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
-		struct net_bridge_port_group __rcu * const *pp;
-		const struct net_bridge_port_group *p;
-
-		if (mp->host_joined) {
-			err = br_switchdev_mdb_queue_one(&mdb_list,
-							 SWITCHDEV_OBJ_ID_HOST_MDB,
-							 mp, br_dev);
-			if (err) {
-				rcu_read_unlock();
-				goto out_free_mdb;
-			}
-		}
-
-		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
-		     pp = &p->next) {
-			if (p->key.port->dev != dev)
-				continue;
-
-			err = br_switchdev_mdb_queue_one(&mdb_list,
-							 SWITCHDEV_OBJ_ID_PORT_MDB,
-							 mp, dev);
-			if (err) {
-				rcu_read_unlock();
-				goto out_free_mdb;
-			}
-		}
-	}
-
-	rcu_read_unlock();
-
-	if (adding)
-		action = SWITCHDEV_PORT_OBJ_ADD;
-	else
-		action = SWITCHDEV_PORT_OBJ_DEL;
-
-	list_for_each_entry(obj, &mdb_list, list) {
-		err = br_switchdev_mdb_replay_one(nb, dev,
-						  SWITCHDEV_OBJ_PORT_MDB(obj),
-						  action, ctx, extack);
-		if (err)
-			goto out_free_mdb;
-	}
-
-out_free_mdb:
-	list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
-		list_del(&obj->list);
-		kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
-	}
-
-	if (err)
-		return err;
-#endif
-
-	return 0;
-}
-
 static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
				   struct notifier_block *atomic_nb,
				   struct notifier_block *blocking_nb,
@@ -681,17 +279,15 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
 	struct net_device *dev = p->dev;
 	int err;

-	err = br_switchdev_vlan_replay(br_dev, dev, ctx, true, blocking_nb,
-				       extack);
+	err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;

-	err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
-				      extack);
+	err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;

br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb); + err = br_fdb_replay(br_dev, ctx, true, atomic_nb); if (err && err != -EOPNOTSUPP) return err; @@ -706,11 +302,11 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, struct net_device *br_dev = p->br->dev; struct net_device *dev = p->dev; - br_switchdev_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL); - br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); - br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb); + br_fdb_replay(br_dev, ctx, false, atomic_nb); } /* Let the bridge know that this port is offloaded, so that it can assign a diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 3f7ca88c2a..7b0c197721 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -36,14 +36,15 @@ static ssize_t store_bridge_parm(struct device *d, struct net_bridge *br = to_bridge(d); struct netlink_ext_ack extack = {0}; unsigned long val; + char *endp; int err; if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = kstrtoul(buf, 0, &val); - if (err != 0) - return err; + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; if (!rtnl_trylock()) return restart_syscall(); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 1402d5ca24..10e63ea6a1 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -293,7 +293,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, /* Add the dev mac and count the vlan only if it's usable */ if (br_vlan_should_use(v)) { - err = br_fdb_add_local(br, p, dev->dev_addr, v->vid); + err = br_fdb_insert(br, p, dev->dev_addr, v->vid); if (err) { br_err(br, "failed insert local address into bridge forwarding table\n"); goto out_filt; @@ -683,7 +683,8 @@ static int br_vlan_add_existing(struct net_bridge *br, goto err_flags; } /* It was only kept for port vlans, now make it real */ - err = br_fdb_add_local(br, NULL, br->dev->dev_addr, vlan->vid); + err = br_fdb_insert(br, NULL, br->dev->dev_addr, + vlan->vid); if (err) { br_err(br, "failed to insert local address into bridge forwarding table\n"); goto err_fdb_insert; @@ -1063,7 +1064,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid, if (br_vlan_delete(br, old_pvid)) br_vlan_notify(br, NULL, old_pvid, 0, RTM_DELVLAN); br_vlan_notify(br, NULL, pvid, 0, RTM_NEWVLAN); - __set_bit(0, changed); + set_bit(0, changed); } list_for_each_entry(p, &br->port_list, list) { @@ -1085,7 +1086,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid, if (nbp_vlan_delete(p, old_pvid)) br_vlan_notify(br, p, old_pvid, 0, RTM_DELVLAN); br_vlan_notify(p->br, p, pvid, 0, RTM_NEWVLAN); - __set_bit(p->port_no, changed); + set_bit(p->port_no, changed); } br->default_pvid = pvid; @@ -1860,6 +1861,90 @@ void br_vlan_notify(const struct net_bridge *br, kfree_skb(skb); } +static int br_vlan_replay_one(struct notifier_block *nb, + struct net_device *dev, + struct switchdev_obj_port_vlan *vlan, + const void *ctx, unsigned long action, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_port_obj_info obj_info = { + .info = { + .dev = dev, + .extack = extack, + .ctx = ctx, + }, + .obj = &vlan->obj, + }; + int err; + + err = nb->notifier_call(nb, action, &obj_info); + return notifier_to_errno(err); +} + +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, 
struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge_port *p; + struct net_bridge *br; + unsigned long action; + int err = 0; + u16 pvid; + + ASSERT_RTNL(); + + if (!nb) + return 0; + + if (!netif_is_bridge_master(br_dev)) + return -EINVAL; + + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) + return -EINVAL; + + if (netif_is_bridge_master(dev)) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + p = NULL; + } else { + p = br_port_get_rtnl(dev); + if (WARN_ON(!p)) + return -EINVAL; + vg = nbp_vlan_group(p); + br = p->br; + } + + if (!vg) + return 0; + + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + + pvid = br_get_pvid(vg); + + list_for_each_entry(v, &vg->vlan_list, vlist) { + struct switchdev_obj_port_vlan vlan = { + .obj.orig_dev = dev, + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .flags = br_vlan_flags(v, pvid), + .vid = v->vid, + }; + + if (!br_vlan_should_use(v)) + continue; + + err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack); + if (err) + return err; + } + + return err; +} + /* check if v_curr can enter a range ending in range_end */ bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index cb949436bc..c0b121df4a 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -58,21 +58,28 @@ static const struct ebt_table frame_filter = { .me = THIS_MODULE, }; +static unsigned int +ebt_filter_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ebt_do_table(skb, state, priv); +} + static const struct nf_hook_ops ebt_ops_filter[] = { { - .hook = ebt_do_table, + .hook = ebt_filter_hook, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_do_table, + .hook = ebt_filter_hook, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_do_table, + .hook = ebt_filter_hook, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_FILTER_OTHER, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 5ee0531ae5..4078151c22 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -58,21 +58,27 @@ static const struct ebt_table frame_nat = { .me = THIS_MODULE, }; +static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ebt_do_table(skb, state, priv); +} + static const struct nf_hook_ops ebt_ops_nat[] = { { - .hook = ebt_do_table, + .hook = ebt_nat_hook, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { - .hook = ebt_do_table, + .hook = ebt_nat_hook, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { - .hook = ebt_do_table, + .hook = ebt_nat_hook, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f2dbefb61c..ba045f3511 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -189,10 +189,10 @@ ebt_get_target_c(const struct ebt_entry *e) } /* Do some firewalling */ -unsigned int ebt_do_table(void *priv, struct sk_buff *skb, - const struct 
nf_hook_state *state) +unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table) { - struct ebt_table *table = priv; unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; @@ -1073,7 +1073,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, */ if (repl->num_counters && copy_to_user(repl->counters, counterstmp, - array_size(repl->num_counters, sizeof(struct ebt_counter)))) { + repl->num_counters * sizeof(struct ebt_counter))) { /* Silent error, can't fail, new table is already in place */ net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n"); } @@ -1401,8 +1401,7 @@ static int do_update_counters(struct net *net, const char *name, goto unlock_mutex; } - if (copy_from_user(tmp, counters, - array_size(num_counters, sizeof(*counters)))) { + if (copy_from_user(tmp, counters, num_counters * sizeof(*counters))) { ret = -EFAULT; goto unlock_mutex; } @@ -1535,7 +1534,7 @@ static int copy_counters_to_user(struct ebt_table *t, write_unlock_bh(&t->lock); if (copy_to_user(user, counterstmp, - array_size(nentries, sizeof(struct ebt_counter)))) + nentries * sizeof(struct ebt_counter))) ret = -EFAULT; vfree(counterstmp); return ret; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index c1ef9cc89b..97805ec424 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -100,25 +100,6 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = { .dump = nft_meta_get_dump, }; -static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_meta_bridge_get_ops) - continue; - - track->regs[i].selector = NULL; - track->regs[i].bitwise = NULL; - } - - return false; -} - static const struct nft_expr_ops nft_meta_bridge_set_ops = { .type = &nft_meta_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), @@ -126,7 +107,6 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .init = nft_meta_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, - .reduce = nft_meta_bridge_set_reduce, .validate = nft_meta_set_validate, }; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 2b8892d502..e12fd3cad6 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -6,7 +6,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ -#include #include #include #include diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 4be6b04879..b02e1292f7 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -81,7 +81,7 @@ static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, layr->up->ctrlcmd(layr->up, ctrl, layr->id); } -static struct cflayer *cfusbl_create(int phyid, const u8 ethaddr[ETH_ALEN], +static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], u8 braddr[ETH_ALEN]) { struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index aee11c74d3..40cd57ad0a 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -128,6 +128,7 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) if (pkt != NULL) cfpkt_destroy(pkt); layr->incomplete_frm = NULL; + expectlen = 0; spin_unlock(&layr->sync); return -EPROTO; } diff --git a/net/can/bcm.c b/net/can/bcm.c index 
95d209b52e..508f67de0b 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -193,7 +193,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) { char ifname[IFNAMSIZ]; struct net *net = m->private; - struct sock *sk = (struct sock *)pde_data(m->file->f_inode); + struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode); struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; @@ -625,7 +625,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); if (bcm_rx_thr_flush(op)) { - hrtimer_forward_now(hrtimer, op->kt_ival2); + hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { /* rearm throttle handling */ diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index 7e51f12804..5622763ad4 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -7,7 +7,7 @@ #include #include -#include /* for kvmalloc */ +#include /* for ceph_kvmalloc */ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) { @@ -17,7 +17,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) if (!b) return NULL; - b->vec.iov_base = kvmalloc(len, gfp); + b->vec.iov_base = ceph_kvmalloc(len, gfp); if (!b->vec.iov_base) { kfree(b); return NULL; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4c6441536d..97d6ea763e 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -190,14 +190,41 @@ int ceph_compare_options(struct ceph_options *new_opt, } EXPORT_SYMBOL(ceph_compare_options); -int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid) +/* + * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are + * compatible with (a superset of) GFP_KERNEL. This is because while the + * actual pages are allocated with the specified flags, the page table pages + * are always allocated with GFP_KERNEL. + * + * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO. 
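For readers unfamiliar with the pair used in ceph_kvmalloc() below: memalloc_nofs_save()/memalloc_noio_save() are the generic scoped-reclaim API. Inside the window the page allocator behaves as if GFP_NOFS/GFP_NOIO had been passed everywhere, including for the page-table pages that kvmalloc's vmalloc fallback needs, which is exactly the gap the comment above describes. A generic illustration of the idiom (standalone sketch, not ceph code):

	#include <linux/mm.h>
	#include <linux/sched/mm.h>

	static void *alloc_from_fs_context(size_t size)
	{
		unsigned int nofs_flag = memalloc_nofs_save();
		void *p;

		/* GFP_KERNEL here, but reclaim will not re-enter the FS */
		p = kvmalloc(size, GFP_KERNEL);
		memalloc_nofs_restore(nofs_flag);

		return p;
	}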
+ */ +void *ceph_kvmalloc(size_t size, gfp_t flags) +{ + void *p; + + if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) { + p = kvmalloc(size, flags); + } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) { + unsigned int nofs_flag = memalloc_nofs_save(); + p = kvmalloc(size, GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); + } else { + unsigned int noio_flag = memalloc_noio_save(); + p = kvmalloc(size, GFP_KERNEL); + memalloc_noio_restore(noio_flag); + } + + return p; +} + +static int parse_fsid(const char *str, struct ceph_fsid *fsid) { int i = 0; char tmp[3]; int err = -EINVAL; int d; - dout("%s '%s'\n", __func__, str); + dout("parse_fsid '%s'\n", str); tmp[2] = 0; while (*str && i < 16) { if (ispunct(*str)) { @@ -217,10 +244,9 @@ int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid) if (i == 16) err = 0; - dout("%s ret %d got fsid %pU\n", __func__, err, fsid); + dout("parse_fsid ret %d got fsid %pU\n", err, fsid); return err; } -EXPORT_SYMBOL(ceph_parse_fsid); /* * ceph options @@ -246,7 +272,6 @@ enum { Opt_cephx_sign_messages, Opt_tcp_nodelay, Opt_abort_on_full, - Opt_rxbounce, }; enum { @@ -296,7 +321,6 @@ static const struct fs_parameter_spec ceph_parameters[] = { fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout), fsparam_enum ("read_from_replica", Opt_read_from_replica, ceph_param_read_from_replica), - fsparam_flag ("rxbounce", Opt_rxbounce), fsparam_enum ("ms_mode", Opt_ms_mode, ceph_param_ms_mode), fsparam_string ("secret", Opt_secret), @@ -398,14 +422,14 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name, } int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, - struct fc_log *l, char delim) + struct fc_log *l) { struct p_log log = {.prefix = "libceph", .log = l}; int ret; - /* ip1[:port1][ip2[:port2]...] */ + /* ip1[:port1][,ip2[:port2]...] 
*/ ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON, - &opt->num_mon, delim); + &opt->num_mon); if (ret) { error_plog(&log, "Failed to parse monitor IPs: %d", ret); return ret; @@ -431,7 +455,8 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, case Opt_ip: err = ceph_parse_ips(param->string, param->string + param->size, - &opt->my_addr, 1, NULL, ','); + &opt->my_addr, + 1, NULL); if (err) { error_plog(&log, "Failed to parse ip: %d", err); return err; @@ -440,7 +465,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, break; case Opt_fsid: - err = ceph_parse_fsid(param->string, &opt->fsid); + err = parse_fsid(param->string, &opt->fsid); if (err) { error_plog(&log, "Failed to parse fsid: %d", err); return err; @@ -586,9 +611,6 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, case Opt_abort_on_full: opt->flags |= CEPH_OPT_ABORT_ON_FULL; break; - case Opt_rxbounce: - opt->flags |= CEPH_OPT_RXBOUNCE; - break; default: BUG(); @@ -665,8 +687,6 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, seq_puts(m, "notcp_nodelay,"); if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL)) seq_puts(m, "abort_on_full,"); - if (opt->flags & CEPH_OPT_RXBOUNCE) - seq_puts(m, "rxbounce,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) seq_printf(m, "mount_timeout=%d,", diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 051d22c0e4..92d89b3316 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -147,7 +147,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key) static const u8 *aes_iv = (u8 *)CEPH_AES_IV; /* - * Should be used for buffers allocated with kvmalloc(). + * Should be used for buffers allocated with ceph_kvmalloc(). * Currently these are encrypt out-buffer (ceph_buffer) and decrypt * in-buffer (msg front). 
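Context for why this comment tracks the allocator: these buffers end up in AES scatterlists, and memory from kvmalloc's vmalloc fallback is only virtually contiguous, so sg_set_buf() on it would map the wrong pages. The usual fix is to translate page by page; a sketch under the assumption that each chunk stays within one page (mirroring the intent of ceph's setup_sgtable() in this file):

	#include <linux/mm.h>
	#include <linux/scatterlist.h>
	#include <linux/vmalloc.h>

	static void sg_set_maybe_vmalloc(struct scatterlist *sg, void *buf,
					 unsigned int len)
	{
		if (is_vmalloc_addr(buf))
			/* vmalloc memory: look up the backing page explicitly */
			sg_set_page(sg, vmalloc_to_page(buf), len,
				    offset_in_page(buf));
		else
			sg_set_buf(sg, buf, len);	/* physically contiguous */
	}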
* diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d3bb656308..57d043b382 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -515,10 +515,6 @@ static void ceph_con_reset_protocol(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } - if (con->bounce_page) { - __free_page(con->bounce_page); - con->bounce_page = NULL; - } if (ceph_msgr2(from_msgr(con->msgr))) ceph_con_v2_reset_protocol(con); @@ -1271,31 +1267,30 @@ static int ceph_parse_server_name(const char *name, size_t namelen, */ int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, - int max_count, int *count, char delim) + int max_count, int *count) { int i, ret = -EINVAL; const char *p = c; dout("parse_ips on '%.*s'\n", (int)(end-c), c); for (i = 0; i < max_count; i++) { - char cur_delim = delim; const char *ipend; int port; + char delim = ','; if (*p == '[') { - cur_delim = ']'; + delim = ']'; p++; } - ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim, - &ipend); + ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); if (ret) goto bad; ret = -EINVAL; p = ipend; - if (cur_delim == ']') { + if (delim == ']') { if (*p != ']') { dout("missing matching ']'\n"); goto bad; @@ -1331,11 +1326,11 @@ int ceph_parse_ips(const char *c, const char *end, addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY; addr[i].nonce = 0; - dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i])); + dout("parse_ips got %s\n", ceph_pr_addr(&addr[i])); if (p == end) break; - if (*p != delim) + if (*p != ',') goto bad; p++; } @@ -1925,7 +1920,7 @@ struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, /* front */ if (front_len) { - m->front.iov_base = kvmalloc(front_len, flags); + m->front.iov_base = ceph_kvmalloc(front_len, flags); if (m->front.iov_base == NULL) { dout("ceph_msg_new can't allocate %d bytes\n", front_len); diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 6b014eca3a..2cb5ffdf07 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -992,7 +992,8 @@ static int read_partial_message_section(struct ceph_connection *con, static int read_partial_msg_data(struct ceph_connection *con) { - struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + struct ceph_msg *msg = con->in_msg; + struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); struct page *page; size_t page_offset; @@ -1000,6 +1001,9 @@ static int read_partial_msg_data(struct ceph_connection *con) u32 crc = 0; int ret; + if (!msg->num_data_items) + return -EIO; + if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { @@ -1027,46 +1031,6 @@ static int read_partial_msg_data(struct ceph_connection *con) return 1; /* must return > 0 to indicate success */ } -static int read_partial_msg_data_bounce(struct ceph_connection *con) -{ - struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; - struct page *page; - size_t off, len; - u32 crc; - int ret; - - if (unlikely(!con->bounce_page)) { - con->bounce_page = alloc_page(GFP_NOIO); - if (!con->bounce_page) { - pr_err("failed to allocate bounce page\n"); - return -ENOMEM; - } - } - - crc = con->in_data_crc; - while (cursor->total_resid) { - if (!cursor->resid) { - ceph_msg_data_advance(cursor, 0); - continue; - } - - page = ceph_msg_data_next(cursor, &off, &len, NULL); - ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); - if (ret <= 0) { - con->in_data_crc = crc; - return ret; - } - - crc = crc32c(crc, 
page_address(con->bounce_page), ret); - memcpy_to_page(page, off, page_address(con->bounce_page), ret); - - ceph_msg_data_advance(cursor, ret); - } - con->in_data_crc = crc; - - return 1; /* must return > 0 to indicate success */ -} - /* * read (part of) a message. */ @@ -1177,13 +1141,7 @@ static int read_partial_message(struct ceph_connection *con) /* (page) data */ if (data_len) { - if (!m->num_data_items) - return -EIO; - - if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) - ret = read_partial_msg_data_bounce(con); - else - ret = read_partial_msg_data(con); + ret = read_partial_msg_data(con); if (ret <= 0) return ret; } diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c81379f93a..cc40ce4e02 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -57,9 +57,8 @@ #define IN_S_HANDLE_CONTROL_REMAINDER 3 #define IN_S_PREPARE_READ_DATA 4 #define IN_S_PREPARE_READ_DATA_CONT 5 -#define IN_S_PREPARE_READ_ENC_PAGE 6 -#define IN_S_HANDLE_EPILOGUE 7 -#define IN_S_FINISH_SKIP 8 +#define IN_S_HANDLE_EPILOGUE 6 +#define IN_S_FINISH_SKIP 7 #define OUT_S_QUEUE_DATA 1 #define OUT_S_QUEUE_DATA_CONT 2 @@ -309,7 +308,7 @@ static void *alloc_conn_buf(struct ceph_connection *con, int len) if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs))) return NULL; - buf = kvmalloc(len, GFP_NOIO); + buf = ceph_kvmalloc(len, GFP_NOIO); if (!buf) return NULL; @@ -1033,41 +1032,22 @@ static int decrypt_control_remainder(struct ceph_connection *con) padded_len(rem_len) + CEPH_GCM_TAG_LEN); } -static int decrypt_tail(struct ceph_connection *con) +static int decrypt_message(struct ceph_connection *con) { - struct sg_table enc_sgt = {}; struct sg_table sgt = {}; - int tail_len; int ret; - tail_len = tail_onwire_len(con->in_msg, true); - ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages, - con->v2.in_enc_page_cnt, 0, tail_len, - GFP_NOIO); - if (ret) - goto out; - ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf), MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf), con->v2.in_buf, true); if (ret) goto out; - dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con, - con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents); - ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len); - if (ret) - goto out; - - WARN_ON(!con->v2.in_enc_page_cnt); - ceph_release_page_vector(con->v2.in_enc_pages, - con->v2.in_enc_page_cnt); - con->v2.in_enc_pages = NULL; - con->v2.in_enc_page_cnt = 0; + ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl, + tail_onwire_len(con->in_msg, true)); out: sg_free_table(&sgt); - sg_free_table(&enc_sgt); return ret; } @@ -1753,157 +1733,54 @@ static int prepare_read_control_remainder(struct ceph_connection *con) return 0; } -static int prepare_read_data(struct ceph_connection *con) +static void prepare_read_data(struct ceph_connection *con) { struct bio_vec bv; - con->in_data_crc = -1; + if (!con_secure(con)) + con->in_data_crc = -1; ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg, data_len(con->in_msg)); get_bvec_at(&con->v2.in_cursor, &bv); - if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { - if (unlikely(!con->bounce_page)) { - con->bounce_page = alloc_page(GFP_NOIO); - if (!con->bounce_page) { - pr_err("failed to allocate bounce page\n"); - return -ENOMEM; - } - } - - bv.bv_page = con->bounce_page; - bv.bv_offset = 0; - set_in_bvec(con, &bv); - } else { - set_in_bvec(con, &bv); - } + set_in_bvec(con, &bv); con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT; - return 0; } static void 
prepare_read_data_cont(struct ceph_connection *con) { struct bio_vec bv; - if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { - con->in_data_crc = crc32c(con->in_data_crc, - page_address(con->bounce_page), - con->v2.in_bvec.bv_len); - - get_bvec_at(&con->v2.in_cursor, &bv); - memcpy_to_page(bv.bv_page, bv.bv_offset, - page_address(con->bounce_page), - con->v2.in_bvec.bv_len); - } else { + if (!con_secure(con)) con->in_data_crc = ceph_crc32c_page(con->in_data_crc, con->v2.in_bvec.bv_page, con->v2.in_bvec.bv_offset, con->v2.in_bvec.bv_len); - } ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len); if (con->v2.in_cursor.total_resid) { get_bvec_at(&con->v2.in_cursor, &bv); - if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { - bv.bv_page = con->bounce_page; - bv.bv_offset = 0; - set_in_bvec(con, &bv); - } else { - set_in_bvec(con, &bv); - } + set_in_bvec(con, &bv); WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT); return; } /* - * We've read all data. Prepare to read epilogue. + * We've read all data. Prepare to read data padding (if any) + * and epilogue. */ reset_in_kvecs(con); - add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); - con->v2.in_state = IN_S_HANDLE_EPILOGUE; -} - -static int prepare_read_tail_plain(struct ceph_connection *con) -{ - struct ceph_msg *msg = con->in_msg; - - if (!front_len(msg) && !middle_len(msg)) { - WARN_ON(!data_len(msg)); - return prepare_read_data(con); - } - - reset_in_kvecs(con); - if (front_len(msg)) { - add_in_kvec(con, msg->front.iov_base, front_len(msg)); - WARN_ON(msg->front.iov_len != front_len(msg)); - } - if (middle_len(msg)) { - add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg)); - WARN_ON(msg->middle->vec.iov_len != middle_len(msg)); - } - - if (data_len(msg)) { - con->v2.in_state = IN_S_PREPARE_READ_DATA; + if (con_secure(con)) { + if (need_padding(data_len(con->in_msg))) + add_in_kvec(con, DATA_PAD(con->v2.in_buf), + padding_len(data_len(con->in_msg))); + add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN); } else { add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); - con->v2.in_state = IN_S_HANDLE_EPILOGUE; } - return 0; -} - -static void prepare_read_enc_page(struct ceph_connection *con) -{ - struct bio_vec bv; - - dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i, - con->v2.in_enc_resid); - WARN_ON(!con->v2.in_enc_resid); - - bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i]; - bv.bv_offset = 0; - bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE); - - set_in_bvec(con, &bv); - con->v2.in_enc_i++; - con->v2.in_enc_resid -= bv.bv_len; - - if (con->v2.in_enc_resid) { - con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE; - return; - } - - /* - * We are set to read the last piece of ciphertext (ending - * with epilogue) + auth tag. 
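On the CRC handling in this hunk: crc32c() is incremental, which is why the messenger keeps a running value across partial reads and only commits it to con->in_data_crc once a chunk has been fully consumed (note the error paths above saving `crc` back before returning). A self-contained illustration of the same folding:

	#include <linux/crc32c.h>
	#include <linux/minmax.h>

	/* fold a buffer into a running crc32c, chunk by chunk */
	static u32 crc32c_chunked(u32 crc, const u8 *buf, size_t len,
				  size_t chunk)
	{
		while (len) {
			size_t n = min(chunk, len);

			crc = crc32c(crc, buf, n);
			buf += n;
			len -= n;
		}

		return crc;
	}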
- */ - WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt); con->v2.in_state = IN_S_HANDLE_EPILOGUE; } -static int prepare_read_tail_secure(struct ceph_connection *con) -{ - struct page **enc_pages; - int enc_page_cnt; - int tail_len; - - tail_len = tail_onwire_len(con->in_msg, true); - WARN_ON(!tail_len); - - enc_page_cnt = calc_pages_for(0, tail_len); - enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO); - if (IS_ERR(enc_pages)) - return PTR_ERR(enc_pages); - - WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt); - con->v2.in_enc_pages = enc_pages; - con->v2.in_enc_page_cnt = enc_page_cnt; - con->v2.in_enc_resid = tail_len; - con->v2.in_enc_i = 0; - - prepare_read_enc_page(con); - return 0; -} - static void __finish_skip(struct ceph_connection *con) { con->in_seq++; @@ -2712,26 +2589,47 @@ static int __handle_control(struct ceph_connection *con, void *p) } msg = con->in_msg; /* set in process_message_header() */ + if (!front_len(msg) && !middle_len(msg)) { + if (!data_len(msg)) + return process_message(con); + + prepare_read_data(con); + return 0; + } + + reset_in_kvecs(con); if (front_len(msg)) { WARN_ON(front_len(msg) > msg->front_alloc_len); + add_in_kvec(con, msg->front.iov_base, front_len(msg)); msg->front.iov_len = front_len(msg); + + if (con_secure(con) && need_padding(front_len(msg))) + add_in_kvec(con, FRONT_PAD(con->v2.in_buf), + padding_len(front_len(msg))); } else { msg->front.iov_len = 0; } if (middle_len(msg)) { WARN_ON(middle_len(msg) > msg->middle->alloc_len); + add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg)); msg->middle->vec.iov_len = middle_len(msg); + + if (con_secure(con) && need_padding(middle_len(msg))) + add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf), + padding_len(middle_len(msg))); } else if (msg->middle) { msg->middle->vec.iov_len = 0; } - if (!front_len(msg) && !middle_len(msg) && !data_len(msg)) - return process_message(con); - - if (con_secure(con)) - return prepare_read_tail_secure(con); - - return prepare_read_tail_plain(con); + if (data_len(msg)) { + con->v2.in_state = IN_S_PREPARE_READ_DATA; + } else { + add_in_kvec(con, con->v2.in_buf, + con_secure(con) ? 
CEPH_EPILOGUE_SECURE_LEN : + CEPH_EPILOGUE_PLAIN_LEN); + con->v2.in_state = IN_S_HANDLE_EPILOGUE; + } + return 0; } static int handle_preamble(struct ceph_connection *con) @@ -2819,7 +2717,7 @@ static int handle_epilogue(struct ceph_connection *con) int ret; if (con_secure(con)) { - ret = decrypt_tail(con); + ret = decrypt_message(con); if (ret) { if (ret == -EBADMSG) con->error_msg = "integrity error, bad epilogue auth tag"; @@ -2887,16 +2785,13 @@ static int populate_in_iter(struct ceph_connection *con) ret = handle_control_remainder(con); break; case IN_S_PREPARE_READ_DATA: - ret = prepare_read_data(con); + prepare_read_data(con); + ret = 0; break; case IN_S_PREPARE_READ_DATA_CONT: prepare_read_data_cont(con); ret = 0; break; - case IN_S_PREPARE_READ_ENC_PAGE: - prepare_read_enc_page(con); - ret = 0; - break; case IN_S_HANDLE_EPILOGUE: ret = handle_epilogue(con); break; @@ -3431,16 +3326,20 @@ void ceph_con_v2_revoke(struct ceph_connection *con) static void revoke_at_prepare_read_data(struct ceph_connection *con) { - int remaining; + int remaining; /* data + [data padding] + epilogue */ int resid; - WARN_ON(con_secure(con)); WARN_ON(!data_len(con->in_msg)); WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); WARN_ON(!resid); - remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN; + if (con_secure(con)) + remaining = padded_len(data_len(con->in_msg)) + + CEPH_EPILOGUE_SECURE_LEN; + else + remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN; + dout("%s con %p resid %d remaining %d\n", __func__, con, resid, remaining); con->v2.in_iter.count -= resid; @@ -3451,9 +3350,8 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con) static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) { int recved, resid; /* current piece of data */ - int remaining; + int remaining; /* [data padding] + epilogue */ - WARN_ON(con_secure(con)); WARN_ON(!data_len(con->in_msg)); WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); @@ -3465,7 +3363,12 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) ceph_msg_data_advance(&con->v2.in_cursor, recved); WARN_ON(resid > con->v2.in_cursor.total_resid); - remaining = CEPH_EPILOGUE_PLAIN_LEN; + if (con_secure(con)) + remaining = padding_len(data_len(con->in_msg)) + + CEPH_EPILOGUE_SECURE_LEN; + else + remaining = CEPH_EPILOGUE_PLAIN_LEN; + dout("%s con %p total_resid %zu remaining %d\n", __func__, con, con->v2.in_cursor.total_resid, remaining); con->v2.in_iter.count -= resid; @@ -3473,26 +3376,11 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) con->v2.in_state = IN_S_FINISH_SKIP; } -static void revoke_at_prepare_read_enc_page(struct ceph_connection *con) -{ - int resid; /* current enc page (not necessarily data) */ - - WARN_ON(!con_secure(con)); - WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); - resid = iov_iter_count(&con->v2.in_iter); - WARN_ON(!resid || resid > con->v2.in_bvec.bv_len); - - dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid, - con->v2.in_enc_resid); - con->v2.in_iter.count -= resid; - set_in_skip(con, resid + con->v2.in_enc_resid); - con->v2.in_state = IN_S_FINISH_SKIP; -} - static void revoke_at_handle_epilogue(struct ceph_connection *con) { int resid; + WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); WARN_ON(!resid); @@ -3511,9 +3399,6 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con) case 
IN_S_PREPARE_READ_DATA_CONT: revoke_at_prepare_read_data_cont(con); break; - case IN_S_PREPARE_READ_ENC_PAGE: - revoke_at_prepare_read_enc_page(con); - break; case IN_S_HANDLE_EPILOGUE: revoke_at_handle_epilogue(con); break; @@ -3547,13 +3432,6 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con) clear_out_sign_kvecs(con); free_conn_bufs(con); - if (con->v2.in_enc_pages) { - WARN_ON(!con->v2.in_enc_page_cnt); - ceph_release_page_vector(con->v2.in_enc_pages, - con->v2.in_enc_page_cnt); - con->v2.in_enc_pages = NULL; - con->v2.in_enc_page_cnt = 0; - } if (con->v2.out_enc_pages) { WARN_ON(!con->v2.out_enc_page_cnt); ceph_release_page_vector(con->v2.out_enc_pages, diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 6a6898ee40..013cbdb6cf 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1153,11 +1153,12 @@ static int build_initial_monmap(struct ceph_mon_client *monc) int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) { - int err; + int err = 0; dout("init\n"); memset(monc, 0, sizeof(*monc)); monc->client = cl; + monc->monmap = NULL; mutex_init(&monc->mutex); err = build_initial_monmap(monc); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1c5815530e..ff8624a7c9 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5310,14 +5310,14 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_msgpool_destroy(&osdc->msgpool_op_reply); } -int osd_req_op_copy_from_init(struct ceph_osd_request *req, - u64 src_snapid, u64 src_version, - struct ceph_object_id *src_oid, - struct ceph_object_locator *src_oloc, - u32 src_fadvise_flags, - u32 dst_fadvise_flags, - u32 truncate_seq, u64 truncate_size, - u8 copy_from_flags) +static int osd_req_op_copy_from_init(struct ceph_osd_request *req, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, + u8 copy_from_flags) { struct ceph_osd_req_op *op; struct page **pages; @@ -5346,7 +5346,49 @@ int osd_req_op_copy_from_init(struct ceph_osd_request *req, op->indata_len, 0, false, true); return 0; } -EXPORT_SYMBOL(osd_req_op_copy_from_init); + +int ceph_osdc_copy_from(struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, + u8 copy_from_flags) +{ + struct ceph_osd_request *req; + int ret; + + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->r_flags = CEPH_OSD_FLAG_WRITE; + + ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc); + ceph_oid_copy(&req->r_t.base_oid, dst_oid); + + ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid, + src_oloc, src_fadvise_flags, + dst_fadvise_flags, truncate_seq, + truncate_size, copy_from_flags); + if (ret) + goto out; + + ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); + if (ret) + goto out; + + ceph_osdc_start_request(osdc, req, false); + ret = ceph_osdc_wait_request(osdc, req); + +out: + ceph_osdc_put_request(req); + return ret; +} +EXPORT_SYMBOL(ceph_osdc_copy_from); int __init ceph_osdc_setup(void) { diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 2823bb3cff..75b7380835 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -980,7 +980,7 @@ static struct crush_work 
*alloc_workspace(const struct crush_map *c) work_size = crush_work_size(c, CEPH_PG_MAX_SIZE); dout("%s work_size %zu bytes\n", __func__, work_size); - work = kvmalloc(work_size, GFP_NOIO); + work = ceph_kvmalloc(work_size, GFP_NOIO); if (!work) return NULL; @@ -1190,9 +1190,9 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) if (max == map->max_osd) return 0; - state = kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS); - weight = kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS); - addr = kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS); + state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS); + weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS); + addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS); if (!state || !weight || !addr) { kvfree(state); kvfree(weight); @@ -1222,7 +1222,7 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) if (map->osd_primary_affinity) { u32 *affinity; - affinity = kvmalloc(array_size(max, sizeof(*affinity)), + affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)), GFP_NOFS); if (!affinity) return -ENOMEM; @@ -1503,7 +1503,7 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff) if (!map->osd_primary_affinity) { int i; - map->osd_primary_affinity = kvmalloc( + map->osd_primary_affinity = ceph_kvmalloc( array_size(map->max_osd, sizeof(*map->osd_primary_affinity)), GFP_NOFS); if (!map->osd_primary_affinity) diff --git a/net/core/Makefile b/net/core/Makefile index a8e4f73769..4268846f2f 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,9 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ - fib_notifier.o xdp.o flow_offload.o gro.o - -obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o + fib_notifier.o xdp.o flow_offload.o obj-y += net-sysfs.o obj-$(CONFIG_PAGE_POOL) += page_pool.o diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d9c37fd108..68d2cbf833 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -13,7 +13,6 @@ #include #include #include -#include DEFINE_BPF_STORAGE_CACHE(sk_cache); @@ -23,8 +22,7 @@ bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) struct bpf_local_storage *sk_storage; struct bpf_local_storage_map *smap; - sk_storage = - rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held()); + sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage) return NULL; @@ -260,7 +258,6 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, { struct bpf_local_storage_data *sdata; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) return (unsigned long)NULL; @@ -291,7 +288,6 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) { - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!sk || !sk_fullsock(sk)) return -EINVAL; @@ -420,7 +416,6 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags) { - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (in_hardirq() || in_nmi()) return (unsigned long)NULL; @@ -430,7 +425,6 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, 
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, struct sock *, sk) { - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (in_hardirq() || in_nmi()) return -EPERM; @@ -935,7 +929,7 @@ static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), PTR_TO_BTF_ID_OR_NULL }, { offsetof(struct bpf_iter__bpf_sk_storage_map, value), - PTR_TO_BUF | PTR_MAYBE_NULL }, + PTR_TO_RDWR_BUF_OR_NULL }, }, .seq_info = &iter_seq_info, }; diff --git a/net/core/datagram.c b/net/core/datagram.c index ee290776c6..15ab9ffb27 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -646,8 +646,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { sk_wmem_queued_add(sk, truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_charge(sk, truesize); + sk_mem_charge(sk, truesize); } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); } diff --git a/net/core/dev.c b/net/core/dev.c index 1baab07820..33dc2a3ff7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -140,7 +140,7 @@ #include #include #include -#include +#include #include #include #include @@ -153,10 +153,16 @@ #include "net-sysfs.h" +#define MAX_GRO_SKBS 8 + +/* This should be increased if a protocol with a bigger head is added. */ +#define GRO_MAX_HEAD (MAX_HEADER + 128) static DEFINE_SPINLOCK(ptype_lock); +static DEFINE_SPINLOCK(offload_lock); struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ +static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, @@ -297,12 +303,6 @@ static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net, return NULL; } -bool netdev_name_in_use(struct net *net, const char *name) -{ - return netdev_name_node_lookup(net, name); -} -EXPORT_SYMBOL(netdev_name_in_use); - int netdev_name_node_alt_create(struct net_device *dev, const char *name) { struct netdev_name_node *name_node; @@ -365,12 +365,12 @@ static void list_netdevice(struct net_device *dev) ASSERT_RTNL(); - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); netdev_name_node_add(net, dev->name_node); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); dev_base_seq_inc(net); } @@ -383,11 +383,11 @@ static void unlist_netdevice(struct net_device *dev) ASSERT_RTNL(); /* Unlink dev from the device chain */ - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); list_del_rcu(&dev->dev_list); netdev_name_node_del(dev->name_node); hlist_del_rcu(&dev->index_hlist); - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); dev_base_seq_inc(dev_net(dev)); } @@ -598,6 +598,84 @@ void dev_remove_pack(struct packet_type *pt) EXPORT_SYMBOL(dev_remove_pack); +/** + * dev_add_offload - register offload handlers + * @po: protocol offload declaration + * + * Add protocol offload handlers to the networking stack. The passed + * &proto_offload is linked into kernel lists and may not be freed until + * it has been removed from the kernel lists. + * + * This call does not sleep therefore it can not + * guarantee all CPU's that are in middle of receiving packets + * will see the new offload handlers (until the next received packet). 
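For context on dev_add_offload(): a protocol registers its GRO/GSO callbacks once at init time, the way af_inet and ipv6 do for ETH_P_IP/ETH_P_IPV6. A hedged sketch of such a registration (the my_* identifiers are hypothetical; the struct layout and dev_add_offload() are the real API restored by this hunk):

	#include <linux/if_ether.h>
	#include <linux/init.h>
	#include <linux/netdevice.h>

	static struct sk_buff *my_gro_receive(struct list_head *head,
					      struct sk_buff *skb);
	static int my_gro_complete(struct sk_buff *skb, int nhoff);

	static struct packet_offload my_offload __read_mostly = {
		.type = cpu_to_be16(ETH_P_IP),	/* hypothetical: claim IPv4 */
		.callbacks = {
			.gro_receive  = my_gro_receive,
			.gro_complete = my_gro_complete,
		},
	};

	static int __init my_offload_init(void)
	{
		dev_add_offload(&my_offload);
		return 0;
	}
	fs_initcall(my_offload_init);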
+ */ +void dev_add_offload(struct packet_offload *po) +{ + struct packet_offload *elem; + + spin_lock(&offload_lock); + list_for_each_entry(elem, &offload_base, list) { + if (po->priority < elem->priority) + break; + } + list_add_rcu(&po->list, elem->list.prev); + spin_unlock(&offload_lock); +} +EXPORT_SYMBOL(dev_add_offload); + +/** + * __dev_remove_offload - remove offload handler + * @po: packet offload declaration + * + * Remove a protocol offload handler that was previously added to the + * kernel offload handlers by dev_add_offload(). The passed &offload_type + * is removed from the kernel lists and can be freed or reused once this + * function returns. + * + * The packet type might still be in use by receivers + * and must not be freed until after all the CPU's have gone + * through a quiescent state. + */ +static void __dev_remove_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + struct packet_offload *po1; + + spin_lock(&offload_lock); + + list_for_each_entry(po1, head, list) { + if (po == po1) { + list_del_rcu(&po->list); + goto out; + } + } + + pr_warn("dev_remove_offload: %p not found\n", po); +out: + spin_unlock(&offload_lock); +} + +/** + * dev_remove_offload - remove packet offload handler + * @po: packet offload declaration + * + * Remove a packet offload handler that was previously added to the kernel + * offload handlers by dev_add_offload(). The passed &offload_type is + * removed from the kernel lists and can be freed or reused once this + * function returns. + * + * This call sleeps to guarantee that no CPU is looking at the packet + * type after return. + */ +void dev_remove_offload(struct packet_offload *po) +{ + __dev_remove_offload(po); + + synchronize_net(); +} +EXPORT_SYMBOL(dev_remove_offload); + /******************************************************************************* * * Device Interface Subroutines @@ -1055,7 +1133,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) } snprintf(buf, IFNAMSIZ, name, i); - if (!netdev_name_in_use(net, buf)) + if (!__dev_get_by_name(net, buf)) return i; /* It is possible to run out of possible slots @@ -1109,7 +1187,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev, if (strchr(name, '%')) return dev_alloc_name_ns(net, dev, name); - else if (netdev_name_in_use(net, name)) + else if (__dev_get_by_name(net, name)) return -EEXIST; else if (dev->name != name) strlcpy(dev->name, name, IFNAMSIZ); @@ -1188,15 +1266,15 @@ int dev_change_name(struct net_device *dev, const char *newname) netdev_adjacent_rename_links(dev, oldname); - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); netdev_name_node_del(dev->name_node); - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); synchronize_rcu(); - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); netdev_name_node_add(net, dev->name_node); - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); @@ -1212,8 +1290,8 @@ int dev_change_name(struct net_device *dev, const char *newname) old_assign_type = NET_NAME_RENAMED; goto rollback; } else { - netdev_err(dev, "name change rollback failed: %d\n", - ret); + pr_err("%s: name change rollback failed: %d\n", + dev->name, ret); } } @@ -1377,7 +1455,6 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) int ret; ASSERT_RTNL(); - dev_addr_check(dev); if (!netif_device_present(dev)) { /* may be detached because 
parent is runtime-suspended */ @@ -2268,7 +2345,7 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) /* If TC0 is invalidated disable TC mapping */ if (tc->offset + tc->count > txq) { - netdev_warn(dev, "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); + pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); dev->num_tc = 0; return; } @@ -2279,8 +2356,8 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) tc = &dev->tc_to_txq[q]; if (tc->offset + tc->count > txq) { - netdev_warn(dev, "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", - i, q); + pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", + i, q); netdev_set_prio_tc_map(dev, i, 0); } } @@ -3232,6 +3309,40 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) return __vlan_get_protocol(skb, type, depth); } +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + + __skb_pull(skb, vlan_depth); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { + segs = ptype->callbacks.gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + __skb_push(skb, skb->data - skb_mac_header(skb)); + + return segs; +} +EXPORT_SYMBOL(skb_mac_gso_segment); + + /* openvswitch calls this on rx path, so we need a different check. */ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) @@ -3305,7 +3416,7 @@ EXPORT_SYMBOL(__skb_gso_segment); #ifdef CONFIG_BUG static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { - netdev_err(dev, "hw csum failure\n"); + pr_err("%s: hw csum failure\n", dev ? dev->name : ""); skb_dump(KERN_ERR, skb, true); dump_stack(); } @@ -3396,7 +3507,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, { u16 gso_segs = skb_shinfo(skb)->gso_segs; - if (gso_segs > READ_ONCE(dev->gso_max_segs)) + if (gso_segs > dev->gso_max_segs) return features & ~NETIF_F_GSO_MASK; if (!skb_shinfo(skb)->gso_type) { @@ -3719,12 +3830,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * separate lock before trying to get qdisc main lock. * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. - * On PREEMPT_RT it is possible to preempt the qdisc owner during xmit - * and then other tasks will only enqueue packets. The packets will be - * sent after the qdisc owner is scheduled again. To prevent this - * scenario the task always serialize on the lock. 
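The busylock referenced in the comment above is a second, outer spinlock that enqueue-only tasks contend on, so at most one of them spins on the qdisc root lock while the dequeue owner keeps making progress. Condensed from the surrounding __dev_xmit_skb() (error handling and the qdisc_run() details elided):

	bool contended = qdisc_is_running(q);

	if (unlikely(contended))
		spin_lock(&q->busylock);	/* serialize would-be enqueuers */

	spin_lock(qdisc_lock(q));
	/* ... enqueue skb, possibly run the qdisc ... */
	spin_unlock(qdisc_lock(q));

	if (unlikely(contended))
		spin_unlock(&q->busylock);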
*/ - contended = qdisc_is_running(q) || IS_ENABLED(CONFIG_PREEMPT_RT); + contended = qdisc_is_running(q); if (unlikely(contended)) spin_lock(&q->busylock); @@ -3820,7 +3927,6 @@ EXPORT_SYMBOL(dev_loopback_xmit); static struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { -#ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; @@ -3856,7 +3962,6 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) default: break; } -#endif /* CONFIG_NET_CLS_ACT */ return skb; } @@ -4050,20 +4155,13 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT skb->tc_at_ingress = 0; -#endif -#ifdef CONFIG_NET_EGRESS +# ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { - if (nf_hook_egress_active()) { - skb = nf_hook_egress(skb, &rc, dev); - if (!skb) - goto out; - } - nf_skip_egress(skb, true); skb = sch_handle_egress(skb, &rc, dev); if (!skb) goto out; - nf_skip_egress(skb, false); } +# endif #endif /* If device/qdisc don't need skb->dst, release it right now while * its hot in this cpu cache. @@ -4210,6 +4308,8 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ int dev_rx_weight __read_mostly = 64; int dev_tx_weight __read_mostly = 64; +/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ +int gro_normal_batch __read_mostly = 8; /* Called with irq disabled */ static inline void ____napi_schedule(struct softnet_data *sd, @@ -4712,7 +4812,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_PASS: break; default: - bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act); + bpf_warn_invalid_xdp_action(act); fallthrough; case XDP_ABORTED: trace_xdp_exception(skb->dev, xdp_prog, act); @@ -4899,8 +4999,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) trace_consume_skb(skb); else - trace_kfree_skb(skb, net_tx_action, - SKB_DROP_REASON_NOT_SPECIFIED); + trace_kfree_skb(skb, net_tx_action); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); @@ -5207,7 +5306,6 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, if (static_branch_unlikely(&ingress_needed_key)) { bool another = false; - nf_skip_egress(skb, true); skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, &another); if (another) @@ -5215,7 +5313,6 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, if (!skb) goto out; - nf_skip_egress(skb, false); if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) goto out; } @@ -5553,7 +5650,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) return ret; } -void netif_receive_skb_list_internal(struct list_head *head) +static void netif_receive_skb_list_internal(struct list_head *head) { struct sk_buff *skb, *next; struct list_head sublist; @@ -5731,6 +5828,551 @@ static void flush_all_backlogs(void) cpus_read_unlock(); } +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. 
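One subtlety in the batching below: napi->rx_count is measured in segments, not skbs, so a single GRO-coalesced skb can account for several units and flush the list early (napi_gro_complete() passes NAPI_GRO_CB(skb)->count for exactly that reason). Illustration with the default gro_normal_batch of 8 set in this file (hypothetical call sequence):

	gro_normal_one(napi, plain_skb, 1);	/* rx_count = 1 */
	gro_normal_one(napi, coalesced_skb, 7);	/* rx_count = 8 -> list flushed */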
+ */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) +{ + list_add_tail(&skb->list, &napi->rx_list); + napi->rx_count += segs; + if (napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + +static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) +{ + struct packet_offload *ptype; + __be16 type = skb->protocol; + struct list_head *head = &offload_base; + int err = -ENOENT; + + BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); + + if (NAPI_GRO_CB(skb)->count == 1) { + skb_shinfo(skb)->gso_size = 0; + goto out; + } + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + if (ptype->type != type || !ptype->callbacks.gro_complete) + continue; + + err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, + ipv6_gro_complete, inet_gro_complete, + skb, 0); + break; + } + rcu_read_unlock(); + + if (err) { + WARN_ON(&ptype->list == head); + kfree_skb(skb); + return NET_RX_SUCCESS; + } + +out: + gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); + return NET_RX_SUCCESS; +} + +static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, + bool flush_old) +{ + struct list_head *head = &napi->gro_hash[index].list; + struct sk_buff *skb, *p; + + list_for_each_entry_safe_reverse(skb, p, head, list) { + if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) + return; + skb_list_del_init(skb); + napi_gro_complete(napi, skb); + napi->gro_hash[index].count--; + } + + if (!napi->gro_hash[index].count) + __clear_bit(index, &napi->gro_bitmask); +} + +/* napi->gro_hash[].list contains packets ordered by age. + * youngest packets at the head of it. + * Complete skbs in reverse order to reduce latencies. + */ +void napi_gro_flush(struct napi_struct *napi, bool flush_old) +{ + unsigned long bitmask = napi->gro_bitmask; + unsigned int i, base = ~0U; + + while ((i = ffs(bitmask)) != 0) { + bitmask >>= i; + base += i; + __napi_gro_flush_chain(napi, base, flush_old); + } +} +EXPORT_SYMBOL(napi_gro_flush); + +static void gro_list_prepare(const struct list_head *head, + const struct sk_buff *skb) +{ + unsigned int maclen = skb->dev->hard_header_len; + u32 hash = skb_get_hash_raw(skb); + struct sk_buff *p; + + list_for_each_entry(p, head, list) { + unsigned long diffs; + + NAPI_GRO_CB(p)->flush = 0; + + if (hash != skb_get_hash_raw(p)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb); + if (skb_vlan_tag_present(p)) + diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb); + diffs |= skb_metadata_differs(p, skb); + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_mac_header(skb)); + else if (!diffs) + diffs = memcmp(skb_mac_header(p), + skb_mac_header(skb), + maclen); + + /* in most common scenarions 'slow_gro' is 0 + * otherwise we are already on some slower paths + * either skip all the infrequent tests altogether or + * avoid trying too hard to skip each of them individually + */ + if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) { +#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *skb_ext; + struct tc_skb_ext *p_ext; +#endif + + diffs |= p->sk != skb->sk; + diffs |= skb_metadata_dst_cmp(p, skb); + diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); + +#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + skb_ext = skb_ext_find(skb, TC_SKB_EXT); + p_ext = skb_ext_find(p, TC_SKB_EXT); + + diffs |= (!!p_ext) ^ 
(!!skb_ext); + if (!diffs && unlikely(skb_ext)) + diffs |= p_ext->chain ^ skb_ext->chain; +#endif + } + + NAPI_GRO_CB(p)->same_flow = !diffs; + } +} + +static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) +{ + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + + if (!skb_headlen(skb) && pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0)) && + (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, + skb_frag_size(frag0), + skb->end - skb->tail); + } +} + +static void gro_pull_from_frag0(struct sk_buff *skb, int grow) +{ + struct skb_shared_info *pinfo = skb_shinfo(skb); + + BUG_ON(skb->end - skb->tail < grow); + + memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); + + skb->data_len -= grow; + skb->tail += grow; + + skb_frag_off_add(&pinfo->frags[0], grow); + skb_frag_size_sub(&pinfo->frags[0], grow); + + if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { + skb_frag_unref(skb, 0); + memmove(pinfo->frags, pinfo->frags + 1, + --pinfo->nr_frags * sizeof(pinfo->frags[0])); + } +} + +static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) +{ + struct sk_buff *oldest; + + oldest = list_last_entry(head, struct sk_buff, list); + + /* We are called with head length >= MAX_GRO_SKBS, so this is + * impossible. + */ + if (WARN_ON_ONCE(!oldest)) + return; + + /* Do not adjust napi->gro_hash[].count, caller is adding a new + * SKB to the chain. + */ + skb_list_del_init(oldest); + napi_gro_complete(napi, oldest); +} + +static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); + struct gro_list *gro_list = &napi->gro_hash[bucket]; + struct list_head *head = &offload_base; + struct packet_offload *ptype; + __be16 type = skb->protocol; + struct sk_buff *pp = NULL; + enum gro_result ret; + int same_flow; + int grow; + + if (netif_elide_gro(skb->dev)) + goto normal; + + gro_list_prepare(&gro_list->list, skb); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + if (ptype->type != type || !ptype->callbacks.gro_receive) + continue; + + skb_set_network_header(skb, skb_gro_offset(skb)); + skb_reset_mac_len(skb); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); + NAPI_GRO_CB(skb)->free = 0; + NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->recursion_counter = 0; + NAPI_GRO_CB(skb)->is_fou = 0; + NAPI_GRO_CB(skb)->is_atomic = 1; + NAPI_GRO_CB(skb)->gro_remcsum_start = 0; + + /* Setup for GRO checksum validation */ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + NAPI_GRO_CB(skb)->csum_cnt = 0; + break; + case CHECKSUM_UNNECESSARY: + NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; + NAPI_GRO_CB(skb)->csum_valid = 0; + break; + default: + NAPI_GRO_CB(skb)->csum_cnt = 0; + NAPI_GRO_CB(skb)->csum_valid = 0; + } + + pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, + ipv6_gro_receive, inet_gro_receive, + &gro_list->list, skb); + break; + } + rcu_read_unlock(); + + if (&ptype->list == head) + goto normal; + + if (PTR_ERR(pp) == -EINPROGRESS) { + ret = GRO_CONSUMED; + goto ok; + } + + same_flow = NAPI_GRO_CB(skb)->same_flow; + ret = 
NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; + + if (pp) { + skb_list_del_init(pp); + napi_gro_complete(napi, pp); + gro_list->count--; + } + + if (same_flow) + goto ok; + + if (NAPI_GRO_CB(skb)->flush) + goto normal; + + if (unlikely(gro_list->count >= MAX_GRO_SKBS)) + gro_flush_oldest(napi, &gro_list->list); + else + gro_list->count++; + + NAPI_GRO_CB(skb)->count = 1; + NAPI_GRO_CB(skb)->age = jiffies; + NAPI_GRO_CB(skb)->last = skb; + skb_shinfo(skb)->gso_size = skb_gro_len(skb); + list_add(&skb->list, &gro_list->list); + ret = GRO_HELD; + +pull: + grow = skb_gro_offset(skb) - skb_headlen(skb); + if (grow > 0) + gro_pull_from_frag0(skb, grow); +ok: + if (gro_list->count) { + if (!test_bit(bucket, &napi->gro_bitmask)) + __set_bit(bucket, &napi->gro_bitmask); + } else if (test_bit(bucket, &napi->gro_bitmask)) { + __clear_bit(bucket, &napi->gro_bitmask); + } + + return ret; + +normal: + ret = GRO_NORMAL; + goto pull; +} + +struct packet_offload *gro_find_receive_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_receive) + continue; + return ptype; + } + return NULL; +} +EXPORT_SYMBOL(gro_find_receive_by_type); + +struct packet_offload *gro_find_complete_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_complete) + continue; + return ptype; + } + return NULL; +} +EXPORT_SYMBOL(gro_find_complete_by_type); + +static gro_result_t napi_skb_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) +{ + switch (ret) { + case GRO_NORMAL: + gro_normal_one(napi, skb, 1); + break; + + case GRO_MERGED_FREE: + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else if (skb->fclone != SKB_FCLONE_UNAVAILABLE) + __kfree_skb(skb); + else + __kfree_skb_defer(skb); + break; + + case GRO_HELD: + case GRO_MERGED: + case GRO_CONSUMED: + break; + } + + return ret; +} + +gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + gro_result_t ret; + + skb_mark_napi_id(skb, napi); + trace_napi_gro_receive_entry(skb); + + skb_gro_reset_offset(skb, 0); + + ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); + trace_napi_gro_receive_exit(ret); + + return ret; +} +EXPORT_SYMBOL(napi_gro_receive); + +static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) +{ + if (unlikely(skb->pfmemalloc)) { + consume_skb(skb); + return; + } + __skb_pull(skb, skb_headlen(skb)); + /* restore the reserve we had after netdev_alloc_skb_ip_align() */ + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); + __vlan_hwaccel_clear_tag(skb); + skb->dev = napi->dev; + skb->skb_iif = 0; + + /* eth_type_trans() assumes pkt_type is PACKET_HOST */ + skb->pkt_type = PACKET_HOST; + + skb->encapsulation = 0; + skb_shinfo(skb)->gso_type = 0; + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + if (unlikely(skb->slow_gro)) { + skb_orphan(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); + skb->slow_gro = 0; + } + + napi->skb = skb; +} + +struct sk_buff *napi_get_frags(struct napi_struct *napi) +{ + struct sk_buff *skb = napi->skb; + + if (!skb) { + skb = napi_alloc_skb(napi, GRO_MAX_HEAD); + if (skb) { + napi->skb = skb; + skb_mark_napi_id(skb, napi); + } + } + return skb; +} 
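/*
 * Editor's sketch, not part of the patch: the rx_list batching used by
 * napi_skb_finish() above (via gro_normal_one()) defers packets on a
 * per-NAPI list and hands them to the stack in bundles once the queued
 * segment count crosses a threshold, amortizing the per-packet cost of
 * entering the stack. Below is a self-contained userspace model of that
 * trigger; the names (pkt, napi_sim, BATCH standing in for
 * gro_normal_batch) are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

#define BATCH 8				/* stands in for gro_normal_batch */

struct pkt { int id; int segs; struct pkt *next; };

struct napi_sim {
	struct pkt *head, *tail;	/* rx_list analogue, FIFO order */
	int rx_count;			/* queued segment count */
};

/* hand the whole batch to the "stack" in one call, like gro_normal_list() */
static void normal_list(struct napi_sim *n)
{
	struct pkt *p, *next;

	for (p = n->head; p; p = next) {
		next = p->next;
		printf("deliver pkt %d (%d segs)\n", p->id, p->segs);
		free(p);
	}
	n->head = n->tail = NULL;
	n->rx_count = 0;
}

/* queue one packet; flush once enough segments are pending */
static void normal_one(struct napi_sim *n, struct pkt *p)
{
	p->next = NULL;
	if (n->tail)
		n->tail->next = p;
	else
		n->head = p;
	n->tail = p;
	n->rx_count += p->segs;
	if (n->rx_count >= BATCH)	/* same trigger as gro_normal_one() */
		normal_list(n);
}

int main(void)
{
	struct napi_sim n = { 0 };
	int i;

	for (i = 0; i < 20; i++) {
		struct pkt *p = malloc(sizeof(*p));

		if (!p)
			return 1;
		p->id = i;
		p->segs = 1 + i % 3;	/* pretend some pkts carry 2-3 segs */
		normal_one(&n, p);
	}
	normal_list(&n);	/* final flush, as the end of a NAPI poll would do */
	return 0;
}
/* Batching trades a little latency within one poll for fewer stack entries;
 * the flush at poll end bounds that latency.
 */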
+EXPORT_SYMBOL(napi_get_frags); + +static gro_result_t napi_frags_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) +{ + switch (ret) { + case GRO_NORMAL: + case GRO_HELD: + __skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + if (ret == GRO_NORMAL) + gro_normal_one(napi, skb, 1); + break; + + case GRO_MERGED_FREE: + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else + napi_reuse_skb(napi, skb); + break; + + case GRO_MERGED: + case GRO_CONSUMED: + break; + } + + return ret; +} + +/* Upper GRO stack assumes network header starts at gro_offset=0 + * Drivers could call both napi_gro_frags() and napi_gro_receive() + * We copy ethernet header into skb->data to have a common layout. + */ +static struct sk_buff *napi_frags_skb(struct napi_struct *napi) +{ + struct sk_buff *skb = napi->skb; + const struct ethhdr *eth; + unsigned int hlen = sizeof(*eth); + + napi->skb = NULL; + + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb, hlen); + + if (unlikely(skb_gro_header_hard(skb, hlen))) { + eth = skb_gro_header_slow(skb, hlen, 0); + if (unlikely(!eth)) { + net_warn_ratelimited("%s: dropping impossible skb from %s\n", + __func__, napi->dev->name); + napi_reuse_skb(napi, skb); + return NULL; + } + } else { + eth = (const struct ethhdr *)skb->data; + gro_pull_from_frag0(skb, hlen); + NAPI_GRO_CB(skb)->frag0 += hlen; + NAPI_GRO_CB(skb)->frag0_len -= hlen; + } + __skb_pull(skb, hlen); + + /* + * This works because the only protocols we care about don't require + * special handling. + * We'll fix it up properly in napi_frags_finish() + */ + skb->protocol = eth->h_proto; + + return skb; +} + +gro_result_t napi_gro_frags(struct napi_struct *napi) +{ + gro_result_t ret; + struct sk_buff *skb = napi_frags_skb(napi); + + trace_napi_gro_frags_entry(skb); + + ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); + trace_napi_gro_frags_exit(ret); + + return ret; +} +EXPORT_SYMBOL(napi_gro_frags); + +/* Compute the checksum from gro_offset and return the folded value + * after adding in any pseudo checksum. + */ +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb) +{ + __wsum wsum; + __sum16 sum; + + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); + + /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ + sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); + /* See comments in __skb_checksum_complete(). 
*/ + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev, skb); + } + + NAPI_GRO_CB(skb)->csum = wsum; + NAPI_GRO_CB(skb)->csum_valid = 1; + + return sum; +} +EXPORT_SYMBOL(__skb_gro_checksum_complete); + static void net_rps_send_ipi(struct softnet_data *remsd) { #ifdef CONFIG_RPS @@ -6268,28 +6910,19 @@ EXPORT_SYMBOL(netif_napi_add); void napi_disable(struct napi_struct *n) { - unsigned long val, new; - might_sleep(); set_bit(NAPI_STATE_DISABLE, &n->state); - for ( ; ; ) { - val = READ_ONCE(n->state); - if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { - usleep_range(20, 200); - continue; - } - - new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; - new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); - - if (cmpxchg(&n->state, val, new) == val) - break; - } + while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) + msleep(1); + while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state)) + msleep(1); hrtimer_cancel(&n->timer); + clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); clear_bit(NAPI_STATE_DISABLE, &n->state); + clear_bit(NAPI_STATE_THREADED, &n->state); } EXPORT_SYMBOL(napi_disable); @@ -6367,8 +7000,8 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) } if (unlikely(work > weight)) - netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n", - n->poll, work, weight); + pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", + n->poll, work, weight); if (likely(work < weight)) return work; @@ -6542,7 +7175,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) struct netdev_adjacent { struct net_device *dev; - netdevice_tracker dev_tracker; /* upper master flag, there can only be one master device per list */ bool master; @@ -7307,7 +7939,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->ref_nr = 1; adj->private = private; adj->ignore = false; - dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL); + dev_hold(adj_dev); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); @@ -7336,8 +7968,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: - dev_put_track(adj_dev, &adj->dev_tracker); kfree(adj); + dev_put(adj_dev); return ret; } @@ -7378,7 +8010,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, list_del_rcu(&adj->list); pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - dev_put_track(adj_dev, &adj->dev_tracker); + dev_put(adj_dev); kfree_rcu(adj, rcu); } @@ -7923,7 +8555,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) dev->flags &= ~IFF_PROMISC; else { dev->promiscuity -= inc; - netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n"); + pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", + dev->name); return -EOVERFLOW; } } @@ -7993,7 +8626,8 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) dev->flags &= ~IFF_ALLMULTI; else { dev->allmulti -= inc; - netdev_warn(dev, "allmulti touches roof, set allmulti failed. 
allmulti feature of device might be broken.\n"); + pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", + dev->name); return -EOVERFLOW; } } @@ -8530,11 +9164,14 @@ int dev_get_port_parent_id(struct net_device *dev, } err = devlink_compat_switch_id_get(dev, ppid); - if (!recurse || err != -EOPNOTSUPP) + if (!err || err != -EOPNOTSUPP) return err; + if (!recurse) + return -EOPNOTSUPP; + netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = dev_get_port_parent_id(lower_dev, ppid, true); + err = dev_get_port_parent_id(lower_dev, ppid, recurse); if (err) break; if (!first.id_len) @@ -8567,17 +9204,35 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) EXPORT_SYMBOL(netdev_port_same_parent_id); /** - * dev_change_proto_down - set carrier according to proto_down. + * dev_change_proto_down - update protocol port state information + * @dev: device + * @proto_down: new value + * + * This info can be used by switch drivers to set the phys state of the + * port. + */ +int dev_change_proto_down(struct net_device *dev, bool proto_down) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_change_proto_down) + return -EOPNOTSUPP; + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_change_proto_down(dev, proto_down); +} +EXPORT_SYMBOL(dev_change_proto_down); + +/** + * dev_change_proto_down_generic - generic implementation for + * ndo_change_proto_down that sets carrier according to + * proto_down. * * @dev: device * @proto_down: new value */ -int dev_change_proto_down(struct net_device *dev, bool proto_down) +int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) { - if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) - return -EOPNOTSUPP; - if (!netif_device_present(dev)) - return -ENODEV; if (proto_down) netif_carrier_off(dev); else @@ -8585,7 +9240,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) dev->proto_down = proto_down; return 0; } -EXPORT_SYMBOL(dev_change_proto_down); +EXPORT_SYMBOL(dev_change_proto_down_generic); /** * dev_change_proto_down_reason - proto down reason @@ -9266,11 +9921,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } - if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) { - netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n"); - features &= ~NETIF_F_LRO; - } - if (features & NETIF_F_HW_TLS_TX) { bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); @@ -9876,7 +10526,6 @@ static void netdev_wait_allrefs(struct net_device *dev) netdev_unregister_timeout_secs * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. 
Usage count = %d\n", dev->name, refcnt); - ref_tracker_dir_print(&dev->refcnt_tracker, 10); warning_time = jiffies; } } @@ -10167,7 +10816,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; - ref_tracker_dir_init(&dev->refcnt_tracker, 128); #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) @@ -10187,7 +10835,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; - dev->gro_max_size = GRO_MAX_SIZE; dev->upper_level = 1; dev->lower_level = 1; #ifdef CONFIG_LOCKDEP @@ -10231,7 +10878,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; - nf_hook_netdev_init(dev); + nf_hook_ingress_init(dev); return dev; @@ -10285,7 +10932,6 @@ void free_netdev(struct net_device *dev) list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); - ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; @@ -10518,7 +11164,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, * we can use it in the destination network namespace. */ err = -EEXIST; - if (netdev_name_in_use(net, dev->name)) { + if (__dev_get_by_name(net, dev->name)) { /* We get here if we can't use the current device name */ if (!pat) goto out; @@ -10871,7 +11517,7 @@ static void __net_exit default_device_exit(struct net *net) /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); - if (netdev_name_in_use(&init_net, fb_name)) + if (__dev_get_by_name(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { @@ -10978,6 +11624,8 @@ static int __init net_dev_init(void) for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); + INIT_LIST_HEAD(&offload_base); + if (register_pernet_subsys(&netdev_net_ops)) goto out; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bead38ca50..f0cb383441 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -16,35 +16,6 @@ * General list handling functions */ -static int __hw_addr_insert(struct netdev_hw_addr_list *list, - struct netdev_hw_addr *new, int addr_len) -{ - struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; - struct netdev_hw_addr *ha; - - while (*ins_point) { - int diff; - - ha = rb_entry(*ins_point, struct netdev_hw_addr, node); - diff = memcmp(new->addr, ha->addr, addr_len); - if (diff == 0) - diff = memcmp(&new->type, &ha->type, sizeof(new->type)); - - parent = *ins_point; - if (diff < 0) - ins_point = &parent->rb_left; - else if (diff > 0) - ins_point = &parent->rb_right; - else - return -EEXIST; - } - - rb_link_node_rcu(&new->node, parent, ins_point); - rb_insert_color(&new->node, &list->tree); - - return 0; -} - static struct netdev_hw_addr* __hw_addr_create(const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) @@ -79,6 +50,11 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, if (addr_len > MAX_ADDR_LEN) return -EINVAL; + ha = list_first_entry(&list->list, struct netdev_hw_addr, list); + if (ha && !memcmp(addr, ha->addr, addr_len) && + (!addr_type || addr_type == ha->type)) + goto found_it; + while (*ins_point) { int diff; @@ -93,6 +69,7 @@ static 
int __hw_addr_add_ex(struct netdev_hw_addr_list *list, } else if (diff > 0) { ins_point = &parent->rb_right; } else { +found_it: if (exclusive) return -EEXIST; if (global) { @@ -117,8 +94,16 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, if (!ha) return -ENOMEM; - rb_link_node(&ha->node, parent, ins_point); - rb_insert_color(&ha->node, &list->tree); + /* The first address in dev->dev_addrs is pointed to by dev->dev_addr + * and mutated freely by device drivers and netdev ops, so if we insert + * it into the tree we'll end up with an invalid rbtree. + */ + if (list->count > 0) { + rb_link_node(&ha->node, parent, ins_point); + rb_insert_color(&ha->node, &list->tree); + } else { + RB_CLEAR_NODE(&ha->node); + } list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -153,7 +138,8 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, if (--ha->refcount) return 0; - rb_erase(&ha->node, &list->tree); + if (!RB_EMPTY_NODE(&ha->node)) + rb_erase(&ha->node, &list->tree); list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); @@ -165,8 +151,18 @@ static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { + struct netdev_hw_addr *ha; struct rb_node *node; + /* The first address isn't inserted into the tree because in the dev->dev_addrs + * list it's the address pointed to by dev->dev_addr which is freely mutated + * in place, so we need to check it separately. + */ + ha = list_first_entry(&list->list, struct netdev_hw_addr, list); + if (ha && !memcmp(addr, ha->addr, addr_len) && + (!addr_type || addr_type == ha->type)) + return ha; + node = list->tree.rb_node; while (node) { @@ -502,21 +498,6 @@ EXPORT_SYMBOL(__hw_addr_init); * Device addresses handling functions */ -/* Check that netdev->dev_addr is not written to directly as this would - * break the rbtree layout. All changes should go thru dev_addr_set() and co. - * Remove this check in mid-2024. 
- */ -void dev_addr_check(struct net_device *dev) -{ - if (!memcmp(dev->dev_addr, dev->dev_addr_shadow, MAX_ADDR_LEN)) - return; - - netdev_warn(dev, "Current addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr); - netdev_warn(dev, "Expected addr: %*ph\n", - MAX_ADDR_LEN, dev->dev_addr_shadow); - netdev_WARN(dev, "Incorrect netdev->dev_addr\n"); -} - /** * dev_addr_flush - Flush device address list * @dev: device @@ -528,11 +509,11 @@ void dev_addr_check(struct net_device *dev) void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - dev_addr_check(dev); __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } +EXPORT_SYMBOL(dev_addr_flush); /** * dev_addr_init - Init device address list @@ -566,21 +547,7 @@ int dev_addr_init(struct net_device *dev) } return err; } - -void dev_addr_mod(struct net_device *dev, unsigned int offset, - const void *addr, size_t len) -{ - struct netdev_hw_addr *ha; - - dev_addr_check(dev); - - ha = container_of(dev->dev_addr, struct netdev_hw_addr, addr[0]); - rb_erase(&ha->node, &dev->dev_addrs.tree); - memcpy(&ha->addr[offset], addr, len); - memcpy(&dev->dev_addr_shadow[offset], addr, len); - WARN_ON(__hw_addr_insert(&dev->dev_addrs, ha, dev->addr_len)); -} -EXPORT_SYMBOL(dev_addr_mod); +EXPORT_SYMBOL(dev_addr_init); /** * dev_addr_add - Add a device address diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1b807d119d..0e87237fd8 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -192,7 +192,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - if (cfg.flags & ~HWTSTAMP_FLAG_MASK) + if (cfg.flags) /* reserved for future extensions */ return -EINVAL; tx_type = cfg.tx_type; @@ -313,7 +313,6 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); const struct net_device_ops *ops; - netdevice_tracker dev_tracker; if (!dev) return -ENODEV; @@ -382,10 +381,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -ENODEV; if (!netif_is_bridge_master(dev)) return -EOPNOTSUPP; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + dev_hold(dev); rtnl_unlock(); err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); - dev_put_track(dev, &dev_tracker); + dev_put(dev); rtnl_lock(); return err; @@ -519,7 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, case SIOCETHTOOL: dev_load(net, ifr->ifr_name); + rtnl_lock(); ret = dev_ethtool(net, ifr, data); + rtnl_unlock(); if (colon) *colon = ':'; return ret; diff --git a/net/core/devlink.c b/net/core/devlink.c index fcd9f6d85c..db76c55e1a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7,7 +7,6 @@ * Copyright (c) 2016 Jiri Pirko */ -#include #include #include #include @@ -31,92 +30,6 @@ #define CREATE_TRACE_POINTS #include -#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ - (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) - -struct devlink_dev_stats { - u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; - u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; -}; - -struct devlink { - u32 index; - struct list_head port_list; - struct list_head rate_list; - struct list_head sb_list; - struct list_head dpipe_table_list; - struct list_head resource_list; - struct list_head param_list; - struct list_head region_list; - struct list_head reporter_list; - struct mutex reporters_lock; /* protects reporter_list */ - struct devlink_dpipe_headers 
*dpipe_headers; - struct list_head trap_list; - struct list_head trap_group_list; - struct list_head trap_policer_list; - const struct devlink_ops *ops; - u64 features; - struct xarray snapshot_ids; - struct devlink_dev_stats stats; - struct device *dev; - possible_net_t _net; - /* Serializes access to devlink instance specific objects such as - * port, sb, dpipe, resource, params, region, traps and more. - */ - struct mutex lock; - u8 reload_failed:1; - refcount_t refcount; - struct completion comp; - char priv[] __aligned(NETDEV_ALIGN); -}; - -/** - * struct devlink_resource - devlink resource - * @name: name of the resource - * @id: id, per devlink instance - * @size: size of the resource - * @size_new: updated size of the resource, reload is needed - * @size_valid: valid in case the total size of the resource is valid - * including its children - * @parent: parent resource - * @size_params: size parameters - * @list: parent list - * @resource_list: list of child resources - * @occ_get: occupancy getter callback - * @occ_get_priv: occupancy getter callback priv - */ -struct devlink_resource { - const char *name; - u64 id; - u64 size; - u64 size_new; - bool size_valid; - struct devlink_resource *parent; - struct devlink_resource_size_params size_params; - struct list_head list; - struct list_head resource_list; - devlink_resource_occ_get_t *occ_get; - void *occ_get_priv; -}; - -void *devlink_priv(struct devlink *devlink) -{ - return &devlink->priv; -} -EXPORT_SYMBOL_GPL(devlink_priv); - -struct devlink *priv_to_devlink(void *priv) -{ - return container_of(priv, struct devlink, priv); -} -EXPORT_SYMBOL_GPL(priv_to_devlink); - -struct device *devlink_to_dev(const struct devlink *devlink) -{ - return devlink->dev; -} -EXPORT_SYMBOL_GPL(devlink_to_dev); - static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = { { .name = "destination mac", @@ -132,7 +45,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ethernet = { .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet), .global = true, }; -EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet); +EXPORT_SYMBOL(devlink_dpipe_header_ethernet); static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = { { @@ -149,7 +62,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv4 = { .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4), .global = true, }; -EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4); +EXPORT_SYMBOL(devlink_dpipe_header_ipv4); static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = { { @@ -166,7 +79,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = { .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6), .global = true, }; -EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6); +EXPORT_SYMBOL(devlink_dpipe_header_ipv6); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr); @@ -182,22 +95,6 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define DEVLINK_REGISTERED XA_MARK_1 -/* devlink instances are open to the access from the user space after - * devlink_register() call. Such logical barrier allows us to have certain - * expectations related to locking. - * - * Before *_register() - we are in initialization stage and no parallel - * access possible to the devlink instance. All drivers perform that phase - * by implicitly holding device_lock. - * - * After *_register() - users and driver can access devlink instance at - * the same time. 
- */ -#define ASSERT_DEVLINK_REGISTERED(d) \ - WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) -#define ASSERT_DEVLINK_NOT_REGISTERED(d) \ - WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) - /* devlink_mutex * * An overall lock guarding every operation coming from userspace. @@ -212,17 +109,15 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); -void devlink_put(struct devlink *devlink) +static void devlink_put(struct devlink *devlink) { if (refcount_dec_and_test(&devlink->refcount)) complete(&devlink->comp); } -struct devlink *__must_check devlink_try_get(struct devlink *devlink) +static bool __must_check devlink_try_get(struct devlink *devlink) { - if (refcount_inc_not_zero(&devlink->refcount)) - return devlink; - return NULL; + return refcount_inc_not_zero(&devlink->refcount); } static struct devlink *devlink_get_from_attrs(struct net *net, @@ -847,7 +742,6 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); - WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -1146,15 +1040,11 @@ static int devlink_nl_port_fill(struct sk_buff *msg, static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { - struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) - return; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -1165,22 +1055,19 @@ static void devlink_port_notify(struct devlink_port *devlink_port, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(devlink_port->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static void devlink_rate_notify(struct devlink_rate *devlink_rate, enum devlink_command cmd) { - struct devlink *devlink = devlink_rate->devlink; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) - return; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -1191,8 +1078,9 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(devlink_rate->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, @@ -3397,7 +3285,7 @@ void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) kfree(value[value_index].mask); } } -EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear); +EXPORT_SYMBOL(devlink_dpipe_entry_clear); static int devlink_dpipe_entries_fill(struct genl_info *info, enum devlink_command cmd, int flags, @@ -4064,6 +3952,9 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, struct net *curr_net; int err; + if (!devlink->reload_enabled) + return -EOPNOTSUPP; + memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); @@ -4131,7 +4022,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) u32 
actions_performed; int err; - if (!(devlink->features & DEVLINK_F_RELOAD)) + if (!devlink_reload_supported(devlink->ops)) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -4260,9 +4151,6 @@ static void __devlink_flash_update_notify(struct devlink *devlink, cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) - return; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -4462,21 +4350,6 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE, }, - { - .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, - .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME, - .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE, - }, - { - .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, - .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME, - .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE, - }, - { - .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, - .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME, - .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE, - }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -4649,6 +4522,8 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, return -EOPNOTSUPP; param_value[i] = param_item->driverinit_value; } else { + if (!param_item->published) + continue; ctx.cmode = i; err = devlink_param_get(devlink, param, &ctx); if (err) @@ -4724,7 +4599,6 @@ static void devlink_param_notify(struct devlink *devlink, WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL && cmd != DEVLINK_CMD_PORT_PARAM_NEW && cmd != DEVLINK_CMD_PORT_PARAM_DEL); - ASSERT_DEVLINK_REGISTERED(devlink); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -4974,6 +4848,47 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, info, DEVLINK_CMD_PARAM_NEW); } +static int devlink_param_register_one(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command cmd) +{ + struct devlink_param_item *param_item; + + if (devlink_param_find_by_name(param_list, param->name)) + return -EEXIST; + + if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT)) + WARN_ON(param->get || param->set); + else + WARN_ON(!param->get || !param->set); + + param_item = kzalloc(sizeof(*param_item), GFP_KERNEL); + if (!param_item) + return -ENOMEM; + param_item->param = param; + + list_add_tail(¶m_item->list, param_list); + devlink_param_notify(devlink, port_index, param_item, cmd); + return 0; +} + +static void devlink_param_unregister_one(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command cmd) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_name(param_list, param->name); + WARN_ON(!param_item); + devlink_param_notify(devlink, port_index, param_item, cmd); + list_del(¶m_item->list); + kfree(param_item); +} + static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { @@ -5155,11 +5070,6 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink, if (err) goto nla_put_failure; - err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, - region->max_snapshots); - if (err) - goto nla_put_failure; - err = devlink_nl_region_snapshots_id_put(msg, devlink, region); 
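/*
 * Editor's aside, illustration only (not part of the patch): the
 * devlink_put()/devlink_try_get() helpers made static again earlier in this
 * file implement the usual "refcount plus completion" lifetime scheme --
 * lookups succeed only while the count is non-zero, and devlink_unregister()
 * drops the initial reference and then sleeps until the last user is gone.
 * A userspace sketch of the same shape, with invented names
 * (inst_try_get/inst_put/inst_unregister) and the completion modelled by a
 * POSIX semaphore:
 *
 *	#include <semaphore.h>
 *	#include <stdatomic.h>
 *	#include <stdbool.h>
 *
 *	struct inst { atomic_int refcount; sem_t comp; };
 *
 *	static bool inst_try_get(struct inst *i)
 *	{
 *		int v = atomic_load(&i->refcount);
 *
 *		while (v != 0)	// refcount_inc_not_zero(): never revive zero
 *			if (atomic_compare_exchange_weak(&i->refcount, &v, v + 1))
 *				return true;
 *		return false;
 *	}
 *
 *	static void inst_put(struct inst *i)
 *	{
 *		if (atomic_fetch_sub(&i->refcount, 1) == 1)
 *			sem_post(&i->comp);	// complete(&devlink->comp)
 *	}
 *
 *	static void inst_unregister(struct inst *i)
 *	{
 *		inst_put(i);		// drop the initial reference
 *		sem_wait(&i->comp);	// wait_for_completion()
 *	}
 */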
if (err) goto nla_put_failure; @@ -5235,19 +5145,17 @@ static void devlink_nl_region_notify(struct devlink_region *region, struct devlink_snapshot *snapshot, enum devlink_command cmd) { - struct devlink *devlink = region->devlink; struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) - return; msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0); if (IS_ERR(msg)) return; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(region->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } /** @@ -6361,21 +6269,23 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, return 0; } -static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) { if (fmsg->putting_binary) return -EINVAL; return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); } +EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put); -static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) { if (fmsg->putting_binary) return -EINVAL; return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); } +EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put); int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) { @@ -6386,13 +6296,14 @@ int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); -static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) { if (fmsg->putting_binary) return -EINVAL; return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); } +EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put); int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) { @@ -7012,12 +6923,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, static void devlink_recover_notify(struct devlink_health_reporter *reporter, enum devlink_command cmd) { - struct devlink *devlink = reporter->devlink; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); - WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -7029,8 +6938,9 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(reporter->devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } void @@ -8987,25 +8897,6 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) return true; } -/** - * devlink_set_features - Set devlink supported features - * - * @devlink: devlink - * @features: devlink support features - * - * This interface allows us to set reload ops separatelly from - * the devlink_alloc. 
- */ -void devlink_set_features(struct devlink *devlink, u64 features) -{ - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - WARN_ON(features & DEVLINK_F_RELOAD && - !devlink_reload_supported(devlink->ops)); - devlink->features = features; -} -EXPORT_SYMBOL_GPL(devlink_set_features); - /** * devlink_alloc_ns - Allocate new devlink instance resources * in specific namespace @@ -9065,104 +8956,18 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, } EXPORT_SYMBOL_GPL(devlink_alloc_ns); -static void -devlink_trap_policer_notify(struct devlink *devlink, - const struct devlink_trap_policer_item *policer_item, - enum devlink_command cmd); -static void -devlink_trap_group_notify(struct devlink *devlink, - const struct devlink_trap_group_item *group_item, - enum devlink_command cmd); -static void devlink_trap_notify(struct devlink *devlink, - const struct devlink_trap_item *trap_item, - enum devlink_command cmd); - -static void devlink_notify_register(struct devlink *devlink) -{ - struct devlink_trap_policer_item *policer_item; - struct devlink_trap_group_item *group_item; - struct devlink_param_item *param_item; - struct devlink_trap_item *trap_item; - struct devlink_port *devlink_port; - struct devlink_rate *rate_node; - struct devlink_region *region; - - devlink_notify(devlink, DEVLINK_CMD_NEW); - list_for_each_entry(devlink_port, &devlink->port_list, list) - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); - - list_for_each_entry(policer_item, &devlink->trap_policer_list, list) - devlink_trap_policer_notify(devlink, policer_item, - DEVLINK_CMD_TRAP_POLICER_NEW); - - list_for_each_entry(group_item, &devlink->trap_group_list, list) - devlink_trap_group_notify(devlink, group_item, - DEVLINK_CMD_TRAP_GROUP_NEW); - - list_for_each_entry(trap_item, &devlink->trap_list, list) - devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW); - - list_for_each_entry(rate_node, &devlink->rate_list, list) - devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); - - list_for_each_entry(region, &devlink->region_list, list) - devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); - - list_for_each_entry(param_item, &devlink->param_list, list) - devlink_param_notify(devlink, 0, param_item, - DEVLINK_CMD_PARAM_NEW); -} - -static void devlink_notify_unregister(struct devlink *devlink) -{ - struct devlink_trap_policer_item *policer_item; - struct devlink_trap_group_item *group_item; - struct devlink_param_item *param_item; - struct devlink_trap_item *trap_item; - struct devlink_port *devlink_port; - struct devlink_rate *rate_node; - struct devlink_region *region; - - list_for_each_entry_reverse(param_item, &devlink->param_list, list) - devlink_param_notify(devlink, 0, param_item, - DEVLINK_CMD_PARAM_DEL); - - list_for_each_entry_reverse(region, &devlink->region_list, list) - devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); - - list_for_each_entry_reverse(rate_node, &devlink->rate_list, list) - devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); - - list_for_each_entry_reverse(trap_item, &devlink->trap_list, list) - devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL); - - list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list) - devlink_trap_group_notify(devlink, group_item, - DEVLINK_CMD_TRAP_GROUP_DEL); - list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list, - list) - devlink_trap_policer_notify(devlink, policer_item, - DEVLINK_CMD_TRAP_POLICER_DEL); - - list_for_each_entry_reverse(devlink_port, &devlink->port_list, list) - 
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); - devlink_notify(devlink, DEVLINK_CMD_DEL); -} - /** * devlink_register - Register devlink instance * * @devlink: devlink */ -void devlink_register(struct devlink *devlink) +int devlink_register(struct devlink *devlink) { - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - /* Make sure that we are in .probe() routine */ - mutex_lock(&devlink_mutex); xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); - devlink_notify_register(devlink); + devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); + return 0; } EXPORT_SYMBOL_GPL(devlink_register); @@ -9173,19 +8978,53 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { - ASSERT_DEVLINK_REGISTERED(devlink); - /* Make sure that we are in .remove() routine */ - devlink_put(devlink); wait_for_completion(&devlink->comp); mutex_lock(&devlink_mutex); - devlink_notify_unregister(devlink); + WARN_ON(devlink_reload_supported(devlink->ops) && + devlink->reload_enabled); + devlink_notify(devlink, DEVLINK_CMD_DEL); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); +/** + * devlink_reload_enable - Enable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at end of device initialization + * process when reload operation is supported. + */ +void devlink_reload_enable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->reload_enabled = true; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_enable); + +/** + * devlink_reload_disable - Disable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at the beginning of device cleanup + * process when reload operation is supported. + */ +void devlink_reload_disable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + /* Mutex is taken which ensures that no reload operation is in + * progress while setting up forbidden flag. 
+ */ + devlink->reload_enabled = false; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_disable); + /** * devlink_free - Free devlink instance resources * @@ -9193,8 +9032,6 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_free(struct devlink *devlink) { - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->trap_policer_list)); @@ -9948,38 +9785,34 @@ int devlink_resource_register(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_resource_register); -static void devlink_resource_unregister(struct devlink *devlink, - struct devlink_resource *resource) -{ - struct devlink_resource *tmp, *child_resource; - - list_for_each_entry_safe(child_resource, tmp, &resource->resource_list, - list) { - devlink_resource_unregister(devlink, child_resource); - list_del(&child_resource->list); - kfree(child_resource); - } -} - /** * devlink_resources_unregister - free all resources * * @devlink: devlink + * @resource: resource */ -void devlink_resources_unregister(struct devlink *devlink) +void devlink_resources_unregister(struct devlink *devlink, + struct devlink_resource *resource) { struct devlink_resource *tmp, *child_resource; + struct list_head *resource_list; - mutex_lock(&devlink->lock); + if (resource) + resource_list = &resource->resource_list; + else + resource_list = &devlink->resource_list; - list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list, - list) { - devlink_resource_unregister(devlink, child_resource); + if (!resource) + mutex_lock(&devlink->lock); + + list_for_each_entry_safe(child_resource, tmp, resource_list, list) { + devlink_resources_unregister(devlink, child_resource); list_del(&child_resource->list); kfree(child_resource); } - mutex_unlock(&devlink->lock); + if (!resource) + mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devlink_resources_unregister); @@ -10104,6 +9937,73 @@ static int devlink_param_verify(const struct devlink_param *param) return devlink_param_driver_verify(param); } +static int __devlink_param_register_one(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command reg_cmd) +{ + int err; + + err = devlink_param_verify(param); + if (err) + return err; + + return devlink_param_register_one(devlink, port_index, + param_list, param, reg_cmd); +} + +static int __devlink_params_register(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *params, + size_t params_count, + enum devlink_command reg_cmd, + enum devlink_command unreg_cmd) +{ + const struct devlink_param *param = params; + int i; + int err; + + mutex_lock(&devlink->lock); + for (i = 0; i < params_count; i++, param++) { + err = __devlink_param_register_one(devlink, port_index, + param_list, param, reg_cmd); + if (err) + goto rollback; + } + + mutex_unlock(&devlink->lock); + return 0; + +rollback: + if (!i) + goto unlock; + for (param--; i > 0; i--, param--) + devlink_param_unregister_one(devlink, port_index, param_list, + param, unreg_cmd); +unlock: + mutex_unlock(&devlink->lock); + return err; +} + +static void __devlink_params_unregister(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *params, + size_t params_count, + enum devlink_command cmd) +{ + const struct devlink_param *param = params; + int i; + + mutex_lock(&devlink->lock); + for (i = 0; i < 
params_count; i++, param++) + devlink_param_unregister_one(devlink, 0, param_list, param, + cmd); + mutex_unlock(&devlink->lock); +} + /** * devlink_params_register - register configuration parameters * @@ -10117,25 +10017,10 @@ int devlink_params_register(struct devlink *devlink, const struct devlink_param *params, size_t params_count) { - const struct devlink_param *param = params; - int i, err; - - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - for (i = 0; i < params_count; i++, param++) { - err = devlink_param_register(devlink, param); - if (err) - goto rollback; - } - return 0; - -rollback: - if (!i) - return err; - - for (param--; i > 0; i--, param--) - devlink_param_unregister(devlink, param); - return err; + return __devlink_params_register(devlink, 0, &devlink->param_list, + params, params_count, + DEVLINK_CMD_PARAM_NEW, + DEVLINK_CMD_PARAM_DEL); } EXPORT_SYMBOL_GPL(devlink_params_register); @@ -10149,13 +10034,9 @@ void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count) { - const struct devlink_param *param = params; - int i; - - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - for (i = 0; i < params_count; i++, param++) - devlink_param_unregister(devlink, param); + return __devlink_params_unregister(devlink, 0, &devlink->param_list, + params, params_count, + DEVLINK_CMD_PARAM_DEL); } EXPORT_SYMBOL_GPL(devlink_params_unregister); @@ -10171,26 +10052,13 @@ EXPORT_SYMBOL_GPL(devlink_params_unregister); int devlink_param_register(struct devlink *devlink, const struct devlink_param *param) { - struct devlink_param_item *param_item; + int err; - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - WARN_ON(devlink_param_verify(param)); - WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name)); - - if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT)) - WARN_ON(param->get || param->set); - else - WARN_ON(!param->get || !param->set); - - param_item = kzalloc(sizeof(*param_item), GFP_KERNEL); - if (!param_item) - return -ENOMEM; - - param_item->param = param; - - list_add_tail(¶m_item->list, &devlink->param_list); - return 0; + mutex_lock(&devlink->lock); + err = __devlink_param_register_one(devlink, 0, &devlink->param_list, + param, DEVLINK_CMD_PARAM_NEW); + mutex_unlock(&devlink->lock); + return err; } EXPORT_SYMBOL_GPL(devlink_param_register); @@ -10202,18 +10070,195 @@ EXPORT_SYMBOL_GPL(devlink_param_register); void devlink_param_unregister(struct devlink *devlink, const struct devlink_param *param) { - struct devlink_param_item *param_item; - - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - param_item = - devlink_param_find_by_name(&devlink->param_list, param->name); - WARN_ON(!param_item); - list_del(¶m_item->list); - kfree(param_item); + mutex_lock(&devlink->lock); + devlink_param_unregister_one(devlink, 0, &devlink->param_list, param, + DEVLINK_CMD_PARAM_DEL); + mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devlink_param_unregister); +/** + * devlink_params_publish - publish configuration parameters + * + * @devlink: devlink + * + * Publish previously registered configuration parameters. 
+ */ +void devlink_params_publish(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->published) + continue; + param_item->published = true; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); + } +} +EXPORT_SYMBOL_GPL(devlink_params_publish); + +/** + * devlink_params_unpublish - unpublish configuration parameters + * + * @devlink: devlink + * + * Unpublish previously registered configuration parameters. + */ +void devlink_params_unpublish(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (!param_item->published) + continue; + param_item->published = false; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + } +} +EXPORT_SYMBOL_GPL(devlink_params_unpublish); + +/** + * devlink_param_publish - publish one configuration parameter + * + * @devlink: devlink + * @param: one configuration parameter + * + * Publish previously registered configuration parameter. + */ +void devlink_param_publish(struct devlink *devlink, + const struct devlink_param *param) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->param != param || param_item->published) + continue; + param_item->published = true; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); + break; + } +} +EXPORT_SYMBOL_GPL(devlink_param_publish); + +/** + * devlink_param_unpublish - unpublish one configuration parameter + * + * @devlink: devlink + * @param: one configuration parameter + * + * Unpublish previously registered configuration parameter. + */ +void devlink_param_unpublish(struct devlink *devlink, + const struct devlink_param *param) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->param != param || !param_item->published) + continue; + param_item->published = false; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + break; + } +} +EXPORT_SYMBOL_GPL(devlink_param_unpublish); + +/** + * devlink_port_params_register - register port configuration parameters + * + * @devlink_port: devlink port + * @params: configuration parameters array + * @params_count: number of parameters provided + * + * Register the configuration parameters supported by the port. 
+ */ +int devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_register(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, params, + params_count, + DEVLINK_CMD_PORT_PARAM_NEW, + DEVLINK_CMD_PORT_PARAM_DEL); +} +EXPORT_SYMBOL_GPL(devlink_port_params_register); + +/** + * devlink_port_params_unregister - unregister port configuration + * parameters + * + * @devlink_port: devlink port + * @params: configuration parameters array + * @params_count: number of parameters provided + */ +void devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_unregister(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, + params, params_count, + DEVLINK_CMD_PORT_PARAM_DEL); +} +EXPORT_SYMBOL_GPL(devlink_port_params_unregister); + +static int +__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id, + union devlink_param_value *init_val) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(param_list, param_id); + if (!param_item) + return -EINVAL; + + if (!param_item->driverinit_value_valid || + !devlink_param_cmode_is_supported(param_item->param, + DEVLINK_PARAM_CMODE_DRIVERINIT)) + return -EOPNOTSUPP; + + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(init_val->vstr, param_item->driverinit_value.vstr); + else + *init_val = param_item->driverinit_value; + + return 0; +} + +static int +__devlink_param_driverinit_value_set(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, u32 param_id, + union devlink_param_value init_val, + enum devlink_command cmd) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(param_list, param_id); + if (!param_item) + return -EINVAL; + + if (!devlink_param_cmode_is_supported(param_item->param, + DEVLINK_PARAM_CMODE_DRIVERINIT)) + return -EOPNOTSUPP; + + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; + param_item->driverinit_value_valid = true; + + devlink_param_notify(devlink, port_index, param_item, cmd); + return 0; +} + /** * devlink_param_driverinit_value_get - get configuration parameter * value for driver initializing @@ -10228,26 +10273,11 @@ EXPORT_SYMBOL_GPL(devlink_param_unregister); int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val) { - struct devlink_param_item *param_item; - if (!devlink_reload_supported(devlink->ops)) return -EOPNOTSUPP; - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); - if (!param_item) - return -EINVAL; - - if (!param_item->driverinit_value_valid || - !devlink_param_cmode_is_supported(param_item->param, - DEVLINK_PARAM_CMODE_DRIVERINIT)) - return -EOPNOTSUPP; - - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(init_val->vstr, param_item->driverinit_value.vstr); - else - *init_val = param_item->driverinit_value; - - return 0; + return __devlink_param_driverinit_value_get(&devlink->param_list, + param_id, init_val); } EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); @@ -10266,27 +10296,62 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union 
devlink_param_value init_val) { - struct devlink_param_item *param_item; - - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); - if (!param_item) - return -EINVAL; - - if (!devlink_param_cmode_is_supported(param_item->param, - DEVLINK_PARAM_CMODE_DRIVERINIT)) - return -EOPNOTSUPP; - - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(param_item->driverinit_value.vstr, init_val.vstr); - else - param_item->driverinit_value = init_val; - param_item->driverinit_value_valid = true; - return 0; + return __devlink_param_driverinit_value_set(devlink, 0, + &devlink->param_list, + param_id, init_val, + DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); +/** + * devlink_port_param_driverinit_value_get - get configuration parameter + * value for driver initializing + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * @init_val: value of parameter in driverinit configuration mode + * + * This function should be used by the driver to get driverinit + * configuration for initialization after reload command. + */ +int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value *init_val) +{ + struct devlink *devlink = devlink_port->devlink; + + if (!devlink_reload_supported(devlink->ops)) + return -EOPNOTSUPP; + + return __devlink_param_driverinit_value_get(&devlink_port->param_list, + param_id, init_val); +} +EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get); + +/** + * devlink_port_param_driverinit_value_set - set value of configuration + * parameter for driverinit + * configuration mode + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * @init_val: value of parameter to set for driverinit configuration mode + * + * This function should be used by the driver to set driverinit + * configuration mode default value. + */ +int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value init_val) +{ + return __devlink_param_driverinit_value_set(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, + param_id, init_val, + DEVLINK_CMD_PORT_PARAM_NEW); +} +EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set); + /** * devlink_param_value_changed - notify devlink on a parameter's value * change. Should be called by the driver @@ -10310,6 +10375,50 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) } EXPORT_SYMBOL_GPL(devlink_param_value_changed); +/** + * devlink_port_param_value_changed - notify devlink on a parameter's value + * change. Should be called by the driver + * right after the change. + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * + * This function should be used by the driver to notify devlink on value + * change, excluding driverinit configuration mode. + * For driverinit configuration mode driver should use the function + * devlink_port_param_driverinit_value_set() instead. 
+ */ +void devlink_port_param_value_changed(struct devlink_port *devlink_port, + u32 param_id) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(&devlink_port->param_list, + param_id); + WARN_ON(!param_item); + + devlink_param_notify(devlink_port->devlink, devlink_port->index, + param_item, DEVLINK_CMD_PORT_PARAM_NEW); +} +EXPORT_SYMBOL_GPL(devlink_port_param_value_changed); + +/** + * devlink_param_value_str_fill - Safely fill-up the string preventing + * from overflow of the preallocated buffer + * + * @dst_val: destination devlink_param_value + * @src: source buffer + */ +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ + size_t len; + + len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE); + WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE); +} +EXPORT_SYMBOL_GPL(devlink_param_value_str_fill); + /** * devlink_region_create - create a new address region * @@ -10728,8 +10837,6 @@ devlink_trap_group_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && cmd != DEVLINK_CMD_TRAP_GROUP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) - return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -10771,8 +10878,6 @@ static void devlink_trap_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && cmd != DEVLINK_CMD_TRAP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) - return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -11154,8 +11259,6 @@ devlink_trap_policer_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW && cmd != DEVLINK_CMD_TRAP_POLICER_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) - return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -11324,36 +11427,45 @@ static void __devlink_compat_running_version(struct devlink *devlink, nlmsg_free(msg); } -static struct devlink_port *netdev_to_devlink_port(struct net_device *dev) -{ - if (!dev->netdev_ops->ndo_get_devlink_port) - return NULL; - - return dev->netdev_ops->ndo_get_devlink_port(dev); -} - -void devlink_compat_running_version(struct devlink *devlink, +void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { - if (!devlink->ops->info_get) - return; + struct devlink *devlink; + + dev_hold(dev); + rtnl_unlock(); + + devlink = netdev_to_devlink(dev); + if (!devlink || !devlink->ops->info_get) + goto out; mutex_lock(&devlink->lock); __devlink_compat_running_version(devlink, buf, len); mutex_unlock(&devlink->lock); + +out: + rtnl_lock(); + dev_put(dev); } -int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) +int devlink_compat_flash_update(struct net_device *dev, const char *file_name) { struct devlink_flash_update_params params = {}; + struct devlink *devlink; int ret; - if (!devlink->ops->flash_update) - return -EOPNOTSUPP; + dev_hold(dev); + rtnl_unlock(); + + devlink = netdev_to_devlink(dev); + if (!devlink || !devlink->ops->flash_update) { + ret = -EOPNOTSUPP; + goto out; + } ret = request_firmware(¶ms.fw, file_name, devlink->dev); if (ret) - return ret; + goto out; mutex_lock(&devlink->lock); devlink_flash_update_begin_notify(devlink); @@ -11363,6 +11475,10 @@ int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) release_firmware(params.fw); +out: + rtnl_lock(); + dev_put(dev); + return ret; } @@ -11420,7 +11536,7 @@ static void __net_exit 
devlink_pernet_pre_exit(struct net *net) if (!net_eq(devlink_net(devlink), net)) goto retry; - WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); + WARN_ON(!devlink_reload_supported(devlink->ops)); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d5dc6be252..1d99b731e5 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -110,8 +110,7 @@ static u32 net_dm_queue_len = 1000; struct net_dm_alert_ops { void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, - void *location, - enum skb_drop_reason reason); + void *location); void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, int work, int budget); void (*work_item_func)(struct work_struct *work); @@ -263,9 +262,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) spin_unlock_irqrestore(&data->lock, flags); } -static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, - void *location, - enum skb_drop_reason reason) +static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) { trace_drop_common(skb, location); } @@ -497,8 +494,7 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = { static void net_dm_packet_trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, - void *location, - enum skb_drop_reason reason) + void *location) { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; @@ -858,7 +854,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC); + dev_hold(hw_metadata->input_dev); return hw_metadata; @@ -872,9 +868,9 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } static void -net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata) +net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata) { - dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker); + dev_put(hw_metadata->input_dev); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); diff --git a/net/core/dst.c b/net/core/dst.c index d16c2c9bfe..497ef9b3fc 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,7 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - dev_hold_track(dev, &dst->dev_tracker, GFP_ATOMIC); + dev_hold(dev); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -117,7 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - dev_put_track(dst->dev, &dst->dev_tracker); + dev_put(dst->dev); lwtstate_put(dst->lwtstate); @@ -159,8 +159,8 @@ void dst_dev_put(struct dst_entry *dst) dst->input = dst_discard; dst->output = dst_discard_out; dst->dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, &dst->dev_tracker, - GFP_ATOMIC); + dev_hold(dst->dev); + dev_put(dev); } EXPORT_SYMBOL(dst_dev_put); diff --git a/net/core/failover.c b/net/core/failover.c index dcaa92a85e..b5cd3c7272 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -252,7 +252,7 @@ struct failover *failover_register(struct net_device *dev, return ERR_PTR(-ENOMEM); rcu_assign_pointer(failover->ops, ops); - dev_hold_track(dev, &failover->dev_tracker, GFP_KERNEL); + dev_hold(dev); dev->priv_flags |= IFF_FAILOVER; 
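The drop_monitor hunk above trims the kfree_skb tracepoint probe back to (ignore, skb, location), and that signature is the whole contract for any other module hooking the same tracepoint. A sketch of a standalone consumer, using the registration helpers generated by trace/events/skb.h:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <trace/events/skb.h>

static void foo_kfree_skb_hit(void *ignore, struct sk_buff *skb,
                              void *location)
{
        /* e.g. aggregate drops per call site keyed by @location */
}

static int __init foo_dm_init(void)
{
        return register_trace_kfree_skb(foo_kfree_skb_hit, NULL);
}

static void __exit foo_dm_exit(void)
{
        unregister_trace_kfree_skb(foo_kfree_skb_hit, NULL);
        /* wait for in-flight probes before the module text goes away */
        tracepoint_synchronize_unregister();
}

module_init(foo_dm_init);
module_exit(foo_dm_exit);
MODULE_LICENSE("GPL");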
rcu_assign_pointer(failover->failover_dev, dev); @@ -285,7 +285,7 @@ void failover_unregister(struct failover *failover) failover_dev->name); failover_dev->priv_flags &= ~IFF_FAILOVER; - dev_put_track(failover_dev, &failover->dev_tracker); + dev_put(failover_dev); spin_lock(&failover_lock); list_del(&failover->list); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 75282222e0..1bb567a3b3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -750,27 +750,6 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, return 0; } -static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { - [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, - [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_PRIORITY] = { .type = NLA_U32 }, - [FRA_FWMARK] = { .type = NLA_U32 }, - [FRA_FLOW] = { .type = NLA_U32 }, - [FRA_TUN_ID] = { .type = NLA_U64 }, - [FRA_FWMASK] = { .type = NLA_U32 }, - [FRA_TABLE] = { .type = NLA_U32 }, - [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, - [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, - [FRA_GOTO] = { .type = NLA_U32 }, - [FRA_L3MDEV] = { .type = NLA_U8 }, - [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, - [FRA_PROTOCOL] = { .type = NLA_U8 }, - [FRA_IP_PROTO] = { .type = NLA_U8 }, - [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, - [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } -}; - int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -795,7 +774,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, } err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, - fib_rule_policy, extack); + ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; @@ -903,7 +882,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, - fib_rule_policy, extack); + ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; diff --git a/net/core/filter.c b/net/core/filter.c index 9eb7858422..76e406965b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -301,7 +301,7 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, break; case SKF_AD_PKTTYPE: - *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET); + *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); #ifdef __BIG_ENDIAN_BITFIELD *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); @@ -323,7 +323,7 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, offsetof(struct sk_buff, vlan_tci)); break; case SKF_AD_VLAN_TAG_PRESENT: - *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET); + *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET()); if (PKT_VLAN_PRESENT_BIT) *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT); if (PKT_VLAN_PRESENT_BIT < 7) @@ -1242,9 +1242,10 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) int err, new_len, old_len = fp->len; bool seen_ld_abs = false; - /* We are free to overwrite insns et al right here as it won't be used at - * this point in time anymore internally after the migration to the eBPF - * instruction representation. 
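The comment reworded above belongs to bpf_migrate_filter(), which keeps the classic-BPF entry points transparent: a caller hands in struct sock_filter insns and gets back a struct bpf_prog whether the JIT or the migrated interpreter form ends up backing it. The caller side, sketched with the in-kernel API; the two-insn program is only an illustration (it loads the packet type handled by convert_skb_access() above, then accepts the packet):

#include <linux/filter.h>

static struct bpf_prog *foo_build_classic_filter(void)
{
        struct sock_filter insns[] = {
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         SKF_AD_OFF + SKF_AD_PKTTYPE),
                BPF_STMT(BPF_RET | BPF_K, 0xffff),  /* accept up to 64K */
        };
        struct sock_fprog_kern fprog = {
                .len    = ARRAY_SIZE(insns),
                .filter = insns,
        };
        struct bpf_prog *fp;

        /* JITs when possible; otherwise bpf_migrate_filter() feeds the
         * interpreter. The insns array is copied, so stack storage is fine.
         */
        if (bpf_prog_create(&fp, &fprog))
                return NULL;
        return fp;
}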
+ /* We are free to overwrite insns et al right here as it + * won't be used at this point in time anymore internally + * after the migration to the internal BPF instruction + * representation. */ BUILD_BUG_ON(sizeof(struct sock_filter) != sizeof(struct bpf_insn)); @@ -1335,8 +1336,8 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, */ bpf_jit_compile(fp); - /* JIT compiler couldn't process this filter, so do the eBPF translation - * for the optimized interpreter. + /* JIT compiler couldn't process this filter, so do the + * internal BPF translation for the optimized interpreter. */ if (!fp->jited) fp = bpf_migrate_filter(fp); @@ -1712,7 +1713,7 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_MEM, .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; @@ -2017,9 +2018,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .gpl_only = false, .pkt_access = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_MEM_OR_NULL, .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_MEM_OR_NULL, .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; @@ -2540,7 +2541,7 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM_OR_NULL, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; @@ -3960,35 +3961,10 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_master_redirect); -static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, - struct net_device *dev, - struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) -{ - enum bpf_map_type map_type = ri->map_type; - void *fwd = ri->tgt_value; - u32 map_id = ri->map_id; - int err; - - ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ - ri->map_type = BPF_MAP_TYPE_UNSPEC; - - err = __xsk_map_redirect(fwd, xdp); - if (unlikely(err)) - goto err; - - _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); - return 0; -err: - _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); - return err; -} - -static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, - struct net_device *dev, - struct xdp_frame *xdpf, - struct bpf_prog *xdp_prog) +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; @@ -3998,11 +3974,6 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ ri->map_type = BPF_MAP_TYPE_UNSPEC; - if (unlikely(!xdpf)) { - err = -EOVERFLOW; - goto err; - } - switch (map_type) { case BPF_MAP_TYPE_DEVMAP: fallthrough; @@ -4010,14 +3981,17 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, map = READ_ONCE(ri->map); if (unlikely(map)) { WRITE_ONCE(ri->map, NULL); - err = dev_map_enqueue_multi(xdpf, dev, map, + err = dev_map_enqueue_multi(xdp, dev, map, ri->flags & BPF_F_EXCLUDE_INGRESS); } else { - err = 
dev_map_enqueue(fwd, xdpf, dev); + err = dev_map_enqueue(fwd, xdp, dev); } break; case BPF_MAP_TYPE_CPUMAP: - err = cpu_map_enqueue(fwd, xdpf, dev); + err = cpu_map_enqueue(fwd, xdp, dev); + break; + case BPF_MAP_TYPE_XSKMAP: + err = __xsk_map_redirect(fwd, xdp); break; case BPF_MAP_TYPE_UNSPEC: if (map_id == INT_MAX) { @@ -4026,7 +4000,7 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, err = -EINVAL; break; } - err = dev_xdp_enqueue(fwd, xdpf, dev); + err = dev_xdp_enqueue(fwd, xdp, dev); break; } fallthrough; @@ -4043,34 +4017,8 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } - -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) -{ - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - enum bpf_map_type map_type = ri->map_type; - - if (map_type == BPF_MAP_TYPE_XSKMAP) - return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); - - return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), - xdp_prog); -} EXPORT_SYMBOL_GPL(xdp_do_redirect); -int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, - struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) -{ - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - enum bpf_map_type map_type = ri->map_type; - - if (map_type == BPF_MAP_TYPE_XSKMAP) - return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); - - return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog); -} -EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); - static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, @@ -4229,7 +4177,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -4243,7 +4191,7 @@ const struct bpf_func_proto bpf_skb_output_proto = { .arg1_btf_id = &bpf_skb_output_btf_ids[0], .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -4426,7 +4374,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; @@ -4452,7 +4400,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, }; @@ -4622,7 +4570,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -4636,7 +4584,7 @@ const struct bpf_func_proto bpf_xdp_output_proto = { .arg1_btf_id = &bpf_xdp_output_btf_ids[0], .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; @@ -5025,12 +4973,6 @@ static int _bpf_getsockopt(struct sock *sk, int 
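Everything xdp_do_redirect() dispatches on here was set up from the BPF side by bpf_redirect_map(), which stashes the map type and target in bpf_redirect_info before the driver ever calls in. A minimal BPF-C sketch of the program side, assuming a devmap named tx_ports that userspace fills with egress ifindexes:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_DEVMAP);
        __uint(max_entries, 64);
        __type(key, __u32);
        __type(value, __u32);
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_redirect_prog(struct xdp_md *ctx)
{
        /* The low bits of the flags argument act as the fallback
         * action when the map slot is empty.
         */
        return bpf_redirect_map(&tx_ports, ctx->rx_queue_index, XDP_PASS);
}

char _license[] SEC("license") = "GPL";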
level, int optname, goto err_clear; switch (optname) { - case SO_RCVBUF: - *((int *)optval) = sk->sk_rcvbuf; - break; - case SO_SNDBUF: - *((int *)optval) = sk->sk_sndbuf; - break; case SO_MARK: *((int *)optval) = sk->sk_mark; break; @@ -5130,7 +5072,7 @@ const struct bpf_func_proto bpf_sk_setsockopt_proto = { .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE, }; @@ -5164,7 +5106,7 @@ static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE, }; @@ -5198,7 +5140,7 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE, }; @@ -5373,7 +5315,7 @@ static const struct bpf_func_proto bpf_bind_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, }; @@ -5961,7 +5903,7 @@ static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_MEM, .arg4_type = ARG_CONST_SIZE }; @@ -5971,7 +5913,7 @@ static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_MEM, .arg4_type = ARG_CONST_SIZE }; @@ -6014,7 +5956,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_MEM, .arg4_type = ARG_CONST_SIZE }; @@ -6102,7 +6044,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_MEM, .arg4_type = ARG_CONST_SIZE }; @@ -6327,7 +6269,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6346,7 +6288,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6365,7 +6307,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6402,7 +6344,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { .pkt_access = true, 
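The long run of .arg?_type edits in these tables all adjust one idiom: a helper that reads a caller buffer advertises a pointer argument plus a paired size argument, and the verifier proves the whole range readable before the helper runs. A hedged sketch of a complete definition in that shape; bpf_foo_digest is hypothetical, not an existing helper:

#include <linux/filter.h>
#include <linux/stringhash.h>

BPF_CALL_2(bpf_foo_digest, const void *, data, u32, size)
{
        /* The verifier has already proven data[0..size) is readable. */
        return full_name_hash(NULL, data, size);
}

static const struct bpf_func_proto bpf_foo_digest_proto = {
        .func           = bpf_foo_digest,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_MEM,
        .arg2_type      = ARG_CONST_SIZE,
};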
.ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6425,7 +6367,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6448,7 +6390,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6467,7 +6409,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6486,7 +6428,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6505,7 +6447,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6818,9 +6760,9 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE, }; @@ -6887,9 +6829,9 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, - .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_PTR_TO_MEM, .arg5_type = ARG_CONST_SIZE, }; @@ -7118,7 +7060,7 @@ static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; @@ -7830,10 +7772,6 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type break; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; - case bpf_ctx_range(struct __sk_buff, hwtstamp): - if (type == BPF_WRITE || size != sizeof(__u64)) - return false; - break; case bpf_ctx_range(struct __sk_buff, tstamp): if (size != sizeof(__u64)) return false; @@ -7843,9 +7781,6 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type return false; info->reg_type = 
PTR_TO_SOCK_COMMON_OR_NULL; break; - case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1: - /* Explicitly prohibit access to padding in __sk_buff. */ - return false; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { @@ -7874,7 +7809,6 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): - case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } @@ -7945,7 +7879,6 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): - case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } @@ -8092,7 +8025,7 @@ static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, * (Fast-path, otherwise approximation that we might be * a clone, do the rest in helper.) */ - *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET); + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); @@ -8244,13 +8177,13 @@ static bool xdp_is_valid_access(int off, int size, return __is_valid_xdp_access(off, size); } -void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act) +void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; - pr_warn_once("%s XDP return value %u on prog %s (id %d) dev %s, expect packet loss!\n", + pr_warn_once("%s XDP return value %u, expect packet loss!\n", act > act_max ? "Illegal" : "Driver unsupported", - act, prog->aux->name, prog->aux->id, dev ? 
dev->name : "N/A"); + act); } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); @@ -8447,7 +8380,6 @@ static bool sk_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): - case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } @@ -8680,7 +8612,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, pkt_type): *target_size = 1; *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, - PKT_TYPE_OFFSET); + PKT_TYPE_OFFSET()); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); #ifdef __BIG_ENDIAN_BITFIELD *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); @@ -8705,7 +8637,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, vlan_present): *target_size = 1; *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, - PKT_VLAN_PRESENT_OFFSET); + PKT_VLAN_PRESENT_OFFSET()); if (PKT_VLAN_PRESENT_BIT) *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT); if (PKT_VLAN_PRESENT_BIT < 7) @@ -8959,17 +8891,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); break; - case offsetof(struct __sk_buff, hwtstamp): - BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8); - BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0); - - insn = bpf_convert_shinfo_access(si, insn); - *insn++ = BPF_LDX_MEM(BPF_DW, - si->dst_reg, si->dst_reg, - bpf_target_off(struct skb_shared_info, - hwtstamps, 8, - target_size)); - break; } return insn - insn_buf; @@ -10536,10 +10457,8 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, return -EINVAL; if (unlikely(sk && sk_is_refcounted(sk))) return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ - if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN)) - return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */ - if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE)) - return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */ + if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED)) + return -ESOCKTNOSUPPORT; /* reject connected sockets */ /* Check if socket is suitable for packet L3/L4 protocol */ if (sk && sk->sk_protocol != ctx->protocol) @@ -10606,7 +10525,6 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): case bpf_ctx_range(struct bpf_sk_lookup, remote_port): case bpf_ctx_range(struct bpf_sk_lookup, local_port): - case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); @@ -10696,12 +10614,6 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct bpf_sk_lookup_kern, dport, 2, target_size)); break; - - case offsetof(struct bpf_sk_lookup, ingress_ifindex): - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - bpf_target_off(struct bpf_sk_lookup_kern, - ingress_ifindex, 4, target_size)); - break; } return insn - insn_buf; @@ -10726,10 +10638,14 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } -BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE) +#ifdef CONFIG_DEBUG_INFO_BTF +BTF_ID_LIST_GLOBAL(btf_sock_ids) #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) 
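With the one-argument form restored above, the division of labor is: bpf_warn_invalid_xdp_action() rate-limits the complaint, and the driver emits the xdp_exception tracepoint itself. The canonical driver-side switch, sketched with hypothetical foo_* ring helpers standing in for the real RX path:

static void foo_run_xdp(struct foo_rx_ring *ring, struct xdp_buff *xdp)
{
        struct net_device *netdev = ring->netdev;  /* hypothetical layout */
        struct bpf_prog *prog = ring->xdp_prog;
        u32 act = bpf_prog_run_xdp(prog, xdp);

        switch (act) {
        case XDP_PASS:
                foo_build_skb_and_pass(ring, xdp);  /* hypothetical */
                break;
        case XDP_TX:
                foo_xmit_xdp(ring, xdp);            /* hypothetical */
                break;
        case XDP_REDIRECT:
                if (xdp_do_redirect(netdev, xdp, prog))
                        goto drop;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
                trace_xdp_exception(netdev, prog, act);
                fallthrough;
        case XDP_DROP:
drop:
                foo_recycle(ring, xdp);             /* hypothetical */
                break;
        }
}

A successful XDP_REDIRECT also obliges the driver to call xdp_do_flush() once at the end of the NAPI poll so queued frames actually leave.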
BTF_SOCK_TYPE_xxx #undef BTF_SOCK_TYPE +#else +u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; +#endif BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) { @@ -10841,26 +10757,6 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], }; -BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk) -{ - /* unix_sock type is not generated in dwarf and hence btf, - * trigger an explicit type generation here. - */ - BTF_TYPE_EMIT(struct unix_sock); - if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX) - return (unsigned long)sk; - - return (unsigned long)NULL; -} - -const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { - .func = bpf_skc_to_unix_sock, - .gpl_only = false, - .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, - .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, - .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], -}; - BPF_CALL_1(bpf_sock_from_file, struct file *, file) { return (unsigned long)sock_from_file(file); @@ -10900,9 +10796,6 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_skc_to_udp6_sock: func = &bpf_skc_to_udp6_sock_proto; break; - case BPF_FUNC_skc_to_unix_sock: - func = &bpf_skc_to_unix_sock_proto; - break; case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 15833e1d6e..edffdaa875 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -1198,8 +1197,9 @@ bool __skb_flow_dissect(const struct net *net, break; } + proto = hdr->proto; nhoff += PPPOE_SES_HLEN; - switch (hdr->proto) { + switch (proto) { case htons(PPP_IP): proto = htons(ETH_P_IP); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; @@ -1308,11 +1308,6 @@ bool __skb_flow_dissect(const struct net *net, switch (ip_proto) { case IPPROTO_GRE: - if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { - fdret = FLOW_DISSECT_RET_OUT_GOOD; - break; - } - fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector, target_container, data, &proto, &nhoff, &hlen, flags); @@ -1370,11 +1365,6 @@ bool __skb_flow_dissect(const struct net *net, break; } case IPPROTO_IPIP: - if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { - fdret = FLOW_DISSECT_RET_OUT_GOOD; - break; - } - proto = htons(ETH_P_IP); key_control->flags |= FLOW_DIS_ENCAPSULATION; @@ -1387,11 +1377,6 @@ bool __skb_flow_dissect(const struct net *net, break; case IPPROTO_IPV6: - if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { - fdret = FLOW_DISSECT_RET_OUT_GOOD; - break; - } - proto = htons(ETH_P_IPV6); key_control->flags |= FLOW_DIS_ENCAPSULATION; @@ -1462,7 +1447,7 @@ bool __skb_flow_dissect(const struct net *net, } EXPORT_SYMBOL(__skb_flow_dissect); -static siphash_aligned_key_t hashrnd; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 73f68d4625..6beaea1356 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include -#include #include #include #include @@ -28,26 +27,6 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) } EXPORT_SYMBOL(flow_rule_alloc); -struct flow_offload_action *offload_action_alloc(unsigned int num_actions) -{ - struct flow_offload_action *fl_action; - int i; - - fl_action = kzalloc(struct_size(fl_action, action.entries, num_actions), - GFP_KERNEL); - 
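bpf_skc_to_udp6_sock and the removed unix variant above share one template: verify the socket really is the claimed type, then return it as a BTF-typed pointer or NULL. A sketch of that template for a hypothetical cast helper (the _foo_ name is illustrative; the proto fields mirror the real ones above):

BPF_CALL_1(bpf_skc_to_foo_sock, struct sock *, sk)
{
        /* As with unix_sock above, BTF_TYPE_EMIT() would be needed
         * if the target type never appears in vmlinux BTF on its own.
         */
        if (sk && sk_fullsock(sk) && sk->sk_family == AF_INET)
                return (unsigned long)sk;

        return (unsigned long)NULL;
}

static const struct bpf_func_proto bpf_skc_to_foo_sock_proto = {
        .func           = bpf_skc_to_foo_sock,
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_BTF_ID_OR_NULL,
        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .ret_btf_id     = &btf_sock_ids[BTF_SOCK_TYPE_INET],
};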
if (!fl_action) - return NULL; - - fl_action->action.num_entries = num_actions; - /* Pre-fill each action hw_stats with DONT_CARE. - * Caller can override this if it wants stats for a given action. - */ - for (i = 0; i < num_actions; i++) - fl_action->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE; - - return fl_action; -} - #define FLOW_DISSECTOR_MATCH(__rule, __type, __out) \ const struct flow_match *__m = &(__rule)->match; \ struct flow_dissector *__d = (__m)->dissector; \ @@ -418,8 +397,6 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) existing_qdiscs_register(cb, cb_priv); mutex_unlock(&flow_indr_block_lock); - tcf_action_reoffload_cb(cb, cb_priv, true); - return 0; } EXPORT_SYMBOL(flow_indr_dev_register); @@ -472,7 +449,6 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, __flow_block_indr_cleanup(release, cb_priv, &cleanup_list); mutex_unlock(&flow_indr_block_lock); - tcf_action_reoffload_cb(cb, cb_priv, false); flow_block_indr_notify(&cleanup_list); kfree(indr_dev); } @@ -573,25 +549,19 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_indr_dev *this; - u32 count = 0; - int err; mutex_lock(&flow_indr_block_lock); - if (bo) { - if (bo->command == FLOW_BLOCK_BIND) - indir_dev_add(data, dev, sch, type, cleanup, bo); - else if (bo->command == FLOW_BLOCK_UNBIND) - indir_dev_remove(data); - } - list_for_each_entry(this, &flow_block_indr_dev_list, list) { - err = this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); - if (!err) - count++; - } + if (bo->command == FLOW_BLOCK_BIND) + indir_dev_add(data, dev, sch, type, cleanup, bo); + else if (bo->command == FLOW_BLOCK_UNBIND) + indir_dev_remove(data); + + list_for_each_entry(this, &flow_block_indr_dev_list, list) + this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); mutex_unlock(&flow_indr_block_lock); - return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count; + return list_empty(&bo->cb_list) ? 
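flow_indr_dev_register() above replays already-installed blocks to the new callback (existing_qdiscs_register) and then keeps invoking it for every future bind, always with the seven-argument shape used in the list_for_each_entry() loop. Driver-side sketch; foo_setup_indr_block() and struct foo_priv are hypothetical:

static int foo_indr_setup_cb(struct net_device *dev, struct Qdisc *sch,
                             void *cb_priv, enum tc_setup_type type,
                             void *type_data, void *data,
                             void (*cleanup)(struct flow_block_cb *block_cb))
{
        switch (type) {
        case TC_SETUP_BLOCK:
                return foo_setup_indr_block(dev, cb_priv, type_data, data,
                                            cleanup);  /* hypothetical */
        default:
                return -EOPNOTSUPP;
        }
}

static int foo_offload_init(struct foo_priv *priv)
{
        return flow_indr_dev_register(foo_indr_setup_cb, priv);
}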
-EOPNOTSUPP : 0; } EXPORT_SYMBOL(flow_indr_dev_setup_offload); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 4fcbdd71c5..8e582e29a4 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -40,10 +40,10 @@ */ struct net_rate_estimator { - struct gnet_stats_basic_sync *bstats; + struct gnet_stats_basic_packed *bstats; spinlock_t *stats_lock; - bool running; - struct gnet_stats_basic_sync __percpu *cpu_bstats; + seqcount_t *running; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ @@ -60,13 +60,13 @@ struct net_rate_estimator { }; static void est_fetch_counters(struct net_rate_estimator *e, - struct gnet_stats_basic_sync *b) + struct gnet_stats_basic_packed *b) { - gnet_stats_basic_sync_init(b); + memset(b, 0, sizeof(*b)); if (e->stats_lock) spin_lock(e->stats_lock); - gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running); + __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats); if (e->stats_lock) spin_unlock(e->stats_lock); @@ -76,18 +76,14 @@ static void est_fetch_counters(struct net_rate_estimator *e, static void est_timer(struct timer_list *t) { struct net_rate_estimator *est = from_timer(est, t, timer); - struct gnet_stats_basic_sync b; - u64 b_bytes, b_packets; + struct gnet_stats_basic_packed b; u64 rate, brate; est_fetch_counters(est, &b); - b_bytes = u64_stats_read(&b.bytes); - b_packets = u64_stats_read(&b.packets); - - brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log); + brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log); brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); - rate = (b_packets - est->last_packets) << (10 - est->intvl_log); + rate = (b.packets - est->last_packets) << (10 - est->intvl_log); rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); write_seqcount_begin(&est->seq); @@ -95,8 +91,8 @@ static void est_timer(struct timer_list *t) est->avpps += rate; write_seqcount_end(&est->seq); - est->last_bytes = b_bytes; - est->last_packets = b_packets; + est->last_bytes = b.bytes; + est->last_packets = b.packets; est->next_jiffies += ((HZ/4) << est->intvl_log); @@ -113,9 +109,7 @@ static void est_timer(struct timer_list *t) * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path - * @running: true if @bstats represents a running qdisc, thus @bstats' - * internal values might change during basic reads. Only used - * if @bstats_cpu is NULL + * @running: qdisc running seqcount * @opt: rate estimator configuration TLV * * Creates a new rate estimator with &bstats as source and &rate_est @@ -127,16 +121,16 @@ static void est_timer(struct timer_list *t) * Returns 0 on success or a negative error code. 
* */ -int gen_new_estimator(struct gnet_stats_basic_sync *bstats, - struct gnet_stats_basic_sync __percpu *cpu_bstats, +int gen_new_estimator(struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - bool running, + seqcount_t *running, struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); struct net_rate_estimator *old, *est; - struct gnet_stats_basic_sync b; + struct gnet_stats_basic_packed b; int intvl_log; if (nla_len(opt) < sizeof(*parm)) @@ -170,8 +164,8 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats, est_fetch_counters(est, &b); if (lock) local_bh_enable(); - est->last_bytes = u64_stats_read(&b.bytes); - est->last_packets = u64_stats_read(&b.packets); + est->last_bytes = b.bytes; + est->last_packets = b.packets; if (lock) spin_lock_bh(lock); @@ -220,9 +214,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path - * @running: true if @bstats represents a running qdisc, thus @bstats' - * internal values might change during basic reads. Only used - * if @cpu_bstats is NULL + * @running: qdisc running seqcount (might be NULL) * @opt: rate estimator configuration TLV * * Replaces the configuration of a rate estimator by calling @@ -230,11 +222,11 @@ EXPORT_SYMBOL(gen_kill_estimator); * * Returns 0 on success or a negative error code. */ -int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, - struct gnet_stats_basic_sync __percpu *cpu_bstats, +int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - bool running, struct nlattr *opt) + seqcount_t *running, struct nlattr *opt) { return gen_new_estimator(bstats, cpu_bstats, rate_est, lock, running, opt); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index a10335b4ba..e491b083b3 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -18,7 +18,7 @@ #include #include #include -#include + static inline int gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) @@ -114,112 +114,63 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, } EXPORT_SYMBOL(gnet_stats_start_copy); -/* Must not be inlined, due to u64_stats seqcount_t lockdep key */ -void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b) +static void +__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_cpu __percpu *cpu) { - u64_stats_set(&b->bytes, 0); - u64_stats_set(&b->packets, 0); - u64_stats_init(&b->syncp); -} -EXPORT_SYMBOL(gnet_stats_basic_sync_init); - -static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, - struct gnet_stats_basic_sync __percpu *cpu) -{ - u64 t_bytes = 0, t_packets = 0; int i; for_each_possible_cpu(i) { - struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); + struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i); unsigned int start; u64 bytes, packets; do { start = u64_stats_fetch_begin_irq(&bcpu->syncp); - bytes = u64_stats_read(&bcpu->bytes); - packets = u64_stats_read(&bcpu->packets); + bytes = bcpu->bstats.bytes; + packets = bcpu->bstats.packets; } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); - t_bytes += bytes; - t_packets += packets; + bstats->bytes += bytes; + bstats->packets += packets; } - _bstats_update(bstats, t_bytes, t_packets); } -void 
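On the + side of this hunk the estimator samples through a seqcount rather than a flag, so a qdisc passes its root seqcount at setup time. A sketch of the usual change/init-path call, following the class-ful qdiscs of this era; q is the hypothetical private data and tca[] the parsed TCA_* attributes:

if (tca[TCA_RATE]) {
        err = gen_new_estimator(&q->bstats, q->cpu_bstats, &q->rate_est,
                                NULL,  /* NULL lock, as qdiscs pass here */
                                qdisc_root_sleeping_running(sch),
                                tca[TCA_RATE]);
        if (err)
                goto errout;
}

Replacing an already-attached estimator later goes through gen_replace_estimator() with the same argument list.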
gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, - struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b, bool running) +void +__gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b) { - unsigned int start; - u64 bytes = 0; - u64 packets = 0; - - WARN_ON_ONCE((cpu || running) && in_hardirq()); + unsigned int seq; if (cpu) { - gnet_stats_add_basic_cpu(bstats, cpu); + __gnet_stats_copy_basic_cpu(bstats, cpu); return; } do { if (running) - start = u64_stats_fetch_begin_irq(&b->syncp); - bytes = u64_stats_read(&b->bytes); - packets = u64_stats_read(&b->packets); - } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); - - _bstats_update(bstats, bytes, packets); -} -EXPORT_SYMBOL(gnet_stats_add_basic); - -static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, - struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b, bool running) -{ - unsigned int start; - - if (cpu) { - u64 t_bytes = 0, t_packets = 0; - int i; - - for_each_possible_cpu(i) { - struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); - unsigned int start; - u64 bytes, packets; - - do { - start = u64_stats_fetch_begin_irq(&bcpu->syncp); - bytes = u64_stats_read(&bcpu->bytes); - packets = u64_stats_read(&bcpu->packets); - } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); - - t_bytes += bytes; - t_packets += packets; - } - *ret_bytes = t_bytes; - *ret_packets = t_packets; - return; - } - do { - if (running) - start = u64_stats_fetch_begin_irq(&b->syncp); - *ret_bytes = u64_stats_read(&b->bytes); - *ret_packets = u64_stats_read(&b->packets); - } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); + seq = read_seqcount_begin(running); + bstats->bytes = b->bytes; + bstats->packets = b->packets; + } while (running && read_seqcount_retry(running, seq)); } +EXPORT_SYMBOL(__gnet_stats_copy_basic); static int -___gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b, - int type, bool running) +___gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_dump *d, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b, + int type) { - u64 bstats_bytes, bstats_packets; + struct gnet_stats_basic_packed bstats = {0}; - gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running); + __gnet_stats_copy_basic(running, &bstats, cpu, b); if (d->compat_tc_stats && type == TCA_STATS_BASIC) { - d->tc_stats.bytes = bstats_bytes; - d->tc_stats.packets = bstats_packets; + d->tc_stats.bytes = bstats.bytes; + d->tc_stats.packets = bstats.packets; } if (d->tail) { @@ -227,28 +178,24 @@ ___gnet_stats_copy_basic(struct gnet_dump *d, int res; memset(&sb, 0, sizeof(sb)); - sb.bytes = bstats_bytes; - sb.packets = bstats_packets; + sb.bytes = bstats.bytes; + sb.packets = bstats.packets; res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD); - if (res < 0 || sb.packets == bstats_packets) + if (res < 0 || sb.packets == bstats.packets) return res; /* emit 64bit stats only if needed */ - return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets, - sizeof(bstats_packets), TCA_STATS_PAD); + return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets, + sizeof(bstats.packets), TCA_STATS_PAD); } return 0; } /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV + * @running: seqcount_t pointer * @d: dumping handle * @cpu: 
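Both the per-cpu loop and the single-counter path above rely on the same u64_stats idiom: on 32-bit SMP a 64-bit counter cannot be read atomically, so the reader retries until the writer-side sequence is stable. The core loop, isolated (types as on the + side of the hunk):

static void foo_read_bstats(struct gnet_stats_basic_cpu *bcpu,
                            u64 *bytes, u64 *packets)
{
        unsigned int start;

        do {
                start = u64_stats_fetch_begin_irq(&bcpu->syncp);
                *bytes = bcpu->bstats.bytes;
                *packets = bcpu->bstats.packets;
        } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
}

On 64-bit builds the begin/retry pair compiles away and the loop body runs exactly once.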
copy statistic per cpu * @b: basic statistics - * @running: true if @b represents a running qdisc, thus @b's - * internal values might change during basic reads. - * Only used if @cpu is NULL - * - * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). @@ -257,25 +204,22 @@ ___gnet_stats_copy_basic(struct gnet_dump *d, * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b, - bool running) +gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_dump *d, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b) { - return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running); + return ___gnet_stats_copy_basic(running, d, cpu, b, + TCA_STATS_BASIC); } EXPORT_SYMBOL(gnet_stats_copy_basic); /** * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV + * @running: seqcount_t pointer * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics - * @running: true if @b represents a running qdisc, thus @b's - * internal values might change during basic reads. - * Only used if @cpu is NULL - * - * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). @@ -284,12 +228,13 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic_hw(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b, - bool running) +gnet_stats_copy_basic_hw(const seqcount_t *running, + struct gnet_dump *d, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b) { - return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running); + return ___gnet_stats_copy_basic(running, d, cpu, b, + TCA_STATS_BASIC_HW); } EXPORT_SYMBOL(gnet_stats_copy_basic_hw); @@ -337,15 +282,16 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, } EXPORT_SYMBOL(gnet_stats_copy_rate_est); -static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *q) +static void +__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *q) { int i; for_each_possible_cpu(i) { const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); - qstats->qlen += qcpu->backlog; + qstats->qlen = 0; qstats->backlog += qcpu->backlog; qstats->drops += qcpu->drops; qstats->requeues += qcpu->requeues; @@ -353,21 +299,24 @@ static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, } } -void gnet_stats_add_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu, - const struct gnet_stats_queue *q) +void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu, + const struct gnet_stats_queue *q, + __u32 qlen) { if (cpu) { - gnet_stats_add_queue_cpu(qstats, cpu); + __gnet_stats_copy_queue_cpu(qstats, cpu); } else { - qstats->qlen += q->qlen; - qstats->backlog += q->backlog; - qstats->drops += q->drops; - qstats->requeues += q->requeues; - qstats->overlimits += q->overlimits; + qstats->qlen = q->qlen; + qstats->backlog = q->backlog; + qstats->drops = q->drops; + qstats->requeues = q->requeues; + qstats->overlimits = q->overlimits; } + + qstats->qlen = qlen; } -EXPORT_SYMBOL(gnet_stats_add_queue); 
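A qdisc's dump path is the typical consumer of these copy helpers, emitting both the basic and the queue TLVs into one gnet_dump. Sketch with the + side signatures; struct foo_sched_data is hypothetical and qdisc_root_sleeping_running() is the usual source of the seqcount here:

static int foo_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct foo_sched_data *q = qdisc_priv(sch);

        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
                                  q->cpu_bstats, &sch->bstats) < 0 ||
            gnet_stats_copy_queue(d, q->cpu_qstats, &sch->qstats,
                                  sch->q.qlen) < 0)
                return -1;
        return 0;
}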
+EXPORT_SYMBOL(__gnet_stats_copy_queue); /** * gnet_stats_copy_queue - copy queue statistics into statistics TLV @@ -390,8 +339,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, { struct gnet_stats_queue qstats = {0}; - gnet_stats_add_queue(&qstats, cpu_q, q); - qstats.qlen = qlen; + __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen); if (d->compat_tc_stats) { d->tc_stats.drops = qstats.drops; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index b0f5344d11..1a455847da 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -55,7 +55,7 @@ static void rfc2863_policy(struct net_device *dev) if (operstate == dev->operstate) return; - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); switch(dev->link_mode) { case IF_LINK_MODE_TESTING: @@ -74,7 +74,7 @@ static void rfc2863_policy(struct net_device *dev) dev->operstate = operstate; - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); } @@ -109,7 +109,7 @@ static void linkwatch_add_event(struct net_device *dev) spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); - dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); + dev_hold(dev); } spin_unlock_irqrestore(&lweventlist_lock, flags); } @@ -166,9 +166,6 @@ static void linkwatch_do_dev(struct net_device *dev) netdev_state_change(dev); } - /* Note: our callers are responsible for - * calling netdev_tracker_free(). - */ dev_put(dev); } @@ -212,10 +209,6 @@ static void __linkwatch_run_queue(int urgent_only) list_add_tail(&dev->link_watch_list, &lweventlist); continue; } - /* We must free netdev tracker under - * the spinlock protection. - */ - netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); linkwatch_do_dev(dev); do_dev--; @@ -239,10 +232,6 @@ void linkwatch_forget_dev(struct net_device *dev) if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); clean = 1; - /* We must release netdev tracker under - * the spinlock protection. 
- */ - netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); if (clean) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 349480ef68..2f7940bcf7 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -2,7 +2,6 @@ /* Copyright (c) 2016 Thomas Graf */ -#include #include #include #include diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ec0bf737b0..ff049733cc 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -122,8 +122,6 @@ static void neigh_mark_dead(struct neighbour *n) list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } - if (!list_empty(&n->managed_list)) - list_del_init(&n->managed_list); } static void neigh_update_gc_list(struct neighbour *n) @@ -132,6 +130,7 @@ static void neigh_update_gc_list(struct neighbour *n) write_lock_bh(&n->tbl->lock); write_lock(&n->lock); + if (n->dead) goto out; @@ -150,59 +149,32 @@ static void neigh_update_gc_list(struct neighbour *n) list_add_tail(&n->gc_list, &n->tbl->gc_list); atomic_inc(&n->tbl->gc_entries); } + out: write_unlock(&n->lock); write_unlock_bh(&n->tbl->lock); } -static void neigh_update_managed_list(struct neighbour *n) +static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, + int *notify) { - bool on_managed_list, add_to_managed; - - write_lock_bh(&n->tbl->lock); - write_lock(&n->lock); - if (n->dead) - goto out; - - add_to_managed = n->flags & NTF_MANAGED; - on_managed_list = !list_empty(&n->managed_list); - - if (!add_to_managed && on_managed_list) - list_del_init(&n->managed_list); - else if (add_to_managed && !on_managed_list) - list_add_tail(&n->managed_list, &n->tbl->managed_list); -out: - write_unlock(&n->lock); - write_unlock_bh(&n->tbl->lock); -} - -static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, - bool *gc_update, bool *managed_update) -{ - u32 ndm_flags, old_flags = neigh->flags; + bool rc = false; + u8 ndm_flags; if (!(flags & NEIGH_UPDATE_F_ADMIN)) - return; + return rc; - ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; - ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0; - - if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) { + ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? 
NTF_EXT_LEARNED : 0; + if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { if (ndm_flags & NTF_EXT_LEARNED) neigh->flags |= NTF_EXT_LEARNED; else neigh->flags &= ~NTF_EXT_LEARNED; + rc = true; *notify = 1; - *gc_update = true; - } - if ((old_flags ^ ndm_flags) & NTF_MANAGED) { - if (ndm_flags & NTF_MANAGED) - neigh->flags |= NTF_MANAGED; - else - neigh->flags &= ~NTF_MANAGED; - *notify = 1; - *managed_update = true; } + + return rc; } static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, @@ -407,7 +379,7 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, - u32 flags, bool exempt_from_gc) + u8 flags, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -450,7 +422,6 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, refcount_set(&n->refcnt, 1); n->dead = 1; INIT_LIST_HEAD(&n->gc_list); - INIT_LIST_HEAD(&n->managed_list); atomic_inc(&tbl->entries); out: @@ -607,7 +578,7 @@ EXPORT_SYMBOL(neigh_lookup_nodev); static struct neighbour * ___neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev, u32 flags, + struct net_device *dev, u8 flags, bool exempt_from_gc, bool want_ref) { u32 hash_val, key_len = tbl->key_len; @@ -624,7 +595,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, memcpy(n->primary_key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC); + dev_hold(dev); /* Protocol specific setup. */ if (tbl->constructor && (error = tbl->constructor(n)) < 0) { @@ -679,8 +650,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, n->dead = 0; if (!exempt_from_gc) list_add_tail(&n->gc_list, &n->tbl->gc_list); - if (n->flags & NTF_MANAGED) - list_add_tail(&n->managed_list, &n->tbl->managed_list); + if (want_ref) neigh_hold(n); rcu_assign_pointer(n->next, @@ -770,10 +740,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL); + dev_hold(dev); if (tbl->pconstructor && tbl->pconstructor(n)) { - dev_put_track(dev, &n->dev_tracker); + dev_put(dev); kfree(n); n = NULL; goto out; @@ -805,7 +775,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + dev_put(n->dev); kfree(n); return 0; } @@ -838,7 +808,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + dev_put(n->dev); kfree(n); } return -ENOENT; @@ -879,7 +849,7 @@ void neigh_destroy(struct neighbour *neigh) if (dev->netdev_ops->ndo_neigh_destroy) dev->netdev_ops->ndo_neigh_destroy(dev, neigh); - dev_put_track(dev, &neigh->dev_tracker); + dev_put(dev); neigh_parms_put(neigh->parms); neigh_dbg(2, "neigh %p is destroyed\n", neigh); @@ -1133,8 +1103,7 @@ static void neigh_timer_handler(struct timer_list *t) neigh_release(neigh); } -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, - const bool immediate_ok) +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; bool immediate_probe = false; @@ -1155,17 +1124,12 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + 
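__neigh_event_send() above is the slow path behind the neigh_event_send() wrapper that L3 output code calls: a return of 0 means the entry is usable now, non-zero means the skb was queued (or dropped) while probing proceeds on the timer just armed. Caller-side sketch for an IPv4-style next hop; foo_resolve_and_send() is illustrative:

#include <net/arp.h>
#include <net/neighbour.h>

static int foo_resolve_and_send(struct net_device *dev, __be32 next_hop,
                                struct sk_buff *skb)
{
        struct neighbour *n;
        int err = 0;

        n = __neigh_lookup(&arp_tbl, &next_hop, dev, 1);  /* create if absent */
        if (!n) {
                kfree_skb(skb);
                return -ENOBUFS;
        }

        /* 0: valid enough to transmit now; otherwise ownership of
         * the skb has passed to the entry's arp_queue.
         */
        if (!neigh_event_send(n, skb))
                err = n->output(n, skb);

        neigh_release(n);
        return err;
}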
neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; - if (!immediate_ok) { - next = now + 1; - } else { - immediate_probe = true; - next = now + max(NEIGH_VAR(neigh->parms, - RETRANS_TIME), - HZ / 100); - } + next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), + HZ/100); neigh_add_timer(neigh, next); + immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -1240,6 +1204,8 @@ static void neigh_update_hhs(struct neighbour *neigh) } } + + /* Generic update routine. -- lladdr is new lladdr or NULL, if it is not supplied. -- new is new state. @@ -1251,7 +1217,6 @@ static void neigh_update_hhs(struct neighbour *neigh) if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. NEIGH_UPDATE_F_USE means that the entry is user triggered. - NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as @@ -1259,15 +1224,17 @@ static void neigh_update_hhs(struct neighbour *neigh) Caller MUST hold reference count on the entry. */ + static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid, struct netlink_ext_ack *extack) { - bool gc_update = false, managed_update = false; - int update_isrouter = 0; - struct net_device *dev; - int err, notify = 0; + bool ext_learn_change = false; u8 old; + int err; + int notify = 0; + struct net_device *dev; + int update_isrouter = 0; trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); @@ -1286,8 +1253,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, (old & (NUD_NOARP | NUD_PERMANENT))) goto out; - neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update); - if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { + ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); + if (flags & NEIGH_UPDATE_F_USE) { new = old & ~NUD_PERMANENT; neigh->nud_state = new; err = 0; @@ -1437,13 +1404,15 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, if (update_isrouter) neigh_update_is_router(neigh, flags, ¬ify); write_unlock_bh(&neigh->lock); - if (((new ^ old) & NUD_PERMANENT) || gc_update) + + if (((new ^ old) & NUD_PERMANENT) || ext_learn_change) neigh_update_gc_list(neigh); - if (managed_update) - neigh_update_managed_list(neigh); + if (notify) neigh_update_notify(neigh, nlmsg_pid); + trace_neigh_update_done(neigh, err); + return err; } @@ -1569,20 +1538,6 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) } EXPORT_SYMBOL(neigh_direct_output); -static void neigh_managed_work(struct work_struct *work) -{ - struct neigh_table *tbl = container_of(work, struct neigh_table, - managed_work.work); - struct neighbour *neigh; - - write_lock_bh(&tbl->lock); - list_for_each_entry(neigh, &tbl->managed_list, managed_list) - neigh_event_send_probe(neigh, NULL, false); - queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, - NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); - write_unlock_bh(&tbl->lock); -} - static void neigh_proxy_process(struct timer_list *t) { struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); @@ -1671,13 +1626,13 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + dev_hold(dev); p->dev = dev; write_pnet(&p->net, net); p->sysctl_table = 
NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - dev_put_track(dev, &p->dev_tracker); + dev_put(dev); kfree(p); return NULL; } @@ -1708,7 +1663,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - dev_put_track(parms->dev, &parms->dev_tracker); + dev_put(parms->dev); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); @@ -1729,8 +1684,6 @@ void neigh_table_init(int index, struct neigh_table *tbl) INIT_LIST_HEAD(&tbl->parms_list); INIT_LIST_HEAD(&tbl->gc_list); - INIT_LIST_HEAD(&tbl->managed_list); - list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); refcount_set(&tbl->parms.refcnt, 1); @@ -1762,13 +1715,9 @@ void neigh_table_init(int index, struct neigh_table *tbl) WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); rwlock_init(&tbl->lock); - INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, tbl->parms.reachable_time); - INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work); - queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0); - timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1784,7 +1733,6 @@ int neigh_table_clear(int index, struct neigh_table *tbl) { neigh_tables[index] = NULL; /* It is not clean... Fix it to unload IPv6 module safely */ - cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); @@ -1840,7 +1788,6 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, - [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK), [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; @@ -1913,7 +1860,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_OVERRIDE_ISROUTER; + NEIGH_UPDATE_F_OVERRIDE_ISROUTER; struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; @@ -1922,7 +1869,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct neighbour *neigh; void *dst, *lladdr; u8 protocol = 0; - u32 ndm_flags; int err; ASSERT_RTNL(); @@ -1938,15 +1884,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, } ndm = nlmsg_data(nlh); - ndm_flags = ndm->ndm_flags; - if (tb[NDA_FLAGS_EXT]) { - u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]); - - BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE < - (sizeof(ndm->ndm_flags) * BITS_PER_BYTE + - hweight32(NTF_EXT_MASK))); - ndm_flags |= (ext << NTF_EXT_SHIFT); - } if (ndm->ndm_ifindex) { dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { @@ -1974,18 +1911,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[NDA_PROTOCOL]) protocol = nla_get_u8(tb[NDA_PROTOCOL]); - if (ndm_flags & NTF_PROXY) { - struct pneigh_entry *pn; - if (ndm_flags & NTF_MANAGED) { - NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination"); - goto out; - } + if (ndm->ndm_flags & NTF_PROXY) { + struct pneigh_entry *pn; err = -ENOBUFS; pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { - pn->flags = ndm_flags; + pn->flags = ndm->ndm_flags; if (protocol) pn->protocol = protocol; err = 0; @@ -2005,23 
+1938,17 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { - bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT; - bool exempt_from_gc = ndm_permanent || - ndm_flags & NTF_EXT_LEARNED; + bool exempt_from_gc; if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; goto out; } - if (ndm_permanent && (ndm_flags & NTF_MANAGED)) { - NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry"); - err = -EINVAL; - goto out; - } + exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || + ndm->ndm_flags & NTF_EXT_LEARNED; neigh = ___neigh_create(tbl, dst, dev, - ndm_flags & - (NTF_EXT_LEARNED | NTF_MANAGED), + ndm->ndm_flags & NTF_EXT_LEARNED, exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); @@ -2041,18 +1968,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (protocol) neigh->protocol = protocol; - if (ndm_flags & NTF_EXT_LEARNED) + if (ndm->ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; - if (ndm_flags & NTF_ROUTER) + if (ndm->ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; - if (ndm_flags & NTF_MANAGED) - flags |= NEIGH_UPDATE_F_MANAGED; - if (ndm_flags & NTF_USE) + if (ndm->ndm_flags & NTF_USE) flags |= NEIGH_UPDATE_F_USE; err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); - if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) { + if (!err && ndm->ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; } @@ -2507,7 +2432,6 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, u32 pid, u32 seq, int type, unsigned int flags) { - u32 neigh_flags, neigh_flags_ext; unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; @@ -2517,14 +2441,11 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, if (nlh == NULL) return -EMSGSIZE; - neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT; - neigh_flags = neigh->flags & NTF_OLD_MASK; - ndm = nlmsg_data(nlh); ndm->ndm_family = neigh->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = neigh_flags; + ndm->ndm_flags = neigh->flags; ndm->ndm_type = neigh->type; ndm->ndm_ifindex = neigh->dev->ifindex; @@ -2555,8 +2476,6 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) goto nla_put_failure; - if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) - goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2570,7 +2489,6 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, u32 pid, u32 seq, int type, unsigned int flags, struct neigh_table *tbl) { - u32 neigh_flags, neigh_flags_ext; struct nlmsghdr *nlh; struct ndmsg *ndm; @@ -2578,14 +2496,11 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nlh == NULL) return -EMSGSIZE; - neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT; - neigh_flags = pn->flags & NTF_OLD_MASK; - ndm = nlmsg_data(nlh); ndm->ndm_family = tbl->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = neigh_flags | NTF_PROXY; + ndm->ndm_flags = pn->flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; ndm->ndm_ifindex = pn->dev ? 
pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; @@ -2595,8 +2510,6 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) goto nla_put_failure; - if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) - goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2912,7 +2825,6 @@ static inline size_t neigh_nlmsg_size(void) + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + nla_total_size(sizeof(struct nda_cacheinfo)) + nla_total_size(4) /* NDA_PROBES */ - + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } @@ -2941,7 +2853,6 @@ static inline size_t pneigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ - + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } @@ -3370,7 +3281,7 @@ EXPORT_SYMBOL(neigh_seq_stop); static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) { - struct neigh_table *tbl = pde_data(file_inode(seq->file)); + struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); int cpu; if (*pos == 0) @@ -3387,7 +3298,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct neigh_table *tbl = pde_data(file_inode(seq->file)); + struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { @@ -3407,7 +3318,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v) static int neigh_stat_seq_show(struct seq_file *seq, void *v) { - struct neigh_table *tbl = pde_data(file_inode(seq->file)); + struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { @@ -3776,6 +3687,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, neigh_proc_base_reachable_time; } + /* Don't export sysctls to unprivileged users */ + if (neigh_parms_net(p)->user_ns != &init_user_ns) + t->neigh_vars[0].procname = NULL; + switch (neigh_parms_family(p)) { case AF_INET: p_name = "ipv4"; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index fbddf96620..d7f9ee830d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -213,7 +213,7 @@ static ssize_t speed_show(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && netif_device_present(netdev)) { + if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) @@ -488,6 +488,14 @@ static ssize_t proto_down_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_proto_down; this helps returning + * early without hitting the trylock/restart in netdev_store. 
+ */ + if (!netdev->netdev_ops->ndo_change_proto_down) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_proto_down); } NETDEVICE_SHOW_RW(proto_down, fmt_dec); @@ -1004,7 +1012,7 @@ static void rx_queue_release(struct kobject *kobj) #endif memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + dev_put(queue->dev); } static const void *rx_queue_namespace(struct kobject *kobj) @@ -1044,7 +1052,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + dev_hold(queue->dev); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, @@ -1193,7 +1201,11 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) { - unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); + unsigned long trans_timeout; + + spin_lock_irq(&queue->_xmit_lock); + trans_timeout = queue->trans_timeout; + spin_unlock_irq(&queue->_xmit_lock); return sprintf(buf, fmt_ulong, trans_timeout); } @@ -1440,7 +1452,7 @@ static ssize_t xps_queue_show(struct net_device *dev, unsigned int index, for (i = map->len; i--;) { if (map->queues[i] == index) { - __set_bit(j, mask); + set_bit(j, mask); break; } } @@ -1607,7 +1619,7 @@ static void netdev_queue_release(struct kobject *kobj) struct netdev_queue *queue = to_netdev_queue(kobj); memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + dev_put(queue->dev); } static const void *netdev_queue_namespace(struct kobject *kobj) @@ -1647,7 +1659,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + dev_hold(queue->dev); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, @@ -1694,13 +1706,6 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) int i; int error = 0; - /* Tx queue kobjects are allowed to be updated when a device is being - * unregistered, but solely to remove queues from qdiscs. Any path - * adding queues should be fixed. 
- */ - WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num, - "New queues can't be registered after device unregistration."); - for (i = old_num; i < new_num; i++) { error = netdev_queue_add_kobject(dev, i); if (error) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a5b5bb99c6..9702d2b0d9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -313,8 +313,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) LIST_HEAD(net_exit_list); refcount_set(&net->ns.count, 1); - ref_tracker_dir_init(&net->refcnt_tracker, 128); - refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); preempt_disable(); @@ -639,7 +637,6 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { - ref_tracker_dir_exit(&net->refcnt_tracker); /* Cleanup the network namespace in process context */ if (llist_add(&net->cleanup_list, &cleanup_list)) queue_work(netns_wq, &net_cleanup_work); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index db724463e7..edfc0f8011 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -776,7 +776,7 @@ int netpoll_setup(struct netpoll *np) err = __netpoll_setup(np, ndev); if (err) goto put; - netdev_tracker_alloc(ndev, &np->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return 0; @@ -853,7 +853,7 @@ void netpoll_cleanup(struct netpoll *np) if (!np->dev) goto out; __netpoll_cleanup(np); - dev_put_track(np->dev, &np->dev_tracker); + dev_put(np->dev); np->dev = NULL; out: rtnl_unlock(); diff --git a/net/core/of_net.c b/net/core/of_net.c index f1a9bf7578..dbac3a172a 100644 --- a/net/core/of_net.c +++ b/net/core/of_net.c @@ -143,28 +143,3 @@ int of_get_mac_address(struct device_node *np, u8 *addr) return of_get_mac_addr_nvmem(np, addr); } EXPORT_SYMBOL(of_get_mac_address); - -/** - * of_get_ethdev_address() - * @np: Caller's Device Node - * @dev: Pointer to netdevice which address will be updated - * - * Search the device tree for the best MAC address to use. - * If found set @dev->dev_addr to that address. - * - * See documentation of of_get_mac_address() for more information on how - * the best address is determined. - * - * Return: 0 on success and errno in case of error. 
- */ -int of_get_ethdev_address(struct device_node *np, struct net_device *dev) -{ - u8 addr[ETH_ALEN]; - int ret; - - ret = of_get_mac_address(np, addr); - if (!ret) - eth_hw_addr_set(dev, addr); - return ret; -} -EXPORT_SYMBOL(of_get_ethdev_address); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index bd62c01a2e..1a6978427d 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -130,6 +130,9 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */ #endif + /* Slower-path: Get pages from locked ring queue */ + spin_lock(&r->consumer_lock); + /* Refill alloc array, but only if NUMA match */ do { page = __ptr_ring_consume(r); @@ -154,6 +157,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) if (likely(pool->alloc.count > 0)) page = pool->alloc.cache[--pool->alloc.count]; + spin_unlock(&r->consumer_lock); return page; } @@ -213,8 +217,6 @@ static void page_pool_set_pp_info(struct page_pool *pool, { page->pp = pool; page->pp_magic |= PP_SIGNATURE; - if (pool->p.init_callback) - pool->p.init_callback(page, pool->p.init_arg); } static void page_pool_clear_pp_info(struct page *page) @@ -689,12 +691,10 @@ static void page_pool_release_retry(struct work_struct *wq) schedule_delayed_work(&pool->release_dw, DEFER_TIME); } -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), - struct xdp_mem_info *mem) +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)) { refcount_inc(&pool->user_cnt); pool->disconnect = disconnect; - pool->xdp_mem_id = mem->id; } void page_pool_destroy(struct page_pool *pool) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 84b62cd7bc..a3d74e2704 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -410,7 +410,6 @@ struct pktgen_dev { * device name (not when the inject is * started as it used to do.) 
*/ - netdevice_tracker dev_tracker; char odevname[32]; struct flow_state *flows; unsigned int cflows; /* Concurrent flows (config) */ @@ -546,7 +545,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, static int pgctrl_open(struct inode *inode, struct file *file) { - return single_open(file, pgctrl_show, pde_data(inode)); + return single_open(file, pgctrl_show, PDE_DATA(inode)); } static const struct proc_ops pktgen_proc_ops = { @@ -1811,7 +1810,7 @@ static ssize_t pktgen_if_write(struct file *file, static int pktgen_if_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_if_show, pde_data(inode)); + return single_open(file, pktgen_if_show, PDE_DATA(inode)); } static const struct proc_ops pktgen_if_proc_ops = { @@ -1948,7 +1947,7 @@ static ssize_t pktgen_thread_write(struct file *file, static int pktgen_thread_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_thread_show, pde_data(inode)); + return single_open(file, pktgen_thread_show, PDE_DATA(inode)); } static const struct proc_ops pktgen_thread_proc_ops = { @@ -2100,7 +2099,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, /* Clean old setups */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + dev_put(pkt_dev->odev); pkt_dev->odev = NULL; } @@ -2118,7 +2117,6 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, err = -ENETDOWN; } else { pkt_dev->odev = odev; - netdev_tracker_alloc(odev, &pkt_dev->dev_tracker, GFP_KERNEL); return 0; } @@ -3807,7 +3805,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); out2: - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + dev_put(pkt_dev->odev); out1: #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3901,7 +3899,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Dis-associate from the interface */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + dev_put(pkt_dev->odev); pkt_dev->odev = NULL; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2fb8eb6791..91d7a5a5a0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -301,7 +301,7 @@ int rtnl_unregister(int protocol, int msgtype) } link = rtnl_dereference(tab[msgindex]); - RCU_INIT_POINTER(tab[msgindex], NULL); + rcu_assign_pointer(tab[msgindex], NULL); rtnl_unlock(); kfree_rcu(link, rcu); @@ -337,7 +337,7 @@ void rtnl_unregister_all(int protocol) if (!link) continue; - RCU_INIT_POINTER(tab[msgindex], NULL); + rcu_assign_pointer(tab[msgindex], NULL); kfree_rcu(link, rcu); } rtnl_unlock(); @@ -842,9 +842,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } if (dev->operstate != operstate) { - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); dev->operstate = operstate; - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); netdev_state_change(dev); } } @@ -1026,7 +1026,6 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ - + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1731,7 +1730,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, 
IFLA_GSO_MAX_SIZE, dev->gso_max_size) || - nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif @@ -1884,7 +1882,6 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, - [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2304,14 +2301,6 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], } } - if (tb[IFLA_GRO_MAX_SIZE]) { - u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]); - - if (gro_max_size > GRO_MAX_SIZE) { - NL_SET_ERR_MSG(extack, "too big gro_max_size"); - return -EINVAL; - } - } return 0; } @@ -2552,12 +2541,13 @@ static int do_set_proto_down(struct net_device *dev, struct netlink_ext_ack *extack) { struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1]; + const struct net_device_ops *ops = dev->netdev_ops; unsigned long mask = 0; u32 value; bool proto_down; int err; - if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) { + if (!ops->ndo_change_proto_down) { NL_SET_ERR_MSG(extack, "Protodown not supported by device"); return -EOPNOTSUPP; } @@ -2780,16 +2770,7 @@ static int do_setlink(const struct sk_buff *skb, } if (dev->gso_max_segs ^ max_segs) { - netif_set_gso_max_segs(dev, max_segs); - status |= DO_SETLINK_MODIFIED; - } - } - - if (tb[IFLA_GRO_MAX_SIZE]) { - u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]); - - if (dev->gro_max_size ^ gro_max_size) { - netif_set_gro_max_size(dev, gro_max_size); + dev->gso_max_segs = max_segs; status |= DO_SETLINK_MODIFIED; } } @@ -2800,11 +2781,11 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_LINKMODE]) { unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]); - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); if (dev->link_mode ^ value) status |= DO_SETLINK_NOTIFY; dev->link_mode = value; - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); } if (tb[IFLA_VFINFO_LIST]) { @@ -3225,8 +3206,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, dev->mtu = mtu; } if (tb[IFLA_ADDRESS]) { - __dev_addr_set(dev, nla_data(tb[IFLA_ADDRESS]), - nla_len(tb[IFLA_ADDRESS])); + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); dev->addr_assign_type = NET_ADDR_SET; } if (tb[IFLA_BROADCAST]) @@ -3243,9 +3224,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, if (tb[IFLA_GSO_MAX_SIZE]) netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); if (tb[IFLA_GSO_MAX_SEGS]) - netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); - if (tb[IFLA_GRO_MAX_SIZE]) - netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE])); + dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); return dev; } @@ -3829,8 +3808,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + size_t if_info_size; - skb = nlmsg_new(if_nlmsg_size(dev, 0), flags); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags); if (skb == NULL) goto errout; @@ -4408,7 +4388,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (br_dev != netdev_master_upper_dev_get(dev) && - !netif_is_bridge_master(dev)) + !(dev->priv_flags & IFF_EBRIDGE)) continue; cops = ops; } diff --git 
a/net/core/secure_seq.c b/net/core/secure_seq.c index 9b84437744..b5bc680d47 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -19,8 +19,8 @@ #include #include -static siphash_aligned_key_t net_secret; -static siphash_aligned_key_t ts_secret; +static siphash_key_t net_secret __read_mostly; +static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { diff --git a/net/core/selftests.c b/net/core/selftests.c index acb1ee97bb..9077fa9698 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -15,8 +15,8 @@ #include struct net_packet_attrs { - const unsigned char *src; - const unsigned char *dst; + unsigned char *src; + unsigned char *dst; u32 ip_src; u32 ip_dst; bool tcp; @@ -173,8 +173,8 @@ static int net_test_loopback_validate(struct sk_buff *skb, struct net_device *orig_ndev) { struct net_test_priv *tpriv = pt->af_packet_priv; - const unsigned char *src = tpriv->packet->src; - const unsigned char *dst = tpriv->packet->dst; + unsigned char *src = tpriv->packet->src; + unsigned char *dst = tpriv->packet->dst; struct netsfhdr *shdr; struct ethhdr *ehdr; struct udphdr *uhdr; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8138c3725..6cb7ec85c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -70,7 +70,6 @@ #include #include #include -#include #include #include @@ -136,31 +135,34 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); -void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) +static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask); +} + +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) +{ fragsz = SKB_DATA_ALIGN(fragsz); - return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { + struct page_frag_cache *nc; void *data; fragsz = SKB_DATA_ALIGN(fragsz); if (in_hardirq() || irqs_disabled()) { - struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); - + nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { - struct napi_alloc_cache *nc; - local_bh_disable(); - nc = this_cpu_ptr(&napi_alloc_cache); - data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); local_bh_enable(); } return data; @@ -396,9 +398,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, { struct kmem_cache *cache; struct sk_buff *skb; - unsigned int osize; - bool pfmemalloc; u8 *data; + bool pfmemalloc; cache = (flags & SKB_ALLOC_FCLONE) ? skbuff_fclone_cache : skbuff_head_cache; @@ -430,8 +431,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. */ - osize = ksize(data); - size = SKB_WITH_OVERHEAD(osize); + size = SKB_WITH_OVERHEAD(ksize(data)); prefetchw(data + size); /* @@ -440,7 +440,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * the tail pointer in struct sk_buff! 
*/ memset(skb, 0, offsetof(struct sk_buff, tail)); - __build_skb_around(skb, data, osize); + __build_skb_around(skb, data, 0); skb->pfmemalloc = pfmemalloc; if (flags & SKB_ALLOC_FCLONE) { @@ -681,7 +681,7 @@ static void skb_release_data(struct sk_buff *skb) * while trying to recycle fragments on __skb_frag_unref() we need * to make one SKB responsible for triggering the recycle path. * So disable the recycling bit if an SKB is cloned and we have * additional references to the fragmented part of the SKB. * Eventually the last SKB will have the recycling bit set and it's * dataref set to 0, which will trigger the recycling @@ -759,23 +759,21 @@ void __kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(__kfree_skb); /** - * kfree_skb_reason - free an sk_buff with special reason + * kfree_skb - free an sk_buff * @skb: buffer to free - * @reason: reason why this skb is dropped * * Drop a reference to the buffer and free it if the usage count has - * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' - * tracepoint. + * hit zero. */ -void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +void kfree_skb(struct sk_buff *skb) { if (!skb_unref(skb)) return; - trace_kfree_skb(skb, __builtin_return_address(0), reason); + trace_kfree_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } -EXPORT_SYMBOL(kfree_skb_reason); +EXPORT_SYMBOL(kfree_skb); void kfree_skb_list(struct sk_buff *segs) { @@ -994,10 +992,12 @@ void napi_consume_skb(struct sk_buff *skb, int budget) } EXPORT_SYMBOL(napi_consume_skb); -/* Make sure a field is contained by headers group */ +/* Make sure a field is enclosed inside headers_start/headers_end section */ #define CHECK_SKB_FIELD(field) \ - BUILD_BUG_ON(offsetof(struct sk_buff, field) != \ - offsetof(struct sk_buff, headers.field)); \ + BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ + offsetof(struct sk_buff, headers_start)); \ + BUILD_BUG_ON(offsetof(struct sk_buff, field) > \ + offsetof(struct sk_buff, headers_end)); \ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { @@ -1009,12 +1009,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) __skb_ext_copy(new, old); __nf_copy(new, old, false); - /* Note : this field could be in the headers group. + /* Note : this field could be in headers_start/headers_end section * It is not yet because we do not want to have a 16 bit hole */ new->queue_mapping = old->queue_mapping; - memcpy(&new->headers, &old->headers, sizeof(new->headers)); + memcpy(&new->headers_start, &old->headers_start, + offsetof(struct sk_buff, headers_end) - + offsetof(struct sk_buff, headers_start)); CHECK_SKB_FIELD(protocol); CHECK_SKB_FIELD(csum); CHECK_SKB_FIELD(hash); @@ -2025,30 +2027,6 @@ void *skb_pull(struct sk_buff *skb, unsigned int len) } EXPORT_SYMBOL(skb_pull); -/** - * skb_pull_data - remove data from the start of a buffer returning its - * original position. - * @skb: buffer to use - * @len: amount of data to remove - * - * This function removes data from the start of a buffer, returning - * the memory to the headroom. A pointer to the original data in the buffer - * is returned after checking if there is enough data to pull. Once the - * data has been pulled future pushes will overwrite the old data.
- */ -void *skb_pull_data(struct sk_buff *skb, size_t len) -{ - void *data = skb->data; - - if (skb->len < len) - return NULL; - - skb_pull(skb, len); - - return data; -} -EXPORT_SYMBOL(skb_pull_data); - /** * skb_trim - remove end from a buffer * @skb: buffer to alter @@ -3455,9 +3433,8 @@ static inline void skb_split_no_header(struct sk_buff *skb, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY; - skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags; + skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); @@ -3876,6 +3853,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, list_skb = list_skb->next; err = 0; + delta_truesize += nskb->truesize; if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { @@ -3900,7 +3878,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, tail = nskb; delta_len += nskb->len; - delta_truesize += nskb->truesize; skb_push(nskb, -skb_network_offset(nskb) + offset); @@ -3941,6 +3918,32 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(skb_segment_list); +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) +{ + if (unlikely(p->len + skb->len >= 65536)) + return -E2BIG; + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; + + skb_pull(skb, skb_gro_offset(skb)); + + NAPI_GRO_CB(p)->last = skb; + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + + /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; + p->truesize += skb->truesize; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + + return 0; +} + /** * skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment @@ -4293,6 +4296,122 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, } EXPORT_SYMBOL_GPL(skb_segment); +int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) +{ + struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); + unsigned int offset = skb_gro_offset(skb); + unsigned int headlen = skb_headlen(skb); + unsigned int len = skb_gro_len(skb); + unsigned int delta_truesize; + unsigned int new_truesize; + struct sk_buff *lp; + + if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) + return -E2BIG; + + lp = NAPI_GRO_CB(p)->last; + pinfo = skb_shinfo(lp); + + if (headlen <= offset) { + skb_frag_t *frag; + skb_frag_t *frag2; + int i = skbinfo->nr_frags; + int nr_frags = pinfo->nr_frags + i; + + if (nr_frags > MAX_SKB_FRAGS) + goto merge; + + offset -= headlen; + pinfo->nr_frags = nr_frags; + skbinfo->nr_frags = 0; + + frag = pinfo->frags + nr_frags; + frag2 = skbinfo->frags + i; + do { + *--frag = *--frag2; + } while (--i); + + skb_frag_off_add(frag, offset); + skb_frag_size_sub(frag, offset); + + /* all fragments truesize : remove (head size + sk_buff) */ + new_truesize = SKB_TRUESIZE(skb_end_offset(skb)); + delta_truesize = skb->truesize - new_truesize; + + skb->truesize = new_truesize; + skb->len -= skb->data_len; + skb->data_len = 0; + + NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; + goto done; + } else if (skb->head_frag) { + int nr_frags = pinfo->nr_frags; + skb_frag_t *frag = pinfo->frags + nr_frags; + struct page *page = virt_to_head_page(skb->head); + unsigned int first_size = headlen - offset; + unsigned int first_offset; + + if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) + goto merge; + + first_offset = skb->data - + (unsigned char *)page_address(page) + + offset; + + pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; + + __skb_frag_set_page(frag, page); + skb_frag_off_set(frag, first_offset); + skb_frag_size_set(frag, first_size); + + memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); + /* We don't need to clear skbinfo->nr_frags here */ + + new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff)); + delta_truesize = skb->truesize - new_truesize; + skb->truesize = new_truesize; + NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; + goto done; + } + +merge: + /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; + delta_truesize = skb->truesize; + if (offset > headlen) { + unsigned int eat = offset - headlen; + + skb_frag_off_add(&skbinfo->frags[0], eat); + skb_frag_size_sub(&skbinfo->frags[0], eat); + skb->data_len -= eat; + skb->len -= eat; + offset = headlen; + } + + __skb_pull(skb, offset); + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; + NAPI_GRO_CB(p)->last = skb; + __skb_header_release(skb); + lp = p; + +done: + NAPI_GRO_CB(p)->count++; + p->data_len += len; + p->truesize += delta_truesize; + p->len += len; + if (lp != p) { + lp->data_len += len; + lp->truesize += delta_truesize; + lp->len += len; + } + NAPI_GRO_CB(skb)->same_flow = 1; + return 0; +} + #ifdef CONFIG_SKB_EXTENSIONS #define SKB_EXT_ALIGN_VALUE 8 #define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE) @@ -4310,9 +4429,6 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif -#if IS_ENABLED(CONFIG_MCTP_FLOWS) - [SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow), -#endif }; static __always_inline
unsigned int skb_ext_total_length(void) @@ -4329,9 +4445,6 @@ static __always_inline unsigned int skb_ext_total_length(void) #endif #if IS_ENABLED(CONFIG_MPTCP) skb_ext_type_len[SKB_EXT_MPTCP] + -#endif -#if IS_ENABLED(CONFIG_MCTP_FLOWS) - skb_ext_type_len[SKB_EXT_MCTP] + #endif 0; } @@ -4729,7 +4842,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; - if (sk_is_tcp(sk)) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) serr->ee.ee_data -= atomic_read(&sk->sk_tskey); } @@ -4798,7 +4912,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (tsonly) { #ifdef CONFIG_INET if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && - sk_is_tcp(sk)) { + sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) { skb = tcp_get_timestamping_opt_stats(sk, orig_skb, ack_skb); opt_stats = true; @@ -6403,14 +6518,6 @@ static void skb_ext_put_sp(struct sec_path *sp) } #endif -#ifdef CONFIG_MCTP_FLOWS -static void skb_ext_put_mctp(struct mctp_flow *flow) -{ - if (flow->key) - mctp_key_unref(flow->key); -} -#endif - void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) { struct skb_ext *ext = skb->extensions; @@ -6446,10 +6553,6 @@ void __skb_ext_put(struct skb_ext *ext) if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH)) skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH)); #endif -#ifdef CONFIG_MCTP_FLOWS - if (__skb_ext_exist(ext, SKB_EXT_MCTP)) - skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP)); -#endif kmem_cache_free(skbuff_ext_cache, ext); } diff --git a/net/core/sock.c b/net/core/sock.c index 6eb174805b..deaed1b206 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -144,6 +144,8 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +static void sock_inuse_add(struct net *net, int val); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@ -325,10 +327,7 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); noreclaim_flag = memalloc_noreclaim_save(); - ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv, - tcp_v6_do_rcv, - tcp_v4_do_rcv, - sk, skb); + ret = sk->sk_backlog_rcv(sk, skb); memalloc_noreclaim_restore(noreclaim_flag); return ret; @@ -351,7 +350,7 @@ void sk_error_report(struct sock *sk) } EXPORT_SYMBOL(sk_error_report); -int sock_get_timeout(long timeo, void *optval, bool old_timeval) +static int sock_get_timeout(long timeo, void *optval, bool old_timeval) { struct __kernel_sock_timeval tv; @@ -380,11 +379,12 @@ int sock_get_timeout(long timeo, void *optval, bool old_timeval) *(struct __kernel_sock_timeval *)optval = tv; return sizeof(tv); } -EXPORT_SYMBOL(sock_get_timeout); -int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, - sockptr_t optval, int optlen, bool old_timeval) +static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, + bool old_timeval) { + struct __kernel_sock_timeval tv; + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; @@ -393,8 +393,8 @@ int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) return -EFAULT; - tv->tv_sec = tv32.tv_sec; - tv->tv_usec = tv32.tv_usec; + tv.tv_sec = tv32.tv_sec; + tv.tv_usec = tv32.tv_usec; } else if (old_timeval) { struct __kernel_old_timeval old_tv; @@ -402,28 +402,14 @@ int sock_copy_user_timeval(struct 
__kernel_sock_timeval *tv, return -EINVAL; if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) return -EFAULT; - tv->tv_sec = old_tv.tv_sec; - tv->tv_usec = old_tv.tv_usec; + tv.tv_sec = old_tv.tv_sec; + tv.tv_usec = old_tv.tv_usec; } else { - if (optlen < sizeof(*tv)) + if (optlen < sizeof(tv)) return -EINVAL; - if (copy_from_sockptr(tv, optval, sizeof(*tv))) + if (copy_from_sockptr(&tv, optval, sizeof(tv))) return -EFAULT; } - - return 0; -} -EXPORT_SYMBOL(sock_copy_user_timeval); - -static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, - bool old_timeval) -{ - struct __kernel_sock_timeval tv; - int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval); - - if (err) - return err; - if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) return -EDOM; @@ -875,7 +861,8 @@ int sock_set_timestamping(struct sock *sk, int optname, if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { - if (sk_is_tcp(sk)) { + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return -EINVAL; @@ -962,53 +949,6 @@ void sock_set_mark(struct sock *sk, u32 val) } EXPORT_SYMBOL(sock_set_mark); -static void sock_release_reserved_memory(struct sock *sk, int bytes) -{ - /* Round down bytes to multiple of pages */ - bytes &= ~(SK_MEM_QUANTUM - 1); - - WARN_ON(bytes > sk->sk_reserved_mem); - sk->sk_reserved_mem -= bytes; - sk_mem_reclaim(sk); -} - -static int sock_reserve_memory(struct sock *sk, int bytes) -{ - long allocated; - bool charged; - int pages; - - if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk)) - return -EOPNOTSUPP; - - if (!bytes) - return 0; - - pages = sk_mem_pages(bytes); - - /* pre-charge to memcg */ - charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); - if (!charged) - return -ENOMEM; - - /* pre-charge to forward_alloc */ - allocated = sk_memory_allocated_add(sk, pages); - /* If the system goes into memory pressure with this - * precharge, give up and return error. - */ - if (allocated > sk_prot_mem_limits(sk, 1)) { - sk_memory_allocated_sub(sk, pages); - mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); - return -ENOMEM; - } - sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT; - - sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT; - - return 0; -} - /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. 
@@ -1137,7 +1077,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_PRIORITY: if ((val >= 0 && val <= 6) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) sk->sk_priority = val; else @@ -1283,8 +1222,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, clear_bit(SOCK_PASSSEC, &sock->flags); break; case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1374,7 +1312,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_ZEROCOPY: if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { - if (!(sk_is_tcp(sk) || + if (!((sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP) || (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP))) ret = -ENOTSUPP; @@ -1430,23 +1369,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, ~SOCK_BUF_LOCK_MASK); break; - case SO_RESERVE_MEM: - { - int delta; - - if (val < 0) { - ret = -EINVAL; - break; - } - - delta = val - sk->sk_reserved_mem; - if (delta < 0) - sock_release_reserved_memory(sk, -delta); - else - ret = sock_reserve_memory(sk, delta); - break; - } - default: ret = -ENOPROTOOPT; break; @@ -1830,10 +1752,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; break; - case SO_RESERVE_MEM: - v.val = sk->sk_reserved_mem; - break; - default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1985,7 +1903,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_lock_init(sk); sk->sk_net_refcnt = kern ? 
0 : 1; if (likely(sk->sk_net_refcnt)) { - get_net_track(net, &sk->ns_tracker, priority); + get_net(net); sock_inuse_add(net, 1); } @@ -2041,7 +1959,7 @@ static void __sk_destruct(struct rcu_head *head) put_pid(sk->sk_peer_pid); if (likely(sk->sk_net_refcnt)) - put_net_track(sock_net(sk), &sk->ns_tracker); + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } @@ -2049,9 +1967,6 @@ void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); - WARN_ON_ONCE(!llist_empty(&sk->defer_list)); - sk_defer_free_flush(sk); - if (rcu_access_pointer(sk->sk_reuseport_cb)) { reuseport_detach_sock(sk); use_call_rcu = true; @@ -2131,7 +2046,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* SANITY */ if (likely(newsk->sk_net_refcnt)) { - get_net_track(sock_net(newsk), &newsk->ns_tracker, priority); + get_net(sock_net(newsk)); sock_inuse_add(sock_net(newsk), 1); } sk_node_init(&newsk->sk_node); @@ -2152,7 +2067,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; - newsk->sk_reserved_mem = 0; atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; @@ -2252,22 +2166,17 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) u32 max_segs = 1; sk_dst_set(sk, dst); - sk->sk_route_caps = dst->dev->features; - if (sk_is_tcp(sk)) - sk->sk_route_caps |= NETIF_F_GSO; + sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; - if (unlikely(sk->sk_gso_disabled)) - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk->sk_route_caps &= ~sk->sk_route_nocaps; if (sk_can_gso(sk)) { if (dst->header_len && !xfrm_dst_offload_ok(dst)) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; - /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */ - sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size); - /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ - max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); + sk->sk_gso_max_size = dst->dev->gso_max_size; + max_segs = max_t(u32, dst->dev->gso_max_segs, 1); } } sk->sk_gso_max_segs = max_segs; @@ -3297,7 +3206,7 @@ void lock_sock_nested(struct sock *sk, int subclass) might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (sock_owned_by_user_nocheck(sk)) + if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); @@ -3328,7 +3237,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (!sock_owned_by_user_nocheck(sk)) { + if (!sk->sk_lock.owned) { /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held. 
@@ -3543,8 +3452,19 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) } #ifdef CONFIG_PROC_FS +#define PROTO_INUSE_NR 64 /* should be enough for the first time */ +struct prot_inuse { + int val[PROTO_INUSE_NR]; +}; + static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); +void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) +{ + __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_add); + int sock_prot_inuse_get(struct net *net, struct proto *prot) { int cpu, idx = prot->inuse_idx; @@ -3557,12 +3477,17 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot) } EXPORT_SYMBOL_GPL(sock_prot_inuse_get); +static void sock_inuse_add(struct net *net, int val) +{ + this_cpu_add(*net->core.sock_inuse, val); +} + int sock_inuse_get(struct net *net) { int cpu, res = 0; for_each_possible_cpu(cpu) - res += per_cpu_ptr(net->core.prot_inuse, cpu)->all; + res += *per_cpu_ptr(net->core.sock_inuse, cpu); return res; } @@ -3574,12 +3499,22 @@ static int __net_init sock_inuse_init_net(struct net *net) net->core.prot_inuse = alloc_percpu(struct prot_inuse); if (net->core.prot_inuse == NULL) return -ENOMEM; + + net->core.sock_inuse = alloc_percpu(int); + if (net->core.sock_inuse == NULL) + goto out; + return 0; + +out: + free_percpu(net->core.prot_inuse); + return -ENOMEM; } static void __net_exit sock_inuse_exit_net(struct net *net) { free_percpu(net->core.prot_inuse); + free_percpu(net->core.sock_inuse); } static struct pernet_operations net_inuse_ops = { @@ -3625,6 +3560,9 @@ static inline void release_proto_idx(struct proto *prot) { } +static void sock_inuse_add(struct net *net, int val) +{ +} #endif static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index f7cf74cdd3..c9c45b935f 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -1,6 +1,5 @@ /* License: GPL */ -#include #include #include #include diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 1827669eed..8288b5382f 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -521,6 +521,12 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops) ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB; } +static bool sk_is_tcp(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + static bool sock_map_redirect_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) @@ -1569,7 +1575,7 @@ static struct bpf_iter_reg sock_map_iter_reg = { .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__sockmap, key), - PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY }, + PTR_TO_RDONLY_BUF_OR_NULL }, { offsetof(struct bpf_iter__sockmap, sk), PTR_TO_BTF_ID_OR_NULL }, }, diff --git a/net/core/stream.c b/net/core/stream.c index 06b36c730c..a166a32b41 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -199,7 +199,7 @@ void sk_stream_kill_queues(struct sock *sk) WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ - sk_mem_reclaim_final(sk); + sk_mem_reclaim(sk); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7b4d485aac..5f88526ad6 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -6,7 +6,6 @@ * Added /proc/sys/net/core directory entry (empty =) ). 
[MS] */ -#include #include #include #include diff --git a/net/core/xdp.c b/net/core/xdp.c index 7aba355049..cc92ccb384 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -110,15 +110,20 @@ static void mem_allocator_disconnect(void *allocator) mutex_unlock(&mem_id_lock); } -void xdp_unreg_mem_model(struct xdp_mem_info *mem) +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) { struct xdp_mem_allocator *xa; - int type = mem->type; - int id = mem->id; + int type = xdp_rxq->mem.type; + int id = xdp_rxq->mem.id; /* Reset mem info to defaults */ - mem->id = 0; - mem->type = 0; + xdp_rxq->mem.id = 0; + xdp_rxq->mem.type = 0; + + if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { + WARN(1, "Missing register, driver bug"); + return; + } if (id == 0) return; @@ -130,17 +135,6 @@ void xdp_unreg_mem_model(struct xdp_mem_info *mem) rcu_read_unlock(); } } -EXPORT_SYMBOL_GPL(xdp_unreg_mem_model); - -void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) -{ - if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { - WARN(1, "Missing register, driver bug"); - return; - } - - xdp_unreg_mem_model(&xdp_rxq->mem); -} EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) @@ -149,6 +143,8 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) if (xdp_rxq->reg_state == REG_STATE_UNUSED) return; + WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG"); + xdp_rxq_info_unreg_mem_model(xdp_rxq); xdp_rxq->reg_state = REG_STATE_UNREGISTERED; @@ -165,11 +161,6 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index, unsigned int napi_id) { - if (!dev) { - WARN(1, "Missing net_device from driver"); - return -ENODEV; - } - if (xdp_rxq->reg_state == REG_STATE_UNUSED) { WARN(1, "Driver promised not to register this"); return -EINVAL; @@ -180,6 +171,11 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, xdp_rxq_info_unreg(xdp_rxq); } + if (!dev) { + WARN(1, "Missing net_device from driver"); + return -ENODEV; + } + /* State either UNREGISTERED or NEW */ xdp_rxq_info_init(xdp_rxq); xdp_rxq->dev = dev; @@ -265,24 +261,28 @@ static bool __is_supported_mem_type(enum xdp_mem_type type) return true; } -static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, - enum xdp_mem_type type, - void *allocator) +int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + enum xdp_mem_type type, void *allocator) { struct xdp_mem_allocator *xdp_alloc; gfp_t gfp = GFP_KERNEL; int id, errno, ret; void *ptr; - if (!__is_supported_mem_type(type)) - return ERR_PTR(-EOPNOTSUPP); + if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { + WARN(1, "Missing register, driver bug"); + return -EFAULT; + } - mem->type = type; + if (!__is_supported_mem_type(type)) + return -EOPNOTSUPP; + + xdp_rxq->mem.type = type; if (!allocator) { if (type == MEM_TYPE_PAGE_POOL) - return ERR_PTR(-EINVAL); /* Setup time check page_pool req */ - return NULL; + return -EINVAL; /* Setup time check page_pool req */ + return 0; } /* Delay init of rhashtable to save memory if feature isn't used */ @@ -292,13 +292,13 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, mutex_unlock(&mem_id_lock); if (ret < 0) { WARN_ON(1); - return ERR_PTR(ret); + return ret; } } xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); if (!xdp_alloc) - return ERR_PTR(-ENOMEM); + return -ENOMEM; mutex_lock(&mem_id_lock); id = __mem_id_cyclic_get(gfp); @@ -306,61 +306,31 @@ static 
struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, errno = id; goto err; } - mem->id = id; - xdp_alloc->mem = *mem; + xdp_rxq->mem.id = id; + xdp_alloc->mem = xdp_rxq->mem; xdp_alloc->allocator = allocator; /* Insert allocator into ID lookup table */ ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); if (IS_ERR(ptr)) { - ida_simple_remove(&mem_id_pool, mem->id); - mem->id = 0; + ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id); + xdp_rxq->mem.id = 0; errno = PTR_ERR(ptr); goto err; } if (type == MEM_TYPE_PAGE_POOL) - page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem); + page_pool_use_xdp_mem(allocator, mem_allocator_disconnect); mutex_unlock(&mem_id_lock); - return xdp_alloc; -err: - mutex_unlock(&mem_id_lock); - kfree(xdp_alloc); - return ERR_PTR(errno); -} - -int xdp_reg_mem_model(struct xdp_mem_info *mem, - enum xdp_mem_type type, void *allocator) -{ - struct xdp_mem_allocator *xdp_alloc; - - xdp_alloc = __xdp_reg_mem_model(mem, type, allocator); - if (IS_ERR(xdp_alloc)) - return PTR_ERR(xdp_alloc); - return 0; -} -EXPORT_SYMBOL_GPL(xdp_reg_mem_model); - -int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, - enum xdp_mem_type type, void *allocator) -{ - struct xdp_mem_allocator *xdp_alloc; - - if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { - WARN(1, "Missing register, driver bug"); - return -EFAULT; - } - - xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator); - if (IS_ERR(xdp_alloc)) - return PTR_ERR(xdp_alloc); - trace_mem_connect(xdp_alloc, xdp_rxq); return 0; +err: + mutex_unlock(&mem_id_lock); + kfree(xdp_alloc); + return errno; } - EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); /* XDP RX runs under NAPI protection, and in different delivery error diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index b441ab330f..dc4fb699b5 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev) } EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask); +static void dcbnl_flush_dev(struct net_device *dev) +{ + struct dcb_app_type *itr, *tmp; + + spin_lock_bh(&dcb_lock); + + list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) { + if (itr->ifindex == dev->ifindex) { + list_del(&itr->list); + kfree(itr); + } + } + + spin_unlock_bh(&dcb_lock); +} + +static int dcbnl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_UNREGISTER: + if (!dev->dcbnl_ops) + return NOTIFY_DONE; + + dcbnl_flush_dev(dev); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block dcbnl_nb __read_mostly = { + .notifier_call = dcbnl_netdevice_event, +}; + static int __init dcbnl_init(void) { + int err; + + err = register_netdevice_notifier(&dcbnl_nb); + if (err) + return err; + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a976b4d298..fc44dadc77 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -238,6 +238,17 @@ void dccp_destroy_sock(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_destroy_sock); +static inline int dccp_listen_start(struct sock *sk, int backlog) +{ + struct dccp_sock *dp = dccp_sk(sk); + + dp->dccps_role = DCCP_ROLE_LISTEN; + /* do not start to listen if feature negotiation setup fails */ + if (dccp_feat_finalise_settings(dp)) + return -EPROTO; + return inet_csk_listen_start(sk, backlog); 
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index b441ab330f..dc4fb699b5 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
 }
 EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
 
+static void dcbnl_flush_dev(struct net_device *dev)
+{
+	struct dcb_app_type *itr, *tmp;
+
+	spin_lock_bh(&dcb_lock);
+
+	list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) {
+		if (itr->ifindex == dev->ifindex) {
+			list_del(&itr->list);
+			kfree(itr);
+		}
+	}
+
+	spin_unlock_bh(&dcb_lock);
+}
+
+static int dcbnl_netdevice_event(struct notifier_block *nb,
+				 unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		if (!dev->dcbnl_ops)
+			return NOTIFY_DONE;
+
+		dcbnl_flush_dev(dev);
+
+		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+static struct notifier_block dcbnl_nb __read_mostly = {
+	.notifier_call  = dcbnl_netdevice_event,
+};
+
 static int __init dcbnl_init(void)
 {
+	int err;
+
+	err = register_netdevice_notifier(&dcbnl_nb);
+	if (err)
+		return err;
+
 	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a976b4d298..fc44dadc77 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -238,6 +238,17 @@ void dccp_destroy_sock(struct sock *sk)
 
 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
 
+static inline int dccp_listen_start(struct sock *sk, int backlog)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	dp->dccps_role = DCCP_ROLE_LISTEN;
+	/* do not start to listen if feature negotiation setup fails */
+	if (dccp_feat_finalise_settings(dp))
+		return -EPROTO;
+	return inet_csk_listen_start(sk, backlog);
+}
+
 static inline int dccp_need_reset(int state)
 {
 	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
@@ -920,17 +931,11 @@ int inet_dccp_listen(struct socket *sock, int backlog)
 	 * we can only allow the backlog to be adjusted.
 	 */
 	if (old_state != DCCP_LISTEN) {
-		struct dccp_sock *dp = dccp_sk(sk);
-
-		dp->dccps_role = DCCP_ROLE_LISTEN;
-
-		/* do not start to listen if feature negotiation setup fails */
-		if (dccp_feat_finalise_settings(dp)) {
-			err = -EPROTO;
-			goto out;
-		}
-
-		err = inet_csk_listen_start(sk);
+		/*
+		 * FIXME: here it probably should be sk->sk_prot->listen_start
+		 * see tcp_listen_start
+		 */
+		err = dccp_listen_start(sk, backlog);
 		if (err)
 			goto out;
 	}
diff --git a/net/dccp/trace.h b/net/dccp/trace.h
index 5a43b3508c..5062421bee 100644
--- a/net/dccp/trace.h
+++ b/net/dccp/trace.h
@@ -60,7 +60,9 @@ TRACE_EVENT(dccp_probe,
 			__entry->tx_t_ipi = hc->tx_t_ipi;
 		} else {
 			__entry->tx_s = 0;
-			memset_startat(__entry, 0, tx_rtt);
+			memset(&__entry->tx_rtt, 0, (void *)&__entry->tx_t_ipi -
+			       (void *)&__entry->tx_rtt +
+			       sizeof(__entry->tx_t_ipi));
 		}
 	),
 
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index c59be5b044..7ab788f41a 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -38,7 +38,6 @@
 *******************************************************************************/
 
 #include
-#include
 #include
 #include
 #include
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index ee73057529..4a4e3c1774 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -101,6 +101,10 @@ static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
 	return err;
 }
 
+static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = {
+	FRA_GENERIC_POLICY,
+};
+
 static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
 	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
@@ -231,6 +235,7 @@ static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = {
 	.fill		= dn_fib_rule_fill,
 	.flush_cache	= dn_fib_rule_flush_cache,
 	.nlgroup	= RTNLGRP_DECnet_RULE,
+	.policy		= dn_fib_rule_policy,
 	.owner		= THIS_MODULE,
 	.fro_net	= &init_net,
 };
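The net/dccp/trace.h hunk above open-codes what memset_startat() expresses in one line: zero a struct from a given member through its last member. The equivalence, demonstrated on a made-up struct (struct probe and its fields are placeholders for this example, not kernel types):

#include <assert.h>
#include <stddef.h>
#include <string.h>

struct probe {
	unsigned int tx_s;	/* left untouched */
	unsigned int tx_rtt;	/* first member to clear */
	unsigned int tx_p;
	unsigned int tx_t_ipi;	/* last member of the struct */
};

int main(void)
{
	struct probe a = { 1, 2, 3, 4 }, b = a;

	/* Open-coded form, as in the hunk above: clear from tx_rtt
	 * through the end of tx_t_ipi.
	 */
	memset(&a.tx_rtt, 0, (char *)&a.tx_t_ipi - (char *)&a.tx_rtt +
	       sizeof(a.tx_t_ipi));

	/* memset_startat()-style form: clear from offsetof(tx_rtt) to
	 * the end of the struct; identical when tx_t_ipi is the last
	 * member, as it is in the trace event entry.
	 */
	memset((char *)&b + offsetof(struct probe, tx_rtt), 0,
	       sizeof(b) - offsetof(struct probe, tx_rtt));

	assert(memcmp(&a, &b, sizeof(a)) == 0);
	return 0;
}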
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 8cb87b5067..d8ee15f1c7 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -92,6 +92,13 @@ config NET_DSA_TAG_KSZ
 	  Say Y if you want to enable support for tagging frames for the
 	  Microchip 8795/9477/9893 families of switches.
 
+config NET_DSA_TAG_RTL4_A
+	tristate "Tag driver for Realtek 4 byte protocol A tags"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Realtek switches with 4 byte protocol A tags, such as found in
+	  the Realtek RTL8366RB.
+
 config NET_DSA_TAG_OCELOT
 	tristate "Tag driver for Ocelot family of switches, using NPI port"
 	select PACKING
@@ -119,19 +126,6 @@ config NET_DSA_TAG_QCA
 	  Say Y or M if you want to enable support for tagging frames for
 	  the Qualcomm Atheros QCA8K switches.
 
-config NET_DSA_TAG_RTL4_A
-	tristate "Tag driver for Realtek 4 byte protocol A tags"
-	help
-	  Say Y or M if you want to enable support for tagging frames for the
-	  Realtek switches with 4 byte protocol A tags, sich as found in
-	  the Realtek RTL8366RB.
-
-config NET_DSA_TAG_RTL8_4
-	tristate "Tag driver for Realtek 8 byte protocol 4 tags"
-	help
-	  Say Y or M if you want to enable support for tagging frames for Realtek
-	  switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
-
 config NET_DSA_TAG_LAN9303
 	tristate "Tag driver for SMSC/Microchip LAN9303 family of switches"
 	help
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9f75820e7c..67ea009f24 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -10,13 +10,12 @@ obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o
 obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
 obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
 obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
+obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
 obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
 obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
 obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
 obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o
 obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
-obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
-obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
 obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
 obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
 obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index c43f7446a7..4ff03fb262 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -280,22 +280,23 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 }
 
 #ifdef CONFIG_PM_SLEEP
-static bool dsa_port_is_initialized(const struct dsa_port *dp)
+static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
 {
+	const struct dsa_port *dp = dsa_to_port(ds, p);
+
 	return dp->type == DSA_PORT_TYPE_USER && dp->slave;
 }
 
 int dsa_switch_suspend(struct dsa_switch *ds)
 {
-	struct dsa_port *dp;
-	int ret = 0;
+	int i, ret = 0;
 
 	/* Suspend slave network devices */
-	dsa_switch_for_each_port(dp, ds) {
-		if (!dsa_port_is_initialized(dp))
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_port_initialized(ds, i))
 			continue;
 
-		ret = dsa_slave_suspend(dp->slave);
+		ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave);
 		if (ret)
 			return ret;
 	}
@@ -309,8 +310,7 @@ EXPORT_SYMBOL_GPL(dsa_switch_suspend);
 
 int dsa_switch_resume(struct dsa_switch *ds)
 {
-	struct dsa_port *dp;
-	int ret = 0;
+	int i, ret = 0;
 
 	if (ds->ops->resume)
 		ret = ds->ops->resume(ds);
@@ -319,11 +319,11 @@ int dsa_switch_resume(struct dsa_switch *ds)
 		return ret;
 
 	/* Resume slave network devices */
-	dsa_switch_for_each_port(dp, ds) {
-		if (!dsa_port_is_initialized(dp))
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_port_initialized(ds, i))
 			continue;
 
-		ret = dsa_slave_resume(dp->slave);
+		ret = dsa_slave_resume(dsa_to_port(ds, i)->slave);
 		if (ret)
 			return ret;
 	}
@@ -407,7 +407,7 @@ EXPORT_SYMBOL_GPL(dsa_devlink_resource_register);
 
 void dsa_devlink_resources_unregister(struct dsa_switch *ds)
 {
-	devlink_resources_unregister(ds->devlink);
+	devlink_resources_unregister(ds->devlink, NULL);
 }
 EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister);
 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index dcad3100b1..e9911b18bd 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -129,52 +129,35 @@ void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
 	}
 }
 
-struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
-					const struct net_device *br)
-{
-	struct dsa_port *dp;
-
-	list_for_each_entry(dp, &dst->ports, list)
-		if (dsa_port_bridge_dev_get(dp) == br)
-			return dp->bridge;
-
-	return NULL;
-}
-
 static int dsa_bridge_num_find(const struct net_device *bridge_dev)
 {
 	struct dsa_switch_tree *dst;
+	struct dsa_port *dp;
 
-	list_for_each_entry(dst, &dsa_tree_list, list) {
-		struct dsa_bridge *bridge;
+	/* When preparing the offload for a port, it will have a valid
+	 * dp->bridge_dev pointer but a not yet valid dp->bridge_num.
+	 * However there might be other ports having the same dp->bridge_dev
+	 * and a valid dp->bridge_num, so just ignore this port.
+	 */
+	list_for_each_entry(dst, &dsa_tree_list, list)
+		list_for_each_entry(dp, &dst->ports, list)
+			if (dp->bridge_dev == bridge_dev &&
+			    dp->bridge_num != -1)
+				return dp->bridge_num;
 
-		bridge = dsa_tree_bridge_find(dst, bridge_dev);
-		if (bridge)
-			return bridge->num;
-	}
-
-	return 0;
+	return -1;
 }
 
-unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
 {
-	unsigned int bridge_num = dsa_bridge_num_find(bridge_dev);
+	int bridge_num = dsa_bridge_num_find(bridge_dev);
 
-	/* Switches without FDB isolation support don't get unique
-	 * bridge numbering
-	 */
-	if (!max)
-		return 0;
-
-	if (!bridge_num) {
-		/* First port that requests FDB isolation or TX forwarding
-		 * offload for this bridge
-		 */
-		bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges,
-						DSA_MAX_NUM_OFFLOADING_BRIDGES,
-						1);
+	if (bridge_num < 0) {
+		/* First port that offloads TX forwarding for this bridge */
+		bridge_num = find_first_zero_bit(&dsa_fwd_offloading_bridges,
+						 DSA_MAX_NUM_OFFLOADING_BRIDGES);
 		if (bridge_num >= max)
-			return 0;
+			return -1;
 
 		set_bit(bridge_num, &dsa_fwd_offloading_bridges);
 	}
@@ -182,14 +165,13 @@ unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
 	return bridge_num;
 }
 
-void dsa_bridge_num_put(const struct net_device *bridge_dev,
-			unsigned int bridge_num)
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num)
 {
-	/* Since we refcount bridges, we know that when we call this function
-	 * it is no longer in use, so we can just go ahead and remove it from
-	 * the bit mask.
+	/* Check if the bridge is still in use, otherwise it is time
+	 * to clean it up so we can reuse this bridge_num later.
 	 */
-	clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
+	if (dsa_bridge_num_find(bridge_dev) < 0)
+		clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
 }
 
 struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
@@ -417,8 +399,11 @@ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
 		if (!dsa_port_is_cpu(cpu_dp))
 			continue;
 
-		/* Prefer a local CPU port */
-		dsa_switch_for_each_port(dp, cpu_dp->ds) {
+		list_for_each_entry(dp, &dst->ports, list) {
+			/* Prefer a local CPU port */
+			if (dp->ds != cpu_dp->ds)
+				continue;
+
 			/* Prefer the first local CPU port found */
 			if (dp->cpu_dp)
 				continue;
@@ -451,7 +436,6 @@ static int dsa_port_setup(struct dsa_port *dp)
 	if (dp->setup)
 		return 0;
 
-	mutex_init(&dp->addr_lists_lock);
 	INIT_LIST_HEAD(&dp->fdbs);
 	INIT_LIST_HEAD(&dp->mdbs);
 
@@ -561,7 +545,6 @@ static void dsa_port_teardown(struct dsa_port *dp)
 	struct devlink_port *dlp = &dp->devlink_port;
 	struct dsa_switch *ds = dp->ds;
 	struct dsa_mac_addr *a, *tmp;
-	struct net_device *slave;
 
 	if (!dp->setup)
 		return;
@@ -583,11 +566,9 @@ static void dsa_port_teardown(struct dsa_port *dp)
 		dsa_port_link_unregister_of(dp);
 		break;
 	case DSA_PORT_TYPE_USER:
-		slave = dp->slave;
-
-		if (slave) {
+		if (dp->slave) {
+			dsa_slave_destroy(dp->slave);
 			dp->slave = NULL;
-			dsa_slave_destroy(slave);
 		}
 		break;
 	}
@@ -821,16 +802,17 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
 {
 	const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
 	struct dsa_switch_tree *dst = ds->dst;
-	struct dsa_port *cpu_dp;
-	int err;
+	int port, err;
 
 	if (tag_ops->proto == dst->default_proto)
-		goto connect;
+		return 0;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (!dsa_is_cpu_port(ds, port))
+			continue;
 
-	dsa_switch_for_each_cpu_port(cpu_dp, ds) {
 		rtnl_lock();
-		err = ds->ops->change_tag_protocol(ds, cpu_dp->index,
-						   tag_ops->proto);
+		err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto);
 		rtnl_unlock();
 		if (err) {
 			dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
@@ -839,30 +821,7 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
 		}
 	}
 
-connect:
-	if (tag_ops->connect) {
-		err = tag_ops->connect(ds);
-		if (err)
-			return err;
-	}
-
-	if (ds->ops->connect_tag_protocol) {
-		err = ds->ops->connect_tag_protocol(ds, tag_ops->proto);
-		if (err) {
-			dev_err(ds->dev,
-				"Unable to connect to tag protocol \"%s\": %pe\n",
-				tag_ops->name, ERR_PTR(err));
-			goto disconnect;
-		}
-	}
-
 	return 0;
-
-disconnect:
-	if (tag_ops->disconnect)
-		tag_ops->disconnect(ds);
-
-	return err;
 }
 
 static int dsa_switch_setup(struct dsa_switch *ds)
@@ -891,13 +850,19 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 	dl_priv = devlink_priv(ds->devlink);
 	dl_priv->ds = ds;
 
+	err = devlink_register(ds->devlink);
+	if (err)
+		goto free_devlink;
+
 	/* Setup devlink port instances now, so that the switch
 	 * setup() can register regions etc, against the ports
 	 */
-	dsa_switch_for_each_port(dp, ds) {
-		err = dsa_port_devlink_setup(dp);
-		if (err)
-			goto unregister_devlink_ports;
+	list_for_each_entry(dp, &ds->dst->ports, list) {
+		if (dp->ds == ds) {
+			err = dsa_port_devlink_setup(dp);
+			if (err)
+				goto unregister_devlink_ports;
+		}
 	}
 
 	err = dsa_switch_register_notifier(ds);
@@ -914,6 +879,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 	if (err)
 		goto teardown;
 
+	devlink_params_publish(ds->devlink);
+
 	if (!ds->slave_mii_bus && ds->ops->phy_read) {
 		ds->slave_mii_bus = mdiobus_alloc();
 		if (!ds->slave_mii_bus) {
@@ -929,7 +896,7 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 	}
 
 	ds->setup = true;
-	devlink_register(ds->devlink);
+
 	return 0;
 
 free_slave_mii_bus:
@@ -941,10 +908,14 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 unregister_notifier:
 	dsa_switch_unregister_notifier(ds);
 unregister_devlink_ports:
-	dsa_switch_for_each_port(dp, ds)
-		dsa_port_devlink_teardown(dp);
+	list_for_each_entry(dp, &ds->dst->ports, list)
+		if (dp->ds == ds)
+			dsa_port_devlink_teardown(dp);
+	devlink_unregister(ds->devlink);
+free_devlink:
 	devlink_free(ds->devlink);
 	ds->devlink = NULL;
+
 	return err;
 }
 
@@ -955,23 +926,22 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
 	if (!ds->setup)
 		return;
 
-	if (ds->devlink)
-		devlink_unregister(ds->devlink);
-
 	if (ds->slave_mii_bus && ds->ops->phy_read) {
 		mdiobus_unregister(ds->slave_mii_bus);
 		mdiobus_free(ds->slave_mii_bus);
 		ds->slave_mii_bus = NULL;
 	}
 
+	dsa_switch_unregister_notifier(ds);
+
 	if (ds->ops->teardown)
 		ds->ops->teardown(ds);
 
-	dsa_switch_unregister_notifier(ds);
-
-	if (ds->devlink) {
-		dsa_switch_for_each_port(dp, ds)
-			dsa_port_devlink_teardown(dp);
+	list_for_each_entry(dp, &ds->dst->ports, list)
+		if (dp->ds == ds)
+			dsa_port_devlink_teardown(dp);
+	devlink_unregister(ds->devlink);
 	devlink_free(ds->devlink);
 	ds->devlink = NULL;
 	}
@@ -1006,28 +976,23 @@ static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
 		dsa_switch_teardown(dp->ds);
 }
 
-/* Bring shared ports up first, then non-shared ports */
-static int dsa_tree_setup_ports(struct dsa_switch_tree *dst)
+static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 {
 	struct dsa_port *dp;
-	int err = 0;
+	int err;
 
 	list_for_each_entry(dp, &dst->ports, list) {
-		if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) {
-			err = dsa_port_setup(dp);
-			if (err)
-				goto teardown;
-		}
+		err = dsa_switch_setup(dp->ds);
+		if (err)
+			goto teardown;
 	}
 
 	list_for_each_entry(dp, &dst->ports, list) {
-		if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) {
-			err = dsa_port_setup(dp);
-			if (err) {
-				err = dsa_port_reinit_as_unused(dp);
-				if (err)
-					goto teardown;
-			}
+		err = dsa_port_setup(dp);
+		if (err) {
+			err = dsa_port_reinit_as_unused(dp);
+			if (err)
+				goto teardown;
 		}
 	}
 
@@ -1036,21 +1001,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 teardown:
 	dsa_tree_teardown_ports(dst);
 
-	return err;
-}
-
-static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
-{
-	struct dsa_port *dp;
-	int err = 0;
-
-	list_for_each_entry(dp, &dst->ports, list) {
-		err = dsa_switch_setup(dp->ds);
-		if (err) {
-			dsa_tree_teardown_switches(dst);
-			break;
-		}
-	}
+	dsa_tree_teardown_switches(dst);
 
 	return err;
 }
@@ -1060,8 +1011,6 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 	struct dsa_port *dp;
 	int err;
 
-	rtnl_lock();
-
 	list_for_each_entry(dp, &dst->ports, list) {
 		if (dsa_port_is_cpu(dp)) {
 			err = dsa_master_setup(dp->master, dp);
@@ -1070,8 +1019,6 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 		}
 	}
 
-	rtnl_unlock();
-
 	return 0;
 }
 
@@ -1079,13 +1026,9 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
 {
 	struct dsa_port *dp;
 
-	rtnl_lock();
-
 	list_for_each_entry(dp, &dst->ports, list)
 		if (dsa_port_is_cpu(dp))
 			dsa_master_teardown(dp->master);
-
-	rtnl_unlock();
 }
 
 static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
@@ -1141,13 +1084,9 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
 	if (err)
 		goto teardown_switches;
 
-	err = dsa_tree_setup_ports(dst);
-	if (err)
-		goto teardown_master;
-
 	err = dsa_tree_setup_lags(dst);
 	if (err)
-		goto teardown_ports;
+		goto teardown_master;
 
 	dst->setup = true;
 
@@ -1155,11 +1094,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
 
 	return 0;
 
-teardown_ports:
-	dsa_tree_teardown_ports(dst);
 teardown_master:
 	dsa_tree_teardown_master(dst);
 teardown_switches:
+	dsa_tree_teardown_ports(dst);
 	dsa_tree_teardown_switches(dst);
 teardown_cpu_ports:
 	dsa_tree_teardown_cpu_ports(dst);
@@ -1176,10 +1114,10 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 
 	dsa_tree_teardown_lags(dst);
 
-	dsa_tree_teardown_ports(dst);
-
 	dsa_tree_teardown_master(dst);
 
+	dsa_tree_teardown_ports(dst);
+
 	dsa_tree_teardown_switches(dst);
 
 	dsa_tree_teardown_cpu_ports(dst);
@@ -1194,37 +1132,6 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 	dst->setup = false;
 }
 
-static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst,
-				   const struct dsa_device_ops *tag_ops)
-{
-	const struct dsa_device_ops *old_tag_ops = dst->tag_ops;
-	struct dsa_notifier_tag_proto_info info;
-	int err;
-
-	dst->tag_ops = tag_ops;
-
-	/* Notify the switches from this tree about the connection
-	 * to the new tagger
-	 */
-	info.tag_ops = tag_ops;
-	err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info);
-	if (err && err != -EOPNOTSUPP)
-		goto out_disconnect;
-
-	/* Notify the old tagger about the disconnection from this tree */
-	info.tag_ops = old_tag_ops;
-	dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
-
-	return 0;
-
-out_disconnect:
-	info.tag_ops = tag_ops;
-	dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
-	dst->tag_ops = old_tag_ops;
-
-	return err;
-}
-
 /* Since the dsa/tagging sysfs device attribute is per master, the assumption
  * is that all DSA switches within a tree share the same tagger, otherwise
  * they would have formed disjoint trees (different "dsa,member" values).
@@ -1250,23 +1157,20 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
 		goto out_unlock;
 
 	list_for_each_entry(dp, &dst->ports, list) {
-		if (!dsa_port_is_user(dp))
+		if (!dsa_is_user_port(dp->ds, dp->index))
 			continue;
 
 		if (dp->slave->flags & IFF_UP)
 			goto out_unlock;
 	}
 
-	/* Notify the tag protocol change */
 	info.tag_ops = tag_ops;
 	err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
-	if (err)
-		return err;
-
-	err = dsa_tree_bind_tag_proto(dst, tag_ops);
 	if (err)
 		goto out_unwind_tagger;
 
+	dst->tag_ops = tag_ops;
+
 	rtnl_unlock();
 
 	return 0;
@@ -1284,8 +1188,8 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
 	struct dsa_switch_tree *dst = ds->dst;
 	struct dsa_port *dp;
 
-	dsa_switch_for_each_port(dp, ds)
-		if (dp->index == index)
+	list_for_each_entry(dp, &dst->ports, list)
+		if (dp->ds == ds && dp->index == index)
 			return dp;
 
 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
@@ -1294,6 +1198,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
 
 	dp->ds = ds;
 	dp->index = index;
+	dp->bridge_num = -1;
 
 	INIT_LIST_HEAD(&dp->list);
 	list_add_tail(&dp->list, &dst->ports);
@@ -1475,7 +1380,7 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
 		}
 
 		if (reg >= ds->num_ports) {
-			dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%u)\n",
+			dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
 				port, reg, ds->num_ports);
 			of_node_put(port);
 			err = -EINVAL;
@@ -1630,9 +1535,12 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
 
 static void dsa_switch_release_ports(struct dsa_switch *ds)
 {
+	struct dsa_switch_tree *dst = ds->dst;
 	struct dsa_port *dp, *next;
 
-	dsa_switch_for_each_port_safe(dp, next, ds) {
+	list_for_each_entry_safe(dp, next, &dst->ports, list) {
+		if (dp->ds != ds)
+			continue;
 		list_del(&dp->list);
 		kfree(dp);
 	}
@@ -1718,23 +1626,42 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch);
 void dsa_switch_shutdown(struct dsa_switch *ds)
 {
 	struct net_device *master, *slave_dev;
+	LIST_HEAD(unregister_list);
 	struct dsa_port *dp;
 
 	mutex_lock(&dsa2_mutex);
 	rtnl_lock();
 
-	dsa_switch_for_each_user_port(dp, ds) {
+	list_for_each_entry(dp, &ds->dst->ports, list) {
+		if (dp->ds != ds)
+			continue;
+
+		if (!dsa_port_is_user(dp))
+			continue;
+
 		master = dp->cpu_dp->master;
 		slave_dev = dp->slave;
 
 		netdev_upper_dev_unlink(master, slave_dev);
+		/* Just unlinking ourselves as uppers of the master is not
+		 * sufficient. When the master net device unregisters, that will
+		 * also call dev_close, which we will catch as NETDEV_GOING_DOWN
+		 * and trigger a dev_close on our own devices (dsa_slave_close).
+		 * In turn, that will call dev_mc_unsync on the master's net
+		 * device. If the master is also a DSA switch port, this will
+		 * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
+		 * its own master. Lockdep will complain about the fact that
+		 * all cascaded masters have the same dsa_master_addr_list_lock_key,
+		 * which it normally would not do if the cascaded masters would
+		 * be in a proper upper/lower relationship, which we've just
+		 * destroyed.
+		 * To suppress the lockdep warnings, let's actually unregister
+		 * the DSA slave interfaces too, to avoid the nonsensical
+		 * multicast address list synchronization on shutdown.
+		 */
+		unregister_netdevice_queue(slave_dev, &unregister_list);
 	}
-
-	/* Disconnect from further netdevice notifiers on the master,
-	 * since netdev_uses_dsa() will now return false.
-	 */
-	dsa_switch_for_each_cpu_port(dp, ds)
-		dp->master->dsa_ptr = NULL;
+	unregister_netdevice_many(&unregister_list);
 
 	rtnl_unlock();
 	mutex_unlock(&dsa2_mutex);
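The dsa2.c hunks above trade refcounted struct dsa_bridge tracking for the older scheme: a bare bitmap of in-use bridge numbers, plus a linear scan (dsa_bridge_num_find()) to decide when a number may be reused. The lifecycle can be sketched in isolation — pool size and names below are illustrative, only the find-lowest-free-bit / set / conditional-clear shape mirrors the code above:

#include <stdbool.h>

#define MAX_OFFLOADING_BRIDGES 32

static unsigned long fwd_offloading_bridges;	/* one bit per bridge_num */

/* Grab the lowest free number, as find_first_zero_bit()/set_bit() do
 * in dsa_bridge_num_get() above; -1 means "no offload slot left".
 */
static int bridge_num_get(int max)
{
	for (int n = 0; n < max && n < MAX_OFFLOADING_BRIDGES; n++)
		if (!(fwd_offloading_bridges & (1UL << n))) {
			fwd_offloading_bridges |= 1UL << n;
			return n;
		}
	return -1;
}

/* Release a number only once no port references it any more, which is
 * what the dsa_bridge_num_find() < 0 check above implements.
 */
static void bridge_num_put(int bridge_num, bool still_in_use)
{
	if (!still_in_use)
		fwd_offloading_bridges &= ~(1UL << bridge_num);
}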
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 23c79e91ac..33ab7d7af9 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -8,7 +8,6 @@
 #define __DSA_PRIV_H
 
 #include
-#include
 #include
 #include
 #include
@@ -25,6 +24,8 @@ enum {
 	DSA_NOTIFIER_FDB_DEL,
 	DSA_NOTIFIER_HOST_FDB_ADD,
 	DSA_NOTIFIER_HOST_FDB_DEL,
+	DSA_NOTIFIER_HSR_JOIN,
+	DSA_NOTIFIER_HSR_LEAVE,
 	DSA_NOTIFIER_LAG_CHANGE,
 	DSA_NOTIFIER_LAG_JOIN,
 	DSA_NOTIFIER_LAG_LEAVE,
@@ -36,8 +37,10 @@ enum {
 	DSA_NOTIFIER_VLAN_DEL,
 	DSA_NOTIFIER_MTU,
 	DSA_NOTIFIER_TAG_PROTO,
-	DSA_NOTIFIER_TAG_PROTO_CONNECT,
-	DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
+	DSA_NOTIFIER_MRP_ADD,
+	DSA_NOTIFIER_MRP_DEL,
+	DSA_NOTIFIER_MRP_ADD_RING_ROLE,
+	DSA_NOTIFIER_MRP_DEL_RING_ROLE,
 	DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
 	DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
 };
@@ -49,11 +52,10 @@ struct dsa_notifier_ageing_time_info {
 
 /* DSA_NOTIFIER_BRIDGE_* */
 struct dsa_notifier_bridge_info {
-	struct dsa_bridge bridge;
+	struct net_device *br;
 	int tree_index;
 	int sw_index;
 	int port;
-	bool tx_fwd_offload;
 };
 
 /* DSA_NOTIFIER_FDB_* */
@@ -101,6 +103,20 @@ struct dsa_notifier_tag_proto_info {
 	const struct dsa_device_ops *tag_ops;
 };
 
+/* DSA_NOTIFIER_MRP_* */
+struct dsa_notifier_mrp_info {
+	const struct switchdev_obj_mrp *mrp;
+	int sw_index;
+	int port;
+};
+
+/* DSA_NOTIFIER_MRP_* */
+struct dsa_notifier_mrp_ring_role_info {
+	const struct switchdev_obj_ring_role_mrp *mrp;
+	int sw_index;
+	int port;
+};
+
 /* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
 struct dsa_notifier_tag_8021q_vlan_info {
 	int tree_index;
@@ -123,6 +139,13 @@ struct dsa_switchdev_event_work {
 	bool host_addr;
 };
 
+/* DSA_NOTIFIER_HSR_* */
+struct dsa_notifier_hsr_info {
+	struct net_device *hsr;
+	int sw_index;
+	int port;
+};
+
 struct dsa_slave_priv {
 	/* Copy of CPU port xmit for faster access in slave transmit hot path */
 	struct sk_buff *	(*xmit)(struct sk_buff *skb,
@@ -234,13 +257,54 @@ int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
 			       const struct switchdev_obj_ring_role_mrp *mrp);
 int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
 			       const struct switchdev_obj_ring_role_mrp *mrp);
-int dsa_port_phylink_create(struct dsa_port *dp);
 int dsa_port_link_register_of(struct dsa_port *dp);
 void dsa_port_link_unregister_of(struct dsa_port *dp);
 int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
 void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
 int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
 void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
+extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
+
+static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
+						 const struct net_device *dev)
+{
+	return dsa_port_to_bridge_port(dp) == dev;
+}
+
+static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
+					    const struct net_device *bridge_dev)
+{
+	/* DSA ports connected to a bridge, and event was emitted
+	 * for the bridge.
+	 */
+	return dp->bridge_dev == bridge_dev;
+}
+
+/* Returns true if any port of this tree offloads the given net_device */
+static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
+						 const struct net_device *dev)
+{
+	struct dsa_port *dp;
+
+	list_for_each_entry(dp, &dst->ports, list)
+		if (dsa_port_offloads_bridge_port(dp, dev))
+			return true;
+
+	return false;
+}
+
+/* Returns true if any port of this tree offloads the given bridge */
+static inline bool dsa_tree_offloads_bridge(struct dsa_switch_tree *dst,
+					    const struct net_device *bridge_dev)
+{
+	struct dsa_port *dp;
+
+	list_for_each_entry(dp, &dst->ports, list)
+		if (dsa_port_offloads_bridge(dp, bridge_dev))
+			return true;
+
+	return false;
+}
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
@@ -280,7 +344,7 @@ dsa_slave_to_master(const struct net_device *dev)
 static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
 {
 	struct dsa_port *dp = dsa_slave_to_port(skb->dev);
-	struct net_device *br = dsa_port_bridge_dev_get(dp);
+	struct net_device *br = dp->bridge_dev;
 	struct net_device *dev = skb->dev;
 	struct net_device *upper_dev;
 	u16 vid, pvid, proto;
@@ -350,7 +414,7 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
 		if (dp->type != DSA_PORT_TYPE_USER)
 			continue;
 
-		if (!dp->bridge)
+		if (!dp->bridge_dev)
 			continue;
 
 		if (dp->stp_state != BR_STATE_LEARNING &&
@@ -379,7 +443,7 @@
 /* If the ingress port offloads the bridge, we mark the frame as autonomously
 * forwarded by hardware, so the software bridge doesn't forward in twice, back
 * to us, because we already did. However, if we're in fallback mode and we do
- * software bridging, we are not offloading it, therefore the dp->bridge
+ * software bridging, we are not offloading it, therefore the dp->bridge_dev
 * pointer is not populated, and flooding needs to be done by software (we are
 * effectively operating in standalone ports mode).
 */
@@ -387,7 +451,7 @@
 static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
 {
 	struct dsa_port *dp = dsa_slave_to_port(skb->dev);
 
-	skb->offload_fwd_mark = !!(dp->bridge);
+	skb->offload_fwd_mark = !!(dp->bridge_dev);
 }
 
 /* Helper for removing DSA header tags from packets in the RX path.
@@ -481,11 +545,8 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
 			      struct net_device *master,
 			      const struct dsa_device_ops *tag_ops,
 			      const struct dsa_device_ops *old_tag_ops);
-unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
-void dsa_bridge_num_put(const struct net_device *bridge_dev,
-			unsigned int bridge_num);
-struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
-					const struct net_device *br);
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num);
 
 /* tag_8021q.c */
 int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
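The inline helpers restored in dsa_priv.h above (dsa_port_offloads_bridge(), dsa_tree_offloads_bridge_port() and friends) are the building blocks event handlers use to decide whether a switchdev notification targets something this tree offloads. A hedged sketch of the typical call site — the slave.c hunks further down do essentially this in dsa_foreign_dev_check(), so treat it as a usage illustration, not new API:

/* Sketch: should an event on 'dev' be programmed into hardware by
 * this tree?  tree_should_offload() is an invented name.
 */
static bool tree_should_offload(struct dsa_switch_tree *dst,
				const struct net_device *dev)
{
	if (netif_is_bridge_master(dev))
		return dsa_tree_offloads_bridge(dst, dev);

	if (netif_is_bridge_port(dev))
		return dsa_tree_offloads_bridge_port(dst, dev);

	return false;
}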
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 880f910b23..e8e1985762 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -260,21 +260,16 @@ static void dsa_netdev_ops_set(struct net_device *dev,
 	dev->dsa_ptr->netdev_ops = ops;
 }
 
-/* Keep the master always promiscuous if the tagging protocol requires that
- * (garbles MAC DA) or if it doesn't support unicast filtering, case in which
- * it would revert to promiscuous mode as soon as we call dev_uc_add() on it
- * anyway.
- */
 static void dsa_master_set_promiscuity(struct net_device *dev, int inc)
 {
 	const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops;
 
-	if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master)
+	if (!ops->promisc_on_master)
 		return;
 
-	ASSERT_RTNL();
-
+	rtnl_lock();
 	dev_set_promiscuity(dev, inc);
+	rtnl_unlock();
 }
 
 static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
@@ -335,13 +330,28 @@ static const struct attribute_group dsa_group = {
 	.attrs	= dsa_slave_attrs,
 };
 
+static void dsa_master_reset_mtu(struct net_device *dev)
+{
+	int err;
+
+	rtnl_lock();
+	err = dev_set_mtu(dev, ETH_DATA_LEN);
+	if (err)
+		netdev_dbg(dev,
+			   "Unable to reset MTU to exclude DSA overheads\n");
+	rtnl_unlock();
+}
+
 static struct lock_class_key dsa_master_addr_list_lock_key;
 
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 {
+	const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops;
 	struct dsa_switch *ds = cpu_dp->ds;
 	struct device_link *consumer_link;
-	int ret;
+	int mtu, ret;
+
+	mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops);
 
 	/* The DSA master must use SET_NETDEV_DEV for this to work. */
 	consumer_link = device_link_add(ds->dev, dev->dev.parent,
@@ -351,6 +361,13 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 			    "Failed to create a device link to DSA switch %s\n",
 			    dev_name(ds->dev));
 
+	rtnl_lock();
+	ret = dev_set_mtu(dev, mtu);
+	rtnl_unlock();
+	if (ret)
+		netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n",
+			    ret, mtu);
+
 	/* If we use a tagging format that doesn't have an ethertype
 	 * field, make sure that all packets from this point on get
 	 * sent to the tag format's receive function.
@@ -388,6 +405,7 @@ void dsa_master_teardown(struct net_device *dev)
 	sysfs_remove_group(&dev->dev.kobj, &dsa_group);
 	dsa_netdev_ops_set(dev, NULL);
 	dsa_master_ethtool_teardown(dev);
+	dsa_master_reset_mtu(dev);
 	dsa_master_set_promiscuity(dev, -1);
 
 	dev->dsa_ptr = NULL;
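One consequence of the master.c hunks above deserves spelling out: dsa_master_setup() grows the master's MTU by the tagger's overhead so that a full 1500-byte frame on a slave port still fits once the switch tag has been pushed, and dsa_master_teardown() undoes it via dsa_master_reset_mtu(). The arithmetic, isolated (this assumes the in-tree dsa_tag_protocol_overhead() helper visible in the hunk, which sums the tagger's needed headroom and tailroom):

/* Sketch of the MTU computation in dsa_master_setup() above.  For a
 * tagger needing, say, 4 bytes of headroom and no tailroom, the master
 * ends up at 1504 so slave ports can carry 1500-byte payloads.
 */
static int master_mtu_for(const struct dsa_device_ops *tag_ops)
{
	return ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops);
}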
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 1a40c52f5a..616330a16d 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -130,7 +130,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
 			return err;
 	}
 
-	if (!dp->bridge)
+	if (!dp->bridge_dev)
 		dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false);
 
 	if (dp->pl)
@@ -158,7 +158,7 @@ void dsa_port_disable_rt(struct dsa_port *dp)
 	if (dp->pl)
 		phylink_stop(dp->pl);
 
-	if (!dp->bridge)
+	if (!dp->bridge_dev)
 		dsa_port_set_state_now(dp, BR_STATE_DISABLED, false);
 
 	if (ds->ops->port_disable)
@@ -221,7 +221,7 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
 					 struct netlink_ext_ack *extack)
 {
 	struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
-	struct net_device *br = dsa_port_bridge_dev_get(dp);
+	struct net_device *br = dp->bridge_dev;
 	int err;
 
 	err = dsa_port_inherit_brport_flags(dp, extack);
@@ -270,55 +270,52 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
 	 */
 }
 
-static int dsa_port_bridge_create(struct dsa_port *dp,
-				  struct net_device *br,
-				  struct netlink_ext_ack *extack)
+static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp,
+					     struct net_device *bridge_dev)
 {
+	int bridge_num = dp->bridge_num;
 	struct dsa_switch *ds = dp->ds;
-	struct dsa_bridge *bridge;
 
-	bridge = dsa_tree_bridge_find(ds->dst, br);
-	if (bridge) {
-		refcount_inc(&bridge->refcount);
-		dp->bridge = bridge;
-		return 0;
-	}
-
-	bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
-	if (!bridge)
-		return -ENOMEM;
-
-	refcount_set(&bridge->refcount, 1);
-
-	bridge->dev = br;
-
-	bridge->num = dsa_bridge_num_get(br, ds->max_num_bridges);
-	if (ds->max_num_bridges && !bridge->num) {
-		NL_SET_ERR_MSG_MOD(extack,
-				   "Range of offloadable bridges exceeded");
-		kfree(bridge);
-		return -EOPNOTSUPP;
-	}
-
-	dp->bridge = bridge;
-
-	return 0;
-}
-
-static void dsa_port_bridge_destroy(struct dsa_port *dp,
-				    const struct net_device *br)
-{
-	struct dsa_bridge *bridge = dp->bridge;
-
-	dp->bridge = NULL;
-
-	if (!refcount_dec_and_test(&bridge->refcount))
+	/* No bridge TX forwarding offload => do nothing */
+	if (!ds->ops->port_bridge_tx_fwd_unoffload || dp->bridge_num == -1)
 		return;
 
-	if (bridge->num)
-		dsa_bridge_num_put(br, bridge->num);
+	dp->bridge_num = -1;
 
-	kfree(bridge);
+	dsa_bridge_num_put(bridge_dev, bridge_num);
+
+	/* Notify the chips only once the offload has been deactivated, so
+	 * that they can update their configuration accordingly.
+	 */
+	ds->ops->port_bridge_tx_fwd_unoffload(ds, dp->index, bridge_dev,
+					      bridge_num);
+}
+
+static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp,
+					   struct net_device *bridge_dev)
+{
+	struct dsa_switch *ds = dp->ds;
+	int bridge_num, err;
+
+	if (!ds->ops->port_bridge_tx_fwd_offload)
+		return false;
+
+	bridge_num = dsa_bridge_num_get(bridge_dev,
+					ds->num_fwd_offloading_bridges);
+	if (bridge_num < 0)
+		return false;
+
+	dp->bridge_num = bridge_num;
+
+	/* Notify the driver */
+	err = ds->ops->port_bridge_tx_fwd_offload(ds, dp->index, bridge_dev,
+						  bridge_num);
+	if (err) {
+		dsa_port_bridge_tx_fwd_unoffload(dp, bridge_dev);
+		return false;
+	}
+
+	return true;
 }
 
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
@@ -328,32 +325,30 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
 		.tree_index = dp->ds->dst->index,
 		.sw_index = dp->ds->index,
 		.port = dp->index,
+		.br = br,
 	};
 	struct net_device *dev = dp->slave;
 	struct net_device *brport_dev;
+	bool tx_fwd_offload;
 	int err;
 
 	/* Here the interface is already bridged. Reflect the current
 	 * configuration so that drivers can program their chips accordingly.
 	 */
-	err = dsa_port_bridge_create(dp, br, extack);
-	if (err)
-		return err;
+	dp->bridge_dev = br;
 
 	brport_dev = dsa_port_to_bridge_port(dp);
 
-	info.bridge = *dp->bridge;
 	err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
 	if (err)
 		goto out_rollback;
 
-	/* Drivers which support bridge TX forwarding should set this */
-	dp->bridge->tx_fwd_offload = info.tx_fwd_offload;
+	tx_fwd_offload = dsa_port_bridge_tx_fwd_offload(dp, br);
 
 	err = switchdev_bridge_port_offload(brport_dev, dev, dp,
 					    &dsa_slave_switchdev_notifier,
 					    &dsa_slave_switchdev_blocking_notifier,
-					    dp->bridge->tx_fwd_offload, extack);
+					    tx_fwd_offload, extack);
 	if (err)
 		goto out_rollback_unbridge;
 
@@ -370,7 +365,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
 out_rollback_unbridge:
 	dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
 out_rollback:
-	dsa_port_bridge_destroy(dp, br);
+	dp->bridge_dev = NULL;
 	return err;
 }
 
@@ -385,8 +380,6 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
 	switchdev_bridge_port_unoffload(brport_dev, dp,
 					&dsa_slave_switchdev_notifier,
 					&dsa_slave_switchdev_blocking_notifier);
-
-	dsa_flush_workqueue();
 }
 
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
@@ -395,21 +388,16 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 		.tree_index = dp->ds->dst->index,
 		.sw_index = dp->ds->index,
 		.port = dp->index,
+		.br = br,
 	};
 	int err;
 
-	/* If the port could not be offloaded to begin with, then
-	 * there is nothing to do.
-	 */
-	if (!dp->bridge)
-		return;
-
-	info.bridge = *dp->bridge;
-
 	/* Here the port is already unbridged. Reflect the current configuration
 	 * so that drivers can program their chips accordingly.
 	 */
-	dsa_port_bridge_destroy(dp, br);
+	dp->bridge_dev = NULL;
+
+	dsa_port_bridge_tx_fwd_unoffload(dp, br);
 
 	err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
 	if (err)
@@ -487,15 +475,12 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
 
 void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
 {
-	struct net_device *br = dsa_port_bridge_dev_get(dp);
-
-	if (br)
-		dsa_port_pre_bridge_leave(dp, br);
+	if (dp->bridge_dev)
+		dsa_port_pre_bridge_leave(dp, dp->bridge_dev);
 }
 
 void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
 {
-	struct net_device *br = dsa_port_bridge_dev_get(dp);
 	struct dsa_notifier_lag_info info = {
 		.sw_index = dp->ds->index,
 		.port = dp->index,
@@ -509,8 +494,8 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
 	/* Port might have been part of a LAG that in turn was
 	 * attached to a bridge.
 	 */
-	if (br)
-		dsa_port_bridge_leave(dp, br);
+	if (dp->bridge_dev)
+		dsa_port_bridge_leave(dp, dp->bridge_dev);
 
 	dp->lag_tx_enabled = false;
 	dp->lag_dev = NULL;
@@ -530,17 +515,16 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
 					      struct netlink_ext_ack *extack)
 {
 	struct dsa_switch *ds = dp->ds;
-	struct dsa_port *other_dp;
-	int err;
+	int err, i;
 
 	/* VLAN awareness was off, so the question is "can we turn it on".
 	 * We may have had 8021q uppers, those need to go. Make sure we don't
 	 * enter an inconsistent state: deny changing the VLAN awareness state
 	 * as long as we have 8021q uppers.
 	 */
-	if (vlan_filtering && dsa_port_is_user(dp)) {
-		struct net_device *br = dsa_port_bridge_dev_get(dp);
+	if (vlan_filtering && dsa_is_user_port(ds, dp->index)) {
 		struct net_device *upper_dev, *slave = dp->slave;
+		struct net_device *br = dp->bridge_dev;
 		struct list_head *iter;
 
 		netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) {
@@ -573,16 +557,18 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
 	 * different ports of the same switch device and one of them has a
 	 * different setting than what is being requested.
 	 */
-	dsa_switch_for_each_port(other_dp, ds) {
-		struct net_device *other_br = dsa_port_bridge_dev_get(other_dp);
+	for (i = 0; i < ds->num_ports; i++) {
+		struct net_device *other_bridge;
 
+		other_bridge = dsa_to_port(ds, i)->bridge_dev;
+		if (!other_bridge)
+			continue;
 		/* If it's the same bridge, it also has same
 		 * vlan_filtering setting => no need to check
 		 */
-		if (!other_br || other_br == dsa_port_bridge_dev_get(dp))
+		if (other_bridge == dp->bridge_dev)
 			continue;
-
-		if (br_vlan_enabled(other_br) != vlan_filtering) {
+		if (br_vlan_enabled(other_bridge) != vlan_filtering) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "VLAN filtering is a global setting");
 			return false;
@@ -621,16 +607,20 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 		return err;
 
 	if (ds->vlan_filtering_is_global) {
-		struct dsa_port *other_dp;
+		int port;
 
 		ds->vlan_filtering = vlan_filtering;
 
-		dsa_switch_for_each_user_port(other_dp, ds) {
-			struct net_device *slave = dp->slave;
+		for (port = 0; port < ds->num_ports; port++) {
+			struct net_device *slave;
+
+			if (!dsa_is_user_port(ds, port))
+				continue;
 
 			/* We might be called in the unbind path, so not
 			 * all slave devices might still be registered.
 			 */
+			slave = dsa_to_port(ds, port)->slave;
 			if (!slave)
 				continue;
 
@@ -666,13 +656,13 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 */
 bool dsa_port_skip_vlan_configuration(struct dsa_port *dp)
 {
-	struct net_device *br = dsa_port_bridge_dev_get(dp);
 	struct dsa_switch *ds = dp->ds;
 
-	if (!br)
+	if (!dp->bridge_dev)
 		return false;
 
-	return !ds->configure_vlan_while_not_filtering && !br_vlan_enabled(br);
+	return (!ds->configure_vlan_while_not_filtering &&
+		!br_vlan_enabled(dp->bridge_dev));
 }
 
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock)
@@ -788,15 +778,9 @@ int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
 	struct dsa_port *cpu_dp = dp->cpu_dp;
 	int err;
 
-	/* Avoid a call to __dev_set_promiscuity() on the master, which
-	 * requires rtnl_lock(), since we can't guarantee that is held here,
-	 * and we can't take it either.
-	 */
-	if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
-		err = dev_uc_add(cpu_dp->master, addr);
-		if (err)
-			return err;
-	}
+	err = dev_uc_add(cpu_dp->master, addr);
+	if (err)
+		return err;
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
 }
@@ -813,11 +797,9 @@ int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
 	struct dsa_port *cpu_dp = dp->cpu_dp;
 	int err;
 
-	if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
-		err = dev_uc_del(cpu_dp->master, addr);
-		if (err)
-			return err;
-	}
+	err = dev_uc_del(cpu_dp->master, addr);
+	if (err)
+		return err;
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
 }
@@ -922,45 +904,49 @@ int dsa_port_vlan_del(struct dsa_port *dp,
 int dsa_port_mrp_add(const struct dsa_port *dp,
 		     const struct switchdev_obj_mrp *mrp)
 {
-	struct dsa_switch *ds = dp->ds;
+	struct dsa_notifier_mrp_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.mrp = mrp,
+	};
 
-	if (!ds->ops->port_mrp_add)
-		return -EOPNOTSUPP;
-
-	return ds->ops->port_mrp_add(ds, dp->index, mrp);
+	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD, &info);
 }
 
 int dsa_port_mrp_del(const struct dsa_port *dp,
 		     const struct switchdev_obj_mrp *mrp)
 {
-	struct dsa_switch *ds = dp->ds;
+	struct dsa_notifier_mrp_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.mrp = mrp,
+	};
 
-	if (!ds->ops->port_mrp_del)
-		return -EOPNOTSUPP;
-
-	return ds->ops->port_mrp_del(ds, dp->index, mrp);
+	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL, &info);
 }
 
 int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
 			       const struct switchdev_obj_ring_role_mrp *mrp)
 {
-	struct dsa_switch *ds = dp->ds;
+	struct dsa_notifier_mrp_ring_role_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.mrp = mrp,
+	};
 
-	if (!ds->ops->port_mrp_add_ring_role)
-		return -EOPNOTSUPP;
-
-	return ds->ops->port_mrp_add_ring_role(ds, dp->index, mrp);
+	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD_RING_ROLE, &info);
}
 
 int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
 			       const struct switchdev_obj_ring_role_mrp *mrp)
 {
-	struct dsa_switch *ds = dp->ds;
+	struct dsa_notifier_mrp_ring_role_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.mrp = mrp,
+	};
 
-	if (!ds->ops->port_mrp_del_ring_role)
-		return -EOPNOTSUPP;
-
-	return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp);
+	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL_RING_ROLE, &info);
 }
 
 void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
@@ -996,11 +982,8 @@ static void dsa_port_phylink_validate(struct phylink_config *config,
 	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
 	struct dsa_switch *ds = dp->ds;
 
-	if (!ds->ops->phylink_validate) {
-		if (config->mac_capabilities)
-			phylink_generic_validate(config, supported, state);
+	if (!ds->ops->phylink_validate)
 		return;
-	}
 
 	ds->ops->phylink_validate(ds, dp->index, supported, state);
 }
@@ -1058,7 +1041,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
 	struct phy_device *phydev = NULL;
 	struct dsa_switch *ds = dp->ds;
 
-	if (dsa_port_is_user(dp))
+	if (dsa_is_user_port(ds, dp->index))
 		phydev = dp->slave->phydev;
 
 	if (!ds->ops->phylink_mac_link_down) {
@@ -1090,7 +1073,7 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
 				     speed, duplex, tx_pause, rx_pause);
 }
 
-static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
+const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
 	.validate = dsa_port_phylink_validate,
 	.mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
 	.mac_config = dsa_port_phylink_mac_config,
@@ -1099,36 +1082,6 @@ static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
 	.mac_link_up = dsa_port_phylink_mac_link_up,
 };
 
-int dsa_port_phylink_create(struct dsa_port *dp)
-{
-	struct dsa_switch *ds = dp->ds;
-	phy_interface_t mode;
-	int err;
-
-	err = of_get_phy_mode(dp->dn, &mode);
-	if (err)
-		mode = PHY_INTERFACE_MODE_NA;
-
-	/* Presence of phylink_mac_link_state or phylink_mac_an_restart is
-	 * an indicator of a legacy phylink driver.
-	 */
-	if (ds->ops->phylink_mac_link_state ||
-	    ds->ops->phylink_mac_an_restart)
-		dp->pl_config.legacy_pre_march2020 = true;
-
-	if (ds->ops->phylink_get_caps)
-		ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
-
-	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
-				mode, &dsa_port_phylink_mac_ops);
-	if (IS_ERR(dp->pl)) {
-		pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
-		return PTR_ERR(dp->pl);
-	}
-
-	return 0;
-}
-
 static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
 {
 	struct dsa_switch *ds = dp->ds;
@@ -1205,15 +1158,23 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
 {
 	struct dsa_switch *ds = dp->ds;
 	struct device_node *port_dn = dp->dn;
+	phy_interface_t mode;
 	int err;
 
+	err = of_get_phy_mode(port_dn, &mode);
+	if (err)
+		mode = PHY_INTERFACE_MODE_NA;
+
 	dp->pl_config.dev = ds->dev;
 	dp->pl_config.type = PHYLINK_DEV;
 	dp->pl_config.pcs_poll = ds->pcs_poll;
 
-	err = dsa_port_phylink_create(dp);
-	if (err)
-		return err;
+	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
+				mode, &dsa_port_phylink_mac_ops);
+	if (IS_ERR(dp->pl)) {
+		pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
+		return PTR_ERR(dp->pl);
+	}
 
 	err = phylink_of_phy_connect(dp->pl, port_dn, 0);
 	if (err && err != -ENODEV) {
@@ -1332,15 +1293,16 @@ EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
 
 int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
 {
-	struct dsa_switch *ds = dp->ds;
+	struct dsa_notifier_hsr_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.hsr = hsr,
+	};
 	int err;
 
-	if (!ds->ops->port_hsr_join)
-		return -EOPNOTSUPP;
-
 	dp->hsr_dev = hsr;
 
-	err = ds->ops->port_hsr_join(ds, dp->index, hsr);
+	err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_JOIN, &info);
 	if (err)
 		dp->hsr_dev = NULL;
 
@@ -1349,18 +1311,20 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
 
 void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
 {
-	struct dsa_switch *ds = dp->ds;
+	struct dsa_notifier_hsr_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.hsr = hsr,
+	};
 	int err;
 
 	dp->hsr_dev = NULL;
 
-	if (ds->ops->port_hsr_leave) {
-		err = ds->ops->port_hsr_leave(ds, dp->index, hsr);
-		if (err)
-			dev_err(dp->ds->dev,
-				"port %d failed to leave HSR %s: %pe\n",
-				dp->index, hsr->name, ERR_PTR(err));
-	}
+	err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
+	if (err)
+		dev_err(dp->ds->dev,
+			"port %d failed to notify DSA_NOTIFIER_HSR_LEAVE: %pe\n",
+			dp->index, ERR_PTR(err));
 }
 
 int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
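The port.c hunks above turn the MRP and HSR entry points back into cross-chip notifier events: the caller packs a small info struct and every switch in the tree gets a chance to react. The general recipe, shown for a hypothetical DSA_NOTIFIER_FOO event — the enum value, info struct and handler below are invented for illustration, while dsa_port_notify() is the real dispatch helper used above:

/* 1) dsa_priv.h side: a new event gets an enum entry plus a payload
 * struct carrying enough routing information (sw_index, port).
 */
struct dsa_notifier_foo_info {
	int sw_index;
	int port;
};

/* 2) port.c side: emit the event on behalf of one port */
static int dsa_port_foo(const struct dsa_port *dp)
{
	struct dsa_notifier_foo_info info = {
		.sw_index = dp->ds->index,
		.port = dp->index,
	};

	return dsa_port_notify(dp, DSA_NOTIFIER_FOO, &info);
}

/* 3) switch.c side: the dsa_switch_event() dispatcher calls a handler
 * like this on every switch in the tree; each one checks whether the
 * event targets it before touching hardware.
 */
static int dsa_switch_foo(struct dsa_switch *ds,
			  struct dsa_notifier_foo_info *info)
{
	if (ds->index != info->sw_index)
		return 0;

	/* ... program the hardware via a ds->ops callback here ... */
	return 0;
}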
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 22241afcac..a2bf2d8ac6 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -174,7 +174,7 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
 		dev_uc_del(master, dev->dev_addr);
 
 out:
-	eth_hw_addr_set(dev, addr->sa_data);
+	ether_addr_copy(dev->dev_addr, addr->sa_data);
 
 	return 0;
 }
@@ -289,14 +289,14 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
 		ret = dsa_port_set_state(dp, attr->u.stp_state, true);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
 			return -EOPNOTSUPP;
 
 		ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
 					      extack);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
-		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
 			return -EOPNOTSUPP;
 
 		ret = dsa_port_ageing_time(dp, attr->u.ageing_time);
@@ -363,7 +363,7 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	/* Deny adding a bridge VLAN when there is already an 802.1Q upper with
 	 * the same VID.
 	 */
-	if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) {
+	if (br_vlan_enabled(dp->bridge_dev)) {
 		rcu_read_lock();
 		err = dsa_slave_vlan_check_for_8021q_uppers(dev, &vlan);
 		rcu_read_unlock();
@@ -409,7 +409,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
 		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
-		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
 		err = dsa_port_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
@@ -421,13 +421,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
 		err = dsa_slave_vlan_add(dev, obj, extack);
 		break;
 	case SWITCHDEV_OBJ_ID_MRP:
-		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
 		err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
-		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
 		err = dsa_port_mrp_add_ring_role(dp,
@@ -483,7 +483,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
 		err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
-		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
 		err = dsa_port_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
@@ -495,13 +495,13 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
 		err = dsa_slave_vlan_del(dev, obj);
 		break;
 	case SWITCHDEV_OBJ_ID_MRP:
-		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
 		err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
-		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
 		err = dsa_port_mrp_del_ring_role(dp,
@@ -789,37 +789,6 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 	return -EOPNOTSUPP;
 }
 
-static void dsa_slave_get_eth_phy_stats(struct net_device *dev,
-					struct ethtool_eth_phy_stats *phy_stats)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (ds->ops->get_eth_phy_stats)
-		ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats);
-}
-
-static void dsa_slave_get_eth_mac_stats(struct net_device *dev,
-					struct ethtool_eth_mac_stats *mac_stats)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (ds->ops->get_eth_mac_stats)
-		ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats);
-}
-
-static void
-dsa_slave_get_eth_ctrl_stats(struct net_device *dev,
-			     struct ethtool_eth_ctrl_stats *ctrl_stats)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (ds->ops->get_eth_ctrl_stats)
-		ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats);
-}
-
 static void dsa_slave_net_selftest(struct net_device *ndev,
 				   struct ethtool_test *etest, u64 *buf)
 {
@@ -1564,7 +1533,7 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp)
 	if (!dp->ds->mtu_enforcement_ingress)
 		return;
 
-	if (!dp->bridge)
+	if (!dp->bridge_dev)
 		return;
 
 	INIT_LIST_HEAD(&hw_port_list);
@@ -1580,7 +1549,7 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp)
 		if (other_dp->type != DSA_PORT_TYPE_USER)
 			continue;
 
-		if (!dsa_port_bridge_same(dp, other_dp))
+		if (other_dp->bridge_dev != dp->bridge_dev)
 			continue;
 
 		if (!other_dp->ds->mtu_enforcement_ingress)
@@ -1726,9 +1695,6 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_strings		= dsa_slave_get_strings,
 	.get_ethtool_stats	= dsa_slave_get_ethtool_stats,
 	.get_sset_count		= dsa_slave_get_sset_count,
-	.get_eth_phy_stats	= dsa_slave_get_eth_phy_stats,
-	.get_eth_mac_stats	= dsa_slave_get_eth_mac_stats,
-	.get_eth_ctrl_stats	= dsa_slave_get_eth_ctrl_stats,
 	.set_wol		= dsa_slave_set_wol,
 	.get_wol		= dsa_slave_get_wol,
 	.set_eee		= dsa_slave_set_eee,
@@ -1851,9 +1817,14 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
 	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
 	struct device_node *port_dn = dp->dn;
 	struct dsa_switch *ds = dp->ds;
+	phy_interface_t mode;
 	u32 phy_flags = 0;
 	int ret;
 
+	ret = of_get_phy_mode(port_dn, &mode);
+	if (ret)
+		mode = PHY_INTERFACE_MODE_NA;
+
 	dp->pl_config.dev = &slave_dev->dev;
 	dp->pl_config.type = PHYLINK_NETDEV;
 
@@ -1866,9 +1837,13 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
 		dp->pl_config.poll_fixed_state = true;
 	}
 
-	ret = dsa_port_phylink_create(dp);
-	if (ret)
-		return ret;
+	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode,
+				&dsa_port_phylink_mac_ops);
+	if (IS_ERR(dp->pl)) {
+		netdev_err(slave_dev,
+			   "error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
+		return PTR_ERR(dp->pl);
+	}
 
 	if (ds->ops->get_phy_flags)
 		phy_flags = ds->ops->get_phy_flags(ds, dp->index);
@@ -1979,7 +1954,7 @@ int dsa_slave_create(struct dsa_port *port)
 	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
 
 	if (!is_zero_ether_addr(port->mac))
-		eth_hw_addr_set(slave_dev, port->mac);
+		ether_addr_copy(slave_dev->dev_addr, port->mac);
 	else
 		eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->priv_flags |= IFF_NO_QUEUE;
@@ -2011,6 +1986,13 @@ int dsa_slave_create(struct dsa_port *port)
 	port->slave = slave_dev;
 	dsa_slave_setup_tagger(slave_dev);
 
+	rtnl_lock();
+	ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN);
+	rtnl_unlock();
+	if (ret && ret != -EOPNOTSUPP)
+		dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n",
+			 ret, ETH_DATA_LEN, port->index);
+
 	netif_carrier_off(slave_dev);
 
 	ret = dsa_slave_phy_setup(slave_dev);
@@ -2023,11 +2005,6 @@ int dsa_slave_create(struct dsa_port *port)
 
 	rtnl_lock();
 
-	ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN);
-	if (ret && ret != -EOPNOTSUPP)
-		dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n",
-			 ret, ETH_DATA_LEN, port->index);
-
 	ret = register_netdevice(slave_dev);
 	if (ret) {
 		netdev_err(master, "error %d registering interface %s\n",
@@ -2218,7 +2195,7 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
 {
 	struct netlink_ext_ack *ext_ack;
-	struct net_device *slave, *br;
+	struct net_device *slave;
 	struct dsa_port *dp;
 
 	ext_ack = netdev_notifier_info_to_extack(&info->info);
@@ -2231,12 +2208,11 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev,
 		return NOTIFY_DONE;
 
 	dp = dsa_slave_to_port(slave);
-	br = dsa_port_bridge_dev_get(dp);
-	if (!br)
+	if (!dp->bridge_dev)
 		return NOTIFY_DONE;
 
 	/* Deny enslaving a VLAN device into a VLAN-aware bridge */
-	if (br_vlan_enabled(br) &&
+	if (br_vlan_enabled(dp->bridge_dev) &&
 	    netif_is_bridge_master(info->upper_dev) && info->linking) {
 		NL_SET_ERR_MSG_MOD(ext_ack,
 				   "Cannot enslave VLAN device into VLAN aware bridge");
@@ -2251,7 +2227,7 @@ dsa_slave_check_8021q_upper(struct net_device *dev,
 			    struct netdev_notifier_changeupper_info *info)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct net_device *br = dsa_port_bridge_dev_get(dp);
+	struct net_device *br = dp->bridge_dev;
 	struct bridge_vlan_info br_info;
 	struct netlink_ext_ack *extack;
 	int err = NOTIFY_DONE;
@@ -2358,7 +2334,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
 		dst = cpu_dp->ds->dst;
 
 		list_for_each_entry(dp, &dst->ports, list) {
-			if (!dsa_port_is_user(dp))
+			if (!dsa_is_user_port(dp->ds, dp->index))
 				continue;
 
 			list_add(&dp->slave->close_list, &close_list);
@@ -2403,6 +2379,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
 
 	dp = dsa_to_port(ds, switchdev_work->port);
 
+	rtnl_lock();
 	switch (switchdev_work->event) {
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
 		if (switchdev_work->host_addr)
@@ -2437,7 +2414,9 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
 
 		break;
 	}
+	rtnl_unlock();
 
+	dev_put(switchdev_work->dev);
 	kfree(switchdev_work);
 }
 
@@ -2448,7 +2427,7 @@ static bool dsa_foreign_dev_check(const struct net_device *dev,
 	struct dsa_switch_tree *dst = dp->ds->dst;
 
 	if (netif_is_bridge_master(foreign_dev))
-		return !dsa_tree_offloads_bridge_dev(dst, foreign_dev);
+		return !dsa_tree_offloads_bridge(dst, foreign_dev);
 
 	if (netif_is_bridge_port(foreign_dev))
 		return !dsa_tree_offloads_bridge_port(dst, foreign_dev);
@@ -2458,9 +2437,10 @@ static bool dsa_foreign_dev_check(const struct net_device *dev,
 }
 
 static int dsa_slave_fdb_event(struct net_device *dev,
-			       struct net_device *orig_dev,
-			       unsigned long event, const void *ctx,
-			       const struct switchdev_notifier_fdb_info *fdb_info)
+			       const struct net_device *orig_dev,
+			       const void *ctx,
+			       const struct switchdev_notifier_fdb_info *fdb_info,
+			       unsigned long event)
 {
 	struct dsa_switchdev_event_work *switchdev_work;
 	struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -2509,11 +2489,31 @@ static int dsa_slave_fdb_event(struct net_device *dev,
 	switchdev_work->vid = fdb_info->vid;
 	switchdev_work->host_addr = host_addr;
 
+	/* Hold a reference for dsa_fdb_offload_notify */
+	dev_hold(dev);
 	dsa_schedule_work(&switchdev_work->work);
 
 	return 0;
 }
 
+static int
+dsa_slave_fdb_add_to_device(struct net_device *dev,
+			    const struct net_device *orig_dev, const void *ctx,
+			    const struct switchdev_notifier_fdb_info *fdb_info)
+{
+	return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+				   SWITCHDEV_FDB_ADD_TO_DEVICE);
+}
+
+static int
+dsa_slave_fdb_del_to_device(struct net_device *dev,
+			    const struct net_device *orig_dev, const void *ctx,
+			    const struct switchdev_notifier_fdb_info *fdb_info)
+{
+	return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+				   SWITCHDEV_FDB_DEL_TO_DEVICE);
+}
+
 /* Called under rcu_read_lock() */
 static int dsa_slave_switchdev_event(struct notifier_block *unused,
 				     unsigned long event, void *ptr)
@@ -2528,12 +2528,18 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 						     dsa_slave_port_attr_set);
 		return notifier_from_errno(err);
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		err = switchdev_handle_fdb_add_to_device(dev, ptr,
+							 dsa_slave_dev_check,
+							 dsa_foreign_dev_check,
+							 dsa_slave_fdb_add_to_device,
+							 NULL);
+		return notifier_from_errno(err);
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
-		err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
-							   dsa_slave_dev_check,
-							   dsa_foreign_dev_check,
-							   dsa_slave_fdb_event,
-							   NULL);
+		err = switchdev_handle_fdb_del_to_device(dev, ptr,
+							 dsa_slave_dev_check,
+							 dsa_foreign_dev_check,
+							 dsa_slave_fdb_del_to_device,
+							 NULL);
 		return notifier_from_errno(err);
 	default:
 		return NOTIFY_DONE;
 	}
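A detail of the slave.c hunks above that is easy to miss: SWITCHDEV_FDB_*_TO_DEVICE notifications arrive in atomic context, so the handler only queues work, and the dev_hold() taken before dsa_schedule_work() is what keeps the netdev alive until the worker's dev_put(). The skeleton of that deferral, reduced to its essentials (a sketch with invented function names — the real code also stores the address, VID and port):

/* Worker side: may sleep, so it can take rtnl_lock() around the
 * hardware programming, then drop the reference taken at queue time.
 */
static void fdb_work_fn(struct work_struct *work)
{
	struct dsa_switchdev_event_work *w =
		container_of(work, struct dsa_switchdev_event_work, work);

	rtnl_lock();
	/* ... dsa_port_fdb_add()/dsa_port_fdb_del() as in the hunk above ... */
	rtnl_unlock();

	dev_put(w->dev);
	kfree(w);
}

/* Notifier side: atomic context, so only allocate, stash and defer. */
static int queue_fdb_work(struct net_device *dev, unsigned long event)
{
	struct dsa_switchdev_event_work *w;

	w = kzalloc(sizeof(*w), GFP_ATOMIC);
	if (!w)
		return -ENOMEM;

	INIT_WORK(&w->work, fdb_work_fn);
	w->dev = dev;
	w->event = event;

	dev_hold(dev);			/* dropped by fdb_work_fn() */
	dsa_schedule_work(&w->work);
	return 0;
}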
err = ds->ops->port_bridge_join(ds, info->port, info->bridge, - &info->tx_fwd_offload); + err = ds->ops->port_bridge_join(ds, info->port, info->br); if (err) return err; } @@ -105,7 +105,7 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, ds->ops->crosschip_bridge_join) { err = ds->ops->crosschip_bridge_join(ds, info->tree_index, info->sw_index, - info->port, info->bridge); + info->port, info->br); if (err) return err; } @@ -120,25 +120,23 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; bool vlan_filtering; - struct dsa_port *dp; - int err; + int err, port; if (dst->index == info->tree_index && ds->index == info->sw_index && ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, info->port, info->bridge); + ds->ops->port_bridge_leave(ds, info->port, info->br); if ((dst->index != info->tree_index || ds->index != info->sw_index) && ds->ops->crosschip_bridge_leave) ds->ops->crosschip_bridge_leave(ds, info->tree_index, info->sw_index, info->port, - info->bridge); + info->br); - if (ds->needs_standalone_vlan_filtering && - !br_vlan_enabled(info->bridge.dev)) { + if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->br)) { change_vlan_filtering = true; vlan_filtering = true; } else if (!ds->needs_standalone_vlan_filtering && - br_vlan_enabled(info->bridge.dev)) { + br_vlan_enabled(info->br)) { change_vlan_filtering = true; vlan_filtering = false; } @@ -152,10 +150,12 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * VLAN-aware bridge. */ if (change_vlan_filtering && ds->vlan_filtering_is_global) { - dsa_switch_for_each_port(dp, ds) { - struct net_device *br = dsa_port_bridge_dev_get(dp); + for (port = 0; port < ds->num_ports; port++) { + struct net_device *bridge_dev; - if (br && br_vlan_enabled(br)) { + bridge_dev = dsa_to_port(ds, port)->bridge_dev; + + if (bridge_dev && br_vlan_enabled(bridge_dev)) { change_vlan_filtering = false; break; } @@ -179,19 +179,19 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * DSA links) that sit between the targeted port on which the notifier was * emitted and its dedicated CPU port. 
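The loop restored above implements the rule for switches with global VLAN filtering: filtering may only be turned off on bridge leave if no remaining port of the switch is still under a VLAN-aware bridge. Factored out as a predicate, under this tree's dp->bridge_dev layout (a sketch, not a helper the patch adds):

static bool any_port_in_vlan_aware_bridge(struct dsa_switch *ds)
{
	int port;

	for (port = 0; port < ds->num_ports; port++) {
		struct net_device *br = dsa_to_port(ds, port)->bridge_dev;

		if (br && br_vlan_enabled(br))
			return true;	/* keep vlan_filtering on */
	}

	return false;
}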
*/ -static bool dsa_port_host_address_match(struct dsa_port *dp, - int info_sw_index, int info_port) +static bool dsa_switch_host_address_match(struct dsa_switch *ds, int port, + int info_sw_index, int info_port) { struct dsa_port *targeted_dp, *cpu_dp; struct dsa_switch *targeted_ds; - targeted_ds = dsa_switch_find(dp->ds->dst->index, info_sw_index); + targeted_ds = dsa_switch_find(ds->dst->index, info_sw_index); targeted_dp = dsa_to_port(targeted_ds, info_port); cpu_dp = targeted_dp->cpu_dp; - if (dsa_switch_is_upstream_of(dp->ds, targeted_ds)) - return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index, - cpu_dp->index); + if (dsa_switch_is_upstream_of(ds, targeted_ds)) + return port == dsa_towards_port(ds, cpu_dp->ds->index, + cpu_dp->index); return false; } @@ -209,36 +209,31 @@ static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list, return NULL; } -static int dsa_port_do_mdb_add(struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { - struct dsa_switch *ds = dp->ds; + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_mac_addr *a; - int port = dp->index; - int err = 0; + int err; /* No need to bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) return ds->ops->port_mdb_add(ds, port, mdb); - mutex_lock(&dp->addr_lists_lock); - a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid); if (a) { refcount_inc(&a->refcount); - goto out; + return 0; } a = kzalloc(sizeof(*a), GFP_KERNEL); - if (!a) { - err = -ENOMEM; - goto out; - } + if (!a) + return -ENOMEM; err = ds->ops->port_mdb_add(ds, port, mdb); if (err) { kfree(a); - goto out; + return err; } ether_addr_copy(a->addr, mdb->addr); @@ -246,80 +241,64 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp, refcount_set(&a->refcount, 1); list_add_tail(&a->list, &dp->mdbs); -out: - mutex_unlock(&dp->addr_lists_lock); - - return err; + return 0; } -static int dsa_port_do_mdb_del(struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { - struct dsa_switch *ds = dp->ds; + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_mac_addr *a; - int port = dp->index; - int err = 0; + int err; /* No need to bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) return ds->ops->port_mdb_del(ds, port, mdb); - mutex_lock(&dp->addr_lists_lock); - a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid); - if (!a) { - err = -ENOENT; - goto out; - } + if (!a) + return -ENOENT; if (!refcount_dec_and_test(&a->refcount)) - goto out; + return 0; err = ds->ops->port_mdb_del(ds, port, mdb); if (err) { refcount_set(&a->refcount, 1); - goto out; + return err; } list_del(&a->list); kfree(a); -out: - mutex_unlock(&dp->addr_lists_lock); - - return err; + return 0; } -static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid) +static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) { - struct dsa_switch *ds = dp->ds; + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_mac_addr *a; - int port = dp->index; - int err = 0; + int err; /* No need to bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) return ds->ops->port_fdb_add(ds, port, addr, vid); - mutex_lock(&dp->addr_lists_lock); - a = 
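dsa_switch_host_address_match() above decides, for each port of an upstream switch, whether it is the one that routes toward the targeted port's CPU port; only that port gets the host address installed. A userspace toy of the towards-port idea with a hand-written next-hop table (the kernel derives this from the DSA link topology):

#include <stdio.h>

#define NUM_SW 3

/* towards[a][b]: port on switch a leading toward switch b (toy data) */
static const int towards[NUM_SW][NUM_SW] = {
	{ -1,  4,  4 },	/* switch 0 reaches switches 1 and 2 via port 4 */
	{  5, -1,  6 },
	{  7,  7, -1 },
};

static int towards_port(int sw, int dest_sw, int dest_port)
{
	return sw == dest_sw ? dest_port : towards[sw][dest_sw];
}

int main(void)
{
	/* a host FDB entry targeted at switch 2, port 1, seen from switch 0 */
	printf("install on switch 0, port %d\n", towards_port(0, 2, 1));
	return 0;
}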
dsa_mac_addr_find(&dp->fdbs, addr, vid); if (a) { refcount_inc(&a->refcount); - goto out; + return 0; } a = kzalloc(sizeof(*a), GFP_KERNEL); - if (!a) { - err = -ENOMEM; - goto out; - } + if (!a) + return -ENOMEM; err = ds->ops->port_fdb_add(ds, port, addr, vid); if (err) { kfree(a); - goto out; + return err; } ether_addr_copy(a->addr, addr); @@ -327,63 +306,53 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, refcount_set(&a->refcount, 1); list_add_tail(&a->list, &dp->fdbs); -out: - mutex_unlock(&dp->addr_lists_lock); - - return err; + return 0; } -static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid) +static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) { - struct dsa_switch *ds = dp->ds; + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_mac_addr *a; - int port = dp->index; - int err = 0; + int err; /* No need to bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) return ds->ops->port_fdb_del(ds, port, addr, vid); - mutex_lock(&dp->addr_lists_lock); - a = dsa_mac_addr_find(&dp->fdbs, addr, vid); - if (!a) { - err = -ENOENT; - goto out; - } + if (!a) + return -ENOENT; if (!refcount_dec_and_test(&a->refcount)) - goto out; + return 0; err = ds->ops->port_fdb_del(ds, port, addr, vid); if (err) { refcount_set(&a->refcount, 1); - goto out; + return err; } list_del(&a->list); kfree(a); -out: - mutex_unlock(&dp->addr_lists_lock); - - return err; + return 0; } static int dsa_switch_host_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - struct dsa_port *dp; int err = 0; + int port; if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { - err = dsa_port_do_fdb_add(dp, info->addr, info->vid); + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_host_address_match(ds, port, info->sw_index, + info->port)) { + err = dsa_switch_do_fdb_add(ds, port, info->addr, + info->vid); if (err) break; } @@ -395,16 +364,17 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, static int dsa_switch_host_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - struct dsa_port *dp; int err = 0; + int port; if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { - err = dsa_port_do_fdb_del(dp, info->addr, info->vid); + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_host_address_match(ds, port, info->sw_index, + info->port)) { + err = dsa_switch_do_fdb_del(ds, port, info->addr, + info->vid); if (err) break; } @@ -417,24 +387,40 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); - struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return dsa_port_do_fdb_add(dp, info->addr, info->vid); + return dsa_switch_do_fdb_add(ds, port, info->addr, info->vid); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); - struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return dsa_port_do_fdb_del(dp, info->addr, info->vid); + return dsa_switch_do_fdb_del(ds, port, info->addr, info->vid); +} + +static int 
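The FDB and MDB paths above share one refcounting scheme for addresses on shared (CPU and DSA) ports: the first reference programs the hardware and the last removed reference unprograms it, so overlapping requests from multiple user ports do not fight. The scheme condensed, with a hypothetical program_hw callback standing in for ds->ops->port_fdb_add():

#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct ref_addr {
	struct list_head list;
	unsigned char addr[ETH_ALEN];
	u16 vid;
	refcount_t refcount;
};

static int ref_addr_add(struct list_head *db, const unsigned char *addr,
			u16 vid, int (*program_hw)(void))
{
	struct ref_addr *a;
	int err;

	list_for_each_entry(a, db, list) {
		if (ether_addr_equal(a->addr, addr) && a->vid == vid) {
			refcount_inc(&a->refcount);	/* already programmed */
			return 0;
		}
	}

	a = kzalloc(sizeof(*a), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	err = program_hw();	/* first user: program the entry */
	if (err) {
		kfree(a);
		return err;
	}

	ether_addr_copy(a->addr, addr);
	a->vid = vid;
	refcount_set(&a->refcount, 1);
	list_add_tail(&a->list, db);
	return 0;
}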
dsa_switch_hsr_join(struct dsa_switch *ds, + struct dsa_notifier_hsr_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_hsr_join) + return ds->ops->port_hsr_join(ds, info->port, info->hsr); + + return -EOPNOTSUPP; +} + +static int dsa_switch_hsr_leave(struct dsa_switch *ds, + struct dsa_notifier_hsr_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_hsr_leave) + return ds->ops->port_hsr_leave(ds, info->port, info->hsr); + + return -EOPNOTSUPP; } static int dsa_switch_lag_change(struct dsa_switch *ds, @@ -482,39 +468,37 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); - struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - return dsa_port_do_mdb_add(dp, info->mdb); + return dsa_switch_do_mdb_add(ds, port, info->mdb); } static int dsa_switch_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); - struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - return dsa_port_do_mdb_del(dp, info->mdb); + return dsa_switch_do_mdb_del(ds, port, info->mdb); } static int dsa_switch_host_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - struct dsa_port *dp; int err = 0; + int port; if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { - err = dsa_port_do_mdb_add(dp, info->mdb); + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_host_address_match(ds, port, info->sw_index, + info->port)) { + err = dsa_switch_do_mdb_add(ds, port, info->mdb); if (err) break; } @@ -526,16 +510,16 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds, static int dsa_switch_host_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - struct dsa_port *dp; int err = 0; + int port; if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { - err = dsa_port_do_mdb_del(dp, info->mdb); + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_host_address_match(ds, port, info->sw_index, + info->port)) { + err = dsa_switch_do_mdb_del(ds, port, info->mdb); if (err) break; } @@ -544,13 +528,13 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds, return err; } -static bool dsa_port_vlan_match(struct dsa_port *dp, - struct dsa_notifier_vlan_info *info) +static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, + struct dsa_notifier_vlan_info *info) { - if (dp->ds->index == info->sw_index && dp->index == info->port) + if (ds->index == info->sw_index && port == info->port) return true; - if (dsa_port_is_dsa(dp)) + if (dsa_is_dsa_port(ds, port)) return true; return false; @@ -559,15 +543,14 @@ static bool dsa_port_vlan_match(struct dsa_port *dp, static int dsa_switch_vlan_add(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { - struct dsa_port *dp; - int err; + int port, err; if (!ds->ops->port_vlan_add) return -EOPNOTSUPP; - dsa_switch_for_each_port(dp, ds) { - if (dsa_port_vlan_match(dp, info)) { - err = ds->ops->port_vlan_add(ds, dp->index, info->vlan, + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_vlan_match(ds, port, info)) { + err = ds->ops->port_vlan_add(ds, port, info->vlan, info->extack); if (err) return err; @@ -596,90 +579,92 @@ static int 
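The HSR handlers above follow the "targeted" cross-chip notifier shape: only the switch named by info->sw_index acts, and a switch without the operation reports -EOPNOTSUPP (the MRP handlers further down instead return 0 when not targeted, so the notifier chain does not fail for uninvolved switches). The targeted shape generically, with a hypothetical op callback:

static int targeted_notifier(struct dsa_switch *ds, int sw_index, int port,
			     int (*op)(struct dsa_switch *ds, int port))
{
	if (ds->index == sw_index && op)
		return op(ds, port);

	return -EOPNOTSUPP;
}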
dsa_switch_change_tag_proto(struct dsa_switch *ds, struct dsa_notifier_tag_proto_info *info) { const struct dsa_device_ops *tag_ops = info->tag_ops; - struct dsa_port *dp, *cpu_dp; - int err; + int port, err; if (!ds->ops->change_tag_protocol) return -EOPNOTSUPP; ASSERT_RTNL(); - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - err = ds->ops->change_tag_protocol(ds, cpu_dp->index, - tag_ops->proto); + for (port = 0; port < ds->num_ports; port++) { + if (!dsa_is_cpu_port(ds, port)) + continue; + + err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); if (err) return err; - dsa_port_set_tag_protocol(cpu_dp, tag_ops); + dsa_port_set_tag_protocol(dsa_to_port(ds, port), tag_ops); } /* Now that changing the tag protocol can no longer fail, let's update * the remaining bits which are "duplicated for faster access", and the * bits that depend on the tagger, such as the MTU. */ - dsa_switch_for_each_user_port(dp, ds) { - struct net_device *slave = dp->slave; + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_user_port(ds, port)) { + struct net_device *slave; - dsa_slave_setup_tagger(slave); + slave = dsa_to_port(ds, port)->slave; + dsa_slave_setup_tagger(slave); - /* rtnl_mutex is held in dsa_tree_change_tag_proto */ - dsa_slave_change_mtu(slave, slave->mtu); + /* rtnl_mutex is held in dsa_tree_change_tag_proto */ + dsa_slave_change_mtu(slave, slave->mtu); + } } return 0; } -/* We use the same cross-chip notifiers to inform both the tagger side, as well - * as the switch side, of connection and disconnection events. - * Since ds->tagger_data is owned by the tagger, it isn't a hard error if the - * switch side doesn't support connecting to this tagger, and therefore, the - * fact that we don't disconnect the tagger side doesn't constitute a memory - * leak: the tagger will still operate with persistent per-switch memory, just - * with the switch side unconnected to it. What does constitute a hard error is - * when the switch side supports connecting but fails. 
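dsa_switch_change_tag_proto() above is a two-phase update: the fallible hardware call runs first for every CPU port, and only after nothing can fail anymore are the cached, tagger-dependent bits (slave tagger setup, MTU) refreshed on user ports. The shape of it, as a sketch with hypothetical fallible/fixup callbacks:

static int two_phase_update(struct dsa_switch *ds,
			    int (*fallible)(struct dsa_switch *ds, int port),
			    void (*fixup)(struct dsa_switch *ds, int port))
{
	int port, err;

	/* phase 1: everything that can fail */
	for (port = 0; port < ds->num_ports; port++) {
		if (!dsa_is_cpu_port(ds, port))
			continue;

		err = fallible(ds, port);
		if (err)
			return err;
	}

	/* phase 2: infallible bookkeeping on user ports */
	for (port = 0; port < ds->num_ports; port++)
		if (dsa_is_user_port(ds, port))
			fixup(ds, port);

	return 0;
}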
- */ -static int -dsa_switch_connect_tag_proto(struct dsa_switch *ds, - struct dsa_notifier_tag_proto_info *info) +static int dsa_switch_mrp_add(struct dsa_switch *ds, + struct dsa_notifier_mrp_info *info) { - const struct dsa_device_ops *tag_ops = info->tag_ops; - int err; - - /* Notify the new tagger about the connection to this switch */ - if (tag_ops->connect) { - err = tag_ops->connect(ds); - if (err) - return err; - } - - if (!ds->ops->connect_tag_protocol) + if (!ds->ops->port_mrp_add) return -EOPNOTSUPP; - /* Notify the switch about the connection to the new tagger */ - err = ds->ops->connect_tag_protocol(ds, tag_ops->proto); - if (err) { - /* Revert the new tagger's connection to this tree */ - if (tag_ops->disconnect) - tag_ops->disconnect(ds); - return err; - } + if (ds->index == info->sw_index) + return ds->ops->port_mrp_add(ds, info->port, info->mrp); + + return 0; +} + +static int dsa_switch_mrp_del(struct dsa_switch *ds, + struct dsa_notifier_mrp_info *info) +{ + if (!ds->ops->port_mrp_del) + return -EOPNOTSUPP; + + if (ds->index == info->sw_index) + return ds->ops->port_mrp_del(ds, info->port, info->mrp); return 0; } static int -dsa_switch_disconnect_tag_proto(struct dsa_switch *ds, - struct dsa_notifier_tag_proto_info *info) +dsa_switch_mrp_add_ring_role(struct dsa_switch *ds, + struct dsa_notifier_mrp_ring_role_info *info) { - const struct dsa_device_ops *tag_ops = info->tag_ops; + if (!ds->ops->port_mrp_add_ring_role) + return -EOPNOTSUPP; - /* Notify the tagger about the disconnection from this switch */ - if (tag_ops->disconnect && ds->tagger_data) - tag_ops->disconnect(ds); + if (ds->index == info->sw_index) + return ds->ops->port_mrp_add_ring_role(ds, info->port, + info->mrp); + + return 0; +} + +static int +dsa_switch_mrp_del_ring_role(struct dsa_switch *ds, + struct dsa_notifier_mrp_ring_role_info *info) +{ + if (!ds->ops->port_mrp_del_ring_role) + return -EOPNOTSUPP; + + if (ds->index == info->sw_index) + return ds->ops->port_mrp_del_ring_role(ds, info->port, + info->mrp); - /* No need to notify the switch, since it shouldn't have any - * resources to tear down - */ return 0; } @@ -711,6 +696,12 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_HOST_FDB_DEL: err = dsa_switch_host_fdb_del(ds, info); break; + case DSA_NOTIFIER_HSR_JOIN: + err = dsa_switch_hsr_join(ds, info); + break; + case DSA_NOTIFIER_HSR_LEAVE: + err = dsa_switch_hsr_leave(ds, info); + break; case DSA_NOTIFIER_LAG_CHANGE: err = dsa_switch_lag_change(ds, info); break; @@ -744,11 +735,17 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_TAG_PROTO: err = dsa_switch_change_tag_proto(ds, info); break; - case DSA_NOTIFIER_TAG_PROTO_CONNECT: - err = dsa_switch_connect_tag_proto(ds, info); + case DSA_NOTIFIER_MRP_ADD: + err = dsa_switch_mrp_add(ds, info); break; - case DSA_NOTIFIER_TAG_PROTO_DISCONNECT: - err = dsa_switch_disconnect_tag_proto(ds, info); + case DSA_NOTIFIER_MRP_DEL: + err = dsa_switch_mrp_del(ds, info); + break; + case DSA_NOTIFIER_MRP_ADD_RING_ROLE: + err = dsa_switch_mrp_add_ring_role(ds, info); + break; + case DSA_NOTIFIER_MRP_DEL_RING_ROLE: + err = dsa_switch_mrp_del_ring_role(ds, info); break; case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD: err = dsa_switch_tag_8021q_vlan_add(ds, info); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 27712a81c9..f8f7b7c34e 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -6,6 +6,7 @@ * dsa_8021q_netdev_ops is registered for API compliance and not used * directly by callers. 
*/ +#include #include #include @@ -67,34 +68,32 @@ #define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ DSA_8021Q_PORT_MASK) -u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num) +u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num) { - /* The VBID value of 0 is reserved for precise TX, but it is also - * reserved/invalid for the bridge_num, so all is well. - */ - return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num); + /* The VBID value of 0 is reserved for precise TX */ + return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num + 1); } EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid); /* Returns the VID to be inserted into the frame from xmit for switch steering * instructions on egress. Encodes switch ID and port ID. */ -u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp) +u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) { - return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) | - DSA_8021Q_PORT(dp->index); + return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) | + DSA_8021Q_PORT(port); } -EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid); +EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); /* Returns the VID that will be installed as pvid for this switch port, sent as * tagged egress towards the CPU port and decoded by the rcv function. */ -u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp) +u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) { - return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) | - DSA_8021Q_PORT(dp->index); + return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | + DSA_8021Q_PORT(port); } -EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid); +EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) @@ -140,13 +139,12 @@ dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid) return NULL; } -static int dsa_port_do_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, - u16 flags) +static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port, + u16 vid, u16 flags) { - struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; - struct dsa_switch *ds = dp->ds; + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_tag_8021q_vlan *v; - int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -177,12 +175,12 @@ static int dsa_port_do_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, return 0; } -static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) +static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port, + u16 vid) { - struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; - struct dsa_switch *ds = dp->ds; + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_tag_8021q_vlan *v; - int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -209,16 +207,14 @@ static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) } static bool -dsa_port_tag_8021q_vlan_match(struct dsa_port *dp, - struct dsa_notifier_tag_8021q_vlan_info *info) +dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port, + struct dsa_notifier_tag_8021q_vlan_info *info) { - struct dsa_switch *ds = dp->ds; - - if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) + if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) return true; if (ds->dst->index == info->tree_index && ds->index == info->sw_index) - return dp->index == info->port; + return port == info->port; 
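The DSA_8021Q_* macros above pack a direction, switch ID and port (or, for TX forwarding offload, a VBID) into the 12 usable bits of a VLAN ID. A standalone toy with assumed field positions, purely for illustration (the authoritative masks are the ones in tag_8021q.c):

#include <stdint.h>
#include <stdio.h>

/* Assumed layout: [11:10] direction, [9:4] switch id, [3:0] port */
#define TOY_DIR_RX	(1u << 10)
#define TOY_DIR_TX	(2u << 10)
#define TOY_SWITCH(x)	(((unsigned)(x) & 0x3f) << 4)
#define TOY_PORT(x)	((unsigned)(x) & 0xf)

static uint16_t toy_tx_vid(int sw, int port)
{
	return TOY_DIR_TX | TOY_SWITCH(sw) | TOY_PORT(port);
}

static uint16_t toy_rx_vid(int sw, int port)
{
	return TOY_DIR_RX | TOY_SWITCH(sw) | TOY_PORT(port);
}

int main(void)
{
	printf("tx vid, sw 1 port 3: 0x%03x\n", toy_tx_vid(1, 3)); /* 0x813 */
	printf("rx vid, sw 1 port 3: 0x%03x\n", toy_rx_vid(1, 3)); /* 0x413 */
	return 0;
}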
return false; } @@ -226,8 +222,7 @@ dsa_port_tag_8021q_vlan_match(struct dsa_port *dp, int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info) { - struct dsa_port *dp; - int err; + int port, err; /* Since we use dsa_broadcast(), there might be other switches in other * trees which don't support tag_8021q, so don't return an error. @@ -237,20 +232,21 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx) return 0; - dsa_switch_for_each_port(dp, ds) { - if (dsa_port_tag_8021q_vlan_match(dp, info)) { + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { u16 flags = 0; - if (dsa_port_is_user(dp)) + if (dsa_is_user_port(ds, port)) flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (vid_is_dsa_8021q_rxvlan(info->vid) && dsa_8021q_rx_switch_id(info->vid) == ds->index && - dsa_8021q_rx_source_port(info->vid) == dp->index) + dsa_8021q_rx_source_port(info->vid) == port) flags |= BRIDGE_VLAN_INFO_PVID; - err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid, - flags); + err = dsa_switch_do_tag_8021q_vlan_add(ds, port, + info->vid, + flags); if (err) return err; } @@ -262,15 +258,15 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info) { - struct dsa_port *dp; - int err; + int port, err; if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx) return 0; - dsa_switch_for_each_port(dp, ds) { - if (dsa_port_tag_8021q_vlan_match(dp, info)) { - err = dsa_port_do_tag_8021q_vlan_del(dp, info->vid); + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { + err = dsa_switch_do_tag_8021q_vlan_del(ds, port, + info->vid); if (err) return err; } @@ -326,18 +322,19 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 */ -static bool -dsa_port_tag_8021q_bridge_match(struct dsa_port *dp, - struct dsa_notifier_bridge_info *info) +static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port, + struct dsa_notifier_bridge_info *info) { + struct dsa_port *dp = dsa_to_port(ds, port); + /* Don't match on self */ - if (dp->ds->dst->index == info->tree_index && - dp->ds->index == info->sw_index && - dp->index == info->port) + if (ds->dst->index == info->tree_index && + ds->index == info->sw_index && + port == info->port) return false; if (dsa_port_is_user(dp)) - return dsa_port_offloads_bridge(dp, &info->bridge); + return dp->bridge_dev == info->br; return false; } @@ -347,21 +344,21 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, { struct dsa_switch *targeted_ds; struct dsa_port *targeted_dp; - struct dsa_port *dp; u16 targeted_rx_vid; - int err; + int err, port; if (!ds->tag_8021q_ctx) return 0; targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); + targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); - dsa_switch_for_each_port(dp, ds) { - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); + for (port = 0; port < ds->num_ports; port++) { + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); - if (!dsa_port_tag_8021q_bridge_match(dp, info)) + if (!dsa_tag_8021q_bridge_match(ds, port, info)) continue; /* Install the RX VID of the targeted 
port in our VLAN table */ @@ -383,20 +380,21 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, { struct dsa_switch *targeted_ds; struct dsa_port *targeted_dp; - struct dsa_port *dp; u16 targeted_rx_vid; + int port; if (!ds->tag_8021q_ctx) return 0; targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); + targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); - dsa_switch_for_each_port(dp, ds) { - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); + for (port = 0; port < ds->num_ports; port++) { + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); - if (!dsa_port_tag_8021q_bridge_match(dp, info)) + if (!dsa_tag_8021q_bridge_match(ds, port, info)) continue; /* Remove the RX VID of the targeted port from our VLAN table */ @@ -410,9 +408,10 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, } int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) + struct net_device *br, + int bridge_num) { - u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num); + u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid, true); @@ -420,9 +419,10 @@ int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload); void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) + struct net_device *br, + int bridge_num) { - u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num); + u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid, true); } @@ -433,8 +433,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); struct net_device *master; int err; @@ -478,8 +478,8 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); struct net_device *master; /* The CPU port is implicitly configured by diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 8abf39dcac..b3da4b2ea1 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -132,7 +132,6 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, u8 *dsa_header; if (skb->offload_fwd_mark) { - unsigned int bridge_num = dsa_port_bridge_num_get(dp); struct dsa_switch_tree *dst = dp->ds->dst; cmd = DSA_CMD_FORWARD; @@ -141,7 +140,7 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, * packets on behalf of a virtual switch device with an index * past the physical switches. 
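Worked through the restored encoding in tag_8021q.c: a bridge with bridge_num 0 gets VBID 1, so its TX forwarding offload VID is DSA_8021Q_DIR_TX | DSA_8021Q_VBID(1), while VBID 0 stays reserved to mean precise, single-port TX; an offloaded data-plane VID can therefore always be told apart from a targeted one.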
*/ - tag_dev = dst->last_switch + bridge_num; + tag_dev = dst->last_switch + 1 + dp->bridge_num; tag_port = 0; } else { cmd = DSA_CMD_FROM_CPU; @@ -166,7 +165,7 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, dsa_header[2] &= ~0x10; } } else { - struct net_device *br = dsa_port_bridge_dev_get(dp); + struct net_device *br = dp->bridge_dev; u16 vid; vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE; diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 3509fc967c..fa1d60d13a 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -6,6 +6,7 @@ #include #include +#include #include #include "dsa_priv.h" diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 0d81f172b7..6e0518aa3a 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -4,55 +4,14 @@ #include #include "dsa_priv.h" -/* If the port is under a VLAN-aware bridge, remove the VLAN header from the - * payload and move it into the DSA tag, which will make the switch classify - * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, - * which is the pvid of standalone and VLAN-unaware bridge ports. - */ -static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, - u64 *vlan_tci, u64 *tag_type) -{ - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct vlan_ethhdr *hdr; - u16 proto, tci; - - if (!br || !br_vlan_enabled(br)) { - *vlan_tci = 0; - *tag_type = IFH_TAG_TYPE_C; - return; - } - - hdr = (struct vlan_ethhdr *)skb_mac_header(skb); - br_vlan_get_proto(br, &proto); - - if (ntohs(hdr->h_vlan_proto) == proto) { - __skb_vlan_pop(skb, &tci); - *vlan_tci = tci; - } else { - rcu_read_lock(); - br_vlan_get_pvid_rcu(br, &tci); - rcu_read_unlock(); - *vlan_tci = tci; - } - - *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; -} - static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { struct dsa_port *dp = dsa_slave_to_port(netdev); struct dsa_switch *ds = dp->ds; - u64 vlan_tci, tag_type; void *injection; __be32 *prefix; u32 rew_op = 0; - u64 qos_class; - - ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); - - qos_class = netdev_get_num_tc(netdev) ? 
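Concretely: in a tree whose last physical switch has index 2, the first TX-offloading bridge (dp->bridge_num 0) is addressed in the FORWARD tag as virtual switch device 2 + 1 + 0 = 3, the first index past the physical switches.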
- netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; injection = skb_push(skb, OCELOT_TAG_LEN); prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN); @@ -61,9 +20,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, memset(injection, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(injection, 1); ocelot_ifh_set_src(injection, ds->num_ports); - ocelot_ifh_set_qos_class(injection, qos_class); - ocelot_ifh_set_vlan_tci(injection, vlan_tci); - ocelot_ifh_set_tag_type(injection, tag_type); + ocelot_ifh_set_qos_class(injection, skb->priority); rew_op = ocelot_ptp_rew_op(skb); if (rew_op) diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 68982b2789..3412051981 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -12,39 +12,25 @@ #include #include "dsa_priv.h" -struct ocelot_8021q_tagger_private { - struct ocelot_8021q_tagger_data data; /* Must be first */ - struct kthread_worker *xmit_worker; -}; - static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, struct sk_buff *skb) { - struct ocelot_8021q_tagger_private *priv = dp->ds->tagger_data; - struct ocelot_8021q_tagger_data *data = &priv->data; - void (*xmit_work_fn)(struct kthread_work *work); struct felix_deferred_xmit_work *xmit_work; - struct kthread_worker *xmit_worker; - - xmit_work_fn = data->xmit_work_fn; - xmit_worker = priv->xmit_worker; - - if (!xmit_work_fn || !xmit_worker) - return NULL; + struct felix_port *felix_port = dp->priv; xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC); if (!xmit_work) return NULL; /* Calls felix_port_deferred_xmit in felix.c */ - kthread_init_work(&xmit_work->work, xmit_work_fn); + kthread_init_work(&xmit_work->work, felix_port->xmit_work_fn); /* Increase refcount so the kfree_skb in dsa_slave_xmit * won't really free the packet. 
*/ xmit_work->dp = dp; xmit_work->skb = skb_get(skb); - kthread_queue_work(xmit_worker, &xmit_work->work); + kthread_queue_work(felix_port->xmit_worker, &xmit_work->work); return NULL; } @@ -53,9 +39,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); + u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); struct ethhdr *hdr = eth_hdr(skb); if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest)) @@ -81,43 +67,11 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, return skb; } -static void ocelot_disconnect(struct dsa_switch *ds) -{ - struct ocelot_8021q_tagger_private *priv = ds->tagger_data; - - kthread_destroy_worker(priv->xmit_worker); - kfree(priv); - ds->tagger_data = NULL; -} - -static int ocelot_connect(struct dsa_switch *ds) -{ - struct ocelot_8021q_tagger_private *priv; - int err; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->xmit_worker = kthread_create_worker(0, "felix_xmit"); - if (IS_ERR(priv->xmit_worker)) { - err = PTR_ERR(priv->xmit_worker); - kfree(priv); - return err; - } - - ds->tagger_data = priv; - - return 0; -} - static const struct dsa_device_ops ocelot_8021q_netdev_ops = { .name = "ocelot-8021q", .proto = DSA_TAG_PROTO_OCELOT_8021Q, .xmit = ocelot_xmit, .rcv = ocelot_rcv, - .connect = ocelot_connect, - .disconnect = ocelot_disconnect, .needed_headroom = VLAN_HLEN, .promisc_on_master = true, }; diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 6d928ee3ef..f920487ae1 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -54,7 +54,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, p = (__be16 *)tag; *p = htons(RTL4_A_ETHERTYPE); - out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT); + out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT) | (2 << 8); /* The lower bits indicate the port number */ out |= BIT(dp->index); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 72d5e0ef8d..2edede9dda 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "dsa_priv.h" @@ -53,25 +54,11 @@ #define SJA1110_TX_TRAILER_LEN 4 #define SJA1110_MAX_PADDING_LEN 15 -#define SJA1105_HWTS_RX_EN 0 - -struct sja1105_tagger_private { - struct sja1105_tagger_data data; /* Must be first */ - unsigned long state; - /* Protects concurrent access to the meta state machine - * from taggers running on multiple ports on SMP systems - */ - spinlock_t meta_lock; - struct sk_buff *stampable_skb; - struct kthread_worker *xmit_worker; +enum sja1110_meta_tstamp { + SJA1110_META_TSTAMP_TX = 0, + SJA1110_META_TSTAMP_RX = 1, }; -static struct sja1105_tagger_private * -sja1105_tagger_private(struct dsa_switch *ds) -{ - return ds->tagger_data; -} - /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */ static inline bool sja1105_is_link_local(const struct sk_buff *skb) { @@ -138,30 +125,16 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp, struct sk_buff *skb) { - struct sja1105_tagger_data *tagger_data = sja1105_tagger_data(dp->ds); - struct sja1105_tagger_private *priv = sja1105_tagger_private(dp->ds); - void (*xmit_work_fn)(struct kthread_work *work); - struct sja1105_deferred_xmit_work 
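Both the ocelot-8021q and sja1105 taggers defer PTP and link-local TX to a kthread worker, and with the tagger_data plumbing reverted above they fetch that worker from per-port driver private data instead. The underlying kthread pattern, sketched with hypothetical names:

#include <linux/kthread.h>
#include <linux/skbuff.h>
#include <linux/slab.h>

struct deferred_xmit {
	struct kthread_work work;
	struct sk_buff *skb;
};

static void deferred_xmit_fn(struct kthread_work *work)
{
	struct deferred_xmit *x =
		container_of(work, struct deferred_xmit, work);

	/* ... transmit x->skb via the slow management-frame path ... */

	kfree_skb(x->skb);	/* drop the reference taken at queue time */
	kfree(x);
}

static struct sk_buff *defer_xmit(struct kthread_worker *worker,
				  struct sk_buff *skb)
{
	struct deferred_xmit *x = kzalloc(sizeof(*x), GFP_ATOMIC);

	if (!x)
		return NULL;

	kthread_init_work(&x->work, deferred_xmit_fn);
	x->skb = skb_get(skb);	/* survive the kfree_skb in dsa_slave_xmit */
	kthread_queue_work(worker, &x->work);

	return NULL;	/* tell DSA the frame was consumed */
}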
*xmit_work; - struct kthread_worker *xmit_worker; + struct sja1105_port *sp = dp->priv; - xmit_work_fn = tagger_data->xmit_work_fn; - xmit_worker = priv->xmit_worker; + if (!dsa_port_is_sja1105(dp)) + return skb; - if (!xmit_work_fn || !xmit_worker) - return NULL; - - xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC); - if (!xmit_work) - return NULL; - - kthread_init_work(&xmit_work->work, xmit_work_fn); /* Increase refcount so the kfree_skb in dsa_slave_xmit * won't really free the packet. */ - xmit_work->dp = dp; - xmit_work->skb = skb_get(skb); - - kthread_queue_work(xmit_worker, &xmit_work->work); + skb_queue_tail(&sp->xmit_queue, skb_get(skb)); + kthread_queue_work(sp->xmit_worker, &sp->xmit_work); return NULL; } @@ -185,17 +158,18 @@ static u16 sja1105_xmit_tpid(struct dsa_port *dp) * we're sure about that). It may not be on this port though, so we * need to find it. */ - dsa_switch_for_each_port(other_dp, ds) { - struct net_device *br = dsa_port_bridge_dev_get(other_dp); + list_for_each_entry(other_dp, &ds->dst->ports, list) { + if (other_dp->ds != ds) + continue; - if (!br) + if (!other_dp->bridge_dev) continue; /* Error is returned only if CONFIG_BRIDGE_VLAN_FILTERING, * which seems pointless to handle, as our port cannot become * VLAN-aware in that case. */ - br_vlan_get_proto(br, &proto); + br_vlan_get_proto(other_dp->bridge_dev, &proto); return proto; } @@ -209,8 +183,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); - unsigned int bridge_num = dsa_port_bridge_num_get(dp); - struct net_device *br = dsa_port_bridge_dev_get(dp); + struct net_device *br = dp->bridge_dev; u16 tx_vid; /* If the port is under a VLAN-aware bridge, just slide the @@ -226,7 +199,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, * TX VLAN that targets the bridge's entire broadcast domain, * instead of just the specific port. */ - tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); + tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num); return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid); } @@ -265,9 +238,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); + u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); if (skb->offload_fwd_mark) return sja1105_imprecise_xmit(skb, netdev); @@ -293,9 +266,9 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, { struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; struct dsa_port *dp = dsa_slave_to_port(netdev); + u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); __be32 *tx_trailer; __be16 *tx_header; int trailer_pos; @@ -382,32 +355,32 @@ static struct sk_buff */ if (is_link_local) { struct dsa_port *dp = dsa_slave_to_port(skb->dev); - struct sja1105_tagger_private *priv; - struct dsa_switch *ds = dp->ds; + struct sja1105_port *sp = dp->priv; - priv = sja1105_tagger_private(ds); + if (unlikely(!dsa_port_is_sja1105(dp))) + return skb; - if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state)) + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) /* Do normal processing. 
*/ return skb; - spin_lock(&priv->meta_lock); + spin_lock(&sp->data->meta_lock); /* Was this a link-local frame instead of the meta * that we were expecting? */ - if (priv->stampable_skb) { - dev_err_ratelimited(ds->dev, + if (sp->data->stampable_skb) { + dev_err_ratelimited(dp->ds->dev, "Expected meta frame, is %12llx " "in the DSA master multicast filter?\n", SJA1105_META_DMAC); - kfree_skb(priv->stampable_skb); + kfree_skb(sp->data->stampable_skb); } /* Hold a reference to avoid dsa_switch_rcv * from freeing the skb. */ - priv->stampable_skb = skb_get(skb); - spin_unlock(&priv->meta_lock); + sp->data->stampable_skb = skb_get(skb); + spin_unlock(&sp->data->meta_lock); /* Tell DSA we got nothing */ return NULL; @@ -420,37 +393,37 @@ static struct sk_buff */ } else if (is_meta) { struct dsa_port *dp = dsa_slave_to_port(skb->dev); - struct sja1105_tagger_private *priv; - struct dsa_switch *ds = dp->ds; + struct sja1105_port *sp = dp->priv; struct sk_buff *stampable_skb; - priv = sja1105_tagger_private(ds); + if (unlikely(!dsa_port_is_sja1105(dp))) + return skb; /* Drop the meta frame if we're not in the right state * to process it. */ - if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state)) + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) return NULL; - spin_lock(&priv->meta_lock); + spin_lock(&sp->data->meta_lock); - stampable_skb = priv->stampable_skb; - priv->stampable_skb = NULL; + stampable_skb = sp->data->stampable_skb; + sp->data->stampable_skb = NULL; /* Was this a meta frame instead of the link-local * that we were expecting? */ if (!stampable_skb) { - dev_err_ratelimited(ds->dev, + dev_err_ratelimited(dp->ds->dev, "Unexpected meta frame\n"); - spin_unlock(&priv->meta_lock); + spin_unlock(&sp->data->meta_lock); return NULL; } if (stampable_skb->dev != skb->dev) { - dev_err_ratelimited(ds->dev, + dev_err_ratelimited(dp->ds->dev, "Meta frame on wrong port\n"); - spin_unlock(&priv->meta_lock); + spin_unlock(&sp->data->meta_lock); return NULL; } @@ -461,36 +434,12 @@ static struct sk_buff skb = stampable_skb; sja1105_transfer_meta(skb, meta); - spin_unlock(&priv->meta_lock); + spin_unlock(&sp->data->meta_lock); } return skb; } -static bool sja1105_rxtstamp_get_state(struct dsa_switch *ds) -{ - struct sja1105_tagger_private *priv = sja1105_tagger_private(ds); - - return test_bit(SJA1105_HWTS_RX_EN, &priv->state); -} - -static void sja1105_rxtstamp_set_state(struct dsa_switch *ds, bool on) -{ - struct sja1105_tagger_private *priv = sja1105_tagger_private(ds); - - if (on) - set_bit(SJA1105_HWTS_RX_EN, &priv->state); - else - clear_bit(SJA1105_HWTS_RX_EN, &priv->state); - - /* Initialize the meta state machine to a known state */ - if (!priv->stampable_skb) - return; - - kfree_skb(priv->stampable_skb); - priv->stampable_skb = NULL; -} - static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb) { u16 tpid = ntohs(eth_hdr(skb)->h_proto); @@ -577,12 +526,48 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, is_meta); } +static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, + u8 ts_id, enum sja1110_meta_tstamp dir, + u64 tstamp) +{ + struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct dsa_port *dp = dsa_to_port(ds, port); + struct skb_shared_hwtstamps shwt = {0}; + struct sja1105_port *sp = dp->priv; + + if (!dsa_port_is_sja1105(dp)) + return; + + /* We don't care about RX timestamps on the CPU port */ + if (dir == SJA1110_META_TSTAMP_RX) + return; + + spin_lock(&sp->data->skb_txtstamp_queue.lock); + + skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, 
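The RX path above pairs each timestampable link-local frame with the meta frame that follows it: the first frame is parked in stampable_skb under meta_lock, and the meta frame's handler claims it, transfers the timestamp, and resumes delivery. Reduced to the bare rendezvous (a sketch, not the tagger's exact code):

#include <linux/skbuff.h>
#include <linux/spinlock.h>

struct meta_rendezvous {
	spinlock_t lock;
	struct sk_buff *held;	/* frame waiting for its meta partner */
};

/* first frame: park it and consume it for now */
static struct sk_buff *rendezvous_hold(struct meta_rendezvous *r,
				       struct sk_buff *skb)
{
	spin_lock(&r->lock);
	if (r->held)
		kfree_skb(r->held);	/* stale partner, drop it */
	r->held = skb_get(skb);
	spin_unlock(&r->lock);

	return NULL;	/* nothing to deliver yet */
}

/* meta frame: release the held frame, now carrying the timestamp */
static struct sk_buff *rendezvous_complete(struct meta_rendezvous *r)
{
	struct sk_buff *skb;

	spin_lock(&r->lock);
	skb = r->held;
	r->held = NULL;
	spin_unlock(&r->lock);

	return skb;	/* NULL if no partner was waiting */
}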
skb, skb_tmp) { + if (SJA1105_SKB_CB(skb)->ts_id != ts_id) + continue; + + __skb_unlink(skb, &sp->data->skb_txtstamp_queue); + skb_match = skb; + + break; + } + + spin_unlock(&sp->data->skb_txtstamp_queue.lock); + + if (WARN_ON(!skb_match)) + return; + + shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp)); + skb_complete_tx_timestamp(skb_match, &shwt); +} + static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) { u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN; int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header); int n_ts = SJA1110_RX_HEADER_N_TS(rx_header); - struct sja1105_tagger_data *tagger_data; struct net_device *master = skb->dev; struct dsa_port *cpu_dp; struct dsa_switch *ds; @@ -596,10 +581,6 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) return NULL; } - tagger_data = sja1105_tagger_data(ds); - if (!tagger_data->meta_tstamp_handler) - return NULL; - for (i = 0; i <= n_ts; i++) { u8 ts_id, source_port, dir; u64 tstamp; @@ -609,8 +590,8 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) dir = (buf[1] & BIT(3)) >> 3; tstamp = be64_to_cpu(*(__be64 *)(buf + 2)); - tagger_data->meta_tstamp_handler(ds, source_port, ts_id, dir, - tstamp); + sja1110_process_meta_tstamp(ds, source_port, ts_id, dir, + tstamp); buf += SJA1110_META_TSTAMP_SIZE; } @@ -741,53 +722,11 @@ static void sja1110_flow_dissect(const struct sk_buff *skb, __be16 *proto, *proto = ((__be16 *)skb->data)[(VLAN_HLEN / 2) - 1]; } -static void sja1105_disconnect(struct dsa_switch *ds) -{ - struct sja1105_tagger_private *priv = ds->tagger_data; - - kthread_destroy_worker(priv->xmit_worker); - kfree(priv); - ds->tagger_data = NULL; -} - -static int sja1105_connect(struct dsa_switch *ds) -{ - struct sja1105_tagger_data *tagger_data; - struct sja1105_tagger_private *priv; - struct kthread_worker *xmit_worker; - int err; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - spin_lock_init(&priv->meta_lock); - - xmit_worker = kthread_create_worker(0, "dsa%d:%d_xmit", - ds->dst->index, ds->index); - if (IS_ERR(xmit_worker)) { - err = PTR_ERR(xmit_worker); - kfree(priv); - return err; - } - - priv->xmit_worker = xmit_worker; - /* Export functions for switch driver use */ - tagger_data = &priv->data; - tagger_data->rxtstamp_get_state = sja1105_rxtstamp_get_state; - tagger_data->rxtstamp_set_state = sja1105_rxtstamp_set_state; - ds->tagger_data = priv; - - return 0; -} - static const struct dsa_device_ops sja1105_netdev_ops = { .name = "sja1105", .proto = DSA_TAG_PROTO_SJA1105, .xmit = sja1105_xmit, .rcv = sja1105_rcv, - .connect = sja1105_connect, - .disconnect = sja1105_disconnect, .needed_headroom = VLAN_HLEN, .flow_dissect = sja1105_flow_dissect, .promisc_on_master = true, @@ -801,8 +740,6 @@ static const struct dsa_device_ops sja1110_netdev_ops = { .proto = DSA_TAG_PROTO_SJA1110, .xmit = sja1110_xmit, .rcv = sja1110_rcv, - .connect = sja1105_connect, - .disconnect = sja1105_disconnect, .flow_dissect = sja1110_flow_dissect, .needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN, .needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index ebcc812735..73fce94674 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -305,7 +304,7 @@ void eth_commit_mac_addr_change(struct net_device *dev, void *p) { struct sockaddr *addr = p; - eth_hw_addr_set(dev, 
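sja1110_process_meta_tstamp() above matches a completed hardware TX timestamp to its waiting clone by ts_id, scanning the per-port queue under its lock. The scan in isolation, using the same skb_queue primitives (the ts_id-in-cb stash is a stand-in for SJA1105_SKB_CB):

#include <linux/skbuff.h>

struct ts_cb { u8 ts_id; };	/* hypothetical: ts_id kept in skb->cb */

static struct sk_buff *claim_by_ts_id(struct sk_buff_head *q, u8 ts_id)
{
	struct sk_buff *skb, *tmp, *match = NULL;

	spin_lock(&q->lock);
	skb_queue_walk_safe(q, skb, tmp) {
		if (((struct ts_cb *)skb->cb)->ts_id != ts_id)
			continue;

		__skb_unlink(skb, q);	/* caller completes the timestamp */
		match = skb;
		break;
	}
	spin_unlock(&q->lock);

	return match;
}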
addr->sa_data); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); } EXPORT_SYMBOL(eth_commit_mac_addr_change); @@ -436,10 +435,11 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb) type = eh->h_proto; + rcu_read_lock(); ptype = gro_find_receive_by_type(type); if (ptype == NULL) { flush = 1; - goto out; + goto out_unlock; } skb_gro_pull(skb, sizeof(*eh)); @@ -449,6 +449,8 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb) ipv6_gro_receive, inet_gro_receive, head, skb); +out_unlock: + rcu_read_unlock(); out: skb_gro_flush_final(skb, pp, flush); @@ -466,12 +468,14 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff) if (skb->encapsulation) skb_set_inner_mac_header(skb, nhoff); + rcu_read_lock(); ptype = gro_find_complete_by_type(type); if (ptype != NULL) err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, ipv6_gro_complete, inet_gro_complete, skb, nhoff + sizeof(*eh)); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(eth_gro_complete); @@ -518,26 +522,6 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) } EXPORT_SYMBOL(eth_platform_get_mac_address); -/** - * platform_get_ethdev_address - Set netdev's MAC address from a given device - * @dev: Pointer to the device - * @netdev: Pointer to netdev to write the address to - * - * Wrapper around eth_platform_get_mac_address() which writes the address - * directly to netdev->dev_addr. - */ -int platform_get_ethdev_address(struct device *dev, struct net_device *netdev) -{ - u8 addr[ETH_ALEN] __aligned(2); - int ret; - - ret = eth_platform_get_mac_address(dev, addr); - if (!ret) - eth_hw_addr_set(netdev, addr); - return ret; -} -EXPORT_SYMBOL(platform_get_ethdev_address); - /** * nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named * 'mac-address' associated with given device. @@ -573,81 +557,4 @@ int nvmem_get_mac_address(struct device *dev, void *addrbuf) return 0; } - -static int fwnode_get_mac_addr(struct fwnode_handle *fwnode, - const char *name, char *addr) -{ - int ret; - - ret = fwnode_property_read_u8_array(fwnode, name, addr, ETH_ALEN); - if (ret) - return ret; - - if (!is_valid_ether_addr(addr)) - return -EINVAL; - return 0; -} - -/** - * fwnode_get_mac_address - Get the MAC from the firmware node - * @fwnode: Pointer to the firmware node - * @addr: Address of buffer to store the MAC in - * - * Search the firmware node for the best MAC address to use. 'mac-address' is - * checked first, because that is supposed to contain the "most recent" MAC - * address. If that isn't set, then 'local-mac-address' is checked next, - * because that is the default address. If that isn't set, then the obsolete - * 'address' is checked, just in case we're using an old device tree. - * - * Note that the 'address' property is supposed to contain a virtual address of - * the register set, but some DTS files have redefined that property to be the - * MAC address. - * - * All-zero MAC addresses are rejected, because those could be properties that - * exist in the firmware tables, but were not updated by the firmware. For - * example, the DTS could define 'mac-address' and 'local-mac-address', with - * zero MAC addresses. Some older U-Boots only initialized 'local-mac-address'. - * In this case, the real MAC is in 'local-mac-address', and 'mac-address' - * exists but is all zeros. 
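The rcu_read_lock()/rcu_read_unlock() pairs reinstated above bracket the gro_find_*_by_type() lookups because, in this tree, the returned packet-offload entries are only guaranteed to stay valid inside an RCU read-side critical section. The general shape of such a lookup, as a self-contained sketch:

#include <linux/errno.h>
#include <linux/rcupdate.h>

struct handler {
	int (*fn)(void);
};

static struct handler __rcu *active_handler;

static int call_active_handler(void)
{
	struct handler *h;
	int err = -ENOENT;

	rcu_read_lock();
	h = rcu_dereference(active_handler);	/* valid only inside the lock */
	if (h)
		err = h->fn();
	rcu_read_unlock();

	return err;
}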
- */ -int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr) -{ - if (!fwnode_get_mac_addr(fwnode, "mac-address", addr) || - !fwnode_get_mac_addr(fwnode, "local-mac-address", addr) || - !fwnode_get_mac_addr(fwnode, "address", addr)) - return 0; - - return -ENOENT; -} -EXPORT_SYMBOL(fwnode_get_mac_address); - -/** - * device_get_mac_address - Get the MAC for a given device - * @dev: Pointer to the device - * @addr: Address of buffer to store the MAC in - */ -int device_get_mac_address(struct device *dev, char *addr) -{ - return fwnode_get_mac_address(dev_fwnode(dev), addr); -} -EXPORT_SYMBOL(device_get_mac_address); - -/** - * device_get_ethdev_address - Set netdev's MAC address from a given device - * @dev: Pointer to the device - * @netdev: Pointer to netdev to write the address to - * - * Wrapper around device_get_mac_address() which writes the address - * directly to netdev->dev_addr. - */ -int device_get_ethdev_address(struct device *dev, struct net_device *netdev) -{ - u8 addr[ETH_ALEN]; - int ret; - - ret = device_get_mac_address(dev, addr); - if (!ret) - eth_hw_addr_set(netdev, addr); - return ret; -} -EXPORT_SYMBOL(device_get_ethdev_address); +EXPORT_SYMBOL(nvmem_get_mac_address); diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index b76432e70e..0a19470efb 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o + tunnels.o fec.o eeprom.o stats.o phc_vclocks.o diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 920aac02fe..63560bbb7d 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -96,7 +96,7 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev_put: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } @@ -353,7 +353,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev_put: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 4031588620..6a070dc8e4 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -219,6 +219,6 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 487bdf3455..46776ea42a 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -336,6 +336,6 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0c52100159..c63e0739dc 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -89,7 +89,6 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", - [ETHTOOL_TX_COPYBREAK_BUF_SIZE] = "tx-copybreak-buf-size", }; const char diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c index d73888c7d1..f99912d795 100644 --- a/net/ethtool/debug.c +++ 
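fwnode_get_mac_address(), removed above, leans on short-circuit ||: the first property that yields a valid address wins, in 'mac-address', then 'local-mac-address', then 'address' order, with all-zero addresses rejected. A userspace toy of the same precedence chain (toy property table, not the fwnode API):

#include <stdio.h>
#include <string.h>

struct prop { const char *name; const char *val; };

/* toy firmware node: 'mac-address' exists but was left all-zero */
static const struct prop props[] = {
	{ "mac-address",       "00:00:00:00:00:00" },
	{ "local-mac-address", "02:00:00:12:34:56" },
};

static int get_prop(const char *name, char *out, size_t len)
{
	size_t i;

	for (i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
		if (strcmp(props[i].name, name))
			continue;
		if (!strcmp(props[i].val, "00:00:00:00:00:00"))
			return -1;	/* reject all-zero, like the kernel */
		snprintf(out, len, "%s", props[i].val);
		return 0;
	}

	return -1;
}

int main(void)
{
	char mac[18];

	if (!get_prop("mac-address", mac, sizeof(mac)) ||
	    !get_prop("local-mac-address", mac, sizeof(mac)) ||
	    !get_prop("address", mac, sizeof(mac)))
		printf("using %s\n", mac);	/* 02:00:00:12:34:56 */

	return 0;
}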
b/net/ethtool/debug.c @@ -123,6 +123,6 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index 45c42b2d5f..e10bfcc078 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -185,6 +185,6 @@ int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 55d449a2d3..1c9f4df273 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -136,6 +136,7 @@ static void ethnl_features_to_bitmap(unsigned long *dest, netdev_features_t val) const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); unsigned int i; + bitmap_zero(dest, NETDEV_FEATURE_COUNT); for (i = 0; i < words; i++) dest[i] = (unsigned long)(val >> (i * BITS_PER_LONG)); } @@ -283,6 +284,6 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c index 9f5a134e2e..8738dafd54 100644 --- a/net/ethtool/fec.c +++ b/net/ethtool/fec.c @@ -305,6 +305,6 @@ int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 326e14ee05..e4983f473a 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -8,7 +8,6 @@ */ #include -#include #include #include #include @@ -33,29 +32,6 @@ #include #include "common.h" -/* State held across locks and calls for commands which have devlink fallback */ -struct ethtool_devlink_compat { - struct devlink *devlink; - union { - struct ethtool_flash efl; - struct ethtool_drvinfo info; - }; -}; - -static struct devlink *netdev_to_devlink_get(struct net_device *dev) -{ - struct devlink_port *devlink_port; - - if (!dev->netdev_ops->ndo_get_devlink_port) - return NULL; - - devlink_port = dev->netdev_ops->ndo_get_devlink_port(dev); - if (!devlink_port) - return NULL; - - return devlink_try_get(devlink_port->devlink); -} - /* * Some useful ethtool_ops methods that're device independent. 
* If we find that all drivers want to do the same thing here, @@ -113,8 +89,7 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr) if (copy_to_user(useraddr, &cmd, sizeof(cmd))) return -EFAULT; useraddr += sizeof(cmd); - if (copy_to_user(useraddr, features, - array_size(copy_size, sizeof(*features)))) + if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) return -EFAULT; return 0; @@ -360,7 +335,7 @@ EXPORT_SYMBOL(ethtool_intersect_link_masks); void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) { - linkmode_zero(dst); + bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); dst[0] = legacy_u32; } EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); @@ -375,10 +350,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) { __ETHTOOL_DECLARE_LINK_MODE_MASK(ext); - linkmode_zero(ext); + bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_fill(ext, 32); bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS); - if (linkmode_intersects(ext, src)) { + if (bitmap_intersects(ext, src, + __ETHTOOL_LINK_MODE_MASK_NBITS)) { /* src mask goes beyond bit 31 */ retval = false; } @@ -721,23 +697,22 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) return ret; } -static int -ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp) +static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, + void __user *useraddr) { + struct ethtool_drvinfo info; const struct ethtool_ops *ops = dev->ethtool_ops; - rsp->info.cmd = ETHTOOL_GDRVINFO; - strlcpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version)); + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GDRVINFO; + strlcpy(info.version, UTS_RELEASE, sizeof(info.version)); if (ops->get_drvinfo) { - ops->get_drvinfo(dev, &rsp->info); + ops->get_drvinfo(dev, &info); } else if (dev->dev.parent && dev->dev.parent->driver) { - strlcpy(rsp->info.bus_info, dev_name(dev->dev.parent), - sizeof(rsp->info.bus_info)); - strlcpy(rsp->info.driver, dev->dev.parent->driver->name, - sizeof(rsp->info.driver)); - } else if (dev->rtnl_link_ops) { - strlcpy(rsp->info.driver, dev->rtnl_link_ops->kind, - sizeof(rsp->info.driver)); + strlcpy(info.bus_info, dev_name(dev->dev.parent), + sizeof(info.bus_info)); + strlcpy(info.driver, dev->dev.parent->driver->name, + sizeof(info.driver)); } else { return -EOPNOTSUPP; } @@ -751,27 +726,30 @@ ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp) rc = ops->get_sset_count(dev, ETH_SS_TEST); if (rc >= 0) - rsp->info.testinfo_len = rc; + info.testinfo_len = rc; rc = ops->get_sset_count(dev, ETH_SS_STATS); if (rc >= 0) - rsp->info.n_stats = rc; + info.n_stats = rc; rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); if (rc >= 0) - rsp->info.n_priv_flags = rc; + info.n_priv_flags = rc; } if (ops->get_regs_len) { int ret = ops->get_regs_len(dev); if (ret > 0) - rsp->info.regdump_len = ret; + info.regdump_len = ret; } if (ops->get_eeprom_len) - rsp->info.eedump_len = ops->get_eeprom_len(dev); + info.eedump_len = ops->get_eeprom_len(dev); - if (!rsp->info.fw_version[0]) - rsp->devlink = netdev_to_devlink_get(dev); + if (!info.fw_version[0]) + devlink_compat_running_version(dev, info.fw_version, + sizeof(info.fw_version)); + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; return 0; } @@ -821,7 +799,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, goto out; useraddr += 
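ethtool_convert_link_mode_to_legacy_u32() above reports failure whenever the source bitmap has a mode set at bit 32 or beyond, since those modes cannot be expressed in the legacy u32 mask. The check over a fixed-size bitmap, as a userspace toy:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NBITS 92	/* stand-in for __ETHTOOL_LINK_MODE_MASK_NBITS */
#define WORDS ((NBITS + 63) / 64)

static bool to_legacy_u32(uint32_t *legacy, const uint64_t *src)
{
	bool fits = true;
	int i;

	*legacy = (uint32_t)src[0];

	/* any bit at position >= 32 makes the legacy mask lossy */
	if (src[0] >> 32)
		fits = false;
	for (i = 1; i < WORDS; i++)
		if (src[i])
			fits = false;

	return fits;
}

int main(void)
{
	uint64_t modes[WORDS] = { (1ull << 5) | (1ull << 40) };
	uint32_t legacy;

	printf("fits=%d legacy=0x%08x\n",
	       to_legacy_u32(&legacy, modes), legacy); /* fits=0 0x00000020 */
	return 0;
}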
offsetof(struct ethtool_sset_info, data); - if (copy_to_user(useraddr, info_buf, array_size(idx, sizeof(u32)))) + if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) goto out; ret = 0; @@ -1044,7 +1022,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, { int i; - if (copy_from_user(indir, useraddr, array_size(size, sizeof(indir[0])))) + if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) return -EFAULT; /* Validate ring indices */ @@ -1559,10 +1537,6 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, ret = getter(dev, &eeprom, data); if (ret) break; - if (!eeprom.len) { - ret = -EIO; - break; - } if (copy_to_user(userbuf, data, eeprom.len)) { ret = -EFAULT; break; @@ -1747,13 +1721,11 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) { struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM }; - struct kernel_ethtool_ringparam kernel_ringparam = {}; if (!dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; - dev->ethtool_ops->get_ringparam(dev, &ringparam, - &kernel_ringparam, NULL); + dev->ethtool_ops->get_ringparam(dev, &ringparam); if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) return -EFAULT; @@ -1763,7 +1735,6 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; - struct kernel_ethtool_ringparam kernel_ringparam; int ret; if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) @@ -1772,7 +1743,7 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) return -EFAULT; - dev->ethtool_ops->get_ringparam(dev, &max, &kernel_ringparam, NULL); + dev->ethtool_ops->get_ringparam(dev, &max); /* ensure new ring parameters are within the maximums */ if (ringparam.rx_pending > max.rx_max_pending || @@ -1781,8 +1752,7 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) ringparam.tx_pending > max.tx_max_pending) return -EINVAL; - ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, - &kernel_ringparam, NULL); + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); if (!ret) ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); return ret; @@ -1921,7 +1891,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) if (copy_to_user(useraddr, &test, sizeof(test))) goto out; useraddr += sizeof(test); - if (copy_to_user(useraddr, data, array_size(test.len, sizeof(u64)))) + if (copy_to_user(useraddr, data, test.len * sizeof(u64))) goto out; ret = 0; @@ -1963,8 +1933,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) goto out; useraddr += sizeof(gstrings); if (gstrings.len && - copy_to_user(useraddr, data, - array_size(gstrings.len, ETH_GSTRING_LEN))) + copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) goto out; ret = 0; @@ -1990,7 +1959,6 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) struct ethtool_value id; static bool busy; const struct ethtool_ops *ops = dev->ethtool_ops; - netdevice_tracker dev_tracker; int rc; if (!ops->set_phys_id) @@ -2010,7 +1978,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) * removal of the device. 
*/ busy = true; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + dev_hold(dev); rtnl_unlock(); if (rc == 0) { @@ -2034,7 +2002,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) } rtnl_lock(); - dev_put_track(dev, &dev_tracker); + dev_put(dev); busy = false; (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); @@ -2096,9 +2064,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count)) return -EOPNOTSUPP; - if (phydev && !ops->get_ethtool_phy_stats && + if (dev->phydev && !ops->get_ethtool_phy_stats && phy_ops && phy_ops->get_sset_count) - n_stats = phy_ops->get_sset_count(phydev); + n_stats = phy_ops->get_sset_count(dev->phydev); else n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); if (n_stats < 0) @@ -2117,9 +2085,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) if (!data) return -ENOMEM; - if (phydev && !ops->get_ethtool_phy_stats && + if (dev->phydev && !ops->get_ethtool_phy_stats && phy_ops && phy_ops->get_stats) { - ret = phy_ops->get_stats(phydev, &stats, data); + ret = phy_ops->get_stats(dev->phydev, &stats, data); if (ret < 0) goto out; } else { @@ -2205,15 +2173,19 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } -static int -ethtool_flash_device(struct net_device *dev, struct ethtool_devlink_compat *req) +static noinline_for_stack int ethtool_flash_device(struct net_device *dev, + char __user *useraddr) { - if (!dev->ethtool_ops->flash_device) { - req->devlink = netdev_to_devlink_get(dev); - return 0; - } + struct ethtool_flash efl; - return dev->ethtool_ops->flash_device(dev, &req->efl); + if (copy_from_user(&efl, useraddr, sizeof(efl))) + return -EFAULT; + efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; + + if (!dev->ethtool_ops->flash_device) + return devlink_compat_flash_update(dev, efl.data); + + return dev->ethtool_ops->flash_device(dev, &efl); } static int ethtool_set_dump(struct net_device *dev, @@ -2405,7 +2377,6 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) switch (tuna->id) { case ETHTOOL_RX_COPYBREAK: case ETHTOOL_TX_COPYBREAK: - case ETHTOOL_TX_COPYBREAK_BUF_SIZE: if (tuna->len != sizeof(u32) || tuna->type_id != ETHTOOL_TUNABLE_U32) return -EINVAL; @@ -2724,19 +2695,19 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) /* The main entry point in this file. 
Called from net/core/dev_ioctl.c */ -static int -__dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, - u32 ethcmd, struct ethtool_devlink_compat *devlink_state) +int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) { - struct net_device *dev; - u32 sub_cmd; + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + u32 ethcmd, sub_cmd; int rc; netdev_features_t old_features; - dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) return -ENODEV; + if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) + return -EFAULT; + if (ethcmd == ETHTOOL_PERQUEUE) { if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd))) return -EFAULT; @@ -2810,7 +2781,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, rc = ethtool_set_settings(dev, useraddr); break; case ETHTOOL_GDRVINFO: - rc = ethtool_get_drvinfo(dev, devlink_state); + rc = ethtool_get_drvinfo(dev, useraddr); break; case ETHTOOL_GREGS: rc = ethtool_get_regs(dev, useraddr); @@ -2912,7 +2883,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_FLASHDEV: - rc = ethtool_flash_device(dev, devlink_state); + rc = ethtool_flash_device(dev, useraddr); break; case ETHTOOL_RESET: rc = ethtool_reset(dev, useraddr); @@ -3024,60 +2995,6 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, return rc; } -int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) -{ - struct ethtool_devlink_compat *state; - u32 ethcmd; - int rc; - - if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) - return -EFAULT; - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (!state) - return -ENOMEM; - - switch (ethcmd) { - case ETHTOOL_FLASHDEV: - if (copy_from_user(&state->efl, useraddr, sizeof(state->efl))) { - rc = -EFAULT; - goto exit_free; - } - state->efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; - break; - } - - rtnl_lock(); - rc = __dev_ethtool(net, ifr, useraddr, ethcmd, state); - rtnl_unlock(); - if (rc) - goto exit_free; - - switch (ethcmd) { - case ETHTOOL_FLASHDEV: - if (state->devlink) - rc = devlink_compat_flash_update(state->devlink, - state->efl.data); - break; - case ETHTOOL_GDRVINFO: - if (state->devlink) - devlink_compat_running_version(state->devlink, - state->info.fw_version, - sizeof(state->info.fw_version)); - if (copy_to_user(useraddr, &state->info, sizeof(state->info))) { - rc = -EFAULT; - goto exit_free; - } - break; - } - -exit_free: - if (state->devlink) - devlink_put(state->devlink); - kfree(state); - return rc; -} - struct ethtool_rx_flow_key { struct flow_dissector_key_basic basic; union { diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index efa0f7f488..b91839870e 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -149,6 +149,6 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 99b29b4fe9..f9eda596f3 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -358,6 +358,6 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; }
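The set-handler hunks above all replace ethnl_parse_header_dev_put(&req_info) with a plain dev_put(dev). Below is a minimal sketch of the reference discipline those handlers rely on; my_set_handler() and the dev_get_by_name() lookup are illustrative assumptions, not code from this patch, which resolves the device through the ethnl header parser instead:

	#include <linux/errno.h>
	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>

	/* Sketch only: pin the device, mutate it under RTNL, then release
	 * it with dev_put(), the untracked release these hunks switch to.
	 */
	static int my_set_handler(struct net *net, const char *ifname)
	{
		struct net_device *dev;

		dev = dev_get_by_name(net, ifname);	/* takes a reference */
		if (!dev)
			return -ENODEV;

		rtnl_lock();
		/* ... validate and apply the requested settings on dev ... */
		rtnl_unlock();

		dev_put(dev);				/* drop the reference */
		return 0;
	}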
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5fe8f4ae2c..b3729bdafb 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -142,8 +142,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, } req_info->dev = dev; - if (dev) - netdev_tracker_alloc(dev, &req_info->dev_tracker, GFP_KERNEL); req_info->flags = flags; return 0; } @@ -285,7 +283,6 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_MODULE_EEPROM_GET] = &ethnl_module_eeprom_request_ops, [ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops, [ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops, - [ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -402,7 +399,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); - dev_put_track(req_info->dev, &req_info->dev_tracker); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); @@ -414,7 +411,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: - dev_put_track(req_info->dev, &req_info->dev_tracker); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return ret; @@ -550,7 +547,7 @@ static int ethnl_default_start(struct netlink_callback *cb) * same parser as for non-dump (doit) requests is used, it * would take reference to the device if it finds one */ - dev_put_track(req_info->dev, &req_info->dev_tracker); + dev_put(req_info->dev); req_info->dev = NULL; } if (ret < 0) @@ -597,7 +594,6 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_PAUSE_NTF] = &ethnl_pause_request_ops, [ETHTOOL_MSG_EEE_NTF] = &ethnl_eee_request_ops, [ETHTOOL_MSG_FEC_NTF] = &ethnl_fec_request_ops, - [ETHTOOL_MSG_MODULE_NTF] = &ethnl_module_request_ops, }; /* default notification handler */ @@ -637,6 +633,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, if (ret < 0) goto err_cleanup; reply_len = ret + ethnl_reply_header_size(); + ret = -ENOMEM; skb = genlmsg_new(reply_len, GFP_KERNEL); if (!skb) goto err_cleanup; @@ -690,7 +687,6 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_PAUSE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify, - [ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -1004,22 +1000,6 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_phc_vclocks_get_policy, .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1, }, - { - .cmd = ETHTOOL_MSG_MODULE_GET, - .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, - .policy = ethnl_module_get_policy, - .maxattr = ARRAY_SIZE(ethnl_module_get_policy) - 1, - }, - { - .cmd = ETHTOOL_MSG_MODULE_SET, - .flags = GENL_UNS_ADMIN_PERM, - .doit = ethnl_set_module, - .policy = ethnl_module_set_policy, - .maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1, - }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 75856db299..e8987e2803 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -222,7 +222,6 @@ static inline unsigned int ethnl_reply_header_size(void) /** * struct ethnl_req_info - base type of request information for GET requests * @dev: network device the request is for (may be null) - * @dev_tracker: refcount tracker for @dev reference - *
@flags: request flags common for all request types * * This is a common base for request specific structures holding data from @@ -231,15 +230,9 @@ static inline unsigned int ethnl_reply_header_size(void) */ struct ethnl_req_info { struct net_device *dev; - netdevice_tracker dev_tracker; u32 flags; }; -static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) -{ - dev_put_track(req_info->dev, &req_info->dev_tracker); -} - /** * struct ethnl_reply_data - base type of reply data for GET requests * @dev: device for current reply message; in single shot requests it is @@ -344,7 +337,6 @@ extern const struct ethnl_request_ops ethnl_fec_request_ops; extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops; extern const struct ethnl_request_ops ethnl_stats_request_ops; extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops; -extern const struct ethnl_request_ops ethnl_module_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -363,7 +355,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_BUF_LEN + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; @@ -381,8 +373,6 @@ extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1]; extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1]; extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1]; -extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1]; -extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); @@ -401,7 +391,6 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info); -int ethnl_set_module(struct sk_buff *skb, struct genl_info *info); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index a8c113d244..ee1e5806bc 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -181,6 +181,6 @@ int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c index 
4c7bfa81e4..fc9f3be23a 100644 --- a/net/ethtool/privflags.c +++ b/net/ethtool/privflags.c @@ -196,6 +196,6 @@ int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index c1d5f5e0fd..4e097812a9 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -10,7 +10,6 @@ struct rings_req_info { struct rings_reply_data { struct ethnl_reply_data base; struct ethtool_ringparam ringparam; - struct kernel_ethtool_ringparam kernel_ringparam; }; #define RINGS_REPDATA(__reply_base) \ @@ -26,7 +25,6 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base, struct genl_info *info) { struct rings_reply_data *data = RINGS_REPDATA(reply_base); - struct netlink_ext_ack *extack = info ? info->extack : NULL; struct net_device *dev = reply_base->dev; int ret; @@ -35,8 +33,7 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base, ret = ethnl_ops_begin(dev); if (ret < 0) return ret; - dev->ethtool_ops->get_ringparam(dev, &data->ringparam, - &data->kernel_ringparam, extack); + dev->ethtool_ops->get_ringparam(dev, &data->ringparam); ethnl_ops_complete(dev); return 0; @@ -52,8 +49,7 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_RX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ - nla_total_size(sizeof(u32)) + /* _RINGS_TX */ - nla_total_size(sizeof(u32)); /* _RINGS_RX_BUF_LEN */ + nla_total_size(sizeof(u32)); /* _RINGS_TX */ } static int rings_fill_reply(struct sk_buff *skb, @@ -61,7 +57,6 @@ static int rings_fill_reply(struct sk_buff *skb, const struct ethnl_reply_data *reply_base) { const struct rings_reply_data *data = RINGS_REPDATA(reply_base); - const struct kernel_ethtool_ringparam *kernel_ringparam = &data->kernel_ringparam; const struct ethtool_ringparam *ringparam = &data->ringparam; if ((ringparam->rx_max_pending && @@ -83,10 +78,7 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_MAX, ringparam->tx_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX, - ringparam->tx_pending))) || - (kernel_ringparam->rx_buf_len && - (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, - kernel_ringparam->rx_buf_len)))) + ringparam->tx_pending)))) return -EMSGSIZE; return 0; @@ -113,12 +105,10 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, - [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), }; int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) { - struct kernel_ethtool_ringparam kernel_ringparam = {}; struct ethtool_ringparam ringparam = {}; struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; @@ -144,7 +134,7 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) ret = ethnl_ops_begin(dev); if (ret < 0) goto out_rtnl; - ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); + ops->get_ringparam(dev, &ringparam); ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, @@ -152,8 +142,6 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) ethnl_update_u32(&ringparam.rx_jumbo_pending, tb[ETHTOOL_A_RINGS_RX_JUMBO], &mod); ethnl_update_u32(&ringparam.tx_pending, 
tb[ETHTOOL_A_RINGS_TX], &mod); - ethnl_update_u32(&kernel_ringparam.rx_buf_len, - tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); ret = 0; if (!mod) goto out_ops; @@ -176,17 +164,7 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) goto out_ops; } - if (kernel_ringparam.rx_buf_len != 0 && - !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) { - ret = -EOPNOTSUPP; - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_RINGS_RX_BUF_LEN], - "setting rx buf len not supported"); - goto out_ops; - } - - ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, - &kernel_ringparam, info->extack); + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); if (ret < 0) goto out_ops; ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); @@ -196,6 +174,6 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index a20e0a24ff..ec07f5765e 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -14,12 +14,10 @@ struct stats_req_info { struct stats_reply_data { struct ethnl_reply_data base; - struct_group(stats, - struct ethtool_eth_phy_stats phy_stats; - struct ethtool_eth_mac_stats mac_stats; - struct ethtool_eth_ctrl_stats ctrl_stats; - struct ethtool_rmon_stats rmon_stats; - ); + struct ethtool_eth_phy_stats phy_stats; + struct ethtool_eth_mac_stats mac_stats; + struct ethtool_eth_ctrl_stats ctrl_stats; + struct ethtool_rmon_stats rmon_stats; const struct ethtool_rmon_hist_range *rmon_ranges; }; @@ -119,7 +117,10 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base, /* Mark all stats as unset (see ETHTOOL_STAT_NOT_SET) to prevent them * from being reported to user space in case driver did not set them. 
*/ - memset(&data->stats, 0xff, sizeof(data->stats)); + memset(&data->phy_stats, 0xff, sizeof(data->phy_stats)); + memset(&data->mac_stats, 0xff, sizeof(data->mac_stats)); + memset(&data->ctrl_stats, 0xff, sizeof(data->ctrl_stats)); + memset(&data->rmon_stats, 0xff, sizeof(data->rmon_stats)); if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) && dev->ethtool_ops->get_eth_phy_stats) diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index efde335366..e7f2ee0d24 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -195,7 +195,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto err_free_msg; rtnl_unlock(); - ethnl_parse_header_dev_put(&req_info); + dev_put(req_info.dev); genlmsg_end(rskb, reply_payload); return genlmsg_reply(rskb, info); @@ -204,7 +204,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) nlmsg_free(rskb); err_unlock_rtnl: rtnl_unlock(); - ethnl_parse_header_dev_put(&req_info); + dev_put(req_info.dev); return ret; } @@ -230,7 +230,7 @@ int ethnl_tunnel_info_start(struct netlink_callback *cb) sock_net(cb->skb->sk), cb->extack, false); if (ctx->req_info.dev) { - ethnl_parse_header_dev_put(&ctx->req_info); + dev_put(ctx->req_info.dev); ctx->req_info.dev = NULL; } diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index 88f435e764..ada7df2331 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -165,6 +165,6 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info) out_rtnl: rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + dev_put(dev); return ret; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e57fdad9ef..26c32407f0 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -30,13 +30,13 @@ static bool is_slave_up(struct net_device *dev) static void __hsr_set_operstate(struct net_device *dev, int transition) { - write_lock(&dev_base_lock); + write_lock_bh(&dev_base_lock); if (dev->operstate != transition) { dev->operstate = transition; - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); netdev_state_change(dev); } else { - write_unlock(&dev_base_lock); + write_unlock_bh(&dev_base_lock); } } @@ -309,9 +309,9 @@ static void send_hsr_supervision_frame(struct hsr_port *master, } spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); - hsr_stag->tlv.HSR_TLV_type = type; + hsr_stag->HSR_TLV_type = type; /* TODO: Why 12 in HSRv0? */ - hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? + hsr_stag->HSR_TLV_length = hsr->prot_version ? 
sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ @@ -350,8 +350,8 @@ static void send_prp_supervision_frame(struct hsr_port *master, spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; - hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; - hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload); + hsr_stag->HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; + hsr_stag->HSR_TLV_length = sizeof(struct hsr_sup_payload); /* Payload: MacAddressA */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); @@ -493,7 +493,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], INIT_LIST_HEAD(&hsr->self_node_db); spin_lock_init(&hsr->list_lock); - eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); + ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); /* initialize protocol specific functions */ if (protocol_version == PRP_V1) { diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index e59cbb4f0c..ceb8afb2a6 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -37,8 +37,6 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) struct ethhdr *eth_hdr; struct hsr_sup_tag *hsr_sup_tag; struct hsrv1_ethhdr_sp *hsr_V1_hdr; - struct hsr_sup_tlv *hsr_sup_tlv; - u16 total_length = 0; WARN_ON_ONCE(!skb_mac_header_was_set(skb)); eth_hdr = (struct ethhdr *)skb_mac_header(skb); @@ -55,63 +53,23 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) /* Get the supervision header from correct location. */ if (eth_hdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */ - total_length = sizeof(struct hsrv1_ethhdr_sp); - if (!pskb_may_pull(skb, total_length)) - return false; - hsr_V1_hdr = (struct hsrv1_ethhdr_sp *)skb_mac_header(skb); if (hsr_V1_hdr->hsr.encap_proto != htons(ETH_P_PRP)) return false; hsr_sup_tag = &hsr_V1_hdr->hsr_sup; } else { - total_length = sizeof(struct hsrv0_ethhdr_sp); - if (!pskb_may_pull(skb, total_length)) - return false; - hsr_sup_tag = &((struct hsrv0_ethhdr_sp *)skb_mac_header(skb))->hsr_sup; } - if (hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_ANNOUNCE && - hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_LIFE_CHECK && - hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD && - hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA) + if (hsr_sup_tag->HSR_TLV_type != HSR_TLV_ANNOUNCE && + hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK && + hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD && + hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA) return false; - if (hsr_sup_tag->tlv.HSR_TLV_length != 12 && - hsr_sup_tag->tlv.HSR_TLV_length != sizeof(struct hsr_sup_payload)) - return false; - - /* Get next tlv */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) - return false; - skb_pull(skb, total_length); - hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; - skb_push(skb, total_length); - - /* if this is a redbox supervision frame we need to verify - * that more data is available - */ - if (hsr_sup_tlv->HSR_TLV_type == PRP_TLV_REDBOX_MAC) { - /* tlv length must be a length of a mac address */ - if (hsr_sup_tlv->HSR_TLV_length != sizeof(struct hsr_sup_payload)) - return false; - - /* make sure another tlv follows */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tlv->HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) - return false; - - /* get next tlv */ - skb_pull(skb, total_length); - hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; - 
skb_push(skb, total_length); - } - - /* end of tlvs must follow at the end */ - if (hsr_sup_tlv->HSR_TLV_type == HSR_TLV_EOT && - hsr_sup_tlv->HSR_TLV_length != 0) + if (hsr_sup_tag->HSR_TLV_length != 12 && + hsr_sup_tag->HSR_TLV_length != sizeof(struct hsr_sup_payload)) return false; return true; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 0775f0f95d..e319494793 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -76,8 +76,8 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, * frames from self that's been looped over the HSR ring. */ int hsr_create_self_node(struct hsr_priv *hsr, - const unsigned char addr_a[ETH_ALEN], - const unsigned char addr_b[ETH_ALEN]) + unsigned char addr_a[ETH_ALEN], + unsigned char addr_b[ETH_ALEN]) { struct list_head *self_node_db = &hsr->self_node_db; struct hsr_node *node, *oldnode; @@ -265,14 +265,11 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) struct hsr_port *port_rcv = frame->port_rcv; struct hsr_priv *hsr = port_rcv->hsr; struct hsr_sup_payload *hsr_sp; - struct hsr_sup_tlv *hsr_sup_tlv; struct hsr_node *node_real; struct sk_buff *skb = NULL; struct list_head *node_db; struct ethhdr *ethhdr; int i; - unsigned int pull_size = 0; - unsigned int total_pull_size = 0; /* Here either frame->skb_hsr or frame->skb_prp should be * valid as supervision frame always will have protocol @@ -287,26 +284,18 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) if (!skb) return; - /* Leave the ethernet header. */ - pull_size = sizeof(struct ethhdr); - skb_pull(skb, pull_size); - total_pull_size += pull_size; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + /* Leave the ethernet header. */ + skb_pull(skb, sizeof(struct ethhdr)); + /* And leave the HSR tag. */ - if (ethhdr->h_proto == htons(ETH_P_HSR)) { - pull_size = sizeof(struct ethhdr); - skb_pull(skb, pull_size); - total_pull_size += pull_size; - } + if (ethhdr->h_proto == htons(ETH_P_HSR)) + skb_pull(skb, sizeof(struct hsr_tag)); /* And leave the HSR sup tag. */ - pull_size = sizeof(struct hsr_tag); - skb_pull(skb, pull_size); - total_pull_size += pull_size; + skb_pull(skb, sizeof(struct hsr_sup_tag)); - /* get HSR sup payload */ hsr_sp = (struct hsr_sup_payload *)skb->data; /* Merge node_curr (registered on macaddress_B) into node_real */ @@ -323,37 +312,6 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) /* Node has already been merged */ goto done; - /* Leave the first HSR sup payload. */ - pull_size = sizeof(struct hsr_sup_payload); - skb_pull(skb, pull_size); - total_pull_size += pull_size; - - /* Get second supervision tlv */ - hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; - /* And check if it is a redbox mac TLV */ - if (hsr_sup_tlv->HSR_TLV_type == PRP_TLV_REDBOX_MAC) { - /* We could stop here after pushing hsr_sup_payload, - * or proceed and allow macaddress_B and for redboxes. - */ - /* Sanity check length */ - if (hsr_sup_tlv->HSR_TLV_length != 6) - goto done; - - /* Leave the second HSR sup tlv. */ - pull_size = sizeof(struct hsr_sup_tlv); - skb_pull(skb, pull_size); - total_pull_size += pull_size; - - /* Get redbox mac address. */ - hsr_sp = (struct hsr_sup_payload *)skb->data; - - /* Check if redbox mac and node mac are equal. */ - if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) { - /* This is a redbox supervision frame for a VDAN! 
*/ - goto done; - } - } - ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && @@ -373,8 +331,11 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) kfree_rcu(node_curr, rcu_head); done: - /* Push back here */ - skb_push(skb, total_pull_size); + /* PRP uses v0 header */ + if (ethhdr->h_proto == htons(ETH_P_HSR)) + skb_push(skb, sizeof(struct hsrv1_ethhdr_sp)); + else + skb_push(skb, sizeof(struct hsrv0_ethhdr_sp)); } /* 'skb' is a frame meant for this host, that is to be passed to upper layers. diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index bdbb8c822b..d9628e7a5f 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -48,8 +48,8 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, void hsr_prune_nodes(struct timer_list *t); int hsr_create_self_node(struct hsr_priv *hsr, - const unsigned char addr_a[ETH_ALEN], - const unsigned char addr_b[ETH_ALEN]); + unsigned char addr_a[ETH_ALEN], + unsigned char addr_b[ETH_ALEN]); void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index b099c31501..f7e284f23b 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -75,7 +75,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port->type == HSR_PT_SLAVE_A) { - eth_hw_addr_set(master->dev, dev->dev_addr); + ether_addr_copy(master->dev->dev_addr, dev->dev_addr); call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev); } diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 043e4e9a16..53d1f7a824 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -35,15 +35,13 @@ * HSR_NODE_FORGET_TIME? */ #define PRUNE_PERIOD 3000 /* ms */ -#define HSR_TLV_EOT 0 /* End of TLVs */ + #define HSR_TLV_ANNOUNCE 22 #define HSR_TLV_LIFE_CHECK 23 /* PRP V1 life check for Duplicate discard */ #define PRP_TLV_LIFE_CHECK_DD 20 /* PRP V1 life check for Duplicate Accept */ #define PRP_TLV_LIFE_CHECK_DA 21 -/* PRP V1 life redundancy box MAC address */ -#define PRP_TLV_REDBOX_MAC 30 /* HSR Tag. * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, @@ -96,18 +94,14 @@ struct hsr_vlan_ethhdr { struct hsr_tag hsr_tag; } __packed; -struct hsr_sup_tlv { - u8 HSR_TLV_type; - u8 HSR_TLV_length; -}; - /* HSR/PRP Supervision Frame data types. * Field names as defined in the IEC:2010 standard for HSR. */ struct hsr_sup_tag { - __be16 path_and_HSR_ver; - __be16 sequence_nr; - struct hsr_sup_tlv tlv; + __be16 path_and_HSR_ver; + __be16 sequence_nr; + __u8 HSR_TLV_type; + __u8 HSR_TLV_length; } __packed; struct hsr_sup_payload { diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 2cf62718a2..3297e7fa99 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -157,7 +157,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, lowpan_802154_dev(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ - __dev_addr_set(ldev, wdev->dev_addr, IEEE802154_ADDR_LEN); + memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); /* We need headroom for possible wpan_dev_hard_header call and tailroom * for encryption/fcs handling. The lowpan interface will replace * the IPv6 header with 6LoWPAN header. 
At worst case the 6LoWPAN diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 72fde2888a..77534b44b8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -99,7 +99,6 @@ #include #include #include -#include #include #include #include @@ -134,9 +133,13 @@ void inet_sock_destruct(struct sock *sk) struct inet_sock *inet = inet_sk(sk); __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_rx_skb_cache) { + __kfree_skb(sk->sk_rx_skb_cache); + sk->sk_rx_skb_cache = NULL; + } __skb_queue_purge(&sk->sk_error_queue); - sk_mem_reclaim_final(sk); + sk_mem_reclaim(sk); if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { pr_err("Attempt to release TCP socket in state %d %p\n", @@ -151,7 +154,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); - WARN_ON(sk_forward_alloc_get(sk)); + WARN_ON(sk->sk_forward_alloc); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); @@ -225,7 +228,7 @@ int inet_listen(struct socket *sock, int backlog) tcp_fastopen_init_key_once(sock_net(sk)); } - err = inet_csk_listen_start(sk); + err = inet_csk_listen_start(sk, backlog); if (err) goto out; tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); @@ -489,8 +492,11 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, * is temporarily down) */ err = -EADDRNOTAVAIL; - if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr, - chk_addr_ret)) + if (!inet_can_nonlocal_bind(net, inet) && + addr->sin_addr.s_addr != htonl(INADDR_ANY) && + chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && + chk_addr_ret != RTN_BROADCAST) goto out; snum = ntohs(addr->sin_port); @@ -531,8 +537,6 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; - if (sk->sk_prot->put_port) - sk->sk_prot->put_port(sk); goto out_release_sock; } } @@ -1457,18 +1461,19 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) proto = iph->protocol; + rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) - goto out; + goto out_unlock; if (*(u8 *)iph != 0x45) - goto out; + goto out_unlock; if (ip_is_fragment(iph)) - goto out; + goto out_unlock; if (unlikely(ip_fast_csum((u8 *)iph, 5))) - goto out; + goto out_unlock; id = ntohl(*(__be32 *)&iph->id); flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); @@ -1545,6 +1550,9 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive, ops->callbacks.gro_receive, head, skb); +out_unlock: + rcu_read_unlock(); + out: skb_gro_flush_final(skb, pp, flush); @@ -1617,9 +1625,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff) csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; + rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) - goto out; + goto out_unlock; /* Only need to add sizeof(*iph) to get to the next hdr below * because any hdr with option will have been flushed in @@ -1629,7 +1638,9 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff) tcp4_gro_complete, udp4_gro_complete, skb, nhoff + sizeof(*iph)); -out: +out_unlock: + rcu_read_unlock(); + return err; } @@ -1660,6 +1671,12 @@ int 
inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); +u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt) +{ + return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt); +} +EXPORT_SYMBOL_GPL(snmp_get_cpu_field); + unsigned long snmp_fold_field(void __percpu *mib, int offt) { unsigned long res = 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4db0325f6e..922dd73e57 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1247,8 +1247,6 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; - struct in_device *in_dev; - bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: @@ -1259,14 +1257,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&arp_tbl, dev); - - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) - evict_nocarrier = true; - else - evict_nocarrier = IN_DEV_ARP_EVICT_NOCARRIER(in_dev); - - if (evict_nocarrier && !netif_carrier_ok(dev)) + if (!netif_carrier_ok(dev)) neigh_carrier_down(&arp_tbl, dev); break; default: @@ -1299,6 +1290,21 @@ static struct packet_type arp_packet_type __read_mostly = { .func = arp_rcv, }; +static int arp_proc_init(void); + +void __init arp_init(void) +{ + neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl); + + dev_add_pack(&arp_packet_type); + arp_proc_init(); +#ifdef CONFIG_SYSCTL + neigh_sysctl_register(NULL, &arp_tbl.parms, NULL); +#endif + register_netdevice_notifier(&arp_netdev_notifier); +} + +#ifdef CONFIG_PROC_FS #if IS_ENABLED(CONFIG_AX25) /* ------------------------------------------------------------------------ */ @@ -1436,14 +1442,16 @@ static struct pernet_operations arp_net_ops = { .exit = arp_net_exit, }; -void __init arp_init(void) +static int __init arp_proc_init(void) { - neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl); - - dev_add_pack(&arp_packet_type); - register_pernet_subsys(&arp_net_ops); -#ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, NULL); -#endif - register_netdevice_notifier(&arp_netdev_notifier); + return register_pernet_subsys(&arp_net_ops); } + +#else /* CONFIG_PROC_FS */ + +static int __init arp_proc_init(void) +{ + return 0; +} + +#endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index de610cb836..d3a2dbd13e 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -81,7 +81,14 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info)) + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + + if (!btf_ctx_access(off, size, type, prog, info)) return false; if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id) @@ -169,7 +176,7 @@ static u32 prog_ops_moff(const struct bpf_prog *prog) t = bpf_tcp_congestion_ops.type; m = &btf_type_member(t)[midx]; - return __btf_member_bit_offset(t, m) / 8; + return btf_member_bit_offset(t, m) / 8; } static const struct bpf_func_proto * @@ -218,13 +225,41 @@ BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) +#ifdef CONFIG_X86 +#ifdef CONFIG_DYNAMIC_FTRACE +#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC) +BTF_ID(func, 
cubictcp_init) +BTF_ID(func, cubictcp_recalc_ssthresh) +BTF_ID(func, cubictcp_cong_avoid) +BTF_ID(func, cubictcp_state) +BTF_ID(func, cubictcp_cwnd_event) +BTF_ID(func, cubictcp_acked) +#endif +#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP) +BTF_ID(func, dctcp_init) +BTF_ID(func, dctcp_update_alpha) +BTF_ID(func, dctcp_cwnd_event) +BTF_ID(func, dctcp_ssthresh) +BTF_ID(func, dctcp_cwnd_undo) +BTF_ID(func, dctcp_state) +#endif +#if IS_BUILTIN(CONFIG_TCP_CONG_BBR) +BTF_ID(func, bbr_init) +BTF_ID(func, bbr_main) +BTF_ID(func, bbr_sndbuf_expand) +BTF_ID(func, bbr_undo_cwnd) +BTF_ID(func, bbr_cwnd_event) +BTF_ID(func, bbr_ssthresh) +BTF_ID(func, bbr_min_tso_segs) +BTF_ID(func, bbr_set_state) +#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_X86 */ BTF_SET_END(bpf_tcp_ca_kfunc_ids) -static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) +static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id) { - if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id)) - return true; - return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner); + return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id); } static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { @@ -246,7 +281,7 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, utcp_ca = (const struct tcp_congestion_ops *)udata; tcp_ca = (struct tcp_congestion_ops *)kdata; - moff = __btf_member_bit_offset(t, member) / 8; + moff = btf_member_bit_offset(t, member) / 8; switch (moff) { case offsetof(struct tcp_congestion_ops, flags): if (utcp_ca->flags & ~TCP_CONG_MASK) @@ -276,7 +311,7 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, static int bpf_tcp_ca_check_member(const struct btf_type *t, const struct btf_member *member) { - if (is_unsupported(__btf_member_bit_offset(t, member) / 8)) + if (is_unsupported(btf_member_bit_offset(t, member) / 8)) return -ENOTSUPP; return 0; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 62d5f99760..099259fc82 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -73,7 +73,7 @@ struct cipso_v4_map_cache_entry { static struct cipso_v4_map_cache_bkt *cipso_v4_cache; /* Restricted bitmap (tag #1) flags */ -int cipso_v4_rbm_optfmt; +int cipso_v4_rbm_optfmt = 0; int cipso_v4_rbm_strictvalid = 1; /* diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 48f337ccf9..4a8550c492 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index fba2bffd65..4744c7839d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -75,7 +75,6 @@ static struct ipv4_devconf ipv4_devconf = { [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, - [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1, }, }; @@ -88,7 +87,6 @@ static struct ipv4_devconf ipv4_devconf_dflt = { [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, - [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1, }, }; @@ -243,7 +241,7 @@ void in_dev_finish_destroy(struct in_device *idev) #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? 
dev->name : "NIL"); #endif - dev_put_track(dev, &idev->dev_tracker); + dev_put(dev); if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else @@ -271,7 +269,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) dev_disable_lro(dev); /* Reference in_dev->dev */ - dev_hold_track(dev, &in_dev->dev_tracker, GFP_KERNEL); + dev_hold(dev); /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); @@ -2534,8 +2532,6 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), - DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER, - "arp_evict_nocarrier"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, "force_igmp_version"), diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 851f542928..e1b1d080e9 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -671,7 +671,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index d87f02a6e9..8e4e9aa121 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 85117b4521..4d61ddd8a0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -436,9 +436,6 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (net->ipv4.fib_has_custom_local_routes || fib4_has_custom_rules(net)) goto full_check; - /* Within the same container, it is regarded as a martian source, - * and the same host but different containers are not. 
- */ if (inet_lookup_ifaddr_rcu(net, src)) return -EINVAL; diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index 0e23ade744..0c28bd469a 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -6,6 +6,7 @@ #include #include #include +#include #include int call_fib4_notifier(struct notifier_block *nb, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index e0b6c8b6de..d279cb8ac1 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -216,6 +216,11 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { + FRA_GENERIC_POLICY, + [FRA_FLOW] = { .type = NLA_U32 }, +}; + static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, @@ -381,6 +386,7 @@ static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = { .nlmsg_payload = fib4_rule_nlmsg_payload, .flush_cache = fib4_rule_flush_cache, .nlgroup = RTNLGRP_IPV4_RULE, + .policy = fib4_rule_policy, .owner = THIS_MODULE, }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2dd375f740..d244c57b73 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -52,7 +52,6 @@ static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_info_hash_size; -static unsigned int fib_info_hash_bits; static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 @@ -210,7 +209,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) void fib_nh_common_release(struct fib_nh_common *nhc) { - dev_put_track(nhc->nhc_dev, &nhc->nhc_dev_tracker); + dev_put(nhc->nhc_dev); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); @@ -1051,7 +1050,7 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL); + dev_hold(nh->fib_nh_dev); nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; @@ -1135,7 +1134,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; nh->fib_nh_dev = dev; - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + dev_hold(dev); nh->fib_nh_scope = RT_SCOPE_LINK; return 0; } @@ -1189,7 +1188,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, "No egress device for nexthop gateway"); goto out; } - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + dev_hold(dev); if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 
0 : -ENETDOWN; @@ -1223,7 +1222,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, } nh->fib_nh_dev = in_dev->dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + dev_hold(nh->fib_nh_dev); nh->fib_nh_scope = RT_SCOPE_HOST; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; @@ -1248,13 +1247,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, return err; } -static struct hlist_head * -fib_info_laddrhash_bucket(const struct net *net, __be32 val) +static inline unsigned int fib_laddr_hashfn(__be32 val) { - u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val, - fib_info_hash_bits); + unsigned int mask = (fib_info_hash_size - 1); - return &fib_info_laddrhash[slot]; + return ((__force u32)val ^ + ((__force u32)val >> 7) ^ + ((__force u32)val >> 14)) & mask; } static struct hlist_head *fib_info_hash_alloc(int bytes) @@ -1290,7 +1289,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_info_hash_size = new_size; - fib_info_hash_bits = ilog2(new_size); for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; @@ -1308,20 +1306,21 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, } fib_info_hash = new_info_hash; - fib_info_laddrhash = new_laddrhash; for (i = 0; i < old_size; i++) { - struct hlist_head *lhead = &old_laddrhash[i]; + struct hlist_head *lhead = &fib_info_laddrhash[i]; struct hlist_node *n; struct fib_info *fi; hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; + unsigned int new_hash; - ldest = fib_info_laddrhash_bucket(fi->fib_net, - fi->fib_prefsrc); + new_hash = fib_laddr_hashfn(fi->fib_prefsrc); + ldest = &new_laddrhash[new_hash]; hlist_add_head(&fi->fib_lhash, ldest); } } + fib_info_laddrhash = new_laddrhash; spin_unlock_bh(&fib_info_lock); @@ -1554,8 +1553,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, err = -ENODEV; if (!nh->fib_nh_dev) goto failure; - netdev_tracker_alloc(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, - GFP_KERNEL); } else { int linkdown = 0; @@ -1606,7 +1603,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, if (fi->fib_prefsrc) { struct hlist_head *head; - head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc); + head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; hlist_add_head(&fi->fib_lhash, head); } if (fi->nh) { @@ -1878,16 +1875,16 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, */ int fib_sync_down_addr(struct net_device *dev, __be32 local) { + int ret = 0; + unsigned int hash = fib_laddr_hashfn(local); + struct hlist_head *head = &fib_info_laddrhash[hash]; int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); - struct hlist_head *head; struct fib_info *fi; - int ret = 0; if (!fib_info_laddrhash || local == 0) return 0; - head = fib_info_laddrhash_bucket(net, local); hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net) || fi->fib_tb_id != tb_id) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 0d085cc8d9..8fcbc6258e 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -247,14 +246,17 @@ static struct sk_buff *fou_gro_receive(struct sock *sk, /* Flag this frame as already having an outer encap header */ NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? 
inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (!ops || !ops->callbacks.gro_receive) - goto out; + goto out_unlock; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); -out: +out_unlock: + rcu_read_unlock(); + return pp; } @@ -266,16 +268,19 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, const struct net_offload *ops; int err = -ENOSYS; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) - goto out; + goto out_unlock; err = ops->callbacks.gro_complete(skb, nhoff); skb_set_inner_mac_header(skb, nhoff); -out: +out_unlock: + rcu_read_unlock(); + return err; } @@ -433,14 +438,17 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, /* Flag this frame as already having an outer encap header */ NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) - goto out; + goto out_unlock; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); flush = 0; +out_unlock: + rcu_read_unlock(); out: skb_gro_flush_final_remcsum(skb, pp, flush, &grc); @@ -477,16 +485,18 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) return err; } + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) - goto out; + goto out_unlock; err = ops->callbacks.gro_complete(skb, nhoff + guehlen); skb_set_inner_mac_header(skb, nhoff + guehlen); -out: +out_unlock: + rcu_read_unlock(); return err; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 07073fa352..1121a9d5fe 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -10,7 +10,6 @@ #include #include #include -#include static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -163,9 +162,10 @@ static struct sk_buff *gre_gro_receive(struct list_head *head, type = greh->protocol; + rcu_read_lock(); ptype = gro_find_receive_by_type(type); if (!ptype) - goto out; + goto out_unlock; grehlen = GRE_HEADER_SECTION; @@ -179,13 +179,13 @@ static struct sk_buff *gre_gro_receive(struct list_head *head, if (skb_gro_header_hard(skb, hlen)) { greh = skb_gro_header_slow(skb, hlen, off); if (unlikely(!greh)) - goto out; + goto out_unlock; } /* Don't bother verifying checksum if we're going to flush anyway. 
*/ if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush) { if (skb_gro_checksum_simple_validate(skb)) - goto out; + goto out_unlock; skb_gro_checksum_try_convert(skb, IPPROTO_GRE, null_compute_pseudo); @@ -229,6 +229,8 @@ static struct sk_buff *gre_gro_receive(struct list_head *head, pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; +out_unlock: + rcu_read_unlock(); out: skb_gro_flush_final(skb, pp, flush); @@ -253,10 +255,13 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff) if (greh->flags & GRE_CSUM) grehlen += GRE_HEADER_SECTION; + rcu_read_lock(); ptype = gro_find_complete_by_type(type); if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); + rcu_read_unlock(); + skb_set_inner_mac_header(skb, nhoff + grehlen); return err; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2ad3c7b42d..d2e2b3d18c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2558,6 +2558,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, msf->imsf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); if (!psl) { + len = 0; count = 0; } else { count = psl->sl_count; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fc2a985f60..62a67fdc34 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1035,7 +1035,7 @@ void inet_csk_prepare_forced_close(struct sock *sk) } EXPORT_SYMBOL(inet_csk_prepare_forced_close); -int inet_csk_listen_start(struct sock *sk) +int inet_csk_listen_start(struct sock *sk, int backlog) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 581b5b2d72..ae70e07c52 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -272,7 +272,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), - .idiag_fmem = sk_forward_alloc_get(sk), + .idiag_fmem = sk->sk_forward_alloc, .idiag_tmem = sk_wmem_alloc_get(sk), }; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 30ab717ff1..7573726774 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -307,7 +307,7 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, - __be32 daddr, u16 hnum, const int dif) + __be32 daddr, u16 hnum) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -315,8 +315,8 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, if (hashinfo != &tcp_hashinfo) return NULL; /* only TCP is supported */ - no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport, - daddr, hnum, dif, &sk); + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, + saddr, sport, daddr, hnum, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; @@ -340,7 +340,7 @@ struct sock *__inet_lookup_listener(struct net *net, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { result = inet_lookup_run_bpf(net, hashinfo, skb, doff, - saddr, sport, daddr, hnum, dif); + saddr, sport, daddr, hnum); if (result) goto done; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 99db2e41ed..e7f3e37e4a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -987,7 +987,7 @@ static int ipgre_tunnel_init(struct net_device *dev) __gre_tunnel_init(dev); - __dev_addr_set(dev, &iph->saddr, 4); + 
memcpy(dev->dev_addr, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7911916a48..131066d031 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -679,6 +679,7 @@ struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state) struct sk_buff *skb2; struct iphdr *iph; + len = state->left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > state->mtu) len = state->mtu; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 445a9ecaef..b297bb2855 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -576,7 +576,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) return err; } -void __ip_sock_set_tos(struct sock *sk, int val) +static void __ip_sock_set_tos(struct sock *sk, int val) { if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -886,8 +886,6 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname, return ip_mc_leave_group(sk, &mreq); } -DEFINE_STATIC_KEY_FALSE(ip4_min_ttl); - static int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1354,14 +1352,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, goto e_inval; if (val < 0 || val > 255) goto e_inval; - - if (val) - static_branch_enable(&ip4_min_ttl); - - /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl - * while we are changint it. - */ - WRITE_ONCE(inet->min_ttl, val); + inet->min_ttl = val; break; default: diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5a473319d3..fe9101d3d6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -834,7 +834,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; if (dev->type != ARPHRD_ETHER) { - __dev_addr_set(dev, &p->iph.saddr, 4); + memcpy(dev->dev_addr, &p->iph.saddr, 4); memcpy(dev->broadcast, &p->iph.daddr, 4); } ip_tunnel_add(itn, t); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 8c2bd1d9dd..efe25a0172 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -425,7 +425,7 @@ static int vti_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - __dev_addr_set(dev, &iph->saddr, 4); + memcpy(dev->dev_addr, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 9d41d5d5cd..816d8aad5a 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -262,11 +262,6 @@ static int __init ic_open_devs(void) dev->name, able, d->xid); } } - /* Devices with a complex topology like SFP ethernet interfaces needs - * the rtnl_lock at init. The carrier wait-loop must therefore run - * without holding it. 
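/*
 * [editor's note] The ipconfig hunk above moves the RTNL lock back around
 * the whole carrier wait-loop. The per-iteration variant being removed
 * looks roughly like this (a simplified sketch, not the exact ipconfig
 * code):
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>

static bool any_init_dev_has_carrier(void)
{
	struct net_device *dev;
	bool up = false;

	rtnl_lock();		/* hold RTNL only for the walk itself */
	for_each_netdev(&init_net, dev)
		if (netif_carrier_ok(dev)) {
			up = true;
			break;
		}
	rtnl_unlock();		/* drop it before sleeping and retrying */
	return up;
}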
- */ - rtnl_unlock(); /* no point in waiting if we could not bring up at least one device */ if (!ic_first_dev) @@ -279,13 +274,9 @@ static int __init ic_open_devs(void) msecs_to_jiffies(carrier_timeout * 1000))) { int wait, elapsed; - rtnl_lock(); for_each_netdev(&init_net, dev) - if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) { - rtnl_unlock(); + if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) goto have_carrier; - } - rtnl_unlock(); msleep(1); @@ -298,6 +289,7 @@ static int __init ic_open_devs(void) next_msg = jiffies + msecs_to_jiffies(20000); } have_carrier: + rtnl_unlock(); *last = NULL; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 123ea63a04..3aa78ccbec 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -380,7 +380,7 @@ static int ipip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - __dev_addr_set(dev, &tunnel->parms.iph.saddr, 4); + memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); tunnel->tun_hlen = 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 29bbe2b08a..aea29d97f8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -195,6 +195,10 @@ static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 1; } +static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { + FRA_GENERIC_POLICY, +}; + static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, struct netlink_ext_ack *extack) @@ -227,6 +231,7 @@ static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { .compare = ipmr_rule_compare, .fill = ipmr_rule_fill, .nlgroup = RTNLGRP_IPV4_RULE, + .policy = ipmr_rule_policy, .owner = THIS_MODULE, }; @@ -693,7 +698,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); - dev_put_track(dev, &v->dev_tracker); + dev_put(dev); return 0; } @@ -893,7 +898,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); v->dev = dev; - netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; if (vifi+1 > mrt->maxvif) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index aab384126f..63cb953bd0 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -58,6 +58,14 @@ config NF_TABLES_ARP endif # NF_TABLES +config NF_FLOW_TABLE_IPV4 + tristate "Netfilter flow table IPv4 module" + depends on NF_FLOW_TABLE + help + This option adds the flow table IPv4 support. + + To compile it as a module, choose M here. 
+ config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 93bad11842..f38fb1368d 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -24,6 +24,9 @@ obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o +# flow table support +obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ffc0cab7cf..c53f14b943 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -179,11 +179,10 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) return (void *)entry + entry->next_offset; } -unsigned int arpt_do_table(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) +unsigned int arpt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct xt_table *table) { - const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 78cd5ee244..3de78416ec 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -26,6 +26,14 @@ static const struct xt_table packet_filter = { .priority = NF_IP_PRI_FILTER, }; +/* The work comes in here from netfilter.c */ +static unsigned int +arptable_filter_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return arpt_do_table(skb, state, priv); +} + static struct nf_hook_ops *arpfilter_ops __read_mostly; static int arptable_filter_table_init(struct net *net) @@ -64,7 +72,7 @@ static int __init arptable_filter_init(void) if (ret < 0) return ret; - arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); if (IS_ERR(arpfilter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(arpfilter_ops); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2ed7c58b47..13acb687c1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -222,11 +222,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. 
*/ unsigned int -ipt_do_table(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) +ipt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct xt_table *table) { - const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index b9062f4552..0eb0e2ab9b 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -28,6 +28,13 @@ static const struct xt_table packet_filter = { .priority = NF_IP_PRI_FILTER, }; +static unsigned int +iptable_filter_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ipt_do_table(skb, state, priv); +} + static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ @@ -83,7 +90,7 @@ static int __init iptable_filter_init(void) if (ret < 0) return ret; - filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); + filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 56f6ecc434..45d7e072e6 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -29,27 +29,34 @@ static const struct xt_table nf_nat_ipv4_table = { .af = NFPROTO_IPV4, }; +static unsigned int iptable_nat_do_chain(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ipt_do_table(skb, state, priv); +} + static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { - .hook = ipt_do_table, + .hook = iptable_nat_do_chain, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, }, { - .hook = ipt_do_table, + .hook = iptable_nat_do_chain, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, { - .hook = ipt_do_table, + .hook = iptable_nat_do_chain, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, }, { - .hook = ipt_do_table, + .hook = iptable_nat_do_chain, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index ca5e5b2158..8265c67657 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -32,6 +32,14 @@ static const struct xt_table packet_raw_before_defrag = { .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, }; +/* The work comes in here from netfilter.c. 
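/*
 * [editor's note] Every wrapper added in these netfilter files has the
 * same shape: the generic nf_hook_fn signature carries the xt_table
 * through its void *priv argument, and a trampoline forwards to the older
 * three-argument ipt_do_table(). Condensed sketch (the ops values below
 * are examples, not from any one table in this patch):
 */
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_tables.h>

static unsigned int demo_hook(void *priv, struct sk_buff *skb,
			      const struct nf_hook_state *state)
{
	return ipt_do_table(skb, state, priv);	/* priv is the table */
}

static const struct nf_hook_ops demo_ops = {
	.hook		= demo_hook,
	.pf		= NFPROTO_IPV4,
	.hooknum	= NF_INET_LOCAL_IN,
	.priority	= NF_IP_PRI_FILTER,
};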
*/ +static unsigned int +iptable_raw_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ipt_do_table(skb, state, priv); +} + static struct nf_hook_ops *rawtable_ops __read_mostly; static int iptable_raw_table_init(struct net *net) @@ -82,7 +90,7 @@ static int __init iptable_raw_init(void) if (ret < 0) return ret; - rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); + rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook); if (IS_ERR(rawtable_ops)) { xt_unregister_template(table); return PTR_ERR(rawtable_ops); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index d885443cb2..f519162a2f 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -33,6 +33,13 @@ static const struct xt_table security_table = { .priority = NF_IP_PRI_SECURITY, }; +static unsigned int +iptable_security_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ipt_do_table(skb, state, priv); +} + static struct nf_hook_ops *sectbl_ops __read_mostly; static int iptable_security_table_init(struct net *net) @@ -71,7 +78,7 @@ static int __init iptable_security_init(void) if (ret < 0) return ret; - sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); + sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); if (IS_ERR(sectbl_ops)) { xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index e69de29bb2..aba65fe903 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +static struct nf_flowtable_type flowtable_ipv4 = { + .family = NFPROTO_IPV4, + .init = nf_flow_table_init, + .setup = nf_flow_table_offload_setup, + .action = nf_flow_rule_route_ipv4, + .free = nf_flow_table_free, + .hook = nf_flow_offload_ip_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_ipv4_module_init(void) +{ + nft_register_flowtable_type(&flowtable_ipv4); + + return 0; +} + +static void __exit nf_flow_ipv4_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_ipv4); +} + +module_init(nf_flow_ipv4_module_init); +module_exit(nf_flow_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET); +MODULE_DESCRIPTION("Netfilter flow table support"); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index eeafeccebb..5dbd4b5505 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -1919,6 +1918,9 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, if (!replaced_nh->is_group) return; + /* new dsts must use only the new nexthop group */ + synchronize_net(); + nhg = rtnl_dereference(replaced_nh->nh_grp); for (i = 0; i < nhg->num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; @@ -2000,10 +2002,9 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, rcu_assign_pointer(old->nh_grp, newg); - /* Make sure concurrent readers are not using 'oldg' anymore. */ - synchronize_net(); - if (newg->resilient) { + /* Make sure concurrent readers are not using 'oldg' anymore. 
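/*
 * [editor's note] The nexthop hunk above is the standard RCU
 * publish-then-wait sequence: install the replacement pointer, then wait
 * for pre-existing readers before reusing what they might still see.
 * Generic sketch (the cfg type and names are hypothetical;
 * synchronize_net() is the networking flavour of synchronize_rcu()):
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cfg { int val; };

static struct cfg __rcu *active_cfg;

static void replace_cfg(struct cfg *newc)
{
	struct cfg *oldc = rcu_dereference_protected(active_cfg, 1);

	rcu_assign_pointer(active_cfg, newc);	/* publish */
	synchronize_rcu();			/* wait out old readers */
	kfree(oldc);				/* now safe to free/reuse */
}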
*/ + synchronize_net(); rcu_assign_pointer(oldg->res_table, tmp_table); rcu_assign_pointer(oldg->spare->res_table, tmp_table); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3ee947557b..36e89b6873 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -318,11 +318,15 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); - chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + else + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); - if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk), - addr->sin_addr.s_addr, - chk_addr_ret)) + if ((!inet_can_nonlocal_bind(net, isk) && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) return -EADDRNOTAVAIL; #if IS_ENABLED(CONFIG_IPV6) @@ -1001,7 +1005,6 @@ struct proto ping_prot = { .hash = ping_hash, .unhash = ping_unhash, .get_port = ping_get_port, - .put_port = ping_unhash, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9f97b9cbf7..b868905207 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -99,8 +99,8 @@ int raw_hash_sk(struct sock *sk) write_lock_bh(&h->lock); sk_add_node(sk, head); - write_unlock_bh(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&h->lock); return 0; } @@ -717,7 +717,6 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; - struct net *net = sock_net(sk); u32 tb_id = RT_TABLE_LOCAL; int ret = -EINVAL; int chk_addr_ret; @@ -727,16 +726,16 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; if (sk->sk_bound_dev_if) - tb_id = l3mdev_fib_table_by_index(net, - sk->sk_bound_dev_if) ? : tb_id; + tb_id = l3mdev_fib_table_by_index(sock_net(sk), + sk->sk_bound_dev_if) ? 
: tb_id; - chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); + chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr, + tb_id); ret = -EADDRNOTAVAIL; - if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr, - chk_addr_ret)) + if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; - inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ @@ -974,7 +973,7 @@ struct proto raw_prot = { static struct sock *raw_get_first(struct seq_file *seq) { struct sock *sk; - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; @@ -990,7 +989,7 @@ static struct sock *raw_get_first(struct seq_file *seq) static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); do { @@ -1019,7 +1018,7 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) void *raw_seq_start(struct seq_file *seq, loff_t *pos) __acquires(&h->lock) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); read_lock(&h->lock); return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -1042,7 +1041,7 @@ EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) __releases(&h->lock) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); read_unlock(&h->lock); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f33ad1f383..2383366058 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -61,11 +61,15 @@ #define pr_fmt(fmt) "IPv4: " fmt #include +#include #include +#include #include #include #include +#include #include +#include #include #include #include @@ -80,17 +84,20 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -102,6 +109,7 @@ #endif #include #include +#include #include "fib_lookup.h" @@ -110,15 +118,14 @@ #define RT_GC_TIMEOUT (300*HZ) -#define DEFAULT_MIN_PMTU (512 + 20 + 20) -#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) - static int ip_rt_max_size; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; +static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; +static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; @@ -603,7 +610,7 @@ static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) static u32 fnhe_hashfun(__be32 daddr) { - static siphash_aligned_key_t fnhe_hash_key; + static siphash_key_t fnhe_hash_key __read_mostly; u64 hval; net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); @@ -1019,13 +1026,13 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, 
u32 mtu) if (old_mtu < mtu) return; - if (mtu < net->ipv4.ip_rt_min_pmtu) { + if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); + mtu = min(old_mtu, ip_rt_min_pmtu); } if (rt->rt_pmtu == mtu && !lock && - time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) + time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; rcu_read_lock(); @@ -1035,7 +1042,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) fib_select_path(net, &res, fl4, NULL); nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, - jiffies + net->ipv4.ip_rt_mtu_expires); + jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); } @@ -1532,9 +1539,8 @@ void rt_flush_dev(struct net_device *dev) if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, - &rt->dst.dev_tracker, - GFP_ATOMIC); + dev_hold(rt->dst.dev); + dev_put(dev); } spin_unlock_bh(&ul->lock); } @@ -2821,7 +2827,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->output = dst_discard_out; new->dev = net->loopback_dev; - dev_hold_track(new->dev, &new->dev_tracker, GFP_ATOMIC); + dev_hold(new->dev); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; @@ -3535,6 +3541,21 @@ static struct ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "mtu_expires", + .data = &ip_rt_mtu_expires, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "min_pmtu", + .data = &ip_rt_min_pmtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &ip_min_valid_pmtu, + }, { .procname = "min_adv_mss", .data = &ip_rt_min_advmss, @@ -3547,28 +3568,13 @@ static struct ctl_table ipv4_route_table[] = { static const char ipv4_route_flush_procname[] = "flush"; -static struct ctl_table ipv4_route_netns_table[] = { +static struct ctl_table ipv4_route_flush_table[] = { { .procname = ipv4_route_flush_procname, .maxlen = sizeof(int), .mode = 0200, .proc_handler = ipv4_sysctl_rtcache_flush, }, - { - .procname = "min_pmtu", - .data = &init_net.ipv4.ip_rt_min_pmtu, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &ip_min_valid_pmtu, - }, - { - .procname = "mtu_expires", - .data = &init_net.ipv4.ip_rt_mtu_expires, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { }, }; @@ -3576,11 +3582,9 @@ static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; - tbl = ipv4_route_netns_table; + tbl = ipv4_route_flush_table; if (!net_eq(net, &init_net)) { - int i; - - tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL); + tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); if (!tbl) goto err_dup; @@ -3589,12 +3593,6 @@ static __net_init int sysctl_route_net_init(struct net *net) if (tbl[0].procname != ipv4_route_flush_procname) tbl[0].procname = NULL; } - - /* Update the variables to point into the current struct net - * except for the first element flush - */ - for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++) - tbl[i].data += (void *)net - (void *)&init_net; } tbl[0].extra1 = net; @@ -3604,7 +3602,7 @@ static __net_init int sysctl_route_net_init(struct net *net) return 0; err_reg: - if (tbl != ipv4_route_netns_table) + if (tbl != ipv4_route_flush_table) kfree(tbl); err_dup: return -ENOMEM; @@ -3616,7 +3614,7 @@ static 
__net_exit void sysctl_route_net_exit(struct net *net) tbl = net->ipv4.route_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.route_hdr); - BUG_ON(tbl == ipv4_route_netns_table); + BUG_ON(tbl == ipv4_route_flush_table); kfree(tbl); } @@ -3626,18 +3624,6 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { }; #endif -static __net_init int netns_ip_rt_init(struct net *net) -{ - /* Set default value for namespaceified sysctls */ - net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; - net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; - return 0; -} - -static struct pernet_operations __net_initdata ip_rt_ops = { - .init = netns_ip_rt_init, -}; - static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->ipv4.rt_genid, 0); @@ -3743,7 +3729,6 @@ int __init ip_rt_init(void) #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif - register_pernet_subsys(&ip_rt_ops); register_pernet_subsys(&rt_genid_ops); register_pernet_subsys(&ipv4_inetpeer_ops); return 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 2cb3b852d1..33792cf55a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -7,6 +7,8 @@ */ #include +#include +#include #include #include #include @@ -14,7 +16,7 @@ #include #include -static siphash_aligned_key_t syncookie_secret[2]; +static siphash_key_t syncookie_secret[2] __read_mostly; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 97eb547749..6f1e64d492 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -6,16 +6,25 @@ * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] */ +#include +#include #include +#include +#include #include #include #include +#include +#include +#include #include #include #include +#include #include #include #include +#include #include #include #include @@ -585,6 +594,18 @@ static struct ctl_table ipv4_table[] = { .extra1 = &sysctl_fib_sync_mem_min, .extra2 = &sysctl_fib_sync_mem_max, }, + { + .procname = "tcp_rx_skb_cache", + .data = &tcp_rx_skb_cache_key.key, + .mode = 0644, + .proc_handler = proc_do_static_key, + }, + { + .procname = "tcp_tx_skb_cache", + .data = &tcp_tx_skb_cache_key.key, + .mode = 0644, + .proc_handler = proc_do_static_key, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 02cb275e54..f48f1059b3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -260,6 +260,7 @@ #include #include #include +#include #include #include #include @@ -292,7 +293,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); -atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ +atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); #if IS_ENABLED(CONFIG_SMC) @@ -303,7 +304,7 @@ EXPORT_SYMBOL(tcp_have_smc); /* * Current number of TCP sockets. 
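/*
 * [editor's note] The tcp_rx_skb_cache/tcp_tx_skb_cache entries re-added
 * above toggle static branches from /proc. Wiring a static key to
 * proc_do_static_key, in miniature (the key and file names below are
 * hypothetical):
 */
#include <linux/jump_label.h>
#include <linux/sysctl.h>

DEFINE_STATIC_KEY_FALSE(demo_feature_key);

static struct ctl_table demo_table[] = {
	{
		.procname	= "demo_feature",
		.data		= &demo_feature_key.key,
		.mode		= 0644,
		.proc_handler	= proc_do_static_key,	/* patches the branch */
	},
	{ }
};

static bool demo_fast_path(void)
{
	/* compiled to a patched jump: near-zero cost while disabled */
	return static_branch_unlikely(&demo_feature_key);
}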
*/ -struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp; +struct percpu_counter tcp_sockets_allocated; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -324,6 +325,11 @@ struct tcp_splice_state { unsigned long tcp_memory_pressure __read_mostly; EXPORT_SYMBOL_GPL(tcp_memory_pressure); +DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); +EXPORT_SYMBOL(tcp_rx_skb_cache_key); + +DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); + void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; @@ -456,6 +462,7 @@ void tcp_init_sock(struct sock *sk) WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); sk_sockets_allocated_inc(sk); + sk->sk_route_forced_caps = NETIF_F_GSO; } EXPORT_SYMBOL(tcp_init_sock); @@ -545,11 +552,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (state != TCP_SYN_SENT && (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX); - u16 urg_data = READ_ONCE(tp->urg_data); - if (unlikely(urg_data) && - READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && - !sock_flag(sk, SOCK_URGINLINE)) + if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && + !sock_flag(sk, SOCK_URGINLINE) && + tp->urg_data) target++; if (tcp_stream_is_readable(sk, target)) @@ -574,7 +580,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } else mask |= EPOLLOUT | EPOLLWRNORM; - if (urg_data & TCP_URG_VALID) + if (tp->urg_data & TCP_URG_VALID) mask |= EPOLLPRI; } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect @@ -608,7 +614,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) unlock_sock_fast(sk, slow); break; case SIOCATMARK: - answ = READ_ONCE(tp->urg_data) && + answ = tp->urg_data && READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq); break; case SIOCOUTQ: @@ -638,7 +644,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) } EXPORT_SYMBOL(tcp_ioctl); -void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) +static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; @@ -649,13 +655,15 @@ static inline bool forced_push(const struct tcp_sock *tp) return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } -void tcp_skb_entail(struct sock *sk, struct sk_buff *skb) +static void skb_entail(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; tcb->tcp_flags = TCPHDR_ACK; + tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); sk_wmem_queued_add(sk, skb->truesize); @@ -842,7 +850,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } release_sock(sk); - sk_defer_free_flush(sk); if (spliced) return spliced; @@ -851,19 +858,33 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } EXPORT_SYMBOL(tcp_splice_read); -struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, - bool force_schedule) +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule) { struct sk_buff *skb; + if (likely(!size)) { + skb = sk->sk_tx_skb_cache; + if (skb) { + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + sk->sk_tx_skb_cache = NULL; + pskb_trim(skb, 0); + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); + skb_shinfo(skb)->tx_flags = 0; + memset(TCP_SKB_CB(skb), 0, 
sizeof(struct tcp_skb_cb)); + return skb; + } + } + /* The TCP header must be at least 32-bit aligned. */ + size = ALIGN(size, 4); + if (unlikely(tcp_under_memory_pressure(sk))) sk_mem_reclaim_partial(sk); - skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp); + skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (likely(skb)) { bool mem_scheduled; - skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); if (force_schedule) { mem_scheduled = true; sk_forced_mem_schedule(sk, skb->truesize); @@ -871,8 +892,12 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, mem_scheduled = sk_wmem_schedule(sk, skb->truesize); } if (likely(mem_scheduled)) { - skb_reserve(skb, MAX_TCP_HEADER); - skb->ip_summed = CHECKSUM_PARTIAL; + skb_reserve(skb, sk->sk_prot->max_header); + /* + * Make sure that we have exactly size bytes + * available to the caller, no more, no less. + */ + skb->reserved_tailroom = skb->end - skb->tail - size; INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } @@ -925,36 +950,18 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) * importantly be able to generate EPOLLOUT for Edge Trigger epoll() * users. */ -void tcp_remove_empty_skb(struct sock *sk) +void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) { - struct sk_buff *skb = tcp_write_queue_tail(sk); - if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { tcp_unlink_write_queue(skb, sk); if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - tcp_wmem_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } } -/* skb changing from pure zc to mixed, must charge zc */ -static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb) -{ - if (unlikely(skb_zcopy_pure(skb))) { - u32 extra = skb->truesize - - SKB_TRUESIZE(skb_end_offset(skb)); - - if (!sk_wmem_schedule(sk, extra)) - return -ENOMEM; - - sk_mem_charge(sk, extra); - skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; - } - return 0; -} - -static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, - struct page *page, int offset, size_t *size) +struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, + struct page *page, int offset, size_t *size) { struct sk_buff *skb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -967,15 +974,15 @@ static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, if (!sk_stream_memory_free(sk)) return NULL; - skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, - tcp_rtx_and_write_queues_empty(sk)); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, + tcp_rtx_and_write_queues_empty(sk)); if (!skb) return NULL; #ifdef CONFIG_TLS_DEVICE skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); #endif - tcp_skb_entail(sk, skb); + skb_entail(sk, skb); copy = size_goal; } @@ -988,7 +995,7 @@ static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, tcp_mark_push(tp, skb); goto new_segment; } - if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) return NULL; if (can_coalesce) { @@ -1006,6 +1013,7 @@ static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, skb->truesize += copy; sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); + skb->ip_summed = CHECKSUM_PARTIAL; WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); @@ -1096,7 +1104,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, return copied; do_error: - 
tcp_remove_empty_skb(sk); + tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); if (copied) goto out; out_err: @@ -1295,14 +1303,15 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, - first_skb); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, + first_skb); if (!skb) goto wait_for_space; process_backlog++; + skb->ip_summed = CHECKSUM_PARTIAL; - tcp_skb_entail(sk, skb); + skb_entail(sk, skb); copy = size_goal; /* All packets are restored as if they have @@ -1317,7 +1326,14 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if (copy > msg_data_left(msg)) copy = msg_data_left(msg); - if (!zc) { + /* Where to copy to? */ + if (skb_availroom(skb) > 0 && !zc) { + /* We have some space in skb head. Superb! */ + copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + if (err) + goto do_fault; + } else if (!zc) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1336,8 +1352,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) copy = min_t(int, copy, pfrag->size - pfrag->offset); - if (tcp_downgrade_zcopy_pure(sk, skb) || - !sk_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) goto wait_for_space; err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, @@ -1357,16 +1372,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) } pfrag->offset += copy; } else { - /* First append to a fragless skb builds initial - * pure zerocopy skb - */ - if (!skb->len) - skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY; - - if (!skb_zcopy_pure(skb)) { - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_space; - } + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_space; err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); if (err == -EMSGSIZE || err == -EEXIST) { @@ -1425,7 +1432,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) return copied + copied_syn; do_error: - tcp_remove_empty_skb(sk); + skb = tcp_write_queue_tail(sk); +do_fault: + tcp_remove_empty_skb(sk, skb); if (copied + copied_syn) goto out; @@ -1475,7 +1484,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) char c = tp->urg_data; if (!(flags & MSG_PEEK)) - WRITE_ONCE(tp->urg_data, TCP_URG_READ); + tp->urg_data = TCP_URG_READ; /* Read urgent data. 
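/*
 * [editor's note] The urg_data churn above is about annotated lockless
 * access: a WRITE_ONCE() store pairs with READ_ONCE() loads from contexts
 * that do not hold the socket lock. The bare idiom (struct and field
 * names hypothetical):
 */
#include <linux/compiler.h>

struct demo_state { int flag; };

static void demo_set(struct demo_state *s, int v)
{
	WRITE_ONCE(s->flag, v);		/* single, untearable store */
}

static int demo_get(const struct demo_state *s)
{
	return READ_ONCE(s->flag);	/* no refetching or tearing */
}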
*/ msg->msg_flags |= MSG_OOB; @@ -1589,36 +1598,6 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } -void __sk_defer_free_flush(struct sock *sk) -{ - struct llist_node *head; - struct sk_buff *skb, *n; - - head = llist_del_all(&sk->defer_list); - llist_for_each_entry_safe(skb, n, head, ll_node) { - prefetch(n); - skb_mark_not_on_list(skb); - __kfree_skb(skb); - } -} -EXPORT_SYMBOL(__sk_defer_free_flush); - -static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) -{ - __skb_unlink(skb, &sk->sk_receive_queue); - if (likely(skb->destructor == sock_rfree)) { - sock_rfree(skb); - skb->destructor = NULL; - skb->sk = NULL; - if (!skb_queue_empty(&sk->sk_receive_queue) || - !llist_empty(&sk->defer_list)) { - llist_add(&skb->ll_node, &sk->defer_list); - return; - } - } - __kfree_skb(skb); -} - static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1638,7 +1617,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) * splitted a fat GRO packet, while we released socket lock * in skb_splice_bits() */ - tcp_eat_recv_skb(sk, skb); + sk_eat_skb(sk, skb); } return NULL; } @@ -1672,7 +1651,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ - if (unlikely(tp->urg_data)) { + if (tp->urg_data) { u32 urg_offset = tp->urg_seq - seq; if (urg_offset < len) len = urg_offset; @@ -1704,11 +1683,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, continue; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { - tcp_eat_recv_skb(sk, skb); + sk_eat_skb(sk, skb); ++seq; break; } - tcp_eat_recv_skb(sk, skb); + sk_eat_skb(sk, skb); if (!desc->count) break; WRITE_ONCE(tp->copied_seq, seq); @@ -2368,7 +2347,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, u32 offset; /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ - if (unlikely(tp->urg_data) && tp->urg_seq == *seq) { + if (tp->urg_data && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { @@ -2411,10 +2390,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, break; if (copied) { - if (!timeo || - sk->sk_err || + if (sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || signal_pending(current)) break; } else { @@ -2448,12 +2427,13 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, } } + tcp_cleanup_rbuf(sk, copied); + if (copied >= target) { /* Do not sleep, just process backlog. */ - __sk_flush_backlog(sk); + release_sock(sk); + lock_sock(sk); } else { - tcp_cleanup_rbuf(sk, copied); - sk_defer_free_flush(sk); sk_wait_data(sk, &timeo, last); } @@ -2473,7 +2453,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, used = len; /* Do we have urgent data here? 
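/*
 * [editor's note] The deleted __sk_defer_free_flush() above drains a
 * lock-free llist in one shot. The batch-drain idiom it used, in sketch
 * form (ll_node is the sk_buff field the removed code relied on; the
 * list head here is hypothetical):
 */
#include <linux/llist.h>
#include <linux/skbuff.h>

static LLIST_HEAD(deferred_skbs);

static void drain_deferred(void)
{
	struct llist_node *head = llist_del_all(&deferred_skbs);
	struct sk_buff *skb, *n;

	/* detach the whole batch, then free entries as we walk */
	llist_for_each_entry_safe(skb, n, head, ll_node) {
		skb_mark_not_on_list(skb);
		__kfree_skb(skb);
	}
}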
*/ - if (unlikely(tp->urg_data)) { + if (tp->urg_data) { u32 urg_offset = tp->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) { @@ -2507,8 +2487,8 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, tcp_rcv_space_adjust(sk); skip_copy: - if (unlikely(tp->urg_data) && after(tp->copied_seq, tp->urg_seq)) { - WRITE_ONCE(tp->urg_data, 0); + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { + tp->urg_data = 0; tcp_fast_path_check(sk); } @@ -2523,14 +2503,14 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) - tcp_eat_recv_skb(sk, skb); + sk_eat_skb(sk, skb); continue; found_fin_ok: /* Process the FIN. */ WRITE_ONCE(*seq, *seq + 1); if (!(flags & MSG_PEEK)) - tcp_eat_recv_skb(sk, skb); + sk_eat_skb(sk, skb); break; } while (len > 0); @@ -2572,7 +2552,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss, &cmsg_flags); release_sock(sk); - sk_defer_free_flush(sk); if (cmsg_flags && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) @@ -2952,7 +2931,7 @@ static void tcp_rtx_queue_purge(struct sock *sk) * list_del(&skb->tcp_tsorted_anchor) */ tcp_rtx_queue_unlink(skb, sk); - tcp_wmem_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } } @@ -2963,9 +2942,14 @@ void tcp_write_queue_purge(struct sock *sk) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { tcp_skb_tsorted_anchor_cleanup(skb); - tcp_wmem_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } tcp_rtx_queue_purge(sk); + skb = sk->sk_tx_skb_cache; + if (skb) { + __kfree_skb(skb); + sk->sk_tx_skb_cache = NULL; + } INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); @@ -3002,8 +2986,12 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_rx_skb_cache) { + __kfree_skb(sk->sk_rx_skb_cache); + sk->sk_rx_skb_cache = NULL; + } WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); - WRITE_ONCE(tp->urg_data, 0); + tp->urg_data = 0; tcp_write_queue_purge(sk); tcp_fastopen_active_disable_ofo_check(sk); skb_rbtree_purge(&tp->out_of_order_queue); @@ -3097,7 +3085,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_frag.page = NULL; sk->sk_frag.offset = 0; } - sk_defer_free_flush(sk); + sk_error_report(sk); return 0; } @@ -3215,7 +3203,7 @@ static void tcp_enable_tx_delay(void) * TCP_CORK can be set together with TCP_NODELAY and it is stronger than * TCP_NODELAY. */ -void __tcp_sock_set_cork(struct sock *sk, bool on) +static void __tcp_sock_set_cork(struct sock *sk, bool on) { struct tcp_sock *tp = tcp_sk(sk); @@ -3243,7 +3231,7 @@ EXPORT_SYMBOL(tcp_sock_set_cork); * However, when TCP_NODELAY is set we make an explicit push, which overrides * even TCP_CORK for currently queued segments. 
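/*
 * [editor's note] For context, the helpers above back the plain socket
 * options; from user space the same knobs are just setsockopt() calls
 * (userspace C, not kernel code):
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Disable Nagle so small writes are sent immediately. */
static int set_nodelay(int fd)
{
	int one = 1;

	return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
}

/* Cork while assembling a response; uncork (on = 0) to flush. */
static int set_cork(int fd, int on)
{
	return setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
}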
*/ -void __tcp_sock_set_nodelay(struct sock *sk, bool on) +static void __tcp_sock_set_nodelay(struct sock *sk, bool on) { if (on) { tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; @@ -3812,12 +3800,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) tcp_get_info_chrono_stats(tp, info); info->tcpi_segs_out = tp->segs_out; - - /* segs_in and data_segs_in can be updated from tcp_segs_in() from BH */ - info->tcpi_segs_in = READ_ONCE(tp->segs_in); - info->tcpi_data_segs_in = READ_ONCE(tp->data_segs_in); + info->tcpi_segs_in = tp->segs_in; info->tcpi_min_rtt = tcp_min_rtt(tp); + info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; @@ -4226,7 +4212,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, &zc, &len, err); release_sock(sk); - sk_defer_free_flush(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) goto zerocopy_rcv_cmsg; switch (len) { diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index ec5550089b..6274462b86 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -56,8 +56,6 @@ * otherwise TCP stack falls back to an internal pacing using one high * resolution timer per TCP socket and may use more resources. */ -#include -#include #include #include #include @@ -1154,38 +1152,14 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET_START(tcp_bbr_kfunc_ids) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, bbr_init) -BTF_ID(func, bbr_main) -BTF_ID(func, bbr_sndbuf_expand) -BTF_ID(func, bbr_undo_cwnd) -BTF_ID(func, bbr_cwnd_event) -BTF_ID(func, bbr_ssthresh) -BTF_ID(func, bbr_min_tso_segs) -BTF_ID(func, bbr_set_state) -#endif -#endif -BTF_SET_END(tcp_bbr_kfunc_ids) - -static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set); - static int __init bbr_register(void) { - int ret; - BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&tcp_bbr_cong_ops); - if (ret) - return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&tcp_bbr_cong_ops); } static void __exit bbr_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); tcp_unregister_congestion_control(&tcp_bbr_cong_ops); } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index e07837e23b..8d2d4d652f 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -25,8 +25,6 @@ */ #include -#include -#include #include #include #include @@ -485,25 +483,8 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET_START(tcp_cubic_kfunc_ids) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, cubictcp_init) -BTF_ID(func, cubictcp_recalc_ssthresh) -BTF_ID(func, cubictcp_cong_avoid) -BTF_ID(func, cubictcp_state) -BTF_ID(func, cubictcp_cwnd_event) -BTF_ID(func, cubictcp_acked) -#endif -#endif -BTF_SET_END(tcp_cubic_kfunc_ids) - -static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set); - static int __init cubictcp_register(void) { - int ret; - BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet @@ -534,16 +515,11 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, 
bic_scale * 10); - ret = tcp_register_congestion_control(&cubictcp); - if (ret) - return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&cubictcp); } static void __exit cubictcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); tcp_unregister_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 0d7ab3cc7b..79f705450c 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -36,8 +36,6 @@ * Glenn Judd */ -#include -#include #include #include #include @@ -238,36 +236,14 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET_START(tcp_dctcp_kfunc_ids) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, dctcp_init) -BTF_ID(func, dctcp_update_alpha) -BTF_ID(func, dctcp_cwnd_event) -BTF_ID(func, dctcp_ssthresh) -BTF_ID(func, dctcp_cwnd_undo) -BTF_ID(func, dctcp_state) -#endif -#endif -BTF_SET_END(tcp_dctcp_kfunc_ids) - -static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set); - static int __init dctcp_register(void) { - int ret; - BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&dctcp); - if (ret) - return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&dctcp); } static void __exit dctcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); tcp_unregister_congestion_control(&dctcp); } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdbcf2a6d0..59412d6354 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -1,7 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include +#include #include +#include #include #include +#include +#include #include void tcp_fastopen_init_key_once(struct net *net) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bfe4112e00..509f577869 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -500,11 +500,8 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; - if (room <= 0) - return; - /* Check #1 */ - if (!tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { unsigned int truesize = truesize_adjust(adjust, skb); int incr; @@ -521,11 +518,6 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } - } else { - /* Under pressure: - * Adjust rcv_ssthresh according to reserved mem - */ - tcp_adjust_rcv_ssthresh(sk); } } @@ -3231,6 +3223,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, long seq_rtt_us = -1L; long ca_rtt_us = -1L; u32 pkts_acked = 0; + u32 last_in_flight = 0; bool rtt_update; int flag = 0; @@ -3266,6 +3259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, if (!first_ackt) first_ackt = last_ackt; + last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; if (before(start_seq, reord)) reord = start_seq; if (!after(scb->end_seq, tp->high_seq)) @@ -3331,8 +3325,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); - if (pkts_acked == 1 
&& fully_acked && !prior_sacked && - (tp->snd_una - prior_snd_una) < tp->mss_cache && + if (pkts_acked == 1 && last_in_flight < tp->mss_cache && + last_in_flight && !prior_sacked && fully_acked && sack->rate->prior_delivered + 1 == tp->delivered && !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) { /* Conservatively mark a delayed ACK. It's typically @@ -3389,10 +3383,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, if (icsk->icsk_ca_ops->pkts_acked) { struct ack_sample sample = { .pkts_acked = pkts_acked, - .rtt_us = sack->rate->rtt_us }; + .rtt_us = sack->rate->rtt_us, + .in_flight = last_in_flight }; - sample.in_flight = tp->mss_cache * - (tp->delivered - sack->rate->prior_delivered); icsk->icsk_ca_ops->pkts_acked(sk, &sample); } @@ -3603,7 +3596,7 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, } /* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) { /* unprotected vars, we dont care of overwrites */ static u32 challenge_timestamp; @@ -3765,7 +3758,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -5355,7 +5348,7 @@ static int tcp_prune_queue(struct sock *sk) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); else if (tcp_under_memory_pressure(sk)) - tcp_adjust_rcv_ssthresh(sk); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@ -5390,7 +5383,7 @@ static int tcp_prune_queue(struct sock *sk) return -1; } -static bool tcp_should_expand_sndbuf(struct sock *sk) +static bool tcp_should_expand_sndbuf(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -5401,18 +5394,8 @@ static bool tcp_should_expand_sndbuf(struct sock *sk) return false; /* If we are under global TCP memory pressure, do not expand. */ - if (tcp_under_memory_pressure(sk)) { - int unused_mem = sk_unused_reserved_mem(sk); - - /* Adjust sndbuf according to reserved mem. But make sure - * it never goes below SOCK_MIN_SNDBUF. - * See sk_stream_moderate_sndbuf() for more details. - */ - if (unused_mem > SOCK_MIN_SNDBUF) - WRITE_ONCE(sk->sk_sndbuf, unused_mem); - + if (tcp_under_memory_pressure(sk)) return false; - } /* If we are under soft global TCP memory pressure, do not expand. */ if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) @@ -5593,7 +5576,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) } } - WRITE_ONCE(tp->urg_data, TCP_URG_NOTYET); + tp->urg_data = TCP_URG_NOTYET; WRITE_ONCE(tp->urg_seq, ptr); /* Disable header prediction. */ @@ -5606,11 +5589,11 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t struct tcp_sock *tp = tcp_sk(sk); /* Check if we get a new urgent pointer - normally not. */ - if (unlikely(th->urg)) + if (th->urg) tcp_check_urg(sk, th); /* Do we wait for any urgent data? - normally not... 
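/*
 * [editor's note] The ack_sample hunk above changes how in_flight is
 * reported to congestion-control modules. A module consumes it through
 * the pkts_acked callback; skeletal sketch (the "demo" CA and its
 * pr_debug are illustrative only, reusing exported Reno defaults):
 */
#include <linux/printk.h>
#include <net/tcp.h>

static void demo_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
	pr_debug("acked=%u rtt_us=%d in_flight=%u\n",
		 sample->pkts_acked, sample->rtt_us, sample->in_flight);
}

static struct tcp_congestion_ops demo_ca __read_mostly = {
	.pkts_acked	= demo_pkts_acked,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.name		= "demo",
};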
*/ - if (unlikely(tp->urg_data == TCP_URG_NOTYET)) { + if (tp->urg_data == TCP_URG_NOTYET) { u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) - th->syn; @@ -5619,7 +5602,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t u8 tmp; if (skb_copy_bits(skb, ptr, &tmp, 1)) BUG(); - WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp); + tp->urg_data = TCP_URG_VALID | tmp; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk); } @@ -5728,7 +5711,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (tp->syn_fastopen && !tp->data_segs_in && sk->sk_state == TCP_ESTABLISHED) tcp_fastopen_active_disable(sk); - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); } goto discard; } @@ -5743,7 +5726,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (syn_inerr) TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } @@ -6458,7 +6441,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!acceptable) { if (sk->sk_state == TCP_SYN_RECV) return 1; /* send one RST */ - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } switch (sk->sk_state) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fec656f5a3..0fe9461647 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -508,12 +508,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; - if (static_branch_unlikely(&ip4_min_ttl)) { - /* min_ttl can be changed concurrently from do_ip_setsockopt() */ - if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto out; - } + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto out; } tp = tcp_sk(sk); @@ -1182,7 +1179,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, if (!md5sig) return -ENOMEM; - sk_gso_disable(sk); + sk_nocaps_add(sk, NETIF_F_GSO_MASK); INIT_HLIST_HEAD(&md5sig->head); rcu_assign_pointer(tp->md5sig_info, md5sig); } @@ -1620,7 +1617,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, */ tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags, key->key, key->keylen, GFP_ATOMIC); - sk_gso_disable(newsk); + sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif @@ -1803,7 +1800,8 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { - u32 limit, tail_gso_size, tail_gso_segs; + u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); + u32 tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; @@ -1911,7 +1909,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) * to reduce memory overhead, so add a little headroom here. * Few sockets backlog are possibly concurrently non empty. 
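/*
 * [editor's note] Both versions of tcp_add_backlog() above bound the
 * backlog by the socket's buffer budget plus fixed slack; only where the
 * sum is computed moves. Isolated, the bound is just:
 */
#include <net/sock.h>

static bool demo_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);

	limit += 64 * 1024;	/* headroom for concurrent bursts */
	return sk_add_backlog(sk, skb, limit) == 0;
}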
*/ - limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf) + 64*1024; + limit += 64*1024; if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); @@ -1965,16 +1963,15 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + struct sk_buff *skb_to_free; int sdif = inet_sdif(skb); int dif = inet_iif(skb); const struct iphdr *iph; const struct tcphdr *th; bool refcounted; struct sock *sk; - int drop_reason; int ret; - drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1986,10 +1983,8 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; - if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { - drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; + if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) goto bad_packet; - } if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; @@ -2077,13 +2072,9 @@ int tcp_v4_rcv(struct sk_buff *skb) return 0; } } - - if (static_branch_unlikely(&ip4_min_ttl)) { - /* min_ttl can be changed concurrently from do_ip_setsockopt() */ - if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto discard_and_relse; - } + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -2094,10 +2085,8 @@ int tcp_v4_rcv(struct sk_buff *skb) nf_reset_ct(skb); - if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + if (tcp_filter(sk, skb)) goto discard_and_relse; - } th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); @@ -2111,17 +2100,21 @@ int tcp_v4_rcv(struct sk_buff *skb) sk_incoming_cpu_update(sk); - sk_defer_free_flush(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { + skb_to_free = sk->sk_rx_skb_cache; + sk->sk_rx_skb_cache = NULL; ret = tcp_v4_do_rcv(sk, skb); } else { if (tcp_add_backlog(sk, skb)) goto discard_and_relse; + skb_to_free = NULL; } bh_unlock_sock(sk); + if (skb_to_free) + __kfree_skb(skb_to_free); put_and_return: if (refcounted) @@ -2130,7 +2123,6 @@ int tcp_v4_rcv(struct sk_buff *skb) return ret; no_tcp_socket: - drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; @@ -2138,7 +2130,6 @@ int tcp_v4_rcv(struct sk_buff *skb) if (tcp_checksum_complete(skb)) { csum_error: - drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -2149,7 +2140,7 @@ int tcp_v4_rcv(struct sk_buff *skb) discard_it: /* Discard frame. 
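/*
 * [editor's note] The drop_reason plumbing removed above tags each
 * discard path so tracepoints can attribute drops. The pattern, in brief
 * (demo_rx and its two checks are illustrative):
 */
#include <linux/skbuff.h>

static void demo_rx(struct sk_buff *skb, bool csum_ok, bool has_sock)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;

	if (!csum_ok) {
		reason = SKB_DROP_REASON_TCP_CSUM;
		goto drop;
	}
	if (!has_sock) {
		reason = SKB_DROP_REASON_NO_SOCKET;
		goto drop;
	}
	consume_skb(skb);		/* normal, non-drop free */
	return;
drop:
	kfree_skb_reason(skb, reason);	/* annotated free */
}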
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7c2d3ac236..0a4f3f1614 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -19,7 +19,14 @@
  *		Jorge Cwik,
  */
 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
+#include 
 #include 
 #include 
@@ -829,8 +836,8 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 	int ret = 0;
 	int state = child->sk_state;
 
-	/* record sk_napi_id and sk_rx_queue_mapping of child. */
-	sk_mark_napi_id_set(child, skb);
+	/* record NAPI ID of child */
+	sk_mark_napi_id(child, skb);
 
 	tcp_segs_in(tcp_sk(child), skb);
 	if (!sock_owned_by_user(child)) {
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index ab552356bd..95db7a11ba 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -25,6 +25,7 @@
  *   1) Add mechanism to deal with reverse congestion.
  */
 
+#include 
 #include 
 #include 
 #include 
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 30abde86db..fc61cd3fea 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -8,7 +8,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5079832af5..0492f69427 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -394,6 +394,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	skb->ip_summed = CHECKSUM_PARTIAL;
 
 	TCP_SKB_CB(skb)->tcp_flags = flags;
+	TCP_SKB_CB(skb)->sacked = 0;
 
 	tcp_skb_pcount_set(skb, 1);
@@ -408,13 +409,13 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 	return tp->snd_una != tp->snd_up;
 }
 
-#define OPTION_SACK_ADVERTISE	BIT(0)
-#define OPTION_TS		BIT(1)
-#define OPTION_MD5		BIT(2)
-#define OPTION_WSCALE		BIT(3)
-#define OPTION_FAST_OPEN_COOKIE	BIT(8)
-#define OPTION_SMC		BIT(9)
-#define OPTION_MPTCP		BIT(10)
+#define OPTION_SACK_ADVERTISE	(1 << 0)
+#define OPTION_TS		(1 << 1)
+#define OPTION_MD5		(1 << 2)
+#define OPTION_WSCALE		(1 << 3)
+#define OPTION_FAST_OPEN_COOKIE	(1 << 8)
+#define OPTION_SMC		(1 << 9)
+#define OPTION_MPTCP		(1 << 10)
 
 static void smc_options_write(__be32 *ptr, u16 *options)
 {
@@ -1255,6 +1256,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	if (clone_it) {
+		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
+			- tp->snd_una;
 		oskb = skb;
 
 		tcp_skb_tsorted_save(oskb) {
@@ -1359,7 +1362,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 #ifdef CONFIG_TCP_MD5SIG
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
-		sk_gso_disable(sk);
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		tp->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, sk, skb);
 	}
@@ -1563,7 +1566,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = tcp_stream_alloc_skb(sk, nsize, gfp, true);
+	buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
 	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
 	skb_copy_decrypted(buff, skb);
@@ -1589,6 +1592,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 
 	skb_split(skb, buff, len);
 
+	buff->ip_summed = CHECKSUM_PARTIAL;
+
 	buff->tstamp = skb->tstamp;
 	tcp_fragment_tstamp(skb, buff);
@@ -1673,12 +1678,12 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	delta_truesize = __pskb_trim_head(skb, len);
 
 	TCP_SKB_CB(skb)->seq += len;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 
 	if (delta_truesize) {
 		skb->truesize -= delta_truesize;
 		sk_wmem_queued_add(sk, -delta_truesize);
-		if (!skb_zcopy_pure(skb))
-			sk_mem_uncharge(sk, delta_truesize);
+		sk_mem_uncharge(sk, delta_truesize);
 	}
 
 	/* Any change of skb->len requires recalculation of tso factor. */
@@ -2118,7 +2123,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 		return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
 				    skb, len, mss_now, gfp);
 
-	buff = tcp_stream_alloc_skb(sk, 0, gfp, true);
+	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
 		return -ENOMEM;
 	skb_copy_decrypted(buff, skb);
@@ -2139,8 +2144,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->tcp_flags = flags;
 
+	/* This packet was never sent out yet, so no SACK bits. */
+	TCP_SKB_CB(buff)->sacked = 0;
+
 	tcp_skb_fragment_eor(skb, buff);
 
+	buff->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 	tcp_fragment_tstamp(skb, buff);
@@ -2296,9 +2305,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
 		if (len <= skb->len)
 			break;
 
-		if (unlikely(TCP_SKB_CB(skb)->eor) ||
-		    tcp_has_tx_tstamp(skb) ||
-		    !skb_pure_zcopy_same(skb, next))
+		if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
 			return false;
 
 		len -= skb->len;
@@ -2383,7 +2390,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 
 	/* We're allowed to probe. Build it now. */
-	nskb = tcp_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
+	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
 	if (!nskb)
 		return -1;
 	sk_wmem_queued_add(sk, nskb->truesize);
@@ -2396,6 +2403,9 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
 	TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
+	TCP_SKB_CB(nskb)->sacked = 0;
+	nskb->csum = 0;
+	nskb->ip_summed = CHECKSUM_PARTIAL;
 
 	tcp_insert_write_queue_before(nskb, skb, sk);
 	tcp_highest_sack_replace(sk, skb, nskb);
@@ -2415,7 +2425,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
 			tcp_skb_collapse_tstamp(nskb, skb);
 			tcp_unlink_write_queue(skb, sk);
-			tcp_wmem_free_skb(sk, skb);
+			sk_wmem_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
 						       ~(TCPHDR_FIN|TCPHDR_PSH);
@@ -2959,7 +2969,8 @@ u32 __tcp_select_window(struct sock *sk)
 		icsk->icsk_ack.quick = 0;
 
 		if (tcp_under_memory_pressure(sk))
-			tcp_adjust_rcv_ssthresh(sk);
+			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
+					       4U * tp->advmss);
 
 		/* free_space might become our new window, make sure we don't
 		 * increase it due to wscale.
 		 */
@@ -3037,9 +3048,13 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
 	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
-	if (next_skb_size && !tcp_skb_shift(skb, next_skb, 1, next_skb_size))
-		return false;
-
+	if (next_skb_size) {
+		if (next_skb_size <= skb_availroom(skb))
+			skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
+				      next_skb_size);
+		else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
+			return false;
+	}
 	tcp_highest_sack_replace(sk, next_skb, skb);
 
 	/* Update sequence range on original skb. */
@@ -3742,9 +3757,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	/* limit to order-0 allocations */
 	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
 
-	syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false);
+	syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
 	if (!syn_data)
 		goto fallback;
+	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
 	if (space) {
 		int copied = copy_from_iter(skb_put(syn_data, space), space,
@@ -3822,7 +3838,7 @@ int tcp_connect(struct sock *sk)
 		return 0;
 	}
 
-	buff = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
+	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
 	if (unlikely(!buff))
 		return -ENOBUFS;
 
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index fbab921670..0de6935659 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -65,7 +65,6 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
 		TCP_SKB_CB(skb)->tx.first_tx_mstamp	= tp->first_tx_mstamp;
 		TCP_SKB_CB(skb)->tx.delivered_mstamp	= tp->delivered_mstamp;
 		TCP_SKB_CB(skb)->tx.delivered		= tp->delivered;
-		TCP_SKB_CB(skb)->tx.delivered_ce	= tp->delivered_ce;
 		TCP_SKB_CB(skb)->tx.is_app_limited	= tp->app_limited ? 1 : 0;
 }
@@ -87,7 +86,6 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 	if (!rs->prior_delivered ||
 	    after(scb->tx.delivered, rs->prior_delivered)) {
-		rs->prior_delivered_ce  = scb->tx.delivered_ce;
 		rs->prior_delivered  = scb->tx.delivered;
 		rs->prior_mstamp     = scb->tx.delivered_mstamp;
 		rs->is_app_limited   = scb->tx.is_app_limited;
@@ -140,10 +138,6 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 	}
 	rs->delivered = tp->delivered - rs->prior_delivered;
 
-	rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
-	/* delivered_ce occupies less than 32 bits in the skb control block */
-	rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
-
 	/* Model sending data and receiving ACKs as separate pipeline phases
 	 * for a window. Usually the ACK phase is longer, but with ACK
	 * compression the send phase can be longer. To be safe we use the
	 * longer phase.
	 */
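The tcp_rate.c comment above reasons that a rate sample must span the longer of the send phase and the ACK phase. A hedged sketch of that selection; rate_sample_interval() is an invented name (the kernel's real computation lives in tcp_rate_gen()):

	/* Take the longer of the two pipeline phases, so a delivery-rate
	 * sample is never divided by an interval shorter than either stage,
	 * which would overestimate the rate.
	 */
	static u64 rate_sample_interval(u64 snd_us, u64 ack_us)
	{
		return max(snd_us, ack_us);
	}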
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0903609394..835b9d6e4e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -74,11 +74,11 @@
 
 #define pr_fmt(fmt) "UDP: " fmt
 
-#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -123,7 +123,7 @@ EXPORT_SYMBOL(udp_table);
 long sysctl_udp_mem[3] __read_mostly;
 EXPORT_SYMBOL(sysctl_udp_mem);
 
-atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp;
+atomic_long_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
 #define MAX_UDP_PORTS 65536
@@ -460,7 +460,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net,
 					struct udp_table *udptable,
 					struct sk_buff *skb,
 					__be32 saddr, __be16 sport,
-					__be32 daddr, u16 hnum, const int dif)
+					__be32 daddr, u16 hnum)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -468,8 +468,8 @@ static struct sock *udp4_lookup_run_bpf(struct net *net,
 	if (udptable != &udp_table)
 		return NULL; /* only UDP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
-					    daddr, hnum, dif, &sk);
+	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
+					    saddr, sport, daddr, hnum, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
@@ -505,7 +505,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		sk = udp4_lookup_run_bpf(net, udptable, skb,
-					 saddr, sport, daddr, hnum, dif);
+					 saddr, sport, daddr, hnum);
 		if (sk) {
 			result = sk;
 			goto done;
@@ -2411,9 +2411,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	__be32 saddr, daddr;
 	struct net *net = dev_net(skb->dev);
 	bool refcounted;
-	int drop_reason;
-
-	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
 
 	/*
 	 *  Validate the packet.
@@ -2469,7 +2466,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (udp_lib_checksum_complete(skb))
 		goto csum_error;
 
-	drop_reason = SKB_DROP_REASON_NO_SOCKET;
 	__UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -2477,11 +2473,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	 * Hmm.  We got an UDP packet to a port to which we
 	 * don't wanna listen.  Ignore it.
 	 */
-	kfree_skb_reason(skb, drop_reason);
+	kfree_skb(skb);
 	return 0;
 
short_packet:
-	drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
 	net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
 			    proto == IPPROTO_UDPLITE ? "Lite" : "",
 			    &saddr, ntohs(uh->source),
@@ -2494,7 +2489,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	 * RFC1122: OK.  Discards the bad packet silently (as far as
 	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
 	 */
-	drop_reason = SKB_DROP_REASON_UDP_CSUM;
 	net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
 			    proto == IPPROTO_UDPLITE ? "Lite" : "",
 			    &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
@@ -2502,7 +2496,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
 	__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
-	kfree_skb_reason(skb, drop_reason);
+	kfree_skb(skb);
 	return 0;
 }
@@ -2933,7 +2927,6 @@ struct proto udp_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v4_rehash,
 	.get_port		= udp_v4_get_port,
-	.put_port		= udp_lib_unhash,
 #ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot	= udp_bpf_update_proto,
 #endif
@@ -2960,7 +2953,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 	if (state->bpf_seq_afinfo)
 		afinfo = state->bpf_seq_afinfo;
 	else
-		afinfo = pde_data(file_inode(seq->file));
+		afinfo = PDE_DATA(file_inode(seq->file));
 
 	for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
 	     ++state->bucket) {
@@ -2993,7 +2986,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 	if (state->bpf_seq_afinfo)
 		afinfo = state->bpf_seq_afinfo;
 	else
-		afinfo = pde_data(file_inode(seq->file));
+		afinfo = PDE_DATA(file_inode(seq->file));
 
 	do {
 		sk = sk_next(sk);
@@ -3050,7 +3043,7 @@ void udp_seq_stop(struct seq_file *seq, void *v)
 	if (state->bpf_seq_afinfo)
 		afinfo = state->bpf_seq_afinfo;
 	else
-		afinfo = pde_data(file_inode(seq->file));
+		afinfo = PDE_DATA(file_inode(seq->file));
 
 	if (state->bucket <= afinfo->udp_table->mask)
 		spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6d1a4bec26..86d32a1e62 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -7,7 +7,6 @@
 */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -425,33 +424,6 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-{
-	if (unlikely(p->len + skb->len >= 65536))
-		return -E2BIG;
-
-	if (NAPI_GRO_CB(p)->last == p)
-		skb_shinfo(p)->frag_list = skb;
-	else
-		NAPI_GRO_CB(p)->last->next = skb;
-
-	skb_pull(skb, skb_gro_offset(skb));
-
-	NAPI_GRO_CB(p)->last = skb;
-	NAPI_GRO_CB(p)->count++;
-	p->data_len += skb->len;
-
-	/* sk owenrship - if any - completely transferred to the aggregated packet */
-	skb->destructor = NULL;
-	p->truesize += skb->truesize;
-	p->len += skb->len;
-
-	NAPI_GRO_CB(skb)->same_flow = 1;
-
-	return 0;
-}
-
-
 #define UDP_GRO_CNT_MAX 64
 static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 					       struct sk_buff *skb)
@@ -628,11 +600,13 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 					     inet_gro_compute_pseudo);
skip:
 	NAPI_GRO_CB(skb)->is_ipv6 = 0;
+	rcu_read_lock();
 
 	if (static_branch_unlikely(&udp_encap_needed_key))
 		sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);
 
 	pp = udp_gro_receive(head, skb, uh, sk);
+	rcu_read_unlock();
 	return pp;
 
flush:
@@ -667,6 +641,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
 	uh->len = newlen;
 
+	rcu_read_lock();
 	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
 				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
 	if (sk && udp_sk(sk)->gro_complete) {
@@ -687,6 +662,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 	} else {
 		err = udp_gro_complete_segment(skb);
 	}
+	rcu_read_unlock();
 
 	if (skb->remcsum_offload)
 		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 8efaf8c3fe..b97e3635ac 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -2,8 +2,11 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9e83bcb6bc..9ebd54752e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -77,7 +77,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_iif = fl4->flowi4_iif;
 
 	xdst->u.dst.dev = dev;
-	dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
+	dev_hold(dev);
 
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 9d4f418f1b..f4555a88f8 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -8,7 +8,9 @@
 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 
 static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index bf2e5e5fe1..e504204bca 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -332,10 +332,10 @@ config IPV6_IOAM6_LWTUNNEL
 	bool "IPv6: IOAM Pre-allocated Trace insertion support"
 	depends on IPV6
 	select LWTUNNEL
-	select DST_CACHE
 	help
-	  Support for the insertion of IOAM Pre-allocated Trace
-	  Header using the lightweight tunnels mechanism.
+	  Support for the inline insertion of IOAM Pre-allocated
+	  Trace Header (only on locally generated packets), using
+	  the lightweight tunnels mechanism.
 
 	  If unsure, say N.
 
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 3036a45e8a..1bc7e14321 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -5,14 +5,16 @@
 
 obj-$(CONFIG_IPV6) += ipv6.o
 
-ipv6-y :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
+ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		addrlabel.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
 		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
 		udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
 
-ipv6-$(CONFIG_SYSCTL) += sysctl_net_ipv6.o
+ipv6-offload :=	ip6_offload.o tcpv6_offload.o exthdrs_offload.o
+
+ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
 ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
 
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
@@ -27,6 +29,8 @@ ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
 ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
 ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
 
+ipv6-objs += $(ipv6-y)
+
 obj-$(CONFIG_INET6_AH) += ah6.o
 obj-$(CONFIG_INET6_ESP) += esp6.o
 obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o
@@ -44,8 +48,7 @@ obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
 obj-$(CONFIG_IPV6_FOU) += fou6.o
 
 obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
-obj-$(CONFIG_INET) += output_core.o protocol.o \
-	ip6_offload.o tcpv6_offload.o exthdrs_offload.o
+obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
 
 obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6c8ab3e6e6..e852bbc839 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -241,7 +241,6 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.ioam6_enabled		= 0,
 	.ioam6_id		= IOAM6_DEFAULT_IF_ID,
 	.ioam6_id_wide		= IOAM6_DEFAULT_IF_ID_WIDE,
-	.ndisc_evict_nocarrier	= 1,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -301,7 +300,6 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.ioam6_enabled		= 0,
 	.ioam6_id		= IOAM6_DEFAULT_IF_ID,
 	.ioam6_id_wide		= IOAM6_DEFAULT_IF_ID_WIDE,
-	.ndisc_evict_nocarrier	= 1,
 };
 
 /* Check if link is ready: is it up and is a valid qdisc available */
@@ -405,13 +403,13 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	if (ndev->cnf.forwarding)
 		dev_disable_lro(dev);
 	/* We refer to the device */
-	dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);
+	dev_hold(dev);
 
 	if (snmp6_alloc_dev(ndev) < 0) {
 		netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
 			   __func__);
 		neigh_parms_release(&nd_tbl, ndev->nd_parms);
-		dev_put_track(dev, &ndev->dev_tracker);
+		dev_put(dev);
 		kfree(ndev);
 		return ERR_PTR(err);
 	}
@@ -2239,12 +2237,12 @@ static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev)
 
 static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
 {
-	const union fwnet_hwaddr *ha;
+	union fwnet_hwaddr *ha;
 
 	if (dev->addr_len != FWNET_ALEN)
 		return -1;
 
-	ha = (const union fwnet_hwaddr *)dev->dev_addr;
+	ha = (union fwnet_hwaddr *)dev->dev_addr;
 	memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
 	eui[0] ^= 2;
 
@@ -3732,6 +3730,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, *tmp;
 	bool keep_addr = false;
+	bool was_ready;
 	int state, i;
 
 	ASSERT_RTNL();
@@ -3797,7 +3796,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
 
 	addrconf_del_rs_timer(idev);
 
-	/* Step 2: clear flags for stateless addrconf */
+	/* Step 2: clear flags for stateless addrconf, repeated down
+	 *         detection
+	 */
+	was_ready = idev->if_flags & IF_READY;
 	if (!unregister)
 		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
@@ -3871,7 +3873,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
 	if (unregister) {
 		ipv6_ac_destroy_dev(idev);
 		ipv6_mc_destroy_dev(idev);
-	} else {
+	} else if (was_ready) {
 		ipv6_mc_down(idev);
 	}
@@ -4998,7 +5000,6 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
 		goto error;
 
-	spin_lock_bh(&ifa->lock);
 	if (!((ifa->flags&IFA_F_PERMANENT) &&
 	      (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
 		preferred = ifa->prefered_lft;
@@ -5020,7 +5021,6 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 		preferred = INFINITY_LIFE_TIME;
 		valid = INFINITY_LIFE_TIME;
 	}
-	spin_unlock_bh(&ifa->lock);
 
 	if (!ipv6_addr_any(&ifa->peer_addr)) {
 		if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
@@ -5562,7 +5562,6 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
 	array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
 	array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
-	array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -7004,15 +7003,6 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_douintvec,
 	},
-	{
-		.procname	= "ndisc_evict_nocarrier",
-		.data		= &ipv6_devconf.ndisc_evict_nocarrier,
-		.maxlen		= sizeof(u8),
-		.mode		= 0644,
-		.proc_handler	= proc_dou8vec_minmax,
-		.extra1		= (void *)SYSCTL_ZERO,
-		.extra2		= (void *)SYSCTL_ONE,
-	},
 	{
 		/* sentinel */
 	}
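Several hunks above swap dev_hold_track()/dev_put_track() back to untracked dev_hold()/dev_put(). A sketch of the tracked form, assuming the netdevice_tracker API from this kernel generation; struct tracked_ref and the helper names are illustrative:

	/* dev_hold_track() pairs a device reference with a tracker cookie so
	 * that, with CONFIG_NET_DEV_REFCNT_TRACKER, a leaked reference can be
	 * attributed to the owner that took it.
	 */
	struct tracked_ref {
		struct net_device *dev;
		netdevice_tracker tracker;
	};

	static void take_ref(struct tracked_ref *r, struct net_device *dev)
	{
		r->dev = dev;
		dev_hold_track(dev, &r->tracker, GFP_KERNEL);
	}

	static void drop_ref(struct tracked_ref *r)
	{
		dev_put_track(r->dev, &r->tracker);	/* instead of bare dev_put() */
	}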
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 881d1477d2..1d4054bb34 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -263,7 +263,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 
 #ifdef NET_REFCNT_DEBUG
 	pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
 #endif
-	dev_put_track(dev, &idev->dev_tracker);
+	dev_put(dev);
 	if (!idev->dead) {
 		pr_warn("Freeing alive inet6 device %p\n", idev);
 		return;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8fe7900f19..dab4a04759 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -337,8 +337,11 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 			chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
 		rcu_read_unlock();
 
-		if (!inet_addr_valid_or_nonlocal(net, inet, v4addr,
-						 chk_addr_ret)) {
+		if (!inet_can_nonlocal_bind(net, inet) &&
+		    v4addr != htonl(INADDR_ANY) &&
+		    chk_addr_ret != RTN_LOCAL &&
+		    chk_addr_ret != RTN_MULTICAST &&
+		    chk_addr_ret != RTN_BROADCAST) {
 			err = -EADDRNOTAVAIL;
 			goto out;
 		}
@@ -413,8 +416,6 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		if (err) {
 			sk->sk_ipv6only = saved_ipv6only;
 			inet_reset_saddr(sk);
-			if (sk->sk_prot->put_port)
-				sk->sk_prot->put_port(sk);
 			goto out;
 		}
 	}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index b5995c1f4d..828e625142 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -175,6 +175,7 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
 			 * See 11.3.2 of RFC 3775 for details.
 			 */
 			if (opt[off] == IPV6_TLV_HAO) {
+				struct in6_addr final_addr;
 				struct ipv6_destopt_hao *hao;
 
 				hao = (struct ipv6_destopt_hao *)&opt[off];
@@ -183,7 +184,9 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
 						     hao->length);
 					goto bad;
 				}
-				swap(hao->addr, iph->saddr);
+				final_addr = hao->addr;
+				hao->addr = iph->saddr;
+				iph->saddr = final_addr;
 			}
 			break;
 		}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bb2c407b4..883b53fd78 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -114,6 +114,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
 
 static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 {
+	struct esp_output_extra *extra = esp_tmp_extra(tmp);
 	struct crypto_aead *aead = x->data;
 	int extralen = 0;
 	u8 *iv;
@@ -121,7 +122,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 	struct scatterlist *sg;
 
 	if (x->props.flags & XFRM_STATE_ESN)
-		extralen += sizeof(struct esp_output_extra);
+		extralen += sizeof(*extra);
 
 	iv = esp_tmp_iv(aead, tmp, extralen);
 	req = esp_tmp_req(aead, iv);
@@ -707,7 +708,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
 		u32 padto;
 
-		padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+		padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
 		if (skb->len < padto)
 			esp.tfclen = padto - skb->len;
 	}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ba5e81cd56..a349d47980 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -16,7 +16,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 77e34aec7e..3a871a09f9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -686,6 +686,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	struct net *net = dev_net(skb->dev);
 	int accept_source_route = net->ipv6.devconf_all->accept_source_route;
 
+	idev = __in6_dev_get(skb->dev);
 	if (idev && accept_source_route > idev->cnf.accept_source_route)
 		accept_source_route = idev->cnf.accept_source_route;
@@ -978,7 +979,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
 		if (!skb_valid_dst(skb))
 			ip6_route_input(skb);
 
-		ioam6_fill_trace_data(skb, ns, trace, true);
+		ioam6_fill_trace_data(skb, ns, trace);
 		break;
 	default:
 		break;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec029c86ae..dcedfe29d9 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -340,6 +340,10 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
 	return 1;
 }
 
+static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
+	FRA_GENERIC_POLICY,
+};
+
 static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			       struct fib_rule_hdr *frh,
 			       struct nlattr **tb,
@@ -455,6 +459,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
 	.fill			= fib6_rule_fill,
 	.nlmsg_payload		= fib6_rule_nlmsg_payload,
 	.nlgroup		= RTNLGRP_IPV6_RULE,
+	.policy			= fib6_rule_policy,
 	.owner			= THIS_MODULE,
 	.fro_net		= &init_net,
 };
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 47447f0241..a1ac0e3d8c 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -610,11 +610,7 @@ int ila_xlat_init_net(struct net *net)
 	if (err)
 		return err;
 
-	err = rhashtable_init(&ilan->xlat.rhash_table, &rht_params);
-	if (err) {
-		free_bucket_spinlocks(ilan->xlat.locks);
-		return err;
-	}
+	rhashtable_init(&ilan->xlat.rhash_table, &rht_params);
 
 	return 0;
 }
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 4514444e96..67c9114835 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -165,7 +165,7 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net,
 						const struct in6_addr *saddr,
 						const __be16 sport,
 						const struct in6_addr *daddr,
-						const u16 hnum, const int dif)
+						const u16 hnum)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -173,8 +173,8 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net,
 	if (hashinfo != &tcp_hashinfo)
 		return NULL; /* only TCP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
-					    daddr, hnum, dif, &sk);
+	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
+					    saddr, sport, daddr, hnum, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
@@ -198,7 +198,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
-					      saddr, sport, daddr, hnum, dif);
+					      saddr, sport, daddr, hnum);
 		if (result)
 			goto done;
 	}
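The fib6_rules.c hunk above restores an explicit netlink attribute policy (.policy = fib6_rule_policy, built from FRA_GENERIC_POLICY). A minimal sketch of such a table, using two real FRA_* attributes; example_policy is an illustrative name:

	/* A netlink attribute policy validates attribute types and lengths
	 * before the request handler runs; FRA_GENERIC_POLICY expands to the
	 * common fib_rules entries.
	 */
	static const struct nla_policy example_policy[FRA_MAX + 1] = {
		[FRA_PRIORITY]	= { .type = NLA_U32 },
		[FRA_FWMARK]	= { .type = NLA_U32 },
	};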
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index e159eb4328..d128172bb5 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -13,12 +13,10 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-#include 
 
 static void ioam6_ns_release(struct ioam6_namespace *ns)
 {
@@ -633,7 +631,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 				    struct ioam6_namespace *ns,
 				    struct ioam6_trace_hdr *trace,
 				    struct ioam6_schema *sc,
-				    u8 sclen, bool is_input)
+				    u8 sclen)
 {
 	struct __kernel_sock_timeval ts;
 	u64 raw64;
@@ -647,7 +645,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 	/* hop_lim and node_id */
 	if (trace->type.bit0) {
 		byte = ipv6_hdr(skb)->hop_limit;
-		if (is_input)
+		if (skb->dev)
 			byte--;
 
 		raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
@@ -719,19 +717,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 
 	/* queue depth */
 	if (trace->type.bit6) {
-		struct netdev_queue *queue;
-		struct Qdisc *qdisc;
-		__u32 qlen, backlog;
-
-		if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
-			*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
-		} else {
-			queue = skb_get_tx_queue(skb_dst(skb)->dev, skb);
-			qdisc = rcu_dereference(queue->qdisc);
-			qdisc_qstats_qlen_backlog(qdisc, &qlen, &backlog);
-
-			*(__be32 *)data = cpu_to_be32(backlog);
-		}
+		*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
 		data += sizeof(__be32);
 	}
@@ -744,7 +730,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 	/* hop_lim and node_id (wide) */
 	if (trace->type.bit8) {
 		byte = ipv6_hdr(skb)->hop_limit;
-		if (is_input)
+		if (skb->dev)
 			byte--;
 
 		raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
@@ -860,8 +846,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 /* called with rcu_read_lock() */
 void ioam6_fill_trace_data(struct sk_buff *skb,
 			   struct ioam6_namespace *ns,
-			   struct ioam6_trace_hdr *trace,
-			   bool is_input)
+			   struct ioam6_trace_hdr *trace)
 {
 	struct ioam6_schema *sc;
 	u8 sclen = 0;
@@ -891,7 +876,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb,
 		return;
 	}
 
-	__ioam6_fill_trace_data(skb, ns, trace, sc, sclen, is_input);
+	__ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
 	trace->remlen -= trace->nodelen + sclen;
 }
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f90a87389f..9b7b726f8f 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -16,26 +17,18 @@
 #include 
 #include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
 
 #define IOAM6_MASK_SHORT_FIELDS 0xff100000
 #define IOAM6_MASK_WIDE_FIELDS 0xe00000
 
 struct ioam6_lwt_encap {
-	struct ipv6_hopopt_hdr	eh;
-	u8			pad[2];	/* 2-octet padding for 4n-alignment */
-	struct ioam6_hdr	ioamh;
-	struct ioam6_trace_hdr	traceh;
+	struct ipv6_hopopt_hdr eh;
+	u8 pad[2];			/* 2-octet padding for 4n-alignment */
+	struct ioam6_hdr ioamh;
+	struct ioam6_trace_hdr traceh;
 } __packed;
 
 struct ioam6_lwt {
-	struct dst_cache cache;
-	u8 mode;
-	struct in6_addr tundst;
 	struct ioam6_lwt_encap	tuninfo;
 };
@@ -49,19 +42,34 @@ static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
 	return &ioam6_lwt_state(lwt)->tuninfo;
 }
 
-static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
+static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
 {
 	return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
 }
 
 static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
-	[IOAM6_IPTUNNEL_MODE]	= NLA_POLICY_RANGE(NLA_U8,
-						   IOAM6_IPTUNNEL_MODE_MIN,
-						   IOAM6_IPTUNNEL_MODE_MAX),
-	[IOAM6_IPTUNNEL_DST]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
 	[IOAM6_IPTUNNEL_TRACE]	= NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
 };
 
+static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
+			       struct ioam6_trace_hdr *trace)
+{
+	struct ioam6_trace_hdr *data;
+	struct nlattr *nla;
+	int len;
+
+	len = sizeof(*trace);
+
+	nla = nla_reserve(skb, attrtype, len);
+	if (!nla)
+		return -EMSGSIZE;
+
+	data = nla_data(nla);
+	memcpy(data, trace, len);
+
+	return 0;
+}
+
 static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
 {
 	u32 fields;
@@ -93,10 +101,9 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
 	struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
 	struct ioam6_lwt_encap *tuninfo;
 	struct ioam6_trace_hdr *trace;
-	struct lwtunnel_state *lwt;
-	struct ioam6_lwt *ilwt;
-	int len_aligned, err;
-	u8 mode;
+	struct lwtunnel_state *s;
+	int len_aligned;
+	int len, err;
 
 	if (family != AF_INET6)
 		return -EINVAL;
@@ -106,16 +113,6 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
 	if (err < 0)
 		return err;
 
-	if (!tb[IOAM6_IPTUNNEL_MODE])
-		mode = IOAM6_IPTUNNEL_MODE_INLINE;
-	else
-		mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
-
-	if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
-		NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
-		return -EINVAL;
-	}
-
 	if (!tb[IOAM6_IPTUNNEL_TRACE]) {
 		NL_SET_ERR_MSG(extack, "missing trace");
 		return -EINVAL;
@@ -128,24 +125,15 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	}
 
-	len_aligned = ALIGN(trace->remlen * 4, 8);
-	lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned);
-	if (!lwt)
+	len = sizeof(*tuninfo) + trace->remlen * 4;
+	len_aligned = ALIGN(len, 8);
+
+	s = lwtunnel_state_alloc(len_aligned);
+	if (!s)
 		return -ENOMEM;
 
-	ilwt = ioam6_lwt_state(lwt);
-	err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
-	if (err) {
-		kfree(lwt);
-		return err;
-	}
-
-	ilwt->mode = mode;
-	if (tb[IOAM6_IPTUNNEL_DST])
-		ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
-
-	tuninfo = ioam6_lwt_info(lwt);
-	tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
+	tuninfo = ioam6_lwt_info(s);
+	tuninfo->eh.hdrlen = (len_aligned >> 3) - 1;
 	tuninfo->pad[0] = IPV6_TLV_PADN;
 	tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
 	tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
@@ -154,39 +142,27 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
 
 	memcpy(&tuninfo->traceh, trace, sizeof(*trace));
 
-	if (len_aligned - trace->remlen * 4) {
+	len = len_aligned - len;
+	if (len == 1) {
+		tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
+	} else if (len > 0) {
 		tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
-		tuninfo->traceh.data[trace->remlen * 4 + 1] = 2;
+		tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2;
 	}
 
-	lwt->type = LWTUNNEL_ENCAP_IOAM6;
-	lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+	s->type = LWTUNNEL_ENCAP_IOAM6;
+	s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
 
-	*ts = lwt;
+	*ts = s;
 
 	return 0;
 }
 
-static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
+static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
 {
 	struct ioam6_trace_hdr *trace;
-	struct ioam6_namespace *ns;
-
-	trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
-					   + sizeof(struct ipv6_hopopt_hdr) + 2
-					   + sizeof(struct ioam6_hdr));
-
-	ns = ioam6_namespace(net, trace->namespace_id);
-	if (ns)
-		ioam6_fill_trace_data(skb, ns, trace, false);
-
-	return 0;
-}
-
-static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
-			   struct ioam6_lwt_encap *tuninfo)
-{
 	struct ipv6hdr *oldhdr, *hdr;
+	struct ioam6_namespace *ns;
 	int hdrlen, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
@@ -215,200 +191,80 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
 	hdr->nexthdr = NEXTHDR_HOP;
 	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
 
-	return ioam6_do_fill(net, skb);
-}
+	trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+					   + sizeof(struct ipv6_hopopt_hdr) + 2
+					   + sizeof(struct ioam6_hdr));
 
-static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
-			  struct ioam6_lwt_encap *tuninfo,
-			  struct in6_addr *tundst)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	struct ipv6hdr *hdr, *inner_hdr;
-	int hdrlen, len, err;
+	ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
+	if (ns)
+		ioam6_fill_trace_data(skb, ns, trace);
 
-	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
-	len = sizeof(*hdr) + hdrlen;
-
-	err = skb_cow_head(skb, len + skb->mac_len);
-	if (unlikely(err))
-		return err;
-
-	inner_hdr = ipv6_hdr(skb);
-
-	skb_push(skb, len);
-	skb_reset_network_header(skb);
-	skb_mac_header_rebuild(skb);
-	skb_set_transport_header(skb, sizeof(*hdr));
-
-	tuninfo->eh.nexthdr = NEXTHDR_IPV6;
-	memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
-
-	hdr = ipv6_hdr(skb);
-	memcpy(hdr, inner_hdr, sizeof(*hdr));
-
-	hdr->nexthdr = NEXTHDR_HOP;
-	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
-	hdr->daddr = *tundst;
-	ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
-			   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
-
-	skb_postpush_rcsum(skb, hdr, len);
-
-	return ioam6_do_fill(net, skb);
+	return 0;
 }
 
 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb_dst(skb);
-	struct in6_addr orig_daddr;
-	struct ioam6_lwt *ilwt;
+	struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
 	int err = -EINVAL;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto drop;
 
-	ilwt = ioam6_lwt_state(dst->lwtstate);
-	orig_daddr = ipv6_hdr(skb)->daddr;
+	/* Only for packets we send and
+	 * that do not contain a Hop-by-Hop yet
+	 */
+	if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+		goto out;
 
-	switch (ilwt->mode) {
-	case IOAM6_IPTUNNEL_MODE_INLINE:
-do_inline:
-		/* Direct insertion - if there is no Hop-by-Hop yet */
-		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
-			goto out;
-
-		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
-		if (unlikely(err))
-			goto drop;
-
-		break;
-	case IOAM6_IPTUNNEL_MODE_ENCAP:
-do_encap:
-		/* Encapsulation (ip6ip6) */
-		err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
-		if (unlikely(err))
-			goto drop;
-
-		break;
-	case IOAM6_IPTUNNEL_MODE_AUTO:
-		/* Automatic (RFC8200 compliant):
-		 * - local packets -> INLINE mode
-		 * - in-transit packets -> ENCAP mode
-		 */
-		if (!skb->dev)
-			goto do_inline;
-
-		goto do_encap;
-	default:
-		goto drop;
-	}
-
-	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+	err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
 	if (unlikely(err))
 		goto drop;
 
-	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
-		preempt_disable();
-		dst = dst_cache_get(&ilwt->cache);
-		preempt_enable();
+	err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev));
+	if (unlikely(err))
+		goto drop;
 
-		if (unlikely(!dst)) {
-			struct ipv6hdr *hdr = ipv6_hdr(skb);
-			struct flowi6 fl6;
-
-			memset(&fl6, 0, sizeof(fl6));
-			fl6.daddr = hdr->daddr;
-			fl6.saddr = hdr->saddr;
-			fl6.flowlabel = ip6_flowinfo(hdr);
-			fl6.flowi6_mark = skb->mark;
-			fl6.flowi6_proto = hdr->nexthdr;
-
-			dst = ip6_route_output(net, NULL, &fl6);
-			if (dst->error) {
-				err = dst->error;
-				dst_release(dst);
-				goto drop;
-			}
-
-			preempt_disable();
-			dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
-			preempt_enable();
-		}
-
-		skb_dst_drop(skb);
-		skb_dst_set(skb, dst);
-
-		return dst_output(net, sk, skb);
-	}
 out:
-	return dst->lwtstate->orig_output(net, sk, skb);
+	return lwt->orig_output(net, sk, skb);
+
 drop:
 	kfree_skb(skb);
 	return err;
 }
 
-static void ioam6_destroy_state(struct lwtunnel_state *lwt)
-{
-	dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
-}
-
 static int ioam6_fill_encap_info(struct sk_buff *skb,
 				 struct lwtunnel_state *lwtstate)
 {
-	struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
-	int err;
+	struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
 
-	err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
-	if (err)
-		goto ret;
+	if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
+		return -EMSGSIZE;
 
-	if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
-		err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
-		if (err)
-			goto ret;
-	}
-
-	err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh),
-		      &ilwt->tuninfo.traceh);
-ret:
-	return err;
+	return 0;
 }
 
 static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
 {
-	struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
-	int nlsize;
+	struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
 
-	nlsize = nla_total_size(sizeof(ilwt->mode)) +
-		 nla_total_size(sizeof(ilwt->tuninfo.traceh));
-
-	if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
-		nlsize += nla_total_size(sizeof(ilwt->tundst));
-
-	return nlsize;
+	return nla_total_size(sizeof(*trace));
 }
 
 static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 {
-	struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a);
-	struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b);
-	struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
-	struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);
+	struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
+	struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
 
-	return (ilwt_a->mode != ilwt_b->mode ||
-		(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
-		 !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
-		trace_a->namespace_id != trace_b->namespace_id);
+	return (a_hdr->namespace_id != b_hdr->namespace_id);
 }
 
 static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
-	.build_state		= ioam6_build_state,
-	.destroy_state		= ioam6_destroy_state,
+	.build_state	= ioam6_build_state,
 	.output		= ioam6_output,
-	.fill_encap		= ioam6_fill_encap_info,
+	.fill_encap	= ioam6_fill_encap_info,
 	.get_encap_size	= ioam6_encap_nlsize,
-	.cmp_encap		= ioam6_encap_cmp,
-	.owner			= THIS_MODULE,
+	.cmp_encap	= ioam6_encap_cmp,
+	.owner		= THIS_MODULE,
 };
 
 int __init ioam6_iptunnel_init(void)
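The ioam6_iptunnel.c hunks above strip out the encap mode together with its per-state dst_cache. A sketch of the cached-route pattern being removed, assuming the dst_cache API; get_cached_route() is an illustrative name:

	/* Cache the routed dst in the lwtunnel state so the route lookup
	 * only runs on a cache miss; subsequent packets reuse the entry.
	 */
	static struct dst_entry *get_cached_route(struct dst_cache *cache,
						  struct net *net,
						  struct flowi6 *fl6)
	{
		struct dst_entry *dst;

		dst = dst_cache_get(cache);
		if (dst)
			return dst;		/* fast path: cache hit */

		dst = ip6_route_output(net, NULL, fl6);
		if (dst->error) {
			dst_release(dst);
			return NULL;
		}
		dst_cache_set_ip6(cache, dst, &fl6->saddr);
		return dst;
	}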
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 413f66781e..a506e57c40 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -15,7 +15,6 @@
 
 #define pr_fmt(fmt) "IPv6: " fmt
 
-#include 
 #include 
 #include 
 #include 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8753e9cec3..466a5610e3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -403,7 +403,7 @@ static void ip6erspan_tunnel_uninit(struct net_device *dev)
 	ip6erspan_tunnel_unlink_md(ign, t);
 	ip6gre_tunnel_unlink(ign, t);
 	dst_cache_reset(&t->dst_cache);
-	dev_put_track(dev, &t->dev_tracker);
+	dev_put(dev);
 }
 
 static void ip6gre_tunnel_uninit(struct net_device *dev)
@@ -416,7 +416,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
 	if (ign->fb_tunnel_dev == dev)
 		WRITE_ONCE(ign->fb_tunnel_dev, NULL);
 	dst_cache_reset(&t->dst_cache);
-	dev_put_track(dev, &t->dev_tracker);
+	dev_put(dev);
 }
@@ -1090,7 +1090,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
 	struct flowi6 *fl6 = &t->fl.u.ip6;
 
 	if (dev->type != ARPHRD_ETHER) {
-		__dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
+		memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
 		memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
 	}
@@ -1499,7 +1499,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
 	}
 	ip6gre_tnl_init_features(dev);
 
-	dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL);
+	dev_hold(dev);
 	return 0;
 
 cleanup_dst_cache_init:
@@ -1524,7 +1524,7 @@ static int ip6gre_tunnel_init(struct net_device *dev)
 	if (tunnel->parms.collect_md)
 		return 0;
 
-	__dev_addr_set(dev, &tunnel->parms.laddr, sizeof(struct in6_addr));
+	memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
 	memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
 
 	if (ipv6_addr_any(&tunnel->parms.raddr))
@@ -1891,7 +1891,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	ip6erspan_tnl_link_config(tunnel, 1);
 
-	dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL);
+	dev_hold(dev);
 	return 0;
 
cleanup_dst_cache_init:
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 5f577e2145..172565d125 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -210,6 +210,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 
 	flush += ntohs(iph->payload_len) != skb_gro_len(skb);
 
+	rcu_read_lock();
 	proto = iph->nexthdr;
 	ops = rcu_dereference(inet6_offloads[proto]);
 	if (!ops || !ops->callbacks.gro_receive) {
@@ -222,7 +223,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 
 		ops = rcu_dereference(inet6_offloads[proto]);
 		if (!ops || !ops->callbacks.gro_receive)
-			goto out;
+			goto out_unlock;
 
 		iph = ipv6_hdr(skb);
 	}
@@ -249,9 +250,9 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 		 * memcmp() alone below is sufficient, right?
 		 */
 		if ((first_word & htonl(0xF00FFFFF)) ||
-		    !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
-		    !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
-		    *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+		   !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
+		   !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
+		   *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
 not_same_flow:
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
@@ -280,6 +281,9 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 	pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
 					  ops->callbacks.gro_receive, head, skb);
 
+out_unlock:
+	rcu_read_unlock();
+
 out:
 	skb_gro_flush_final(skb, pp, flush);
@@ -329,14 +333,18 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 
 	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
+	rcu_read_lock();
+
 	nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
-		goto out;
+		goto out_unlock;
 
 	err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
 			       udp6_gro_complete, skb, nhoff);
 
-out:
+out_unlock:
+	rcu_read_unlock();
+
 	return err;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 304a295de8..61970fd839 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -977,7 +977,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 fail_toobig:
 	if (skb->sk && dst_allfrag(skb_dst(skb)))
-		sk_gso_disable(skb->sk);
+		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 
 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 	err = -EMSGSIZE;
@@ -1408,8 +1408,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
-	if (mtu < IPV6_MIN_MTU)
-		return -EINVAL;
 	cork->base.fragsize = mtu;
 	cork->base.gso_size = ipc6->gso_size;
 	cork->base.tx_flags = 0;
@@ -1471,8 +1469,6 @@ static int __ip6_append_data(struct sock *sk,
 
 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
 			(opt ? opt->opt_nflen : 0);
-	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
-		     sizeof(struct frag_hdr);
 
 	headersize = sizeof(struct ipv6hdr) +
 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
@@ -1480,6 +1476,13 @@ static int __ip6_append_data(struct sock *sk,
 		      sizeof(struct frag_hdr) : 0) +
 		     rt->rt6i_nfheader_len;
 
+	if (mtu < fragheaderlen ||
+	    ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr))
+		goto emsgsize;
+
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+		     sizeof(struct frag_hdr);
+
 	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
 	 * the first fragment
 	 */
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 97ade833f5..fa8da8ff35 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -383,7 +383,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
 	else
 		ip6_tnl_unlink(ip6n, t);
 	dst_cache_reset(&t->dst_cache);
-	dev_put_track(dev, &t->dev_tracker);
+	dev_put(dev);
 }
 
 /**
@@ -1449,7 +1449,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 	unsigned int mtu;
 	int t_hlen;
 
-	__dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
+	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
 	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
 
 	/* Set up flowi template */
@@ -1883,7 +1883,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
 	dev->min_mtu = ETH_MIN_MTU;
 	dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
 
-	dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL);
+	dev_hold(dev);
 	return 0;
 
destroy_dst:
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 3a434d7592..42c37ec832 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -293,7 +293,7 @@ static void vti6_dev_uninit(struct net_device *dev)
 		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
 	else
 		vti6_tnl_unlink(ip6n, t);
-	dev_put_track(dev, &t->dev_tracker);
+	dev_put(dev);
 }
 
 static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
@@ -660,7 +660,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
 	struct net_device *tdev = NULL;
 	int mtu;
 
-	__dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
+	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
 	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
 
 	p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
@@ -936,7 +936,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
-	dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL);
+	dev_hold(dev);
 	return 0;
 }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8a2db926b5..6a4065d81a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -182,6 +182,10 @@ static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 	return 1;
 }
 
+static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
+	FRA_GENERIC_POLICY,
+};
+
 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 				struct fib_rule_hdr *frh, struct nlattr **tb,
 				struct netlink_ext_ack *extack)
@@ -214,6 +218,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 	.compare	= ip6mr_rule_compare,
 	.fill		= ip6mr_rule_fill,
 	.nlgroup	= RTNLGRP_IPV6_RULE,
+	.policy		= ip6mr_rule_policy,
 	.owner		= THIS_MODULE,
 };
@@ -743,7 +748,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 
 	if ((v->flags & MIFF_REGISTER) && !notify)
 		unregister_netdevice_queue(dev, head);
 
-	dev_put_track(dev, &v->dev_tracker);
+	dev_put(dev);
 	return 0;
 }
@@ -916,7 +921,6 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
 	/* And finish update writing critical data */
 	write_lock_bh(&mrt_lock);
 	v->dev = dev;
-	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
 #ifdef CONFIG_IPV6_PIMSM_V2
 	if (v->flags & MIFF_REGISTER)
 		mrt->mroute_reg_vif_num = vifi;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a733803a71..e4bdb09c55 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -55,8 +55,6 @@
 struct ip6_ra_chain *ip6_ra_chain;
 DEFINE_RWLOCK(ip6_ra_lock);
 
-DEFINE_STATIC_KEY_FALSE(ip6_min_hopcount);
-
 int ip6_ra_control(struct sock *sk, int sel)
 {
 	struct ip6_ra_chain *ra, *new_ra, **rap;
@@ -471,10 +469,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
 		if (sk->sk_protocol == IPPROTO_TCP) {
 			struct inet_connection_sock *icsk = inet_csk(sk);
-
+			local_bh_disable();
 			sock_prot_inuse_add(net, sk->sk_prot, -1);
 			sock_prot_inuse_add(net, &tcp_prot, 1);
-
+			local_bh_enable();
 			sk->sk_prot = &tcp_prot;
 			icsk->icsk_af_ops = &ipv4_specific;
 			sk->sk_socket->ops = &inet_stream_ops;
@@ -485,10 +483,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
 			if (sk->sk_protocol == IPPROTO_UDPLITE)
 				prot = &udplite_prot;
-
+			local_bh_disable();
 			sock_prot_inuse_add(net, sk->sk_prot, -1);
 			sock_prot_inuse_add(net, prot, 1);
-
+			local_bh_enable();
 			sk->sk_prot = prot;
 			sk->sk_socket->ops = &inet_dgram_ops;
 			sk->sk_family = PF_INET;
@@ -599,14 +597,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		/* RFC 3542, 6.5: default traffic class of 0x0 */
 		if (val == -1)
 			val = 0;
-		if (sk->sk_type == SOCK_STREAM) {
-			val &= ~INET_ECN_MASK;
-			val |= np->tclass & INET_ECN_MASK;
-		}
-		if (np->tclass != val) {
-			np->tclass = val;
-			sk_dst_reset(sk);
-		}
+		np->tclass = val;
 		retv = 0;
 		break;
@@ -959,14 +950,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 			goto e_inval;
 		if (val < 0 || val > 255)
 			goto e_inval;
-
-		if (val)
-			static_branch_enable(&ip6_min_hopcount);
-
-		/* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
-		 * while we are changing it.
-		 */
-		WRITE_ONCE(np->min_hopcount, val);
+		np->min_hopcount = val;
 		retv = 0;
 		break;
 	case IPV6_DONTFRAG:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a8861db52c..909f937bef 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
 }
 
 /* called with rcu_read_lock() */
-int igmp6_event_query(struct sk_buff *skb)
+void igmp6_event_query(struct sk_buff *skb)
 {
 	struct inet6_dev *idev = __in6_dev_get(skb->dev);
 
-	if (!idev)
-		return -EINVAL;
-
-	if (idev->dead) {
-		kfree_skb(skb);
-		return -ENODEV;
-	}
+	if (!idev || idev->dead)
+		goto out;
 
 	spin_lock_bh(&idev->mc_query_lock);
 	if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
 		__skb_queue_tail(&idev->mc_query_queue, skb);
 		if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
 			in6_dev_hold(idev);
+		skb = NULL;
 	}
 	spin_unlock_bh(&idev->mc_query_lock);
-
-	return 0;
+out:
+	kfree_skb(skb);
 }
 
 static void __mld_query_work(struct sk_buff *skb)
@@ -1542,27 +1538,23 @@ static void mld_query_work(struct work_struct *work)
 }
 
 /* called with rcu_read_lock() */
-int igmp6_event_report(struct sk_buff *skb)
+void igmp6_event_report(struct sk_buff *skb)
 {
 	struct inet6_dev *idev = __in6_dev_get(skb->dev);
 
-	if (!idev)
-		return -EINVAL;
-
-	if (idev->dead) {
-		kfree_skb(skb);
-		return -ENODEV;
-	}
+	if (!idev || idev->dead)
+		goto out;
 
 	spin_lock_bh(&idev->mc_report_lock);
 	if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
 		__skb_queue_tail(&idev->mc_report_queue, skb);
 		if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
 			in6_dev_hold(idev);
+		skb = NULL;
 	}
 	spin_unlock_bh(&idev->mc_report_lock);
-
-	return 0;
+out:
+	kfree_skb(skb);
 }
 
 static void __mld_report_work(struct sk_buff *skb)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f03b597e41..4b098521a4 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -142,7 +142,7 @@
 };
 EXPORT_SYMBOL_GPL(nd_tbl);
 
-void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
 			      int data_len, int pad)
 {
 	int space = __ndisc_opt_addr_space(data_len, pad);
@@ -165,7 +165,7 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
 EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
 
 static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
-					  const void *data, u8 icmp6_type)
+					  void *data, u8 icmp6_type)
 {
 	__ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
 				 ndisc_addr_option_pad(skb->dev->type));
@@ -1794,7 +1794,6 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 	struct netdev_notifier_change_info *change_info;
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
-	bool evict_nocarrier;
 
 	switch (event) {
 	case NETDEV_CHANGEADDR:
@@ -1811,19 +1810,10 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 		in6_dev_put(idev);
 		break;
 	case NETDEV_CHANGE:
-		idev = in6_dev_get(dev);
-		if (!idev)
-			evict_nocarrier = true;
-		else {
-			evict_nocarrier = idev->cnf.ndisc_evict_nocarrier &&
-					  net->ipv6.devconf_all->ndisc_evict_nocarrier;
-			in6_dev_put(idev);
-		}
-
 		change_info = ptr;
 		if (change_info->flags_changed & IFF_NOARP)
 			neigh_changeaddr(&nd_tbl, dev);
-		if (evict_nocarrier && !netif_carrier_ok(dev))
+		if (!netif_carrier_ok(dev))
 			neigh_carrier_down(&nd_tbl, dev);
 		break;
 	case NETDEV_DOWN:
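The ipv6_sockglue.c hunk above removes the ip6_min_hopcount static key, which kept the per-packet check free until a socket first set IPV6_MINHOPCOUNT. A sketch of that static-branch pattern; example_key and the helpers are illustrative names:

	/* The key starts false: the check below compiles to a no-op jump
	 * until the first user enables the feature, at which point the
	 * branch is patched into the fast path.
	 */
	DEFINE_STATIC_KEY_FALSE(example_key);

	static void enable_feature(void)
	{
		static_branch_enable(&example_key);	/* flip once, on first use */
	}

	static bool feature_check(int hopcount, int min_hopcount)
	{
		if (static_branch_unlikely(&example_key))
			return hopcount < min_hopcount;
		return false;
	}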
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0ba62f4868..f22233e44e 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -47,6 +47,14 @@ config NFT_FIB_IPV6
 endif # NF_TABLES_IPV6
 endif # NF_TABLES
 
+config NF_FLOW_TABLE_IPV6
+	tristate "Netfilter flow table IPv6 module"
+	depends on NF_FLOW_TABLE
+	help
+	  This option adds the flow table IPv6 support.
+
+	  To compile it as a module, choose M here.
+
 config NF_DUP_IPV6
 	tristate "Netfilter IPv6 packet duplication to alternate destination"
 	depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b8d6dc9aee..b85383606d 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -28,6 +28,9 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
 obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
 
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 2d816277f2..a579ea14a6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -247,10 +247,10 @@ ip6t_next_entry(const struct ip6t_entry *entry)
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ip6t_do_table(void *priv, struct sk_buff *skb,
-	      const struct nf_hook_state *state)
+ip6t_do_table(struct sk_buff *skb,
+	      const struct nf_hook_state *state,
+	      struct xt_table *table)
 {
-	const struct xt_table *table = priv;
 	unsigned int hook = state->hook;
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index df785ebda0..727ee80970 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -27,6 +27,14 @@ static const struct xt_table packet_filter = {
 	.priority	= NF_IP6_PRI_FILTER,
 };
 
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6table_filter_hook(void *priv, struct sk_buff *skb,
+		     const struct nf_hook_state *state)
+{
+	return ip6t_do_table(skb, state, priv);
+}
+
 static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
@@ -82,7 +90,7 @@ static int __init ip6table_filter_init(void)
 	if (ret < 0)
 		return ret;
 
-	filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table);
+	filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
 	if (IS_ERR(filter_ops)) {
 		xt_unregister_template(&packet_filter);
 		return PTR_ERR(filter_ops);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index bf3cb3a136..921c1723a0 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -31,27 +31,34 @@ static const struct xt_table nf_nat_ipv6_table = {
 	.af		= NFPROTO_IPV6,
 };
 
+static unsigned int ip6table_nat_do_chain(void *priv,
+					  struct sk_buff *skb,
+					  const struct nf_hook_state *state)
+{
+	return ip6t_do_table(skb, state, priv);
+}
+
 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	{
-		.hook		= ip6t_do_table,
+		.hook		= ip6table_nat_do_chain,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP6_PRI_NAT_DST,
 	},
 	{
-		.hook		= ip6t_do_table,
+		.hook		= ip6table_nat_do_chain,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP6_PRI_NAT_SRC,
 	},
 	{
-		.hook		= ip6t_do_table,
+		.hook		= ip6table_nat_do_chain,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_NAT_DST,
 	},
 	{
-		.hook		= ip6t_do_table,
+		.hook		= ip6table_nat_do_chain,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 08861d5d1f..4f2a04af71 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -31,6 +31,14 @@ static const struct xt_table packet_raw_before_defrag = {
 	.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
 };
 
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6table_raw_hook(void *priv, struct sk_buff *skb,
+		  const struct nf_hook_state *state)
+{
+	return ip6t_do_table(skb, state, priv);
+}
+
 static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int ip6table_raw_table_init(struct net *net)
@@ -80,7 +88,7 @@ static int __init ip6table_raw_init(void)
 		return ret;
 
 	/* Register hooks */
-	rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table);
+	rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
 	if (IS_ERR(rawtable_ops)) {
 		xt_unregister_template(table);
 		return PTR_ERR(rawtable_ops);
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 4df14a9bae..931674034d 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -32,6 +32,13 @@ static const struct xt_table security_table = {
 	.priority	= NF_IP6_PRI_SECURITY,
 };
 
+static unsigned int
+ip6table_security_hook(void *priv, struct sk_buff *skb,
+		       const struct nf_hook_state *state)
+{
+	return ip6t_do_table(skb, state, priv);
+}
+
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int ip6table_security_table_init(struct net *net)
@@ -70,7 +77,7 @@ static int __init ip6table_security_init(void)
 	if (ret < 0)
 		return ret;
 
-	sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table);
+	sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
 	if (IS_ERR(sectbl_ops)) {
 		xt_unregister_template(&security_table);
 		return PTR_ERR(sectbl_ops);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 9256f6ba87..6ac88fe24a 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -177,7 +177,6 @@ struct proto pingv6_prot = {
 	.hash		= ping_hash,
 	.unhash		= ping_unhash,
 	.get_port	= ping_get_port,
-	.put_port	= ping_unhash,
 	.obj_size	= sizeof(struct raw6_sock),
 };
 EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea1cf414a9..e0766bdf20 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -182,9 +182,8 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 
 		if (rt_dev == dev) {
 			rt->dst.dev = blackhole_netdev;
-			dev_replace_track(rt_dev, blackhole_netdev,
-					  &rt->dst.dev_tracker,
-					  GFP_ATOMIC);
+			dev_hold(rt->dst.dev);
+			dev_put(rt_dev);
 		}
 	}
 	spin_unlock_bh(&ul->lock);
@@ -329,7 +328,9 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
 
 static void rt6_info_init(struct rt6_info *rt)
 {
-	memset_after(rt, 0, dst);
+	struct dst_entry *dst = &rt->dst;
+
+	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
 }
@@ -593,7 +594,6 @@
 struct __rt6_probe_work {
 	struct work_struct work;
 	struct in6_addr target;
 	struct net_device *dev;
-	netdevice_tracker dev_tracker;
 };
 
 static void rt6_probe_deferred(struct work_struct *w)
@@ -604,7 +604,7 @@ static void rt6_probe_deferred(struct work_struct *w)
 
 	addrconf_addr_solict_mult(&work->target, &mcaddr);
 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
-	dev_put_track(work->dev, &work->dev_tracker);
+	dev_put(work->dev);
 	kfree(work);
 }
@@ -658,7 +658,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
 	} else {
 		INIT_WORK(&work->work, rt6_probe_deferred);
 		work->target = *nh_gw;
-		dev_hold_track(dev, &work->dev_tracker, GFP_ATOMIC);
+		dev_hold(dev);
 		work->dev = dev;
 		schedule_work(&work->work);
 	}
@@ -1485,7 +1485,7 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
 static u32 rt6_exception_hash(const struct in6_addr *dst,
 			      const struct in6_addr *src)
 {
-	static siphash_aligned_key_t rt6_exception_key;
+	static
siphash_key_t rt6_exception_key __read_mostly; struct { struct in6_addr dst; struct in6_addr src; @@ -3628,8 +3628,6 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, } fib6_nh->fib_nh_dev = dev; - netdev_tracker_alloc(dev, &fib6_nh->fib_nh_dev_tracker, gfp_flags); - fib6_nh->fib_nh_oif = dev->ifindex; err = 0; out: @@ -3660,8 +3658,24 @@ void fib6_nh_release(struct fib6_nh *fib6_nh) rcu_read_unlock(); - fib6_nh_release_dsts(fib6_nh); - free_percpu(fib6_nh->rt6i_pcpu); + if (fib6_nh->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } + + free_percpu(fib6_nh->rt6i_pcpu); + } fib_nh_common_release(&fib6_nh->nh_common); } @@ -6340,11 +6354,11 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, static struct ctl_table ipv6_route_table_template[] = { { - .procname = "max_size", - .data = &init_net.ipv6.sysctl.ip6_rt_max_size, + .procname = "flush", + .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .mode = 0200, + .proc_handler = ipv6_sysctl_rtcache_flush }, { .procname = "gc_thresh", @@ -6354,11 +6368,11 @@ static struct ctl_table ipv6_route_table_template[] = { .proc_handler = proc_dointvec, }, { - .procname = "flush", - .data = &init_net.ipv6.sysctl.flush_delay, + .procname = "max_size", + .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), - .mode = 0200, - .proc_handler = ipv6_sysctl_rtcache_flush + .mode = 0644, + .proc_handler = proc_dointvec, }, { .procname = "gc_min_interval", @@ -6430,10 +6444,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) GFP_KERNEL); if (table) { - table[0].data = &net->ipv6.sysctl.ip6_rt_max_size; + table[0].data = &net->ipv6.sysctl.flush_delay; + table[0].extra1 = net; table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; - table[2].data = &net->ipv6.sysctl.flush_delay; - table[2].extra1 = net; + table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; @@ -6445,7 +6459,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) - table[1].procname = NULL; + table[0].procname = NULL; } return table; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 73aaabf0e9..fa6b64c95d 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -433,11 +433,7 @@ static int __net_init seg6_net_init(struct net *net) net->ipv6.seg6_data = sdata; #ifdef CONFIG_IPV6_SEG6_HMAC - if (seg6_hmac_net_init(net)) { - kfree(rcu_dereference_raw(sdata->tun_src)); - kfree(sdata); - return -ENOMEM; - } + seg6_hmac_net_init(net); #endif return 0; @@ -451,7 +447,7 @@ static void __net_exit seg6_net_exit(struct net *net) seg6_hmac_net_exit(net); #endif - kfree(rcu_dereference_raw(sdata->tun_src)); + kfree(sdata->tun_src); kfree(sdata); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 29bc4e7c30..687d95dce0 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -405,7 +405,9 @@ int __net_init seg6_hmac_net_init(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); - return rhashtable_init(&sdata->hmac_infos, &rht_params); + 
rhashtable_init(&sdata->hmac_infos, &rht_params); + + return 0; } EXPORT_SYMBOL(seg6_hmac_net_init); diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 9fbe243a0e..ef88489c71 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -7,7 +7,6 @@ * eBPF support: Mathieu Xhonneux */ -#include #include #include #include diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c0b138c209..626cb53aa5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -204,7 +204,7 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - __dev_addr_set(dev, &t->parms.iph.saddr, 4); + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) @@ -521,7 +521,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_del_prl(tunnel, NULL); } dst_cache_reset(&tunnel->dst_cache); - dev_put_track(dev, &tunnel->dev_tracker); + dev_put(dev); } static int ipip6_err(struct sk_buff *skb, u32 info) @@ -956,7 +956,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); } - if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); dev->stats.tx_carrier_errors++; goto tx_error_icmp; @@ -1149,7 +1149,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, synchronize_net(); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; - __dev_addr_set(t->dev, &p->iph.saddr, 4); + memcpy(t->dev->dev_addr, &p->iph.saddr, 4); memcpy(t->dev->broadcast, &p->iph.daddr, 4); ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; @@ -1463,7 +1463,7 @@ static int ipip6_tunnel_init(struct net_device *dev) dev->tstats = NULL; return err; } - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + dev_hold(dev); return 0; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d1b61d0036..e8cfb9e997 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -20,7 +20,7 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -static siphash_aligned_key_t syncookie6_secret[2]; +static siphash_key_t syncookie6_secret[2] __read_mostly; /* RFC 2460, Section 8.3: * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 075ee8a2df..8eedf59e9c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -72,7 +72,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 				      struct request_sock *req);
 
-INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
 static const struct inet_connection_sock_af_ops ipv6_mapped;
 const struct inet_connection_sock_af_ops ipv6_specific;
@@ -414,12 +414,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
-	if (static_branch_unlikely(&ip6_min_hopcount)) {
-		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
-		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
-			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
-			goto out;
-		}
+	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
+		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+		goto out;
 	}
 
 	tp = tcp_sk(sk);
@@ -572,7 +569,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
 	kfree(inet_rsk(req)->ipv6_opt);
-	consume_skb(inet_rsk(req)->pktopts);
+	kfree_skb(inet_rsk(req)->pktopts);
 }
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -969,6 +966,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 	fl6.flowlabel = label;
 
 	buff->ip_summed = CHECKSUM_PARTIAL;
+	buff->csum = 0;
 
 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
 
@@ -1263,6 +1261,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 
 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
 
+		newinet = inet_sk(newsk);
 		newnp = tcp_inet6_sk(newsk);
 		newtp = tcp_sk(newsk);
 
@@ -1466,8 +1465,7 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
 * This is because we cannot sleep with the original spinlock
 * held.
 */
-INDIRECT_CALLABLE_SCOPE
-int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 	struct sk_buff *opt_skb = NULL;
@@ -1596,7 +1594,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 
-	consume_skb(opt_skb);
+	kfree_skb(opt_skb);
 	return 0;
 }
 
@@ -1626,6 +1624,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 
 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 {
+	struct sk_buff *skb_to_free;
 	int sdif = inet6_sdif(skb);
 	int dif = inet6_iif(skb);
 	const struct tcphdr *th;
@@ -1731,13 +1730,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 			return 0;
 		}
 	}
-
-	if (static_branch_unlikely(&ip6_min_hopcount)) {
-		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
-		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
-			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
-			goto discard_and_relse;
-		}
+	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
+		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+		goto discard_and_relse;
 	}
 
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -1761,17 +1756,21 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 
 	sk_incoming_cpu_update(sk);
 
-	sk_defer_free_flush(sk);
 	bh_lock_sock_nested(sk);
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
+		skb_to_free = sk->sk_rx_skb_cache;
+		sk->sk_rx_skb_cache = NULL;
 		ret = tcp_v6_do_rcv(sk, skb);
 	} else {
 		if (tcp_add_backlog(sk, skb))
 			goto discard_and_relse;
+		skb_to_free = NULL;
 	}
 	bh_unlock_sock(sk);
+	if (skb_to_free)
+		__kfree_skb(skb_to_free);
 
 put_and_return:
 	if (refcounted)
 		sock_put(sk);
@@ -1898,7 +1897,9 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 
 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 {
-	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
 }
 
 const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -2181,7 +2182,6 @@ struct proto tcpv6_prot = {
 	.hash			= inet6_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
-	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 39db5a2268..1796856bc2 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,7 +7,6 @@
 */
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 528b81ef19..932c6f2a54 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -17,7 +17,6 @@
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
 */
 
-#include
 #include
 #include
 #include
@@ -197,7 +196,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net,
 					       const struct in6_addr *saddr,
 					       __be16 sport,
 					       const struct in6_addr *daddr,
-					       u16 hnum, const int dif)
+					       u16 hnum)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -205,8 +204,8 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net,
 	if (udptable != &udp_table)
 		return NULL; /* only UDP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
-					    daddr, hnum, dif, &sk);
+	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
+					    saddr, sport, daddr, hnum, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -242,7 +241,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		sk = udp6_lookup_run_bpf(net, udptable, skb,
-					 saddr, sport, daddr, hnum, dif);
+					 saddr, sport, daddr, hnum);
 		if (sk) {
 			result = sk;
 			goto done;
@@ -702,9 +701,9 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 
 			ret = encap_rcv(sk, skb);
 			if (ret <= 0) {
-				__UDP6_INC_STATS(sock_net(sk),
-						 UDP_MIB_INDATAGRAMS,
-						 is_udplite);
+				__UDP_INC_STATS(sock_net(sk),
+						UDP_MIB_INDATAGRAMS,
+						is_udplite);
 				return -ret;
 			}
 		}
@@ -1733,7 +1732,6 @@ struct proto udpv6_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v6_rehash,
 	.get_port		= udp_v6_get_port,
-	.put_port		= udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot	= udp_bpf_update_proto,
#endif
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 7720d04ed3..b3d9ed96e5 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -13,7 +13,6 @@
 #include
 #include
 #include "ip6_offload.h"
-#include
 
 static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 					 netdev_features_t features)
@@ -145,11 +144,13 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 
skip:
 	NAPI_GRO_CB(skb)->is_ipv6 = 1;
+	rcu_read_lock();
 
 	if (static_branch_unlikely(&udpv6_encap_needed_key))
 		sk = udp6_gro_lookup_skb(skb, uh->source, uh->dest);
 
 	pp = udp_gro_receive(head, skb, uh, sk);
+	rcu_read_unlock();
 	return pp;
 
flush:
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index fad687ee6d..af7a4b8b1e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -74,11 +74,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	struct rt6_info *rt = (struct rt6_info *)xdst->route;
 
 	xdst->u.dst.dev = dev;
-	dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
+	dev_hold(dev);
 
 	xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
 	if (!xdst->u.rt6.rt6i_idev) {
-		dev_put_track(dev, &xdst->u.dst.dev_tracker);
+		dev_put(dev);
 		return -ENODEV;
 	}
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a1760add5b..18316ee3c6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -13,7 +13,6 @@
#define KMSG_COMPONENT "af_iucv"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include
 #include
 #include
 #include
@@ -143,7 +142,7 @@ static inline size_t iucv_msg_length(struct iucv_message *msg)
 * iucv_sock_in_state() - check for specific states
 * @sk: sock structure
 * @state: first iucv sk state
- * @state2: second iucv sk state
+ * @state: second iucv sk state
 *
 * Returns true if the socket in either in the first or second state.
 */
@@ -173,7 +172,7 @@ static inline int iucv_below_msglim(struct sock *sk)
 	       (atomic_read(&iucv->pendings) <= 0));
 }
 
-/*
+/**
 * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit
 */
 static void iucv_sock_wake_msglim(struct sock *sk)
@@ -188,7 +187,7 @@ static void iucv_sock_wake_msglim(struct sock *sk)
 	rcu_read_unlock();
 }
 
-/*
+/**
 * afiucv_hs_send() - send a message through HiperSockets transport
 */
 static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
@@ -474,7 +473,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio,
 	atomic_set(&iucv->msg_recv, 0);
 	iucv->path = NULL;
 	iucv->sk_txnotify = afiucv_hs_callback_txnotify;
-	memset(&iucv->init, 0, sizeof(iucv->init));
+	memset(&iucv->src_user_id , 0, 32);
 	if (pr_iucv)
 		iucv->transport = AF_IUCV_TRANS_IUCV;
 	else
@@ -1832,9 +1831,9 @@ static void afiucv_swap_src_dest(struct sk_buff *skb)
 	memset(skb->data, 0, ETH_HLEN);
 }
 
-/*
+/**
 * afiucv_hs_callback_syn - react on received SYN
- */
+ **/
 static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
 {
 	struct af_iucv_trans_hdr *trans_hdr = iucv_trans_hdr(skb);
@@ -1897,9 +1896,9 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-/*
+/**
 * afiucv_hs_callback_synack() - react on received SYN-ACK
- */
+ **/
 static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
@@ -1918,9 +1917,9 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-/*
+/**
 * afiucv_hs_callback_synfin() - react on received SYN_FIN
- */
+ **/
 static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
@@ -1938,9 +1937,9 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-/*
+/**
 * afiucv_hs_callback_fin() - react on received FIN
- */
+ **/
 static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
@@ -1961,9 +1960,9 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-/*
+/**
 * afiucv_hs_callback_win() - react on received WIN
- */
+ **/
 static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
@@ -1979,9 +1978,9 @@ static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-/*
+/**
 * afiucv_hs_callback_rx() - react on received data
- */
+ **/
 static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
@@ -2023,11 +2022,11 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-/*
+/**
 * afiucv_hs_rcv() - base function for arriving data through HiperSockets
 * transport
 * called from netif RX softirq
- */
+ **/
 static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
			 struct packet_type *pt, struct net_device *orig_dev)
 {
@@ -2129,10 +2128,10 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 	return err;
 }
 
-/*
+/**
 * afiucv_hs_callback_txnotify() - handle send notifications from HiperSockets
 * transport
- */
+ **/
 static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify n)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 8f4d49a7d3..f3343a8541 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -276,8 +276,8 @@ static union iucv_param *iucv_param[NR_CPUS];
 static union iucv_param *iucv_param_irq[NR_CPUS];
 
 /**
- * __iucv_call_b2f0
- * @command: identifier of IUCV call to CP.
+ * iucv_call_b2f0
+ * @code: identifier of IUCV call to CP.
 * @parm: pointer to a struct iucv_parm block
 *
 * Calls CP to execute IUCV commands.
@@ -309,7 +309,7 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
 	return ccode == 1 ? parm->ctrl.iprcode : ccode;
 }
 
-/*
+/**
 * iucv_query_maxconn
 *
 * Determines the maximum number of connections that may be established.
@@ -493,8 +493,8 @@ static void iucv_retrieve_cpu(void *data)
 	cpumask_clear_cpu(cpu, &iucv_buffer_cpumask);
 }
 
-/*
- * iucv_setmask_mp
+/**
+ * iucv_setmask_smp
 *
 * Allow iucv interrupts on all cpus.
 */
@@ -512,7 +512,7 @@ static void iucv_setmask_mp(void)
 	cpus_read_unlock();
 }
 
-/*
+/**
 * iucv_setmask_up
 *
 * Allow iucv interrupts on a single cpu.
@@ -529,7 +529,7 @@ static void iucv_setmask_up(void)
 	smp_call_function_single(cpu, iucv_block_cpu, NULL, 1);
 }
 
-/*
+/**
 * iucv_enable
 *
 * This function makes iucv ready for use. It allocates the pathid
@@ -564,7 +564,7 @@ static int iucv_enable(void)
 	return rc;
 }
 
-/*
+/**
 * iucv_disable
 *
 * This function shuts down iucv. It disables iucv interrupts, retrieves
@@ -1347,9 +1347,8 @@ EXPORT_SYMBOL(iucv_message_send);
 * @srccls: source class of message
 * @buffer: address of send buffer or address of struct iucv_array
 * @size: length of send buffer
- * @answer: address of answer buffer or address of struct iucv_array
+ * @ansbuf: address of answer buffer or address of struct iucv_array
 * @asize: size of reply buffer
- * @residual: ignored
 *
 * This function transmits data to another application. Data to be
 * transmitted is in a buffer. The receiver of the send is expected to
@@ -1401,6 +1400,13 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
 }
 EXPORT_SYMBOL(iucv_message_send2way);
 
+/**
+ * iucv_path_pending
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection pending work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
 struct iucv_path_pending {
 	u16 ippathid;
 	u8  ipflags1;
@@ -1414,13 +1420,6 @@ struct iucv_path_pending {
 	u8  res4[3];
 } __packed;
 
-/**
- * iucv_path_pending
- * @data: Pointer to external interrupt buffer
- *
- * Process connection pending work item. Called from tasklet while holding
- * iucv_table_lock.
- */
 static void iucv_path_pending(struct iucv_irq_data *data)
 {
 	struct iucv_path_pending *ipp = (void *) data;
@@ -1462,6 +1461,13 @@ static void iucv_path_pending(struct iucv_irq_data *data)
 		iucv_sever_pathid(ipp->ippathid, error);
 }
 
+/**
+ * iucv_path_complete
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection complete work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
 struct iucv_path_complete {
 	u16 ippathid;
 	u8  ipflags1;
@@ -1475,13 +1481,6 @@ struct iucv_path_complete {
 	u8  res4[3];
 } __packed;
 
-/**
- * iucv_path_complete
- * @data: Pointer to external interrupt buffer
- *
- * Process connection complete work item. Called from tasklet while holding
- * iucv_table_lock.
- */
 static void iucv_path_complete(struct iucv_irq_data *data)
 {
 	struct iucv_path_complete *ipc = (void *) data;
@@ -1493,6 +1492,13 @@ static void iucv_path_complete(struct iucv_irq_data *data)
 		path->handler->path_complete(path, ipc->ipuser);
 }
 
+/**
+ * iucv_path_severed
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection severed work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
 struct iucv_path_severed {
 	u16 ippathid;
 	u8  res1;
@@ -1505,13 +1511,6 @@ struct iucv_path_severed {
 	u8  res5[3];
 } __packed;
 
-/**
- * iucv_path_severed
- * @data: Pointer to external interrupt buffer
- *
- * Process connection severed work item. Called from tasklet while holding
- * iucv_table_lock.
- */
 static void iucv_path_severed(struct iucv_irq_data *data)
 {
 	struct iucv_path_severed *ips = (void *) data;
@@ -1529,6 +1528,13 @@ static void iucv_path_severed(struct iucv_irq_data *data)
 	}
 }
 
+/**
+ * iucv_path_quiesced
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection quiesced work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
 struct iucv_path_quiesced {
 	u16 ippathid;
 	u8  res1;
@@ -1541,13 +1547,6 @@ struct iucv_path_quiesced {
 	u8  res5[3];
 } __packed;
 
-/**
- * iucv_path_quiesced
- * @data: Pointer to external interrupt buffer
- *
- * Process connection quiesced work item. Called from tasklet while holding
- * iucv_table_lock.
- */
 static void iucv_path_quiesced(struct iucv_irq_data *data)
 {
 	struct iucv_path_quiesced *ipq = (void *) data;
@@ -1557,6 +1556,13 @@ static void iucv_path_quiesced(struct iucv_irq_data *data)
 		path->handler->path_quiesced(path, ipq->ipuser);
 }
 
+/**
+ * iucv_path_resumed
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection resumed work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
 struct iucv_path_resumed {
 	u16 ippathid;
 	u8  res1;
@@ -1569,13 +1575,6 @@ struct iucv_path_resumed {
 	u8  res5[3];
 } __packed;
 
-/**
- * iucv_path_resumed
- * @data: Pointer to external interrupt buffer
- *
- * Process connection resumed work item. Called from tasklet while holding
- * iucv_table_lock.
- */
 static void iucv_path_resumed(struct iucv_irq_data *data)
 {
 	struct iucv_path_resumed *ipr = (void *) data;
@@ -1585,6 +1584,13 @@ static void iucv_path_resumed(struct iucv_irq_data *data)
 		path->handler->path_resumed(path, ipr->ipuser);
 }
 
+/**
+ * iucv_message_complete
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process message complete work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
 struct iucv_message_complete {
 	u16 ippathid;
 	u8  ipflags1;
@@ -1600,13 +1606,6 @@ struct iucv_message_complete {
 	u8  res2[3];
 } __packed;
 
-/**
- * iucv_message_complete
- * @data: Pointer to external interrupt buffer
- *
- * Process message complete work item. Called from tasklet while holding
- * iucv_table_lock.
- */
 static void iucv_message_complete(struct iucv_irq_data *data)
 {
 	struct iucv_message_complete *imc = (void *) data;
@@ -1625,6 +1624,13 @@ static void iucv_message_complete(struct iucv_irq_data *data)
 	}
 }
 
+/**
+ * iucv_message_pending
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process message pending work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
 struct iucv_message_pending {
 	u16 ippathid;
 	u8  ipflags1;
@@ -1647,13 +1653,6 @@ struct iucv_message_pending {
 	u8  res2[3];
 } __packed;
 
-/**
- * iucv_message_pending
- * @data: Pointer to external interrupt buffer
- *
- * Process message pending work item. Called from tasklet while holding
- * iucv_table_lock.
- */
 static void iucv_message_pending(struct iucv_irq_data *data)
 {
 	struct iucv_message_pending *imp = (void *) data;
@@ -1674,7 +1673,7 @@ static void iucv_message_pending(struct iucv_irq_data *data)
 	}
 }
 
-/*
+/**
 * iucv_tasklet_fn:
 *
 * This tasklet loops over the queue of irq buffers created by
@@ -1718,7 +1717,7 @@ static void iucv_tasklet_fn(unsigned long ignored)
 	spin_unlock(&iucv_table_lock);
 }
 
-/*
+/**
 * iucv_work_fn:
 *
 * This work function loops over the queue of path pending irq blocks
@@ -1749,8 +1748,9 @@ static void iucv_work_fn(struct work_struct *work)
 	spin_unlock_bh(&iucv_table_lock);
 }
 
-/*
+/**
 * iucv_external_interrupt
+ * @code: irq code
 *
 * Handles external interrupts coming in from CP.
 * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 71899e5a5a..11a715d76a 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 7499c51b18..93271a2632 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -250,15 +250,15 @@ struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel,
 
 	session_list = l2tp_session_id_hash(tunnel, session_id);
 
-	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(session, session_list, hlist)
+	read_lock_bh(&tunnel->hlist_lock);
+	hlist_for_each_entry(session, session_list, hlist)
 		if (session->session_id == session_id) {
 			l2tp_session_inc_refcount(session);
-			rcu_read_unlock_bh();
+			read_unlock_bh(&tunnel->hlist_lock);
 
 			return session;
 		}
-	rcu_read_unlock_bh();
+	read_unlock_bh(&tunnel->hlist_lock);
 
 	return NULL;
 }
@@ -291,18 +291,18 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
 	struct l2tp_session *session;
 	int count = 0;
 
-	rcu_read_lock_bh();
+	read_lock_bh(&tunnel->hlist_lock);
 	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
-		hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
+		hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
 			if (++count > nth) {
 				l2tp_session_inc_refcount(session);
-				rcu_read_unlock_bh();
+				read_unlock_bh(&tunnel->hlist_lock);
 				return session;
 			}
 		}
 	}
-	rcu_read_unlock_bh();
+	read_unlock_bh(&tunnel->hlist_lock);
 
 	return NULL;
 }
@@ -347,7 +347,7 @@ int l2tp_session_register(struct l2tp_session *session,
 
 	head = l2tp_session_id_hash(tunnel, session->session_id);
 
-	spin_lock_bh(&tunnel->hlist_lock);
+	write_lock_bh(&tunnel->hlist_lock);
 	if (!tunnel->acpt_newsess) {
 		err = -ENODEV;
 		goto err_tlock;
@@ -384,8 +384,8 @@ int l2tp_session_register(struct l2tp_session *session,
 		l2tp_tunnel_inc_refcount(tunnel);
 	}
 
-	hlist_add_head_rcu(&session->hlist, head);
-	spin_unlock_bh(&tunnel->hlist_lock);
+	hlist_add_head(&session->hlist, head);
+	write_unlock_bh(&tunnel->hlist_lock);
 
 	trace_register_session(session);
 
@@ -394,7 +394,7 @@ int l2tp_session_register(struct l2tp_session *session,
err_tlock_pnlock:
 	spin_unlock_bh(&pn->l2tp_session_hlist_lock);
err_tlock:
-	spin_unlock_bh(&tunnel->hlist_lock);
+	write_unlock_bh(&tunnel->hlist_lock);
 
 	return err;
 }
@@ -1170,9 +1170,9 @@ static void l2tp_session_unhash(struct l2tp_session *session)
 	/* Remove the session from core hashes */
 	if (tunnel) {
 		/* Remove from the per-tunnel hash */
-		spin_lock_bh(&tunnel->hlist_lock);
-		hlist_del_init_rcu(&session->hlist);
-		spin_unlock_bh(&tunnel->hlist_lock);
+		write_lock_bh(&tunnel->hlist_lock);
+		hlist_del_init(&session->hlist);
+		write_unlock_bh(&tunnel->hlist_lock);
 
 		/* For L2TPv3 we have a per-net hash: remove from there, too */
 		if (tunnel->version != L2TP_HDR_VER_2) {
@@ -1181,9 +1181,8 @@ static void l2tp_session_unhash(struct l2tp_session *session)
 			spin_lock_bh(&pn->l2tp_session_hlist_lock);
 			hlist_del_init_rcu(&session->global_hlist);
 			spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+			synchronize_rcu();
 		}
-
-		synchronize_rcu();
 	}
 }
 
@@ -1191,19 +1190,22 @@ static void l2tp_session_unhash(struct l2tp_session *session)
 */
 static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
 {
-	struct l2tp_session *session;
 	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+	struct l2tp_session *session;
 
-	spin_lock_bh(&tunnel->hlist_lock);
+	write_lock_bh(&tunnel->hlist_lock);
 	tunnel->acpt_newsess = false;
 	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
again:
-		hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
-			hlist_del_init_rcu(&session->hlist);
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			session = hlist_entry(walk, struct l2tp_session, hlist);
+			hlist_del_init(&session->hlist);
 
-			spin_unlock_bh(&tunnel->hlist_lock);
+			write_unlock_bh(&tunnel->hlist_lock);
 			l2tp_session_delete(session);
-			spin_lock_bh(&tunnel->hlist_lock);
+			write_lock_bh(&tunnel->hlist_lock);
 
 			/* Now restart from the beginning of this hash
 			 * chain. We always remove a session from the
@@ -1213,7 +1215,7 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
 			goto again;
 		}
 	}
-	spin_unlock_bh(&tunnel->hlist_lock);
+	write_unlock_bh(&tunnel->hlist_lock);
 }
 
 /* Tunnel socket destroy hook for UDP encapsulation */
@@ -1406,7 +1408,7 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
 
 	tunnel->magic = L2TP_TUNNEL_MAGIC;
 	sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
-	spin_lock_init(&tunnel->hlist_lock);
+	rwlock_init(&tunnel->hlist_lock);
 	tunnel->acpt_newsess = true;
 	tunnel->encap = encap;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a88e070b43..98ea98eb95 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -160,7 +160,7 @@ struct l2tp_tunnel {
 	unsigned long		dead;
 
 	struct rcu_head rcu;
-	spinlock_t		hlist_lock;	/* write-protection for session_hlist */
+	rwlock_t		hlist_lock;	/* protect session_hlist */
 	bool			acpt_newsess;	/* indicates whether this tunnel accepts
 						 * new sessions. Protected by hlist_lock.
 						 */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 9d1aafe75f..bca75bef82 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -32,8 +32,7 @@ static struct dentry *rootdir;
 
 struct l2tp_dfs_seq_data {
-	struct net	*net;
-	netns_tracker	ns_tracker;
+	struct net *net;
 	int tunnel_idx;			/* current tunnel */
 	int session_idx;		/* index of session within current tunnel */
 	struct l2tp_tunnel *tunnel;
@@ -121,21 +120,24 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
 static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 {
 	struct l2tp_tunnel *tunnel = v;
-	struct l2tp_session *session;
 	int session_count = 0;
 	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
 
-	rcu_read_lock_bh();
+	read_lock_bh(&tunnel->hlist_lock);
 	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
-		hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
-			/* Session ID of zero is a dummy/reserved value used by pppol2tp */
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			struct l2tp_session *session;
+
+			session = hlist_entry(walk, struct l2tp_session, hlist);
 			if (session->session_id == 0)
 				continue;
 
 			session_count++;
 		}
 	}
-	rcu_read_unlock_bh();
+	read_unlock_bh(&tunnel->hlist_lock);
 
 	seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id);
 	if (tunnel->sock) {
@@ -282,7 +284,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
 		rc = PTR_ERR(pd->net);
 		goto err_free_pd;
 	}
-	netns_tracker_alloc(pd->net, &pd->ns_tracker, GFP_KERNEL);
+
 	rc = seq_open(file, &l2tp_dfs_seq_ops);
 	if (rc)
 		goto err_free_net;
@@ -294,7 +296,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
 	return rc;
 
err_free_net:
-	put_net_track(pd->net, &pd->ns_tracker);
+	put_net(pd->net);
err_free_pd:
 	kfree(pd);
 	goto out;
@@ -308,7 +310,7 @@ static int l2tp_dfs_seq_release(struct inode *inode, struct file *file)
 	seq = file->private_data;
 	pd = seq->private;
 	if (pd->net)
-		put_net_track(pd->net, &pd->ns_tracker);
+		put_net(pd->net);
 	kfree(pd);
 	seq_release(inode, file);
 
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 26c00ebf4f..3086f4a6ae 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -224,7 +224,7 @@ static int llc_ui_release(struct socket *sock)
 	} else {
 		release_sock(sk);
 	}
-	dev_put_track(llc->dev, &llc->dev_tracker);
+	dev_put(llc->dev);
 	sock_put(sk);
 	llc_sk_free(sk);
out:
@@ -295,7 +295,6 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
 	llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
 	if (!llc->dev)
 		goto out;
-	netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL);
 	rc = -EUSERS;
 	llc->laddr.lsap = llc_ui_autoport();
 	if (!llc->laddr.lsap)
@@ -363,7 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 	} else
 		llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
 					       addr->sllc_mac);
-	dev_hold_track(llc->dev, &llc->dev_tracker, GFP_ATOMIC);
+	dev_hold(llc->dev);
 	rcu_read_unlock();
 	if (!llc->dev)
 		goto out;
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 40ca3c1e42..647c0554d0 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -781,7 +781,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb)
 
 	if (nskb) {
 		struct llc_sap *sap = llc->sap;
-		const u8 *dmac = llc->daddr.mac;
+		u8 *dmac = llc->daddr.mac;
 
 		if (llc->dev->flags & IFF_LOOPBACK)
 			dmac = llc->dev->dev_addr;
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index dde9bf08a5..ad6547736c 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -80,7 +80,7 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
 * establishment will inform to upper layer via calling it's confirm
 * function and passing proper information.
 */
-int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap)
+int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap)
 {
 	int rc = -EISCONN;
 	struct llc_addr laddr, daddr;
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index 5a6466fc62..b9ad087bcb 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -56,7 +56,7 @@ int llc_mac_hdr_init(struct sk_buff *skb,
 * package primitive as an event and send to SAP event handler
 */
 int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb,
-			      const unsigned char *dmac, unsigned char dsap)
+			      unsigned char *dmac, unsigned char dsap)
 {
 	int rc;
 	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, sap->laddr.lsap,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 07e9abb597..a4eccb9822 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -26,7 +26,7 @@
 #include
 #include
 
-static void llc_ui_format_mac(struct seq_file *seq, const u8 *addr)
+static void llc_ui_format_mac(struct seq_file *seq, u8 *addr)
 {
 	seq_printf(seq, "%pM", addr);
 }
@@ -195,7 +195,7 @@ static int llc_seq_core_show(struct seq_file *seq, void *v)
 		   timer_pending(&llc->pf_cycle_timer.timer),
 		   timer_pending(&llc->rej_sent_timer.timer),
 		   timer_pending(&llc->busy_state_timer.timer),
-		   !!sk->sk_backlog.tail, sock_owned_by_user_nocheck(sk));
+		   !!sk->sk_backlog.tail, !!sk->sk_lock.owned);
out:
 	return 0;
 }
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 7d2925bb96..ef729b1e39 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -478,7 +478,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     size_t len)
 {
 	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
-	struct ieee802_11_elems *elems = NULL;
+	struct ieee802_11_elems elems = { };
 	u8 dialog_token;
 	int ies_len;
 
@@ -496,18 +496,16 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	ies_len = len - offsetof(struct ieee80211_mgmt,
 				 u.action.u.addba_req.variable);
 	if (ies_len) {
-		elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
-					       ies_len, true, mgmt->bssid, NULL);
-		if (!elems || elems->parse_error)
-			goto free;
+		ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
+				       ies_len, true, &elems, mgmt->bssid, NULL);
+		if (elems.parse_error)
+			return;
 	}
 
 	__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
 					start_seq_num, ba_policy, tid,
 					buf_size, true, false,
-					elems ? elems->addba_ext_ie : NULL);
elems->addba_ext_ie : NULL); -free: - kfree(elems); + elems.addba_ext_ie); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 87a208089c..1bf83b8d84 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include @@ -112,36 +112,6 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, return 0; } -static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, - struct cfg80211_mbssid_config params) -{ - struct ieee80211_sub_if_data *tx_sdata; - - sdata->vif.mbssid_tx_vif = NULL; - sdata->vif.bss_conf.bssid_index = 0; - sdata->vif.bss_conf.nontransmitted = false; - sdata->vif.bss_conf.ema_ap = false; - - if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) - return -EINVAL; - - tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev); - if (!tx_sdata) - return -EINVAL; - - if (tx_sdata == sdata) { - sdata->vif.mbssid_tx_vif = &sdata->vif; - } else { - sdata->vif.mbssid_tx_vif = &tx_sdata->vif; - sdata->vif.bss_conf.nontransmitted = true; - sdata->vif.bss_conf.bssid_index = params.index; - } - if (params.ema) - sdata->vif.bss_conf.ema_ap = true; - - return 0; -} - static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, @@ -1137,14 +1107,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, changed |= BSS_CHANGED_HE_BSS_COLOR; } - if (sdata->vif.type == NL80211_IFTYPE_AP && - params->mbssid_config.tx_wdev) { - err = ieee80211_set_ap_mbssid_options(sdata, - params->mbssid_config); - if (err) - return err; - } - mutex_lock(&local->mtx); err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); @@ -3201,18 +3163,6 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_finish); -void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_tx) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; - - sdata->csa_block_tx = block_tx; - sdata_info(sdata, "channel switch failed, disconnecting\n"); - ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); -} -EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); - static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, u32 *changed) { @@ -4283,21 +4233,6 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_HE_BSS_COLOR; ieee80211_bss_info_change_notify(sdata, changed); - - if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { - struct ieee80211_sub_if_data *child; - - mutex_lock(&sdata->local->iflist_mtx); - list_for_each_entry(child, &sdata->local->interfaces, list) { - if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { - child->vif.bss_conf.he_bss_color.color = color; - child->vif.bss_conf.he_bss_color.enabled = enable; - ieee80211_bss_info_change_notify(child, - BSS_CHANGED_HE_BSS_COLOR); - } - } - mutex_unlock(&sdata->local->iflist_mtx); - } } static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) @@ -4382,9 +4317,6 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, 
sdata_assert_lock(sdata); - if (sdata->vif.bss_conf.nontransmitted) - return -EINVAL; - mutex_lock(&local->mtx); /* don't allow another color change if one is already active or if csa @@ -4416,18 +4348,6 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, return err; } -static int -ieee80211_set_radar_background(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - - if (!local->ops->set_radar_background) - return -EOPNOTSUPP; - - return local->ops->set_radar_background(&local->hw, chandef); -} - const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -4532,5 +4452,4 @@ const struct cfg80211_ops mac80211_config_ops = { .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, .color_change = ieee80211_color_change, - .set_radar_background = ieee80211_set_radar_background, }; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 9479f2787e..8be28cfd6f 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2020 Intel Corporation */ #include @@ -153,20 +153,20 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, rcu_read_lock(); p += scnprintf(p, - bufsz + buf - p, + bufsz+buf-p, "target %uus interval %uus ecn %s\n", codel_time_to_us(sta->cparams.target), codel_time_to_us(sta->cparams.interval), sta->cparams.ecn ? "yes" : "no"); p += scnprintf(p, - bufsz + buf - p, + bufsz+buf-p, "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n"); for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { if (!sta->sta.txq[i]) continue; txqi = to_txq_info(sta->sta.txq[i]); - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, bufsz+buf-p, "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n", txqi->txq.tid, txqi->txq.ac, @@ -314,24 +314,17 @@ STA_OPS_RW(aql); static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char *buf, *p; - ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; + char buf[71 + IEEE80211_NUM_TIDS * 40], *p = buf; int i; struct sta_info *sta = file->private_data; struct tid_ampdu_rx *tid_rx; struct tid_ampdu_tx *tid_tx; - ssize_t ret; - - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; rcu_read_lock(); - p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n", + p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { @@ -341,27 +334,25 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid); - p += scnprintf(p, bufsz + buf - p, "%02d", i); - p += scnprintf(p, bufsz + buf - p, "\t\t%x", + p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", tid_rx_valid); - p += scnprintf(p, bufsz + buf - p, "\t%#.2x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", tid_rx_valid ? 
sta->ampdu_mlme.tid_rx_token[i] : 0); - p += scnprintf(p, bufsz + buf - p, "\t%#.3x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", tid_rx ? tid_rx->ssn : 0); - p += scnprintf(p, bufsz + buf - p, "\t\t%x", !!tid_tx); - p += scnprintf(p, bufsz + buf - p, "\t%#.2x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_tx); + p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", tid_tx ? tid_tx->dialog_token : 0); - p += scnprintf(p, bufsz + buf - p, "\t%03d", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", tid_tx ? skb_queue_len(&tid_tx->pending) : 0); - p += scnprintf(p, bufsz + buf - p, "\n"); + p += scnprintf(p, sizeof(buf) + buf - p, "\n"); } rcu_read_unlock(); - ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); - kfree(buf); - return ret; + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, @@ -443,22 +434,15 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, if (_cond) \ p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \ } while (0) - char *buf, *p; + char buf[512], *p = buf; int i; - ssize_t bufsz = 512; struct sta_info *sta = file->private_data; struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; - ssize_t ret; - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - p += scnprintf(p, bufsz + buf - p, "ht %ssupported\n", + p += scnprintf(p, sizeof(buf) + buf - p, "ht %ssupported\n", htc->ht_supported ? "" : "not "); if (htc->ht_supported) { - p += scnprintf(p, bufsz + buf - p, "cap: %#.4x\n", htc->cap); + p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap); PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDPC"); PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40"); @@ -500,90 +484,81 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, PRINT_HT_CAP((htc->cap & BIT(15)), "L-SIG TXOP protection"); - p += scnprintf(p, bufsz + buf - p, "ampdu factor/density: %d/%d\n", + p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n", htc->ampdu_factor, htc->ampdu_density); - p += scnprintf(p, bufsz + buf - p, "MCS mask:"); + p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:"); for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - p += scnprintf(p, bufsz + buf - p, " %.2x", + p += scnprintf(p, sizeof(buf)+buf-p, " %.2x", htc->mcs.rx_mask[i]); - p += scnprintf(p, bufsz + buf - p, "\n"); + p += scnprintf(p, sizeof(buf)+buf-p, "\n"); /* If not set this is meaningless */ if (le16_to_cpu(htc->mcs.rx_highest)) { - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf)+buf-p, "MCS rx highest: %d Mbps\n", le16_to_cpu(htc->mcs.rx_highest)); } - p += scnprintf(p, bufsz + buf - p, "MCS tx params: %x\n", + p += scnprintf(p, sizeof(buf)+buf-p, "MCS tx params: %x\n", htc->mcs.tx_params); } - ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); - kfree(buf); - return ret; + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } STA_OPS(ht_capa); static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char *buf, *p; + char buf[512], *p = buf; struct sta_info *sta = file->private_data; struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; - ssize_t ret; - ssize_t bufsz = 512; - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - p += scnprintf(p, bufsz + buf - p, "VHT %ssupported\n", + p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n", vhtc->vht_supported ? 
"" : "not "); if (vhtc->vht_supported) { - p += scnprintf(p, bufsz + buf - p, "cap: %#.8x\n", + p += scnprintf(p, sizeof(buf) + buf - p, "cap: %#.8x\n", vhtc->cap); #define PFLAG(a, b) \ do { \ if (vhtc->cap & IEEE80211_VHT_CAP_ ## a) \ - p += scnprintf(p, bufsz + buf - p, \ + p += scnprintf(p, sizeof(buf) + buf - p, \ "\t\t%s\n", b); \ } while (0) switch (vhtc->cap & 0x3) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tMAX-MPDU-3895\n"); break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tMAX-MPDU-7991\n"); break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tMAX-MPDU-11454\n"); break; default: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tMAX-MPDU-UNKNOWN\n"); } switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case 0: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t80Mhz\n"); break; case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t160Mhz\n"); break; case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t80+80Mhz\n"); break; default: - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tUNKNOWN-MHZ: 0x%x\n", (vhtc->cap >> 2) & 0x3); } @@ -591,15 +566,15 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, PFLAG(SHORT_GI_80, "SHORT-GI-80"); PFLAG(SHORT_GI_160, "SHORT-GI-160"); PFLAG(TXSTBC, "TXSTBC"); - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tRXSTBC_%d\n", (vhtc->cap >> 8) & 0x7); PFLAG(SU_BEAMFORMER_CAPABLE, "SU-BEAMFORMER-CAPABLE"); PFLAG(SU_BEAMFORMEE_CAPABLE, "SU-BEAMFORMEE-CAPABLE"); - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tBEAMFORMEE-STS: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK) >> IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT); - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tSOUNDING-DIMENSIONS: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK) >> IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT); @@ -607,36 +582,34 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, PFLAG(MU_BEAMFORMEE_CAPABLE, "MU-BEAMFORMEE-CAPABLE"); PFLAG(VHT_TXOP_PS, "TXOP-PS"); PFLAG(HTC_VHT, "HTC-VHT"); - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tMPDU-LENGTH-EXPONENT: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK) >> IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT); PFLAG(VHT_LINK_ADAPTATION_VHT_UNSOL_MFB, "LINK-ADAPTATION-VHT-UNSOL-MFB"); - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf) + buf - p, "\t\tLINK-ADAPTATION-VHT-MRQ-MFB: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB) >> 26); PFLAG(RX_ANTENNA_PATTERN, "RX-ANTENNA-PATTERN"); PFLAG(TX_ANTENNA_PATTERN, "TX-ANTENNA-PATTERN"); - p += scnprintf(p, bufsz + buf - p, "RX MCS: %.4x\n", + p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n", le16_to_cpu(vhtc->vht_mcs.rx_mcs_map)); if (vhtc->vht_mcs.rx_highest) - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf)+buf-p, "MCS RX highest: %d Mbps\n", 
le16_to_cpu(vhtc->vht_mcs.rx_highest)); - p += scnprintf(p, bufsz + buf - p, "TX MCS: %.4x\n", + p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n", le16_to_cpu(vhtc->vht_mcs.tx_mcs_map)); if (vhtc->vht_mcs.tx_highest) - p += scnprintf(p, bufsz + buf - p, + p += scnprintf(p, sizeof(buf)+buf-p, "MCS TX highest: %d Mbps\n", le16_to_cpu(vhtc->vht_mcs.tx_highest)); #undef PFLAG } - ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); - kfree(buf); - return ret; + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } STA_OPS(vht_capa); @@ -936,15 +909,14 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB, "RX-FULL-BW-SU-USING-MU-WITH-NON-COMP-SIGB"); - switch (u8_get_bits(cap[9], - IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK)) { - case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US: + switch (cap[9] & IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) { + case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US: PRINT("NOMINAL-PACKET-PADDING-0US"); break; - case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US: + case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US: PRINT("NOMINAL-PACKET-PADDING-8US"); break; - case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US: + case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US: PRINT("NOMINAL-PACKET-PADDING-16US"); break; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4e2fc1a086..c336267f45 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1486,26 +1486,4 @@ static inline void drv_twt_teardown_request(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_net_fill_forward_path(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - struct net_device_path_ctx *ctx, - struct net_device_path *path) -{ - int ret = -EOPNOTSUPP; - - sdata = get_bss_sdata(sdata); - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_net_fill_forward_path(local, sdata, sta); - if (local->ops->net_fill_forward_path) - ret = local->ops->net_fill_forward_path(&local->hw, - &sdata->vif, sta, - ctx, path); - trace_drv_return_int(local, ret); - - return ret; -} - #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index b2253df544..99a2e30b38 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -14,9 +14,7 @@ #include "driver-ops.h" static int ieee80211_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *rp, - struct kernel_ethtool_ringparam *kernel_rp, - struct netlink_ext_ack *extack) + struct ethtool_ringparam *rp) { struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy); @@ -27,9 +25,7 @@ static int ieee80211_set_ringparam(struct net_device *dev, } static void ieee80211_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *rp, - struct kernel_ethtool_ringparam *kernel_rp, - struct netlink_ext_ack *extack) + struct ethtool_ringparam *rp) { struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy); diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c index e1d4cfd991..a13ae14893 100644 --- a/net/mac80211/fils_aead.c +++ b/net/mac80211/fils_aead.c @@ -219,8 +219,7 @@ int fils_encrypt_assoc_req(struct sk_buff *skb, { struct ieee80211_mgmt *mgmt = (void *)skb->data; u8 *capab, *ies, *encr; - const u8 *addr[5 + 1]; - const struct element *session; + const u8 *addr[5 + 1], *session; size_t len[5 + 1]; size_t crypt_len; @@ 
-232,12 +231,12 @@ int fils_encrypt_assoc_req(struct sk_buff *skb, ies = mgmt->u.assoc_req.variable; } - session = cfg80211_find_ext_elem(WLAN_EID_EXT_FILS_SESSION, - ies, skb->data + skb->len - ies); - if (!session || session->datalen != 1 + 8) + session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, + ies, skb->data + skb->len - ies); + if (!session || session[1] != 1 + 8) return -EINVAL; /* encrypt after FILS Session element */ - encr = (u8 *)session->data + 1 + 8; + encr = (u8 *)session + 2 + 1 + 8; /* AES-SIV AAD vectors */ @@ -271,8 +270,7 @@ int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgmt *mgmt = (void *)frame; u8 *capab, *ies, *encr; - const u8 *addr[5 + 1]; - const struct element *session; + const u8 *addr[5 + 1], *session; size_t len[5 + 1]; int res; size_t crypt_len; @@ -282,16 +280,16 @@ int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, capab = (u8 *)&mgmt->u.assoc_resp.capab_info; ies = mgmt->u.assoc_resp.variable; - session = cfg80211_find_ext_elem(WLAN_EID_EXT_FILS_SESSION, - ies, frame + *frame_len - ies); - if (!session || session->datalen != 1 + 8) { + session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, + ies, frame + *frame_len - ies); + if (!session || session[1] != 1 + 8) { mlme_dbg(sdata, "No (valid) FILS Session element in (Re)Association Response frame from %pM", mgmt->sa); return -EINVAL; } /* decrypt after FILS Session element */ - encr = (u8 *)session->data + 1 + 8; + encr = (u8 *)session + 2 + 1 + 8; /* AES-SIV AAD vectors */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0416c4d222..5d6ca4c3e6 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2021 Intel Corporation + * Copyright(c) 2018-2020 Intel Corporation */ #include @@ -1589,7 +1589,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { size_t baselen; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != offsetof(typeof(mgmt->u.beacon), variable)); @@ -1602,14 +1602,10 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable, - len - baselen, false, - mgmt->bssid, NULL); + ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, + false, &elems, mgmt->bssid, NULL); - if (elems) { - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, elems); - kfree(elems); - } + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -1618,7 +1614,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; int ies_len; rx_status = IEEE80211_SKB_RXCB(skb); @@ -1655,16 +1651,15 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, if (ies_len < 0) break; - elems = ieee802_11_parse_elems( + ieee802_11_parse_elems( mgmt->u.action.u.chan_switch.variable, - ies_len, true, mgmt->bssid, NULL); + ies_len, true, &elems, mgmt->bssid, NULL); - if (elems && !elems->parse_error) - ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, - skb->len, - rx_status, - elems); - kfree(elems); + 
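
The fils_aead.c hunks above switch from struct element accessors back to raw information-element pointers, so the magic offsets deserve a note: an 802.11 element is [id:1][len:1][payload], and an extension element spends the first payload byte on the extension ID. Hence session[1] != 1 + 8 rejects anything but an ext-ID byte plus the 8-byte FILS Session value, and session + 2 + 1 + 8 is the first byte after the element. A self-contained sketch of the element walk these offsets assume; find_ext_elem is a simplified stand-in for cfg80211_find_ext_ie(), and the extension ID 4 for FILS Session is an assumption here:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define WLAN_EID_EXTENSION 255

/* Simplified element walk: [id:1][len:1][payload:len]; an extension
 * element stores its real ID in payload[0], so data begins at ie + 3. */
static const uint8_t *find_ext_elem(uint8_t ext_id, const uint8_t *ies,
				    size_t len)
{
	while (len >= 2) {
		uint8_t id = ies[0], elen = ies[1];

		if (2 + (size_t)elen > len)
			return NULL;	/* truncated element */
		if (id == WLAN_EID_EXTENSION && elen >= 1 && ies[2] == ext_id)
			return ies;	/* points at the ID byte */
		ies += 2 + elen;
		len -= 2 + elen;
	}
	return NULL;
}

int main(void)
{
	/* ext element: id 255, len 9 = ext-ID byte + 8-byte session */
	const uint8_t ies[] = { 255, 9, 4, 1, 2, 3, 4, 5, 6, 7, 8 };
	const uint8_t *session = find_ext_elem(4, ies, sizeof(ies));

	if (session && session[1] == 1 + 8)	/* the hunk's length check */
		printf("encrypted part starts at offset %td\n",
		       session + 2 + 1 + 8 - ies);
	return 0;
}
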
if (elems.parse_error) + break; + + ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len, + rx_status, &elems); break; } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 330ea62231..6a88195e5a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -376,7 +376,7 @@ struct ieee80211_mgd_auth_data { u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; - bool done; + bool done, waiting; bool peer_confirmed; bool timeout_started; @@ -631,9 +631,10 @@ struct ieee80211_if_ocb { */ struct ieee802_11_elems; struct ieee80211_mesh_sync_ops { - void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype, - struct ieee80211_mgmt *mgmt, unsigned int len, - const struct ieee80211_meshconf_ie *mesh_cfg, + void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, + u16 stype, + struct ieee80211_mgmt *mgmt, + struct ieee802_11_elems *elems, struct ieee80211_rx_status *rx_status); /* should be called with beacon_data under RCU read lock */ @@ -1261,9 +1262,6 @@ struct ieee80211_local { */ bool suspended; - /* suspending is true during the whole suspend process */ - bool suspending; - /* * Resuming is true while suspended, but when we're reprogramming the * hardware -- at that time it's allowed to use ieee80211_queue_work() @@ -1483,7 +1481,7 @@ struct ieee80211_local { }; static inline struct ieee80211_sub_if_data * -IEEE80211_DEV_TO_SUB_IF(const struct net_device *dev) +IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) { return netdev_priv(dev); } @@ -1530,7 +1528,6 @@ struct ieee80211_csa_ie { struct ieee802_11_elems { const u8 *ie_start; size_t total_len; - u32 crc; /* pointers to IEs */ const struct ieee80211_tdls_lnkie *lnk_id; @@ -1540,6 +1537,7 @@ struct ieee802_11_elems { const u8 *supp_rates; const u8 *ds_params; const struct ieee80211_tim_ie *tim; + const u8 *challenge; const u8 *rsn; const u8 *rsnx; const u8 *erp_info; @@ -1593,6 +1591,7 @@ struct ieee802_11_elems { u8 ssid_len; u8 supp_rates_len; u8 tim_len; + u8 challenge_len; u8 rsn_len; u8 rsnx_len; u8 ext_supp_rates_len; @@ -2215,18 +2214,18 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb_tid(sdata, skb, 7); } -struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len, - bool action, - u64 filter, u32 crc, - const u8 *transmitter_bssid, - const u8 *bss_bssid); -static inline struct ieee802_11_elems * -ieee802_11_parse_elems(const u8 *start, size_t len, bool action, - const u8 *transmitter_bssid, - const u8 *bss_bssid) +u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + struct ieee802_11_elems *elems, + u64 filter, u32 crc, u8 *transmitter_bssid, + u8 *bss_bssid); +static inline void ieee802_11_parse_elems(const u8 *start, size_t len, + bool action, + struct ieee802_11_elems *elems, + u8 *transmitter_bssid, + u8 *bss_bssid) { - return ieee802_11_parse_elems_crc(start, len, action, 0, 0, - transmitter_bssid, bss_bssid); + ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0, + transmitter_bssid, bss_bssid); } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4153147843..041859b5b7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -632,46 +632,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_add_virtual_monitor(local); } -static void ieee80211_stop_mbssid(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_sub_if_data *tx_sdata, *non_tx_sdata, *tmp_sdata; - struct ieee80211_vif *tx_vif = sdata->vif.mbssid_tx_vif; - - 
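
A pattern worth calling out once, since it repeats across ibss.c, mesh.c and mlme.c below: ieee802_11_parse_elems() goes back to filling a caller-provided struct ieee802_11_elems on the stack (see the prototypes restored in ieee80211_i.h above), so the allocation-failure check and the kfree() on every exit path disappear, and malformed input is reported through elems.parse_error instead. A toy model of the restored convention; the names and the stub parser are illustrative only:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Toy model of the two conventions.  Heap style (being removed):
 * the result must be NULL-checked and freed on every exit path. */
struct elems {
	bool parse_error;
	/* ... element pointers ... */
};

struct elems *parse_elems_alloc(const uint8_t *ies, size_t len);

/* Stack style (restored): the caller owns the storage, nothing to
 * free, and bad input is flagged rather than signalled by NULL. */
static void parse_elems(const uint8_t *ies, size_t len, struct elems *out)
{
	(void)ies;			/* stub parser */
	out->parse_error = (len == 0);
}

static void rx_mgmt(const uint8_t *ies, size_t len)
{
	struct elems elems;		/* lives on the stack */

	parse_elems(ies, len, &elems);
	if (elems.parse_error)
		return;			/* no kfree() needed on the way out */
	/* ... act on elems ... */
}

int main(void)
{
	const uint8_t ies[] = { 0, 0 };

	rx_mgmt(ies, sizeof(ies));
	return 0;
}
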
if (!tx_vif) - return; - - tx_sdata = vif_to_sdata(tx_vif); - sdata->vif.mbssid_tx_vif = NULL; - - list_for_each_entry_safe(non_tx_sdata, tmp_sdata, - &tx_sdata->local->interfaces, list) { - if (non_tx_sdata != sdata && non_tx_sdata != tx_sdata && - non_tx_sdata->vif.mbssid_tx_vif == tx_vif && - ieee80211_sdata_running(non_tx_sdata)) { - non_tx_sdata->vif.mbssid_tx_vif = NULL; - dev_close(non_tx_sdata->wdev.netdev); - } - } - - if (sdata != tx_sdata && ieee80211_sdata_running(tx_sdata)) { - tx_sdata->vif.mbssid_tx_vif = NULL; - dev_close(tx_sdata->wdev.netdev); - } -} - static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* close dependent VLAN and MBSSID interfaces before locking wiphy */ + /* close all dependent VLAN interfaces before locking wiphy */ if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmpsdata; list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, u.vlan.list) dev_close(vlan->dev); - - ieee80211_stop_mbssid(sdata); } wiphy_lock(sdata->local->hw.wiphy); @@ -789,64 +760,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_get_stats64 = ieee80211_get_stats64, }; -static int ieee80211_netdev_fill_forward_path(struct net_device_path_ctx *ctx, - struct net_device_path *path) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local; - struct sta_info *sta; - int ret = -ENOENT; - - sdata = IEEE80211_DEV_TO_SUB_IF(ctx->dev); - local = sdata->local; - - if (!local->ops->net_fill_forward_path) - return -EOPNOTSUPP; - - rcu_read_lock(); - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP_VLAN: - sta = rcu_dereference(sdata->u.vlan.sta); - if (sta) - break; - if (sdata->wdev.use_4addr) - goto out; - if (is_multicast_ether_addr(ctx->daddr)) - goto out; - sta = sta_info_get_bss(sdata, ctx->daddr); - break; - case NL80211_IFTYPE_AP: - if (is_multicast_ether_addr(ctx->daddr)) - goto out; - sta = sta_info_get(sdata, ctx->daddr); - break; - case NL80211_IFTYPE_STATION: - if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { - sta = sta_info_get(sdata, ctx->daddr); - if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { - if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) - goto out; - - break; - } - } - - sta = sta_info_get(sdata, sdata->u.mgd.bssid); - break; - default: - goto out; - } - - if (!sta) - goto out; - - ret = drv_net_fill_forward_path(local, sdata, &sta->sta, ctx, path); -out: - rcu_read_unlock(); - - return ret; -} - static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, @@ -856,7 +769,6 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, - .ndo_fill_forward_path = ieee80211_netdev_fill_forward_path, }; static bool ieee80211_iftype_supports_hdr_offload(enum nl80211_iftype iftype) @@ -1198,7 +1110,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) * this interface, if it has the special null one. 
*/ if (dev && is_zero_ether_addr(dev->dev_addr)) { - eth_hw_addr_set(dev, local->hw.wiphy->perm_addr); + memcpy(dev->dev_addr, + local->hw.wiphy->perm_addr, + ETH_ALEN); memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN); if (!is_valid_ether_addr(dev->dev_addr)) { @@ -2052,9 +1966,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_assign_perm_addr(local, ndev->perm_addr, type); if (is_valid_ether_addr(params->macaddr)) - eth_hw_addr_set(ndev, params->macaddr); + memcpy(ndev->dev_addr, params->macaddr, ETH_ALEN); else - eth_hw_addr_set(ndev, ndev->perm_addr); + memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); /* don't use IEEE80211_DEV_TO_SUB_IF -- it checks too much */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5311c3cd30..45fb517591 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1131,14 +1131,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->scan_ies_len += 2 + sizeof(struct ieee80211_vht_cap); + /* HE cap element is variable in size - set len to allow max size */ /* - * HE cap element is variable in size - set len to allow max size */ - if (supp_he) { + * TODO: 1 is added at the end of the calculation to accommodate for + * the temporary placing of the HE capabilities IE under EXT. + * Remove it once it is placed in the final place. + */ + if (supp_he) local->scan_ies_len += - 3 + sizeof(struct ieee80211_he_cap_elem) + + 2 + sizeof(struct ieee80211_he_cap_elem) + sizeof(struct ieee80211_he_mcs_nss_supp) + - IEEE80211_HE_PPE_THRES_MAX_LEN; - } + IEEE80211_HE_PPE_THRES_MAX_LEN + 1; if (!local->ops->hw_scan) { /* For hw_scan, driver needs to set these up. */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 15ac08d111..5dcfd53a4a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1247,7 +1247,7 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *presp; struct beacon_data *bcn; struct ieee80211_mgmt *hdr; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; size_t baselen; u8 *pos; @@ -1256,24 +1256,22 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - elems = ieee802_11_parse_elems(pos, len - baselen, false, mgmt->bssid, - NULL); - if (!elems) - return; + ieee802_11_parse_elems(pos, len - baselen, false, &elems, mgmt->bssid, + NULL); - if (!elems->mesh_id) - goto free; + if (!elems.mesh_id) + return; /* 802.11-2012 10.1.4.3.2 */ if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && !is_broadcast_ether_addr(mgmt->da)) || - elems->ssid_len != 0) - goto free; + elems.ssid_len != 0) + return; - if (elems->mesh_id_len != 0 && - (elems->mesh_id_len != ifmsh->mesh_id_len || - memcmp(elems->mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) - goto free; + if (elems.mesh_id_len != 0 && + (elems.mesh_id_len != ifmsh->mesh_id_len || + memcmp(elems.mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) + return; rcu_read_lock(); bcn = rcu_dereference(ifmsh->beacon); @@ -1297,8 +1295,6 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, presp); out: rcu_read_unlock(); -free: - kfree(elems); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -1309,7 +1305,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; struct 
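
On the scan_ies_len arithmetic restored just above: the worst-case HE capability element is budgeted as 2 bytes of element header, the fixed capability bytes, the largest MCS/NSS set, the maximum PPE-thresholds field, and a trailing + 1 for the extension-ID byte that the restored TODO comment refers to; the removed variant folded that byte into a leading 3 +, so both totals agree. A sketch of the budget with stand-in sizes; the constants below are illustrative, not the real struct sizes:

#include <stdio.h>

/* Illustrative sizes only -- the real structs live in
 * include/linux/ieee80211.h. */
#define HE_CAP_ELEM_FIXED	17	/* stand-in: MAC + PHY cap bytes */
#define HE_MCS_NSS_MAX		12	/* stand-in: largest MCS/NSS set */
#define HE_PPE_THRES_MAX	25	/* stand-in: max PPE thresholds */

int main(void)
{
	/* element ID (1) + length (1) + payload; the trailing + 1 is
	 * the extension-ID byte the restored TODO comment refers to */
	unsigned int budget = 2 + HE_CAP_ELEM_FIXED + HE_MCS_NSS_MAX +
			      HE_PPE_THRES_MAX + 1;

	printf("worst-case HE capability element: %u bytes\n", budget);
	return 0;
}
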
ieee80211_channel *channel; size_t baselen; int freq; @@ -1324,47 +1320,42 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable, - len - baselen, - false, mgmt->bssid, NULL); - if (!elems) - return; + ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, + false, &elems, mgmt->bssid, NULL); /* ignore non-mesh or secure / unsecure mismatch */ - if ((!elems->mesh_id || !elems->mesh_config) || - (elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) || - (!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) - goto free; + if ((!elems.mesh_id || !elems.mesh_config) || + (elems.rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) || + (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) + return; - if (elems->ds_params) - freq = ieee80211_channel_to_frequency(elems->ds_params[0], band); + if (elems.ds_params) + freq = ieee80211_channel_to_frequency(elems.ds_params[0], band); else freq = rx_status->freq; channel = ieee80211_get_channel(local->hw.wiphy, freq); if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) - goto free; + return; - if (mesh_matches_local(sdata, elems)) { + if (mesh_matches_local(sdata, &elems)) { mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n", sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal); if (!sdata->u.mesh.user_mpm || sdata->u.mesh.mshcfg.rssi_threshold == 0 || sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal) - mesh_neighbour_update(sdata, mgmt->sa, elems, + mesh_neighbour_update(sdata, mgmt->sa, &elems, rx_status); if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT && !sdata->vif.csa_active) - ieee80211_mesh_process_chnswitch(sdata, elems, true); + ieee80211_mesh_process_chnswitch(sdata, &elems, true); } if (ifmsh->sync_ops) - ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len, - elems->mesh_config, rx_status); -free: - kfree(elems); + ifmsh->sync_ops->rx_bcn_presp(sdata, + stype, mgmt, &elems, rx_status); } int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) @@ -1456,7 +1447,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; u16 pre_value; bool fwd_csa = true; size_t baselen; @@ -1469,37 +1460,33 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.action.u.chan_switch.variable; baselen = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); - elems = ieee802_11_parse_elems(pos, len - baselen, true, - mgmt->bssid, NULL); - if (!elems) + ieee802_11_parse_elems(pos, len - baselen, true, &elems, + mgmt->bssid, NULL); + + if (!mesh_matches_local(sdata, &elems)) return; - if (!mesh_matches_local(sdata, elems)) - goto free; - - ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl; + ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) fwd_csa = false; - pre_value = le16_to_cpu(elems->mesh_chansw_params_ie->mesh_pre_value); + pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value); if (ifmsh->pre_value >= pre_value) - goto free; + return; ifmsh->pre_value = pre_value; if (!sdata->vif.csa_active && - !ieee80211_mesh_process_chnswitch(sdata, elems, false)) { + !ieee80211_mesh_process_chnswitch(sdata, &elems, false)) { mcsa_dbg(sdata, "Failed to process CSA action frame"); - goto free; + return; 
} /* forward or re-broadcast the CSA frame */ if (fwd_csa) { - if (mesh_fwd_csa_frame(sdata, mgmt, len, elems) < 0) + if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0) mcsa_dbg(sdata, "Failed to forward the CSA frame"); } -free: - kfree(elems); } static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 44a6fdb6ef..a05b615deb 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019, 2021 Intel Corporation + * Copyright (C) 2019 Intel Corporation * Author: Luis Carlos Cobo */ @@ -908,7 +908,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; size_t baselen; u32 path_metric; struct sta_info *sta; @@ -926,41 +926,37 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; - elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, - len - baselen, false, mgmt->bssid, NULL); - if (!elems) - return; + ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, + len - baselen, false, &elems, mgmt->bssid, NULL); - if (elems->preq) { - if (elems->preq_len != 37) + if (elems.preq) { + if (elems.preq_len != 37) /* Right now we support just 1 destination and no AE */ - goto free; - path_metric = hwmp_route_info_get(sdata, mgmt, elems->preq, + return; + path_metric = hwmp_route_info_get(sdata, mgmt, elems.preq, MPATH_PREQ); if (path_metric) - hwmp_preq_frame_process(sdata, mgmt, elems->preq, + hwmp_preq_frame_process(sdata, mgmt, elems.preq, path_metric); } - if (elems->prep) { - if (elems->prep_len != 31) + if (elems.prep) { + if (elems.prep_len != 31) /* Right now we support no AE */ - goto free; - path_metric = hwmp_route_info_get(sdata, mgmt, elems->prep, + return; + path_metric = hwmp_route_info_get(sdata, mgmt, elems.prep, MPATH_PREP); if (path_metric) - hwmp_prep_frame_process(sdata, mgmt, elems->prep, + hwmp_prep_frame_process(sdata, mgmt, elems.prep, path_metric); } - if (elems->perr) { - if (elems->perr_len != 15) + if (elems.perr) { + if (elems.perr_len != 15) /* Right now we support only one destination per PERR */ - goto free; - hwmp_perr_frame_process(sdata, mgmt, elems->perr); + return; + hwmp_perr_frame_process(sdata, mgmt, elems.perr); } - if (elems->rann) - hwmp_rann_frame_process(sdata, mgmt, elems->rann); -free: - kfree(elems); + if (elems.rann) + hwmp_rann_frame_process(sdata, mgmt, elems.rann); } /** diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a829470dd5..a6915847d7 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. 
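
The mesh_hwmp.c hunk above keeps the strict length checks on path-selection elements: with one target and no Address Extension, a PREQ is exactly 37 bytes, a PREP 31 and a PERR 15, and anything else is dropped rather than parsed. A small sketch of that validation; the numeric element IDs are quoted from ieee80211.h and should be treated as assumptions here:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Values come straight from the restored length checks. */
enum { PREQ_LEN = 37, PREP_LEN = 31, PERR_ONE_DEST_LEN = 15 };

/* Accept only the single layout this code supports: one destination,
 * no Address Extension.  Element IDs as in ieee80211.h (assumed). */
static bool hwmp_len_ok(uint8_t eid, size_t elen)
{
	switch (eid) {
	case 130: return elen == PREQ_LEN;		/* WLAN_EID_PREQ */
	case 131: return elen == PREP_LEN;		/* WLAN_EID_PREP */
	case 132: return elen == PERR_ONE_DEST_LEN;	/* WLAN_EID_PERR */
	default:  return false;
	}
}

int main(void)
{
	printf("PREQ/37: %d, PERR/16: %d\n",
	       hwmp_len_ok(130, 37), hwmp_len_ok(132, 16));
	return 0;
}
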
- * Copyright (C) 2019, 2021 Intel Corporation + * Copyright (C) 2019 Intel Corporation * Author: Luis Carlos Cobo */ #include @@ -1200,7 +1200,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; size_t baselen; u8 *baseaddr; @@ -1228,8 +1228,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; } - elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, - mgmt->bssid, NULL); - mesh_process_plink_frame(sdata, mgmt, elems, rx_status); - kfree(elems); + ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems, + mgmt->bssid, NULL); + mesh_process_plink_frame(sdata, mgmt, &elems, rx_status); } diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 9e342cc250..fde93de2b8 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -3,7 +3,6 @@ * Copyright 2011-2012, Pavel Zubarev * Copyright 2011-2012, Marco Porsch * Copyright 2011-2012, cozybit Inc. - * Copyright (C) 2021 Intel Corporation */ #include "ieee80211_i.h" @@ -36,12 +35,12 @@ struct sync_method { /** * mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT * - * @cfg: mesh config element from the mesh peer (or %NULL) + * @ie: information elements of a management frame from the mesh peer */ -static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg) +static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie) { - return cfg && - (cfg->meshconf_cap & IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING); + return (ie->mesh_config->meshconf_cap & + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; } void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata) @@ -77,11 +76,11 @@ void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata) } } -static void -mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, - struct ieee80211_mgmt *mgmt, unsigned int len, - const struct ieee80211_meshconf_ie *mesh_cfg, - struct ieee80211_rx_status *rx_status) +static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, + u16 stype, + struct ieee80211_mgmt *mgmt, + struct ieee802_11_elems *elems, + struct ieee80211_rx_status *rx_status) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -102,7 +101,10 @@ mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, */ if (ieee80211_have_rx_timestamp(rx_status)) t_r = ieee80211_calculate_rx_timestamp(local, rx_status, - len + FCS_LEN, 24); + 24 + 12 + + elems->total_len + + FCS_LEN, + 24); else t_r = drv_get_tsf(local, sdata); @@ -117,7 +119,7 @@ mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, * dot11MeshNbrOffsetMaxNeighbor non-peer non-MBSS neighbors */ - if (mesh_peer_tbtt_adjusting(mesh_cfg)) { + if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); goto no_sync; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e5ccf17618..6c8505edce 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -37,6 +37,7 @@ #define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) +#define IEEE80211_AUTH_WAIT_SAE_RETRY (HZ * 2) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2) #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) 
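
On the timestamp math restored in mesh_sync_offset_rx_bcn_presp() above: the on-air beacon length is rebuilt as 24 bytes of management header, 12 bytes of fixed beacon fields (8-byte Timestamp, 2-byte Beacon Interval, 2-byte Capability), the parsed elements (elems->total_len) and the 4-byte FCS, while the trailing 24 passed to ieee80211_calculate_rx_timestamp() selects the start of the Timestamp field as the reference offset. A worked version of the length, with ies_len as a stand-in value:

#include <stdio.h>

#define IEEE80211_MGMT_HDR_LEN	24	/* fc, dur, 3 addresses, seq ctl */
#define BEACON_FIXED_LEN	12	/* timestamp 8 + interval 2 + capab 2 */
#define FCS_LEN			4

int main(void)
{
	unsigned int ies_len = 262;	/* stand-in for elems->total_len */
	unsigned int mpdu_len = IEEE80211_MGMT_HDR_LEN + BEACON_FIXED_LEN +
				ies_len + FCS_LEN;

	printf("beacon MPDU on air: %u bytes\n", mpdu_len);
	return 0;
}
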
@@ -164,15 +165,12 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, chandef->freq1_offset = channel->freq_offset; if (channel->band == NL80211_BAND_6GHZ) { - if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef)) { - mlme_dbg(sdata, - "bad 6 GHz operation, disabling HT/VHT/HE\n"); + if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef)) ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; - } else { + else ret = 0; - } vht_chandef = *chandef; goto out; } else if (sband->band == NL80211_BAND_S1GHZ) { @@ -193,7 +191,6 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); if (!ht_oper || !sta_ht_cap.ht_supported) { - mlme_dbg(sdata, "HT operation missing / HT not supported\n"); ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; @@ -227,7 +224,6 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { ieee80211_chandef_ht_oper(ht_oper, chandef); } else { - mlme_dbg(sdata, "40 MHz not supported\n"); /* 40 MHz (and 80 MHz) must be supported for VHT */ ret = IEEE80211_STA_DISABLE_VHT; /* also mark 40 MHz disabled */ @@ -236,7 +232,6 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!vht_oper || !sband->vht_cap.vht_supported) { - mlme_dbg(sdata, "VHT operation missing / VHT not supported\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -259,7 +254,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) sdata_info(sdata, - "HE AP VHT information is invalid, disabling HE\n"); + "HE AP VHT information is invalid, disable HE\n"); ret = IEEE80211_STA_DISABLE_HE; goto out; } @@ -269,7 +264,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, - "AP VHT information is invalid, disabling VHT\n"); + "AP VHT information is invalid, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -277,7 +272,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (!cfg80211_chandef_valid(&vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, - "AP VHT information is invalid, disabling VHT\n"); + "AP VHT information is invalid, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -290,7 +285,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, - "AP VHT information doesn't match HT, disabling VHT\n"); + "AP VHT information doesn't match HT, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -655,6 +650,10 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, if (!he_cap || !reg_cap) return; + /* + * TODO: the 1 added is because this temporarily is under the EXTENSION + * IE. Get rid of it when it moves. 
+ */ he_cap_size = 2 + 1 + sizeof(he_cap->he_cap_elem) + ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + @@ -1502,7 +1501,6 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, fallthrough; case NL80211_BAND_2GHZ: case NL80211_BAND_60GHZ: - case NL80211_BAND_LC: chan_increment = 1; break; case NL80211_BAND_5GHZ: @@ -2271,7 +2269,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; u32 changed = 0; struct ieee80211_prep_tx_info info = { .subtype = stype, @@ -2421,10 +2418,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, cancel_delayed_work_sync(&ifmgd->tx_tspec_wk); sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; - - bss_conf->pwr_reduction = 0; - bss_conf->tx_pwr_env_num = 0; - memset(bss_conf->tx_pwr_env, 0, sizeof(bss_conf->tx_pwr_env)); } static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) @@ -2534,7 +2527,7 @@ static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata, static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - const struct element *ssid; + const u8 *ssid; u8 *dst = ifmgd->associated->bssid; u8 unicast_limit = max(1, max_probe_tries - 3); struct sta_info *sta; @@ -2571,14 +2564,14 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) int ssid_len; rcu_read_lock(); - ssid = ieee80211_bss_get_elem(ifmgd->associated, WLAN_EID_SSID); + ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); if (WARN_ON_ONCE(ssid == NULL)) ssid_len = 0; else - ssid_len = ssid->datalen; + ssid_len = ssid[1]; ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst, - ssid->data, ssid_len, + ssid + 2, ssid_len, ifmgd->associated->channel); rcu_read_unlock(); } @@ -2608,13 +2601,6 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, goto out; } - if (sdata->local->suspending) { - /* reschedule after resume */ - mutex_unlock(&sdata->local->mtx); - ieee80211_reset_ap_probe(sdata); - goto out; - } - if (beacon) { mlme_dbg_ratelimited(sdata, "detected beacon loss from AP (missed %d beacons) - probing\n", @@ -2661,7 +2647,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_bss *cbss; struct sk_buff *skb; - const struct element *ssid; + const u8 *ssid; int ssid_len; if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) @@ -2679,17 +2665,16 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, return NULL; rcu_read_lock(); - ssid = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID); - if (WARN_ONCE(!ssid || ssid->datalen > IEEE80211_MAX_SSID_LEN, - "invalid SSID element (len=%d)", - ssid ? ssid->datalen : -1)) + ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID); + if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN, + "invalid SSID element (len=%d)", ssid ? 
ssid[1] : -1)) ssid_len = 0; else - ssid_len = ssid->datalen; + ssid_len = ssid[1]; skb = ieee80211_build_probe_req(sdata, sdata->vif.addr, cbss->bssid, (u32) -1, cbss->channel, - ssid->data, ssid_len, + ssid + 2, ssid_len, NULL, 0, IEEE80211_PROBE_FLAG_DIRECTED); rcu_read_unlock(); @@ -2903,17 +2888,17 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; - const struct element *challenge; u8 *pos; + struct ieee802_11_elems elems; u32 tx_flags = 0; struct ieee80211_prep_tx_info info = { .subtype = IEEE80211_STYPE_AUTH, }; pos = mgmt->u.auth.variable; - challenge = cfg80211_find_elem(WLAN_EID_CHALLENGE, pos, - len - (pos - (u8 *)mgmt)); - if (!challenge) + ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems, + mgmt->bssid, auth_data->bss->bssid); + if (!elems.challenge) return; auth_data->expected_transaction = 4; drv_mgd_prepare_tx(sdata->local, sdata, &info); @@ -2921,8 +2906,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0, - (void *)challenge, - challenge->datalen + sizeof(*challenge), + elems.challenge - 2, elems.challenge_len + 2, auth_data->bss->bssid, auth_data->bss->bssid, auth_data->key, auth_data->key_len, auth_data->key_idx, tx_flags); @@ -3011,8 +2995,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, (status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED || (auth_transaction == 1 && (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT || - status_code == WLAN_STATUS_SAE_PK)))) + status_code == WLAN_STATUS_SAE_PK)))) { + /* waiting for userspace now */ + ifmgd->auth_data->waiting = true; + ifmgd->auth_data->timeout = + jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY; + ifmgd->auth_data->timeout_started = true; + run_again(sdata, ifmgd->auth_data->timeout); goto notify_driver; + } sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); @@ -3324,11 +3315,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, aid = 0; /* TODO */ } capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); - elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, - mgmt->bssid, assoc_data->bss->bssid); - - if (!elems) - return false; + ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, elems, + mgmt->bssid, assoc_data->bss->bssid); if (elems->aid_resp) aid = le16_to_cpu(elems->aid_resp->aid); @@ -3350,8 +3338,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (!is_s1g && !elems->supp_rates) { sdata_info(sdata, "no SuppRates element in AssocResp\n"); - ret = false; - goto out; + return false; } sdata->vif.bss_conf.aid = aid; @@ -3373,7 +3360,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && (!elems->vht_cap_elem || !elems->vht_operation)))) { const struct cfg80211_bss_ies *ies; - struct ieee802_11_elems *bss_elems; + struct ieee802_11_elems bss_elems; rcu_read_lock(); ies = rcu_dereference(cbss->ies); @@ -3381,22 +3368,16 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, bss_ies = kmemdup(ies, sizeof(*ies) + ies->len, GFP_ATOMIC); rcu_read_unlock(); - if (!bss_ies) { - ret = false; - goto out; - } - - bss_elems = ieee802_11_parse_elems(bss_ies->data, bss_ies->len, - false, 
mgmt->bssid, - assoc_data->bss->bssid); - if (!bss_elems) { - ret = false; - goto out; - } + if (!bss_ies) + return false; + ieee802_11_parse_elems(bss_ies->data, bss_ies->len, + false, &bss_elems, + mgmt->bssid, + assoc_data->bss->bssid); if (assoc_data->wmm && - !elems->wmm_param && bss_elems->wmm_param) { - elems->wmm_param = bss_elems->wmm_param; + !elems->wmm_param && bss_elems.wmm_param) { + elems->wmm_param = bss_elems.wmm_param; sdata_info(sdata, "AP bug: WMM param missing from AssocResp\n"); } @@ -3405,32 +3386,30 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, * Also check if we requested HT/VHT, otherwise the AP doesn't * have to include the IEs in the (re)association response. */ - if (!elems->ht_cap_elem && bss_elems->ht_cap_elem && + if (!elems->ht_cap_elem && bss_elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - elems->ht_cap_elem = bss_elems->ht_cap_elem; + elems->ht_cap_elem = bss_elems.ht_cap_elem; sdata_info(sdata, "AP bug: HT capability missing from AssocResp\n"); } - if (!elems->ht_operation && bss_elems->ht_operation && + if (!elems->ht_operation && bss_elems.ht_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - elems->ht_operation = bss_elems->ht_operation; + elems->ht_operation = bss_elems.ht_operation; sdata_info(sdata, "AP bug: HT operation missing from AssocResp\n"); } - if (!elems->vht_cap_elem && bss_elems->vht_cap_elem && + if (!elems->vht_cap_elem && bss_elems.vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) { - elems->vht_cap_elem = bss_elems->vht_cap_elem; + elems->vht_cap_elem = bss_elems.vht_cap_elem; sdata_info(sdata, "AP bug: VHT capa missing from AssocResp\n"); } - if (!elems->vht_operation && bss_elems->vht_operation && + if (!elems->vht_operation && bss_elems.vht_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) { - elems->vht_operation = bss_elems->vht_operation; + elems->vht_operation = bss_elems.vht_operation; sdata_info(sdata, "AP bug: VHT operation missing from AssocResp\n"); } - - kfree(bss_elems); } /* @@ -3675,7 +3654,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ret = true; out: - kfree(elems); kfree(bss_ies); return ret; } @@ -3687,7 +3665,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; u16 capab_info, status_code, aid; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; int ac, uapsd_queues = -1; u8 *pos; bool reassoc; @@ -3744,20 +3722,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0) return; - elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, - mgmt->bssid, assoc_data->bss->bssid); - if (!elems) - goto notify_driver; + ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems, + mgmt->bssid, assoc_data->bss->bssid); if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && - elems->timeout_int && - elems->timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) { + elems.timeout_int && + elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) { u32 tu, ms; - - cfg80211_assoc_comeback(sdata->dev, assoc_data->bss, - le32_to_cpu(elems->timeout_int->value)); - - tu = le32_to_cpu(elems->timeout_int->value); + tu = le32_to_cpu(elems.timeout_int->value); ms = tu * 1024 / 1000; sdata_info(sdata, "%pM rejected association temporarily; comeback duration 
%u TU (%u ms)\n", @@ -3777,7 +3749,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { - if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, elems)) { + if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, &elems)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, cbss); @@ -3807,7 +3779,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len); notify_driver: drv_mgd_complete_tx(sdata->local, sdata, &info); - kfree(elems); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -4012,7 +3983,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; struct ieee80211_mgmt *mgmt = (void *) hdr; size_t baselen; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; @@ -4058,16 +4029,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->bss)) { - elems = ieee802_11_parse_elems(variable, len - baselen, false, - bssid, - ifmgd->assoc_data->bss->bssid); - if (!elems) - return; + ieee802_11_parse_elems(variable, + len - baselen, false, &elems, + bssid, + ifmgd->assoc_data->bss->bssid); ieee80211_rx_bss_info(sdata, mgmt, len, rx_status); - if (elems->dtim_period) - ifmgd->dtim_period = elems->dtim_period; + if (elems.dtim_period) + ifmgd->dtim_period = elems.dtim_period; ifmgd->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { @@ -4075,17 +4045,17 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = rx_status->device_timestamp; - sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count; + sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count; } - if (elems->mbssid_config_ie) + if (elems.mbssid_config_ie) bss_conf->profile_periodicity = - elems->mbssid_config_ie->profile_periodicity; + elems.mbssid_config_ie->profile_periodicity; else bss_conf->profile_periodicity = 0; - if (elems->ext_capab_len >= 11 && - (elems->ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) + if (elems.ext_capab_len >= 11 && + (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) bss_conf->ema_ap = true; else bss_conf->ema_ap = false; @@ -4094,7 +4064,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; run_again(sdata, ifmgd->assoc_data->timeout); - kfree(elems); return; } @@ -4126,15 +4095,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, */ if (!ieee80211_is_s1g_beacon(hdr->frame_control)) ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); - elems = ieee802_11_parse_elems_crc(variable, len - baselen, - false, care_about_ies, ncrc, - mgmt->bssid, bssid); - if (!elems) - return; - ncrc = elems->crc; + ncrc = ieee802_11_parse_elems_crc(variable, + len - baselen, false, &elems, + care_about_ies, ncrc, + mgmt->bssid, bssid); if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) 
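
The beacon path here seeds a CRC over the two fixed u16 fields (beacon_int and capab_info, hence the 4) and lets the restored ieee802_11_parse_elems_crc() fold in only the elements named in care_about_ies; when the resulting ncrc matches the stored beacon_crc, the beacon carries no change worth acting on and the heavier reconfiguration is skipped. A self-contained sketch of that filtered-digest idea; fold() is a stand-in hash, not crc32_be(), and the element IDs are illustrative:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in digest -- any incremental hash works for the sketch. */
static uint32_t fold(uint32_t h, const uint8_t *d, size_t n)
{
	while (n--)
		h = (h ^ *d++) * 16777619u;	/* FNV-1a step */
	return h;
}

/* Digest only elements whose IDs are in the caller's bitmask, the way
 * ieee802_11_parse_elems_crc() folds "care_about_ies" elements into
 * the running CRC. */
static uint32_t digest_elems(uint32_t h, uint64_t filter,
			     const uint8_t *ies, size_t len)
{
	while (len >= 2 && 2u + ies[1] <= len) {
		uint8_t id = ies[0], elen = ies[1];

		if (id < 64 && (filter & (1ULL << id)))
			h = fold(h, ies, 2 + elen);
		ies += 2 + elen;
		len -= 2 + elen;
	}
	return h;
}

int main(void)
{
	/* SSID "ap1" and a TIM element (IDs 0 and 5) */
	const uint8_t ies[] = { 0, 3, 'a', 'p', '1', 5, 4, 0, 2, 0, 0 };
	uint64_t care = (1ULL << 0) | (1ULL << 5);
	static uint32_t last;
	uint32_t now = digest_elems(2166136261u, care, ies, sizeof(ies));

	if (now == last)
		puts("beacon unchanged -- skip reconfiguration");
	last = now;
	return 0;
}
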
&& - ieee80211_check_tim(elems->tim, elems->tim_len, bss_conf->aid)) { + ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) { if (local->hw.conf.dynamic_ps_timeout > 0) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; @@ -4204,12 +4171,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = rx_status->device_timestamp; - sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count; + sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count; } if ((ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) || ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - goto free; + return; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; @@ -4217,12 +4184,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, rx_status->device_timestamp, - elems, true); + &elems, true); if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) && - ieee80211_sta_wmm_params(local, sdata, elems->wmm_param, - elems->wmm_param_len, - elems->mu_edca_param_set)) + ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, + elems.wmm_param_len, + elems.mu_edca_param_set)) changed |= BSS_CHANGED_QOS; /* @@ -4231,7 +4198,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, */ if (!ifmgd->have_beacon) { /* a few bogus AP send dtim_period = 0 or no TIM IE */ - bss_conf->dtim_period = elems->dtim_period ?: 1; + bss_conf->dtim_period = elems.dtim_period ?: 1; changed |= BSS_CHANGED_BEACON_INFO; ifmgd->have_beacon = true; @@ -4243,9 +4210,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps_vif(sdata); } - if (elems->erp_info) { + if (elems.erp_info) { erp_valid = true; - erp_value = elems->erp_info[0]; + erp_value = elems.erp_info[0]; } else { erp_valid = false; } @@ -4258,12 +4225,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); - changed |= ieee80211_recalc_twt_req(sdata, sta, elems); + changed |= ieee80211_recalc_twt_req(sdata, sta, &elems); - if (ieee80211_config_bw(sdata, sta, elems->ht_cap_elem, - elems->vht_cap_elem, elems->ht_operation, - elems->vht_operation, elems->he_operation, - elems->s1g_oper, bssid, &changed)) { + if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem, + elems.vht_cap_elem, elems.ht_operation, + elems.vht_operation, elems.he_operation, + elems.s1g_oper, bssid, &changed)) { mutex_unlock(&local->sta_mtx); sdata_info(sdata, "failed to follow AP %pM bandwidth change, disconnect\n", @@ -4275,23 +4242,21 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sizeof(deauth_buf), true, WLAN_REASON_DEAUTH_LEAVING, false); - goto free; + return; } - if (sta && elems->opmode_notif) - ieee80211_vht_handle_opmode(sdata, sta, *elems->opmode_notif, + if (sta && elems.opmode_notif) + ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif, rx_status->band); mutex_unlock(&local->sta_mtx); changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt, - elems->country_elem, - elems->country_elem_len, - elems->pwr_constr_elem, - elems->cisco_dtpc_elem); + elems.country_elem, + elems.country_elem_len, + elems.pwr_constr_elem, + elems.cisco_dtpc_elem); ieee80211_bss_info_change_notify(sdata, changed); -free: - kfree(elems); } void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, @@ -4320,6 
+4285,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; + struct ieee802_11_elems elems; int ies_len; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -4351,8 +4317,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { - struct ieee802_11_elems *elems; - ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); @@ -4361,19 +4325,18 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; /* CSA IE cannot be overridden, no need for BSSID */ - elems = ieee802_11_parse_elems( - mgmt->u.action.u.chan_switch.variable, - ies_len, true, mgmt->bssid, NULL); + ieee802_11_parse_elems( + mgmt->u.action.u.chan_switch.variable, + ies_len, true, &elems, mgmt->bssid, NULL); - if (elems && !elems->parse_error) - ieee80211_sta_process_chanswitch(sdata, - rx_status->mactime, - rx_status->device_timestamp, - elems, false); - kfree(elems); + if (elems.parse_error) + break; + + ieee80211_sta_process_chanswitch(sdata, + rx_status->mactime, + rx_status->device_timestamp, + &elems, false); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { - struct ieee802_11_elems *elems; - ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.ext_chan_switch.variable); @@ -4385,22 +4348,21 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, * extended CSA IE can't be overridden, no need for * BSSID */ - elems = ieee802_11_parse_elems( - mgmt->u.action.u.ext_chan_switch.variable, - ies_len, true, mgmt->bssid, NULL); + ieee802_11_parse_elems( + mgmt->u.action.u.ext_chan_switch.variable, + ies_len, true, &elems, mgmt->bssid, NULL); - if (elems && !elems->parse_error) { - /* for the handling code pretend it was an IE */ - elems->ext_chansw_ie = - &mgmt->u.action.u.ext_chan_switch.data; + if (elems.parse_error) + break; - ieee80211_sta_process_chanswitch(sdata, - rx_status->mactime, - rx_status->device_timestamp, - elems, false); - } + /* for the handling code pretend this was also an IE */ + elems.ext_chansw_ie = + &mgmt->u.action.u.ext_chan_switch.data; - kfree(elems); + ieee80211_sta_process_chanswitch(sdata, + rx_status->mactime, + rx_status->device_timestamp, + &elems, false); } break; } @@ -4603,10 +4565,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (ifmgd->auth_data && ifmgd->auth_data->timeout_started && time_after(jiffies, ifmgd->auth_data->timeout)) { - if (ifmgd->auth_data->done) { + if (ifmgd->auth_data->done || ifmgd->auth_data->waiting) { /* - * ok ... we waited for assoc but userspace didn't, - * so let's just kill the auth data + * ok ... 
we waited for assoc or continuation but + * userspace didn't do it, so kill the auth data */ ieee80211_destroy_auth_data(sdata, false); } else if (ieee80211_auth(sdata)) { @@ -4919,7 +4881,7 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - const struct element *ht_cap_elem, *vht_cap_elem; + const u8 *ht_cap_ie, *vht_cap_ie; const struct ieee80211_ht_cap *ht_cap; const struct ieee80211_vht_cap *vht_cap; u8 chains = 1; @@ -4927,9 +4889,9 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_DISABLE_HT) return chains; - ht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_CAPABILITY); - if (ht_cap_elem && ht_cap_elem->datalen >= sizeof(*ht_cap)) { - ht_cap = (void *)ht_cap_elem->data; + ht_cap_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY); + if (ht_cap_ie && ht_cap_ie[1] >= sizeof(*ht_cap)) { + ht_cap = (void *)(ht_cap_ie + 2); chains = ieee80211_mcs_to_chains(&ht_cap->mcs); /* * TODO: use "Tx Maximum Number Spatial Streams Supported" and @@ -4940,12 +4902,12 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) return chains; - vht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY); - if (vht_cap_elem && vht_cap_elem->datalen >= sizeof(*vht_cap)) { + vht_cap_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY); + if (vht_cap_ie && vht_cap_ie[1] >= sizeof(*vht_cap)) { u8 nss; u16 tx_mcs_map; - vht_cap = (void *)vht_cap_elem->data; + vht_cap = (void *)(vht_cap_ie + 2); tx_mcs_map = le16_to_cpu(vht_cap->supp_mcs.tx_mcs_map); for (nss = 8; nss > 0; nss--) { if (((tx_mcs_map >> (2 * (nss - 1))) & 3) != @@ -5038,22 +5000,10 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; struct ieee80211_bss *bss = (void *)cbss->priv; - struct ieee802_11_elems *elems; - const struct cfg80211_bss_ies *ies; int ret; u32 i; bool have_80mhz; - rcu_read_lock(); - - ies = rcu_dereference(cbss->ies); - elems = ieee802_11_parse_elems(ies->data, ies->len, false, - NULL, NULL); - if (!elems) { - rcu_read_unlock(); - return -ENOMEM; - } - sband = local->hw.wiphy->bands[cbss->channel->band]; ifmgd->flags &= ~(IEEE80211_STA_DISABLE_40MHZ | @@ -5062,27 +5012,32 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* disable HT/VHT/HE if we don't support them */ if (!sband->ht_cap.ht_supported && !is_6ghz) { - mlme_dbg(sdata, "HT not supported, disabling HT/VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } if (!sband->vht_cap.vht_supported && is_5ghz) { - mlme_dbg(sdata, "VHT not supported, disabling VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } if (!ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) { - mlme_dbg(sdata, "HE not supported, disabling it\n"); + ieee80211_vif_type_p2p(&sdata->vif))) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; - } + + rcu_read_lock(); if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) { - ht_oper = elems->ht_operation; - ht_cap = elems->ht_cap_elem; + const u8 *ht_oper_ie, *ht_cap_ie; + + ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION); + if (ht_oper_ie && ht_oper_ie[1] >= sizeof(*ht_oper)) + 
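
ieee80211_ht_vht_rx_chains() above estimates receive chains from the VHT MCS map: 16 bits, two bits per spatial stream with NSS 1 in the low-order bits, and the 2-bit code 3 meaning "not supported", so scanning from NSS 8 downwards finds the highest advertised stream count. A compilable sketch of that decode; the sample map value is chosen for illustration:

#include <stdint.h>
#include <stdio.h>

#define VHT_MCS_NOT_SUPPORTED 3	/* 2-bit code in the MCS map */

/* Highest spatial-stream count in a VHT MCS map: two bits per NSS,
 * NSS 1 in the low-order bits, scanning from NSS 8 downwards. */
static unsigned int vht_max_nss(uint16_t mcs_map)
{
	unsigned int nss;

	for (nss = 8; nss > 0; nss--)
		if (((mcs_map >> (2 * (nss - 1))) & 3) !=
		    VHT_MCS_NOT_SUPPORTED)
			return nss;
	return 0;
}

int main(void)
{
	/* 0xfffa: NSS 1 and 2 support MCS 0-9, NSS 3-8 unsupported */
	printf("max NSS: %u\n", vht_max_nss(0xfffa));
	return 0;
}
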
ht_oper = (void *)(ht_oper_ie + 2); + + ht_cap_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY); + if (ht_cap_ie && ht_cap_ie[1] >= sizeof(*ht_cap)) + ht_cap = (void *)(ht_cap_ie + 2); if (!ht_cap) { ifmgd->flags |= IEEE80211_STA_DISABLE_HT; @@ -5091,7 +5046,12 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && !is_6ghz) { - vht_oper = elems->vht_operation; + const u8 *vht_oper_ie, *vht_cap; + + vht_oper_ie = ieee80211_bss_get_ie(cbss, + WLAN_EID_VHT_OPERATION); + if (vht_oper_ie && vht_oper_ie[1] >= sizeof(*vht_oper)) + vht_oper = (void *)(vht_oper_ie + 2); if (vht_oper && !ht_oper) { vht_oper = NULL; sdata_info(sdata, @@ -5101,40 +5061,25 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (!elems->vht_cap_elem) { - sdata_info(sdata, - "bad VHT capabilities, disabling VHT\n"); + vht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY); + if (!vht_cap || vht_cap[1] < sizeof(struct ieee80211_vht_cap)) { ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; vht_oper = NULL; } } if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) { - he_oper = elems->he_operation; + const struct cfg80211_bss_ies *ies; + const u8 *he_oper_ie; - if (is_6ghz) { - struct ieee80211_bss_conf *bss_conf; - u8 i, j = 0; - - bss_conf = &sdata->vif.bss_conf; - - if (elems->pwr_constr_elem) - bss_conf->pwr_reduction = *elems->pwr_constr_elem; - - BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) != - ARRAY_SIZE(elems->tx_pwr_env)); - - for (i = 0; i < elems->tx_pwr_env_num; i++) { - if (elems->tx_pwr_env_len[i] > - sizeof(bss_conf->tx_pwr_env[j])) - continue; - - bss_conf->tx_pwr_env_num++; - memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i], - elems->tx_pwr_env_len[i]); - j++; - } - } + ies = rcu_dereference(cbss->ies); + he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, + ies->data, ies->len); + if (he_oper_ie && + he_oper_ie[1] >= ieee80211_he_oper_size(&he_oper_ie[3])) + he_oper = (void *)(he_oper_ie + 3); + else + he_oper = NULL; if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper)) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; @@ -5151,14 +5096,17 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, break; } - if (!have_80mhz) { - sdata_info(sdata, "80 MHz not supported, disabling VHT\n"); + if (!have_80mhz) ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - } if (sband->band == NL80211_BAND_S1GHZ) { - s1g_oper = elems->s1g_oper; - if (!s1g_oper) + const u8 *s1g_oper_ie; + + s1g_oper_ie = ieee80211_bss_get_ie(cbss, + WLAN_EID_S1G_OPERATION); + if (s1g_oper_ie && s1g_oper_ie[1] >= sizeof(*s1g_oper)) + s1g_oper = (void *)(s1g_oper_ie + 2); + else sdata_info(sdata, "AP missing S1G operation element?\n"); } @@ -5174,9 +5122,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, local->rx_chains); rcu_read_unlock(); - /* the element data was RCU protected so no longer valid anyway */ - kfree(elems); - elems = NULL; if (ifmgd->flags & IEEE80211_STA_DISABLE_HE && is_6ghz) { sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection"); @@ -5581,7 +5526,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *beacon_ies; struct ieee80211_supported_band *sband; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; - const struct element *ssid_elem, *ht_elem, *vht_elem; + const u8 *ssidie, *ht_ie, *vht_ie; int i, err; bool override = false; @@ -5590,14 +5535,14 @@ int 
ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, return -ENOMEM; rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_SSID); - if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) { + ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); + if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) { rcu_read_unlock(); kfree(assoc_data); return -EINVAL; } - memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen); - assoc_data->ssid_len = ssid_elem->datalen; + memcpy(assoc_data->ssid, ssidie + 2, ssidie[1]); + assoc_data->ssid_len = ssidie[1]; memcpy(bss_conf->ssid, assoc_data->ssid, assoc_data->ssid_len); bss_conf->ssid_len = assoc_data->ssid_len; rcu_read_unlock(); @@ -5711,21 +5656,19 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->supp_rates_len = bss->supp_rates_len; rcu_read_lock(); - ht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_HT_OPERATION); - if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation)) + ht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_OPERATION); + if (ht_ie && ht_ie[1] >= sizeof(struct ieee80211_ht_operation)) assoc_data->ap_ht_param = - ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param; + ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param; else if (!is_6ghz) ifmgd->flags |= IEEE80211_STA_DISABLE_HT; - vht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_VHT_CAPABILITY); - if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) { - memcpy(&assoc_data->ap_vht_cap, vht_elem->data, + vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY); + if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap)) + memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, sizeof(struct ieee80211_vht_cap)); - } else if (is_5ghz) { - sdata_info(sdata, "VHT capa missing/short, disabling VHT/HE\n"); + else if (is_5ghz) ifmgd->flags |= IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; - } rcu_read_unlock(); if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && @@ -5799,21 +5742,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } if (req->flags & ASSOC_REQ_DISABLE_HT) { - mlme_dbg(sdata, "HT disabled by flag, disabling HT/VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (req->flags & ASSOC_REQ_DISABLE_VHT) { - mlme_dbg(sdata, "VHT disabled by flag, disabling VHT\n"); + if (req->flags & ASSOC_REQ_DISABLE_VHT) ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - } - if (req->flags & ASSOC_REQ_DISABLE_HE) { - mlme_dbg(sdata, "HE disabled by flag, disabling VHT\n"); + if (req->flags & ASSOC_REQ_DISABLE_HE) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; - } err = ieee80211_prep_connection(sdata, req->bss, true, override); if (err) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 0ccb5701c7..7809a906d7 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -27,9 +27,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (!local->open_count) goto suspend; - local->suspending = true; - mb(); /* make suspending visible before any cancellation */ - ieee80211_scan_cancel(local); ieee80211_dfs_cac_cancel(local); @@ -179,7 +176,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* need suspended to be visible before quiescing is false */ barrier(); local->quiescing = false; - local->suspending = false; return 0; } diff --git a/net/mac80211/rc80211_minstrel_ht.c 
b/net/mac80211/rc80211_minstrel_ht.c index 9c3b7fc377..72b44d4c42 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -18,6 +18,8 @@ #define AVG_AMPDU_SIZE 16 #define AVG_PKT_SIZE 1200 +#define SAMPLE_SWITCH_THR 100 + /* Number of bits for an average sized packet */ #define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 93680af62c..eab6283b34 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -465,12 +465,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, unsigned int stbc; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS)); - *pos = local->hw.radiotap_mcs_details; - if (status->enc_flags & RX_ENC_FLAG_HT_GF) - *pos |= IEEE80211_RADIOTAP_MCS_HAVE_FMT; - if (status->enc_flags & RX_ENC_FLAG_LDPC) - *pos |= IEEE80211_RADIOTAP_MCS_HAVE_FEC; - pos++; + *pos++ = local->hw.radiotap_mcs_details; *pos = 0; if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) *pos |= IEEE80211_RADIOTAP_MCS_SGI; @@ -2607,7 +2602,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, * address, so that the authenticator (e.g. hostapd) will see * the frame, but bridge won't forward it anywhere else. Note * that due to earlier filtering, the only other address can - * be the PAE group address. + * be the PAE group address, unless the hardware allowed them + * through in 802.3 offloaded mode. */ if (unlikely(skb->protocol == sdata->control_port_protocol && !ether_addr_equal(ehdr->h_dest, sdata->vif.addr))) @@ -2922,13 +2918,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - ac = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee802_1d_to_ac[skb->priority]; q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; } - skb_set_queue_mapping(skb, q); + skb_set_queue_mapping(skb, ac); if (!--mesh_hdr->ttl) { if (!is_multicast_ether_addr(hdr->addr1)) @@ -2949,7 +2945,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!fwd_skb) goto out; - fwd_skb->dev = sdata->dev; fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); info = IEEE80211_SKB_CB(fwd_skb); @@ -3223,7 +3218,10 @@ static bool ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct ieee80211_sub_if_data *sdata = rx->sdata; + const struct ieee80211_sta_he_cap *hecap; + struct ieee80211_supported_band *sband; /* TWT actions are only supported in AP for the moment */ if (sdata->vif.type != NL80211_IFTYPE_AP) @@ -3232,7 +3230,14 @@ ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) if (!rx->local->ops->add_twt_setup) return false; - if (!sdata->vif.bss_conf.twt_responder) + sband = rx->local->hw.wiphy->bands[status->band]; + hecap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + if (!hecap) + return false; + + if (!(hecap->he_cap_elem.mac_cap_info[0] & + IEEE80211_HE_MAC_CAP0_TWT_RES)) return false; if (!rx->sta) @@ -4514,12 +4519,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, /* deliver to local stack */ skb->protocol = eth_type_trans(skb, fast_rx->dev); - memset(skb->cb, 0, sizeof(skb->cb)); - if (rx->list) - list_add_tail(&skb->list, rx->list); - else - 
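The TWT hunk above stops consulting a cached bss_conf flag and instead tests the responder bit directly in the first HE MAC capability octet. Testing a named capability bit is plain mask-and-compare; a sketch, using an illustrative bit value rather than the real IEEE80211_HE_MAC_CAP0_TWT_RES constant:

#include <stdbool.h>
#include <stdint.h>

#define TWT_RES_BIT 0x04	/* illustrative stand-in */

struct he_cap_sketch {
	uint8_t mac_cap_info[6];
};

static bool twt_responder_supported(const struct he_cap_sketch *cap)
{
	/* bit set means this interface type may act as a TWT responder */
	return cap->mac_cap_info[0] & TWT_RES_BIT;
}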
netif_receive_skb(skb); - + ieee80211_deliver_skb_to_local_stack(skb, rx); } static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5e6b275afc..6b50cb5e0e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include @@ -155,7 +155,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, }; bool signal_valid; struct ieee80211_sub_if_data *scan_sdata; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; size_t baselen; u8 *elements; @@ -209,10 +209,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (baselen > len) return NULL; - elems = ieee802_11_parse_elems(elements, len - baselen, false, - mgmt->bssid, cbss->bssid); - if (!elems) - return NULL; + ieee802_11_parse_elems(elements, len - baselen, false, &elems, + mgmt->bssid, cbss->bssid); /* In case the signal is invalid update the status */ signal_valid = channel == cbss->channel; @@ -220,17 +218,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local, rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; bss = (void *)cbss->priv; - ieee80211_update_bss_from_elems(local, bss, elems, rx_status, beacon); + ieee80211_update_bss_from_elems(local, bss, &elems, rx_status, beacon); list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) { non_tx_bss = (void *)non_tx_cbss->priv; - ieee80211_update_bss_from_elems(local, non_tx_bss, elems, + ieee80211_update_bss_from_elems(local, non_tx_bss, &elems, rx_status, beacon); } - kfree(elems); - return bss; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 537535a889..2b5acb3758 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -444,7 +444,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, switch (i) { case NL80211_BAND_2GHZ: - case NL80211_BAND_LC: /* * We use both here, even if we cannot really know for * sure the station will support both, but the only use @@ -514,8 +513,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; - sta->cparams.ce_threshold_selector = 0; - sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); @@ -644,13 +641,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; - goto out_cleanup; + goto out_err; } sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); if (!sinfo) { err = -ENOMEM; - goto out_cleanup; + goto out_err; } local->num_sta++; @@ -667,15 +664,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) list_add_tail_rcu(&sta->list, &local->sta_list); - /* update channel context before notifying the driver about state - * change, this enables driver using the updated channel context right away. 
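The sta_info_insert_finish() hunks here and just below are a reminder of how kernel error unwinding is structured: each failure jumps to the label that tears down exactly what has been built so far, so renaming a label or moving cleanup_single_sta() between labels changes which failures run which cleanup. A compact standalone illustration of the idiom:

#include <stdlib.h>

static int setup_two(void)
{
	char *a, *b;
	int err = 0;

	a = malloc(16);
	if (!a)
		return -1;		/* nothing to unwind yet */

	b = malloc(16);
	if (!b) {
		err = -1;
		goto out_free_a;	/* unwind only what exists */
	}

	/* ... use a and b ... */
	free(b);

out_free_a:
	free(a);
	return err;
}

In the hunk above, the early -EEXIST and -ENOMEM failures go back to jumping past cleanup_single_sta(), which only the fuller out_drop_sta path may run.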
- */ - if (sta->sta_state >= IEEE80211_STA_ASSOC) { - ieee80211_recalc_min_chandef(sta->sdata); - if (!sta->sta.support_p2p_ps) - ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); - } - /* notify driver */ err = sta_info_insert_drv_state(local, sdata, sta); if (err) @@ -683,6 +671,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_INSERTED); + if (sta->sta_state >= IEEE80211_STA_ASSOC) { + ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); @@ -709,8 +703,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); - out_cleanup: cleanup_single_sta(sta); + out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); rcu_read_lock(); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 379fd36719..e7443fc466 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -176,7 +176,6 @@ struct sta_info; * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent * @amsdu: support A-MSDU withing A-MPDU - * @ssn: starting sequence number of the session * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 137be9ec94..45e532ad12 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2019, 2021 Intel Corporation + * Copyright (C) 2019 Intel Corporation */ #include @@ -1684,7 +1684,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - struct ieee802_11_elems *elems = NULL; + struct ieee802_11_elems elems; struct sta_info *sta; struct ieee80211_tdls_data *tf = (void *)skb->data; bool local_initiator; @@ -1718,20 +1718,16 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, goto call_drv; } - elems = ieee802_11_parse_elems(tf->u.chan_switch_resp.variable, - skb->len - baselen, false, NULL, NULL); - if (!elems) { - ret = -ENOMEM; - goto out; - } - - if (elems->parse_error) { + ieee802_11_parse_elems(tf->u.chan_switch_resp.variable, + skb->len - baselen, false, &elems, + NULL, NULL); + if (elems.parse_error) { tdls_dbg(sdata, "Invalid IEs in TDLS channel switch resp\n"); ret = -EINVAL; goto out; } - if (!elems->ch_sw_timing || !elems->lnk_id) { + if (!elems.ch_sw_timing || !elems.lnk_id) { tdls_dbg(sdata, "TDLS channel switch resp - missing IEs\n"); ret = -EINVAL; goto out; @@ -1739,15 +1735,15 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, /* validate the initiator is set correctly */ local_initiator = - !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); + !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); if (local_initiator == sta->sta.tdls_initiator) { tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n"); ret = -EINVAL; goto out; } - params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time); - params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout); + params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time); + params.switch_timeout = 
le16_to_cpu(elems.ch_sw_timing->switch_timeout); params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, &params.ch_sw_tm_ie); @@ -1767,7 +1763,6 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, out: mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); - kfree(elems); return ret; } @@ -1776,7 +1771,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - struct ieee802_11_elems *elems; + struct ieee802_11_elems elems; struct cfg80211_chan_def chandef; struct ieee80211_channel *chan; enum nl80211_channel_type chan_type; @@ -1836,27 +1831,22 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - elems = ieee802_11_parse_elems(tf->u.chan_switch_req.variable, - skb->len - baselen, false, NULL, NULL); - if (!elems) - return -ENOMEM; - - if (elems->parse_error) { + ieee802_11_parse_elems(tf->u.chan_switch_req.variable, + skb->len - baselen, false, &elems, NULL, NULL); + if (elems.parse_error) { tdls_dbg(sdata, "Invalid IEs in TDLS channel switch req\n"); - ret = -EINVAL; - goto free; + return -EINVAL; } - if (!elems->ch_sw_timing || !elems->lnk_id) { + if (!elems.ch_sw_timing || !elems.lnk_id) { tdls_dbg(sdata, "TDLS channel switch req - missing IEs\n"); - ret = -EINVAL; - goto free; + return -EINVAL; } - if (!elems->sec_chan_offs) { + if (!elems.sec_chan_offs) { chan_type = NL80211_CHAN_HT20; } else { - switch (elems->sec_chan_offs->sec_chan_offs) { + switch (elems.sec_chan_offs->sec_chan_offs) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: chan_type = NL80211_CHAN_HT40PLUS; break; @@ -1875,8 +1865,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, if (!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &chandef, sdata->wdev.iftype)) { tdls_dbg(sdata, "TDLS chan switch to forbidden channel\n"); - ret = -EINVAL; - goto free; + return -EINVAL; } mutex_lock(&local->sta_mtx); @@ -1892,7 +1881,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, /* validate the initiator is set correctly */ local_initiator = - !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); + !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); if (local_initiator == sta->sta.tdls_initiator) { tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n"); ret = -EINVAL; @@ -1900,16 +1889,16 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, } /* peer should have known better */ - if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs && - elems->sec_chan_offs->sec_chan_offs) { + if (!sta->sta.ht_cap.ht_supported && elems.sec_chan_offs && + elems.sec_chan_offs->sec_chan_offs) { tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n"); ret = -ENOTSUPP; goto out; } params.chandef = &chandef; - params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time); - params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout); + params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time); + params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout); params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, &params.ch_sw_tm_ie); @@ -1928,8 +1917,6 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, out: mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); -free: - kfree(elems); return ret; } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d91498f777..9e8381bef7 100644 
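The switch_time and switch_timeout fields above are little-endian on the wire, hence the le16_to_cpu() wrappers: a byte swap on big-endian hosts, a no-op on little-endian ones. A portable userspace equivalent assembles the value from bytes, which also sidesteps alignment (IE payloads carry no alignment guarantee):

#include <stdint.h>

/* Read a little-endian u16 from an unaligned wire buffer. */
static uint16_t get_le16(const uint8_t *p)
{
	return (uint16_t)(p[0] | ((uint16_t)p[1] << 8));
}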
--- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2892,13 +2892,6 @@ TRACE_EVENT(drv_twt_teardown_request, ) ); -DEFINE_EVENT(sta_event, drv_net_fill_forward_path, - TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta), - TP_ARGS(local, sdata, sta) -); - #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6d054fed06..a499b07fee 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -146,8 +146,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, rate = DIV_ROUND_UP(r->bitrate, 1 << shift); switch (sband->band) { - case NL80211_BAND_2GHZ: - case NL80211_BAND_LC: { + case NL80211_BAND_2GHZ: { u32 flag; if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) flag = IEEE80211_RATE_MANDATORY_G; @@ -3821,7 +3820,7 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); struct airtime_sched_info *air_sched; - u64 now = ktime_get_coarse_boottime_ns(); + u64 now = ktime_get_boottime_ns(); struct ieee80211_txq *ret = NULL; struct airtime_info *air_info; struct txq_info *txqi = NULL; @@ -3948,7 +3947,7 @@ void ieee80211_update_airtime_weight(struct ieee80211_local *local, u64 weight_sum = 0; if (unlikely(!now)) - now = ktime_get_coarse_boottime_ns(); + now = ktime_get_boottime_ns(); lockdep_assert_held(&air_sched->lock); @@ -3974,7 +3973,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); struct airtime_sched_info *air_sched; - u64 now = ktime_get_coarse_boottime_ns(); + u64 now = ktime_get_boottime_ns(); struct airtime_info *air_info; u8 ac = txq->ac; bool was_active; @@ -4032,7 +4031,7 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw, if (!purge) airtime_set_active(air_sched, air_info, - ktime_get_coarse_boottime_ns()); + ktime_get_boottime_ns()); rb_erase_cached(&txqi->schedule_order, &air_sched->active_txqs); @@ -4120,7 +4119,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, if (RB_EMPTY_NODE(&txqi->schedule_order)) goto out; - now = ktime_get_coarse_boottime_ns(); + now = ktime_get_boottime_ns(); /* Like in ieee80211_next_txq(), make sure the first station in the * scheduling order is eligible for transmission to avoid starvation. 
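The airtime hunks above swap ktime_get_coarse_boottime_ns() for ktime_get_boottime_ns(): the coarse variant returns a tick-updated snapshot (cheap, roughly jiffy resolution) while the precise one reads the clocksource (slower, nanosecond resolution). The nearest userspace analog is CLOCK_MONOTONIC_COARSE versus CLOCK_BOOTTIME; a sketch comparing the two:

#include <stdio.h>
#include <time.h>

static long long ns_of(clockid_t id)
{
	struct timespec ts;

	clock_gettime(id, &ts);
	return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	/* coarse: tick-granular but cheap to read */
	printf("coarse   %lld ns\n", ns_of(CLOCK_MONOTONIC_COARSE));
	/* boottime: ns-granular and, unlike MONOTONIC, counts suspend */
	printf("boottime %lld ns\n", ns_of(CLOCK_BOOTTIME));
	return 0;
}

For a scheduler that compares per-station airtime stamps, that resolution/cost trade-off is presumably why the choice of clock matters at these call sites.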
@@ -4191,11 +4190,11 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, ieee80211_aggr_check(sdata, sta, skb); - sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); - if (sta) { struct ieee80211_fast_tx *fast_tx; + sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); + fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && @@ -4988,115 +4987,6 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, return 0; } -static void -ieee80211_beacon_get_finish(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_mutable_offsets *offs, - struct beacon_data *beacon, - struct sk_buff *skb, - struct ieee80211_chanctx_conf *chanctx_conf, - u16 csa_off_base) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_tx_info *info; - enum nl80211_band band; - struct ieee80211_tx_rate_control txrc; - - /* CSA offsets */ - if (offs && beacon) { - u16 i; - - for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; i++) { - u16 csa_off = beacon->cntdwn_counter_offsets[i]; - - if (!csa_off) - continue; - - offs->cntdwn_counter_offs[i] = csa_off_base + csa_off; - } - } - - band = chanctx_conf->def.chan->band; - info = IEEE80211_SKB_CB(skb); - info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - info->flags |= IEEE80211_TX_CTL_NO_ACK; - info->band = band; - - memset(&txrc, 0, sizeof(txrc)); - txrc.hw = hw; - txrc.sband = local->hw.wiphy->bands[band]; - txrc.bss_conf = &sdata->vif.bss_conf; - txrc.skb = skb; - txrc.reported_rate.idx = -1; - if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band]) - txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band]; - else - txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; - txrc.bss = true; - rate_control_get_rate(sdata, NULL, &txrc); - - info->control.vif = vif; - info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT | - IEEE80211_TX_CTL_ASSIGN_SEQ | - IEEE80211_TX_CTL_FIRST_FRAGMENT; -} - -static struct sk_buff * -ieee80211_beacon_get_ap(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_mutable_offsets *offs, - bool is_template, - struct beacon_data *beacon, - struct ieee80211_chanctx_conf *chanctx_conf) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_if_ap *ap = &sdata->u.ap; - struct sk_buff *skb = NULL; - u16 csa_off_base = 0; - - if (beacon->cntdwn_counter_offsets[0]) { - if (!is_template) - ieee80211_beacon_update_cntdwn(vif); - - ieee80211_set_beacon_cntdwn(sdata, beacon); - } - - /* headroom, head length, - * tail length and maximum TIM length - */ - skb = dev_alloc_skb(local->tx_headroom + beacon->head_len + - beacon->tail_len + 256 + - local->hw.extra_beacon_tailroom); - if (!skb) - return NULL; - - skb_reserve(skb, local->tx_headroom); - skb_put_data(skb, beacon->head, beacon->head_len); - - ieee80211_beacon_add_tim(sdata, &ap->ps, skb, is_template); - - if (offs) { - offs->tim_offset = beacon->head_len; - offs->tim_length = skb->len - beacon->head_len; - offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0]; - - /* for AP the csa offsets are from tail */ - csa_off_base = skb->len; - } - - if (beacon->tail) - skb_put_data(skb, beacon->tail, beacon->tail_len); - - if (ieee80211_beacon_protect(skb, local, sdata) < 0) - return NULL; - - ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, chanctx_conf, - csa_off_base); - return skb; -} - static struct sk_buff * __ieee80211_beacon_get(struct ieee80211_hw *hw, struct 
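Whether through the removed ieee80211_beacon_get_ap() helper or the open-coded version that follows, the beacon is assembled the same way: allocate for the worst case (head, maximum TIM, tail, plus driver headroom), reserve the headroom, then append the pieces in order while recording their offsets. A simplified flat-buffer sketch of that append-and-record flow (names are illustrative):

#include <stdint.h>
#include <string.h>

struct buf_sketch {
	uint8_t *data;
	size_t len, cap;	/* len counts appended bytes only */
};

static int buf_append(struct buf_sketch *b, const void *src, size_t n)
{
	if (b->len + n > b->cap)
		return -1;
	memcpy(b->data + b->len, src, n);	/* like skb_put_data() */
	b->len += n;
	return 0;
}

static int build_beacon(struct buf_sketch *b,
			const uint8_t *head, size_t head_len,
			const uint8_t *tim, size_t tim_len,
			const uint8_t *tail, size_t tail_len,
			size_t *tim_off, size_t *tim_out_len)
{
	if (buf_append(b, head, head_len))
		return -1;
	*tim_off = b->len;			/* offs->tim_offset = head_len */
	if (buf_append(b, tim, tim_len))
		return -1;
	*tim_out_len = b->len - *tim_off;	/* offs->tim_length */
	return buf_append(b, tail, tail_len);
}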
ieee80211_vif *vif, @@ -5106,8 +4996,12 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_local *local = hw_to_local(hw); struct beacon_data *beacon = NULL; struct sk_buff *skb = NULL; + struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata = NULL; + enum nl80211_band band; + struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; + int csa_off_base = 0; rcu_read_lock(); @@ -5124,11 +5018,48 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_if_ap *ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); - if (!beacon) - goto out; + if (beacon) { + if (beacon->cntdwn_counter_offsets[0]) { + if (!is_template) + ieee80211_beacon_update_cntdwn(vif); - skb = ieee80211_beacon_get_ap(hw, vif, offs, is_template, - beacon, chanctx_conf); + ieee80211_set_beacon_cntdwn(sdata, beacon); + } + + /* + * headroom, head length, + * tail length and maximum TIM length + */ + skb = dev_alloc_skb(local->tx_headroom + + beacon->head_len + + beacon->tail_len + 256 + + local->hw.extra_beacon_tailroom); + if (!skb) + goto out; + + skb_reserve(skb, local->tx_headroom); + skb_put_data(skb, beacon->head, beacon->head_len); + + ieee80211_beacon_add_tim(sdata, &ap->ps, skb, + is_template); + + if (offs) { + offs->tim_offset = beacon->head_len; + offs->tim_length = skb->len - beacon->head_len; + offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0]; + + /* for AP the csa offsets are from tail */ + csa_off_base = skb->len; + } + + if (beacon->tail) + skb_put_data(skb, beacon->tail, + beacon->tail_len); + + if (ieee80211_beacon_protect(skb, local, sdata) < 0) + goto out; + } else + goto out; } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_hdr *hdr; @@ -5154,9 +5085,6 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, hdr = (struct ieee80211_hdr *) skb->data; hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); - - ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, - chanctx_conf, 0); } else if (ieee80211_vif_is_mesh(&sdata->vif)) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -5196,13 +5124,51 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, } skb_put_data(skb, beacon->tail, beacon->tail_len); - ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, - chanctx_conf, 0); } else { WARN_ON(1); goto out; } + /* CSA offsets */ + if (offs && beacon) { + int i; + + for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; i++) { + u16 csa_off = beacon->cntdwn_counter_offsets[i]; + + if (!csa_off) + continue; + + offs->cntdwn_counter_offs[i] = csa_off_base + csa_off; + } + } + + band = chanctx_conf->def.chan->band; + + info = IEEE80211_SKB_CB(skb); + + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_NO_ACK; + info->band = band; + + memset(&txrc, 0, sizeof(txrc)); + txrc.hw = hw; + txrc.sband = local->hw.wiphy->bands[band]; + txrc.bss_conf = &sdata->vif.bss_conf; + txrc.skb = skb; + txrc.reported_rate.idx = -1; + if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band]) + txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band]; + else + txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; + txrc.bss = true; + rate_control_get_rate(sdata, NULL, &txrc); + + info->control.vif = vif; + + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT | + IEEE80211_TX_CTL_ASSIGN_SEQ | + IEEE80211_TX_CTL_FIRST_FRAGMENT; out: rcu_read_unlock(); return skb; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 
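One detail of the AP branch above: the countdown offsets stored in the beacon template are relative to the tail, so once head and TIM have been appended they are rebased with csa_off_base, the buffer length at the point the tail begins (zero in the non-AP branches, where offsets are already absolute). The rebasing is one addition per slot; a tiny sketch:

#include <stddef.h>
#include <stdint.h>

/* Rebase tail-relative offsets to whole-frame offsets; 0 marks an
 * unused slot, matching the "if (!csa_off) continue;" test above. */
static void rebase_offsets(uint16_t *out, const uint16_t *tail_rel,
			   size_t n, uint16_t base)
{
	for (size_t i = 0; i < n; i++)
		out[i] = tail_rel[i] ? (uint16_t)(base + tail_rel[i]) : 0;
}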
f71b042a5c..2fe71ed913 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -862,19 +862,6 @@ static void __iterate_stations(struct ieee80211_local *local, } } -void ieee80211_iterate_stations(struct ieee80211_hw *hw, - void (*iterator)(void *data, - struct ieee80211_sta *sta), - void *data) -{ - struct ieee80211_local *local = hw_to_local(hw); - - mutex_lock(&local->sta_mtx); - __iterate_stations(local, iterator, data); - mutex_unlock(&local->sta_mtx); -} -EXPORT_SYMBOL_GPL(ieee80211_iterate_stations); - void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, void (*iterator)(void *data, struct ieee80211_sta *sta), @@ -1130,6 +1117,10 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, } else elem_parse_failed = true; break; + case WLAN_EID_CHALLENGE: + elems->challenge = pos; + elems->challenge_len = elen; + break; case WLAN_EID_VENDOR_SPECIFIC: if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && pos[2] == 0xf2) { @@ -1409,8 +1400,8 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, struct ieee802_11_elems *elems, - const u8 *transmitter_bssid, - const u8 *bss_bssid, + u8 *transmitter_bssid, + u8 *bss_bssid, u8 *nontransmitted_profile) { const struct element *elem, *sub; @@ -1475,20 +1466,16 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, return found ? profile_len : 0; } -struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len, - bool action, u64 filter, - u32 crc, - const u8 *transmitter_bssid, - const u8 *bss_bssid) +u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + struct ieee802_11_elems *elems, + u64 filter, u32 crc, u8 *transmitter_bssid, + u8 *bss_bssid) { - struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; u8 *nontransmitted_profile; int nontransmitted_profile_len = 0; - elems = kzalloc(sizeof(*elems), GFP_ATOMIC); - if (!elems) - return NULL; + memset(elems, 0, sizeof(*elems)); elems->ie_start = start; elems->total_len = len; @@ -1534,9 +1521,7 @@ struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len, kfree(nontransmitted_profile); - elems->crc = crc; - - return elems; + return crc; } void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, @@ -2081,7 +2066,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, chandef.chan = chan; skb = ieee80211_probereq_get(&local->hw, src, ssid, ssid_len, - local->scan_ies_len + ie_len); + 100 + ie_len); if (!skb) return NULL; @@ -2664,13 +2649,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } - /* - * If this is for hw restart things are still running. - * We may want to change that later, however. - */ - if (local->open_count && (!suspended || reconfig_due_to_wowlan)) - drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); - if (local->in_reconfig) { local->in_reconfig = false; barrier(); @@ -2689,6 +2667,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) IEEE80211_QUEUE_STOP_REASON_SUSPEND, false); + /* + * If this is for hw restart things are still running. + * We may want to change that later, however. 
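The parse-elems hunks in this file revert from returning a kzalloc'd struct to filling one the caller provides: the function just memsets and populates it, trading heap traffic and an ENOMEM failure path for roughly a kilobyte of caller stack. The shape of that API, reduced to essentials (the field set is illustrative):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct elems_sketch {
	const uint8_t *ssid;	/* borrowed pointer into the source buffer */
	uint8_t ssid_len;
	int parse_error;
};

/* Caller owns the storage, so there is no allocation failure to handle. */
static void parse_elems(const uint8_t *buf, size_t len,
			struct elems_sketch *out)
{
	memset(out, 0, sizeof(*out));
	(void)buf;
	(void)len;		/* the element walk is elided in this sketch */
}

Either way the pointers inside the struct borrow from buf, so they are only valid as long as buf is; that lifetime coupling is what the heap variant's free-before-unlock choreography was managing.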
+ */ + if (local->open_count && (!suspended || reconfig_due_to_wowlan)) + drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); + if (!suspended) return 0; @@ -3404,7 +3389,6 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, const struct ieee80211_sta_he_cap *he_cap; struct cfg80211_chan_def he_chandef = *chandef; const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bool support_80_80, support_160; u8 he_phy_cap; u32 freq; @@ -3448,19 +3432,6 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, NL80211_BAND_6GHZ); he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq); - switch (u8_get_bits(he_6ghz_oper->control, - IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { - case IEEE80211_6GHZ_CTRL_REG_LPI_AP: - bss_conf->power_type = IEEE80211_REG_LPI_AP; - break; - case IEEE80211_6GHZ_CTRL_REG_SP_AP: - bss_conf->power_type = IEEE80211_REG_SP_AP; - break; - default: - bss_conf->power_type = IEEE80211_REG_UNSET_AP; - break; - } - switch (u8_get_bits(he_6ghz_oper->control, IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) { case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ: diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 7ed0d268af..4eed23e276 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -449,6 +449,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return 0; + hdr = (struct ieee80211_hdr *) pos; pos += hdrlen; pn64 = atomic64_inc_return(&key->conf.tx_pn); @@ -685,6 +686,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return 0; + hdr = (struct ieee80211_hdr *)pos; pos += hdrlen; pn64 = atomic64_inc_return(&key->conf.tx_pn); @@ -879,6 +881,8 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx) if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; + hdr = (struct ieee80211_hdr *)rx->skb->data; + rx_pn = key->u.gen.rx_pn[qos_tid]; skb_pn = rx->skb->data + hdrlen + cs->pn_off; diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 500ed1b812..323d3d2d98 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -129,14 +129,15 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) return -EINVAL; - dev_addr_set(dev, addr->sa_data); + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); sdata->wpan_dev.extended_addr = extended_addr; /* update lowpan interface mac address when * wpan mac has been changed */ if (sdata->wpan_dev.lowpan_dev) - dev_addr_set(sdata->wpan_dev.lowpan_dev, dev->dev_addr); + memcpy(sdata->wpan_dev.lowpan_dev->dev_addr, dev->dev_addr, + dev->addr_len); return mac802154_wpan_update_llsec(dev); } @@ -614,7 +615,6 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { - u8 addr[IEEE802154_EXTENDED_ADDR_LEN]; struct net_device *ndev = NULL; struct ieee802154_sub_if_data *sdata = NULL; int ret; @@ -638,12 +638,11 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, switch (type) { case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; - if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) { - ieee802154_le64_to_be64(addr, &extended_addr); - dev_addr_set(ndev, addr); - } else { - dev_addr_set(ndev, ndev->perm_addr); - } + if 
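The wpa.c hunks above re-derive hdr from the frame data immediately before use: skb_linearize(), like headroom expansion on the transmit side, can reallocate the underlying buffer and leave any pointer computed earlier dangling. The same hazard exists with plain realloc():

#include <stdlib.h>
#include <string.h>

static int demo(void)
{
	char *buf = malloc(8), *tmp, *field;

	if (!buf)
		return -1;
	field = buf + 4;		/* pointer into the buffer */
	(void)field;

	tmp = realloc(buf, 4096);	/* may move the allocation */
	if (!tmp) {
		free(buf);
		return -1;
	}
	buf = tmp;
	field = buf + 4;		/* must be re-derived, like hdr above */

	memset(field, 0, 4);
	free(buf);
	return 0;
}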
(ieee802154_is_valid_extended_unicast_addr(extended_addr)) + ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr); + else + memcpy(ndev->dev_addr, ndev->perm_addr, + IEEE802154_EXTENDED_ADDR_LEN); break; case NL802154_IFTYPE_MONITOR: ndev->type = ARPHRD_IEEE802154_MONITOR; diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig index 3a5c0e70da..2cdf3d0a28 100644 --- a/net/mctp/Kconfig +++ b/net/mctp/Kconfig @@ -1,7 +1,7 @@ menuconfig MCTP depends on NET - bool "MCTP core protocol support" + tristate "MCTP core protocol support" help Management Component Transport Protocol (MCTP) is an in-system protocol for communicating between management controllers and @@ -11,13 +11,3 @@ menuconfig MCTP This option enables core MCTP support. For communicating with other devices, you'll want to enable a driver for a specific hardware channel. - -config MCTP_TEST - bool "MCTP core tests" if !KUNIT_ALL_TESTS - depends on MCTP=y && KUNIT=y - default KUNIT_ALL_TESTS - -config MCTP_FLOWS - bool - depends on MCTP - select SKB_EXTENSIONS diff --git a/net/mctp/Makefile b/net/mctp/Makefile index 6cd55233e6..0171333384 100644 --- a/net/mctp/Makefile +++ b/net/mctp/Makefile @@ -1,6 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MCTP) += mctp.o mctp-objs := af_mctp.o device.o route.o neigh.o - -# tests -obj-$(CONFIG_MCTP_TEST) += test/utils.o diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index c921de63b4..85cc1a28cb 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -16,9 +16,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - /* socket implementation */ static int mctp_release(struct socket *sock) @@ -39,13 +36,6 @@ static bool mctp_sockaddr_is_ok(const struct sockaddr_mctp *addr) return !addr->__smctp_pad0 && !addr->__smctp_pad1; } -static bool mctp_sockaddr_ext_is_ok(const struct sockaddr_mctp_ext *addr) -{ - return !addr->__smctp_pad0[0] && - !addr->__smctp_pad0[1] && - !addr->__smctp_pad0[2]; -} - static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { struct sock *sk = sock->sk; @@ -93,7 +83,6 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr); int rc, addrlen = msg->msg_namelen; struct sock *sk = sock->sk; - struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb; struct mctp_route *rt; struct sk_buff *skb; @@ -119,6 +108,11 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (addr->smctp_network == MCTP_NET_ANY) addr->smctp_network = mctp_default_net(sock_net(sk)); + rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, + addr->smctp_addr.s_addr); + if (!rt) + return -EHOSTUNREACH; + skb = sock_alloc_send_skb(sk, hlen + 1 + len, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) @@ -130,46 +124,19 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) *(u8 *)skb_put(skb, 1) = addr->smctp_type; rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len); - if (rc < 0) - goto err_free; + if (rc < 0) { + kfree_skb(skb); + return rc; + } /* set up cb */ cb = __mctp_cb(skb); cb->net = addr->smctp_network; - /* direct addressing */ - if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { - DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, - extaddr, msg->msg_name); - - if (!mctp_sockaddr_ext_is_ok(extaddr) || - extaddr->smctp_halen > sizeof(cb->haddr)) { - rc = -EINVAL; - goto err_free; - } - - cb->ifindex = extaddr->smctp_ifindex; - cb->halen = extaddr->smctp_halen; - 
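The sendmsg hunk above restores a useful ordering: resolve the route before allocating and filling the skb, so an unroutable destination fails fast with -EHOSTUNREACH and there is nothing to unwind. Checking the cheap, likely-to-fail condition before committing resources is a general shape; a sketch with hypothetical names:

#include <errno.h>
#include <stdlib.h>

struct route_sketch { int mtu; };

/* Hypothetical lookup; NULL means no route to the destination. */
static struct route_sketch *lookup_route(int net, int addr)
{
	(void)net;
	(void)addr;
	return NULL;
}

static int send_sketch(int net, int addr, size_t len)
{
	struct route_sketch *rt = lookup_route(net, addr);
	char *buf;

	if (!rt)
		return -EHOSTUNREACH;	/* fail before any allocation */

	buf = malloc(len);
	if (!buf)
		return -ENOMEM;
	/* ... fill and transmit ... */
	free(buf);
	return 0;
}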
memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen); - - rt = NULL; - } else { - rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, - addr->smctp_addr.s_addr); - if (!rt) { - rc = -EHOSTUNREACH; - goto err_free; - } - } - rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, addr->smctp_tag); return rc ? : len; - -err_free: - kfree_skb(skb); - return rc; } static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, @@ -177,7 +144,6 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, { DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); struct sock *sk = sock->sk; - struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct sk_buff *skb; size_t msglen; u8 type; @@ -225,17 +191,6 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); addr->__smctp_pad1 = 0; msg->msg_namelen = sizeof(*addr); - - if (msk->addr_ext) { - DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, ae, - msg->msg_name); - msg->msg_namelen = sizeof(*ae); - ae->smctp_ifindex = cb->ifindex; - ae->smctp_halen = cb->halen; - memset(ae->__smctp_pad0, 0x0, sizeof(ae->__smctp_pad0)); - memset(ae->smctp_haddr, 0x0, sizeof(ae->smctp_haddr)); - memcpy(ae->smctp_haddr, cb->haddr, cb->halen); - } } rc = len; @@ -251,45 +206,12 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, static int mctp_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { - struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); - int val; - - if (level != SOL_MCTP) - return -EINVAL; - - if (optname == MCTP_OPT_ADDR_EXT) { - if (optlen != sizeof(int)) - return -EINVAL; - if (copy_from_sockptr(&val, optval, sizeof(int))) - return -EFAULT; - msk->addr_ext = val; - return 0; - } - - return -ENOPROTOOPT; + return -EINVAL; } static int mctp_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { - struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); - int len, val; - - if (level != SOL_MCTP) - return -EINVAL; - - if (get_user(len, optlen)) - return -EFAULT; - - if (optname == MCTP_OPT_ADDR_EXT) { - if (len != sizeof(int)) - return -EINVAL; - val = !!msk->addr_ext; - if (copy_to_user(optval, &val, len)) - return -EFAULT; - return 0; - } - return -EINVAL; } @@ -314,61 +236,16 @@ static const struct proto_ops mctp_dgram_ops = { .sendpage = sock_no_sendpage, }; -static void mctp_sk_expire_keys(struct timer_list *timer) -{ - struct mctp_sock *msk = container_of(timer, struct mctp_sock, - key_expiry); - struct net *net = sock_net(&msk->sk); - unsigned long next_expiry, flags; - struct mctp_sk_key *key; - struct hlist_node *tmp; - bool next_expiry_valid = false; - - spin_lock_irqsave(&net->mctp.keys_lock, flags); - - hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { - spin_lock(&key->lock); - - if (!time_after_eq(key->expiry, jiffies)) { - trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT); - key->valid = false; - hlist_del_rcu(&key->hlist); - hlist_del_rcu(&key->sklist); - spin_unlock(&key->lock); - mctp_key_unref(key); - continue; - } - - if (next_expiry_valid) { - if (time_before(key->expiry, next_expiry)) - next_expiry = key->expiry; - } else { - next_expiry = key->expiry; - next_expiry_valid = true; - } - spin_unlock(&key->lock); - } - - spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - - if (next_expiry_valid) - mod_timer(timer, next_expiry); -} - static int 
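The removed mctp_sk_expire_keys() above follows a common timer pattern: sweep the list, invalidate entries whose deadline has passed, and re-arm the timer to the earliest surviving deadline (no survivors, no re-arm). A standalone sketch of that sweep:

#include <stdbool.h>
#include <stdint.h>

struct timed_key {
	struct timed_key *next;
	uint64_t expiry;
	bool valid;
};

/* Returns the earliest future expiry, or 0 when nothing remains armed. */
static uint64_t sweep_keys(struct timed_key *head, uint64_t now)
{
	uint64_t next = 0;

	for (struct timed_key *k = head; k; k = k->next) {
		if (!k->valid)
			continue;
		if (k->expiry <= now) {
			k->valid = false;	/* expired: drop from use */
			continue;
		}
		if (!next || k->expiry < next)
			next = k->expiry;	/* candidate for mod_timer() */
	}
	return next;
}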
mctp_sk_init(struct sock *sk) { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); INIT_HLIST_HEAD(&msk->keys); - timer_setup(&msk->key_expiry, mctp_sk_expire_keys, 0); return 0; } static void mctp_sk_close(struct sock *sk, long timeout) { - struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); - - del_timer_sync(&msk->key_expiry); sk_common_release(sk); } @@ -399,22 +276,21 @@ static void mctp_sk_unhash(struct sock *sk) /* remove tag allocations */ spin_lock_irqsave(&net->mctp.keys_lock, flags); hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { - hlist_del(&key->sklist); - hlist_del(&key->hlist); + hlist_del_rcu(&key->sklist); + hlist_del_rcu(&key->hlist); - trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED); - - spin_lock(&key->lock); - kfree_skb(key->reasm_head); + spin_lock(&key->reasm_lock); + if (key->reasm_head) + kfree_skb(key->reasm_head); key->reasm_head = NULL; key->reasm_dead = true; - key->valid = false; - spin_unlock(&key->lock); + spin_unlock(&key->reasm_lock); - /* key is no longer on the lookup lists, unref */ - mctp_key_unref(key); + kfree_rcu(key, rcu); } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + synchronize_rcu(); } static struct proto mctp_proto = { @@ -522,7 +398,7 @@ static __exit void mctp_exit(void) sock_unregister(PF_MCTP); } -subsys_initcall(mctp_init); +module_init(mctp_init); module_exit(mctp_exit); MODULE_DESCRIPTION("MCTP core"); diff --git a/net/mctp/device.c b/net/mctp/device.c index ef2755f82f..b9f38e765f 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -35,24 +35,22 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->mctp_ptr); } -static int mctp_addrinfo_size(void) +static void mctp_dev_destroy(struct mctp_dev *mdev) { - return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) - + nla_total_size(1) // IFA_LOCAL - + nla_total_size(1) // IFA_ADDRESS - ; + struct net_device *dev = mdev->dev; + + dev_put(dev); + kfree_rcu(mdev, rcu); } -/* flag should be NLM_F_MULTI for dump calls */ -static int mctp_fill_addrinfo(struct sk_buff *skb, - struct mctp_dev *mdev, mctp_eid_t eid, - int msg_type, u32 portid, u32 seq, int flag) +static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb, + struct mctp_dev *mdev, mctp_eid_t eid) { struct ifaddrmsg *hdr; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, portid, seq, - msg_type, sizeof(*hdr), flag); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; @@ -82,14 +80,10 @@ static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb, struct netlink_callback *cb) { struct mctp_dump_cb *mcb = (void *)cb->ctx; - u32 portid, seq; int rc = 0; - portid = NETLINK_CB(cb->skb).portid; - seq = cb->nlh->nlmsg_seq; for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) { - rc = mctp_fill_addrinfo(skb, mdev, mdev->addrs[mcb->a_idx], - RTM_NEWADDR, portid, seq, NLM_F_MULTI); + rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]); if (rc < 0) break; } @@ -141,32 +135,6 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static void mctp_addr_notify(struct mctp_dev *mdev, mctp_eid_t eid, int msg_type, - struct sk_buff *req_skb, struct nlmsghdr *req_nlh) -{ - u32 portid = NETLINK_CB(req_skb).portid; - struct net *net = dev_net(mdev->dev); - struct sk_buff *skb; - int rc = -ENOBUFS; - - skb = nlmsg_new(mctp_addrinfo_size(), GFP_KERNEL); - if (!skb) - goto out; - - rc = 
mctp_fill_addrinfo(skb, mdev, eid, msg_type, - portid, req_nlh->nlmsg_seq, 0); - if (rc < 0) { - WARN_ON_ONCE(rc == -EMSGSIZE); - goto out; - } - - rtnl_notify(skb, net, portid, RTNLGRP_MCTP_IFADDR, req_nlh, GFP_KERNEL); - return; -out: - kfree_skb(skb); - rtnl_set_sk_err(net, RTNLGRP_MCTP_IFADDR, rc); -} - static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = { [IFA_ADDRESS] = { .type = NLA_U8 }, [IFA_LOCAL] = { .type = NLA_U8 }, @@ -229,7 +197,6 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, kfree(tmp_addrs); - mctp_addr_notify(mdev, addr->s_addr, RTM_NEWADDR, skb, nlh); mctp_route_add_local(mdev, addr->s_addr); return 0; @@ -285,42 +252,9 @@ static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, mdev->num_addrs--; spin_unlock_irqrestore(&mdev->addrs_lock, flags); - mctp_addr_notify(mdev, addr->s_addr, RTM_DELADDR, skb, nlh); - return 0; } -void mctp_dev_hold(struct mctp_dev *mdev) -{ - refcount_inc(&mdev->refs); -} - -void mctp_dev_put(struct mctp_dev *mdev) -{ - if (refcount_dec_and_test(&mdev->refs)) { - dev_put(mdev->dev); - kfree_rcu(mdev, rcu); - } -} - -void mctp_dev_release_key(struct mctp_dev *dev, struct mctp_sk_key *key) - __must_hold(&key->lock) -{ - if (!dev) - return; - if (dev->ops && dev->ops->release_flow) - dev->ops->release_flow(dev, key); - key->dev = NULL; - mctp_dev_put(dev); -} - -void mctp_dev_set_key(struct mctp_dev *dev, struct mctp_sk_key *key) - __must_hold(&key->lock) -{ - mctp_dev_hold(dev); - key->dev = dev; -} - static struct mctp_dev *mctp_add_dev(struct net_device *dev) { struct mctp_dev *mdev; @@ -336,9 +270,7 @@ static struct mctp_dev *mctp_add_dev(struct net_device *dev) mdev->net = mctp_default_net(dev_net(dev)); /* associate to net_device */ - refcount_set(&mdev->refs, 1); rcu_assign_pointer(dev->mctp_ptr, mdev); - dev_hold(dev); mdev->dev = dev; @@ -398,26 +330,12 @@ static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr, return 0; } -/* Matches netdev types that should have MCTP handling */ -static bool mctp_known(struct net_device *dev) -{ - /* only register specific types (inc. NONE for TUN devices) */ - return dev->type == ARPHRD_MCTP || - dev->type == ARPHRD_LOOPBACK || - dev->type == ARPHRD_NONE; -} - static void mctp_unregister(struct net_device *dev) { struct mctp_dev *mdev; mdev = mctp_dev_get_rtnl(dev); - if (mctp_known(dev) != (bool)mdev) { - // Sanity check, should match what was set in mctp_register - netdev_warn(dev, "%s: mdev pointer %d but type (%d) match is %d", - __func__, (bool)mdev, mctp_known(dev), dev->type); - return; - } + if (!mdev) return; @@ -427,7 +345,7 @@ static void mctp_unregister(struct net_device *dev) mctp_neigh_remove_dev(mdev); kfree(mdev->addrs); - mctp_dev_put(mdev); + mctp_dev_destroy(mdev); } static int mctp_register(struct net_device *dev) @@ -435,17 +353,11 @@ static int mctp_register(struct net_device *dev) struct mctp_dev *mdev; /* Already registered? 
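The deleted mctp_dev_hold()/mctp_dev_put() pair above is the standard refcounted-lifetime idiom: every holder takes a reference, and whichever put drops the count to zero performs the teardown (here dev_put() plus kfree_rcu()). A userspace analog with C11 atomics:

#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refs;
	/* ... payload ... */
};

static struct obj *obj_new(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o)
		atomic_init(&o->refs, 1);	/* creator's reference */
	return o;
}

static void obj_hold(struct obj *o)
{
	atomic_fetch_add(&o->refs, 1);
}

static void obj_put(struct obj *o)
{
	/* fetch_sub returns the prior value; 1 means we were last */
	if (atomic_fetch_sub(&o->refs, 1) == 1)
		free(o);
}

The revert falls back to single ownership under RTNL via dev->mctp_ptr, which is simpler but cannot express the finer-grained lifetimes the removed helpers provided.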
*/ - mdev = rtnl_dereference(dev->mctp_ptr); - - if (mdev) { - if (!mctp_known(dev)) - netdev_warn(dev, "%s: mctp_dev set for unknown type %d", - __func__, dev->type); + if (rtnl_dereference(dev->mctp_ptr)) return 0; - } - /* only register specific types */ - if (!mctp_known(dev)) + /* only register specific types; MCTP-specific and loopback for now */ + if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK) return 0; mdev = mctp_add_dev(dev); @@ -475,39 +387,6 @@ static int mctp_dev_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; } -static int mctp_register_netdevice(struct net_device *dev, - const struct mctp_netdev_ops *ops) -{ - struct mctp_dev *mdev; - - mdev = mctp_add_dev(dev); - if (IS_ERR(mdev)) - return PTR_ERR(mdev); - - mdev->ops = ops; - - return register_netdevice(dev); -} - -int mctp_register_netdev(struct net_device *dev, - const struct mctp_netdev_ops *ops) -{ - int rc; - - rtnl_lock(); - rc = mctp_register_netdevice(dev, ops); - rtnl_unlock(); - - return rc; -} -EXPORT_SYMBOL_GPL(mctp_register_netdev); - -void mctp_unregister_netdev(struct net_device *dev) -{ - unregister_netdev(dev); -} -EXPORT_SYMBOL_GPL(mctp_unregister_netdev); - static struct rtnl_af_ops mctp_af_ops = { .family = AF_MCTP, .fill_link_af = mctp_fill_link_af, diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c index 6ad3e33bd4..90ed2f02d1 100644 --- a/net/mctp/neigh.c +++ b/net/mctp/neigh.c @@ -47,7 +47,7 @@ static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid, } INIT_LIST_HEAD(&neigh->list); neigh->dev = mdev; - mctp_dev_hold(neigh->dev); + dev_hold(neigh->dev->dev); neigh->eid = eid; neigh->source = source; memcpy(neigh->ha, lladdr, lladdr_len); @@ -63,7 +63,7 @@ static void __mctp_neigh_free(struct rcu_head *rcu) { struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu); - mctp_dev_put(neigh->dev); + dev_put(neigh->dev->dev); kfree(neigh); } @@ -85,8 +85,8 @@ void mctp_neigh_remove_dev(struct mctp_dev *mdev) mutex_unlock(&net->mctp.neigh_lock); } -static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid, - enum mctp_neigh_source source) +// TODO: add a "source" flag so netlink can only delete static neighbours? 
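__mctp_neigh_free() above is the second half of a two-phase delete: the entry is first unlinked under the lock with list_del_rcu() (just below) so no new reader can find it, and only freed once every reader that might already hold a pointer has left its read-side section, which is what deferring the free through call_rcu() guarantees. A simplified sketch; synchronize_readers() is a hypothetical stand-in for the RCU grace period:

#include <stdlib.h>

struct neigh_sketch {
	struct neigh_sketch *next;
	int eid;
};

static void synchronize_readers(void)
{
	/* stand-in: in the kernel this is the RCU grace period */
}

static void remove_neigh(struct neigh_sketch **head, int eid)
{
	for (struct neigh_sketch **p = head; *p; p = &(*p)->next) {
		if ((*p)->eid == eid) {
			struct neigh_sketch *victim = *p;

			*p = victim->next;	/* phase 1: unlink */
			synchronize_readers();	/* wait out readers */
			free(victim);		/* phase 2: reclaim */
			return;
		}
	}
}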
+static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid) { struct net *net = dev_net(mdev->dev); struct mctp_neigh *neigh, *tmp; @@ -94,8 +94,7 @@ static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid, mutex_lock(&net->mctp.neigh_lock); list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) { - if (neigh->dev == mdev && neigh->eid == eid && - neigh->source == source) { + if (neigh->dev == mdev && neigh->eid == eid) { list_del_rcu(&neigh->list); /* TODO: immediate RTM_DELNEIGH */ call_rcu(&neigh->rcu, __mctp_neigh_free); @@ -203,7 +202,7 @@ static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh, if (!mdev) return -ENODEV; - return mctp_neigh_remove(mdev, eid, MCTP_NEIGH_STATIC); + return mctp_neigh_remove(mdev, eid); } static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event, diff --git a/net/mctp/route.c b/net/mctp/route.c index e52cef7505..fb1bf4ec85 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -11,7 +11,6 @@ */ #include -#include #include #include #include @@ -24,12 +23,7 @@ #include #include -#include - static const unsigned int mctp_message_maxlen = 64 * 1024; -static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ; - -static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev); /* route output callbacks */ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) @@ -89,43 +83,25 @@ static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, return true; } -/* returns a key (with key->lock held, and refcounted), or NULL if no such - * key exists. - */ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, - mctp_eid_t peer, - unsigned long *irqflags) - __acquires(&key->lock) + mctp_eid_t peer) { struct mctp_sk_key *key, *ret; - unsigned long flags; struct mctp_hdr *mh; u8 tag; + WARN_ON(!rcu_read_lock_held()); + mh = mctp_hdr(skb); tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); ret = NULL; - spin_lock_irqsave(&net->mctp.keys_lock, flags); - hlist_for_each_entry(key, &net->mctp.keys, hlist) { - if (!mctp_key_match(key, mh->dest, peer, tag)) - continue; - - spin_lock(&key->lock); - if (key->valid) { - refcount_inc(&key->refs); + hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) { + if (mctp_key_match(key, mh->dest, peer, tag)) { ret = key; break; } - spin_unlock(&key->lock); - } - - if (ret) { - spin_unlock(&net->mctp.keys_lock); - *irqflags = flags; - } else { - spin_unlock_irqrestore(&net->mctp.keys_lock, flags); } return ret; @@ -145,30 +121,11 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, key->local_addr = local; key->tag = tag; key->sk = &msk->sk; - key->valid = true; - spin_lock_init(&key->lock); - refcount_set(&key->refs, 1); + spin_lock_init(&key->reasm_lock); return key; } -void mctp_key_unref(struct mctp_sk_key *key) -{ - unsigned long flags; - - if (!refcount_dec_and_test(&key->refs)) - return; - - /* even though no refs exist here, the lock allows us to stay - * consistent with the locking requirement of mctp_dev_release_key - */ - spin_lock_irqsave(&key->lock, flags); - mctp_dev_release_key(key->dev, key); - spin_unlock_irqrestore(&key->lock, flags); - - kfree(key); -} - static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) { struct net *net = sock_net(&msk->sk); @@ -181,20 +138,12 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { if (mctp_key_match(tmp, key->local_addr, 
key->peer_addr, key->tag)) { - spin_lock(&tmp->lock); - if (tmp->valid) - rc = -EEXIST; - spin_unlock(&tmp->lock); - if (rc) - break; + rc = -EEXIST; + break; } } if (!rc) { - refcount_inc(&key->refs); - key->expiry = jiffies + mctp_key_lifetime; - timer_reduce(&msk->key_expiry, key->expiry); - hlist_add_head(&key->hlist, &net->mctp.keys); hlist_add_head(&key->sklist, &msk->keys); } @@ -204,70 +153,30 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) return rc; } -/* We're done with the key; unset valid and remove from lists. There may still - * be outstanding refs on the key though... +/* Must be called with key->reasm_lock, which it will release. Will schedule + * the key for an RCU free. */ static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net, unsigned long flags) - __releases(&key->lock) + __releases(&key->reasm_lock) { struct sk_buff *skb; skb = key->reasm_head; key->reasm_head = NULL; key->reasm_dead = true; - key->valid = false; - mctp_dev_release_key(key->dev, key); - spin_unlock_irqrestore(&key->lock, flags); + spin_unlock_irqrestore(&key->reasm_lock, flags); spin_lock_irqsave(&net->mctp.keys_lock, flags); - hlist_del(&key->hlist); - hlist_del(&key->sklist); + hlist_del_rcu(&key->hlist); + hlist_del_rcu(&key->sklist); spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + kfree_rcu(key, rcu); - /* one unref for the lists */ - mctp_key_unref(key); - - /* and one for the local reference */ - mctp_key_unref(key); - - kfree_skb(skb); + if (skb) + kfree_skb(skb); } -#ifdef CONFIG_MCTP_FLOWS -static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) -{ - struct mctp_flow *flow; - - flow = skb_ext_add(skb, SKB_EXT_MCTP); - if (!flow) - return; - - refcount_inc(&key->refs); - flow->key = key; -} - -static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) -{ - struct mctp_sk_key *key; - struct mctp_flow *flow; - - flow = skb_ext_find(skb, SKB_EXT_MCTP); - if (!flow) - return; - - key = flow->key; - - if (WARN_ON(key->dev && key->dev != dev)) - return; - - mctp_dev_set_key(dev, key); -} -#else -static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {} -static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {} -#endif - static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) { struct mctp_hdr *hdr = mctp_hdr(skb); @@ -339,10 +248,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) rcu_read_lock(); - /* lookup socket / reasm context, exactly matching (src,dest,tag). - * we hold a ref on the key, and key->lock held. - */ - key = mctp_lookup_key(net, skb, mh->src, &f); + /* lookup socket / reasm context, exactly matching (src,dest,tag) */ + key = mctp_lookup_key(net, skb, mh->src); if (flags & MCTP_HDR_FLAG_SOM) { if (key) { @@ -353,12 +260,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * key for reassembly - we'll create a more specific * one for future packets if required (ie, !EOM). 
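Both mctp_lookup_key() and the conflict check in mctp_key_add() above reduce to the same tuple compare on (local EID, peer EID, tag), with ANY acting as a wildcard; the receive path retries with MCTP_ADDR_ANY so keys bound before the peer was known still resolve. A sketch of the match (the wildcard value is illustrative):

#include <stdbool.h>
#include <stdint.h>

#define EID_ANY 0xff	/* illustrative wildcard */

struct key_sketch {
	uint8_t local, peer, tag;
};

static bool key_match(const struct key_sketch *k,
		      uint8_t local, uint8_t peer, uint8_t tag)
{
	if (k->local != EID_ANY && k->local != local)
		return false;
	if (k->peer != EID_ANY && k->peer != peer)
		return false;
	return k->tag == tag;	/* the tag always matches exactly */
}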
*/ - key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); + key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY); if (key) { msk = container_of(key->sk, struct mctp_sock, sk); - spin_unlock_irqrestore(&key->lock, f); - mctp_key_unref(key); key = NULL; } } @@ -377,13 +282,11 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) if (flags & MCTP_HDR_FLAG_EOM) { sock_queue_rcv_skb(&msk->sk, skb); if (key) { + spin_lock_irqsave(&key->reasm_lock, f); /* we've hit a pending reassembly; not much we * can do but drop it */ - trace_mctp_key_release(key, - MCTP_TRACE_KEY_REPLIED); __mctp_key_unlock_drop(key, net, f); - key = NULL; } rc = 0; goto out_unlock; @@ -400,7 +303,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) goto out_unlock; } - /* we can queue without the key lock here, as the + /* we can queue without the reasm lock here, as the * key isn't observable yet */ mctp_frag_queue(key, skb); @@ -412,26 +315,20 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * this function. */ rc = mctp_key_add(key, msk); - if (rc) { + if (rc) kfree(key); - } else { - trace_mctp_key_acquire(key); - - /* we don't need to release key->lock on exit */ - mctp_key_unref(key); - } - key = NULL; } else { + /* existing key: start reassembly */ + spin_lock_irqsave(&key->reasm_lock, f); + if (key->reasm_head || key->reasm_dead) { /* duplicate start? drop everything */ - trace_mctp_key_release(key, - MCTP_TRACE_KEY_INVALIDATED); __mctp_key_unlock_drop(key, net, f); rc = -EEXIST; - key = NULL; } else { rc = mctp_frag_queue(key, skb); + spin_unlock_irqrestore(&key->reasm_lock, f); } } @@ -440,6 +337,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * using the message-specific key */ + spin_lock_irqsave(&key->reasm_lock, f); + /* we need to be continuing an existing reassembly... 
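The branching in mctp_route_input() is easier to follow as a small state machine driven by the SOM/EOM header flags: SOM alone opens a reassembly context, neither flag appends to an existing one, EOM closes it, SOM|EOM is a complete single-fragment message, and a SOM that hits a live context is a duplicate start that drops everything, matching the -EEXIST path. A compact, simplified sketch of the dispatch:

#include <stdbool.h>
#include <stdint.h>

#define F_SOM 0x80	/* illustrative flag bits */
#define F_EOM 0x40

enum frag_action { DELIVER, OPEN, APPEND, CLOSE, DROP };

static enum frag_action classify(uint8_t flags, bool have_ctx)
{
	bool som = flags & F_SOM;
	bool eom = flags & F_EOM;

	if (som && eom)
		return DELIVER;		/* whole message in one fragment */
	if (som)
		return have_ctx ? DROP : OPEN;	/* duplicate start drops */
	if (!have_ctx)
		return DROP;		/* middle or end without a start */
	return eom ? CLOSE : APPEND;
}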
*/ if (!key->reasm_head) rc = -EINVAL; @@ -452,9 +351,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) if (!rc && flags & MCTP_HDR_FLAG_EOM) { sock_queue_rcv_skb(key->sk, key->reasm_head); key->reasm_head = NULL; - trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED); __mctp_key_unlock_drop(key, net, f); - key = NULL; + } else { + spin_unlock_irqrestore(&key->reasm_lock, f); } } else { @@ -464,10 +363,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) out_unlock: rcu_read_unlock(); - if (key) { - spin_unlock_irqrestore(&key->lock, f); - mctp_key_unref(key); - } out: if (rc) kfree_skb(skb); @@ -481,7 +376,6 @@ static unsigned int mctp_route_mtu(struct mctp_route *rt) static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) { - struct mctp_skb_cb *cb = mctp_cb(skb); struct mctp_hdr *hdr = mctp_hdr(skb); char daddr_buf[MAX_ADDR_LEN]; char *daddr = NULL; @@ -496,14 +390,9 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) return -EMSGSIZE; } - if (cb->ifindex) { - /* direct route; use the hwaddr we stashed in sendmsg */ - daddr = cb->haddr; - } else { - /* If lookup fails let the device handle daddr==NULL */ - if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) - daddr = daddr_buf; - } + /* If lookup fails let the device handle daddr==NULL */ + if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) + daddr = daddr_buf; rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), daddr, skb->dev->dev_addr, skb->len); @@ -512,8 +401,6 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) return -EHOSTUNREACH; } - mctp_flow_prepare_output(skb, route->dev); - rc = dev_queue_xmit(skb); if (rc) rc = net_xmit_errno(rc); @@ -525,7 +412,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) static void mctp_route_release(struct mctp_route *rt) { if (refcount_dec_and_test(&rt->refs)) { - mctp_dev_put(rt->dev); + dev_put(rt->dev->dev); kfree_rcu(rt, rcu); } } @@ -567,38 +454,30 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, lockdep_assert_held(&mns->keys_lock); - key->expiry = jiffies + mctp_key_lifetime; - timer_reduce(&msk->key_expiry, key->expiry); - /* we hold the net->key_lock here, allowing updates to both * then net and sk */ hlist_add_head_rcu(&key->hlist, &mns->keys); hlist_add_head_rcu(&key->sklist, &msk->keys); - refcount_inc(&key->refs); } /* Allocate a locally-owned tag value for (saddr, daddr), and reserve * it for the socket msk */ -static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, - mctp_eid_t saddr, - mctp_eid_t daddr, u8 *tagp) +static int mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp) { struct net *net = sock_net(&msk->sk); struct netns_mctp *mns = &net->mctp; struct mctp_sk_key *key, *tmp; unsigned long flags; + int rc = -EAGAIN; u8 tagbits; - /* for NULL destination EIDs, we may get a response from any peer */ - if (daddr == MCTP_ADDR_NULL) - daddr = MCTP_ADDR_ANY; - /* be optimistic, alloc now */ key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL); if (!key) - return ERR_PTR(-ENOMEM); + return -ENOMEM; /* 8 possible tag values */ tagbits = 0xff; @@ -609,26 +488,14 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, * tags. 
@@ -481,7 +376,6 @@ static unsigned int mctp_route_mtu(struct mctp_route *rt)
 
 static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 {
-	struct mctp_skb_cb *cb = mctp_cb(skb);
 	struct mctp_hdr *hdr = mctp_hdr(skb);
 	char daddr_buf[MAX_ADDR_LEN];
 	char *daddr = NULL;
@@ -496,14 +390,9 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 		return -EMSGSIZE;
 	}
 
-	if (cb->ifindex) {
-		/* direct route; use the hwaddr we stashed in sendmsg */
-		daddr = cb->haddr;
-	} else {
-		/* If lookup fails let the device handle daddr==NULL */
-		if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
-			daddr = daddr_buf;
-	}
+	/* If lookup fails let the device handle daddr==NULL */
+	if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+		daddr = daddr_buf;
 
 	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
 			     daddr, skb->dev->dev_addr, skb->len);
@@ -512,8 +401,6 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 		return -EHOSTUNREACH;
 	}
 
-	mctp_flow_prepare_output(skb, route->dev);
-
 	rc = dev_queue_xmit(skb);
 	if (rc)
 		rc = net_xmit_errno(rc);
@@ -525,7 +412,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 static void mctp_route_release(struct mctp_route *rt)
 {
 	if (refcount_dec_and_test(&rt->refs)) {
-		mctp_dev_put(rt->dev);
+		dev_put(rt->dev->dev);
 		kfree_rcu(rt, rcu);
 	}
 }
@@ -567,38 +454,30 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
 	lockdep_assert_held(&mns->keys_lock);
 
-	key->expiry = jiffies + mctp_key_lifetime;
-	timer_reduce(&msk->key_expiry, key->expiry);
-
 	/* we hold the net->key_lock here, allowing updates to both
 	 * then net and sk
 	 */
 	hlist_add_head_rcu(&key->hlist, &mns->keys);
 	hlist_add_head_rcu(&key->sklist, &msk->keys);
-	refcount_inc(&key->refs);
 }
 
 /* Allocate a locally-owned tag value for (saddr, daddr), and reserve
  * it for the socket msk
  */
-static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
-						mctp_eid_t saddr,
-						mctp_eid_t daddr, u8 *tagp)
+static int mctp_alloc_local_tag(struct mctp_sock *msk,
+				mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
 {
 	struct net *net = sock_net(&msk->sk);
 	struct netns_mctp *mns = &net->mctp;
 	struct mctp_sk_key *key, *tmp;
 	unsigned long flags;
+	int rc = -EAGAIN;
 	u8 tagbits;
 
-	/* for NULL destination EIDs, we may get a response from any peer */
-	if (daddr == MCTP_ADDR_NULL)
-		daddr = MCTP_ADDR_ANY;
-
 	/* be optimistic, alloc now */
 	key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
 	if (!key)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	/* 8 possible tag values */
 	tagbits = 0xff;
@@ -609,26 +488,14 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
 	 * tags. If we find a conflict, clear that bit from tagbits
 	 */
 	hlist_for_each_entry(tmp, &mns->keys, hlist) {
-		/* We can check the lookup fields (*_addr, tag) without the
-		 * lock held, they don't change over the lifetime of the key.
-		 */
-
 		/* if we don't own the tag, it can't conflict */
 		if (tmp->tag & MCTP_HDR_FLAG_TO)
 			continue;
 
-		if (!((tmp->peer_addr == daddr ||
-		       tmp->peer_addr == MCTP_ADDR_ANY) &&
-		       tmp->local_addr == saddr))
-			continue;
-
-		spin_lock(&tmp->lock);
-		/* key must still be valid. If we find a match, clear the
-		 * potential tag value
-		 */
-		if (tmp->valid)
+		if ((tmp->peer_addr == daddr ||
+		     tmp->peer_addr == MCTP_ADDR_ANY) &&
+		    tmp->local_addr == saddr)
 			tagbits &= ~(1 << tmp->tag);
-		spin_unlock(&tmp->lock);
 
 		if (!tagbits)
 			break;
@@ -637,19 +504,16 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
 	if (tagbits) {
 		key->tag = __ffs(tagbits);
 		mctp_reserve_tag(net, key, msk);
-		trace_mctp_key_acquire(key);
-
 		*tagp = key->tag;
+		rc = 0;
 	}
 
 	spin_unlock_irqrestore(&mns->keys_lock, flags);
 
-	if (!tagbits) {
+	if (!tagbits)
 		kfree(key);
-		return ERR_PTR(-EBUSY);
-	}
 
-	return key;
+	return rc;
 }
 
 /* routing lookups */
@@ -688,18 +552,14 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
 	return rt;
 }
 
-static struct mctp_route *mctp_route_lookup_null(struct net *net,
-						 struct net_device *dev)
+/* sends a skb to rt and releases the route. */
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
 {
-	struct mctp_route *rt;
+	int rc;
 
-	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
-		if (rt->dev->dev == dev && rt->type == RTN_LOCAL &&
-		    refcount_inc_not_zero(&rt->refs))
-			return rt;
-	}
-
-	return NULL;
+	rc = rt->output(rt, skb);
+	mctp_route_release(rt);
+	return rc;
 }
 
 static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
@@ -768,7 +628,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
 		/* copy message payload */
 		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
 
-		/* do route */
+		/* do route, but don't drop the rt reference */
 		rc = rt->output(rt, skb2);
 		if (rc)
 			break;
@@ -777,6 +637,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
 		pos += size;
 	}
 
+	mctp_route_release(rt);
 	consume_skb(skb);
 	return rc;
 }
@@ -786,52 +647,15 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 {
 	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
 	struct mctp_skb_cb *cb = mctp_cb(skb);
-	struct mctp_route tmp_rt;
-	struct mctp_sk_key *key;
-	struct net_device *dev;
 	struct mctp_hdr *hdr;
 	unsigned long flags;
 	unsigned int mtu;
 	mctp_eid_t saddr;
-	bool ext_rt;
 	int rc;
 	u8 tag;
 
-	rc = -ENODEV;
-
-	if (rt) {
-		ext_rt = false;
-		dev = NULL;
-
-		if (WARN_ON(!rt->dev))
-			goto out_release;
-
-	} else if (cb->ifindex) {
-		ext_rt = true;
-		rt = &tmp_rt;
-
-		rcu_read_lock();
-		dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
-		if (!dev) {
-			rcu_read_unlock();
-			return rc;
-		}
-
-		rt->dev = __mctp_dev_get(dev);
-		rcu_read_unlock();
-
-		if (!rt->dev)
-			goto out_release;
-
-		/* establish temporary route - we set up enough to keep
-		 * mctp_route_output happy
-		 */
-		rt->output = mctp_route_output;
-		rt->mtu = 0;
-
-	} else {
+	if (WARN_ON(!rt->dev))
 		return -EINVAL;
-	}
 
 	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
 	if (rt->dev->num_addrs == 0) {
@@ -844,23 +668,18 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
 
 	if (rc)
-		goto out_release;
+		return rc;
 
 	if (req_tag & MCTP_HDR_FLAG_TO) {
-		key = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
-		if (IS_ERR(key)) {
-			rc = PTR_ERR(key);
-			goto out_release;
-		}
-		mctp_skb_set_flow(skb, key);
-		/* done with the key in this scope */
-		mctp_key_unref(key);
+		rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+		if (rc)
+			return rc;
 		tag |= MCTP_HDR_FLAG_TO;
 	} else {
-		key = NULL;
 		tag = req_tag;
 	}
 
+
 	skb->protocol = htons(ETH_P_MCTP);
 	skb->priority = 0;
 	skb_reset_transport_header(skb);
@@ -880,21 +699,12 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 	mtu = mctp_route_mtu(rt);
 
 	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
-		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
-			MCTP_HDR_FLAG_EOM | tag;
-		rc = rt->output(rt, skb);
+		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
+			tag;
+		return mctp_do_route(rt, skb);
 	} else {
-		rc = mctp_do_fragment_route(rt, skb, mtu, tag);
+		return mctp_do_fragment_route(rt, skb, mtu, tag);
 	}
-
-out_release:
-	if (!ext_rt)
-		mctp_route_release(rt);
-
-	dev_put(dev);
-
-	return rc;
-
 }
 
 /* route management */
@@ -931,7 +741,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
 	rt->max = daddr_start + daddr_extent;
 	rt->mtu = mtu;
 	rt->dev = mdev;
-	mctp_dev_hold(rt->dev);
+	dev_hold(rt->dev->dev);
 	rt->type = type;
 	rt->output = rtfn;
 
@@ -1012,18 +822,13 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
 				struct net_device *orig_dev)
 {
 	struct net *net = dev_net(dev);
-	struct mctp_dev *mdev;
 	struct mctp_skb_cb *cb;
 	struct mctp_route *rt;
 	struct mctp_hdr *mh;
 
-	rcu_read_lock();
-	mdev = __mctp_dev_get(dev);
-	rcu_read_unlock();
-	if (!mdev) {
-		/* basic non-data sanity checks */
+	/* basic non-data sanity checks */
+	if (dev->type != ARPHRD_MCTP)
 		goto err_drop;
-	}
 
 	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
 		goto err_drop;
@@ -1036,27 +841,16 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
 	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
 		goto err_drop;
 
-	/* MCTP drivers must populate halen/haddr */
-	if (dev->type == ARPHRD_MCTP) {
-		cb = mctp_cb(skb);
-	} else {
-		cb = __mctp_cb(skb);
-		cb->halen = 0;
-	}
-	cb->net = READ_ONCE(mdev->net);
-	cb->ifindex = dev->ifindex;
+	cb = __mctp_cb(skb);
+	rcu_read_lock();
+	cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
+	rcu_read_unlock();
 
 	rt = mctp_route_lookup(net, cb->net, mh->dest);
-
-	/* NULL EID, but addressed to our physical address */
-	if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
-		rt = mctp_route_lookup_null(net, dev);
-
 	if (!rt)
 		goto err_drop;
 
-	rt->output(rt, skb);
-	mctp_route_release(rt);
+	mctp_do_route(rt, skb);
 
 	return NET_RX_SUCCESS;
 
@@ -1133,15 +927,10 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }
 
-static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
-	[RTAX_MTU] = { .type = NLA_U32 },
-};
-
 static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 			 struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[RTA_MAX + 1];
-	struct nlattr *tbx[RTAX_MAX + 1];
 	mctp_eid_t daddr_start;
 	struct mctp_dev *mdev;
 	struct rtmsg *rtm;
@@ -1158,15 +947,8 @@ static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 	}
 
+	/* TODO: parse mtu from nlparse */
 	mtu = 0;
-	if (tb[RTA_METRICS]) {
-		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
-				      rta_metrics_policy, NULL);
-		if (rc < 0)
-			return rc;
-		if (tbx[RTAX_MTU])
-			mtu = nla_get_u32(tbx[RTAX_MTU]);
-	}
 
 	if (rtm->rtm_type != RTN_UNICAST)
 		return -EINVAL;
@@ -1335,7 +1117,3 @@ void __exit mctp_routes_exit(void)
 	rtnl_unregister(PF_MCTP, RTM_GETROUTE);
 	dev_remove_pack(&mctp_packet_type);
 }
-
-#if IS_ENABLED(CONFIG_MCTP_TEST)
-#include "test/route-test.c"
-#endif
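
With mctp_alloc_local_tag() back in its int-returning form above, tag selection is again a pure bitmask walk: all eight 3-bit tag values start free in tagbits = 0xff, each conflicting key clears one bit, and __ffs() picks the lowest survivor. A self-contained sketch of just that selection step, for illustration only (the `used` mask is a hypothetical stand-in for the walk over mns->keys):

#include <stdint.h>

/* Illustrative only: pick the lowest free MCTP tag from a mask of
 * tags already owned for this (saddr, daddr) pair. Returns 0-7, or
 * -1 when all eight values are busy (-EAGAIN in the kernel code).
 */
static int pick_local_tag(uint8_t used)
{
	uint8_t tagbits = 0xff & ~used;	/* 8 possible tag values */

	if (!tagbits)
		return -1;
	return __builtin_ctz(tagbits);	/* __ffs() equivalent */
}
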
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d6fdc5782d..6e587feb70 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -230,8 +230,8 @@ static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
  * Since those fields can change at any moment, use READ_ONCE to
  * access both.
  */
-static const struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
-						   struct sk_buff *skb)
+static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
+					     struct sk_buff *skb)
 {
 	u32 hash = 0;
 	int nh_index = 0;
@@ -343,8 +343,8 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 {
 	struct net *net = dev_net(dev);
 	struct mpls_shim_hdr *hdr;
-	const struct mpls_nh *nh;
 	struct mpls_route *rt;
+	struct mpls_nh *nh;
 	struct mpls_entry_decoded dec;
 	struct net_device *out_dev;
 	struct mpls_dev *out_mdev;
@@ -409,7 +409,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 		goto err;
 
 	/* Find the output device */
-	out_dev = nh->nh_dev;
+	out_dev = rcu_dereference(nh->nh_dev);
 	if (!mpls_output_possible(out_dev))
 		goto tx_err;
 
@@ -698,7 +698,7 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
 	    (dev->addr_len != nh->nh_via_alen))
 		goto errout;
 
-	nh->nh_dev = dev;
+	RCU_INIT_POINTER(nh->nh_dev, dev);
 
 	if (!(dev->flags & IFF_UP)) {
 		nh->nh_flags |= RTNH_F_DEAD;
@@ -1510,9 +1510,12 @@ static int mpls_ifdown(struct net_device *dev, int event)
 		u8 deleted = 0;
 
 		for_nexthops(rt) {
-			if (!nh->nh_dev || nh->nh_dev == dev)
+			struct net_device *nh_dev =
+				rtnl_dereference(nh->nh_dev);
+
+			if (!nh_dev || nh_dev == dev)
 				deleted++;
-			if (nh->nh_dev == dev)
+			if (nh_dev == dev)
 				nh_del = true;
 		} endfor_nexthops(rt);
 
@@ -1537,7 +1540,7 @@ static int mpls_ifdown(struct net_device *dev, int event)
 		change_nexthops(rt) {
 			unsigned int nh_flags = nh->nh_flags;
 
-			if (nh->nh_dev != dev)
+			if (rtnl_dereference(nh->nh_dev) != dev)
 				goto next;
 
 			switch (event) {
@@ -1550,7 +1553,7 @@ static int mpls_ifdown(struct net_device *dev, int event)
 				break;
 			}
 
 			if (event == NETDEV_UNREGISTER)
-				nh->nh_dev = NULL;
+				RCU_INIT_POINTER(nh->nh_dev, NULL);
 
 			if (nh->nh_flags != nh_flags)
 				WRITE_ONCE(nh->nh_flags, nh_flags);
@@ -1585,12 +1588,14 @@ static void mpls_ifup(struct net_device *dev, unsigned int flags)
 		alive = 0;
 		change_nexthops(rt) {
 			unsigned int nh_flags = nh->nh_flags;
+			struct net_device *nh_dev =
+				rtnl_dereference(nh->nh_dev);
 
 			if (!(nh_flags & flags)) {
 				alive++;
 				continue;
 			}
-			if (nh->nh_dev != dev)
+			if (nh_dev != dev)
 				continue;
 			alive++;
 			nh_flags &= ~flags;
@@ -1607,7 +1612,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct mpls_dev *mdev;
 	unsigned int flags;
-	int err;
 
 	if (event == NETDEV_REGISTER) {
 		mdev = mpls_add_dev(dev);
@@ -1622,6 +1626,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
 		return NOTIFY_OK;
 
 	switch (event) {
+		int err;
 
 	case NETDEV_DOWN:
 		err = mpls_ifdown(dev, event);
@@ -2025,7 +2030,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
 				nh->nh_via_alen))
 			goto nla_put_failure;
-		dev = nh->nh_dev;
+		dev = rtnl_dereference(nh->nh_dev);
 		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
 			goto nla_put_failure;
 		if (nh->nh_flags & RTNH_F_LINKDOWN)
@@ -2043,7 +2048,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			goto nla_put_failure;
 
 		for_nexthops(rt) {
-			dev = nh->nh_dev;
+			dev = rtnl_dereference(nh->nh_dev);
 			if (!dev)
 				continue;
 
@@ -2154,14 +2159,18 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
 static bool mpls_rt_uses_dev(struct mpls_route *rt,
 			     const struct net_device *dev)
 {
+	struct net_device *nh_dev;
+
 	if (rt->rt_nhn == 1) {
 		struct mpls_nh *nh = rt->rt_nh;
 
-		if (nh->nh_dev == dev)
+		nh_dev = rtnl_dereference(nh->nh_dev);
+		if (dev == nh_dev)
 			return true;
 	} else {
 		for_nexthops(rt) {
-			if (nh->nh_dev == dev)
+			nh_dev = rtnl_dereference(nh->nh_dev);
+			if (nh_dev == dev)
 				return true;
 		} endfor_nexthops(rt);
 	}
@@ -2249,7 +2258,7 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
 		size_t nhsize = 0;
 
 		for_nexthops(rt) {
-			if (!nh->nh_dev)
+			if (!rtnl_dereference(nh->nh_dev))
 				continue;
 			nhsize += nla_total_size(sizeof(struct rtnexthop));
 			/* RTA_VIA */
@@ -2360,12 +2369,12 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 	u32 labels[MAX_NEW_LABELS];
 	struct mpls_shim_hdr *hdr;
 	unsigned int hdr_size = 0;
-	const struct mpls_nh *nh;
 	struct net_device *dev;
 	struct mpls_route *rt;
 	struct rtmsg *rtm, *r;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
+	struct mpls_nh *nh;
 	u8 n_labels;
 	int err;
 
@@ -2495,7 +2504,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
 				nh->nh_via_alen))
 			goto nla_put_failure;
-		dev = nh->nh_dev;
+		dev = rtnl_dereference(nh->nh_dev);
 		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
 			goto nla_put_failure;
 
@@ -2534,7 +2543,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
 		if (IS_ERR(rt0))
 			goto nort0;
-		rt0->rt_nh->nh_dev = lo;
+		RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
 		rt0->rt_protocol = RTPROT_KERNEL;
 		rt0->rt_payload_type = MPT_IPV4;
 		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
@@ -2548,7 +2557,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
 		if (IS_ERR(rt2))
 			goto nort2;
-		rt2->rt_nh->nh_dev = lo;
+		RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
 		rt2->rt_protocol = RTPROT_KERNEL;
 		rt2->rt_payload_type = MPT_IPV6;
 		rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index b9f492ddf9..838cdfc10e 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -87,7 +87,7 @@ enum mpls_payload_type {
 };
 
 struct mpls_nh { /* next hop label forwarding entry */
-	struct net_device *nh_dev;
+	struct net_device __rcu *nh_dev;
 
 	/* nh_flags is accessed under RCU in the packet path; it is
	 * modified handling netdev events with rtnl lock held
@@ -158,16 +158,17 @@ struct mpls_route { /* next hop label forwarding entry */
 };
 
 #define for_nexthops(rt) {					\
-	int nhsel; const struct mpls_nh *nh;			\
-	for (nhsel = 0, nh = (rt)->rt_nh;			\
+	int nhsel; struct mpls_nh *nh; u8 *__nh;		\
+	for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \
 	     nhsel < (rt)->rt_nhn;				\
-	     nh = (void *)nh + (rt)->rt_nh_size, nhsel++)
+	     __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
 
 #define change_nexthops(rt) {					\
-	int nhsel; struct mpls_nh *nh;				\
-	for (nhsel = 0, nh = (rt)->rt_nh;			\
+	int nhsel; struct mpls_nh *nh; u8 *__nh;		\
+	for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh),	\
+	     __nh = (u8 *)((rt)->rt_nh);			\
 	     nhsel < (rt)->rt_nhn;				\
-	     nh = (void *)nh + (rt)->rt_nh_size, nhsel++)
+	     __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
 
 #define endfor_nexthops(rt) }
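
All of the af_mpls.c accesses above follow from the one-line internal.h change: once nh_dev is annotated __rcu, sparse requires every read to go through an accessor matching the caller's context. A generic sketch of that discipline on a hypothetical structure, illustration only (struct foo and its helpers are made up, not the patch's code):

#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>

/* Illustrative only: the three access patterns the hunks above apply
 * to nh_dev, shown on a made-up struct foo.
 */
struct foo {
	struct net_device __rcu *dev;
};

/* packet path: caller is inside rcu_read_lock() */
static struct net_device *foo_dev_rcu(struct foo *f)
{
	return rcu_dereference(f->dev);
}

/* control path: caller holds RTNL, no RCU read side needed */
static struct net_device *foo_dev_rtnl(struct foo *f)
{
	return rtnl_dereference(f->dev);
}

/* initial publish (no concurrent readers yet, or under RTNL) */
static void foo_set_dev(struct foo *f, struct net_device *dev)
{
	RCU_INIT_POINTER(f->dev, dev);
}
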
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 7558802a14..8d1c67b935 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -74,7 +74,6 @@ bool mptcp_mib_alloc(struct net *net)
 
 void mptcp_seq_show(struct seq_file *seq)
 {
-	unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1];
 	struct net *net = seq->private;
 	int i;
 
@@ -84,13 +83,17 @@ void mptcp_seq_show(struct seq_file *seq)
 
 	seq_puts(seq, "\nMPTcpExt:");
 
-	memset(sum, 0, sizeof(sum));
-	if (net->mib.mptcp_statistics)
-		snmp_get_cpu_field_batch(sum, mptcp_snmp_list,
-					 net->mib.mptcp_statistics);
+	if (!net->mib.mptcp_statistics) {
+		for (i = 0; mptcp_snmp_list[i].name; i++)
+			seq_puts(seq, " 0");
+
+		seq_putc(seq, '\n');
+		return;
+	}
 
 	for (i = 0; mptcp_snmp_list[i].name; i++)
-		seq_printf(seq, " %lu", sum[i]);
-
+		seq_printf(seq, " %lu",
+			   snmp_fold_field(net->mib.mptcp_statistics,
+					   mptcp_snmp_list[i].entry));
 	seq_putc(seq, '\n');
 }
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f44125dd66..292374fb07 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -113,13 +113,37 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_info *info = _info;
+	u32 flags = 0;
+	bool slow;
+	u8 val;
 
 	r->idiag_rqueue = sk_rmem_alloc_get(sk);
 	r->idiag_wqueue = sk_wmem_alloc_get(sk);
 	if (!info)
 		return;
 
-	mptcp_diag_fill_info(msk, info);
+	slow = lock_sock_fast(sk);
+	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
+	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
+	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
+	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
+	info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
+	val = mptcp_pm_get_add_addr_signal_max(msk);
+	info->mptcpi_add_addr_signal_max = val;
+	val = mptcp_pm_get_add_addr_accept_max(msk);
+	info->mptcpi_add_addr_accepted_max = val;
+	info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
+	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
+		flags |= MPTCP_INFO_FLAG_FALLBACK;
+	if (READ_ONCE(msk->can_ack))
+		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
+	info->mptcpi_flags = flags;
+	info->mptcpi_token = READ_ONCE(msk->token);
+	info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
+	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
+	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
+	unlock_sock_fast(sk, slow);
 }
 
 static const struct inet_diag_handler mptcp_diag_handler = {
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 645dd984fe..e515ba9ccb 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -734,7 +734,9 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
 	/* can't send MP_PRIO with MPC, as they share the same option space:
 	 * 'backup'. Also it makes no sense at all
 	 */
-	if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC))
+	if (!subflow->send_mp_prio ||
+	    ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+	      OPTION_MPTCP_MPC_ACK) & opts->suboptions))
 		return false;
 
 	/* account for the trailing 'nop' option */
@@ -768,28 +770,6 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	return true;
 }
 
-static bool mptcp_established_options_fastclose(struct sock *sk,
-						unsigned int *size,
-						unsigned int remaining,
-						struct mptcp_out_options *opts)
-{
-	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
-	if (likely(!subflow->send_fastclose))
-		return false;
-
-	if (remaining < TCPOLEN_MPTCP_FASTCLOSE)
-		return false;
-
-	*size = TCPOLEN_MPTCP_FASTCLOSE;
-	opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
-	opts->rcvr_key = msk->remote_key;
-
-	pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
-	return true;
-}
-
 static bool mptcp_established_options_mp_fail(struct sock *sk,
 					      unsigned int *size,
 					      unsigned int remaining,
@@ -828,12 +808,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 		return false;
 
 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
-		if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) ||
-		    mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+		if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
 			*size += opt_size;
 			remaining -= opt_size;
 		}
-		/* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */
 		if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
 			*size += opt_size;
 			remaining -= opt_size;
@@ -1236,7 +1214,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
 		WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
 }
 
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum)
 {
 	struct csum_pseudo_header header;
 	__wsum csum;
@@ -1251,14 +1229,14 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
 	header.data_len = htons(data_len);
 	header.csum = 0;
 
-	csum = csum_partial(&header, sizeof(header), sum);
+	csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum));
 	return (__force u16)csum_fold(csum);
 }
 
 static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
 {
 	return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
-				 ~csum_unfold(mpext->csum));
+				 mpext->csum);
 }
 
 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
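
The csum hunks above move the ~csum_unfold() of the DSS checksum into __mptcp_make_csum() itself, so callers now hand over the raw __sum16 taken from the option. The underlying arithmetic is the usual ones-complement sum over a pseudo-header folded together with the payload checksum; a toy, host-order sketch of that fold, illustration only (plain integers instead of the kernel's csum helpers):

#include <stdint.h>
#include <stddef.h>

/* Illustrative only: 16-bit ones-complement folding as used for the
 * MPTCP DSS checksum. words[] stands in for the pseudo-header
 * (data_seq, subflow_seq, data_len); payload_csum for mpext->csum.
 */
static uint16_t csum_fold32(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* fold carries... */
	sum = (sum & 0xffff) + (sum >> 16);	/* ...twice is enough */
	return (uint16_t)~sum;
}

static uint16_t dss_csum(const uint16_t *words, size_t n, uint16_t payload_csum)
{
	uint32_t sum = (uint16_t)~payload_csum;	/* ~csum_unfold() analogue */
	size_t i;

	for (i = 0; i < n; i++)
		sum += words[i];
	return csum_fold32(sum);
}
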
@@ -1278,8 +1256,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 		ptr += 2;
 	}
 
-	/* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive,
-	 * see mptcp_established_options*()
+	/* RST is mutually exclusive with everything else */
+	if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
+		*ptr++ = mptcp_option(MPTCPOPT_RST,
+				      TCPOLEN_MPTCP_RST,
+				      opts->reset_transient,
+				      opts->reset_reason);
+		return;
+	}
+
+	/* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
+	 * mptcp_established_options*()
 	 */
 	if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
 		struct mptcp_ext *mpext = &opts->ext_copy;
@@ -1336,7 +1323,8 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 			}
 			ptr += 1;
 		}
-	} else if (OPTIONS_MPTCP_MPC & opts->suboptions) {
+	} else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+		    OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
 		u8 len, flag = MPTCP_CAP_HMAC_SHA256;
 
 		if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
@@ -1380,7 +1368,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 				__mptcp_make_csum(opts->data_seq,
 						  opts->subflow_seq,
 						  opts->data_len,
-						  ~csum_unfold(opts->csum)), ptr);
+						  opts->csum), ptr);
 		} else {
 			put_unaligned_be32(opts->data_len << 16 |
 					   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1389,29 +1377,27 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 
 		/* MPC is additionally mutually exclusive with MP_PRIO */
 		goto mp_capable_done;
-	} else if (OPTIONS_MPTCP_MPJ & opts->suboptions) {
-		if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
-			*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
-					      TCPOLEN_MPTCP_MPJ_SYN,
-					      opts->backup, opts->join_id);
-			put_unaligned_be32(opts->token, ptr);
-			ptr += 1;
-			put_unaligned_be32(opts->nonce, ptr);
-			ptr += 1;
-		} else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
-			*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
-					      TCPOLEN_MPTCP_MPJ_SYNACK,
-					      opts->backup, opts->join_id);
-			put_unaligned_be64(opts->thmac, ptr);
-			ptr += 2;
-			put_unaligned_be32(opts->nonce, ptr);
-			ptr += 1;
-		} else {
-			*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
-					      TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
-			memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
-			ptr += 5;
-		}
+	} else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
+		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+				      TCPOLEN_MPTCP_MPJ_SYN,
+				      opts->backup, opts->join_id);
+		put_unaligned_be32(opts->token, ptr);
+		ptr += 1;
+		put_unaligned_be32(opts->nonce, ptr);
+		ptr += 1;
+	} else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
+		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+				      TCPOLEN_MPTCP_MPJ_SYNACK,
+				      opts->backup, opts->join_id);
+		put_unaligned_be64(opts->thmac, ptr);
+		ptr += 2;
+		put_unaligned_be32(opts->nonce, ptr);
+		ptr += 1;
+	} else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
+		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+				      TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
+		memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
+		ptr += 5;
 	} else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
 		u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
 		u8 echo = MPTCP_ADDR_ECHO;
@@ -1468,24 +1454,6 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 			ptr += 1;
 		}
 	}
-	} else if (unlikely(OPTION_MPTCP_FASTCLOSE & opts->suboptions)) {
-		/* FASTCLOSE is mutually exclusive with others except RST */
-		*ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE,
-				      TCPOLEN_MPTCP_FASTCLOSE,
-				      0, 0);
-		put_unaligned_be64(opts->rcvr_key, ptr);
-		ptr += 2;
-
-		if (OPTION_MPTCP_RST & opts->suboptions)
-			goto mp_rst;
-		return;
-	} else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
-mp_rst:
-		*ptr++ = mptcp_option(MPTCPOPT_RST,
-				      TCPOLEN_MPTCP_RST,
-				      opts->reset_transient,
-				      opts->reset_reason);
-		return;
 	}
 
 	if (OPTION_MPTCP_PRIO & opts->suboptions) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 7bea318ac5..d9790d6fbc 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -172,28 +172,9 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
 	spin_unlock_bh(&pm->lock);
 }
 
-void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
-				 const struct mptcp_subflow_context *subflow)
+void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id)
 {
-	struct mptcp_pm_data *pm = &msk->pm;
-	bool update_subflows;
-
-	update_subflows = (ssk->sk_state == TCP_CLOSE) &&
-			  (subflow->request_join || subflow->mp_join);
-	if (!READ_ONCE(pm->work_pending) && !update_subflows)
-		return;
-
-	spin_lock_bh(&pm->lock);
-	if (update_subflows)
-		pm->subflows--;
-
-	/* Even if this subflow is not really established, tell the PM to try
-	 * to pick the next ones, if possible.
-	 */
-	if (mptcp_pm_nl_check_work_pending(msk))
-		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
-
-	spin_unlock_bh(&pm->lock);
+	pr_debug("msk=%p", msk);
 }
 
 void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
@@ -379,7 +360,7 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
 	}
 }
 
-void mptcp_pm_data_reset(struct mptcp_sock *msk)
+void mptcp_pm_data_init(struct mptcp_sock *msk)
 {
 	msk->pm.add_addr_signaled = 0;
 	msk->pm.add_addr_accepted = 0;
@@ -393,16 +374,11 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
 	WRITE_ONCE(msk->pm.accept_subflow, false);
 	WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
 	msk->pm.status = 0;
-	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
 
-	mptcp_pm_nl_data_init(msk);
-}
-
-void mptcp_pm_data_init(struct mptcp_sock *msk)
-{
 	spin_lock_init(&msk->pm.lock);
 	INIT_LIST_HEAD(&msk->pm.anno_list);
-	mptcp_pm_data_reset(msk);
+
+	mptcp_pm_nl_data_init(msk);
 }
 
 void __init mptcp_pm_init(void)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 4b5d795383..cf0f700f46 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -38,6 +38,9 @@ struct mptcp_pm_add_entry {
 	u8			retrans_times;
 };
 
+#define MAX_ADDR_ID		255
+#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
+
 struct pm_nl_pernet {
 	/* protects pernet updates */
 	spinlock_t		lock;
@@ -49,14 +52,14 @@ struct pm_nl_pernet {
 	unsigned int		local_addr_max;
 	unsigned int		subflows_max;
 	unsigned int		next_id;
-	DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+	unsigned long		id_bitmap[BITMAP_SZ];
 };
 
 #define MPTCP_PM_ADDR_MAX	8
 #define ADD_ADDR_RETRANS_MAX	3
 
 static bool addresses_equal(const struct mptcp_addr_info *a,
-			    const struct mptcp_addr_info *b, bool use_port)
+			    struct mptcp_addr_info *b, bool use_port)
 {
 	bool addr_equals = false;
 
@@ -165,13 +168,11 @@ select_local_address(const struct pm_nl_pernet *pernet,
 	msk_owned_by_me(msk);
 
 	rcu_read_lock();
+	__mptcp_flush_join_list(msk);
 	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
 		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
 			continue;
 
-		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
-			continue;
-
 		if (entry->addr.family != sk->sk_family) {
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 			if ((entry->addr.family == AF_INET &&
@@ -182,17 +183,23 @@ select_local_address(const struct pm_nl_pernet *pernet,
 			continue;
 		}
 
-		ret = entry;
-		break;
+		/* avoid any address already in use by subflows and
+		 * pending join
+		 */
+		if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
+			ret = entry;
+			break;
+		}
 	}
 	rcu_read_unlock();
 	return ret;
 }
 
 static struct mptcp_pm_addr_entry *
-select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
+select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
 {
 	struct mptcp_pm_addr_entry *entry, *ret = NULL;
+	int i = 0;
 
 	rcu_read_lock();
 	/* do not keep any additional per socket state, just signal
@@ -201,14 +208,12 @@ select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
 	 * can lead to additional addresses not being announced.
 	 */
 	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
-		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
-			continue;
-
 		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
 			continue;
-
-		ret = entry;
-		break;
+		if (i++ == pos) {
+			ret = entry;
+			break;
+		}
 	}
 	rcu_read_unlock();
 	return ret;
@@ -250,17 +255,12 @@ unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk)
 }
 EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
 
-bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
+static void check_work_pending(struct mptcp_sock *msk)
 {
-	struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
-
-	if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
-	    (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
-			       MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) {
+	if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
+	    (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
+	     msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
 		WRITE_ONCE(msk->pm.work_pending, false);
-		return false;
-	}
-	return true;
 }
 
 struct mptcp_pm_add_entry *
@@ -429,7 +429,6 @@ static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr
 static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
 					      struct mptcp_addr_info *addrs)
 {
-	bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
 	struct sock *sk = (struct sock *)msk, *ssk;
 	struct mptcp_subflow_context *subflow;
 	struct mptcp_addr_info remote = { 0 };
@@ -437,28 +436,22 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
 	int i = 0;
 
 	subflows_max = mptcp_pm_get_subflows_max(msk);
-	remote_address((struct sock_common *)sk, &remote);
 
 	/* Non-fullmesh endpoint, fill in the single entry
 	 * corresponding to the primary MPC subflow remote address
 	 */
 	if (!fullmesh) {
-		if (deny_id0)
-			return 0;
-
+		remote_address((struct sock_common *)sk, &remote);
 		msk->pm.subflows++;
 		addrs[i++] = remote;
 	} else {
 		mptcp_for_each_subflow(msk, subflow) {
 			ssk = mptcp_subflow_tcp_sock(subflow);
-			remote_address((struct sock_common *)ssk, &addrs[i]);
-			if (deny_id0 && addresses_equal(&addrs[i], &remote, false))
-				continue;
-
-			if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
+			remote_address((struct sock_common *)ssk, &remote);
+			if (!lookup_address_in_vec(addrs, i, &remote) &&
 			    msk->pm.subflows < subflows_max) {
 				msk->pm.subflows++;
-				i++;
+				addrs[i++] = remote;
 			}
 		}
 	}
@@ -467,48 +460,17 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
 	return i;
 }
 
 static struct mptcp_pm_addr_entry *
-__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+__lookup_addr(struct pm_nl_pernet *pernet, struct mptcp_addr_info *info)
 {
 	struct mptcp_pm_addr_entry *entry;
 
 	list_for_each_entry(entry, &pernet->local_addr_list, list) {
-		if (entry->addr.id == id)
+		if (addresses_equal(&entry->addr, info, true))
 			return entry;
 	}
 	return NULL;
 }
 
-static struct mptcp_pm_addr_entry *
-__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
-	      bool lookup_by_id)
-{
-	struct mptcp_pm_addr_entry *entry;
-
-	list_for_each_entry(entry, &pernet->local_addr_list, list) {
-		if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
-		    (lookup_by_id && entry->addr.id == info->id))
-			return entry;
-	}
-	return NULL;
-}
-
-static int
-lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
-{
-	struct mptcp_pm_addr_entry *entry;
-	int ret = -1;
-
-	rcu_read_lock();
-	list_for_each_entry(entry, &pernet->local_addr_list, list) {
-		if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
-			ret = entry->addr.id;
-			break;
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 {
 	struct sock *sk = (struct sock *)msk;
@@ -524,19 +486,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 	local_addr_max = mptcp_pm_get_local_addr_max(msk);
 	subflows_max = mptcp_pm_get_subflows_max(msk);
 
-	/* do lazy endpoint usage accounting for the MPC subflows */
-	if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
-		struct mptcp_addr_info mpc_addr;
-		int mpc_id;
-
-		local_address((struct sock_common *)msk->first, &mpc_addr);
-		mpc_id = lookup_id_by_addr(pernet, &mpc_addr);
-		if (mpc_id >= 0)
-			__clear_bit(mpc_id, msk->pm.id_avail_bitmap);
-
-		msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
-	}
-
 	pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
 		 msk->pm.local_addr_used, local_addr_max,
 		 msk->pm.add_addr_signaled, add_addr_signal_max,
@@ -544,51 +493,47 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 
 	/* check first for announce */
 	if (msk->pm.add_addr_signaled < add_addr_signal_max) {
-		local = select_signal_address(pernet, msk);
-
-		/* due to racing events on both ends we can reach here while
-		 * previous add address is still running: if we invoke now
-		 * mptcp_pm_announce_addr(), that will fail and the
-		 * corresponding id will be marked as used.
-		 * Instead let the PM machinery reschedule us when the
-		 * current address announce will be completed.
-		 */
-		if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
-			return;
+		local = select_signal_address(pernet,
+					      msk->pm.add_addr_signaled);
 
 		if (local) {
 			if (mptcp_pm_alloc_anno_list(msk, local)) {
-				__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
 				msk->pm.add_addr_signaled++;
 				mptcp_pm_announce_addr(msk, &local->addr, false);
 				mptcp_pm_nl_addr_send_ack(msk);
 			}
+		} else {
+			/* pick failed, avoid further attempts later */
+			msk->pm.local_addr_used = add_addr_signal_max;
 		}
+
+		check_work_pending(msk);
 	}
 
 	/* check if should create a new subflow */
-	while (msk->pm.local_addr_used < local_addr_max &&
-	       msk->pm.subflows < subflows_max) {
-		struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
-		bool fullmesh;
-		int i, nr;
-
+	if (msk->pm.local_addr_used < local_addr_max &&
+	    msk->pm.subflows < subflows_max &&
+	    !READ_ONCE(msk->pm.remote_deny_join_id0)) {
 		local = select_local_address(pernet, msk);
-		if (!local)
-			break;
+		if (local) {
+			bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+			struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+			int i, nr;
 
-		fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+			msk->pm.local_addr_used++;
+			check_work_pending(msk);
+			nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
+			spin_unlock_bh(&msk->pm.lock);
+			for (i = 0; i < nr; i++)
+				__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
+			spin_lock_bh(&msk->pm.lock);
+			return;
+		}
 
-		msk->pm.local_addr_used++;
-		nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
-		if (nr)
-			__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
-		spin_unlock_bh(&msk->pm.lock);
-		for (i = 0; i < nr; i++)
-			__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
-		spin_lock_bh(&msk->pm.lock);
+		/* lookup failed, avoid further attempts later */
+		msk->pm.local_addr_used = local_addr_max;
+		check_work_pending(msk);
 	}
-	mptcp_pm_nl_check_work_pending(msk);
 }
 
 static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
@@ -618,6 +563,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
 	subflows_max = mptcp_pm_get_subflows_max(msk);
 
 	rcu_read_lock();
+	__mptcp_flush_join_list(msk);
 	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
 		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
 			continue;
@@ -715,6 +661,7 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 	    !mptcp_pm_should_rm_signal(msk))
 		return;
 
+	__mptcp_flush_join_list(msk);
 	subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
 	if (subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -728,9 +675,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 	}
 }
 
-static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
-					struct mptcp_addr_info *addr,
-					u8 bkup)
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+				 struct mptcp_addr_info *addr,
+				 u8 bkup)
 {
 	struct mptcp_subflow_context *subflow;
 
@@ -802,15 +749,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
 			 i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
 			spin_unlock_bh(&msk->pm.lock);
 			mptcp_subflow_shutdown(sk, ssk, how);
-
-			/* the following takes care of updating the subflows counter */
 			mptcp_close_ssk(sk, ssk, subflow);
 			spin_lock_bh(&msk->pm.lock);
 
 			removed = true;
+			msk->pm.subflows--;
 			__MPTCP_INC_STATS(sock_net(sk), rm_type);
 		}
-		__set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
 		if (!removed)
 			continue;
 
@@ -840,9 +785,6 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 
 	msk_owned_by_me(msk);
 
-	if (!(pm->status & MPTCP_PM_WORK_MASK))
-		return;
-
 	spin_lock_bh(&msk->pm.lock);
 
 	pr_debug("msk=%p status=%x", msk, pm->status);
@@ -888,7 +830,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
 	/* to keep the code simple, don't do IDR-like allocation for address ID,
 	 * just bail when we exceed limits
 	 */
-	if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
+	if (pernet->next_id == MAX_ADDR_ID)
 		pernet->next_id = 1;
 	if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
 		goto out;
@@ -908,15 +850,16 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
 	if (!entry->addr.id) {
find_next:
 		entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
-						    MPTCP_PM_MAX_ADDR_ID + 1,
+						    MAX_ADDR_ID + 1,
 						    pernet->next_id);
-		if (!entry->addr.id && pernet->next_id != 1) {
+		if ((!entry->addr.id || entry->addr.id > MAX_ADDR_ID) &&
+		    pernet->next_id != 1) {
 			pernet->next_id = 1;
 			goto find_next;
 		}
 	}
 
-	if (!entry->addr.id)
+	if (!entry->addr.id || entry->addr.id > MAX_ADDR_ID)
 		goto out;
 
 	__set_bit(entry->addr.id, pernet->id_bitmap);
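
The find_next hunk above keeps the wrap-around ID allocation: scan id_bitmap from next_id, and if that runs past MAX_ADDR_ID, retry once from 1 before giving up. A self-contained sketch of the same loop, illustration only (the bit helpers are simplified stand-ins for test_bit()/__set_bit()):

#include <stdint.h>

#define ID_MAX 255	/* mirrors MAX_ADDR_ID above */

static int id_test(const uint64_t *bm, unsigned int id)
{
	return (bm[id / 64] >> (id % 64)) & 1;
}

/* Illustrative only: allocate the next free address ID, wrapping from
 * next_id back to 1; returns -1 when all IDs are in use.
 */
static int alloc_addr_id(uint64_t *bm, unsigned int *next_id)
{
	unsigned int start = *next_id ? *next_id : 1, id;
	int pass;

	for (pass = 0; pass < 2; pass++, start = 1)
		for (id = start; id <= ID_MAX; id++)
			if (!id_test(bm, id)) {
				bm[id / 64] |= 1ULL << (id % 64);
				*next_id = id;
				return (int)id;
			}
	return -1;
}
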
@@ -1279,6 +1222,18 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static struct mptcp_pm_addr_entry *
+__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+{
+	struct mptcp_pm_addr_entry *entry;
+
+	list_for_each_entry(entry, &pernet->local_addr_list, list) {
+		if (entry->addr.id == id)
+			return entry;
+	}
+	return NULL;
+}
+
 int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
 					 u8 *flags, int *ifindex)
 {
@@ -1537,7 +1492,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
 	list_splice_init(&pernet->local_addr_list, &free_list);
 	__reset_counters(pernet);
 	pernet->next_id = 1;
-	bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+	bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
 	spin_unlock_bh(&pernet->lock);
 	mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
 	synchronize_rcu();
@@ -1647,7 +1602,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
 	pernet = net_generic(net, pm_nl_pernet_id);
 
 	spin_lock_bh(&pernet->lock);
-	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
+	for (i = id; i < MAX_ADDR_ID + 1; i++) {
 		if (test_bit(i, pernet->id_bitmap)) {
 			entry = __lookup_addr_by_id(pernet, i);
 			if (!entry)
@@ -1781,27 +1736,22 @@ static int mptcp_nl_addr_backup(struct net *net,
 
 static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
 {
-	struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
 	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
 	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+	struct mptcp_pm_addr_entry addr, *entry;
 	struct net *net = sock_net(skb->sk);
-	u8 bkup = 0, lookup_by_id = 0;
+	u8 bkup = 0;
 	int ret;
 
-	ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+	ret = mptcp_pm_parse_addr(attr, info, true, &addr);
 	if (ret < 0)
 		return ret;
 
 	if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
 		bkup = 1;
-	if (addr.addr.family == AF_UNSPEC) {
-		lookup_by_id = 1;
-		if (!addr.addr.id)
-			return -EOPNOTSUPP;
-	}
 
 	spin_lock_bh(&pernet->lock);
-	entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
+	entry = __lookup_addr(pernet, &addr.addr);
 	if (!entry) {
 		spin_unlock_bh(&pernet->lock);
 		return -EINVAL;
@@ -2140,9 +2090,6 @@ static int __net_init pm_nl_init_net(struct net *net)
 	struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
 
 	INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
-
-	/* Cit. 2 subflows ought to be enough for anybody. */
-	pernet->subflows_max = 2;
 	pernet->next_id = 1;
 	pernet->stale_loss_cnt = 4;
 	spin_lock_init(&pernet->lock);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f60f01b14f..d6def23b8c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -22,7 +22,6 @@
 #endif
 #include <net/mptcp.h>
 #include <net/xfrm.h>
-#include <asm/ioctls.h>
 #include "protocol.h"
 #include "mib.h"
 
@@ -47,10 +46,9 @@ struct mptcp_skb_cb {
 
 enum {
 	MPTCP_CMSG_TS = BIT(0),
-	MPTCP_CMSG_INQ = BIT(1),
 };
 
-static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
+static struct percpu_counter mptcp_sockets_allocated;
 
 static void __mptcp_destroy_sock(struct sock *sk);
 static void __mptcp_check_send_data_fin(struct sock *sk);
@@ -128,11 +126,6 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
 	__kfree_skb(skb);
 }
 
-static void mptcp_rmem_charge(struct sock *sk, int size)
-{
-	mptcp_sk(sk)->rmem_fwd_alloc -= size;
-}
-
 static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
 			       struct sk_buff *from)
 {
@@ -149,7 +142,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
 	MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
 	kfree_skb_partial(from, fragstolen);
 	atomic_add(delta, &sk->sk_rmem_alloc);
-	mptcp_rmem_charge(sk, delta);
+	sk_mem_charge(sk, delta);
 	return true;
 }
 
@@ -162,44 +155,6 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
 	return mptcp_try_coalesce((struct sock *)msk, to, from);
 }
 
-static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
-{
-	amount >>= SK_MEM_QUANTUM_SHIFT;
-	mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
-	__sk_mem_reduce_allocated(sk, amount);
-}
-
-static void mptcp_rmem_uncharge(struct sock *sk, int size)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	int reclaimable;
-
-	msk->rmem_fwd_alloc += size;
-	reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
-
-	/* see sk_mem_uncharge() for the rationale behind the following schema */
-	if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
-		__mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK);
-}
-
-static void mptcp_rfree(struct sk_buff *skb)
-{
-	unsigned int len = skb->truesize;
-	struct sock *sk = skb->sk;
-
-	atomic_sub(len, &sk->sk_rmem_alloc);
-	mptcp_rmem_uncharge(sk, len);
-}
-
-static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
-{
-	skb_orphan(skb);
-	skb->sk = sk;
-	skb->destructor = mptcp_rfree;
-	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
-	mptcp_rmem_charge(sk, skb->truesize);
-}
-
 /* "inspired" by tcp_data_queue_ofo(), main differences:
  * - use mptcp seqs
  * - don't cope with sacks
@@ -312,29 +267,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
 
 end:
 	skb_condense(skb);
-	mptcp_set_owner_r(skb, sk);
-}
-
-static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	int amt, amount;
-
-	if (size < msk->rmem_fwd_alloc)
-		return true;
-
-	amt = sk_mem_pages(size);
-	amount = amt << SK_MEM_QUANTUM_SHIFT;
-	msk->rmem_fwd_alloc += amount;
-	if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
-		if (ssk->sk_forward_alloc < amount) {
-			msk->rmem_fwd_alloc -= amount;
-			return false;
-		}
-
-		ssk->sk_forward_alloc -= amount;
-	}
-	return true;
+	skb_set_owner_r(skb, sk);
 }
 
 static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -352,8 +285,15 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 	skb_orphan(skb);
 
 	/* try to fetch required memory from subflow */
-	if (!mptcp_rmem_schedule(sk, ssk, skb->truesize))
-		goto drop;
+	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
+		int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+		if (ssk->sk_forward_alloc < amount)
+			goto drop;
+
+		ssk->sk_forward_alloc -= amount;
+		sk->sk_forward_alloc += amount;
+	}
 
 	has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
 
@@ -373,7 +313,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 		if (tail && mptcp_try_coalesce(sk, tail, skb))
 			return true;
 
-		mptcp_set_owner_r(skb, sk);
+		skb_set_owner_r(skb, sk);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		return true;
 	} else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
@@ -466,9 +406,12 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
 static void mptcp_set_datafin_timeout(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	u32 retransmits;
 
-	mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
-				       TCP_RTO_MIN << icsk->icsk_retransmits);
+	retransmits = min_t(u32, icsk->icsk_retransmits,
+			    ilog2(TCP_RTO_MAX / TCP_RTO_MIN));
+
+	mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << retransmits;
 }
 
 static void __mptcp_set_timeout(struct sock *sk, long tout)
@@ -740,7 +683,6 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 				 MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq,
 				 delta);
 			MPTCP_SKB_CB(skb)->offset += delta;
-			MPTCP_SKB_CB(skb)->map_seq += delta;
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
 		msk->ack_seq = end_seq;
@@ -763,7 +705,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 		if (!sock_owned_by_user(sk))
 			__mptcp_error_report(sk);
 		else
-			__set_bit(MPTCP_ERROR_REPORT, &msk->cb_flags);
+			set_bit(MPTCP_ERROR_REPORT, &msk->flags);
 	}
 
 	/* If the moves have caught up with the DATA_FIN sequence number
@@ -808,38 +750,47 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	mptcp_data_unlock(sk);
 }
 
-static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
+static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
 {
-	struct sock *sk = (struct sock *)msk;
+	struct mptcp_subflow_context *subflow;
+	bool ret = false;
 
-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (likely(list_empty(&msk->join_list)))
 		return false;
 
-	/* attach to msk socket only after we are sure we will deal with it
-	 * at close time
-	 */
-	if (sk->sk_socket && !ssk->sk_socket)
-		mptcp_sock_graft(ssk, sk->sk_socket);
+	spin_lock_bh(&msk->join_list_lock);
+	list_for_each_entry(subflow, &msk->join_list, node) {
+		u32 sseq = READ_ONCE(subflow->setsockopt_seq);
 
-	mptcp_propagate_sndbuf((struct sock *)msk, ssk);
-	mptcp_sockopt_sync_locked(msk, ssk);
-	return true;
+		mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+		if (READ_ONCE(msk->setsockopt_seq) != sseq)
+			ret = true;
+	}
+	list_splice_tail_init(&msk->join_list, &msk->conn_list);
+	spin_unlock_bh(&msk->join_list_lock);
+
+	return ret;
 }
 
-static void __mptcp_flush_join_list(struct sock *sk)
+void __mptcp_flush_join_list(struct mptcp_sock *msk)
 {
-	struct mptcp_subflow_context *tmp, *subflow;
-	struct mptcp_sock *msk = mptcp_sk(sk);
+	if (likely(!mptcp_do_flush_join_list(msk)))
+		return;
 
-	list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		bool slow = lock_sock_fast(ssk);
+	if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
+		mptcp_schedule_work((struct sock *)msk);
+}
 
-		list_move_tail(&subflow->node, &msk->conn_list);
-		if (!__mptcp_finish_join(msk, ssk))
-			mptcp_subflow_reset(ssk);
-		unlock_sock_fast(ssk, slow);
-	}
+static void mptcp_flush_join_list(struct mptcp_sock *msk)
+{
+	bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
+
+	might_sleep();
+
+	if (!mptcp_do_flush_join_list(msk) && !sync_needed)
+		return;
+
+	mptcp_sockopt_sync_all(msk);
 }
 
 static bool mptcp_timer_pending(struct sock *sk)
@@ -960,22 +911,124 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
 		df->data_seq + df->data_len == msk->write_seq;
 }
 
+static int mptcp_wmem_with_overhead(int size)
+{
+	return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
+}
+
+static void __mptcp_wmem_reserve(struct sock *sk, int size)
+{
+	int amount = mptcp_wmem_with_overhead(size);
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	WARN_ON_ONCE(msk->wmem_reserved);
+	if (WARN_ON_ONCE(amount < 0))
+		amount = 0;
+
+	if (amount <= sk->sk_forward_alloc)
+		goto reserve;
+
+	/* under memory pressure try to reserve at most a single page
+	 * otherwise try to reserve the full estimate and fallback
+	 * to a single page before entering the error path
+	 */
+	if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) ||
+	    !sk_wmem_schedule(sk, amount)) {
+		if (amount <= PAGE_SIZE)
+			goto nomem;
+
+		amount = PAGE_SIZE;
+		if (!sk_wmem_schedule(sk, amount))
+			goto nomem;
+	}
+
+reserve:
+	msk->wmem_reserved = amount;
+	sk->sk_forward_alloc -= amount;
+	return;
+
+nomem:
+	/* we will wait for memory on next allocation */
+	msk->wmem_reserved = -1;
+}
+
+static void __mptcp_update_wmem(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
+
+	if (!msk->wmem_reserved)
+		return;
+
+	if (msk->wmem_reserved < 0)
+		msk->wmem_reserved = 0;
+	if (msk->wmem_reserved > 0) {
+		sk->sk_forward_alloc += msk->wmem_reserved;
+		msk->wmem_reserved = 0;
+	}
+}
+
+static bool mptcp_wmem_alloc(struct sock *sk, int size)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	/* check for pre-existing error condition */
+	if (msk->wmem_reserved < 0)
+		return false;
+
+	if (msk->wmem_reserved >= size)
+		goto account;
+
+	mptcp_data_lock(sk);
+	if (!sk_wmem_schedule(sk, size)) {
+		mptcp_data_unlock(sk);
+		return false;
+	}
+
+	sk->sk_forward_alloc -= size;
+	msk->wmem_reserved += size;
+	mptcp_data_unlock(sk);
+
+account:
+	msk->wmem_reserved -= size;
+	return true;
+}
+
+static void mptcp_wmem_uncharge(struct sock *sk, int size)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (msk->wmem_reserved < 0)
+		msk->wmem_reserved = 0;
+	msk->wmem_reserved += size;
+}
+
 static void __mptcp_mem_reclaim_partial(struct sock *sk)
 {
-	int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
-
 	lockdep_assert_held_once(&sk->sk_lock.slock);
-
-	if (reclaimable > SK_MEM_QUANTUM)
-		__mptcp_rmem_reclaim(sk, reclaimable - 1);
-
+	__mptcp_update_wmem(sk);
 	sk_mem_reclaim_partial(sk);
 }
 
 static void mptcp_mem_reclaim_partial(struct sock *sk)
 {
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	/* if we are experiencing a transient allocation error,
+	 * the forward allocation memory has already been
+	 * released
+	 */
+	if (msk->wmem_reserved < 0)
+		return;
+
 	mptcp_data_lock(sk);
-	__mptcp_mem_reclaim_partial(sk);
+	sk->sk_forward_alloc += msk->wmem_reserved;
+	sk_mem_reclaim_partial(sk);
+	msk->wmem_reserved = sk->sk_forward_alloc;
+	sk->sk_forward_alloc = 0;
 	mptcp_data_unlock(sk);
 }
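
The wmem_reserved machinery restored above is a small reservation cache in front of sk_forward_alloc: sendmsg pre-reserves up front, per-fragment allocations draw from the cache first, and -1 marks a failed reservation to retry later. A toy model of that accounting, illustration only (plain counters in place of the socket's forward-alloc machinery):

/* Illustrative only: reserved mirrors msk->wmem_reserved, fwd_alloc
 * mirrors sk->sk_forward_alloc; reserved == -1 encodes "reservation
 * failed earlier, wait for memory on the next write".
 */
struct toy_wmem {
	int fwd_alloc;
	int reserved;
};

static int toy_wmem_alloc(struct toy_wmem *w, int size)
{
	if (w->reserved < 0)		/* pre-existing error condition */
		return 0;
	if (w->reserved < size) {	/* refill from the forward pool */
		if (w->fwd_alloc < size)
			return 0;	/* kernel: sk_wmem_schedule() */
		w->fwd_alloc -= size;
		w->reserved += size;
	}
	w->reserved -= size;		/* account this allocation */
	return 1;
}

static void toy_wmem_uncharge(struct toy_wmem *w, int size)
{
	if (w->reserved < 0)
		w->reserved = 0;
	w->reserved += size;
}
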
GFP_ATOMIC : sk->sk_allocation; @@ -1227,29 +1287,23 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_sendmsg_info *info) { u64 data_seq = dfrag->data_seq + info->sent; - int offset = dfrag->offset + info->sent; struct mptcp_sock *msk = mptcp_sk(sk); bool zero_window_probe = false; struct mptcp_ext *mpext = NULL; - bool can_coalesce = false; - bool reuse_skb = true; - struct sk_buff *skb; - size_t copy; - int i; + struct sk_buff *skb, *tail; + bool must_collapse = false; + int size_bias = 0; + int avail_size; + size_t ret = 0; pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u", msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); - if (WARN_ON_ONCE(info->sent > info->limit || - info->limit > dfrag->data_len)) - return 0; - /* compute send limit */ info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); - copy = info->size_goal; - + avail_size = info->size_goal; skb = tcp_write_queue_tail(ssk); - if (skb && copy > skb->len) { + if (skb) { /* Limit the write to the size available in the * current skb, if any, so that we create at most a new skb. * Explicitly tells TCP internals to avoid collapsing on later @@ -1262,79 +1316,62 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, goto alloc_skb; } - i = skb_shinfo(skb)->nr_frags; - can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset); - if (!can_coalesce && i >= sysctl_max_skb_frags) { - tcp_mark_push(tcp_sk(ssk), skb); - goto alloc_skb; + must_collapse = (info->size_goal > skb->len) && + (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags); + if (must_collapse) { + size_bias = skb->len; + avail_size = info->size_goal - skb->len; } - - copy -= skb->len; - } else { -alloc_skb: - skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held); - if (!skb) - return -ENOMEM; - - i = skb_shinfo(skb)->nr_frags; - reuse_skb = false; - mpext = skb_ext_find(skb, SKB_EXT_MPTCP); } +alloc_skb: + if (!must_collapse && + !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held)) + return 0; + /* Zero window and all data acked? Probe. 
*/ - copy = mptcp_check_allowed_size(msk, data_seq, copy); - if (copy == 0) { + avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size); + if (avail_size == 0) { u64 snd_una = READ_ONCE(msk->snd_una); - if (snd_una != msk->snd_nxt) { - tcp_remove_empty_skb(ssk); + if (skb || snd_una != msk->snd_nxt) return 0; - } - zero_window_probe = true; data_seq = snd_una - 1; - copy = 1; - - /* all mptcp-level data is acked, no skbs should be present into the - * ssk write queue - */ - WARN_ON_ONCE(reuse_skb); + avail_size = 1; } - copy = min_t(size_t, copy, info->limit - info->sent); - if (!sk_wmem_schedule(ssk, copy)) { - tcp_remove_empty_skb(ssk); + if (WARN_ON_ONCE(info->sent > info->limit || + info->limit > dfrag->data_len)) + return 0; + + ret = info->limit - info->sent; + tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags, + dfrag->page, dfrag->offset + info->sent, &ret); + if (!tail) { + tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk)); return -ENOMEM; } - if (can_coalesce) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - get_page(dfrag->page); - skb_fill_page_desc(skb, i, dfrag->page, offset, copy); - } - - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; - sk_wmem_queued_add(ssk, copy); - sk_mem_charge(ssk, copy); - WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy); - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); - - /* on skb reuse we just need to update the DSS len */ - if (reuse_skb) { - TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; - mpext->data_len += copy; + /* if the tail skb is still the cached one, collapsing really happened. + */ + if (skb == tail) { + TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH; + mpext->data_len += ret; WARN_ON_ONCE(zero_window_probe); goto out; } + mpext = skb_ext_find(tail, SKB_EXT_MPTCP); + if (WARN_ON_ONCE(!mpext)) { + /* should never reach here, stream corrupted */ + return -EINVAL; + } + memset(mpext, 0, sizeof(*mpext)); mpext->data_seq = data_seq; mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq; - mpext->data_len = copy; + mpext->data_len = ret; mpext->use_map = 1; mpext->dsn64 = 1; @@ -1343,18 +1380,18 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext->dsn64); if (zero_window_probe) { - mptcp_subflow_ctx(ssk)->rel_write_seq += copy; + mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mpext->frozen = 1; if (READ_ONCE(msk->csum_enabled)) - mptcp_update_data_checksum(skb, copy); + mptcp_update_data_checksum(tail, ret); tcp_push_pending_frames(ssk); return 0; } out: if (READ_ONCE(msk->csum_enabled)) - mptcp_update_data_checksum(skb, copy); - mptcp_subflow_ctx(ssk)->rel_write_seq += copy; - return copy; + mptcp_update_data_checksum(tail, ret); + mptcp_subflow_ctx(ssk)->rel_write_seq += ret; + return ret; } #define MPTCP_SEND_BURST_SIZE ((1 << 16) - \ @@ -1365,7 +1402,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct subflow_send_info { struct sock *ssk; - u64 linger_time; + u64 ratio; }; void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow) @@ -1390,24 +1427,20 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) return __mptcp_subflow_active(subflow); } -#define SSK_MODE_ACTIVE 0 -#define SSK_MODE_BACKUP 1 -#define SSK_MODE_MAX 2 - /* implement the mptcp packet scheduler; * returns the subflow that will transmit the next DSS * additionally updates the rtx timeout */ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { - struct subflow_send_info 
send_info[SSK_MODE_MAX]; + struct subflow_send_info send_info[2]; struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - u32 pace, burst, wmem; int i, nr_active = 0; struct sock *ssk; - u64 linger_time; long tout = 0; + u64 ratio; + u32 pace; sock_owned_by_me(sk); @@ -1426,11 +1459,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } /* pick the subflow with the lower wmem/wspace ratio */ - for (i = 0; i < SSK_MODE_MAX; ++i) { + for (i = 0; i < 2; ++i) { send_info[i].ssk = NULL; - send_info[i].linger_time = -1; + send_info[i].ratio = -1; } - mptcp_for_each_subflow(msk, subflow) { trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); @@ -1439,54 +1471,38 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) tout = max(tout, mptcp_timeout_from_subflow(subflow)); nr_active += !subflow->backup; - pace = subflow->avg_pacing_rate; - if (unlikely(!pace)) { - /* init pacing rate from socket */ - subflow->avg_pacing_rate = READ_ONCE(ssk->sk_pacing_rate); - pace = subflow->avg_pacing_rate; - if (!pace) - continue; - } + if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd) + continue; - linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace); - if (linger_time < send_info[subflow->backup].linger_time) { + pace = READ_ONCE(ssk->sk_pacing_rate); + if (!pace) + continue; + + ratio = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, + pace); + if (ratio < send_info[subflow->backup].ratio) { send_info[subflow->backup].ssk = ssk; - send_info[subflow->backup].linger_time = linger_time; + send_info[subflow->backup].ratio = ratio; } } __mptcp_set_timeout(sk, tout); /* pick the best backup if no other subflow is active */ if (!nr_active) - send_info[SSK_MODE_ACTIVE].ssk = send_info[SSK_MODE_BACKUP].ssk; + send_info[0].ssk = send_info[1].ssk; - /* According to the blest algorithm, to avoid HoL blocking for the - * faster flow, we need to: - * - estimate the faster flow linger time - * - use the above to estimate the amount of byte transferred - * by the faster flow - * - check that the amount of queued data is greter than the above, - * otherwise do not use the picked, slower, subflow - * We select the subflow with the shorter estimated time to flush - * the queued mem, which basically ensure the above. We just need - * to check that subflow has a non empty cwin. 
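
The restored scheduler in this hunk ranks each usable subflow by ratio = (sk_wmem_queued << 32) / sk_pacing_rate, tracked separately for active and backup subflows, and falls back to the best backup only when no active subflow exists. The same selection in isolation (plain C over an array; the field names are simplified stand-ins for the ssk fields):

#include <stdint.h>
#include <stdio.h>

struct subflow {
    uint64_t wmem_queued;  /* bytes already queued on this subflow */
    uint64_t pacing_rate;  /* bytes/sec; 0 means not usable yet */
    int backup;            /* 0 = active, 1 = backup */
};

/* Pick, per class, the subflow that would drain its queue soonest:
 * ratio = (wmem << 32) / pace approximates queued-bytes over throughput.
 * Falls back to the best backup only if no active subflow exists.
 * Returns the index of the chosen subflow, or -1 if none is usable.
 */
static int pick_subflow(const struct subflow *sf, int n)
{
    uint64_t best_ratio[2] = { UINT64_MAX, UINT64_MAX };
    int best[2] = { -1, -1 };
    int nr_active = 0;

    for (int i = 0; i < n; i++) {
        uint64_t ratio;

        nr_active += !sf[i].backup;
        if (!sf[i].pacing_rate)
            continue;
        ratio = (sf[i].wmem_queued << 32) / sf[i].pacing_rate;
        if (ratio < best_ratio[sf[i].backup]) {
            best_ratio[sf[i].backup] = ratio;
            best[sf[i].backup] = i;
        }
    }
    if (best[0] >= 0)
        return best[0];
    return nr_active ? -1 : best[1];
}

int main(void)
{
    struct subflow sf[] = {
        { .wmem_queued = 1 << 16, .pacing_rate = 125000, .backup = 0 },
        { .wmem_queued = 1 << 12, .pacing_rate = 250000, .backup = 0 },
    };

    printf("picked subflow %d\n", pick_subflow(sf, 2));
    return 0;
}

The << 32 scaling keeps the queued/throughput quotient in integer arithmetic with enough resolution that subflows with small queues and fast pacing reliably win the comparison.
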
- */ - ssk = send_info[SSK_MODE_ACTIVE].ssk; - if (!ssk || !sk_stream_memory_free(ssk) || !tcp_sk(ssk)->snd_wnd) - return NULL; + if (send_info[0].ssk) { + msk->last_snd = send_info[0].ssk; + msk->snd_burst = min_t(int, MPTCP_SEND_BURST_SIZE, + tcp_sk(msk->last_snd)->snd_wnd); + return msk->last_snd; + } - burst = min_t(int, MPTCP_SEND_BURST_SIZE, tcp_sk(ssk)->snd_wnd); - wmem = READ_ONCE(ssk->sk_wmem_queued); - subflow = mptcp_subflow_ctx(ssk); - subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem + - READ_ONCE(ssk->sk_pacing_rate) * burst, - burst + wmem); - msk->last_snd = ssk; - msk->snd_burst = burst; - return ssk; + return NULL; } -static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info) +static void mptcp_push_release(struct sock *sk, struct sock *ssk, + struct mptcp_sendmsg_info *info) { tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal); release_sock(ssk); @@ -1501,6 +1517,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, dfrag->already_sent += sent; msk->snd_burst -= sent; + msk->tx_pending_data -= sent; snd_nxt_new += dfrag->already_sent; @@ -1517,12 +1534,6 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, msk->snd_nxt = snd_nxt_new; } -void mptcp_check_and_set_pending(struct sock *sk) -{ - if (mptcp_send_head(sk)) - mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); -} - void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; @@ -1541,13 +1552,14 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) int ret = 0; prev_ssk = ssk; + __mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); /* First check. If the ssk has changed since * the last round, release prev_ssk */ if (ssk != prev_ssk && prev_ssk) - mptcp_push_release(prev_ssk, &info); + mptcp_push_release(sk, prev_ssk, &info); if (!ssk) goto out; @@ -1560,7 +1572,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) { - mptcp_push_release(ssk, &info); + mptcp_push_release(sk, ssk, &info); goto out; } @@ -1575,7 +1587,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) /* at this point we held the socket lock for the last subflow we used */ if (ssk) - mptcp_push_release(ssk, &info); + mptcp_push_release(sk, ssk, &info); out: /* ensure the rtx timer is running */ @@ -1635,6 +1647,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) /* __mptcp_alloc_tx_skb could have released some wmem and we are * not going to flush it via release_sock() */ + __mptcp_update_wmem(sk); if (copied) { tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); @@ -1671,7 +1684,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* silently ignore everything else */ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; - lock_sock(sk); + mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len))); timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -1719,22 +1732,23 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) psize = min_t(size_t, psize, msg_data_left(msg)); total_ts = psize + frag_truesize; - if (!sk_wmem_schedule(sk, total_ts)) + if (!mptcp_wmem_alloc(sk, total_ts)) goto wait_for_memory; if (copy_page_from_iter(dfrag->page, offset, psize, &msg->msg_iter) != psize) { + mptcp_wmem_uncharge(sk, psize + frag_truesize); ret = -EFAULT; goto out; } /* data successfully 
copied into the write queue */ - sk->sk_forward_alloc -= total_ts; copied += psize; dfrag->data_len += psize; frag_truesize += psize; pfrag->offset += frag_truesize; WRITE_ONCE(msk->write_seq, msk->write_seq + psize); + msk->tx_pending_data += psize; /* charge data on mptcp pending queue to the msk socket * Note: we charge such data both to sk and ssk @@ -1800,10 +1814,8 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, copied += count; if (count < data_len) { - if (!(flags & MSG_PEEK)) { + if (!(flags & MSG_PEEK)) MPTCP_SKB_CB(skb)->offset += count; - MPTCP_SKB_CB(skb)->map_seq += count; - } break; } @@ -1928,7 +1940,7 @@ static void __mptcp_update_rmem(struct sock *sk) return; atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); - mptcp_rmem_uncharge(sk, msk->rmem_released); + sk_mem_uncharge(sk, msk->rmem_released); WRITE_ONCE(msk->rmem_released, 0); } @@ -1945,6 +1957,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) unsigned int moved = 0; bool ret, done; + mptcp_flush_join_list(msk); do { struct sock *ssk = mptcp_subflow_recv_lookup(msk); bool slowpath; @@ -1982,27 +1995,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) return !skb_queue_empty(&msk->receive_queue); } -static unsigned int mptcp_inq_hint(const struct sock *sk) -{ - const struct mptcp_sock *msk = mptcp_sk(sk); - const struct sk_buff *skb; - - skb = skb_peek(&msk->receive_queue); - if (skb) { - u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; - - if (hint_val >= INT_MAX) - return INT_MAX; - - return (unsigned int)hint_val; - } - - if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN)) - return 1; - - return 0; -} - static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -2016,7 +2008,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); - lock_sock(sk); + mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk)); if (unlikely(sk->sk_state == TCP_LISTEN)) { copied = -ENOTCONN; goto out_err; @@ -2027,9 +2019,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, len = min_t(size_t, len, INT_MAX); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - if (unlikely(msk->recvmsg_inq)) - cmsg_flags = MPTCP_CMSG_INQ; - while (copied < len) { int bytes_read; @@ -2103,12 +2092,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (cmsg_flags && copied >= 0) { if (cmsg_flags & MPTCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); - - if (cmsg_flags & MPTCP_CMSG_INQ) { - unsigned int inq = mptcp_inq_hint(sk); - - put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); - } } pr_debug("msk=%p rx queue empty=%d:%d copied=%d", @@ -2135,7 +2118,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) mptcp_schedule_work(sk); } else { /* delegate our work to tcp_release_cb() */ - __set_bit(MPTCP_RETRANSMIT, &msk->cb_flags); + set_bit(MPTCP_RETRANSMIT, &msk->flags); } bh_unlock_sock(sk); sock_put(sk); @@ -2231,6 +2214,7 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) mptcp_data_unlock(sk); msk->first_pending = rtx_head; + msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq; msk->snd_burst = 0; /* be sure to clear the "sent status" on all re-injected fragments */ @@ -2243,10 +2227,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) return true; } -/* flags for __mptcp_close_ssk() */ -#define MPTCP_CF_PUSH BIT(1) -#define MPTCP_CF_FASTCLOSE BIT(2) - /* 
subflow sockets can be either outgoing (connect) or incoming * (accept). * @@ -2256,37 +2236,22 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) * parent socket. */ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow, - unsigned int flags) + struct mptcp_subflow_context *subflow) { struct mptcp_sock *msk = mptcp_sk(sk); - bool need_push, dispose_it; + bool need_push; - dispose_it = !msk->subflow || ssk != msk->subflow->sk; - if (dispose_it) - list_del(&subflow->node); + list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); - if (flags & MPTCP_CF_FASTCLOSE) - subflow->send_fastclose = 1; - - need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); - if (!dispose_it) { - tcp_disconnect(ssk, 0); - msk->subflow->state = SS_UNCONNECTED; - mptcp_subflow_ctx_reset(subflow); - release_sock(ssk); - - goto out; - } - /* if we are invoked by the msk cleanup code, the subflow is * already orphaned */ if (ssk->sk_socket) sock_orphan(ssk); + need_push = __mptcp_retransmit_pending_data(sk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2306,12 +2271,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, sock_put(ssk); + if (ssk == msk->last_snd) + msk->last_snd = NULL; + if (ssk == msk->first) msk->first = NULL; -out: - if (ssk == msk->last_snd) - msk->last_snd = NULL; + if (msk->subflow && ssk == msk->subflow->sk) + mptcp_dispose_initial_subflow(msk); if (need_push) __mptcp_push_pending(sk, 0); @@ -2322,13 +2289,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, { if (sk->sk_state == TCP_ESTABLISHED) mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL); - - /* subflow aborted before reaching the fully_established status - * attempt the creation of the next subflow - */ - mptcp_pm_subflow_check_next(mptcp_sk(sk), ssk, subflow); - - __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_PUSH); + __mptcp_close_ssk(sk, ssk, subflow); } static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) @@ -2415,9 +2376,6 @@ static void __mptcp_retrans(struct sock *sk) int ret; mptcp_clean_una_wakeup(sk); - - /* first check ssk: need to kick "stale" logic */ - ssk = mptcp_subflow_get_retrans(msk); dfrag = mptcp_rtx_head(sk); if (!dfrag) { if (mptcp_data_fin_enabled(msk)) { @@ -2430,12 +2388,10 @@ static void __mptcp_retrans(struct sock *sk) goto reset_timer; } - if (!mptcp_send_head(sk)) - return; - - goto reset_timer; + return; } + ssk = mptcp_subflow_get_retrans(msk); if (!ssk) goto reset_timer; @@ -2462,8 +2418,6 @@ static void __mptcp_retrans(struct sock *sk) release_sock(ssk); reset_timer: - mptcp_check_and_set_pending(sk); - if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); } @@ -2480,10 +2434,12 @@ static void mptcp_worker(struct work_struct *work) goto unlock; mptcp_check_data_fin_ack(sk); + mptcp_flush_join_list(msk); mptcp_check_fastclose(msk); - mptcp_pm_nl_work(msk); + if (msk->pm.status) + mptcp_pm_nl_work(msk); if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); @@ -2517,6 +2473,8 @@ static int __mptcp_init_sock(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); + spin_lock_init(&msk->join_list_lock); + INIT_LIST_HEAD(&msk->conn_list); INIT_LIST_HEAD(&msk->join_list); INIT_LIST_HEAD(&msk->rtx_queue); @@ -2524,8 +2482,9 @@ static int __mptcp_init_sock(struct sock *sk) __skb_queue_head_init(&msk->receive_queue); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; - 
msk->rmem_fwd_alloc = 0; + msk->wmem_reserved = 0; WRITE_ONCE(msk->rmem_released, 0); + msk->tx_pending_data = 0; msk->timer_ival = TCP_RTO_MIN; msk->first = NULL; @@ -2542,20 +2501,9 @@ static int __mptcp_init_sock(struct sock *sk) return 0; } -static void mptcp_ca_reset(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - tcp_assign_congestion_control(sk); - strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name); - - /* no need to keep a reference to the ops, the name will suffice */ - tcp_cleanup_congestion_control(sk); - icsk->icsk_ca_ops = NULL; -} - static int mptcp_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); int ret; @@ -2576,7 +2524,12 @@ static int mptcp_init_sock(struct sock *sk) /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will * propagate the correct value */ - mptcp_ca_reset(sk); + tcp_assign_congestion_control(sk); + strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name); + + /* no need to keep a reference to the ops, the name will suffice */ + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = NULL; sk_sockets_allocated_inc(sk); sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; @@ -2681,7 +2634,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk) * state now */ if (__mptcp_check_fallback(msk)) { - WRITE_ONCE(msk->snd_una, msk->write_seq); if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { inet_sk_state_store(sk, TCP_CLOSE); mptcp_close_wake_up(sk); @@ -2690,6 +2642,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk) } } + mptcp_flush_join_list(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); @@ -2722,30 +2675,32 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - /* join list will be eventually flushed (with rst) at sock lock release time*/ + /* be sure to always acquire the join list lock, to sync vs + * mptcp_finish_join(). 
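
The comment just above, and the splice that follows it, bring back the join_list_lock scheme: teardown drains msk->join_list into conn_list while holding the spinlock, so a mptcp_finish_join() racing on another CPU either lands on the list before the splice (and gets closed here) or observes the dying socket state and bails. The lock-splice-then-walk pattern on its own, with a POSIX mutex standing in for the bh spinlock and an illustrative singly-linked list instead of the kernel list API:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    int id;
    struct node *next;
};

static pthread_mutex_t join_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *join_list;  /* filled by "joining" contexts */

/* producer side: the analogue of adding to msk->join_list */
static void add_pending(int id)
{
    struct node *n = malloc(sizeof(*n));

    if (!n)
        return;
    n->id = id;
    pthread_mutex_lock(&join_lock);
    n->next = join_list;
    join_list = n;
    pthread_mutex_unlock(&join_lock);
}

/* teardown side: steal the whole list under the lock, walk it unlocked;
 * this is the list_splice_tail_init() + list_for_each_entry_safe() shape
 */
static void destroy_all(void)
{
    struct node *n, *tmp;

    pthread_mutex_lock(&join_lock);
    n = join_list;
    join_list = NULL;
    pthread_mutex_unlock(&join_lock);

    while (n) {
        tmp = n->next;
        printf("closing subflow %d\n", n->id);
        free(n);
        n = tmp;
    }
}

int main(void)
{
    add_pending(1);
    add_pending(2);
    destroy_all();
    return 0;
}

Splicing under the lock keeps the critical section O(1); the potentially slow per-subflow shutdown then runs without the spinlock held.
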
+ */ + spin_lock_bh(&msk->join_list_lock); + list_splice_tail_init(&msk->join_list, &msk->conn_list); + spin_unlock_bh(&msk->join_list_lock); list_splice_init(&msk->conn_list, &conn_list); sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer); sk_stop_timer(sk, &sk->sk_timer); msk->pm.status = 0; - /* clears msk->subflow, allowing the following loop to close - * even the initial subflow - */ - mptcp_dispose_initial_subflow(msk); list_for_each_entry_safe(subflow, tmp, &conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - __mptcp_close_ssk(sk, ssk, subflow, 0); + __mptcp_close_ssk(sk, ssk, subflow); } sk->sk_prot->destroy(sk); - WARN_ON_ONCE(msk->rmem_fwd_alloc); + WARN_ON_ONCE(msk->wmem_reserved); WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + mptcp_dispose_initial_subflow(msk); sock_put(sk); } @@ -2781,9 +2736,6 @@ static void mptcp_close(struct sock *sk, long timeout) sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); - if (mptcp_sk(sk)->token) - mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); - if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); do_cancel_work = true; @@ -2794,6 +2746,9 @@ static void mptcp_close(struct sock *sk, long timeout) if (do_cancel_work) mptcp_cancel_work(sk); + if (mptcp_sk(sk)->token) + mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); + sock_put(sk); } @@ -2825,38 +2780,15 @@ static int mptcp_disconnect(struct sock *sk, int flags) struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_flush_join_list(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); + lock_sock(ssk); + tcp_disconnect(ssk, flags); + release_sock(ssk); } - - sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer); - sk_stop_timer(sk, &sk->sk_timer); - - if (mptcp_sk(sk)->token) - mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); - - mptcp_destroy_common(msk); - msk->last_snd = NULL; - WRITE_ONCE(msk->flags, 0); - msk->cb_flags = 0; - msk->push_pending = 0; - msk->recovery = false; - msk->can_ack = false; - msk->fully_established = false; - msk->rcv_data_fin = false; - msk->snd_data_fin_enable = false; - msk->rcv_fastclose = false; - msk->use_64bit_ack = false; - WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); - mptcp_pm_data_reset(msk); - mptcp_ca_reset(sk); - - sk->sk_shutdown = 0; - sk_error_report(sk); return 0; } @@ -2996,17 +2928,9 @@ void mptcp_destroy_common(struct mptcp_sock *msk) __mptcp_clear_xmit(sk); /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ - mptcp_data_lock(sk); skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); - __skb_queue_purge(&sk->sk_receive_queue); - skb_rbtree_purge(&msk->out_of_order_queue); - mptcp_data_unlock(sk); - /* move all the rx fwd alloc into the sk_mem_reclaim_final in - * inet_sock_destruct() will dispose it - */ - sk->sk_forward_alloc += msk->rmem_fwd_alloc; - msk->rmem_fwd_alloc = 0; + skb_rbtree_purge(&msk->out_of_order_queue); mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); } @@ -3024,7 +2948,7 @@ void __mptcp_data_acked(struct sock *sk) if (!sock_owned_by_user(sk)) __mptcp_clean_una(sk); else - __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags); + set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags); if (mptcp_pending_data_fin_ack(sk)) mptcp_schedule_work(sk); @@ 
-3043,23 +2967,20 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) else if (xmit_ssk) mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND); } else { - __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); } } -#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \ - BIT(MPTCP_RETRANSMIT) | \ - BIT(MPTCP_FLUSH_JOIN_LIST)) - /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) - __must_hold(&sk->sk_lock.slock) { - struct mptcp_sock *msk = mptcp_sk(sk); - for (;;) { - unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | - msk->push_pending; + unsigned long flags = 0; + + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_PUSH_PENDING); + if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_RETRANSMIT); if (!flags) break; @@ -3070,11 +2991,8 @@ static void mptcp_release_cb(struct sock *sk) * datapath acquires the msk socket spinlock while helding * the subflow socket lock */ - msk->push_pending = 0; - msk->cb_flags &= ~flags; + spin_unlock_bh(&sk->sk_lock.slock); - if (flags & BIT(MPTCP_FLUSH_JOIN_LIST)) - __mptcp_flush_join_list(sk); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) @@ -3087,13 +3005,17 @@ static void mptcp_release_cb(struct sock *sk) /* be sure to set the current sk state before tacking actions * depending on sk_state */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) + if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags)) __mptcp_set_connected(sk); - if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) + if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) __mptcp_clean_una_wakeup(sk); - if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) + if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) __mptcp_error_report(sk); + /* push_pending may touch wmem_reserved, ensure we do the cleanup + * later + */ + __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); } @@ -3133,7 +3055,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk) if (!sock_owned_by_user(sk)) __mptcp_subflow_push_pending(sk, ssk); else - __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); mptcp_data_unlock(sk); mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND); } @@ -3219,7 +3141,8 @@ bool mptcp_finish_join(struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct sock *parent = (void *)msk; - bool ret = true; + struct socket *parent_sock; + bool ret; pr_debug("msk=%p, subflow=%p", msk, subflow); @@ -3232,38 +3155,35 @@ bool mptcp_finish_join(struct sock *ssk) if (!msk->pm.server_side) goto out; - if (!mptcp_pm_allow_new_subflow(msk)) - goto err_prohibited; - - if (WARN_ON_ONCE(!list_empty(&subflow->node))) - goto err_prohibited; - - /* active connections are already on conn_list. 
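
Backing up to the mptcp_release_cb() hunk just above: deferred events move back from the lock-protected cb_flags word to atomic bits in msk->flags, consumed with test_and_clear_bit() inside a retry loop so an event raised while the callback is running is never lost. The shape of that loop, detached from the socket details (C11 atomics standing in for the kernel bitops; the event names are placeholders):

#include <stdatomic.h>
#include <stdio.h>

enum { EV_PUSH_PENDING, EV_RETRANSMIT };

static atomic_ulong deferred;  /* stands in for msk->flags */

/* raise an event from a context that cannot run it directly */
static void defer(int bit)
{
    atomic_fetch_or(&deferred, 1UL << bit);
}

/* test_and_clear_bit() analogue */
static int test_and_clear(int bit)
{
    unsigned long mask = 1UL << bit;

    return (atomic_fetch_and(&deferred, ~mask) & mask) != 0;
}

/* release-callback analogue: loop until no event remains, so an event
 * raised while we were processing the previous batch is not lost
 */
static void release_cb(void)
{
    for (;;) {
        unsigned long flags = 0;

        if (test_and_clear(EV_PUSH_PENDING))
            flags |= 1UL << EV_PUSH_PENDING;
        if (test_and_clear(EV_RETRANSMIT))
            flags |= 1UL << EV_RETRANSMIT;
        if (!flags)
            break;

        if (flags & (1UL << EV_PUSH_PENDING))
            printf("push pending data\n");
        if (flags & (1UL << EV_RETRANSMIT))
            printf("retransmit\n");
    }
}

int main(void)
{
    defer(EV_PUSH_PENDING);
    defer(EV_RETRANSMIT);
    release_cb();
    return 0;
}
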
- * If we can't acquire msk socket lock here, let the release callback - * handle it - */ - mptcp_data_lock(parent); - if (!sock_owned_by_user(parent)) { - ret = __mptcp_finish_join(msk, ssk); - if (ret) { - sock_hold(ssk); - list_add_tail(&subflow->node, &msk->conn_list); - } - } else { - sock_hold(ssk); - list_add_tail(&subflow->node, &msk->join_list); - __set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags); - } - mptcp_data_unlock(parent); - - if (!ret) { -err_prohibited: + if (!mptcp_pm_allow_new_subflow(msk)) { subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; } - subflow->map_seq = READ_ONCE(msk->ack_seq); + /* active connections are already on conn_list, and we can't acquire + * msk lock here. + * use the join list lock as synchronization point and double-check + * msk status to avoid racing with __mptcp_destroy_sock() + */ + spin_lock_bh(&msk->join_list_lock); + ret = inet_sk_state_load(parent) == TCP_ESTABLISHED; + if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) { + list_add_tail(&subflow->node, &msk->join_list); + sock_hold(ssk); + } + spin_unlock_bh(&msk->join_list_lock); + if (!ret) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; + return false; + } + /* attach to msk socket only after we are sure he will deal with us + * at close time + */ + parent_sock = READ_ONCE(parent->sk_socket); + if (parent_sock && !ssk->sk_socket) + mptcp_sock_graft(ssk, parent_sock); + subflow->map_seq = READ_ONCE(msk->ack_seq); out: mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); return true; @@ -3277,62 +3197,6 @@ static void mptcp_shutdown(struct sock *sk, int how) __mptcp_wr_shutdown(sk); } -static int mptcp_forward_alloc_get(const struct sock *sk) -{ - return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc; -} - -static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) -{ - const struct sock *sk = (void *)msk; - u64 delta; - - if (sk->sk_state == TCP_LISTEN) - return -EINVAL; - - if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) - return 0; - - delta = msk->write_seq - v; - if (delta > INT_MAX) - delta = INT_MAX; - - return (int)delta; -} - -static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - bool slow; - int answ; - - switch (cmd) { - case SIOCINQ: - if (sk->sk_state == TCP_LISTEN) - return -EINVAL; - - lock_sock(sk); - __mptcp_move_skbs(msk); - answ = mptcp_inq_hint(sk); - release_sock(sk); - break; - case SIOCOUTQ: - slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); - unlock_sock_fast(sk, slow); - break; - case SIOCOUTQNSD: - slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, msk->snd_nxt); - unlock_sock_fast(sk, slow); - break; - default: - return -ENOIOCTLCMD; - } - - return put_user(answ, (int __user *)arg); -} - static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -3345,13 +3209,11 @@ static struct proto mptcp_prot = { .shutdown = mptcp_shutdown, .destroy = mptcp_destroy, .sendmsg = mptcp_sendmsg, - .ioctl = mptcp_ioctl, .recvmsg = mptcp_recvmsg, .release_cb = mptcp_release_cb, .hash = mptcp_hash, .unhash = mptcp_unhash, .get_port = mptcp_get_port, - .forward_alloc_get = mptcp_forward_alloc_get, .sockets_allocated = &mptcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, @@ -3398,20 +3260,9 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct mptcp_sock *msk = mptcp_sk(sock->sk); struct mptcp_subflow_context *subflow; struct 
socket *ssock; - int err = -EINVAL; + int err; lock_sock(sock->sk); - if (uaddr) { - if (addr_len < sizeof(uaddr->sa_family)) - goto unlock; - - if (uaddr->sa_family == AF_UNSPEC) { - err = mptcp_disconnect(sock->sk, flags); - sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; - goto unlock; - } - } - if (sock->state != SS_UNCONNECTED && msk->subflow) { /* pending connection or invalid state, let existing subflow * cope with that @@ -3421,8 +3272,10 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, } ssock = __mptcp_nmpc_socket(msk); - if (!ssock) + if (!ssock) { + err = -EINVAL; goto unlock; + } mptcp_token_destroy(msk); inet_sk_state_store(sock->sk, TCP_SYN_SENT); @@ -3496,9 +3349,17 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, pr_debug("msk=%p", msk); + lock_sock(sock->sk); + if (sock->sk->sk_state != TCP_LISTEN) + goto unlock_fail; + ssock = __mptcp_nmpc_socket(msk); if (!ssock) - return -EINVAL; + goto unlock_fail; + + clear_bit(MPTCP_DATA_READY, &msk->flags); + sock_hold(ssock->sk); + release_sock(sock->sk); err = ssock->ops->accept(sock, newsock, flags, kern); if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { @@ -3528,6 +3389,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ + mptcp_flush_join_list(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -3537,7 +3399,14 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, release_sock(newsk); } + if (inet_csk_listen_poll(ssock->sk)) + set_bit(MPTCP_DATA_READY, &msk->flags); + sock_put(ssock->sk); return err; + +unlock_fail: + release_sock(sock->sk); + return -EINVAL; } static __poll_t mptcp_check_readable(struct mptcp_sock *msk) @@ -3583,12 +3452,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, state = inet_sk_state_load(sk); pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); - if (state == TCP_LISTEN) { - if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk)) - return 0; - - return inet_csk_listen_poll(msk->subflow->sk); - } + if (state == TCP_LISTEN) + return test_bit(MPTCP_DATA_READY, &msk->flags) ? 
EPOLLIN | EPOLLRDNORM : 0; if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { mask |= mptcp_check_readable(msk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 85317ce38e..82c5dc4d6b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -110,20 +110,19 @@ /* MPTCP TCPRST flags */ #define MPTCP_RST_TRANSIENT BIT(0) -/* MPTCP socket atomic flags */ +/* MPTCP socket flags */ +#define MPTCP_DATA_READY 0 #define MPTCP_NOSPACE 1 #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 - -/* MPTCP socket release cb flags */ -#define MPTCP_PUSH_PENDING 1 -#define MPTCP_CLEAN_UNA 2 -#define MPTCP_ERROR_REPORT 3 -#define MPTCP_RETRANSMIT 4 -#define MPTCP_FLUSH_JOIN_LIST 5 -#define MPTCP_CONNECTED 6 +#define MPTCP_PUSH_PENDING 6 +#define MPTCP_CLEAN_UNA 7 +#define MPTCP_ERROR_REPORT 8 +#define MPTCP_RETRANSMIT 9 +#define MPTCP_WORK_SYNC_SETSOCKOPT 10 +#define MPTCP_CONNECTED 11 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -175,25 +174,16 @@ enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, - MPTCP_PM_SUBFLOW_ESTABLISHED, MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */ - MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is - * accounted int id_avail_bitmap - */ + MPTCP_PM_SUBFLOW_ESTABLISHED, }; -/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */ -#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1) - enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, MPTCP_RM_ADDR_SIGNAL, }; -/* max value of mptcp_addr_info.id */ -#define MPTCP_PM_MAX_ADDR_ID U8_MAX - struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; @@ -212,7 +202,6 @@ struct mptcp_pm_data { u8 local_addr_used; u8 subflows; u8 status; - DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_rx; }; @@ -238,7 +227,7 @@ struct mptcp_sock { u64 ack_seq; u64 rcv_wnd_sent; u64 rcv_data_fin_seq; - int rmem_fwd_alloc; + int wmem_reserved; struct sock *last_snd; int snd_burst; int old_wspace; @@ -252,8 +241,6 @@ struct mptcp_sock { u32 token; int rmem_released; unsigned long flags; - unsigned long cb_flags; - unsigned long push_pending; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; @@ -262,13 +249,12 @@ struct mptcp_sock { bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; - u8 recvmsg_inq:1, - cork:1, - nodelay:1; + spinlock_t join_list_lock; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; struct sk_buff_head receive_queue; + int tx_pending_data; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; @@ -287,6 +273,19 @@ struct mptcp_sock { char ca_name[TCP_CA_NAME_MAX]; }; +#define mptcp_lock_sock(___sk, cb) do { \ + struct sock *__sk = (___sk); /* silence macro reuse warning */ \ + might_sleep(); \ + spin_lock_bh(&__sk->sk_lock.slock); \ + if (__sk->sk_lock.owned) \ + __lock_sock(__sk); \ + cb; \ + __sk->sk_lock.owned = 1; \ + spin_unlock(&__sk->sk_lock.slock); \ + mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_); \ + local_bh_enable(); \ +} while (0) + #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) #define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock) @@ -407,10 
+406,6 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ - - struct_group(reset, - - unsigned long avg_pacing_rate; /* protected by msk socket lock */ u64 local_key; u64 remote_key; u64 idsn; @@ -438,7 +433,6 @@ struct mptcp_subflow_context { backup : 1, send_mp_prio : 1, send_mp_fail : 1, - send_fastclose : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ @@ -457,9 +451,6 @@ struct mptcp_subflow_context { u8 stale_count; long delegated_status; - - ); - struct list_head delegated_node; /* link into delegated_action, protected by local BH */ u32 setsockopt_seq; @@ -491,13 +482,6 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) return subflow->tcp_sock; } -static inline void -mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) -{ - memset(&subflow->reset, 0, sizeof(subflow->reset)); - subflow->request_mptcp = 1; -} - static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { @@ -512,6 +496,15 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); } +static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + sock_hold(mptcp_subflow_tcp_sock(subflow)); + spin_lock_bh(&msk->join_list_lock); + list_add_tail(&subflow->node, &msk->join_list); + spin_unlock_bh(&msk->join_list_lock); +} + void mptcp_subflow_process_delegated(struct sock *ssk); static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action) @@ -575,7 +568,6 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); -void mptcp_check_and_set_pending(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); @@ -676,6 +668,7 @@ void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); +void __mptcp_flush_join_list(struct mptcp_sock *msk); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { return READ_ONCE(msk->snd_data_fin_enable) && @@ -725,11 +718,9 @@ void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); -u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); -void mptcp_pm_data_reset(struct mptcp_sock *msk); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); @@ -737,9 +728,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void 
mptcp_pm_subflow_established(struct mptcp_sock *msk); -bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); -void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, - const struct mptcp_subflow_context *subflow); +void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, @@ -749,6 +738,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); +int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); @@ -836,7 +828,7 @@ unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk); void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); -void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); +void mptcp_sockopt_sync_all(struct mptcp_sock *msk); static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index dacf3cee00..4bb305342f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -14,8 +14,6 @@ #include #include "protocol.h" -#define MIN_INFO_OPTLEN_SIZE 16 - static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); @@ -390,8 +388,6 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, switch (optname) { case IPV6_V6ONLY: - case IPV6_TRANSPARENT: - case IPV6_FREEBIND: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); if (!ssock) { @@ -400,24 +396,8 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, } ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); - if (ret != 0) { - release_sock(sk); - return ret; - } - - sockopt_seq_inc(msk); - - switch (optname) { - case IPV6_V6ONLY: + if (ret == 0) sk->sk_ipv6only = ssock->sk->sk_ipv6only; - break; - case IPV6_TRANSPARENT: - inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; - break; - case IPV6_FREEBIND: - inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; - break; - } release_sock(sk); break; @@ -556,7 +536,6 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_TIMESTAMP: case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: - case TCP_INQ: return true; } @@ -568,6 +547,7 @@ static bool mptcp_supported_sockopt(int level, int optname) /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, * are not supported fastopen is currently unsupported */ + /* TCP_INQ is currently unsupported, needs some recvmsg work */ } return false; } @@ -615,171 +595,14 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t return ret; } -static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int val; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; - - lock_sock(sk); - sockopt_seq_inc(msk); - msk->cork = !!val; - mptcp_for_each_subflow(msk, 
subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - lock_sock(ssk); - __tcp_sock_set_cork(ssk, !!val); - release_sock(ssk); - } - if (!val) - mptcp_check_and_set_pending(sk); - release_sock(sk); - - return 0; -} - -static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int val; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; - - lock_sock(sk); - sockopt_seq_inc(msk); - msk->nodelay = !!val; - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - lock_sock(ssk); - __tcp_sock_set_nodelay(ssk, !!val); - release_sock(ssk); - } - if (val) - mptcp_check_and_set_pending(sk); - release_sock(sk); - - return 0; -} - -static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = (struct sock *)msk; - struct inet_sock *issk; - struct socket *ssock; - int err; - - err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); - if (err != 0) - return err; - - lock_sock(sk); - - ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - release_sock(sk); - return -EINVAL; - } - - issk = inet_sk(ssock->sk); - - switch (optname) { - case IP_FREEBIND: - issk->freebind = inet_sk(sk)->freebind; - break; - case IP_TRANSPARENT: - issk->transparent = inet_sk(sk)->transparent; - break; - default: - release_sock(sk); - WARN_ON_ONCE(1); - return -EOPNOTSUPP; - } - - sockopt_seq_inc(msk); - release_sock(sk); - return 0; -} - -static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int err, val; - - err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); - - if (err != 0) - return err; - - lock_sock(sk); - sockopt_seq_inc(msk); - val = inet_sk(sk)->tos; - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - __ip_sock_set_tos(ssk, val); - } - release_sock(sk); - - return err; -} - -static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) -{ - switch (optname) { - case IP_FREEBIND: - case IP_TRANSPARENT: - return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); - case IP_TOS: - return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); - } - - return -EOPNOTSUPP; -} - static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { - struct sock *sk = (void *)msk; - int ret, val; - switch (optname) { - case TCP_INQ: - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; - if (val < 0 || val > 1) - return -EINVAL; - - lock_sock(sk); - msk->recvmsg_inq = !!val; - release_sock(sk); - return 0; case TCP_ULP: return -EOPNOTSUPP; case TCP_CONGESTION: return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); - case TCP_CORK: - return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); - case TCP_NODELAY: - return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); } return -EOPNOTSUPP; @@ -811,9 +634,6 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, if (ssk) return tcp_setsockopt(ssk, level, optname, optval, optlen); - if (level == SOL_IP) - return mptcp_setsockopt_v4(msk, optname, optval, optlen); - if (level 
== SOL_IPV6) return mptcp_setsockopt_v6(msk, optname, optval, optlen); @@ -849,295 +669,6 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int return ret; } -void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) -{ - struct sock *sk = &msk->sk.icsk_inet.sk; - u32 flags = 0; - bool slow; - u8 val; - - memset(info, 0, sizeof(*info)); - - slow = lock_sock_fast(sk); - - info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); - info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); - info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); - info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); - info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); - val = mptcp_pm_get_add_addr_signal_max(msk); - info->mptcpi_add_addr_signal_max = val; - val = mptcp_pm_get_add_addr_accept_max(msk); - info->mptcpi_add_addr_accepted_max = val; - info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); - if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) - flags |= MPTCP_INFO_FLAG_FALLBACK; - if (READ_ONCE(msk->can_ack)) - flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; - info->mptcpi_flags = flags; - info->mptcpi_token = READ_ONCE(msk->token); - info->mptcpi_write_seq = READ_ONCE(msk->write_seq); - info->mptcpi_snd_una = READ_ONCE(msk->snd_una); - info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); - info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); - - unlock_sock_fast(sk, slow); -} -EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); - -static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) -{ - struct mptcp_info m_info; - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(struct mptcp_info)); - - mptcp_diag_fill_info(msk, &m_info); - - if (put_user(len, optlen)) - return -EFAULT; - - if (copy_to_user(optval, &m_info, len)) - return -EFAULT; - - return 0; -} - -static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, - char __user *optval, - u32 copied, - int __user *optlen) -{ - u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); - - if (copied) - copied += sfd->size_subflow_data; - else - copied = copylen; - - if (put_user(copied, optlen)) - return -EFAULT; - - if (copy_to_user(optval, sfd, copylen)) - return -EFAULT; - - return 0; -} - -static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, - char __user *optval, int __user *optlen) -{ - int len, copylen; - - if (get_user(len, optlen)) - return -EFAULT; - - /* if mptcp_subflow_data size is changed, need to adjust - * this function to deal with programs using old version. 
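
The helper being deleted here implements the usual extensible-struct getsockopt contract: userspace states how large its struct mptcp_subflow_data is via size_subflow_data, output-only fields must arrive zeroed, and the return value is the room left for per-subflow records. A condensed model of those checks (userspace C with memcpy in place of copy_from_user; the struct layout is a stand-in):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MIN_INFO_OPTLEN_SIZE 16

struct subflow_data {           /* stand-in for struct mptcp_subflow_data */
    uint32_t size_subflow_data; /* header size userspace knows about */
    uint32_t num_subflows;      /* output only, must arrive zeroed */
    uint32_t size_kernel;       /* output only, must arrive zeroed */
    uint32_t size_user;         /* per-record size userspace wants */
};

/* Returns the payload room left after the header, or -1 on malformed
 * input; these are the checks the removed mptcp_get_subflow_data() does.
 */
static int get_subflow_data(struct subflow_data *sfd,
                            const void *optval, int len)
{
    size_t copylen;

    if (len < MIN_INFO_OPTLEN_SIZE)
        return -1;

    memset(sfd, 0, sizeof(*sfd));
    copylen = (size_t)len < sizeof(*sfd) ? (size_t)len : sizeof(*sfd);
    memcpy(sfd, optval, copylen);

    /* sizes are u32 but optlen is signed */
    if (sfd->size_subflow_data > INT32_MAX || sfd->size_user > INT32_MAX)
        return -1;
    /* header must cover the minimum and fit inside optlen */
    if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
        sfd->size_subflow_data > (uint32_t)len)
        return -1;
    if (sfd->num_subflows || sfd->size_kernel)
        return -1;

    return len - (int)sfd->size_subflow_data;
}

int main(void)
{
    struct subflow_data in = { .size_subflow_data = sizeof(in),
                               .size_user = 64 };
    struct subflow_data sfd;

    printf("payload room: %d bytes\n", get_subflow_data(&sfd, &in, 256));
    return 0;
}

Copying min(user size, kernel size) in each direction is what lets old binaries keep working after the struct grows.
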
- */ - BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); - - if (len < MIN_INFO_OPTLEN_SIZE) - return -EINVAL; - - memset(sfd, 0, sizeof(*sfd)); - - copylen = min_t(unsigned int, len, sizeof(*sfd)); - if (copy_from_user(sfd, optval, copylen)) - return -EFAULT; - - /* size_subflow_data is u32, but len is signed */ - if (sfd->size_subflow_data > INT_MAX || - sfd->size_user > INT_MAX) - return -EINVAL; - - if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || - sfd->size_subflow_data > len) - return -EINVAL; - - if (sfd->num_subflows || sfd->size_kernel) - return -EINVAL; - - return len - sfd->size_subflow_data; -} - -static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, - int __user *optlen) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = &msk->sk.icsk_inet.sk; - unsigned int sfcount = 0, copied = 0; - struct mptcp_subflow_data sfd; - char __user *infoptr; - int len; - - len = mptcp_get_subflow_data(&sfd, optval, optlen); - if (len < 0) - return len; - - sfd.size_kernel = sizeof(struct tcp_info); - sfd.size_user = min_t(unsigned int, sfd.size_user, - sizeof(struct tcp_info)); - - infoptr = optval + sfd.size_subflow_data; - - lock_sock(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ++sfcount; - - if (len && len >= sfd.size_user) { - struct tcp_info info; - - tcp_get_info(ssk, &info); - - if (copy_to_user(infoptr, &info, sfd.size_user)) { - release_sock(sk); - return -EFAULT; - } - - infoptr += sfd.size_user; - copied += sfd.size_user; - len -= sfd.size_user; - } - } - - release_sock(sk); - - sfd.num_subflows = sfcount; - - if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) - return -EFAULT; - - return 0; -} - -static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) -{ - struct inet_sock *inet = inet_sk(sk); - - memset(a, 0, sizeof(*a)); - - if (sk->sk_family == AF_INET) { - a->sin_local.sin_family = AF_INET; - a->sin_local.sin_port = inet->inet_sport; - a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; - - if (!a->sin_local.sin_addr.s_addr) - a->sin_local.sin_addr.s_addr = inet->inet_saddr; - - a->sin_remote.sin_family = AF_INET; - a->sin_remote.sin_port = inet->inet_dport; - a->sin_remote.sin_addr.s_addr = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - } else if (sk->sk_family == AF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - - if (WARN_ON_ONCE(!np)) - return; - - a->sin6_local.sin6_family = AF_INET6; - a->sin6_local.sin6_port = inet->inet_sport; - - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - a->sin6_local.sin6_addr = np->saddr; - else - a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; - - a->sin6_remote.sin6_family = AF_INET6; - a->sin6_remote.sin6_port = inet->inet_dport; - a->sin6_remote.sin6_addr = sk->sk_v6_daddr; -#endif - } -} - -static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, - int __user *optlen) -{ - struct sock *sk = &msk->sk.icsk_inet.sk; - struct mptcp_subflow_context *subflow; - unsigned int sfcount = 0, copied = 0; - struct mptcp_subflow_data sfd; - char __user *addrptr; - int len; - - len = mptcp_get_subflow_data(&sfd, optval, optlen); - if (len < 0) - return len; - - sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); - sfd.size_user = min_t(unsigned int, sfd.size_user, - sizeof(struct mptcp_subflow_addrs)); - - addrptr = optval + sfd.size_subflow_data; - - lock_sock(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ++sfcount; 
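
The loop being removed here, like the matching one in mptcp_getsockopt_tcpinfo() above, emits one fixed-size record per subflow while buffer room remains but keeps counting, so userspace learns the real subflow count and can retry with a larger buffer. The pattern in isolation (memcpy instead of copy_to_user; the record type is illustrative):

#include <stdio.h>
#include <string.h>

struct addr_rec {  /* illustrative record, e.g. one subflow's ports */
    unsigned short local_port;
    unsigned short remote_port;
};

/* Copy one record per subflow while room remains, but keep counting:
 * the full count is reported so userspace can size a retry.
 */
static unsigned int dump_records(const struct addr_rec *sf, unsigned int n,
                                 size_t rec_size, char *out, int room,
                                 unsigned int *copied)
{
    unsigned int sfcount = 0;

    *copied = 0;
    for (unsigned int i = 0; i < n; i++) {
        ++sfcount;
        if (room >= (int)rec_size) {
            memcpy(out + *copied, &sf[i], rec_size);
            *copied += rec_size;
            room -= (int)rec_size;
        }
    }
    return sfcount;
}

int main(void)
{
    struct addr_rec sf[3] = { { 1000, 80 }, { 1001, 80 }, { 1002, 80 } };
    char buf[2 * sizeof(struct addr_rec)];
    unsigned int copied;
    unsigned int count = dump_records(sf, 3, sizeof(sf[0]), buf,
                                      (int)sizeof(buf), &copied);

    printf("%u subflows, %u bytes copied\n", count, copied);
    return 0;
}
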
- - if (len && len >= sfd.size_user) { - struct mptcp_subflow_addrs a; - - mptcp_get_sub_addrs(ssk, &a); - - if (copy_to_user(addrptr, &a, sfd.size_user)) { - release_sock(sk); - return -EFAULT; - } - - addrptr += sfd.size_user; - copied += sfd.size_user; - len -= sfd.size_user; - } - } - - release_sock(sk); - - sfd.num_subflows = sfcount; - - if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) - return -EFAULT; - - return 0; -} - -static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, - int __user *optlen, int val) -{ - int len; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < 0) - return -EINVAL; - - if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { - unsigned char ucval = (unsigned char)val; - - len = 1; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &ucval, 1)) - return -EFAULT; - } else { - len = min_t(unsigned int, len, sizeof(int)); - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; - } - - return 0; -} - static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { @@ -1148,44 +679,10 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_CC_INFO: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); - case TCP_INQ: - return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); - case TCP_CORK: - return mptcp_put_int_option(msk, optval, optlen, msk->cork); - case TCP_NODELAY: - return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); } return -EOPNOTSUPP; } -static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = (void *)msk; - - switch (optname) { - case IP_TOS: - return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); - } - - return -EOPNOTSUPP; -} - -static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, - char __user *optval, int __user *optlen) -{ - switch (optname) { - case MPTCP_INFO: - return mptcp_getsockopt_info(msk, optval, optlen); - case MPTCP_TCPINFO: - return mptcp_getsockopt_tcpinfo(msk, optval, optlen); - case MPTCP_SUBFLOW_ADDRS: - return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); - } - - return -EOPNOTSUPP; -} - int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option) { @@ -1206,12 +703,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, if (ssk) return tcp_getsockopt(ssk, level, optname, optval, option); - if (level == SOL_IP) - return mptcp_getsockopt_v4(msk, optname, optval, option); if (level == SOL_TCP) return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); - if (level == SOL_MPTCP) - return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); return -EOPNOTSUPP; } @@ -1230,7 +723,6 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) ssk->sk_priority = sk->sk_priority; ssk->sk_bound_dev_if = sk->sk_bound_dev_if; ssk->sk_incoming_cpu = sk->sk_incoming_cpu; - __ip_sock_set_tos(ssk, inet_sk(sk)->tos); if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; @@ -1256,11 +748,6 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) tcp_set_congestion_control(ssk, msk->ca_name, false, true); - __tcp_sock_set_cork(ssk, !!msk->cork); - __tcp_sock_set_nodelay(ssk, !!msk->nodelay); - - 
inet_sk(ssk)->transparent = inet_sk(sk)->transparent; - inet_sk(ssk)->freebind = inet_sk(sk)->freebind; } static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) @@ -1285,15 +772,27 @@ void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) } } -void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) +void mptcp_sockopt_sync_all(struct mptcp_sock *msk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + u32 seq; - msk_owned_by_me(msk); + seq = sockopt_seq_reset(sk); - if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { - sync_socket_options(msk, ssk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u32 sseq = READ_ONCE(subflow->setsockopt_seq); - subflow->setsockopt_seq = msk->setsockopt_seq; + if (sseq != msk->setsockopt_seq) { + __mptcp_sockopt_sync(msk, ssk); + WRITE_ONCE(subflow->setsockopt_seq, seq); + } else if (sseq != seq) { + WRITE_ONCE(subflow->setsockopt_seq, seq); + } + + cond_resched(); } + + msk->setsockopt_seq = seq; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bea47a1180..6172f380df 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk) if (!sock_owned_by_user(sk)) __mptcp_set_connected(sk); else - __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags); + set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags); mptcp_data_unlock(sk); } @@ -845,8 +845,9 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * bool csum_reqd) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; u32 offset, seq, delta; - u16 csum; + __wsum csum; int len; if (!csum_reqd) @@ -907,11 +908,13 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * * while the pseudo header requires the original DSS data len, * including that */ - csum = __mptcp_make_csum(subflow->map_seq, - subflow->map_subflow_seq, - subflow->map_data_len + subflow->map_data_fin, - subflow->map_data_csum); - if (unlikely(csum)) { + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); subflow->send_mp_fail = 1; MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); @@ -1271,7 +1274,7 @@ static void subflow_error_report(struct sock *ssk) if (!sock_owned_by_user(sk)) __mptcp_error_report(sk); else - __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags); + set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags); mptcp_data_unlock(sk); } @@ -1290,6 +1293,7 @@ static void subflow_data_ready(struct sock *sk) if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) return; + set_bit(MPTCP_DATA_READY, &msk->flags); parent->sk_data_ready(parent); return; } @@ -1421,8 +1425,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - mptcp_sockopt_sync(msk, ssk); - ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) @@ -1438,8 +1440,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info 
*loc, subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr, ssk->sk_family); - sock_hold(ssk); - list_add_tail(&subflow->node, &msk->conn_list); + mptcp_add_pending_subflow(msk, subflow); + mptcp_sockopt_sync(msk, ssk); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) goto failed_unlink; @@ -1450,7 +1452,9 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, return err; failed_unlink: + spin_lock_bh(&msk->join_list_lock); list_del(&subflow->node); + spin_unlock_bh(&msk->join_list_lock); sock_put(mptcp_subflow_tcp_sock(subflow)); failed: @@ -1529,8 +1533,10 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) * needs it. */ sf->sk->sk_net_refcnt = 1; - get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + get_net(net); +#ifdef CONFIG_PROC_FS + this_cpu_add(*net->core.sock_inuse, 1); +#endif err = tcp_set_ulp(sf->sk, "mptcp"); release_sock(sf->sk); diff --git a/net/mptcp/token.c b/net/mptcp/token.c index f52ee7b26a..e581b341c5 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -384,7 +384,6 @@ void mptcp_token_destroy(struct mptcp_sock *msk) bucket->chain_len--; } spin_unlock_bh(&bucket->lock); - WRITE_ONCE(msk->token, 0); } void __init mptcp_token_init(void) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 78814417d7..7121ce2a47 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -608,7 +608,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); - index = find_first_bit(bitmap, ncf->n_vids); + index = find_next_bit(bitmap, ncf->n_vids, 0); if (index >= ncf->n_vids) { spin_unlock_irqrestore(&nc->lock, flags); return -1; @@ -667,7 +667,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return -1; } - index = find_first_zero_bit(bitmap, ncf->n_vids); + index = find_next_zero_bit(bitmap, ncf->n_vids, 0); if (index < 0 || index >= ncf->n_vids) { netdev_err(ndp->ndev.dev, "Channel %u already has all VLAN filters set\n", diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ddc54b6d18..92a747896f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -10,17 +10,6 @@ config NETFILTER_INGRESS This allows you to classify packets from ingress using the Netfilter infrastructure. -config NETFILTER_EGRESS - bool "Netfilter egress support" - default y - select NET_EGRESS - help - This allows you to classify packets before transmission using the - Netfilter infrastructure. - -config NETFILTER_SKIP_EGRESS - def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB) - config NETFILTER_NETLINK tristate @@ -515,6 +504,12 @@ config NFT_FLOW_OFFLOAD This option adds the "flow_offload" expression that you can use to choose what flows are placed into the hardware. +config NFT_COUNTER + tristate "Netfilter nf_tables counter module" + help + This option adds the "counter" expression that you can use to + include packet and byte counters in a rule. 
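
With the Kconfig hunk above (and the Makefile hunk that follows), nft_counter goes back to being its own tristate module rather than part of the nf_tables core objects. Semantically the expression stays tiny: one packet/byte tally per rule, bumped on every evaluation. A minimal model of that bookkeeping (plain C; the kernel version keeps per-cpu counters and handles fetch/reset, which this omits):

#include <stdint.h>
#include <stdio.h>

struct counter {   /* illustrative; the kernel keeps per-cpu u64 pairs */
    uint64_t packets;
    uint64_t bytes;
};

/* the eval hook: one hit per packet that reaches the rule */
static void counter_eval(struct counter *c, unsigned int pkt_len)
{
    c->packets++;
    c->bytes += pkt_len;
}

int main(void)
{
    struct counter c = { 0, 0 };

    counter_eval(&c, 1500);
    counter_eval(&c, 60);
    printf("packets=%llu bytes=%llu\n",
           (unsigned long long)c.packets, (unsigned long long)c.bytes);
    return 0;
}
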
+ config NFT_CONNLIMIT tristate "Netfilter nf_tables connlimit module" depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a135b1a460..aab20e575e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,7 +75,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \ - nft_counter.o nft_chain_route.o nf_tables_offload.o \ + nft_chain_route.o nf_tables_offload.o \ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ nft_set_pipapo.o @@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o obj-$(CONFIG_NFT_REJECT_NETDEV) += nft_reject_netdev.o obj-$(CONFIG_NFT_TUNNEL) += nft_tunnel.o +obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 354cb472f3..60332fdb6d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -316,12 +316,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, if (dev && dev_net(dev) == net) return &dev->nf_hooks_ingress; } -#endif -#ifdef CONFIG_NETFILTER_EGRESS - if (hooknum == NF_NETDEV_EGRESS) { - if (dev && dev_net(dev) == net) - return &dev->nf_hooks_egress; - } #endif WARN_ON_ONCE(1); return NULL; @@ -341,8 +335,7 @@ static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg, return 0; } -static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg, - int pf) +static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) { if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) || (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS)) @@ -351,12 +344,6 @@ static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg, return false; } -static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg, - int pf) -{ - return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS; -} - static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf) { #ifdef CONFIG_JUMP_LABEL @@ -396,18 +383,9 @@ static int __nf_register_net_hook(struct net *net, int pf, switch (pf) { case NFPROTO_NETDEV: -#ifndef CONFIG_NETFILTER_INGRESS - if (reg->hooknum == NF_NETDEV_INGRESS) - return -EOPNOTSUPP; -#endif -#ifndef CONFIG_NETFILTER_EGRESS - if (reg->hooknum == NF_NETDEV_EGRESS) - return -EOPNOTSUPP; -#endif - if ((reg->hooknum != NF_NETDEV_INGRESS && - reg->hooknum != NF_NETDEV_EGRESS) || - !reg->dev || dev_net(reg->dev) != net) - return -EINVAL; + err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS); + if (err < 0) + return err; break; case NFPROTO_INET: if (reg->hooknum != NF_INET_INGRESS) @@ -428,21 +406,18 @@ static int __nf_register_net_hook(struct net *net, int pf, p = nf_entry_dereference(*pp); new_hooks = nf_hook_entries_grow(p, reg); - if (!IS_ERR(new_hooks)) + if (!IS_ERR(new_hooks)) { + hooks_validate(new_hooks); rcu_assign_pointer(*pp, new_hooks); + } mutex_unlock(&nf_hook_mutex); if (IS_ERR(new_hooks)) return PTR_ERR(new_hooks); - hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (nf_ingress_hook(reg, pf)) net_inc_ingress_queue(); -#endif -#ifdef CONFIG_NETFILTER_EGRESS - if (nf_egress_hook(reg, pf)) - net_inc_egress_queue(); #endif nf_static_key_inc(reg, pf); @@ -500,10 +475,6 @@ static 
void __nf_unregister_net_hook(struct net *net, int pf, #ifdef CONFIG_NETFILTER_INGRESS if (nf_ingress_hook(reg, pf)) net_dec_ingress_queue(); -#endif -#ifdef CONFIG_NETFILTER_EGRESS - if (nf_egress_hook(reg, pf)) - net_dec_egress_queue(); #endif nf_static_key_dec(reg, pf); } else { @@ -666,29 +637,32 @@ EXPORT_SYMBOL(nf_hook_slow_list); /* This needs to be compiled in any case to avoid dependencies between the * nfnetlink_queue code and nf_conntrack. */ -const struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; +struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; EXPORT_SYMBOL_GPL(nfnl_ct_hook); -const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; +struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_hook); #if IS_ENABLED(CONFIG_NF_CONNTRACK) -const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; +/* This does not belong here, but locally generated errors need it if connection + tracking in use: without this, connection may not be in hash table, and hence + manufactured ICMP or RST packets will not be associated with it. */ +void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) + __rcu __read_mostly; +EXPORT_SYMBOL(ip_ct_attach); + +struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_hook); -/* This does not belong here, but locally generated errors need it if connection - * tracking in use: without this, connection may not be in hash table, and hence - * manufactured ICMP or RST packets will not be associated with it. - */ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) { - const struct nf_ct_hook *ct_hook; + void (*attach)(struct sk_buff *, const struct sk_buff *); if (skb->_nfct) { rcu_read_lock(); - ct_hook = rcu_dereference(nf_ct_hook); - if (ct_hook) - ct_hook->attach(new, skb); + attach = rcu_dereference(ip_ct_attach); + if (attach) + attach(new, skb); rcu_read_unlock(); } } @@ -696,7 +670,7 @@ EXPORT_SYMBOL(nf_ct_attach); void nf_conntrack_destroy(struct nf_conntrack *nfct) { - const struct nf_ct_hook *ct_hook; + struct nf_ct_hook *ct_hook; rcu_read_lock(); ct_hook = rcu_dereference(nf_ct_hook); @@ -709,7 +683,7 @@ EXPORT_SYMBOL(nf_conntrack_destroy); bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { - const struct nf_ct_hook *ct_hook; + struct nf_ct_hook *ct_hook; bool ret = false; rcu_read_lock(); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 51ad557a52..393058a43a 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1330,15 +1330,12 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) +ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct netns_ipvs *ipvs = net_ipvs(state->net); - unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; - int af = state->pf; struct sock *sk; EnterFunction(11); @@ -1471,6 +1468,56 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat return NF_ACCEPT; } +/* + * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, + * used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. 
+ */ +static unsigned int +ip_vs_reply4(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); +} + +/* + * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. + */ +static unsigned int +ip_vs_local_reply4(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); +} + +#ifdef CONFIG_IP_VS_IPV6 + +/* + * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, + * used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. + */ +static unsigned int +ip_vs_reply6(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); +} + +/* + * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. + */ +static unsigned int +ip_vs_local_reply6(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); +} + +#endif + static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -1910,17 +1957,14 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, * and send it on its way... */ static unsigned int -ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) +ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct netns_ipvs *ipvs = net_ipvs(state->net); - unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; struct sock *sk; - int af = state->pf; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -2093,6 +2137,55 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state return ret; } +/* + * AF_INET handler in NF_INET_LOCAL_IN chain + * Schedule and forward packets from remote clients + */ +static unsigned int +ip_vs_remote_request4(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); +} + +/* + * AF_INET handler in NF_INET_LOCAL_OUT chain + * Schedule and forward packets from local clients + */ +static unsigned int +ip_vs_local_request4(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); +} + +#ifdef CONFIG_IP_VS_IPV6 + +/* + * AF_INET6 handler in NF_INET_LOCAL_IN chain + * Schedule and forward packets from remote clients + */ +static unsigned int +ip_vs_remote_request6(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); +} + +/* + * AF_INET6 handler in NF_INET_LOCAL_OUT chain + * Schedule and forward packets from local clients + */ +static unsigned int +ip_vs_local_request6(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); +} + +#endif + + /* * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP * related packets destined for 0.0.0.0/0. 
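[Editor's note] The wrappers above trade one generic hook for a set of thin entry points that hard-code the address family instead of reading state->pf. A minimal sketch of the pattern, with ipvs_worker() as a hypothetical stand-in for ip_vs_in()/ip_vs_out():

#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <net/ip_vs.h>

/* hypothetical stand-in for ip_vs_in()/ip_vs_out() */
static unsigned int ipvs_worker(struct netns_ipvs *ipvs, unsigned int hooknum,
				struct sk_buff *skb, int af)
{
	return NF_ACCEPT;
}

static unsigned int example_request4(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	return ipvs_worker(net_ipvs(state->net), state->hook, skb, AF_INET);
}

static unsigned int example_request6(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	return ipvs_worker(net_ipvs(state->net), state->hook, skb, AF_INET6);
}

The cost is duplicated boilerplate; the benefit is that each nf_hook_ops entry names its family and chain at the definition site.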
@@ -2106,36 +2199,45 @@ static unsigned int ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct netns_ipvs *ipvs = net_ipvs(state->net); int r; + struct netns_ipvs *ipvs = net_ipvs(state->net); + + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) + return NF_ACCEPT; /* ipvs enabled in this netns ? */ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - if (state->pf == NFPROTO_IPV4) { - if (ip_hdr(skb)->protocol != IPPROTO_ICMP) - return NF_ACCEPT; -#ifdef CONFIG_IP_VS_IPV6 - } else { - struct ip_vs_iphdr iphdr; - - ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); - - if (iphdr.protocol != IPPROTO_ICMPV6) - return NF_ACCEPT; - - return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); -#endif - } - return ip_vs_in_icmp(ipvs, skb, &r, state->hook); } +#ifdef CONFIG_IP_VS_IPV6 +static unsigned int +ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + int r; + struct netns_ipvs *ipvs = net_ipvs(state->net); + struct ip_vs_iphdr iphdr; + + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); + if (iphdr.protocol != IPPROTO_ICMPV6) + return NF_ACCEPT; + + /* ipvs enabled in this netns ? */ + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) + return NF_ACCEPT; + + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); +} +#endif + + static const struct nf_hook_ops ip_vs_ops4[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_out_hook, + .hook = ip_vs_reply4, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, @@ -2144,21 +2246,21 @@ static const struct nf_hook_ops ip_vs_ops4[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { - .hook = ip_vs_in_hook, + .hook = ip_vs_remote_request4, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_out_hook, + .hook = ip_vs_local_reply4, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_in_hook, + .hook = ip_vs_local_request4, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, @@ -2173,7 +2275,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_out_hook, + .hook = ip_vs_reply4, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -2184,7 +2286,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_out_hook, + .hook = ip_vs_reply6, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, @@ -2193,21 +2295,21 @@ static const struct nf_hook_ops ip_vs_ops6[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. 
*/ { - .hook = ip_vs_in_hook, + .hook = ip_vs_remote_request6, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_out_hook, + .hook = ip_vs_local_reply6, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_in_hook, + .hook = ip_vs_local_request6, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, @@ -2215,14 +2317,14 @@ static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { - .hook = ip_vs_forward_icmp, + .hook = ip_vs_forward_icmp_v6, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_out_hook, + .hook = ip_vs_reply6, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7f645328b4..29ec3ef63e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -47,8 +48,6 @@ #include -MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME); - /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ static DEFINE_MUTEX(__ip_vs_mutex); @@ -960,7 +959,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Create a destination for the given service */ static int -ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) +ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, + struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; unsigned int atype, i; @@ -1020,6 +1020,8 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) spin_lock_init(&dest->stats.lock); __ip_vs_update_dest(svc, dest, udest, 1); + *dest_p = dest; + LeaveFunction(2); return 0; @@ -1093,7 +1095,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Allocate and initialize the dest structure */ - ret = ip_vs_new_dest(svc, udest); + ret = ip_vs_new_dest(svc, udest, &dest); } LeaveFunction(2); @@ -2015,12 +2017,6 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "run_estimation", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -4094,8 +4090,6 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; - ipvs->sysctl_run_estimation = 1; - tbl[idx++].data = &ipvs->sysctl_run_estimation; #ifdef CONFIG_IP_VS_DEBUG /* Global sysctls must be ro in non-init netns */ if (!net_eq(net, &init_net)) diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 9a1a7af6a1..05b8112ffb 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -100,9 +100,6 @@ static void estimation_timer(struct timer_list *t) u64 rate; struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); - if (!sysctl_run_estimation(ipvs)) - goto skip; - spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, 
struct ip_vs_stats, est); @@ -134,8 +131,6 @@ static void estimation_timer(struct timer_list *t) spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); - -skip: mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d6aa5b4703..7f79974607 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -190,7 +189,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); seqcount_spinlock_t nf_conntrack_generation __read_mostly; -static siphash_aligned_key_t nf_conntrack_hash_rnd; +static siphash_key_t nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, unsigned int zoneid, @@ -483,7 +482,7 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); */ u32 nf_ct_get_id(const struct nf_conn *ct) { - static siphash_aligned_key_t ct_id_seed; + static __read_mostly siphash_key_t ct_id_seed; unsigned long a, b, c, d; net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); @@ -559,7 +558,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) -/* Released via nf_ct_destroy() */ +/* Released via destroy_conntrack() */ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags) @@ -586,7 +585,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); nf_ct_zone_add(tmpl, zone); - refcount_set(&tmpl->ct_general.use, 1); + atomic_set(&tmpl->ct_general.use, 0); return tmpl; } @@ -613,12 +612,13 @@ static void destroy_gre_conntrack(struct nf_conn *ct) #endif } -void nf_ct_destroy(struct nf_conntrack *nfct) +static void +destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - pr_debug("%s(%p)\n", __func__, ct); - WARN_ON(refcount_read(&nfct->use) != 0); + pr_debug("destroy_conntrack(%p)\n", ct); + WARN_ON(atomic_read(&nfct->use) != 0); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); @@ -643,10 +643,9 @@ void nf_ct_destroy(struct nf_conntrack *nfct) if (ct->master) nf_ct_put(ct->master); - pr_debug("%s: returning ct=%p to slab\n", __func__, ct); + pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); nf_conntrack_free(ct); } -EXPORT_SYMBOL(nf_ct_destroy); static void nf_ct_delete_from_lists(struct nf_conn *ct) { @@ -743,7 +742,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { - if (!refcount_inc_not_zero(&ct->ct_general.use)) + if (!atomic_inc_not_zero(&ct->ct_general.use)) return; if (nf_ct_should_gc(ct)) @@ -811,7 +810,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, * in, try to obtain a reference and re-check tuple */ ct = nf_ct_tuplehash_to_ctrack(h); - if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { + if (likely(atomic_inc_not_zero(&ct->ct_general.use))) { if (likely(nf_ct_key_equal(h, tuple, zone, net))) goto found; @@ -908,7 +907,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) smp_wmb(); /* The caller holds a reference to this object */ - refcount_set(&ct->ct_general.use, 2); + atomic_set(&ct->ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, reply_hash); 
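[Editor's note] The use-counter conversions above go back to atomic_t, with atomic_inc_not_zero() guarding lookups. Because the conntrack slab is SLAB_TYPESAFE_BY_RCU, a reader can race with free/reallocation, so it must take the reference conditionally and re-check identity afterwards. A self-contained sketch of that discipline (types and names are illustrative):

#include <linux/atomic.h>
#include <linux/compiler.h>

struct obj {
	atomic_t use;		/* 0 means freed or not yet published */
	unsigned long key;
};

static struct obj *obj_get_rcu(struct obj *candidate, unsigned long key)
{
	/* refuse objects that are dying or already recycled */
	if (!atomic_inc_not_zero(&candidate->use))
		return NULL;

	/* the slab may have recycled the object: re-check after the get */
	if (READ_ONCE(candidate->key) != key) {
		atomic_dec(&candidate->use);	/* sketch; real code does a full put */
		return NULL;
	}

	return candidate;
}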
nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); @@ -959,7 +958,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) { struct nf_conn_tstamp *tstamp; - refcount_inc(&ct->ct_general.use); + atomic_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; /* set conntrack timestamp, if enabled. */ @@ -990,7 +989,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_ct_add_to_dying_list(loser_ct); - nf_ct_put(loser_ct); + nf_conntrack_put(&loser_ct->ct_general); nf_ct_set(skb, ct, ctinfo); NF_CT_STAT_INC(net, clash_resolve); @@ -1352,7 +1351,7 @@ static unsigned int early_drop_list(struct net *net, nf_ct_is_dying(tmp)) continue; - if (!refcount_inc_not_zero(&tmp->ct_general.use)) + if (!atomic_inc_not_zero(&tmp->ct_general.use)) continue; /* kill only if still in same netns -- might have moved due to @@ -1470,7 +1469,7 @@ static void gc_worker(struct work_struct *work) continue; /* need to take reference to avoid possible races */ - if (!refcount_inc_not_zero(&tmp->ct_general.use)) + if (!atomic_inc_not_zero(&tmp->ct_general.use)) continue; if (gc_worker_skip_ct(tmp)) { @@ -1563,14 +1562,16 @@ __nf_conntrack_alloc(struct net *net, ct->status = 0; WRITE_ONCE(ct->timeout, 0); write_pnet(&ct->ct_net, net); - memset_after(ct, 0, __nfct_init_offset); + memset(&ct->__nfct_init_offset, 0, + offsetof(struct nf_conn, proto) - + offsetof(struct nf_conn, __nfct_init_offset)); nf_ct_zone_add(ct, zone); /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ - refcount_set(&ct->ct_general.use, 0); + atomic_set(&ct->ct_general.use, 0); return ct; out: atomic_dec(&cnet->count); @@ -1595,7 +1596,7 @@ void nf_conntrack_free(struct nf_conn *ct) /* A freed object has refcnt == 0, that's * the golden rule for SLAB_TYPESAFE_BY_RCU */ - WARN_ON(refcount_read(&ct->ct_general.use) != 0); + WARN_ON(atomic_read(&ct->ct_general.use) != 0); nf_ct_ext_destroy(ct); kmem_cache_free(nf_conntrack_cachep, ct); @@ -1687,8 +1688,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - /* Now it is inserted into the unconfirmed list, set refcount to 1. */ - refcount_set(&ct->ct_general.use, 1); + /* Now it is inserted into the unconfirmed list, bump refcount */ + nf_conntrack_get(&ct->ct_general); nf_ct_add_to_unconfirmed_list(ct); local_bh_enable(); @@ -1748,9 +1749,6 @@ resolve_normal_ct(struct nf_conn *tmpl, return 0; if (IS_ERR(h)) return PTR_ERR(h); - - ct = nf_ct_tuplehash_to_ctrack(h); - ct->local_origin = state->hook == NF_INET_LOCAL_OUT; } ct = nf_ct_tuplehash_to_ctrack(h); @@ -1922,7 +1920,7 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_ct_put(ct); + nf_conntrack_put(&ct->ct_general); skb->_nfct = 0; /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. 
We have to go back and create a @@ -2088,9 +2086,9 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - const struct nf_nat_hook *nat_hook; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; + struct nf_nat_hook *nat_hook; unsigned int status; int dataoff; u16 l3num; @@ -2303,7 +2301,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), return NULL; found: - refcount_inc(&ct->ct_general.use); + atomic_inc(&ct->ct_general.use); spin_unlock(lockp); local_bh_enable(); return ct; @@ -2458,6 +2456,7 @@ static int kill_all(struct nf_conn *i, void *data) void nf_conntrack_cleanup_start(void) { conntrack_gc_work.exiting = true; + RCU_INIT_POINTER(ip_ct_attach, NULL); } void nf_conntrack_cleanup_end(void) @@ -2593,6 +2592,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize) hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } + old_size = nf_conntrack_htable_size; old_hash = nf_conntrack_hash; nf_conntrack_hash = hash; @@ -2631,7 +2631,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) static __always_inline unsigned int total_extension_size(void) { /* remember to add new extensions below */ - BUILD_BUG_ON(NF_CT_EXT_NUM > 10); + BUILD_BUG_ON(NF_CT_EXT_NUM > 9); return sizeof(struct nf_ct_ext) + sizeof(struct nf_conn_help) @@ -2654,9 +2654,6 @@ static __always_inline unsigned int total_extension_size(void) #endif #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + sizeof(struct nf_conn_synproxy) -#endif -#if IS_ENABLED(CONFIG_NET_ACT_CT) - + sizeof(struct nf_conn_act_ct_ext) #endif ; }; @@ -2775,15 +2772,16 @@ int nf_conntrack_init_start(void) return ret; } -static const struct nf_ct_hook nf_conntrack_hook = { +static struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, - .destroy = nf_ct_destroy, + .destroy = destroy_conntrack, .get_tuple_skb = nf_conntrack_get_tuple_skb, - .attach = nf_conntrack_attach, }; void nf_conntrack_init_end(void) { + /* For use by REJECT target */ + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 96948e98ec..f562eeef42 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hash); unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static siphash_aligned_key_t nf_ct_expect_hashrnd; +static siphash_key_t nf_ct_expect_hashrnd __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, @@ -203,12 +203,12 @@ nf_ct_find_expectation(struct net *net, * about to invoke ->destroy(), or nf_ct_delete() via timeout * or early_drop(). * - * The refcount_inc_not_zero() check tells: If that fails, we + * The atomic_inc_not_zero() check tells: If that fails, we * know that the ct is being destroyed. If it succeeds, we * can be sure the ct cannot disappear underneath. 
*/ if (unlikely(nf_ct_is_dying(exp->master) || - !refcount_inc_not_zero(&exp->master->ct_general.use))) + !atomic_inc_not_zero(&exp->master->ct_general.use))) return NULL; if (exp->flags & NF_CT_EXPECT_PERMANENT) { diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 55415f0119..7f19ee2596 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -20,14 +20,13 @@ #include #include -#define HELPER_NAME "netbios-ns" #define NMBD_PORT 137 MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_netbios_ns"); -MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); +MODULE_ALIAS_NFCT_HELPER("netbios_ns"); static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); @@ -45,7 +44,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper helper __read_mostly = { - .name = HELPER_NAME, + .name = "netbios-ns", .tuple.src.l3num = NFPROTO_IPV4, .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7032402ffd..1c02be04aa 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -508,7 +508,7 @@ static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use)))) + if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)))) goto nla_put_failure; return 0; @@ -1195,10 +1195,12 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], hnnode) { + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { if (i < ARRAY_SIZE(nf_ct_evict) && - refcount_inc_not_zero(&ct->ct_general.use)) + atomic_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; continue; } @@ -1206,9 +1208,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) if (!net_eq(net, nf_ct_net(ct))) continue; - if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - if (cb->args[1]) { if (ct != last) continue; @@ -1747,9 +1746,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), - ct, dying, 0); + ct, dying ? 
true : false, 0); if (res < 0) { - if (!refcount_inc_not_zero(&ct->ct_general.use)) + if (!atomic_inc_not_zero(&ct->ct_general.use)) continue; cb->args[0] = cpu; cb->args[1] = (unsigned long)ct; @@ -1820,7 +1819,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, const struct nlattr *attr) __must_hold(RCU) { - const struct nf_nat_hook *nat_hook; + struct nf_nat_hook *nat_hook; int err; nat_hook = rcu_dereference(nf_nat_hook); @@ -2923,7 +2922,7 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); } -static const struct nfnl_ct_hook ctnetlink_glue_hook = { +static struct nfnl_ct_hook ctnetlink_glue_hook = { .build_size = ctnetlink_glue_build_size, .build = ctnetlink_glue_build, .parse = ctnetlink_glue_parse, @@ -2997,7 +2996,7 @@ static const union nf_inet_addr any_addr; static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) { - static siphash_aligned_key_t exp_id_seed; + static __read_mostly siphash_key_t exp_id_seed; unsigned long a, b, c, d; net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index d1f2d3c8d2..8f7a983734 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -155,16 +155,6 @@ unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, } EXPORT_SYMBOL_GPL(nf_confirm); -static bool in_vrf_postrouting(const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - if (state->hook == NF_INET_POST_ROUTING && - netif_is_l3_master(state->out)) - return true; -#endif - return false; -} - static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -176,9 +166,6 @@ static unsigned int ipv4_confirm(void *priv, if (!ct || ctinfo == IP_CT_RELATED_REPLY) return nf_conntrack_confirm(skb); - if (in_vrf_postrouting(state)) - return NF_ACCEPT; - return nf_confirm(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); @@ -387,9 +374,6 @@ static unsigned int ipv6_confirm(void *priv, if (!ct || ctinfo == IP_CT_RELATED_REPLY) return nf_conntrack_confirm(skb); - if (in_vrf_postrouting(state)) - return NF_ACCEPT; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d1582b888c..af5115e127 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -446,32 +446,6 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } -static void tcp_init_sender(struct ip_ct_tcp_state *sender, - struct ip_ct_tcp_state *receiver, - const struct sk_buff *skb, - unsigned int dataoff, - const struct tcphdr *tcph, - u32 end, u32 win) -{ - /* SYN-ACK in reply to a SYN - * or SYN from reply direction in simultaneous open. - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, dataoff, tcph, sender); - /* RFC 1323: - * Both sides must send the Window Scale option - * to enable window scaling in either direction. 
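[Editor's note] Several hunks above (nf_conntrack_hash_rnd, ct_id_seed, nf_ct_expect_hashrnd, exp_id_seed) return to a plain siphash_key_t marked __read_mostly, seeded lazily on first use. A minimal sketch of that idiom:

#include <linux/siphash.h>
#include <linux/net.h>		/* net_get_random_once() */

static siphash_key_t example_hash_rnd __read_mostly;

static u32 example_hash(const void *data, size_t len)
{
	/* seeds the key exactly once, on the first call */
	net_get_random_once(&example_hash_rnd, sizeof(example_hash_rnd));

	return (u32)siphash(data, len, &example_hash_rnd);
}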
- */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && - receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { - sender->td_scale = 0; - receiver->td_scale = 0; - } -} - static bool tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, unsigned int index, @@ -525,9 +499,24 @@ static bool tcp_in_window(struct nf_conn *ct, * Initialize sender data. */ if (tcph->syn) { - tcp_init_sender(sender, receiver, - skb, dataoff, tcph, - end, win); + /* + * SYN-ACK in reply to a SYN + * or SYN from reply direction in simultaneous open. + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(skb, dataoff, tcph, sender); + /* + * RFC 1323: + * Both sides must send the Window Scale option + * to enable window scaling in either direction. + */ + if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE + && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) + sender->td_scale = + receiver->td_scale = 0; if (!tcph->ack) /* Simultaneous open */ return true; @@ -571,18 +560,6 @@ static bool tcp_in_window(struct nf_conn *ct, sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); - } else if (tcph->syn && dir == IP_CT_DIR_REPLY && - state->state == TCP_CONNTRACK_SYN_SENT) { - /* Retransmitted syn-ack, or syn (simultaneous open). - * - * Re-init state for this direction, just like for the first - * syn(-ack) reply, it might differ in seq, ack or tcp options. - */ - tcp_init_sender(sender, receiver, - skb, dataoff, tcph, - end, win); - if (!tcph->ack) - return true; } if (!(tcph->ack)) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3e1afd10a9..80f675d884 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -303,7 +303,7 @@ static int ct_seq_show(struct seq_file *s, void *v) int ret = 0; WARN_ON(!ct); - if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) return 0; if (nf_ct_should_gc(ct)) { @@ -370,7 +370,7 @@ static int ct_seq_show(struct seq_file *s, void *v) ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); ct_show_delta_time(s, ct); - seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use)); + seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); if (seq_has_overflowed(s)) goto release; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b90eca7a2f..ed37bb9b4e 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -48,7 +48,7 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct) struct flow_offload *flow; if (unlikely(nf_ct_is_dying(ct) || - !refcount_inc_not_zero(&ct->ct_general.use))) + !atomic_inc_not_zero(&ct->ct_general.use))) return NULL; flow = kzalloc(sizeof(*flow), GFP_ATOMIC); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 5c57ade6bd..bc4126d8ef 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -54,30 +54,8 @@ static struct nf_flowtable_type flowtable_inet = { .owner = THIS_MODULE, }; -static struct nf_flowtable_type flowtable_ipv4 = { - .family = NFPROTO_IPV4, - .init = nf_flow_table_init, - .setup = nf_flow_table_offload_setup, - .action = nf_flow_rule_route_ipv4, - .free = nf_flow_table_free, - .hook = nf_flow_offload_ip_hook, - .owner = THIS_MODULE, -}; - -static struct nf_flowtable_type flowtable_ipv6 = { - .family = NFPROTO_IPV6, - .init = nf_flow_table_init, - .setup = 
nf_flow_table_offload_setup, - .action = nf_flow_rule_route_ipv6, - .free = nf_flow_table_free, - .hook = nf_flow_offload_ipv6_hook, - .owner = THIS_MODULE, -}; - static int __init nf_flow_inet_module_init(void) { - nft_register_flowtable_type(&flowtable_ipv4); - nft_register_flowtable_type(&flowtable_ipv6); nft_register_flowtable_type(&flowtable_inet); return 0; @@ -86,8 +64,6 @@ static int __init nf_flow_inet_module_init(void) static void __exit nf_flow_inet_module_exit(void) { nft_unregister_flowtable_type(&flowtable_inet); - nft_unregister_flowtable_type(&flowtable_ipv6); - nft_unregister_flowtable_type(&flowtable_ipv4); } module_init(nf_flow_inet_module_init); @@ -95,7 +71,5 @@ module_exit(nf_flow_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); -MODULE_ALIAS_NF_FLOWTABLE(AF_INET); -MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */ MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module"); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 2d06a66899..2731176839 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -34,7 +34,7 @@ static unsigned int nat_net_id __read_mostly; static struct hlist_head *nf_nat_bysource __read_mostly; static unsigned int nf_nat_htable_size __read_mostly; -static siphash_aligned_key_t nf_nat_hash_rnd; +static siphash_key_t nf_nat_hash_rnd __read_mostly; struct nf_nat_lookup_hook_priv { struct nf_hook_entries __rcu *entries; @@ -494,38 +494,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, goto another_round; } -static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple) -{ - u16 sp, dp; - - switch (tuple->dst.protonum) { - case IPPROTO_TCP: - sp = ntohs(tuple->src.u.tcp.port); - dp = ntohs(tuple->dst.u.tcp.port); - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - sp = ntohs(tuple->src.u.udp.port); - dp = ntohs(tuple->dst.u.udp.port); - break; - default: - return false; - } - - /* IANA: System port range: 1-1023, - * user port range: 1024-49151, - * private port range: 49152-65535. - * - * Linux default ephemeral port range is 32768-60999. - * - * Enforce port remapping if sport is significantly lower - * than dport to prevent NAT port shadowing, i.e. - * accidental match of 'new' inbound connection vs. - * existing outbound one. - */ - return sp < 16384 && dp >= 32768; -} - /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the @@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { - bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL; const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); - if (maniptype == NF_NAT_MANIP_SRC && - !random_port && - !ct->local_origin) - random_port = tuple_force_port_remap(orig_tuple); - /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. @@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * So far, we don't do local source mappings, so multiple * manips not an issue. 
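[Editor's note] In get_unique_tuple() above, the revert drops the cached random_port bool and tests the range flags directly. NF_NAT_RANGE_PROTO_RANDOM_ALL covers both randomisation flags; a tiny sketch of the test (helper name is made up):

#include <linux/netfilter/nf_nat.h>

static bool example_wants_random_port(const struct nf_nat_range2 *range)
{
	/* RANDOM_ALL = RANDOM | RANDOM_FULLY */
	return range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL;
}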
*/ - if (maniptype == NF_NAT_MANIP_SRC && !random_port) { + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!random_port) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, @@ -736,16 +699,6 @@ unsigned int nf_nat_packet(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(nf_nat_packet); -static bool in_vrf_postrouting(const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - if (state->hook == NF_INET_POST_ROUTING && - netif_is_l3_master(state->out)) - return true; -#endif - return false; -} - unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -762,7 +715,7 @@ nf_nat_inet_fn(void *priv, struct sk_buff *skb, * packet filter it out, or implement conntrack/NAT for that * protocol. 8) --RR */ - if (!ct || in_vrf_postrouting(state)) + if (!ct) return NF_ACCEPT; nat = nfct_nat(ct); @@ -1167,7 +1120,7 @@ static struct pernet_operations nat_net_ops = { .size = sizeof(struct nat_net), }; -static const struct nf_nat_hook nat_hook = { +static struct nf_nat_hook nat_hook = { .parse_nat_setup = nfnetlink_parse_nat_setup, #ifdef CONFIG_XFRM .decode_session = __nf_nat_decode_session, diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index e32fac3746..acd73f717a 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -12,7 +12,6 @@ struct masq_dev_work { struct work_struct work; struct net *net; - netns_tracker ns_tracker; union nf_inet_addr addr; int ifindex; int (*iter)(struct nf_conn *i, void *data); @@ -83,7 +82,7 @@ static void iterate_cleanup_work(struct work_struct *work) nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0); - put_net_track(w->net, &w->ns_tracker); + put_net(w->net); kfree(w); atomic_dec(&masq_worker_count); module_put(THIS_MODULE); @@ -120,7 +119,6 @@ static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = ifindex; w->net = net; - netns_tracker_alloc(net, &w->ns_tracker, gfp_flags); w->iter = iter; if (addr) w->addr = *addr; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 6d12afabfe..63d1516816 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -46,6 +46,15 @@ void nf_unregister_queue_handler(void) } EXPORT_SYMBOL(nf_unregister_queue_handler); +static void nf_queue_sock_put(struct sock *sk) +{ +#ifdef CONFIG_INET + sock_gen_put(sk); +#else + sock_put(sk); +#endif +} + static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -54,7 +63,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(state->in); dev_put(state->out); if (state->sk) - sock_put(state->sk); + nf_queue_sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_put(entry->physin); @@ -87,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) } /* Bump dev refs so they don't vanish while packet is out */ -void nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct 
nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; + if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) + return false; + dev_hold(state->in); dev_hold(state->out); - if (state->sk) - sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_hold(entry->physin); dev_hold(entry->physout); #endif + return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); @@ -169,6 +180,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, break; } + if (skb_sk_is_prefetched(skb)) { + struct sock *sk = skb->sk; + + if (!sk_is_refcounted(sk)) { + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + return -ENOTCONN; + + /* drop refcount on skb_orphan */ + skb->destructor = sock_edemux; + } + } + entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); if (!entry) return -ENOMEM; @@ -187,7 +210,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, __nf_queue_entry_init_physdevs(entry); - nf_queue_entry_get_refs(entry); + if (!nf_queue_entry_get_refs(entry)) { + kfree(entry); + return -ENOTCONN; + } switch (entry->state.pf) { case AF_INET: diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 2dfc5dae06..3d6d49420d 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -349,6 +349,7 @@ static int __net_init synproxy_net_init(struct net *net) goto err2; __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9cd1d7a628..2b2e0210a7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1747,16 +1747,16 @@ static void nft_chain_stats_replace(struct nft_trans *trans) static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) { - struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0); - struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1); + struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0); + struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1); if (g0 != g1) kvfree(g1); kvfree(g0); /* should be NULL either via abort or via successful commit */ - WARN_ON_ONCE(chain->blob_next); - kvfree(chain->blob_next); + WARN_ON_ONCE(chain->rules_next); + kvfree(chain->rules_next); } void nf_tables_chain_destroy(struct nft_ctx *ctx) @@ -2002,38 +2002,23 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook) struct nft_rules_old { struct rcu_head h; - struct nft_rule_blob *blob; + struct nft_rule **start; }; -static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) +static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain, + unsigned int alloc) { - struct nft_rule_dp *prule; - - prule = (struct nft_rule_dp *)ptr; - prule->is_last = 1; - /* blob size does not include the trailer rule */ -} - -static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size) -{ - struct nft_rule_blob *blob; - - /* size must include room for the last rule */ - if (size < offsetof(struct nft_rule_dp, data)) + if (alloc > INT_MAX) return NULL; - size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old); - if (size > INT_MAX) + alloc += 1; /* NULL, ends rules */ + if (sizeof(struct nft_rule *) > INT_MAX / alloc) return NULL; - blob = kvmalloc(size, GFP_KERNEL); - if (!blob) - return NULL; + alloc *= sizeof(struct nft_rule *); + alloc += sizeof(struct nft_rules_old); 
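[Editor's note] nf_queue_entry_get_refs() above now reports failure instead of unconditionally holding the socket: a zero sk_refcnt means the socket is already being torn down, so the packet must not be queued. A minimal sketch of the conditional hold (helper name is made up):

#include <net/sock.h>

/* returns false if the socket is already on its way out */
static bool example_hold_sk(struct sock *sk)
{
	return !sk || refcount_inc_not_zero(&sk->sk_refcnt);
}

The matching release goes through nf_queue_sock_put() from the hunk above, which uses sock_gen_put() under CONFIG_INET so that request and timewait sockets are dropped correctly.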
- blob->size = 0; - nft_last_rule(blob, blob->data); - - return blob; + return kvmalloc(alloc, GFP_KERNEL); } static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family, @@ -2106,10 +2091,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats; struct net *net = ctx->net; char name[NFT_NAME_MAXLEN]; - struct nft_rule_blob *blob; struct nft_trans *trans; struct nft_chain *chain; - unsigned int data_size; + struct nft_rule **rules; int err; if (table->use == UINT_MAX) @@ -2194,15 +2178,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]); } - data_size = offsetof(struct nft_rule_dp, data); /* last rule */ - blob = nf_tables_chain_alloc_rules(data_size); - if (!blob) { + rules = nf_tables_chain_alloc_rules(chain, 0); + if (!rules) { err = -ENOMEM; goto err_destroy_chain; } - RCU_INIT_POINTER(chain->blob_gen_0, blob); - RCU_INIT_POINTER(chain->blob_gen_1, blob); + *rules = NULL; + rcu_assign_pointer(chain->rules_gen_0, rules); + rcu_assign_pointer(chain->rules_gen_1, rules); err = nf_tables_register_hook(net, table, chain); if (err < 0) @@ -4502,7 +4486,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); nft_set_elem_destroy(set, catchall->elem, true); - kfree_rcu(catchall); + kfree_rcu(catchall, rcu); } } @@ -5669,7 +5653,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem->priv) { list_del_rcu(&catchall->list); - kfree_rcu(catchall); + kfree_rcu(catchall, rcu); break; } } @@ -8262,82 +8246,32 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { - const struct nft_expr *expr, *last; - struct nft_regs_track track = {}; - unsigned int size, data_size; - void *data, *data_boundary; - struct nft_rule_dp *prule; struct nft_rule *rule; + unsigned int alloc = 0; + int i; /* already handled or inactive chain? 
*/ - if (chain->blob_next || !nft_is_active_next(net, chain)) + if (chain->rules_next || !nft_is_active_next(net, chain)) return 0; rule = list_entry(&chain->rules, struct nft_rule, list); - - data_size = 0; - list_for_each_entry_continue(rule, &chain->rules, list) { - if (nft_is_active_next(net, rule)) { - data_size += sizeof(*prule) + rule->dlen; - if (data_size > INT_MAX) - return -ENOMEM; - } - } - data_size += offsetof(struct nft_rule_dp, data); /* last rule */ - - chain->blob_next = nf_tables_chain_alloc_rules(data_size); - if (!chain->blob_next) - return -ENOMEM; - - data = (void *)chain->blob_next->data; - data_boundary = data + data_size; - size = 0; + i = 0; list_for_each_entry_continue(rule, &chain->rules, list) { - if (!nft_is_active_next(net, rule)) - continue; - - prule = (struct nft_rule_dp *)data; - data += offsetof(struct nft_rule_dp, data); - if (WARN_ON_ONCE(data > data_boundary)) - return -ENOMEM; - - size = 0; - track.last = nft_expr_last(rule); - nft_rule_for_each_expr(expr, last, rule) { - track.cur = expr; - - if (expr->ops->reduce && - expr->ops->reduce(&track, expr)) { - expr = track.cur; - continue; - } - - if (WARN_ON_ONCE(data + expr->ops->size > data_boundary)) - return -ENOMEM; - - memcpy(data + size, expr, expr->ops->size); - size += expr->ops->size; - } - if (WARN_ON_ONCE(size >= 1 << 12)) - return -ENOMEM; - - prule->handle = rule->handle; - prule->dlen = size; - prule->is_last = 0; - - data += size; - size = 0; - chain->blob_next->size += (unsigned long)(data - (void *)prule); + if (nft_is_active_next(net, rule)) + alloc++; } - prule = (struct nft_rule_dp *)data; - data += offsetof(struct nft_rule_dp, data); - if (WARN_ON_ONCE(data > data_boundary)) + chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc); + if (!chain->rules_next) return -ENOMEM; - nft_last_rule(chain->blob_next, prule); + list_for_each_entry_continue(rule, &chain->rules, list) { + if (nft_is_active_next(net, rule)) + chain->rules_next[i++] = rule; + } + chain->rules_next[i] = NULL; return 0; } @@ -8351,8 +8285,8 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - kvfree(chain->blob_next); - chain->blob_next = NULL; + kvfree(chain->rules_next); + chain->rules_next = NULL; } } } @@ -8361,34 +8295,38 @@ static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h) { struct nft_rules_old *o = container_of(h, struct nft_rules_old, h); - kvfree(o->blob); + kvfree(o->start); } -static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob) +static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules) { + struct nft_rule **r = rules; struct nft_rules_old *old; - /* rcu_head is after end marker */ - old = (void *)blob + sizeof(*blob) + blob->size; - old->blob = blob; + while (*r) + r++; + + r++; /* rcu_head is after end marker */ + old = (void *) r; + old->start = rules; call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old); } static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) { - struct nft_rule_blob *g0, *g1; + struct nft_rule **g0, **g1; bool next_genbit; next_genbit = nft_gencursor_next(net); - g0 = rcu_dereference_protected(chain->blob_gen_0, + g0 = rcu_dereference_protected(chain->rules_gen_0, lockdep_commit_lock_is_held(net)); - g1 = rcu_dereference_protected(chain->blob_gen_1, + g1 = rcu_dereference_protected(chain->rules_gen_1, lockdep_commit_lock_is_held(net)); /* No changes to this chain? 
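[Editor's note] nf_tables_commit_chain_free_rules_old() above relies on nf_tables_chain_alloc_rules() having reserved a struct nft_rules_old right behind the NULL terminator, so the retired generation can be freed after a grace period without allocating anything on the commit path. A self-contained sketch of that trailer trick (generic pointer types, illustrative names):

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>

struct rules_old {
	struct rcu_head h;
	void **start;
};

static void free_old_rcu(struct rcu_head *h)
{
	struct rules_old *o = container_of(h, struct rules_old, h);

	kvfree(o->start);
}

/* rules points at a NULL-terminated pointer array allocated with
 * sizeof(struct rules_old) of extra room behind the terminator
 */
static void free_rules_old(void **rules)
{
	void **r = rules;
	struct rules_old *old;

	while (*r)	/* walk to the NULL end marker */
		r++;
	r++;		/* the trailer sits right behind it */

	old = (struct rules_old *)r;
	old->start = rules;
	call_rcu(&old->h, free_old_rcu);
}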
*/ - if (chain->blob_next == NULL) { + if (chain->rules_next == NULL) { /* chain had no change in last or next generation */ if (g0 == g1) return; @@ -8397,10 +8335,10 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) * one uses same rules as current generation. */ if (next_genbit) { - rcu_assign_pointer(chain->blob_gen_1, g0); + rcu_assign_pointer(chain->rules_gen_1, g0); nf_tables_commit_chain_free_rules_old(g1); } else { - rcu_assign_pointer(chain->blob_gen_0, g1); + rcu_assign_pointer(chain->rules_gen_0, g1); nf_tables_commit_chain_free_rules_old(g0); } @@ -8408,11 +8346,11 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) } if (next_genbit) - rcu_assign_pointer(chain->blob_gen_1, chain->blob_next); + rcu_assign_pointer(chain->rules_gen_1, chain->rules_next); else - rcu_assign_pointer(chain->blob_gen_0, chain->blob_next); + rcu_assign_pointer(chain->rules_gen_0, chain->rules_next); - chain->blob_next = NULL; + chain->rules_next = NULL; if (g0 == g1) return; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 36e73f9828..866cfba04d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -38,7 +38,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - const struct nft_rule_dp *rule, + const struct nft_rule *rule, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { @@ -67,36 +67,6 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } -static noinline void __nft_trace_verdict(struct nft_traceinfo *info, - const struct nft_chain *chain, - const struct nft_regs *regs) -{ - enum nft_trace_types type; - - switch (regs->verdict.code) { - case NFT_CONTINUE: - case NFT_RETURN: - type = NFT_TRACETYPE_RETURN; - break; - default: - type = NFT_TRACETYPE_RULE; - break; - } - - __nft_trace_packet(info, chain, type); -} - -static inline void nft_trace_verdict(struct nft_traceinfo *info, - const struct nft_chain *chain, - const struct nft_rule_dp *rule, - const struct nft_regs *regs) -{ - if (static_branch_unlikely(&nft_trace_enabled)) { - info->rule = rule; - __nft_trace_verdict(info, chain, regs); - } -} - static bool nft_payload_fast_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -109,7 +79,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); else { - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!pkt->tprot_set) return false; ptr = skb_network_header(skb) + nft_thoff(pkt); } @@ -140,6 +110,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, base_chain = nft_base_chain(chain); + rcu_read_lock(); pstats = READ_ONCE(base_chain->stats); if (pstats) { local_bh_disable(); @@ -150,12 +121,12 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, u64_stats_update_end(&stats->syncp); local_bh_enable(); } + rcu_read_unlock(); } struct nft_jumpstack { - const struct nft_chain *chain; - const struct nft_rule_dp *rule; - const struct nft_rule_dp *last_rule; + const struct nft_chain *chain; + struct nft_rule *const *rules; }; static void expr_call_ops_eval(const struct nft_expr *expr, @@ -170,7 +141,6 @@ static void expr_call_ops_eval(const struct nft_expr *expr, X(e, nft_payload_eval); X(e, nft_cmp_eval); - X(e, nft_counter_eval); 
X(e, nft_meta_get_eval); X(e, nft_lookup_eval); X(e, nft_range_eval); @@ -184,28 +154,18 @@ static void expr_call_ops_eval(const struct nft_expr *expr, expr->ops->eval(expr, regs, pkt); } -#define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0] -#define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size -#define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen] -#define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen - -#define nft_rule_dp_for_each_expr(expr, last, rule) \ - for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \ - (expr) != (last); \ - (expr) = nft_rule_expr_next(expr)) - unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv) { const struct nft_chain *chain = priv, *basechain = chain; - const struct nft_rule_dp *rule, *last_rule; const struct net *net = nft_net(pkt); + struct nft_rule *const *rules; + const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); - struct nft_rule_blob *blob; struct nft_traceinfo info; info.trace = false; @@ -213,16 +173,16 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: if (genbit) - blob = rcu_dereference(chain->blob_gen_1); + rules = rcu_dereference(chain->rules_gen_1); else - blob = rcu_dereference(chain->blob_gen_0); + rules = rcu_dereference(chain->rules_gen_0); - rule = (struct nft_rule_dp *)blob->data; - last_rule = (void *)blob->data + blob->size; next_rule: + rule = *rules; regs.verdict.code = NFT_CONTINUE; - for (; rule < last_rule; rule = nft_rule_next(rule)) { - nft_rule_dp_for_each_expr(expr, last, rule) { + for (; *rules ; rules++) { + rule = *rules; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, ®s); else if (expr->ops == &nft_bitwise_fast_ops) @@ -247,13 +207,13 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) break; } - nft_trace_verdict(&info, chain, rule, ®s); - switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: case NF_STOLEN: + nft_trace_packet(&info, chain, rule, + NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -262,25 +222,28 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE)) return NF_DROP; jumpstack[stackptr].chain = chain; - jumpstack[stackptr].rule = nft_rule_next(rule); - jumpstack[stackptr].last_rule = last_rule; + jumpstack[stackptr].rules = rules + 1; stackptr++; fallthrough; case NFT_GOTO: + nft_trace_packet(&info, chain, rule, + NFT_TRACETYPE_RULE); + chain = regs.verdict.chain; goto do_chain; case NFT_CONTINUE: case NFT_RETURN: + nft_trace_packet(&info, chain, rule, + NFT_TRACETYPE_RETURN); break; default: - WARN_ON_ONCE(1); + WARN_ON(1); } if (stackptr > 0) { stackptr--; chain = jumpstack[stackptr].chain; - rule = jumpstack[stackptr].rule; - last_rule = jumpstack[stackptr].last_rule; + rules = jumpstack[stackptr].rules; goto next_rule; } @@ -306,22 +269,18 @@ static struct nft_expr_type *nft_basic_types[] = { &nft_rt_type, &nft_exthdr_type, &nft_last_type, - &nft_counter_type, }; static struct nft_object_type *nft_basic_objects[] = { #ifdef CONFIG_NETWORK_SECMARK &nft_secmark_obj_type, #endif - &nft_counter_obj_type, }; int __init nf_tables_core_module_init(void) { int err, i, j = 0; - nft_counter_init_seqcount(); - for (i = 0; i < 
ARRAY_SIZE(nft_basic_objects); i++) { err = nft_register_obj(nft_basic_objects[i]); if (err) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 5041725423..e4fe2f0780 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -113,13 +113,13 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, int off = skb_network_offset(skb); unsigned int len, nh_end; - nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len; + nh_end = pkt->tprot_set ? nft_thoff(pkt) : skb->len; len = min_t(unsigned int, nh_end - skb_network_offset(skb), NFT_TRACETYPE_NETWORK_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) return -1; - if (pkt->flags & NFT_PKTINFO_L4PROTO) { + if (pkt->tprot_set) { len = min_t(unsigned int, skb->len - nft_thoff(pkt), NFT_TRACETYPE_TRANSPORT_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, @@ -142,7 +142,7 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, static int nf_trace_fill_rule_info(struct sk_buff *nlskb, const struct nft_traceinfo *info) { - if (!info->rule || info->rule->is_last) + if (!info->rule) return 0; /* a continue verdict with ->type == RETURN means that this is diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 71e29adac4..f554e2ea32 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -186,7 +185,7 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; -#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) +#ifdef CONFIG_NETFILTER_INGRESS struct net_device *netdev; #endif @@ -222,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); break; #endif -#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) +#ifdef CONFIG_NETFILTER_INGRESS case NFPROTO_NETDEV: - if (hook >= NF_NETDEV_NUMHOOKS) + if (hook != NF_NETDEV_INGRESS) return ERR_PTR(-EOPNOTSUPP); if (!dev) @@ -234,15 +233,7 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de if (!netdev) return ERR_PTR(-ENODEV); -#ifdef CONFIG_NETFILTER_INGRESS - if (hook == NF_NETDEV_INGRESS) - return rcu_dereference(netdev->nf_hooks_ingress); -#endif -#ifdef CONFIG_NETFILTER_EGRESS - if (hook == NF_NETDEV_EGRESS) - return rcu_dereference(netdev->nf_hooks_egress); -#endif - fallthrough; + return rcu_dereference(netdev->nf_hooks_ingress); #endif default: return ERR_PTR(-EPROTONOSUPPORT); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ae9c0756bb..7f83f9697f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -66,7 +66,6 @@ struct nfulnl_instance { struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; struct net *net; - netns_tracker ns_tracker; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ u32 peer_portid; /* PORTID of the peer process */ @@ -141,7 +140,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head) struct nfulnl_instance *inst = container_of(head, struct nfulnl_instance, rcu); - put_net_track(inst->net, &inst->ns_tracker); + put_net(inst->net); kfree(inst); module_put(THIS_MODULE); } @@ -188,7 +187,7 @@ instance_create(struct net *net, u_int16_t group_num, 
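/*
 * [Editor's aside -- sketch of the lifetime rule the nfnetlink_log
 * hunks above revert to; my_instance is a hypothetical name.] The
 * instance pins its network namespace with get_net() at creation and
 * releases it in the RCU-deferred destructor, so inst->net stays valid
 * for as long as any RCU reader may still hold the instance.
 */
struct my_instance {
	struct rcu_head rcu;
	struct net *net;
};

static void my_instance_free_rcu(struct rcu_head *head)
{
	struct my_instance *inst = container_of(head, struct my_instance, rcu);

	put_net(inst->net);	/* pairs with get_net() at create time */
	kfree(inst);
}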
timer_setup(&inst->timer, nfulnl_timer, 0); - inst->net = get_net_track(net, &inst->ns_tracker, GFP_ATOMIC); + inst->net = get_net(net); inst->peer_user_ns = user_ns; inst->peer_portid = portid; inst->group_num = group_num; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index ea2d9c2a44..8787d0613a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -225,7 +225,7 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - const struct nf_ct_hook *ct_hook; + struct nf_ct_hook *ct_hook; int err; if (verdict == NF_ACCEPT || @@ -387,8 +387,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *indev; struct net_device *outdev; struct nf_conn *ct = NULL; - enum ip_conntrack_info ctinfo = 0; - const struct nfnl_ct_hook *nfnl_ct; + enum ip_conntrack_info ctinfo; + struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; u32 seclen = 0; @@ -710,9 +710,15 @@ static struct nf_queue_entry * nf_queue_entry_dup(struct nf_queue_entry *e) { struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); - if (entry) - nf_queue_entry_get_refs(entry); - return entry; + + if (!entry) + return NULL; + + if (nf_queue_entry_get_refs(entry)) + return entry; + + kfree(entry); + return NULL; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) @@ -1104,7 +1110,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb, return 0; } -static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct, +static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, const struct nlmsghdr *nlh, const struct nlattr * const nfqa[], struct nf_queue_entry *entry, @@ -1171,11 +1177,11 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, { struct nfnl_queue_net *q = nfnl_queue_pernet(info->net); u_int16_t queue_num = ntohs(info->nfmsg->res_id); - const struct nfnl_ct_hook *nfnl_ct; struct nfqnl_msg_verdict_hdr *vhdr; enum ip_conntrack_info ctinfo; struct nfqnl_instance *queue; struct nf_queue_entry *entry; + struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; unsigned int verdict; int err; @@ -1528,9 +1534,15 @@ static void __net_exit nfnl_queue_net_exit(struct net *net) WARN_ON_ONCE(!hlist_empty(&q->instance_table[i])); } +static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) +{ + synchronize_rcu(); +} + static struct pernet_operations nfnl_queue_net_ops = { .init = nfnl_queue_net_init, .exit = nfnl_queue_net_exit, + .exit_batch = nfnl_queue_net_exit_batch, .id = &nfnl_queue_net_id, .size = sizeof(struct nfnl_queue_net), }; diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 7b727d3ebf..47b0dba950 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -278,52 +278,12 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_bitwise_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_bitwise *priv = nft_expr_priv(expr); - const struct nft_bitwise *bitwise; - - if (!track->regs[priv->sreg].selector) - return false; - - bitwise = nft_expr_priv(expr); - if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && - track->regs[priv->dreg].bitwise && - track->regs[priv->dreg].bitwise->ops == expr->ops && - priv->sreg == bitwise->sreg && - priv->dreg == bitwise->dreg && - priv->op == bitwise->op && - priv->len == bitwise->len && - 
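/*
 * [Editor's aside -- pattern behind the nf_queue_entry_dup hunk above,
 * with hypothetical names.] Duplicating a queued entry only succeeds
 * once references on everything the copy points at have been taken;
 * if that fails (e.g. a device is mid-unregister), the copy is freed
 * rather than returned half-initialized.
 */
static struct my_entry *my_entry_dup(const struct my_entry *e)
{
	struct my_entry *dup = kmemdup(e, e->size, GFP_ATOMIC);

	if (!dup)
		return NULL;

	if (my_entry_get_refs(dup))	/* true: all references taken */
		return dup;

	kfree(dup);			/* could not pin referenced objects */
	return NULL;
}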
!memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) && - !memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) && - !memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) { - track->cur = expr; - return true; - } - - if (track->regs[priv->sreg].bitwise) { - track->regs[priv->dreg].selector = NULL; - track->regs[priv->dreg].bitwise = NULL; - return false; - } - - if (priv->sreg != priv->dreg) { - track->regs[priv->dreg].selector = - track->regs[priv->sreg].selector; - } - track->regs[priv->dreg].bitwise = expr; - - return false; -} - static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, - .reduce = nft_bitwise_reduce, .offload = nft_bitwise_offload, }; @@ -425,49 +385,12 @@ static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_bitwise_fast_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); - const struct nft_bitwise_fast_expr *bitwise; - - if (!track->regs[priv->sreg].selector) - return false; - - bitwise = nft_expr_priv(expr); - if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && - track->regs[priv->dreg].bitwise && - track->regs[priv->dreg].bitwise->ops == expr->ops && - priv->sreg == bitwise->sreg && - priv->dreg == bitwise->dreg && - priv->mask == bitwise->mask && - priv->xor == bitwise->xor) { - track->cur = expr; - return true; - } - - if (track->regs[priv->sreg].bitwise) { - track->regs[priv->dreg].selector = NULL; - track->regs[priv->dreg].bitwise = NULL; - return false; - } - - if (priv->sreg != priv->dreg) { - track->regs[priv->dreg].selector = - track->regs[priv->sreg].selector; - } - track->regs[priv->dreg].bitwise = expr; - - return false; -} - const struct nft_expr_ops nft_bitwise_fast_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)), .eval = NULL, /* inlined */ .init = nft_bitwise_fast_init, .dump = nft_bitwise_fast_dump, - .reduce = nft_bitwise_fast_reduce, .offload = nft_bitwise_fast_offload, }; @@ -504,21 +427,3 @@ struct nft_expr_type nft_bitwise_type __read_mostly = { .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, }; - -bool nft_expr_reduce_bitwise(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_expr *last = track->last; - const struct nft_expr *next; - - if (expr == last) - return false; - - next = nft_expr_next(expr); - if (next->ops == &nft_bitwise_ops) - return nft_bitwise_reduce(track, next); - else if (next->ops == &nft_bitwise_fast_ops) - return nft_bitwise_fast_reduce(track, next); - - return false; -} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e646e9ee4a..9d5947ab8d 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -167,24 +167,12 @@ static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } -static bool nft_byteorder_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - struct nft_byteorder *priv = nft_expr_priv(expr); - - track->regs[priv->dreg].selector = NULL; - track->regs[priv->dreg].bitwise = NULL; - - return false; -} - static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, - 
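/*
 * [Editor's aside -- what the deleted ->reduce callbacks above did, in
 * miniature and with hypothetical types.] The tracker remembers, per
 * destination register, which expression last produced its contents;
 * when the next expression would recompute the very same value into
 * the same register, it can be elided from the rule.
 */
struct reg_track {
	const void *selector[16];	/* last producer per register */
};

static bool reduce(struct reg_track *t, const void *expr, unsigned int dreg,
		   bool (*same)(const void *a, const void *b))
{
	if (t->selector[dreg] && same(t->selector[dreg], expr))
		return true;		/* redundant: skip this expression */

	t->selector[dreg] = expr;	/* register now holds expr's result */
	return false;
}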
.reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index c3563f0be2..3ced0eb6b7 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -310,11 +310,9 @@ static const struct nft_chain_type nft_chain_filter_netdev = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_NETDEV, - .hook_mask = (1 << NF_NETDEV_INGRESS) | - (1 << NF_NETDEV_EGRESS), + .hook_mask = (1 << NF_NETDEV_INGRESS), .hooks = { [NF_NETDEV_INGRESS] = nft_do_chain_netdev, - [NF_NETDEV_EGRESS] = nft_do_chain_netdev, }, }; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 3362417ebf..7d0761fad3 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -14,7 +14,7 @@ #include struct nft_connlimit { - struct nf_conncount_list *list; + struct nf_conncount_list list; u32 limit; bool invert; }; @@ -43,12 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) { + if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) { regs->verdict.code = NF_DROP; return; } - count = priv->list->count; + count = priv->list.count; if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; @@ -62,7 +62,6 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, { bool invert = false; u32 flags, limit; - int err; if (!tb[NFTA_CONNLIMIT_COUNT]) return -EINVAL; @@ -77,31 +76,18 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, invert = true; } - priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL); - if (!priv->list) - return -ENOMEM; - - nf_conncount_list_init(priv->list); + nf_conncount_list_init(&priv->list); priv->limit = limit; priv->invert = invert; - err = nf_ct_netns_get(ctx->net, ctx->family); - if (err < 0) - goto err_netns; - - return 0; -err_netns: - kfree(priv->list); - - return err; + return nf_ct_netns_get(ctx->net, ctx->family); } static void nft_connlimit_do_destroy(const struct nft_ctx *ctx, struct nft_connlimit *priv) { nf_ct_netns_put(ctx->net, ctx->family); - nf_conncount_cache_free(priv->list); - kfree(priv->list); + nf_conncount_cache_free(&priv->list); } static int nft_connlimit_do_dump(struct sk_buff *skb, @@ -214,11 +200,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) struct nft_connlimit *priv_dst = nft_expr_priv(dst); struct nft_connlimit *priv_src = nft_expr_priv(src); - priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC); - if (!priv_dst->list) - return -ENOMEM; - - nf_conncount_list_init(priv_dst->list); + nf_conncount_list_init(&priv_dst->list); priv_dst->limit = priv_src->limit; priv_dst->invert = priv_src->invert; @@ -230,8 +212,7 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx, { struct nft_connlimit *priv = nft_expr_priv(expr); - nf_conncount_cache_free(priv->list); - kfree(priv->list); + nf_conncount_cache_free(&priv->list); } static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) @@ -240,7 +221,7 @@ static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) bool ret; local_bh_disable(); - ret = nf_conncount_gc_list(net, priv->list); + ret = nf_conncount_gc_list(net, &priv->list); local_bh_enable(); return ret; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index f179e8c3b0..8edd3b3c17 100644 --- a/net/netfilter/nft_counter.c 
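/*
 * [Editor's aside -- shape of the nft_connlimit change above, with
 * hypothetical names.] The conncount list goes back to being embedded
 * in the expression's private data instead of separately kmalloc'ed,
 * so initialization cannot fail on allocation and destroy and clone
 * error paths have nothing extra to free.
 */
struct my_connlimit {
	struct nf_conncount_list list;	/* embedded, not a pointer */
	u32 limit;
	bool invert;
};

static void my_connlimit_init(struct my_connlimit *priv, u32 limit, bool invert)
{
	nf_conncount_list_init(&priv->list);	/* cannot fail */
	priv->limit = limit;
	priv->invert = invert;
}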
+++ b/net/netfilter/nft_counter.c @@ -13,7 +13,6 @@ #include #include #include -#include #include struct nft_counter { @@ -175,7 +174,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; -struct nft_object_type nft_counter_obj_type; +static struct nft_object_type nft_counter_obj_type; static const struct nft_object_ops nft_counter_obj_ops = { .type = &nft_counter_obj_type, .size = sizeof(struct nft_counter_percpu_priv), @@ -185,7 +184,7 @@ static const struct nft_object_ops nft_counter_obj_ops = { .dump = nft_counter_obj_dump, }; -struct nft_object_type nft_counter_obj_type __read_mostly = { +static struct nft_object_type nft_counter_obj_type __read_mostly = { .type = NFT_OBJECT_COUNTER, .ops = &nft_counter_obj_ops, .maxattr = NFTA_COUNTER_MAX, @@ -193,8 +192,9 @@ struct nft_object_type nft_counter_obj_type __read_mostly = { .owner = THIS_MODULE, }; -void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); @@ -275,15 +275,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, preempt_enable(); } -void nft_counter_init_seqcount(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu)); -} - -struct nft_expr_type nft_counter_type; +static struct nft_expr_type nft_counter_type; static const struct nft_expr_ops nft_counter_ops = { .type = &nft_counter_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)), @@ -297,7 +289,7 @@ static const struct nft_expr_ops nft_counter_ops = { .offload_stats = nft_counter_offload_stats, }; -struct nft_expr_type nft_counter_type __read_mostly = { +static struct nft_expr_type nft_counter_type __read_mostly = { .name = "counter", .ops = &nft_counter_ops, .policy = nft_counter_policy, @@ -305,3 +297,39 @@ struct nft_expr_type nft_counter_type __read_mostly = { .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; + +static int __init nft_counter_module_init(void) +{ + int cpu, err; + + for_each_possible_cpu(cpu) + seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu)); + + err = nft_register_obj(&nft_counter_obj_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_counter_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_obj(&nft_counter_obj_type); + return err; +} + +static void __exit nft_counter_module_exit(void) +{ + nft_unregister_expr(&nft_counter_type); + nft_unregister_obj(&nft_counter_obj_type); +} + +module_init(nft_counter_module_init); +module_exit(nft_counter_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("counter"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER); +MODULE_DESCRIPTION("nftables counter rule support"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 5adf8bb628..99b1de14ff 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -259,13 +259,10 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); - if (likely(refcount_read(&ct->ct_general.use) == 1)) { - refcount_inc(&ct->ct_general.use); + if (likely(atomic_read(&ct->ct_general.use) == 1)) { nf_ct_zone_add(ct, &zone); } else { - /* previous skb got queued to userspace, allocate temporary - * one until percpu template can be reused. 
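/*
 * [Editor's aside -- the restored nft_counter module init above,
 * condensed to show the register-and-unwind shape: per-CPU seqcounts
 * first (cannot fail), then the object type, then the expression type,
 * undoing the object registration if the expression one fails.
 * sketch_init is a hypothetical name.]
 */
static int __init sketch_init(void)
{
	int cpu, err;

	for_each_possible_cpu(cpu)
		seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));

	err = nft_register_obj(&nft_counter_obj_type);
	if (err < 0)
		return err;

	err = nft_register_expr(&nft_counter_type);
	if (err < 0)
		nft_unregister_obj(&nft_counter_obj_type);	/* unwind */

	return err;
}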
- */ + /* previous skb got queued to userspace */ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); if (!ct) { regs->verdict.code = NF_DROP; @@ -273,6 +270,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, } } + atomic_inc(&ct->ct_general.use); nf_ct_set(skb, ct, IP_CT_NEW); } #endif @@ -377,6 +375,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void) return false; } + atomic_set(&tmp->ct_general.use, 1); per_cpu(nft_ct_pcpu_template, cpu) = tmp; } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 9e927ab4df..dbe1f2e7dd 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt, { struct tcphdr *tcph; - if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) + if (pkt->tprot != IPPROTO_TCP) return NULL; tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 619e394a91..7730409f6f 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -27,11 +27,9 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, { struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; - struct sk_buff *skb = pkt->skb; /* This is used by ifb only. */ - skb->skb_iif = skb->dev->ifindex; - skb_set_redirected(skb, nft_hook(pkt) == NF_NETDEV_INGRESS); + skb_set_redirected(pkt->skb, true); nf_fwd_netdev_egress(pkt, oif); regs->verdict.code = NF_STOLEN; @@ -205,8 +203,7 @@ static int nft_fwd_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { - return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) | - (1 << NF_NETDEV_EGRESS)); + return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS)); } static struct nft_expr_type nft_fwd_netdev_type; diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 4f745a409d..304e33cbed 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -8,13 +8,9 @@ #include #include -struct nft_last { - unsigned long jiffies; - unsigned int set; -}; - struct nft_last_priv { - struct nft_last *last; + unsigned long last_jiffies; + unsigned int last_set; }; static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = { @@ -26,55 +22,47 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_last_priv *priv = nft_expr_priv(expr); - struct nft_last *last; u64 last_jiffies; + u32 last_set = 0; int err; - last = kzalloc(sizeof(*last), GFP_KERNEL); - if (!last) - return -ENOMEM; + if (tb[NFTA_LAST_SET]) { + last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET])); + if (last_set == 1) + priv->last_set = 1; + } - if (tb[NFTA_LAST_SET]) - last->set = ntohl(nla_get_be32(tb[NFTA_LAST_SET])); - - if (last->set && tb[NFTA_LAST_MSECS]) { + if (last_set && tb[NFTA_LAST_MSECS]) { err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies); if (err < 0) - goto err; + return err; - last->jiffies = jiffies - (unsigned long)last_jiffies; + priv->last_jiffies = jiffies - (unsigned long)last_jiffies; } - priv->last = last; return 0; -err: - kfree(last); - - return err; } static void nft_last_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_last_priv *priv = nft_expr_priv(expr); - struct nft_last *last = priv->last; - if (READ_ONCE(last->jiffies) != jiffies) - WRITE_ONCE(last->jiffies, jiffies); - if 
(READ_ONCE(last->set) == 0) - WRITE_ONCE(last->set, 1); + if (READ_ONCE(priv->last_jiffies) != jiffies) + WRITE_ONCE(priv->last_jiffies, jiffies); + if (READ_ONCE(priv->last_set) == 0) + WRITE_ONCE(priv->last_set, 1); } static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_last_priv *priv = nft_expr_priv(expr); - struct nft_last *last = priv->last; - unsigned long last_jiffies = READ_ONCE(last->jiffies); - u32 last_set = READ_ONCE(last->set); + unsigned long last_jiffies = READ_ONCE(priv->last_jiffies); + u32 last_set = READ_ONCE(priv->last_set); __be64 msecs; if (time_before(jiffies, last_jiffies)) { - WRITE_ONCE(last->set, 0); + WRITE_ONCE(priv->last_set, 0); last_set = 0; } @@ -93,32 +81,11 @@ static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } -static void nft_last_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_last_priv *priv = nft_expr_priv(expr); - - kfree(priv->last); -} - -static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src) -{ - struct nft_last_priv *priv_dst = nft_expr_priv(dst); - - priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); - if (!priv_dst->last) - return -ENOMEM; - - return 0; -} - static const struct nft_expr_ops nft_last_ops = { .type = &nft_last_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)), .eval = nft_last_eval, .init = nft_last_init, - .destroy = nft_last_destroy, - .clone = nft_last_clone, .dump = nft_last_dump, }; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index a726b62396..82ec27bdf9 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -18,10 +18,6 @@ struct nft_limit { spinlock_t lock; u64 last; u64 tokens; -}; - -struct nft_limit_priv { - struct nft_limit *limit; u64 tokens_max; u64 rate; u64 nsecs; @@ -29,33 +25,33 @@ struct nft_limit_priv { bool invert; }; -static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost) +static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) { u64 now, tokens; s64 delta; - spin_lock_bh(&priv->limit->lock); + spin_lock_bh(&limit->lock); now = ktime_get_ns(); - tokens = priv->limit->tokens + now - priv->limit->last; - if (tokens > priv->tokens_max) - tokens = priv->tokens_max; + tokens = limit->tokens + now - limit->last; + if (tokens > limit->tokens_max) + tokens = limit->tokens_max; - priv->limit->last = now; + limit->last = now; delta = tokens - cost; if (delta >= 0) { - priv->limit->tokens = delta; - spin_unlock_bh(&priv->limit->lock); - return priv->invert; + limit->tokens = delta; + spin_unlock_bh(&limit->lock); + return limit->invert; } - priv->limit->tokens = tokens; - spin_unlock_bh(&priv->limit->lock); - return !priv->invert; + limit->tokens = tokens; + spin_unlock_bh(&limit->lock); + return !limit->invert; } /* Use same default as in iptables. 
*/ #define NFT_LIMIT_PKT_BURST_DEFAULT 5 -static int nft_limit_init(struct nft_limit_priv *priv, +static int nft_limit_init(struct nft_limit *limit, const struct nlattr * const tb[], bool pkts) { u64 unit, tokens; @@ -64,62 +60,58 @@ static int nft_limit_init(struct nft_limit_priv *priv, tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; - priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->nsecs = unit * NSEC_PER_SEC; - if (priv->rate == 0 || priv->nsecs < unit) + limit->nsecs = unit * NSEC_PER_SEC; + if (limit->rate == 0 || limit->nsecs < unit) return -EOVERFLOW; if (tb[NFTA_LIMIT_BURST]) - priv->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); - if (pkts && priv->burst == 0) - priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT; + if (pkts && limit->burst == 0) + limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT; - if (priv->rate + priv->burst < priv->rate) + if (limit->rate + limit->burst < limit->rate) return -EOVERFLOW; if (pkts) { - tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst; + tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst; } else { /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst), - priv->rate); + tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst), + limit->rate); } - priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL); - if (!priv->limit) - return -ENOMEM; - - priv->limit->tokens = tokens; - priv->tokens_max = priv->limit->tokens; + limit->tokens = tokens; + limit->tokens_max = limit->tokens; if (tb[NFTA_LIMIT_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); if (flags & NFT_LIMIT_F_INV) - priv->invert = true; + limit->invert = true; } - priv->limit->last = ktime_get_ns(); - spin_lock_init(&priv->limit->lock); + limit->last = ktime_get_ns(); + spin_lock_init(&limit->lock); return 0; } -static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv, +static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, enum nft_limit_type type) { - u32 flags = priv->invert ? NFT_LIMIT_F_INV : 0; - u64 secs = div_u64(priv->nsecs, NSEC_PER_SEC); + u32 flags = limit->invert ? 
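/*
 * [Editor's aside -- the limit expression above is a token bucket kept
 * in nanoseconds: the bucket earns one token per elapsed ns, a packet
 * spends `cost` tokens (computed as nsecs/rate at init in the packet
 * path), and accumulated credit is capped. Worked numbers for packet
 * mode: rate 10 per 1 s unit gives nsecs = 1e9 and a per-packet cost
 * of 1e9/10 = 1e8; with burst 5 the cap is (1e9/10) * 5 = 5e8, so an
 * idle bucket absorbs 5 back-to-back packets before NFT_BREAK.
 * Minimal userspace sketch:]
 */
#include <stdbool.h>
#include <stdint.h>

struct bucket {
	uint64_t last;		/* timestamp of previous decision, ns */
	uint64_t tokens;	/* current credit, ns */
	uint64_t tokens_max;	/* burst cap */
	uint64_t cost;		/* price of one packet */
};

static bool bucket_conforms(struct bucket *b, uint64_t now_ns)
{
	uint64_t tokens = b->tokens + (now_ns - b->last);

	if (tokens > b->tokens_max)
		tokens = b->tokens_max;	/* cap accumulated credit */
	b->last = now_ns;

	if (tokens >= b->cost) {
		b->tokens = tokens - b->cost;
		return true;		/* under the limit */
	}
	b->tokens = tokens;
	return false;			/* over the limit */
}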
NFT_LIMIT_F_INV : 0; + u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate), + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate), NFTA_LIMIT_PAD) || nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs), NFTA_LIMIT_PAD) || - nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(priv->burst)) || + nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) || nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags))) goto nla_put_failure; @@ -129,34 +121,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv return -1; } -static void nft_limit_destroy(const struct nft_ctx *ctx, - const struct nft_limit_priv *priv) -{ - kfree(priv->limit); -} - -static int nft_limit_clone(struct nft_limit_priv *priv_dst, - const struct nft_limit_priv *priv_src) -{ - priv_dst->tokens_max = priv_src->tokens_max; - priv_dst->rate = priv_src->rate; - priv_dst->nsecs = priv_src->nsecs; - priv_dst->burst = priv_src->burst; - priv_dst->invert = priv_src->invert; - - priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC); - if (!priv_dst->limit) - return -ENOMEM; - - spin_lock_init(&priv_dst->limit->lock); - priv_dst->limit->tokens = priv_src->tokens_max; - priv_dst->limit->last = ktime_get_ns(); - - return 0; -} - -struct nft_limit_priv_pkts { - struct nft_limit_priv limit; +struct nft_limit_pkts { + struct nft_limit limit; u64 cost; }; @@ -164,7 +130,7 @@ static void nft_limit_pkts_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); + struct nft_limit_pkts *priv = nft_expr_priv(expr); if (nft_limit_eval(&priv->limit, priv->cost)) regs->verdict.code = NFT_BREAK; @@ -182,7 +148,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); + struct nft_limit_pkts *priv = nft_expr_priv(expr); int err; err = nft_limit_init(&priv->limit, tb, true); @@ -195,35 +161,17 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx, static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr) { - const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); + const struct nft_limit_pkts *priv = nft_expr_priv(expr); return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); } -static void nft_limit_pkts_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); - - nft_limit_destroy(ctx, &priv->limit); -} - -static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src) -{ - struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); - struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src); - - return nft_limit_clone(&priv_dst->limit, &priv_src->limit); -} - static struct nft_expr_type nft_limit_type; static const struct nft_expr_ops nft_limit_pkts_ops = { .type = &nft_limit_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)), + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)), .eval = nft_limit_pkts_eval, .init = nft_limit_pkts_init, - .destroy = nft_limit_pkts_destroy, - .clone = nft_limit_pkts_clone, .dump = nft_limit_pkts_dump, }; @@ -231,7 +179,7 @@ static void nft_limit_bytes_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_limit_priv *priv = nft_expr_priv(expr); + struct nft_limit 
*priv = nft_expr_priv(expr); u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate); if (nft_limit_eval(priv, cost)) @@ -242,7 +190,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - struct nft_limit_priv *priv = nft_expr_priv(expr); + struct nft_limit *priv = nft_expr_priv(expr); return nft_limit_init(priv, tb, false); } @@ -250,35 +198,17 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx, static int nft_limit_bytes_dump(struct sk_buff *skb, const struct nft_expr *expr) { - const struct nft_limit_priv *priv = nft_expr_priv(expr); + const struct nft_limit *priv = nft_expr_priv(expr); return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); } -static void nft_limit_bytes_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - const struct nft_limit_priv *priv = nft_expr_priv(expr); - - nft_limit_destroy(ctx, priv); -} - -static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src) -{ - struct nft_limit_priv *priv_dst = nft_expr_priv(dst); - struct nft_limit_priv *priv_src = nft_expr_priv(src); - - return nft_limit_clone(priv_dst, priv_src); -} - static const struct nft_expr_ops nft_limit_bytes_ops = { .type = &nft_limit_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv)), + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), .eval = nft_limit_bytes_eval, .init = nft_limit_bytes_init, .dump = nft_limit_bytes_dump, - .clone = nft_limit_bytes_clone, - .destroy = nft_limit_bytes_destroy, }; static const struct nft_expr_ops * @@ -310,7 +240,7 @@ static void nft_limit_obj_pkts_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_limit_priv_pkts *priv = nft_obj_data(obj); + struct nft_limit_pkts *priv = nft_obj_data(obj); if (nft_limit_eval(&priv->limit, priv->cost)) regs->verdict.code = NFT_BREAK; @@ -320,7 +250,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) { - struct nft_limit_priv_pkts *priv = nft_obj_data(obj); + struct nft_limit_pkts *priv = nft_obj_data(obj); int err; err = nft_limit_init(&priv->limit, tb, true); @@ -335,25 +265,16 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { - const struct nft_limit_priv_pkts *priv = nft_obj_data(obj); + const struct nft_limit_pkts *priv = nft_obj_data(obj); return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); } -static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx, - struct nft_object *obj) -{ - struct nft_limit_priv_pkts *priv = nft_obj_data(obj); - - nft_limit_destroy(ctx, &priv->limit); -} - static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_pkts_ops = { .type = &nft_limit_obj_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)), + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)), .init = nft_limit_obj_pkts_init, - .destroy = nft_limit_obj_pkts_destroy, .eval = nft_limit_obj_pkts_eval, .dump = nft_limit_obj_pkts_dump, }; @@ -362,7 +283,7 @@ static void nft_limit_obj_bytes_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_limit_priv *priv = nft_obj_data(obj); + struct nft_limit *priv = nft_obj_data(obj); u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate); if (nft_limit_eval(priv, cost)) @@ -373,7 +294,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], 
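/*
 * [Editor's aside -- worked numbers for the byte-mode cost just above:
 * cost = nsecs * skb->len / rate converts packet length into bucket
 * tokens, i.e. the time those bytes occupy at `rate` bytes per unit.
 * With rate = 125000 bytes over a 1 s unit (1 Mbit/s), a 1500-byte
 * packet costs 1e9 * 1500 / 125000 = 12,000,000 token-ns (12 ms), so a
 * full second of credit covers roughly 83 such packets.]
 */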
struct nft_object *obj) { - struct nft_limit_priv *priv = nft_obj_data(obj); + struct nft_limit *priv = nft_obj_data(obj); return nft_limit_init(priv, tb, false); } @@ -382,25 +303,16 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { - const struct nft_limit_priv *priv = nft_obj_data(obj); + const struct nft_limit *priv = nft_obj_data(obj); return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); } -static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx, - struct nft_object *obj) -{ - struct nft_limit_priv *priv = nft_obj_data(obj); - - nft_limit_destroy(ctx, priv); -} - static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_bytes_ops = { .type = &nft_limit_obj_type, - .size = sizeof(struct nft_limit_priv), + .size = sizeof(struct nft_limit), .init = nft_limit_obj_bytes_init, - .destroy = nft_limit_obj_bytes_destroy, .eval = nft_limit_obj_bytes_eval, .dump = nft_limit_obj_bytes_dump, }; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5ab4df56c9..a7e01e9952 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -244,11 +244,7 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest, case NFT_META_OIF: nft_meta_store_ifindex(dest, nft_out(pkt)); break; - case NFT_META_IFTYPE: - if (!nft_meta_store_iftype(dest, pkt->skb->dev)) - return false; - break; - case __NFT_META_IIFTYPE: + case NFT_META_IIFTYPE: if (!nft_meta_store_iftype(dest, nft_in(pkt))) return false; break; @@ -333,7 +329,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!pkt->tprot_set) goto err; nft_reg_store8(dest, pkt->tprot); break; @@ -750,63 +746,16 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_meta_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_meta *priv = nft_expr_priv(expr); - const struct nft_meta *meta; - - if (!track->regs[priv->dreg].selector || - track->regs[priv->dreg].selector->ops != expr->ops) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; - return false; - } - - meta = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != meta->key || - priv->dreg != meta->dreg) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_get_eval, .init = nft_meta_get_init, .dump = nft_meta_get_dump, - .reduce = nft_meta_get_reduce, .validate = nft_meta_get_validate, .offload = nft_meta_get_offload, }; -static bool nft_meta_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_meta_get_ops) - continue; - - track->regs[i].selector = NULL; - track->regs[i].bitwise = NULL; - } - - return false; -} - static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), @@ -814,7 +763,6 @@ static const struct nft_expr_ops nft_meta_set_ops = { .init = nft_meta_set_init, .destroy = nft_meta_set_destroy, 
.dump = nft_meta_set_dump, - .reduce = nft_meta_set_reduce, .validate = nft_meta_set_validate, }; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 1d378efd88..722cac1e90 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); struct nft_ng_inc { u8 dreg; u32 modulus; - atomic_t *counter; + atomic_t counter; u32 offset; }; @@ -27,9 +27,9 @@ static u32 nft_ng_inc_gen(struct nft_ng_inc *priv) u32 nval, oval; do { - oval = atomic_read(priv->counter); + oval = atomic_read(&priv->counter); nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; - } while (atomic_cmpxchg(priv->counter, oval, nval) != oval); + } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); return nval + priv->offset; } @@ -55,7 +55,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_ng_inc *priv = nft_expr_priv(expr); - int err; if (tb[NFTA_NG_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); @@ -67,22 +66,10 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL); - if (!priv->counter) - return -ENOMEM; + atomic_set(&priv->counter, priv->modulus - 1); - atomic_set(priv->counter, priv->modulus - 1); - - err = nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, - NULL, NFT_DATA_VALUE, sizeof(u32)); - if (err < 0) - goto err; - - return 0; -err: - kfree(priv->counter); - - return err; + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, @@ -111,14 +98,6 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } -static void nft_ng_inc_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - const struct nft_ng_inc *priv = nft_expr_priv(expr); - - kfree(priv->counter); -} - struct nft_ng_random { u8 dreg; u32 modulus; @@ -178,7 +157,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)), .eval = nft_ng_inc_eval, .init = nft_ng_inc_init, - .destroy = nft_ng_inc_destroy, .dump = nft_ng_inc_dump, }; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 5cc06aef43..132875cd7f 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -79,45 +79,6 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; } -static int __nft_payload_inner_offset(struct nft_pktinfo *pkt) -{ - unsigned int thoff = nft_thoff(pkt); - - if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) - return -1; - - switch (pkt->tprot) { - case IPPROTO_UDP: - pkt->inneroff = thoff + sizeof(struct udphdr); - break; - case IPPROTO_TCP: { - struct tcphdr *th, _tcph; - - th = skb_header_pointer(pkt->skb, thoff, sizeof(_tcph), &_tcph); - if (!th) - return -1; - - pkt->inneroff = thoff + __tcp_hdrlen(th); - } - break; - default: - return -1; - } - - pkt->flags |= NFT_PKTINFO_INNER; - - return 0; -} - -static int nft_payload_inner_offset(const struct nft_pktinfo *pkt) -{ - if (!(pkt->flags & NFT_PKTINFO_INNER) && - __nft_payload_inner_offset((struct nft_pktinfo *)pkt) < 0) - return -1; - - return pkt->inneroff; -} - void nft_payload_eval(const struct nft_expr *expr, struct nft_regs 
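/*
 * [Editor's aside -- the numgen hunk above embeds the atomic counter
 * in the expression again; the generator wraps modulo `modulus` with a
 * compare-exchange retry loop. Equivalent sketch in C11 atomics:]
 */
#include <stdatomic.h>

static unsigned int ng_inc(atomic_uint *counter, unsigned int modulus)
{
	unsigned int oval, nval;

	do {
		oval = atomic_load(counter);
		nval = (oval + 1 < modulus) ? oval + 1 : 0;	/* wrap */
	} while (!atomic_compare_exchange_weak(counter, &oval, nval));

	return nval;	/* caller adds the configured offset */
}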
*regs, const struct nft_pktinfo *pkt) @@ -147,18 +108,12 @@ void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) + if (!pkt->tprot_set) goto err; offset = nft_thoff(pkt); break; - case NFT_PAYLOAD_INNER_HEADER: - offset = nft_payload_inner_offset(pkt); - if (offset < 0) - goto err; - break; default: - WARN_ON_ONCE(1); - goto err; + BUG(); } offset += priv->offset; @@ -210,34 +165,6 @@ static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } -static bool nft_payload_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_payload *priv = nft_expr_priv(expr); - const struct nft_payload *payload; - - if (!track->regs[priv->dreg].selector || - track->regs[priv->dreg].selector->ops != expr->ops) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; - return false; - } - - payload = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->base != payload->base || - priv->offset != payload->offset || - priv->len != payload->len) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static bool nft_payload_offload_mask(struct nft_offload_reg *reg, u32 priv_len, u32 field_len) { @@ -541,7 +468,6 @@ static const struct nft_expr_ops nft_payload_ops = { .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, - .reduce = nft_payload_reduce, .offload = nft_payload_offload, }; @@ -551,7 +477,6 @@ const struct nft_expr_ops nft_payload_fast_ops = { .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, - .reduce = nft_payload_reduce, .offload = nft_payload_offload, }; @@ -688,26 +613,19 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) + if (!pkt->tprot_set) goto err; offset = nft_thoff(pkt); break; - case NFT_PAYLOAD_INNER_HEADER: - offset = nft_payload_inner_offset(pkt); - if (offset < 0) - goto err; - break; default: - WARN_ON_ONCE(1); - goto err; + BUG(); } csum_offset = offset + priv->csum_offset; offset += priv->offset; if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) && - ((priv->base != NFT_PAYLOAD_TRANSPORT_HEADER && - priv->base != NFT_PAYLOAD_INNER_HEADER) || + (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || skb->ip_summed != CHECKSUM_PARTIAL)) { fsum = skb_checksum(skb, offset, priv->len, 0); tsum = csum_partial(src, priv->len, 0); @@ -728,8 +646,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr, if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP && pkt->tprot == IPPROTO_SCTP && skb->ip_summed != CHECKSUM_PARTIAL) { - if (pkt->fragoff == 0 && - nft_payload_csum_sctp(skb, nft_thoff(pkt))) + if (nft_payload_csum_sctp(skb, nft_thoff(pkt))) goto err; } @@ -802,33 +719,12 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr return -1; } -static bool nft_payload_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_payload_ops && - track->regs[i].selector->ops != &nft_payload_fast_ops) - continue; - - 
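/*
 * [Editor's aside -- core of the deleted inner-header helper above:
 * the inner payload starts a fixed sizeof(struct udphdr) past the
 * transport offset for UDP, and doff * 4 bytes past it for TCP, since
 * doff counts 32-bit words. Sketch assuming a Linux-style struct
 * tcphdr and an unfragmented packet:]
 */
static int inner_offset(uint8_t tprot, unsigned int thoff,
			const struct tcphdr *th)
{
	switch (tprot) {
	case IPPROTO_UDP:
		return thoff + sizeof(struct udphdr);
	case IPPROTO_TCP:
		return thoff + th->doff * 4;	/* variable-length header */
	default:
		return -1;			/* no known inner header */
	}
}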
track->regs[i].selector = NULL; - track->regs[i].bitwise = NULL; - } - - return false; -} - static const struct nft_expr_ops nft_payload_set_ops = { .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), .eval = nft_payload_set_eval, .init = nft_payload_set_init, .dump = nft_payload_set_dump, - .reduce = nft_payload_set_reduce, }; static const struct nft_expr_ops * @@ -848,7 +744,6 @@ nft_payload_select_ops(const struct nft_ctx *ctx, case NFT_PAYLOAD_LL_HEADER: case NFT_PAYLOAD_NETWORK_HEADER: case NFT_PAYLOAD_TRANSPORT_HEADER: - case NFT_PAYLOAD_INNER_HEADER: break; default: return ERR_PTR(-EOPNOTSUPP); @@ -867,7 +762,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && - base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER) + base != NFT_PAYLOAD_LL_HEADER) return &nft_payload_fast_ops; else return &nft_payload_ops; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index f394a0b562..c4d1389f71 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -15,13 +15,13 @@ struct nft_quota { atomic64_t quota; unsigned long flags; - atomic64_t *consumed; + atomic64_t consumed; }; static inline bool nft_overquota(struct nft_quota *priv, const struct sk_buff *skb) { - return atomic64_add_return(skb->len, priv->consumed) >= + return atomic64_add_return(skb->len, &priv->consumed) >= atomic64_read(&priv->quota); } @@ -90,23 +90,13 @@ static int nft_quota_do_init(const struct nlattr * const tb[], return -EOPNOTSUPP; } - priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL); - if (!priv->consumed) - return -ENOMEM; - atomic64_set(&priv->quota, quota); priv->flags = flags; - atomic64_set(priv->consumed, consumed); + atomic64_set(&priv->consumed, consumed); return 0; } -static void nft_quota_do_destroy(const struct nft_ctx *ctx, - struct nft_quota *priv) -{ - kfree(priv->consumed); -} - static int nft_quota_obj_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) @@ -138,7 +128,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, * that we see, don't go over the quota boundary in what we send to * userspace. 
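/*
 * [Editor's aside -- the nft_quota hunks around this point embed the
 * consumed counter in the expression and, in the dump-with-reset path
 * just below, subtract exactly the value that was reported instead of
 * storing zero, so bytes consumed between the read and the reset are
 * not lost. Same idea in C11 atomics:]
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static uint64_t quota_report(atomic_ullong *consumed, bool reset)
{
	uint64_t seen = atomic_load(consumed);

	if (reset)
		atomic_fetch_sub(consumed, seen);	/* racing adds survive */
	return seen;
}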
*/ - consumed = atomic64_read(priv->consumed); + consumed = atomic64_read(&priv->consumed); quota = atomic64_read(&priv->quota); if (consumed >= quota) { consumed_cap = quota; @@ -155,7 +145,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, goto nla_put_failure; if (reset) { - atomic64_sub(consumed, priv->consumed); + atomic64_sub(consumed, &priv->consumed); clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags); } return 0; @@ -172,20 +162,11 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj, return nft_quota_do_dump(skb, priv, reset); } -static void nft_quota_obj_destroy(const struct nft_ctx *ctx, - struct nft_object *obj) -{ - struct nft_quota *priv = nft_obj_data(obj); - - return nft_quota_do_destroy(ctx, priv); -} - static struct nft_object_type nft_quota_obj_type; static const struct nft_object_ops nft_quota_obj_ops = { .type = &nft_quota_obj_type, .size = sizeof(struct nft_quota), .init = nft_quota_obj_init, - .destroy = nft_quota_obj_destroy, .eval = nft_quota_obj_eval, .dump = nft_quota_obj_dump, .update = nft_quota_obj_update, @@ -224,35 +205,12 @@ static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) return nft_quota_do_dump(skb, priv, false); } -static void nft_quota_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_quota *priv = nft_expr_priv(expr); - - return nft_quota_do_destroy(ctx, priv); -} - -static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) -{ - struct nft_quota *priv_dst = nft_expr_priv(dst); - - priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC); - if (!priv_dst->consumed) - return -ENOMEM; - - atomic64_set(priv_dst->consumed, 0); - - return 0; -} - static struct nft_expr_type nft_quota_type; static const struct nft_expr_ops nft_quota_ops = { .type = &nft_quota_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)), .eval = nft_quota_eval, .init = nft_quota_init, - .destroy = nft_quota_destroy, - .clone = nft_quota_clone, .dump = nft_quota_dump, }; diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c index 61cd8c4ac3..d89f68754f 100644 --- a/net/netfilter/nft_reject_netdev.c +++ b/net/netfilter/nft_reject_netdev.c @@ -4,7 +4,6 @@ * Copyright (c) 2020 Jose M. 
Guisado */ -#include #include #include #include diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 52e0d026d3..6f4116e729 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1048,9 +1048,11 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { - unsigned long bsize = f->bsize; + unsigned long *lt = f->lt, bsize = f->bsize; int i, ret = -1, b; + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + if (first) memset(map, 0xff, bsize * sizeof(*map)); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 267757b039..0a913ce074 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -24,7 +24,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) return XT_CONTINUE; if (ct) { - refcount_inc(&ct->ct_general.use); + atomic_inc(&ct->ct_general.use); nf_ct_set(skb, ct, IP_CT_NEW); } else { nf_ct_set(skb, ct, IP_CT_UNTRACKED); @@ -201,6 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err4; } __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); out: info->ct = ct; return 0; diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index c1a70f8f04..7873b834c3 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -1,93 +1,159 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * IP tables module for matching the value of the TTL - * (C) 2000,2001 by Harald Welte + * TTL modification target for IP tables + * (C) 2000,2005 by Harald Welte * - * Hop Limit matching module - * (C) 2001-2002 Maciej Soltysiak + * Hop Limit modification target for ip6tables + * Maciej Soltysiak */ - -#include -#include +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include +#include +#include #include -#include -#include +#include +#include +MODULE_AUTHOR("Harald Welte "); MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match"); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_ttl"); -MODULE_ALIAS("ip6t_hl"); -static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +ttl_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct ipt_ttl_info *info = par->matchinfo; - const u8 ttl = ip_hdr(skb)->ttl; + struct iphdr *iph; + const struct ipt_TTL_info *info = par->targinfo; + int new_ttl; + + if (skb_ensure_writable(skb, sizeof(*iph))) + return NF_DROP; + + iph = ip_hdr(skb); switch (info->mode) { - case IPT_TTL_EQ: - return ttl == info->ttl; - case IPT_TTL_NE: - return ttl != info->ttl; - case IPT_TTL_LT: - return ttl < info->ttl; - case IPT_TTL_GT: - return ttl > info->ttl; + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; } - return false; + if (new_ttl != iph->ttl) { + csum_replace2(&iph->check, htons(iph->ttl << 8), + htons(new_ttl << 8)); + iph->ttl = new_ttl; + } + + return XT_CONTINUE; } -static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +hl_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct ip6t_hl_info *info = par->matchinfo; - const struct 
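/*
 * [Editor's aside -- ownership rule behind the xt_CT hunks above,
 * which also revert ct_general.use from refcount_t to atomic_t: every
 * pointer that outlives the current call path takes its own reference,
 * one for info->ct at checkentry time (nf_conntrack_get) and one per
 * packet before the template is attached. Minimal model, hypothetical
 * helper name:]
 */
static void attach_template(struct sk_buff *skb, struct nf_conn *tmpl)
{
	atomic_inc(&tmpl->ct_general.use);	/* reference now owned by skb */
	nf_ct_set(skb, tmpl, IP_CT_NEW);
}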
ipv6hdr *ip6h = ipv6_hdr(skb); + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = par->targinfo; + int new_hl; + + if (skb_ensure_writable(skb, sizeof(*ip6h))) + return NF_DROP; + + ip6h = ipv6_hdr(skb); switch (info->mode) { - case IP6T_HL_EQ: - return ip6h->hop_limit == info->hop_limit; - case IP6T_HL_NE: - return ip6h->hop_limit != info->hop_limit; - case IP6T_HL_LT: - return ip6h->hop_limit < info->hop_limit; - case IP6T_HL_GT: - return ip6h->hop_limit > info->hop_limit; + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; } - return false; + ip6h->hop_limit = new_hl; + + return XT_CONTINUE; } -static struct xt_match hl_mt_reg[] __read_mostly = { +static int ttl_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_TTL_info *info = par->targinfo; + + if (info->mode > IPT_TTL_MAXMODE) + return -EINVAL; + if (info->mode != IPT_TTL_SET && info->ttl == 0) + return -EINVAL; + return 0; +} + +static int hl_tg6_check(const struct xt_tgchk_param *par) +{ + const struct ip6t_HL_info *info = par->targinfo; + + if (info->mode > IP6T_HL_MAXMODE) + return -EINVAL; + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) + return -EINVAL; + return 0; +} + +static struct xt_target hl_tg_reg[] __read_mostly = { { - .name = "ttl", + .name = "TTL", .revision = 0, .family = NFPROTO_IPV4, - .match = ttl_mt, - .matchsize = sizeof(struct ipt_ttl_info), + .target = ttl_tg, + .targetsize = sizeof(struct ipt_TTL_info), + .table = "mangle", + .checkentry = ttl_tg_check, .me = THIS_MODULE, }, { - .name = "hl", + .name = "HL", .revision = 0, .family = NFPROTO_IPV6, - .match = hl_mt6, - .matchsize = sizeof(struct ip6t_hl_info), + .target = hl_tg6, + .targetsize = sizeof(struct ip6t_HL_info), + .table = "mangle", + .checkentry = hl_tg6_check, .me = THIS_MODULE, }, }; -static int __init hl_mt_init(void) +static int __init hl_tg_init(void) { - return xt_register_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); + return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); } -static void __exit hl_mt_exit(void) +static void __exit hl_tg_exit(void) { - xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); + xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); } -module_init(hl_mt_init); -module_exit(hl_mt_exit); +module_init(hl_tg_init); +module_exit(hl_tg_exit); +MODULE_ALIAS("ipt_TTL"); +MODULE_ALIAS("ip6t_HL"); diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 0f8bb0bf55..2f7cf5eceb 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -85,9 +85,9 @@ static ssize_t idletimer_tg_show(struct device *dev, mutex_unlock(&list_mutex); if (time_after(expires, jiffies) || ktimespec.tv_sec > 0) - return sysfs_emit(buf, "%ld\n", time_diff); + return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff); - return sysfs_emit(buf, "0\n"); + return snprintf(buf, PAGE_SIZE, "0\n"); } static void idletimer_tg_work(struct work_struct *work) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 37704ab017..122db9fbb9 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -1,107 +1,345 @@ // SPDX-License-Identifier: GPL-2.0-only -/* Kernel module to match TCP MSS values. 
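/*
 * [Editor's aside -- worked detail for the ttl_tg hunk above. The IPv4
 * header checksum is patched incrementally: TTL is the high byte of
 * one 16-bit header word, so csum_replace2() folds the difference
 * between htons(old_ttl << 8) and htons(new_ttl << 8) into iph->check;
 * the low byte is zero in both operands, so only the TTL change
 * contributes. Decrementing TTL 64 -> 63 shrinks the word by 0x0100,
 * so the stored checksum grows by 0x0100 in ones'-complement
 * arithmetic (ignoring carry folds). hl_tg6 needs no such fixup
 * because IPv6 has no header checksum.]
 */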
*/ - -/* Copyright (C) 2000 Marc Boucher - * Portions (C) 2005 by Harald Welte +/* + * This is a module which is used for setting the MSS option in TCP packets. + * + * Copyright (C) 2000 Marc Boucher + * Copyright (C) 2007 Patrick McHardy */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include - #include #include +#include +#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("Xtables: TCP MSS match"); -MODULE_ALIAS("ipt_tcpmss"); -MODULE_ALIAS("ip6t_tcpmss"); +MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment"); +MODULE_ALIAS("ipt_TCPMSS"); +MODULE_ALIAS("ip6t_TCPMSS"); -static bool -tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) +static inline unsigned int +optlen(const u_int8_t *opt, unsigned int offset) { - const struct xt_tcpmss_match_info *info = par->matchinfo; - const struct tcphdr *th; - struct tcphdr _tcph; - /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ - const u_int8_t *op; - u8 _opt[15 * 4 - sizeof(_tcph)]; - unsigned int i, optlen; + /* Beware zero-length options: make finite progress */ + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; +} - /* If we don't have the whole header, drop packet. */ - th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); - if (th == NULL) - goto dropit; +static u_int32_t tcpmss_reverse_mtu(struct net *net, + const struct sk_buff *skb, + unsigned int family) +{ + struct flowi fl; + struct rtable *rt = NULL; + u_int32_t mtu = ~0U; - /* Malformed. */ - if (th->doff*4 < sizeof(*th)) - goto dropit; + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; - optlen = th->doff*4 - sizeof(*th); - if (!optlen) - goto out; - - /* Truncated options. */ - op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt); - if (op == NULL) - goto dropit; - - for (i = 0; i < optlen; ) { - if (op[i] == TCPOPT_MSS - && (optlen - i) >= TCPOLEN_MSS - && op[i+1] == TCPOLEN_MSS) { - u_int16_t mssval; - - mssval = (op[i+2] << 8) | op[i+3]; - - return (mssval >= info->mss_min && - mssval <= info->mss_max) ^ info->invert; - } - if (op[i] < 2) - i++; - else - i += op[i+1] ? 
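/*
 * [Editor's aside -- the option-walk contract shared by the old match
 * loop and the new optlen() helper above: EOL (0) and NOP (1) are
 * single-byte options, everything else carries a length byte that must
 * be >= 2, and a bogus zero length must still advance the cursor or
 * the loop never terminates; the caller bounds i so opt[i + 1] stays
 * inside the header. For the path-MTU clamp further below: newmss =
 * min(dst_mtu, reverse-path MTU) - minlen, with minlen 40 for IPv4
 * (20 IP + 20 TCP) and 60 for IPv6, so a 1500-byte path clamps SYNs to
 * 1460/1440, and an MSS already below that is never raised. Sketch of
 * the walk step:]
 */
#include <stdint.h>

static unsigned int tcp_opt_next(const uint8_t *opt, unsigned int i)
{
	if (opt[i] <= 1 /* EOL or NOP */ || opt[i + 1] == 0)
		return i + 1;		/* make finite progress */
	return i + opt[i + 1];		/* kind, len, data[len - 2] */
}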
: 1; + memset(fl6, 0, sizeof(*fl6)); + fl6->daddr = ipv6_hdr(skb)->saddr; } -out: - return info->invert; -dropit: - par->hotdrop = true; + nf_route(net, (struct dst_entry **)&rt, &fl, false, family); + if (rt != NULL) { + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); + } + return mtu; +} + +static int +tcpmss_mangle_packet(struct sk_buff *skb, + const struct xt_action_param *par, + unsigned int family, + unsigned int tcphoff, + unsigned int minlen) +{ + const struct xt_tcpmss_info *info = par->targinfo; + struct tcphdr *tcph; + int len, tcp_hdrlen; + unsigned int i; + __be16 oldval; + u16 newmss; + u8 *opt; + + /* This is a fragment, no TCP header is available */ + if (par->fragoff != 0) + return 0; + + if (skb_ensure_writable(skb, skb->len)) + return -1; + + len = skb->len - tcphoff; + if (len < (int)sizeof(struct tcphdr)) + return -1; + + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + tcp_hdrlen = tcph->doff * 4; + + if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr)) + return -1; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + struct net *net = xt_net(par); + unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); + + if (min_mtu <= minlen) { + net_err_ratelimited("unknown or invalid path-MTU (%u)\n", + min_mtu); + return -1; + } + newmss = min_mtu - minlen; + } else + newmss = info->mss; + + opt = (u_int8_t *)tcph; + for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) { + u_int16_t oldmss; + + oldmss = (opt[i+2] << 8) | opt[i+3]; + + /* Never increase MSS, even when setting it, as + * doing so results in problems for hosts that rely + * on MSS being set correctly. + */ + if (oldmss <= newmss) + return 0; + + opt[i+2] = (newmss & 0xff00) >> 8; + opt[i+3] = newmss & 0x00ff; + + inet_proto_csum_replace2(&tcph->check, skb, + htons(oldmss), htons(newmss), + false); + return 0; + } + } + + /* There is data after the header so the option can't be added + * without moving it, and doing so may make the SYN packet + * itself too large. Accept the packet unmodified instead. + */ + if (len > tcp_hdrlen) + return 0; + + /* tcph->doff has 4 bits, do not wrap it to 0 */ + if (tcp_hdrlen >= 15 * 4) + return 0; + + /* + * MSS Option not found ?! add it.. + */ + if (skb_tailroom(skb) < TCPOLEN_MSS) { + if (pskb_expand_head(skb, 0, + TCPOLEN_MSS - skb_tailroom(skb), + GFP_ATOMIC)) + return -1; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + } + + skb_put(skb, TCPOLEN_MSS); + + /* + * IPv4: RFC 1122 states "If an MSS option is not received at + * connection setup, TCP MUST assume a default send MSS of 536". 
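[Note: the inet_proto_csum_replace2() calls above rely on the Internet checksum being a ones'-complement sum, so a 16-bit field can be swapped without re-summing the packet (RFC 1624). A self-contained illustration of the arithmetic only; the real helper additionally copes with offloaded checksums and the pseudo-header flag passed above:

	/* HC' = ~(~HC + ~m + m') -- RFC 1624, eqn. 3; host-order toy version. */
	static u16 csum_update16(u16 check, u16 old, u16 new)
	{
		u32 sum = (u16)~check + (u16)~old + new;

		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
		sum = (sum & 0xffff) + (sum >> 16);
		return ~sum;
	}
]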
+ * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum + * length IPv6 header of 60, ergo the default MSS value is 1220 + * Since no MSS was provided, we must use the default values + */ + if (xt_family(par) == NFPROTO_IPV4) + newmss = min(newmss, (u16)536); + else + newmss = min(newmss, (u16)1220); + + opt = (u_int8_t *)tcph + sizeof(struct tcphdr); + memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); + + inet_proto_csum_replace2(&tcph->check, skb, + htons(len), htons(len + TCPOLEN_MSS), true); + opt[0] = TCPOPT_MSS; + opt[1] = TCPOLEN_MSS; + opt[2] = (newmss & 0xff00) >> 8; + opt[3] = newmss & 0x00ff; + + inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false); + + oldval = ((__be16 *)tcph)[6]; + tcph->doff += TCPOLEN_MSS/4; + inet_proto_csum_replace2(&tcph->check, skb, + oldval, ((__be16 *)tcph)[6], false); + return TCPOLEN_MSS; +} + +static unsigned int +tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct iphdr *iph = ip_hdr(skb); + __be16 newlen; + int ret; + + ret = tcpmss_mangle_packet(skb, par, + PF_INET, + iph->ihl * 4, + sizeof(*iph) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + iph = ip_hdr(skb); + newlen = htons(ntohs(iph->tot_len) + ret); + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + } + return XT_CONTINUE; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static unsigned int +tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 nexthdr; + __be16 frag_off, oldlen, newlen; + int tcphoff; + int ret; + + nexthdr = ipv6h->nexthdr; + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off); + if (tcphoff < 0) + return NF_DROP; + ret = tcpmss_mangle_packet(skb, par, + PF_INET6, + tcphoff, + sizeof(*ipv6h) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + ipv6h = ipv6_hdr(skb); + oldlen = ipv6h->payload_len; + newlen = htons(ntohs(oldlen) + ret); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(csum_sub(skb->csum, oldlen), + newlen); + ipv6h->payload_len = newlen; + } + return XT_CONTINUE; +} +#endif + +/* Must specify -p tcp --syn */ +static inline bool find_syn_match(const struct xt_entry_match *m) +{ + const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; + + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TCPHDR_SYN && + !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) + return true; + return false; } -static struct xt_match tcpmss_mt_reg[] __read_mostly = { +static int tcpmss_tg4_check(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + if (par->nft_compat) + return 0; + + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return 0; + pr_info_ratelimited("Only works on TCP SYN packets\n"); + return -EINVAL; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static int tcpmss_tg6_check(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + const struct ip6t_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && 
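[Note: the two fallback literals above are straight RFC arithmetic: 576 - 40 = 536 for IPv4 (the minimum reassembly buffer minus 20 bytes each of IP and TCP header) and 1280 - 60 = 1220 for IPv6. The clamp path follows the same shape; an illustrative helper mirroring the newmss = min_mtu - minlen computation (clamped_mss is an invented name):

	/* Illustrative only: what --clamp-mss-to-pmtu yields for common
	 * links, with minlen = IP + TCP headers as passed to
	 * tcpmss_mangle_packet() (40 for IPv4, 60 for IPv6).
	 */
	static u16 clamped_mss(unsigned int path_mtu, unsigned int minlen)
	{
		/* 1500 - 40 = 1460 (Ethernet/IPv4), 1492 - 40 = 1452 (PPPoE),
		 * 1280 - 60 = 1220 (IPv6 minimum MTU)
		 */
		return path_mtu > minlen ? path_mtu - minlen : 0;
	}
]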
+ (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + if (par->nft_compat) + return 0; + + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return 0; + pr_info_ratelimited("Only works on TCP SYN packets\n"); + return -EINVAL; +} +#endif + +static struct xt_target tcpmss_tg_reg[] __read_mostly = { { - .name = "tcpmss", .family = NFPROTO_IPV4, - .match = tcpmss_mt, - .matchsize = sizeof(struct xt_tcpmss_match_info), + .name = "TCPMSS", + .checkentry = tcpmss_tg4_check, + .target = tcpmss_tg4, + .targetsize = sizeof(struct xt_tcpmss_info), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { - .name = "tcpmss", .family = NFPROTO_IPV6, - .match = tcpmss_mt, - .matchsize = sizeof(struct xt_tcpmss_match_info), + .name = "TCPMSS", + .checkentry = tcpmss_tg6_check, + .target = tcpmss_tg6, + .targetsize = sizeof(struct xt_tcpmss_info), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, +#endif }; -static int __init tcpmss_mt_init(void) +static int __init tcpmss_tg_init(void) { - return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg)); + return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg)); } -static void __exit tcpmss_mt_exit(void) +static void __exit tcpmss_tg_exit(void) { - xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg)); + xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg)); } -module_init(tcpmss_mt_init); -module_exit(tcpmss_mt_exit); +module_init(tcpmss_tg_init); +module_exit(tcpmss_tg_exit); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 8490e46359..566ba4397e 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1537,7 +1537,7 @@ int __init netlbl_unlabel_defconf(void) /* Only the kernel is allowed to call this function and the only time * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. 
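[Note: taken together, the checkentry hooks encode the documented usage constraints: the target only makes sense on SYN (or SYN,RST-masked) packets, and path-MTU clamping needs a hook where a dst is attached to the skb. The canonical rule, per the iptables extension documentation, is

	iptables -t mangle -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
		-j TCPMSS --clamp-mss-to-pmtu

with the same form under ip6tables for the IPv6 target; a fixed value can be forced instead with --set-mss.]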
*/ - security_current_getsecid_subj(&audit_info.secid); + security_task_getsecid_subj(current, &audit_info.secid); audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index d6c5b31eb4..6190cbf94b 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -32,7 +32,7 @@ */ static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info) { - security_current_getsecid_subj(&audit_info->secid); + security_task_getsecid_subj(current, &audit_info->secid); audit_info->loginuid = audit_get_loginuid(current); audit_info->sessionid = audit_get_sessionid(current); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7b344035bf..81ba8e51e0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -20,10 +20,8 @@ #include -#include #include #include -#include #include #include #include @@ -709,7 +707,9 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out_module; + local_bh_disable(); sock_prot_inuse_add(net, &netlink_proto, 1); + local_bh_enable(); nlk = nlk_sk(sock->sk); nlk->module = module; @@ -809,7 +809,9 @@ static int netlink_release(struct socket *sock) netlink_table_ungrab(); } + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + local_bh_enable(); call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1410,6 +1412,8 @@ struct netlink_broadcast_data { int delivered; gfp_t allocation; struct sk_buff *skb, *skb2; + int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); + void *tx_data; }; static void do_one_broadcast(struct sock *sk, @@ -1463,6 +1467,11 @@ static void do_one_broadcast(struct sock *sk, p->delivery_failure = 1; goto out; } + if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { + kfree_skb(p->skb2); + p->skb2 = NULL; + goto out; + } if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; @@ -1485,8 +1494,10 @@ static void do_one_broadcast(struct sock *sk, sock_put(sk); } -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, - u32 group, gfp_t allocation) +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, + u32 group, gfp_t allocation, + int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), + void *filter_data) { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; @@ -1505,6 +1516,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, info.allocation = allocation; info.skb = skb; info.skb2 = NULL; + info.tx_filter = filter; + info.tx_data = filter_data; /* While we sleep in clone, do not allow to change socket list */ @@ -1530,6 +1543,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, } return -ESRCH; } +EXPORT_SYMBOL(netlink_broadcast_filtered); + +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, + u32 group, gfp_t allocation) +{ + return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, + NULL, NULL); +} EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fa9dc2ba39..e5c8a295e6 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -633,7 +633,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; - const ax25_address *source = 
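[Note: netlink_broadcast_filtered(), re-exported above, lets a kernel-side sender veto delivery per destination socket: the callback runs after skb2 has been cloned, and a nonzero return drops that copy only, leaving delivery to other group members untouched. A hypothetical in-file caller, purely to show the contract (the filter and portid are invented):

	static int skip_one_portid(struct sock *dsk, struct sk_buff *skb,
				   void *data)
	{
		/* nonzero => suppress delivery to this socket */
		return nlk_sk(dsk)->portid == *(u32 *)data;
	}

	/* inside some af_netlink.c-context sender: */
	u32 excluded = 4242;	/* invented portid */

	netlink_broadcast_filtered(ssk, skb, 0, group, GFP_KERNEL,
				   skip_one_portid, &excluded);
]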
NULL; + ax25_address *source = NULL; ax25_uid_assoc *user; struct net_device *dev; int err = 0; @@ -673,7 +673,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENETUNREACH; goto out_release; } - source = (const ax25_address *)dev->dev_addr; + source = (ax25_address *)dev->dev_addr; user = ax25_findbyuid(current_euid()); if (user) { diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 3aaac4a22b..29e418c8c6 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -108,10 +108,10 @@ static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) if (err) return err; - ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); + ax25_listen_release((ax25_address *)dev->dev_addr, NULL); } - dev_addr_set(dev, sa->sa_data); + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); return 0; } @@ -120,7 +120,7 @@ static int nr_open(struct net_device *dev) { int err; - err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL); + err = ax25_listen_register((ax25_address *)dev->dev_addr, NULL); if (err) return err; @@ -131,7 +131,7 @@ static int nr_open(struct net_device *dev) static int nr_close(struct net_device *dev) { - ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); + ax25_listen_release((ax25_address *)dev->dev_addr, NULL); netif_stop_queue(dev); return 0; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index baea3cbd76..ddd5cbd455 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -598,7 +598,7 @@ struct net_device *nr_dev_get(ax25_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && - ax25cmp(addr, (const ax25_address *)dev->dev_addr) == 0) { + ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } @@ -825,7 +825,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) ax25s = nr_neigh->ax25; nr_neigh->ax25 = ax25_send_frame(skb, 256, - (const ax25_address *)dev->dev_addr, + (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); if (ax25s) diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index af6bacb3ba..3a89bd9b89 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -114,6 +114,8 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, { u8 pipe; + pr_debug("\n"); + pipe = hdev->gate2pipe[gate]; if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -128,6 +130,8 @@ int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, { u8 pipe; + pr_debug("\n"); + pipe = hdev->gate2pipe[gate]; if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -201,6 +205,8 @@ static int nfc_hci_open_pipe(struct nfc_hci_dev *hdev, u8 pipe) static int nfc_hci_close_pipe(struct nfc_hci_dev *hdev, u8 pipe) { + pr_debug("\n"); + return nfc_hci_execute_cmd(hdev, pipe, NFC_HCI_ANY_CLOSE_PIPE, NULL, 0, NULL); } @@ -236,6 +242,8 @@ static u8 nfc_hci_create_pipe(struct nfc_hci_dev *hdev, u8 dest_host, static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe) { + pr_debug("\n"); + return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE, NFC_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } @@ -248,6 +256,8 @@ static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev) /* TODO: Find out what the identity reference data is * and fill param with it. 
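[Note: the dev_addr_set() -> memcpy() conversions in nr_dev.c (and rose_dev.c further down) are the same back-porting pattern as the dropped const casts: this tree predates the helper that routes all dev->dev_addr writes through the core so it can keep auxiliary state, and eventually a const dev_addr, consistent. Where the helper exists, it is approximately:

	/* Approximate shape of dev_addr_set() on trees that have it; on
	 * this tree the open-coded memcpy() above is the direct
	 * equivalent.
	 */
	static inline void demo_dev_addr_set(struct net_device *dev,
					     const void *addr)
	{
		memcpy(dev->dev_addr, addr, dev->addr_len);
	}
]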
HCI spec 6.1.3.5 */ + pr_debug("\n"); + if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks)) param_len = 0; @@ -261,6 +271,8 @@ int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate) int r; u8 pipe = hdev->gate2pipe[gate]; + pr_debug("\n"); + if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -284,6 +296,8 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) { int r; + pr_debug("\n"); + r = nfc_hci_clear_all_pipes(hdev); if (r < 0) return r; @@ -300,6 +314,8 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, bool pipe_created = false; int r; + pr_debug("\n"); + if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index e90f703858..aef750d778 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -201,7 +201,8 @@ static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr) del_timer_sync(&shdlc->t2_timer); shdlc->t2_active = false; - pr_debug("All sent frames acked. Stopped T2(retransmit)\n"); + pr_debug + ("All sent frames acked. Stopped T2(retransmit)\n"); } } else { skb = skb_peek(&shdlc->ack_pending_q); @@ -210,7 +211,8 @@ static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr) msecs_to_jiffies(SHDLC_T2_VALUE_MS)); shdlc->t2_active = true; - pr_debug("Start T2(retransmit) for remaining unacked sent frames\n"); + pr_debug + ("Start T2(retransmit) for remaining unacked sent frames\n"); } } @@ -363,6 +365,8 @@ static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc) { struct sk_buff *skb; + pr_debug("\n"); + skb = llc_shdlc_alloc_skb(shdlc, 2); if (skb == NULL) return -ENOMEM; @@ -377,6 +381,8 @@ static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc) { struct sk_buff *skb; + pr_debug("\n"); + skb = llc_shdlc_alloc_skb(shdlc, 0); if (skb == NULL) return -ENOMEM; @@ -516,11 +522,12 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) unsigned long time_sent; if (shdlc->send_q.qlen) - pr_debug("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n", - shdlc->send_q.qlen, shdlc->ns, shdlc->dnr, - shdlc->rnr == false ? "false" : "true", - shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr), - shdlc->ack_pending_q.qlen); + pr_debug + ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n", + shdlc->send_q.qlen, shdlc->ns, shdlc->dnr, + shdlc->rnr == false ? 
"false" : "true", + shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr), + shdlc->ack_pending_q.qlen); while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w && (shdlc->rnr == false)) { @@ -566,6 +573,8 @@ static void llc_shdlc_connect_timeout(struct timer_list *t) { struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer); + pr_debug("\n"); + schedule_work(&shdlc->sm_work); } @@ -592,6 +601,8 @@ static void llc_shdlc_sm_work(struct work_struct *work) struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work); int r; + pr_debug("\n"); + mutex_lock(&shdlc->state_mutex); switch (shdlc->state) { @@ -638,7 +649,8 @@ static void llc_shdlc_sm_work(struct work_struct *work) llc_shdlc_handle_send_queue(shdlc); if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) { - pr_debug("Handle T1(send ack) elapsed (T1 now inactive)\n"); + pr_debug + ("Handle T1(send ack) elapsed (T1 now inactive)\n"); shdlc->t1_active = false; r = llc_shdlc_send_s_frame(shdlc, S_FRAME_RR, @@ -648,7 +660,8 @@ static void llc_shdlc_sm_work(struct work_struct *work) } if (shdlc->t2_active && timer_pending(&shdlc->t2_timer) == 0) { - pr_debug("Handle T2(retransmit) elapsed (T2 inactive)\n"); + pr_debug + ("Handle T2(retransmit) elapsed (T2 inactive)\n"); shdlc->t2_active = false; @@ -673,6 +686,8 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq); + pr_debug("\n"); + mutex_lock(&shdlc->state_mutex); shdlc->state = SHDLC_CONNECTING; @@ -691,6 +706,8 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc) static void llc_shdlc_disconnect(struct llc_shdlc *shdlc) { + pr_debug("\n"); + mutex_lock(&shdlc->state_mutex); shdlc->state = SHDLC_DISCONNECTED; diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 41e3a20c89..3c4172a5ae 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -337,6 +337,8 @@ int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) struct nfc_dev *dev; struct nfc_llcp_local *local; + pr_debug("Sending DISC\n"); + local = sock->local; if (local == NULL) return -ENODEV; @@ -360,6 +362,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) struct nfc_llcp_local *local; u16 size = 0; + pr_debug("Sending SYMM\n"); + local = nfc_llcp_find_local(dev); if (local == NULL) return -ENODEV; @@ -395,6 +399,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) u16 size = 0; __be16 miux; + pr_debug("Sending CONNECT\n"); + local = sock->local; if (local == NULL) return -ENODEV; @@ -469,6 +475,8 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) u16 size = 0; __be16 miux; + pr_debug("Sending CC\n"); + local = sock->local; if (local == NULL) return -ENODEV; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 5ad5157aa9..eaeb2b1cfa 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -45,6 +45,8 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) struct nfc_llcp_local *local = sock->local; struct sk_buff *s, *tmp; + pr_debug("%p\n", &sock->sk); + skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); @@ -1503,8 +1505,9 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + pr_debug("Received an LLCP PDU\n"); if (err < 0) { - pr_err("LLCP PDU receive err %d\n", err); + pr_err("err %d\n", err); return; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index d2537383a3..e41e2e9e54 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -942,6 +942,8 @@ static void 
nci_deactivate_target(struct nfc_dev *nfc_dev, struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; + pr_debug("entry\n"); + if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); return; @@ -987,6 +989,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; + pr_debug("entry\n"); + if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 19703a649b..e199912ee1 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -432,6 +432,8 @@ void nci_hci_data_received_cb(void *context, struct sk_buff *frag_skb; int msg_len; + pr_debug("\n"); + if (err) { nci_req_complete(ndev, err); return; @@ -545,6 +547,8 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) { + pr_debug("\n"); + return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 282c51051d..c5eacaac41 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -738,6 +738,8 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, const struct nci_nfcee_discover_ntf *nfcee_ntf = (struct nci_nfcee_discover_ntf *)skb->data; + pr_debug("\n"); + /* NFCForum NCI 9.2.1 HCI Network Specific Handling * If the NFCC supports the HCI Network, it SHALL return one, * and only one, NFCEE_DISCOVER_NTF with a Protocol type of @@ -749,6 +751,12 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, nci_req_complete(ndev, status); } +static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, + const struct sk_buff *skb) +{ + pr_debug("\n"); +} + void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -805,6 +813,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) break; case NCI_OP_RF_NFCEE_ACTION_NTF: + nci_nfcee_action_ntf_packet(ndev, skb); break; default: diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index cc8fa9e361..502e7a3f89 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -1,8 +1,20 @@ -// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2015, Marvell International Ltd. * - * Inspired (hugely) by HCI LDISC implementation in Bluetooth. + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + */ + +/* Inspired (hugely) by HCI LDISC implementation in Bluetooth. 
* * Copyright (C) 2000-2001 Qualcomm Incorporated * Copyright (C) 2002-2003 Maxim Krasnyansky @@ -317,13 +329,14 @@ static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data, * Arguments: * * tty pointer to tty instance data + * file pointer to open file object for device * cmd IOCTL command code * arg argument for IOCTL call (cmd dependent) * * Return Value: Command dependent */ -static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, - unsigned long arg) +static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) { struct nci_uart *nu = (void *)tty->disc_data; int err = 0; @@ -336,7 +349,7 @@ static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, return -EBUSY; break; default: - err = n_tty_ioctl_helper(tty, cmd, arg); + err = n_tty_ioctl_helper(tty, file, cmd, arg); break; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f184b0db79..8048a3dcc5 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1668,37 +1668,31 @@ static const struct genl_ops nfc_genl_ops[] = { .cmd = NFC_CMD_DEV_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_up, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEV_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_down, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_START_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_start_poll, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_STOP_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_stop_poll, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_up, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_down, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_TARGET, @@ -1716,31 +1710,26 @@ static const struct genl_ops nfc_genl_ops[] = { .cmd = NFC_CMD_LLC_SET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_set_params, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_LLC_SDREQ, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_sdreq, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_FW_DOWNLOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_fw_download, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ENABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_enable_se, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DISABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_disable_se, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_SE, @@ -1752,25 +1741,21 @@ static const struct genl_ops nfc_genl_ops[] = { .cmd = NFC_CMD_SE_IO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_se_io, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_activate_target, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_VENDOR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_vendor_cmd, - .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = 
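[Note: the struct file * threaded back through nci_uart_tty_ioctl() matches this tree's tty_ldisc_ops ioctl prototype. A minimal line discipline under that prototype simply forwards anything it does not handle, file pointer untouched (demo_ldisc_ioctl is illustrative; the real handler above also intercepts its driver-setup ioctl first):

	static int demo_ldisc_ioctl(struct tty_struct *tty, struct file *file,
				    unsigned int cmd, unsigned long arg)
	{
		return n_tty_ioctl_helper(tty, file, cmd, arg);
	}
]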
nfc_genl_deactivate_target, - .flags = GENL_ADMIN_PERM, }, }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c07afff57d..1b5eae57bc 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -25,8 +25,6 @@ #include #endif -#include - #include "datapath.h" #include "conntrack.h" #include "flow.h" @@ -576,7 +574,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); nf_ct_delete(ct, 0, 0); - nf_ct_put(ct); + nf_conntrack_put(&ct->ct_general); } } @@ -725,7 +723,7 @@ static bool skb_nfct_cached(struct net *net, if (nf_ct_is_confirmed(ct)) nf_ct_delete(ct, 0, 0); - nf_ct_put(ct); + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, 0); return false; } @@ -969,8 +967,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, /* Associate skb with specified zone. */ if (tmpl) { - ct = nf_ct_get(skb, &ctinfo); - nf_ct_put(ct); + nf_conntrack_put(skb_nfct(skb)); nf_conntrack_get(&tmpl->ct_general); nf_ct_set(skb, tmpl, IP_CT_NEW); } @@ -1048,8 +1045,6 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, */ nf_ct_set_tcp_be_liberal(ct); } - - nf_conn_act_ct_ext_fill(skb, ct, ctinfo); } return 0; @@ -1250,8 +1245,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, &info->labels.mask); if (err) return err; - - nf_conn_act_ct_ext_add(ct); } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && labels_nonzero(&info->labels.mask)) { err = ovs_ct_set_labels(ct, key, &info->labels.value, @@ -1335,12 +1328,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) { - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - - nf_ct_put(ct); + nf_conntrack_put(skb_nfct(skb)); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); ovs_ct_fill_key(skb, key, false); @@ -1728,6 +1716,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, goto err_free_ct; __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); return 0; err_free_ct: __ovs_ct_free_action(&ct_info); diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 04a060ac7f..896b8f5bc8 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b498dac4e1..8e1a88f136 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -82,7 +82,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) err = -ENODEV; goto error_free_vport; } - netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); + if (vport->dev->flags & IFF_LOOPBACK || (vport->dev->type != ARPHRD_ETHER && vport->dev->type != ARPHRD_NONE) || @@ -115,7 +115,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) error_unlock: rtnl_unlock(); error_put: - dev_put_track(vport->dev, &vport->dev_tracker); + dev_put(vport->dev); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -137,7 +137,8 @@ static void vport_netdev_free(struct rcu_head *rcu) { struct vport *vport = container_of(rcu, struct vport, rcu); - dev_put_track(vport->dev, &vport->dev_tracker); + if (vport->dev) + dev_put(vport->dev); ovs_vport_free(vport); } @@ -173,7 +174,7 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == 
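[Note: on the flag being stripped from the NFC ops above: GENL_ADMIN_PERM gates an operation on CAP_NET_ADMIN before its doit handler ever runs. The enforcement in the genetlink core is essentially:

	/* Paraphrased from genetlink's request dispatch: without the flag,
	 * the op is reachable by any user able to open a generic netlink
	 * socket.
	 */
	if ((ops->flags & GENL_ADMIN_PERM) &&
	    !netlink_capable(skb, CAP_NET_ADMIN))
		return -EPERM;
]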
NETREG_REGISTERED) rtnl_delete_link(vport->dev); - dev_put_track(vport->dev, &vport->dev_tracker); + dev_put(vport->dev); vport->dev = NULL; rtnl_unlock(); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 9de5030d98..8a930ca6d6 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -58,7 +58,6 @@ struct vport_portids { /** * struct vport - one port within a datapath * @dev: Pointer to net_device. - * @dev_tracker: refcount tracker for @dev reference * @dp: Datapath to which this port belongs. * @upcall_portids: RCU protected 'struct vport_portids'. * @port_no: Index into @dp's @ports array. @@ -70,7 +69,6 @@ struct vport_portids { */ struct vport { struct net_device *dev; - netdevice_tracker dev_tracker; struct datapath *dp; struct vport_portids __rcu *upcall_portids; u16 port_no; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ab87f22cc7..e00c38f242 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -49,7 +49,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -92,7 +91,6 @@ #endif #include #include -#include #include "internal.h" @@ -243,42 +241,8 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); -#ifdef CONFIG_NETFILTER_EGRESS -static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) -{ - struct sk_buff *next, *head = NULL, *tail; - int rc; - - rcu_read_lock(); - for (; skb != NULL; skb = next) { - next = skb->next; - skb_mark_not_on_list(skb); - - if (!nf_hook_egress(skb, &rc, skb->dev)) - continue; - - if (!head) - head = skb; - else - tail->next = skb; - - tail = skb; - } - rcu_read_unlock(); - - return head; -} -#endif - static int packet_direct_xmit(struct sk_buff *skb) { -#ifdef CONFIG_NETFILTER_EGRESS - if (nf_hook_egress_active()) { - skb = nf_hook_direct_egress(skb); - if (!skb) - return NET_XMIT_DROP; - } -#endif return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); } @@ -3107,14 +3071,16 @@ static int packet_release(struct socket *sock) sk_del_node_init_rcu(sk); mutex_unlock(&net->packet.sklist_lock); + preempt_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); + preempt_enable(); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); packet_cached_dev_reset(po); if (po->prot_hook.dev) { - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); + dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); @@ -3166,10 +3132,12 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, __be16 proto) { struct packet_sock *po = pkt_sk(sk); - struct net_device *dev = NULL; - bool unlisted = false; + struct net_device *dev_curr; + __be16 proto_curr; bool need_rehook; + struct net_device *dev = NULL; int ret = 0; + bool unlisted = false; lock_sock(sk); spin_lock(&po->bind_lock); @@ -3194,10 +3162,14 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, } } - need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev; + dev_hold(dev); + + proto_curr = po->prot_hook.type; + dev_curr = po->prot_hook.dev; + + need_rehook = proto_curr != proto || dev_curr != dev; if (need_rehook) { - dev_hold(dev); if (po->running) { rcu_read_unlock(); /* prevents packet_notifier() from calling @@ -3206,6 +3178,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); + 
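[Note: the dev_put_track()/dev_hold_track() -> dev_put()/dev_hold() conversions here and in the openvswitch hunks above are mechanical: the tracked variants wrap the same refcount operation with a leak-debugging cookie (CONFIG_NET_DEV_REFCNT_TRACKER). Where both exist, the correspondence is:

	netdevice_tracker tracker;	/* per-holder cookie, debugging only */

	dev_hold_track(dev, &tracker, GFP_ATOMIC);	/* ~ dev_hold(dev) */
	dev_put_track(dev, &tracker);			/* ~ dev_put(dev)  */
]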
dev_curr = po->prot_hook.dev; if (dev) unlisted = !dev_get_by_index_rcu(sock_net(sk), dev->ifindex); @@ -3215,21 +3188,18 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); - if (unlikely(unlisted)) { + dev_put(dev); po->prot_hook.dev = NULL; WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { - dev_hold_track(dev, &po->prot_hook.dev_tracker, - GFP_ATOMIC); po->prot_hook.dev = dev; WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); } - dev_put(dev); } + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; @@ -3368,7 +3338,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); + preempt_disable(); sock_prot_inuse_add(net, &packet_proto, 1); + preempt_enable(); return 0; out2: @@ -4141,8 +4113,7 @@ static int packet_notifier(struct notifier_block *this, if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); - dev_put_track(po->prot_hook.dev, - &po->prot_hook.dev_tracker); + dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile index 8e0605f88a..1b1411d158 100644 --- a/net/qrtr/Makefile +++ b/net/qrtr/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_QRTR) += qrtr.o -qrtr-y := af_qrtr.o ns.o +obj-$(CONFIG_QRTR) := qrtr.o ns.o obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o qrtr-smd-y := smd.o diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index 18196e1c8c..fa611678af 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -79,7 +79,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer_autoqueue(mhi_dev); + rc = mhi_prepare_for_transfer(mhi_dev); if (rc) return rc; diff --git a/net/rds/send.c b/net/rds/send.c index 0c5504068e..53444397de 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -272,7 +272,7 @@ int rds_send_xmit(struct rds_conn_path *cp) /* Unfortunately, the way Infiniband deals with * RDMA to a bad MR key is by moving the entire - * queue pair to error state. We could possibly + * queue pair to error state. We cold possibly * recover from that, but right now we drop the * connection. * Therefore, we never retransmit messages with RDMA ops. diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 5b1927d66f..ac15a94457 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -946,18 +946,6 @@ bool rfkill_blocked(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_blocked); -bool rfkill_soft_blocked(struct rfkill *rfkill) -{ - unsigned long flags; - u32 state; - - spin_lock_irqsave(&rfkill->lock, flags); - state = rfkill->state; - spin_unlock_irqrestore(&rfkill->lock, flags); - - return !!(state & RFKILL_BLOCK_SW); -} -EXPORT_SYMBOL(rfkill_soft_blocked); struct rfkill * __must_check rfkill_alloc(const char *name, struct device *parent, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 30a1cf4c16..cf7d974e0f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -109,7 +109,7 @@ char *rose2asc(char *buf, const rose_address *addr) /* * Compare two ROSE addresses, 0 == equal. 
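[Note: the preempt_disable()/local_bh_disable() brackets reinstated around sock_prot_inuse_add() in packet_release()/packet_create() (and in af_netlink.c earlier) are needed because on this tree the counter is a raw per-cpu add with no serialization of its own. A conceptual sketch; field names are approximate:

	/* Callers must pin the CPU (preempt or BH disabled) around this. */
	static void demo_prot_inuse_add(struct net *net, struct proto *prot,
					int val)
	{
		__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
	}
]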
*/ -int rosecmp(const rose_address *addr1, const rose_address *addr2) +int rosecmp(rose_address *addr1, rose_address *addr2) { int i; @@ -123,8 +123,7 @@ int rosecmp(const rose_address *addr1, const rose_address *addr2) /* * Compare two ROSE addresses for only mask digits, 0 == equal. */ -int rosecmpm(const rose_address *addr1, const rose_address *addr2, - unsigned short mask) +int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask) { unsigned int i, j; diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index f1a76a5820..051804fbee 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -66,10 +66,10 @@ static int rose_set_mac_address(struct net_device *dev, void *addr) if (err) return err; - rose_del_loopback_node((const rose_address *)dev->dev_addr); + rose_del_loopback_node((rose_address *)dev->dev_addr); } - dev_addr_set(dev, sa->sa_data); + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); return 0; } @@ -78,7 +78,7 @@ static int rose_open(struct net_device *dev) { int err; - err = rose_add_loopback_node((const rose_address *)dev->dev_addr); + err = rose_add_loopback_node((rose_address *)dev->dev_addr); if (err) return err; @@ -90,7 +90,7 @@ static int rose_open(struct net_device *dev) static int rose_close(struct net_device *dev) { netif_stop_queue(dev); - rose_del_loopback_node((const rose_address *)dev->dev_addr); + rose_del_loopback_node((rose_address *)dev->dev_addr); return 0; } diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 4d67f36dce..6af786d66b 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -9,7 +9,6 @@ * diagrams as the code is not obvious and probably very easy to break. */ #include -#include #include #include #include diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 8b96a56d3a..f6102e6f51 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -94,11 +94,11 @@ static void rose_t0timer_expiry(struct timer_list *t) */ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) { - const ax25_address *rose_call; + ax25_address *rose_call; ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (const ax25_address *)neigh->dev->dev_addr; + rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; @@ -117,11 +117,11 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) */ static int rose_link_up(struct rose_neigh *neigh) { - const ax25_address *rose_call; + ax25_address *rose_call; ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (const ax25_address *)neigh->dev->dev_addr; + rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index e2e6b6b785..c0e04c261a 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -401,7 +401,7 @@ void rose_add_loopback_neigh(void) /* * Add a loopback node. */ -int rose_add_loopback_node(const rose_address *address) +int rose_add_loopback_node(rose_address *address) { struct rose_node *rose_node; int err = 0; @@ -446,7 +446,7 @@ int rose_add_loopback_node(const rose_address *address) /* * Delete a loopback node. 
*/ -void rose_del_loopback_node(const rose_address *address) +void rose_del_loopback_node(rose_address *address) { struct rose_node *rose_node; @@ -629,8 +629,7 @@ struct net_device *rose_dev_get(rose_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && - rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } @@ -647,8 +646,7 @@ static int rose_dev_exists(rose_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && - rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ca03e72842..7d53272727 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -19,10 +19,8 @@ #include #include #include -#include #include #include -#include #ifdef CONFIG_INET DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); @@ -131,244 +129,8 @@ static void free_tcf(struct tc_action *p) kfree(p); } -static void offload_action_hw_count_set(struct tc_action *act, - u32 hw_count) -{ - act->in_hw_count = hw_count; -} - -static void offload_action_hw_count_inc(struct tc_action *act, - u32 hw_count) -{ - act->in_hw_count += hw_count; -} - -static void offload_action_hw_count_dec(struct tc_action *act, - u32 hw_count) -{ - act->in_hw_count = act->in_hw_count > hw_count ? - act->in_hw_count - hw_count : 0; -} - -static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act) -{ - if (is_tcf_pedit(act)) - return tcf_pedit_nkeys(act); - else - return 1; -} - -static bool tc_act_skip_hw(u32 flags) -{ - return (flags & TCA_ACT_FLAGS_SKIP_HW) ? true : false; -} - -static bool tc_act_skip_sw(u32 flags) -{ - return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false; -} - -static bool tc_act_in_hw(struct tc_action *act) -{ - return !!act->in_hw_count; -} - -/* SKIP_HW and SKIP_SW are mutually exclusive flags. 
*/ -static bool tc_act_flags_valid(u32 flags) -{ - flags &= TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW; - - return flags ^ (TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW); -} - -static int offload_action_init(struct flow_offload_action *fl_action, - struct tc_action *act, - enum offload_act_command cmd, - struct netlink_ext_ack *extack) -{ - int err; - - fl_action->extack = extack; - fl_action->command = cmd; - fl_action->index = act->tcfa_index; - - if (act->ops->offload_act_setup) { - spin_lock_bh(&act->tcfa_lock); - err = act->ops->offload_act_setup(act, fl_action, NULL, - false); - spin_unlock_bh(&act->tcfa_lock); - return err; - } - - return -EOPNOTSUPP; -} - -static int tcf_action_offload_cmd_ex(struct flow_offload_action *fl_act, - u32 *hw_count) -{ - int err; - - err = flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT, - fl_act, NULL, NULL); - if (err < 0) - return err; - - if (hw_count) - *hw_count = err; - - return 0; -} - -static int tcf_action_offload_cmd_cb_ex(struct flow_offload_action *fl_act, - u32 *hw_count, - flow_indr_block_bind_cb_t *cb, - void *cb_priv) -{ - int err; - - err = cb(NULL, NULL, cb_priv, TC_SETUP_ACT, NULL, fl_act, NULL); - if (err < 0) - return err; - - if (hw_count) - *hw_count = 1; - - return 0; -} - -static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, - u32 *hw_count, - flow_indr_block_bind_cb_t *cb, - void *cb_priv) -{ - return cb ? tcf_action_offload_cmd_cb_ex(fl_act, hw_count, - cb, cb_priv) : - tcf_action_offload_cmd_ex(fl_act, hw_count); -} - -static int tcf_action_offload_add_ex(struct tc_action *action, - struct netlink_ext_ack *extack, - flow_indr_block_bind_cb_t *cb, - void *cb_priv) -{ - bool skip_sw = tc_act_skip_sw(action->tcfa_flags); - struct tc_action *actions[TCA_ACT_MAX_PRIO] = { - [0] = action, - }; - struct flow_offload_action *fl_action; - u32 in_hw_count = 0; - int num, err = 0; - - if (tc_act_skip_hw(action->tcfa_flags)) - return 0; - - num = tcf_offload_act_num_actions_single(action); - fl_action = offload_action_alloc(num); - if (!fl_action) - return -ENOMEM; - - err = offload_action_init(fl_action, action, FLOW_ACT_REPLACE, extack); - if (err) - goto fl_err; - - err = tc_setup_action(&fl_action->action, actions); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Failed to setup tc actions for offload"); - goto fl_err; - } - - err = tcf_action_offload_cmd(fl_action, &in_hw_count, cb, cb_priv); - if (!err) - cb ? 
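[Note: the XOR in tc_act_flags_valid() above is compact enough to misread: after masking, the result is zero, i.e. false/invalid, exactly when both skip bits are set. Spelled out:

	/* flags (masked)           flags ^ (SKIP_HW|SKIP_SW)   valid?
	 * 0                        SKIP_HW|SKIP_SW (nonzero)   yes
	 * SKIP_HW                  SKIP_SW         (nonzero)   yes
	 * SKIP_SW                  SKIP_HW         (nonzero)   yes
	 * SKIP_HW|SKIP_SW          0                           no
	 */
]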
offload_action_hw_count_inc(action, in_hw_count) : - offload_action_hw_count_set(action, in_hw_count); - - if (skip_sw && !tc_act_in_hw(action)) - err = -EINVAL; - - tc_cleanup_offload_action(&fl_action->action); - -fl_err: - kfree(fl_action); - - return err; -} - -/* offload the tc action after it is inserted */ -static int tcf_action_offload_add(struct tc_action *action, - struct netlink_ext_ack *extack) -{ - return tcf_action_offload_add_ex(action, extack, NULL, NULL); -} - -int tcf_action_update_hw_stats(struct tc_action *action) -{ - struct flow_offload_action fl_act = {}; - int err; - - if (!tc_act_in_hw(action)) - return -EOPNOTSUPP; - - err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL); - if (err) - return err; - - err = tcf_action_offload_cmd(&fl_act, NULL, NULL, NULL); - if (!err) { - preempt_disable(); - tcf_action_stats_update(action, fl_act.stats.bytes, - fl_act.stats.pkts, - fl_act.stats.drops, - fl_act.stats.lastused, - true); - preempt_enable(); - action->used_hw_stats = fl_act.stats.used_hw_stats; - action->used_hw_stats_valid = true; - } else { - return -EOPNOTSUPP; - } - - return 0; -} -EXPORT_SYMBOL(tcf_action_update_hw_stats); - -static int tcf_action_offload_del_ex(struct tc_action *action, - flow_indr_block_bind_cb_t *cb, - void *cb_priv) -{ - struct flow_offload_action fl_act = {}; - u32 in_hw_count = 0; - int err = 0; - - if (!tc_act_in_hw(action)) - return 0; - - err = offload_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL); - if (err) - return err; - - err = tcf_action_offload_cmd(&fl_act, &in_hw_count, cb, cb_priv); - if (err < 0) - return err; - - if (!cb && action->in_hw_count != in_hw_count) - return -EINVAL; - - /* do not need to update hw state when deleting action */ - if (cb && in_hw_count) - offload_action_hw_count_dec(action, in_hw_count); - - return 0; -} - -static int tcf_action_offload_del(struct tc_action *action) -{ - return tcf_action_offload_del_ex(action, NULL, NULL); -} - static void tcf_action_cleanup(struct tc_action *p) { - tcf_action_offload_del(p); if (p->ops->cleanup) p->ops->cleanup(p); @@ -718,28 +480,26 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, atomic_set(&p->tcfa_bindcnt, 1); if (cpustats) { - p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); + p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); if (!p->cpu_bstats) goto err1; - p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); + p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); if (!p->cpu_bstats_hw) goto err2; p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!p->cpu_qstats) goto err3; } - gnet_stats_basic_sync_init(&p->tcfa_bstats); - gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; - p->tcfa_flags = flags; + p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, - &p->tcfa_lock, false, est); + &p->tcfa_lock, NULL, est); if (err) goto err4; } @@ -860,59 +620,6 @@ EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); -/* since act ops id is stored in pernet subsystem list, - * then there is no way to walk through only all the action - * subsystem, so we keep tc action pernet ops id for - * reoffload to walk through. 
- */ -static LIST_HEAD(act_pernet_id_list); -static DEFINE_MUTEX(act_id_mutex); -struct tc_act_pernet_id { - struct list_head list; - unsigned int id; -}; - -static int tcf_pernet_add_id_list(unsigned int id) -{ - struct tc_act_pernet_id *id_ptr; - int ret = 0; - - mutex_lock(&act_id_mutex); - list_for_each_entry(id_ptr, &act_pernet_id_list, list) { - if (id_ptr->id == id) { - ret = -EEXIST; - goto err_out; - } - } - - id_ptr = kzalloc(sizeof(*id_ptr), GFP_KERNEL); - if (!id_ptr) { - ret = -ENOMEM; - goto err_out; - } - id_ptr->id = id; - - list_add_tail(&id_ptr->list, &act_pernet_id_list); - -err_out: - mutex_unlock(&act_id_mutex); - return ret; -} - -static void tcf_pernet_del_id_list(unsigned int id) -{ - struct tc_act_pernet_id *id_ptr; - - mutex_lock(&act_id_mutex); - list_for_each_entry(id_ptr, &act_pernet_id_list, list) { - if (id_ptr->id == id) { - list_del(&id_ptr->list); - kfree(id_ptr); - break; - } - } - mutex_unlock(&act_id_mutex); -} int tcf_register_action(struct tc_action_ops *act, struct pernet_operations *ops) @@ -931,31 +638,18 @@ int tcf_register_action(struct tc_action_ops *act, if (ret) return ret; - if (ops->id) { - ret = tcf_pernet_add_id_list(*ops->id); - if (ret) - goto err_id; - } - write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { - ret = -EEXIST; - goto err_out; + write_unlock(&act_mod_lock); + unregister_pernet_subsys(ops); + return -EEXIST; } } list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); return 0; - -err_out: - write_unlock(&act_mod_lock); - if (ops->id) - tcf_pernet_del_id_list(*ops->id); -err_id: - unregister_pernet_subsys(ops); - return ret; } EXPORT_SYMBOL(tcf_register_action); @@ -974,11 +668,8 @@ int tcf_unregister_action(struct tc_action_ops *act, } } write_unlock(&act_mod_lock); - if (!err) { + if (!err) unregister_pernet_subsys(ops); - if (ops->id) - tcf_pernet_del_id_list(*ops->id); - } return err; } EXPORT_SYMBOL(tcf_unregister_action); @@ -1044,12 +735,10 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, continue; } - if (tc_act_skip_sw(a->tcfa_flags)) - continue; - repeat_ttl = 32; repeat: ret = a->ops->act(skb, a, res); + if (unlikely(ret == TC_ACT_REPEAT)) { if (--repeat_ttl != 0) goto repeat; @@ -1057,6 +746,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); return TC_ACT_OK; } + if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) { @@ -1138,7 +828,6 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) int err = -EINVAL; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - u32 flags; if (tcf_action_dump_terse(skb, a, false)) goto nla_put_failure; @@ -1153,13 +842,9 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; - flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; - if (flags && + if (a->tcfa_flags && nla_put_bitfield32(skb, TCA_ACT_FLAGS, - flags, flags)) - goto nla_put_failure; - - if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) + a->tcfa_flags, a->tcfa_flags)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -1241,9 +926,7 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, [TCA_ACT_OPTIONS] = { .type = NLA_NESTED 
}, - [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS | - TCA_ACT_FLAGS_SKIP_HW | - TCA_ACT_FLAGS_SKIP_SW), + [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS), [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; @@ -1356,13 +1039,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); - if (tb[TCA_ACT_FLAGS]) { + if (tb[TCA_ACT_FLAGS]) userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); - if (!tc_act_flags_valid(userflags.value)) { - err = -EINVAL; - goto err_out; - } - } err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, userflags.value | flags, extack); @@ -1390,17 +1068,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, return ERR_PTR(err); } -static bool tc_act_bind(u32 flags) -{ - return !!(flags & TCA_ACT_FLAGS_BIND); -} - /* Returns numbers of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], - int init_res[], size_t *attr_size, - u32 flags, u32 fl_flags, + int init_res[], size_t *attr_size, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; @@ -1438,22 +1110,6 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, sz += tcf_action_fill_size(act); /* Start from index 0 */ actions[i - 1] = act; - if (tc_act_bind(flags)) { - bool skip_sw = tc_skip_sw(fl_flags); - bool skip_hw = tc_skip_hw(fl_flags); - - if (tc_act_bind(act->tcfa_flags)) - continue; - if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || - skip_hw != tc_act_skip_hw(act->tcfa_flags)) { - err = -EINVAL; - goto err; - } - } else { - err = tcf_action_offload_add(act, extack); - if (tc_act_skip_sw(act->tcfa_flags) && err) - goto err; - } } /* We have to commit them all together, because if any error happened in @@ -1479,13 +1135,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw) { if (a->cpu_bstats) { - _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); this_cpu_ptr(a->cpu_qstats)->drops += drops; if (hw) - _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), - bytes, packets); + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); return; } @@ -1505,9 +1161,6 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, if (p == NULL) goto errout; - /* update hw stats for this action */ - tcf_action_update_hw_stats(p); - /* compat_mode being true specifies a call that is supposed * to add additional backward compatibility statistic TLVs. 
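[Note: _bstats_cpu_update(), swapped in above, belongs to the per-cpu stats API this patch reverts to; it is essentially a u64_stats-protected add (shown here as it exists on this tree, modulo exact type qualifiers):

	static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bcpu,
					      u64 bytes, u32 packets)
	{
		/* sequence guards 64-bit reads on 32-bit architectures */
		u64_stats_update_begin(&bcpu->syncp);
		bcpu->bstats.bytes += bytes;
		bcpu->bstats.packets += packets;
		u64_stats_update_end(&bcpu->syncp);
	}
]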
*/ @@ -1527,10 +1180,9 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, if (err < 0) goto errout; - if (gnet_stats_copy_basic(&d, p->cpu_bstats, - &p->tcfa_bstats, false) < 0 || - gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, - &p->tcfa_bstats_hw, false) < 0 || + if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || + gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw, + &p->tcfa_bstats_hw) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfa_qstats, @@ -1749,96 +1401,6 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) return 0; } -static int -tcf_reoffload_del_notify(struct net *net, struct tc_action *action) -{ - size_t attr_size = tcf_action_fill_size(action); - struct tc_action *actions[TCA_ACT_MAX_PRIO] = { - [0] = action, - }; - const struct tc_action_ops *ops = action->ops; - struct sk_buff *skb; - int ret; - - skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, - GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1) <= 0) { - kfree_skb(skb); - return -EINVAL; - } - - ret = tcf_idr_release_unsafe(action); - if (ret == ACT_P_DELETED) { - module_put(ops->owner); - ret = rtnetlink_send(skb, net, 0, RTNLGRP_TC, 0); - } else { - kfree_skb(skb); - } - - return ret; -} - -int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, - void *cb_priv, bool add) -{ - struct tc_act_pernet_id *id_ptr; - struct tcf_idrinfo *idrinfo; - struct tc_action_net *tn; - struct tc_action *p; - unsigned int act_id; - unsigned long tmp; - unsigned long id; - struct idr *idr; - struct net *net; - int ret; - - if (!cb) - return -EINVAL; - - down_read(&net_rwsem); - mutex_lock(&act_id_mutex); - - for_each_net(net) { - list_for_each_entry(id_ptr, &act_pernet_id_list, list) { - act_id = id_ptr->id; - tn = net_generic(net, act_id); - if (!tn) - continue; - idrinfo = tn->idrinfo; - if (!idrinfo) - continue; - - mutex_lock(&idrinfo->lock); - idr = &idrinfo->action_idr; - idr_for_each_entry_ul(idr, p, tmp, id) { - if (IS_ERR(p) || tc_act_bind(p->tcfa_flags)) - continue; - if (add) { - tcf_action_offload_add_ex(p, NULL, cb, - cb_priv); - continue; - } - - /* cb unregister to update hw count */ - ret = tcf_action_offload_del_ex(p, cb, cb_priv); - if (ret < 0) - continue; - if (tc_act_skip_sw(p->tcfa_flags) && - !tc_act_in_hw(p)) - tcf_reoffload_del_notify(net, p); - } - mutex_unlock(&idrinfo->lock); - } - } - mutex_unlock(&act_id_mutex); - up_read(&net_rwsem); - - return 0; -} - static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) @@ -1952,7 +1514,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, for (loop = 0; loop < 10; loop++) { ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res, - &attr_size, flags, 0, extack); + &attr_size, flags, extack); if (ret != -EAGAIN) break; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 09e2aafc89..94e78ac7a7 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -124,7 +124,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ret = tcf_idr_check_alloc(tn, &index, a, bind); if (!ret) { ret = tcf_idr_create(tn, index, est, a, - &act_connmark_ops, bind, false, flags); + &act_connmark_ops, bind, false, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_csum.c 
b/net/sched/act_csum.c index e0f515b774..a15ec95e69 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -695,24 +695,6 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_csum)); } -static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - entry->id = FLOW_ACTION_CSUM; - entry->csum_flags = tcf_csum_update_flags(act); - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - fl_action->id = FLOW_ACTION_CSUM; - } - - return 0; -} - static struct tc_action_ops act_csum_ops = { .kind = "csum", .id = TCA_ID_CSUM, @@ -724,7 +706,6 @@ static struct tc_action_ops act_csum_ops = { .walk = tcf_csum_walker, .lookup = tcf_csum_search, .get_fill_size = tcf_csum_get_fill_size, - .offload_act_setup = tcf_csum_offload_act_setup, .size = sizeof(struct tcf_csum), }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 33e70d60f0..4ffea1290c 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -32,7 +32,6 @@ #include #include #include -#include #include static struct workqueue_struct *act_ct_wq; @@ -57,12 +56,6 @@ static const struct rhashtable_params zones_params = { .automatic_shrinking = true, }; -static struct nf_ct_ext_type act_ct_extend __read_mostly = { - .len = sizeof(struct nf_conn_act_ct_ext), - .align = __alignof__(struct nf_conn_act_ct_ext), - .id = NF_CT_EXT_ACT_CT, -}; - static struct flow_action_entry * tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action) { @@ -365,7 +358,6 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, bool tcp) { - struct nf_conn_act_ct_ext *act_ct_ext; struct flow_offload *entry; int err; @@ -383,14 +375,6 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; } - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (act_ct_ext) { - entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = - act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL]; - entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = - act_ct_ext->ifindex[IP_CT_DIR_REPLY]; - } - err = flow_offload_add(&ct_ft->nf_ft, entry); if (err) goto err_add; @@ -409,8 +393,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, { bool tcp = false; - if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) || - !test_bit(IPS_ASSURED_BIT, &ct->status)) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return; switch (nf_ct_protonum(ct)) { @@ -609,7 +592,7 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, if (nf_ct_is_confirmed(ct)) nf_ct_kill(ct); - nf_ct_put(ct); + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return false; @@ -774,7 +757,7 @@ static void tcf_ct_params_free(struct rcu_head *head) tcf_ct_flow_table_put(params); if (params->tmpl) - nf_ct_put(params->tmpl); + nf_conntrack_put(¶ms->tmpl->ct_general); kfree(params); } @@ -984,7 +967,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tc_skb_cb(skb)->post_ct = false; ct = nf_ct_get(skb, &ctinfo); if (ct) { - nf_ct_put(ct); + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); } @@ -1044,7 +1027,6 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, if (!ct) goto out_push; nf_ct_deliver_cached_events(ct); - nf_conn_act_ct_ext_fill(skb, ct, 
ctinfo); err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); if (err != NF_ACCEPT) @@ -1054,9 +1036,6 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tcf_ct_act_set_mark(ct, p->mark, p->mark_mask); tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); - if (!nf_ct_is_confirmed(ct)) - nf_conn_act_ct_ext_add(ct); - /* This will take care of sending queued events * even if the connection is already confirmed. */ @@ -1250,6 +1229,7 @@ static int tcf_ct_fill_params(struct net *net, return -ENOMEM; } __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); + nf_conntrack_get(&tmpl->ct_general); p->tmpl = tmpl; return 0; @@ -1514,26 +1494,6 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); } -static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - entry->id = FLOW_ACTION_CT; - entry->ct.action = tcf_ct_action(act); - entry->ct.zone = tcf_ct_zone(act); - entry->ct.flow_table = tcf_ct_ft(act); - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - fl_action->id = FLOW_ACTION_CT; - } - - return 0; -} - static struct tc_action_ops act_ct_ops = { .kind = "ct", .id = TCA_ID_CT, @@ -1545,7 +1505,6 @@ static struct tc_action_ops act_ct_ops = { .walk = tcf_ct_walker, .lookup = tcf_ct_search, .stats_update = tcf_stats_update, - .offload_act_setup = tcf_ct_offload_act_setup, .size = sizeof(struct tcf_ct), }; @@ -1603,16 +1562,10 @@ static int __init ct_init_module(void) if (err) goto err_register; - err = nf_ct_extend_register(&act_ct_extend); - if (err) - goto err_register_extend; - static_branch_inc(&tcf_frag_xmit_count); return 0; -err_register_extend: - tcf_unregister_action(&act_ct_ops, &ct_net_ops); err_register: tcf_ct_flow_tables_uninit(); err_tbl_init: @@ -1623,7 +1576,6 @@ static int __init ct_init_module(void) static void __exit ct_cleanup_module(void) { static_branch_dec(&tcf_frag_xmit_count); - nf_ct_extend_unregister(&act_ct_extend); tcf_unregister_action(&act_ct_ops, &ct_net_ops); tcf_ct_flow_tables_uninit(); destroy_workqueue(act_ct_wq); diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 0281e45987..549374a2d0 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -212,7 +212,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_ctinfo_ops, bind, false, flags); + &act_ctinfo_ops, bind, false, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index bde6a6c01e..d8dce173df 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -252,43 +252,6 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) return sz; } -static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - if (is_tcf_gact_ok(act)) { - entry->id = FLOW_ACTION_ACCEPT; - } else if (is_tcf_gact_shot(act)) { - entry->id = FLOW_ACTION_DROP; - } else if (is_tcf_gact_trap(act)) { - entry->id = FLOW_ACTION_TRAP; - } else if (is_tcf_gact_goto_chain(act)) { - entry->id = FLOW_ACTION_GOTO; - entry->chain_index = tcf_gact_goto_chain_index(act); - } else { - return -EOPNOTSUPP; - } - *index_inc = 1; - } else { - struct 
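[ The ct_init_module() hunk above removes one rung of the classic goto-unwind ladder along with the nf_ct_extend_register() step it undid. The idiom itself, as a compilable sketch with hypothetical step names standing in for tcf_ct_flow_tables_init(), tcf_register_action(), and friends; teardown runs in exact reverse order of successful setup: ]

#include <stdio.h>

/* Hypothetical setup/teardown pairs. */
static int  step_tables(void)  { return 0; }
static void undo_tables(void)  { }
static int  step_action(void)  { return 0; }

static int example_init(void)
{
	int err;

	err = step_tables();
	if (err)
		return err;

	err = step_action();
	if (err)
		goto err_tables;	/* unwind everything done so far */

	return 0;

err_tables:
	undo_tables();
	return err;
}

int main(void)
{
	printf("init: %d\n", example_init());
	return 0;
}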
flow_offload_action *fl_action = entry_data; - - if (is_tcf_gact_ok(act)) - fl_action->id = FLOW_ACTION_ACCEPT; - else if (is_tcf_gact_shot(act)) - fl_action->id = FLOW_ACTION_DROP; - else if (is_tcf_gact_trap(act)) - fl_action->id = FLOW_ACTION_TRAP; - else if (is_tcf_gact_goto_chain(act)) - fl_action->id = FLOW_ACTION_GOTO; - else - return -EOPNOTSUPP; - } - - return 0; -} - static struct tc_action_ops act_gact_ops = { .kind = "gact", .id = TCA_ID_GACT, @@ -300,7 +263,6 @@ static struct tc_action_ops act_gact_ops = { .walk = tcf_gact_walker, .lookup = tcf_gact_search, .get_fill_size = tcf_gact_get_fill_size, - .offload_act_setup = tcf_gact_offload_act_setup, .size = sizeof(struct tcf_gact), }; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index d56e73843a..7df72a4197 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -357,7 +357,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_gate_ops, bind, false, flags); + &act_gate_ops, bind, false, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -597,54 +597,6 @@ static size_t tcf_gate_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_gate)); } -static void tcf_gate_entry_destructor(void *priv) -{ - struct action_gate_entry *oe = priv; - - kfree(oe); -} - -static int tcf_gate_get_entries(struct flow_action_entry *entry, - const struct tc_action *act) -{ - entry->gate.entries = tcf_gate_get_list(act); - - if (!entry->gate.entries) - return -EINVAL; - - entry->destructor = tcf_gate_entry_destructor; - entry->destructor_priv = entry->gate.entries; - - return 0; -} - -static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - int err; - - if (bind) { - struct flow_action_entry *entry = entry_data; - - entry->id = FLOW_ACTION_GATE; - entry->gate.prio = tcf_gate_prio(act); - entry->gate.basetime = tcf_gate_basetime(act); - entry->gate.cycletime = tcf_gate_cycletime(act); - entry->gate.cycletimeext = tcf_gate_cycletimeext(act); - entry->gate.num_entries = tcf_gate_num_entries(act); - err = tcf_gate_get_entries(entry, act); - if (err) - return err; - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - fl_action->id = FLOW_ACTION_GATE; - } - - return 0; -} - static struct tc_action_ops act_gate_ops = { .kind = "gate", .id = TCA_ID_GATE, @@ -657,7 +609,6 @@ static struct tc_action_ops act_gate_ops = { .stats_update = tcf_gate_stats_update, .get_fill_size = tcf_gate_get_fill_size, .lookup = tcf_gate_search, - .offload_act_setup = tcf_gate_offload_act_setup, .size = sizeof(struct tcf_gate), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 41ba55e60b..7064a365a1 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -553,7 +553,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_ife_ops, - bind, true, flags); + bind, true, 0); if (ret) { tcf_idr_cleanup(tn, index); kfree(p); @@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlv_data; u16 metalen; - bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (skb_at_tc_ingress(skb)) @@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, exceed_mtu = true; } - bstats_update(this_cpu_ptr(ife->common.cpu_bstats), 
skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 2f3d507c24..265b1443e2 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -145,7 +145,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, ops, bind, - false, flags); + false, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 39acd1d186..efc963ab99 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -79,7 +79,7 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - dev_put_track(dev, &m->tcfm_dev_tracker); + dev_put(dev); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -101,6 +101,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; + struct net_device *dev; bool exists = false; int ret, err; u32 index; @@ -170,19 +171,16 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { - struct net_device *odev, *ndev; - - ndev = dev_get_by_index(net, parm->ifindex); - if (!ndev) { + dev = dev_get_by_index(net, parm->ifindex); + if (!dev) { spin_unlock_bh(&m->tcf_lock); err = -ENODEV; goto put_chain; } - mac_header_xmit = dev_is_mac_header_xmit(ndev); - odev = rcu_replace_pointer(m->tcfm_dev, ndev, + mac_header_xmit = dev_is_mac_header_xmit(dev); + dev = rcu_replace_pointer(m->tcfm_dev, dev, lockdep_is_held(&m->tcf_lock)); - dev_put_track(odev, &m->tcfm_dev_tracker); - netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); + dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -402,7 +400,7 @@ static int mirred_device_event(struct notifier_block *unused, list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { - dev_put_track(dev, &m->tcfm_dev_tracker); + dev_put(dev); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. 
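[ The mirred hunks above swap dev_put_track() for plain dev_put() around an rcu_replace_pointer() of the target device under tcf_lock: publish the new device first, then release the reference held on the old one. A user-space analogue of that publish-then-release order with an atomic pointer and a refcounted object; real RCU additionally defers reclamation until pre-existing readers finish, which this sketch does not model: ]

#include <stdatomic.h>
#include <stdlib.h>

struct target {
	atomic_int refcnt;		/* stand-in for the netdev refcount */
};

static _Atomic(struct target *) cur;	/* stand-in for m->tcfm_dev */

static void target_put(struct target *t)
{
	if (t && atomic_fetch_sub(&t->refcnt, 1) == 1)
		free(t);
}

static void retarget(struct target *newt)
{
	/* Make the new target visible, then drop the old reference,
	 * mirroring rcu_replace_pointer() followed by dev_put(). */
	struct target *old = atomic_exchange(&cur, newt);

	target_put(old);
}

int main(void)
{
	struct target *t = calloc(1, sizeof(*t));

	atomic_store(&t->refcnt, 1);
	retarget(t);	/* replaces NULL, takes ownership of t */
	retarget(NULL);	/* drops the reference, frees t */
	return 0;
}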
*/ @@ -450,55 +448,6 @@ static size_t tcf_mirred_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_mirred)); } -static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, - const struct tc_action *act) -{ - entry->dev = act->ops->get_dev(act, &entry->destructor); - if (!entry->dev) - return; - entry->destructor_priv = entry->dev; -} - -static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - if (is_tcf_mirred_egress_redirect(act)) { - entry->id = FLOW_ACTION_REDIRECT; - tcf_offload_mirred_get_dev(entry, act); - } else if (is_tcf_mirred_egress_mirror(act)) { - entry->id = FLOW_ACTION_MIRRED; - tcf_offload_mirred_get_dev(entry, act); - } else if (is_tcf_mirred_ingress_redirect(act)) { - entry->id = FLOW_ACTION_REDIRECT_INGRESS; - tcf_offload_mirred_get_dev(entry, act); - } else if (is_tcf_mirred_ingress_mirror(act)) { - entry->id = FLOW_ACTION_MIRRED_INGRESS; - tcf_offload_mirred_get_dev(entry, act); - } else { - return -EOPNOTSUPP; - } - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - if (is_tcf_mirred_egress_redirect(act)) - fl_action->id = FLOW_ACTION_REDIRECT; - else if (is_tcf_mirred_egress_mirror(act)) - fl_action->id = FLOW_ACTION_MIRRED; - else if (is_tcf_mirred_ingress_redirect(act)) - fl_action->id = FLOW_ACTION_REDIRECT_INGRESS; - else if (is_tcf_mirred_ingress_mirror(act)) - fl_action->id = FLOW_ACTION_MIRRED_INGRESS; - else - return -EOPNOTSUPP; - } - - return 0; -} - static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .id = TCA_ID_MIRRED, @@ -511,7 +460,6 @@ static struct tc_action_ops act_mirred_ops = { .walk = tcf_mirred_walker, .lookup = tcf_mirred_search, .get_fill_size = tcf_mirred_get_fill_size, - .offload_act_setup = tcf_mirred_offload_act_setup, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, }; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index b9ff3459fd..e4529b428c 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, int ret, mac_len; tcf_lastuse_update(&m->tcf_tm); - bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb); + bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); /* Ensure 'data' points at mac_header prior calling mpls manipulating * functions. 
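[ tcf_mirred_offload_act_setup() above has the same two-phase shape as every .offload_act_setup() hook deleted in this patch: @bind true fills a flow_action_entry for a filter bind and reports how many entry slots were consumed; @bind false fills only the id of a flow_offload_action for standalone action offload. The pattern as a compilable sketch, with kernel types reduced to local stand-ins: ]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum flow_action_id { FLOW_ACTION_DROP = 1 };
struct flow_action_entry   { enum flow_action_id id; };
struct flow_offload_action { enum flow_action_id id; };

static int example_offload_act_setup(void *entry_data, uint32_t *index_inc,
				     bool bind)
{
	if (bind) {
		/* Filter bind: fill a full entry for the driver. */
		struct flow_action_entry *entry = entry_data;

		entry->id = FLOW_ACTION_DROP;	/* action-specific */
		*index_inc = 1;			/* slots consumed */
	} else {
		/* Standalone action offload: only the id is needed. */
		struct flow_offload_action *fl_action = entry_data;

		fl_action->id = FLOW_ACTION_DROP;
	}
	return 0;
}

int main(void)
{
	struct flow_action_entry e = { 0 };
	struct flow_offload_action f = { 0 };
	uint32_t inc = 0;

	example_offload_act_setup(&e, &inc, true);
	example_offload_act_setup(&f, &inc, false);
	printf("%d %d %u\n", e.id, f.id, inc);
	return 0;
}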
@@ -248,7 +248,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_mpls_ops, bind, true, flags); + &act_mpls_ops, bind, true, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -384,57 +384,6 @@ static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } -static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - switch (tcf_mpls_action(act)) { - case TCA_MPLS_ACT_PUSH: - entry->id = FLOW_ACTION_MPLS_PUSH; - entry->mpls_push.proto = tcf_mpls_proto(act); - entry->mpls_push.label = tcf_mpls_label(act); - entry->mpls_push.tc = tcf_mpls_tc(act); - entry->mpls_push.bos = tcf_mpls_bos(act); - entry->mpls_push.ttl = tcf_mpls_ttl(act); - break; - case TCA_MPLS_ACT_POP: - entry->id = FLOW_ACTION_MPLS_POP; - entry->mpls_pop.proto = tcf_mpls_proto(act); - break; - case TCA_MPLS_ACT_MODIFY: - entry->id = FLOW_ACTION_MPLS_MANGLE; - entry->mpls_mangle.label = tcf_mpls_label(act); - entry->mpls_mangle.tc = tcf_mpls_tc(act); - entry->mpls_mangle.bos = tcf_mpls_bos(act); - entry->mpls_mangle.ttl = tcf_mpls_ttl(act); - break; - default: - return -EOPNOTSUPP; - } - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - switch (tcf_mpls_action(act)) { - case TCA_MPLS_ACT_PUSH: - fl_action->id = FLOW_ACTION_MPLS_PUSH; - break; - case TCA_MPLS_ACT_POP: - fl_action->id = FLOW_ACTION_MPLS_POP; - break; - case TCA_MPLS_ACT_MODIFY: - fl_action->id = FLOW_ACTION_MPLS_MANGLE; - break; - default: - return -EOPNOTSUPP; - } - } - - return 0; -} - static struct tc_action_ops act_mpls_ops = { .kind = "mpls", .id = TCA_ID_MPLS, @@ -445,7 +394,6 @@ static struct tc_action_ops act_mpls_ops = { .cleanup = tcf_mpls_cleanup, .walk = tcf_mpls_walker, .lookup = tcf_mpls_search, - .offload_act_setup = tcf_mpls_offload_act_setup, .size = sizeof(struct tcf_mpls), }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 2a39b3729e..7dd6b586ba 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -61,7 +61,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_nat_ops, bind, false, flags); + &act_nat_ops, bind, false, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 31fcd279c1..c6c862c459 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -189,7 +189,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_pedit_ops, bind, false, flags); + &act_pedit_ops, bind, false, 0); if (ret) { tcf_idr_cleanup(tn, index); goto out_free; @@ -487,39 +487,6 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } -static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - int k; - - for (k = 0; k < tcf_pedit_nkeys(act); k++) { - switch (tcf_pedit_cmd(act, k)) { - case TCA_PEDIT_KEY_EX_CMD_SET: - entry->id = FLOW_ACTION_MANGLE; - break; - case TCA_PEDIT_KEY_EX_CMD_ADD: - entry->id = FLOW_ACTION_ADD; - break; - default: - return -EOPNOTSUPP; 
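[ The tcf_pedit_offload_act_setup() hook whose removal begins here is the one implementation that can expand into several flow_action entries, one per pedit key, with *index_inc reporting the count. The expansion logic as a compilable sketch; the key layout is a stand-in, not the kernel's tc_pedit_key: ]

#include <stdint.h>
#include <stdio.h>

enum { CMD_SET, CMD_ADD };
enum flow_id { FLOW_ACTION_MANGLE = 1, FLOW_ACTION_ADD = 2 };

struct entry { enum flow_id id; uint32_t offset; };
struct key   { int cmd; uint32_t offset; };

/* One flow entry per pedit key; returns how many entries were used. */
static uint32_t expand_pedit(struct entry *entry, const struct key *keys,
			     uint32_t nkeys)
{
	uint32_t k;

	for (k = 0; k < nkeys; k++) {
		entry[k].id = keys[k].cmd == CMD_SET ? FLOW_ACTION_MANGLE
						     : FLOW_ACTION_ADD;
		entry[k].offset = keys[k].offset;
	}
	return k;	/* becomes *index_inc in the removed hook */
}

int main(void)
{
	struct key keys[2] = { { CMD_SET, 0 }, { CMD_ADD, 4 } };
	struct entry entries[2];

	printf("used %u entries\n", expand_pedit(entries, keys, 2));
	return 0;
}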
- } - entry->mangle.htype = tcf_pedit_htype(act, k); - entry->mangle.mask = tcf_pedit_mask(act, k); - entry->mangle.val = tcf_pedit_val(act, k); - entry->mangle.offset = tcf_pedit_offset(act, k); - entry->hw_stats = tc_act_hw_stats(act->hw_stats); - entry++; - } - *index_inc = k; - } else { - return -EOPNOTSUPP; - } - - return 0; -} - static struct tc_action_ops act_pedit_ops = { .kind = "pedit", .id = TCA_ID_PEDIT, @@ -531,7 +498,6 @@ static struct tc_action_ops act_pedit_ops = { .init = tcf_pedit_init, .walk = tcf_pedit_walker, .lookup = tcf_pedit_search, - .offload_act_setup = tcf_pedit_offload_act_setup, .size = sizeof(struct tcf_pedit), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0923aa2b8f..832157a840 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -90,7 +90,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, NULL, a, - &act_police_ops, bind, true, flags); + &act_police_ops, bind, true, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, police->common.cpu_bstats, &police->tcf_rate_est, &police->tcf_lock, - false, est); + NULL, est); if (err) goto failure; } else if (tb[TCA_POLICE_AVRATE] && @@ -248,7 +248,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, int ret; tcf_lastuse_update(&police->tcf_tm); - bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb); + bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb); ret = READ_ONCE(police->tcf_action); p = rcu_dereference_bh(police->params); @@ -405,30 +405,6 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } -static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - entry->id = FLOW_ACTION_POLICE; - entry->police.burst = tcf_police_burst(act); - entry->police.rate_bytes_ps = - tcf_police_rate_bytes_ps(act); - entry->police.burst_pkt = tcf_police_burst_pkt(act); - entry->police.rate_pkt_ps = - tcf_police_rate_pkt_ps(act); - entry->police.mtu = tcf_police_tcfp_mtu(act); - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - fl_action->id = FLOW_ACTION_POLICE; - } - - return 0; -} - MODULE_AUTHOR("Alexey Kuznetsov"); MODULE_DESCRIPTION("Policing actions"); MODULE_LICENSE("GPL"); @@ -444,7 +420,6 @@ static struct tc_action_ops act_police_ops = { .walk = tcf_police_walker, .lookup = tcf_police_search, .cleanup = tcf_police_cleanup, - .offload_act_setup = tcf_police_offload_act_setup, .size = sizeof(struct tcf_police), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 9a22cdda6b..230501eb9e 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -70,7 +70,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_sample_ops, bind, true, flags); + &act_sample_ops, bind, true, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, int retval; tcf_lastuse_update(&s->tcf_tm); - bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb); + bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); retval = READ_ONCE(s->tcf_action); psample_group = rcu_dereference_bh(s->psample_group); @@ -282,35 
+282,6 @@ tcf_sample_get_group(const struct tc_action *a, return group; } -static void tcf_offload_sample_get_group(struct flow_action_entry *entry, - const struct tc_action *act) -{ - entry->sample.psample_group = - act->ops->get_psample_group(act, &entry->destructor); - entry->destructor_priv = entry->sample.psample_group; -} - -static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - entry->id = FLOW_ACTION_SAMPLE; - entry->sample.trunc_size = tcf_sample_trunc_size(act); - entry->sample.truncate = tcf_sample_truncate(act); - entry->sample.rate = tcf_sample_rate(act); - tcf_offload_sample_get_group(entry, act); - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - fl_action->id = FLOW_ACTION_SAMPLE; - } - - return 0; -} - static struct tc_action_ops act_sample_ops = { .kind = "sample", .id = TCA_ID_SAMPLE, @@ -323,7 +294,6 @@ static struct tc_action_ops act_sample_ops = { .walk = tcf_sample_walker, .lookup = tcf_sample_search, .get_psample_group = tcf_sample_get_group, - .offload_act_setup = tcf_sample_offload_act_setup, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8c1d60bde9..cbbe1861d3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -36,8 +36,7 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a, * then it would look like "hello_3" (without quotes) */ pr_info("simple: %s_%llu\n", - (char *)d->tcfd_defdata, - u64_stats_read(&d->tcf_bstats.packets)); + (char *)d->tcfd_defdata, d->tcf_bstats.packets); spin_unlock(&d->tcf_lock); return d->tcf_action; } @@ -129,7 +128,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_simp_ops, bind, false, flags); + &act_simp_ops, bind, false, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ceba11b198..6054185383 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, int action; tcf_lastuse_update(&d->tcf_tm); - bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); + bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); params = rcu_dereference_bh(d->params); action = READ_ONCE(d->tcf_action); @@ -176,7 +176,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_skbedit_ops, bind, true, act_flags); + &act_skbedit_ops, bind, true, 0); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -327,41 +327,6 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) + nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */ } -static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - if (is_tcf_skbedit_mark(act)) { - entry->id = FLOW_ACTION_MARK; - entry->mark = tcf_skbedit_mark(act); - } else if (is_tcf_skbedit_ptype(act)) { - entry->id = FLOW_ACTION_PTYPE; - entry->ptype = tcf_skbedit_ptype(act); - } else if (is_tcf_skbedit_priority(act)) { - entry->id = FLOW_ACTION_PRIORITY; - entry->priority = tcf_skbedit_priority(act); - } else { - return -EOPNOTSUPP; - } - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = 
entry_data; - - if (is_tcf_skbedit_mark(act)) - fl_action->id = FLOW_ACTION_MARK; - else if (is_tcf_skbedit_ptype(act)) - fl_action->id = FLOW_ACTION_PTYPE; - else if (is_tcf_skbedit_priority(act)) - fl_action->id = FLOW_ACTION_PRIORITY; - else - return -EOPNOTSUPP; - } - - return 0; -} - static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .id = TCA_ID_SKBEDIT, @@ -374,7 +339,6 @@ static struct tc_action_ops act_skbedit_ops = { .walk = tcf_skbedit_walker, .get_fill_size = tcf_skbedit_get_fill_size, .lookup = tcf_skbedit_search, - .offload_act_setup = tcf_skbedit_offload_act_setup, .size = sizeof(struct tcf_skbedit), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 23aba03d26..d9cd174eec 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -787,59 +787,6 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } -static void tcf_tunnel_encap_put_tunnel(void *priv) -{ - struct ip_tunnel_info *tunnel = priv; - - kfree(tunnel); -} - -static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, - const struct tc_action *act) -{ - entry->tunnel = tcf_tunnel_info_copy(act); - if (!entry->tunnel) - return -ENOMEM; - entry->destructor = tcf_tunnel_encap_put_tunnel; - entry->destructor_priv = entry->tunnel; - return 0; -} - -static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, - void *entry_data, - u32 *index_inc, - bool bind) -{ - int err; - - if (bind) { - struct flow_action_entry *entry = entry_data; - - if (is_tcf_tunnel_set(act)) { - entry->id = FLOW_ACTION_TUNNEL_ENCAP; - err = tcf_tunnel_encap_get_tunnel(entry, act); - if (err) - return err; - } else if (is_tcf_tunnel_release(act)) { - entry->id = FLOW_ACTION_TUNNEL_DECAP; - } else { - return -EOPNOTSUPP; - } - *index_inc = 1; - } else { - struct flow_offload_action *fl_action = entry_data; - - if (is_tcf_tunnel_set(act)) - fl_action->id = FLOW_ACTION_TUNNEL_ENCAP; - else if (is_tcf_tunnel_release(act)) - fl_action->id = FLOW_ACTION_TUNNEL_DECAP; - else - return -EOPNOTSUPP; - } - - return 0; -} - static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", .id = TCA_ID_TUNNEL_KEY, @@ -850,7 +797,6 @@ static struct tc_action_ops act_tunnel_key_ops = { .cleanup = tunnel_key_release, .walk = tunnel_key_walker, .lookup = tunnel_key_search, - .offload_act_setup = tcf_tunnel_key_offload_act_setup, .size = sizeof(struct tcf_tunnel_key), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 756e2dcde1..e4dc5a555b 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -368,53 +368,6 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act) + nla_total_size(sizeof(u8)); /* TCA_VLAN_PUSH_VLAN_PRIORITY */ } -static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) -{ - if (bind) { - struct flow_action_entry *entry = entry_data; - - switch (tcf_vlan_action(act)) { - case TCA_VLAN_ACT_PUSH: - entry->id = FLOW_ACTION_VLAN_PUSH; - entry->vlan.vid = tcf_vlan_push_vid(act); - entry->vlan.proto = tcf_vlan_push_proto(act); - entry->vlan.prio = tcf_vlan_push_prio(act); - break; - case TCA_VLAN_ACT_POP: - entry->id = FLOW_ACTION_VLAN_POP; - break; - case TCA_VLAN_ACT_MODIFY: - entry->id = FLOW_ACTION_VLAN_MANGLE; - entry->vlan.vid = tcf_vlan_push_vid(act); - entry->vlan.proto = tcf_vlan_push_proto(act); - entry->vlan.prio = tcf_vlan_push_prio(act); - break; - default: - return -EOPNOTSUPP; - } - *index_inc = 1; - } 
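[ tcf_tunnel_encap_get_tunnel() above, like the sample and gate variants elsewhere in this patch, hands an allocation to the flow entry together with a destructor; the cleanup path later walks the entries and invokes each one. The ownership handoff, reduced to a compilable sketch (the payload stands in for the copied ip_tunnel_info): ]

#include <stdlib.h>

struct entry {
	void *payload;			/* e.g. the copied tunnel info */
	void (*destructor)(void *priv);
	void *destructor_priv;
};

static void payload_free(void *priv)
{
	free(priv);
}

static int entry_attach_payload(struct entry *e, size_t size)
{
	void *p = calloc(1, size);	/* stand-in for tcf_tunnel_info_copy() */

	if (!p)
		return -1;
	e->payload = p;
	e->destructor = payload_free;	/* cleanup now owns the allocation */
	e->destructor_priv = p;
	return 0;
}

static void cleanup_entries(struct entry *entries, int n)
{
	for (int i = 0; i < n; i++)
		if (entries[i].destructor)
			entries[i].destructor(entries[i].destructor_priv);
}

int main(void)
{
	struct entry e = { 0 };

	if (!entry_attach_payload(&e, 64))
		cleanup_entries(&e, 1);
	return 0;
}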
else { - struct flow_offload_action *fl_action = entry_data; - - switch (tcf_vlan_action(act)) { - case TCA_VLAN_ACT_PUSH: - fl_action->id = FLOW_ACTION_VLAN_PUSH; - break; - case TCA_VLAN_ACT_POP: - fl_action->id = FLOW_ACTION_VLAN_POP; - break; - case TCA_VLAN_ACT_MODIFY: - fl_action->id = FLOW_ACTION_VLAN_MANGLE; - break; - default: - return -EOPNOTSUPP; - } - } - - return 0; -} - static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .id = TCA_ID_VLAN, @@ -427,7 +380,6 @@ static struct tc_action_ops act_vlan_ops = { .stats_update = tcf_vlan_stats_update, .get_fill_size = tcf_vlan_get_fill_size, .lookup = tcf_vlan_search, - .offload_act_setup = tcf_vlan_offload_act_setup, .size = sizeof(struct tcf_vlan), }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5ce1208a6e..cd44cac7fb 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3033,9 +3033,9 @@ void tcf_exts_destroy(struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_destroy); -int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, - u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) +int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -3069,8 +3069,7 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr ** flags |= TCA_ACT_FLAGS_BIND; err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, exts->actions, init_res, - &attr_size, flags, fl_flags, - extack); + &attr_size, flags, extack); if (err < 0) return err; exts->nr_actions = err; @@ -3086,15 +3085,6 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr ** return 0; } -EXPORT_SYMBOL(tcf_exts_validate_ex); - -int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, - u32 flags, struct netlink_ext_ack *extack) -{ - return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts, - flags, 0, extack); -} EXPORT_SYMBOL(tcf_exts_validate); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) @@ -3338,7 +3328,7 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); - return min(ok_count, 0); + return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_add); @@ -3394,7 +3384,7 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); - return min(ok_count, 0); + return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_replace); @@ -3432,7 +3422,7 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); - return min(ok_count, 0); + return ok_count < 0 ? 
ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_destroy); @@ -3479,7 +3469,7 @@ static void tcf_act_put_cookie(struct flow_action_entry *entry) flow_action_cookie_destroy(entry->cookie); } -void tc_cleanup_offload_action(struct flow_action *flow_action) +void tc_cleanup_flow_action(struct flow_action *flow_action) { struct flow_action_entry *entry; int i; @@ -3490,37 +3480,93 @@ void tc_cleanup_offload_action(struct flow_action *flow_action) entry->destructor(entry->destructor_priv); } } -EXPORT_SYMBOL(tc_cleanup_offload_action); +EXPORT_SYMBOL(tc_cleanup_flow_action); -static int tc_setup_offload_act(struct tc_action *act, - struct flow_action_entry *entry, - u32 *index_inc) +static void tcf_mirred_get_dev(struct flow_action_entry *entry, + const struct tc_action *act) { #ifdef CONFIG_NET_CLS_ACT - if (act->ops->offload_act_setup) - return act->ops->offload_act_setup(act, entry, index_inc, true); - else - return -EOPNOTSUPP; -#else - return 0; + entry->dev = act->ops->get_dev(act, &entry->destructor); + if (!entry->dev) + return; + entry->destructor_priv = entry->dev; #endif } -int tc_setup_action(struct flow_action *flow_action, - struct tc_action *actions[]) +static void tcf_tunnel_encap_put_tunnel(void *priv) +{ + struct ip_tunnel_info *tunnel = priv; + + kfree(tunnel); +} + +static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) + return -ENOMEM; + entry->destructor = tcf_tunnel_encap_put_tunnel; + entry->destructor_priv = entry->tunnel; + return 0; +} + +static void tcf_sample_get_group(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->sample.psample_group = + act->ops->get_psample_group(act, &entry->destructor); + entry->destructor_priv = entry->sample.psample_group; +#endif +} + +static void tcf_gate_entry_destructor(void *priv) +{ + struct action_gate_entry *oe = priv; + + kfree(oe); +} + +static int tcf_gate_get_entries(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->gate.entries = tcf_gate_get_list(act); + + if (!entry->gate.entries) + return -EINVAL; + + entry->destructor = tcf_gate_entry_destructor; + entry->destructor_priv = entry->gate.entries; + + return 0; +} + +static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + +int tc_setup_flow_action(struct flow_action *flow_action, + const struct tcf_exts *exts) { - int i, j, index, err = 0; struct tc_action *act; + int i, j, k, err = 0; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE); BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED); - if (!actions) + if (!exts) return 0; j = 0; - tcf_act_for_each_action(i, act, actions) { + tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; entry = &flow_action->entries[j]; @@ -3530,39 +3576,165 @@ int tc_setup_action(struct flow_action *flow_action, goto err_out_locked; entry->hw_stats = tc_act_hw_stats(act->hw_stats); - entry->hw_index = act->tcfa_index; - index = 0; - err = tc_setup_offload_act(act, entry, &index); - if (!err) - j += index; - else + + if (is_tcf_gact_ok(act)) { + entry->id = FLOW_ACTION_ACCEPT; + } else if (is_tcf_gact_shot(act)) { + entry->id = FLOW_ACTION_DROP; + 
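[ The three tc_setup_cb_*() hunks ending above trade min(ok_count, 0) for an explicit ternary; for a signed int the two are equivalent: a positive count of drivers that accepted the rule collapses to success, and errors pass through unchanged. A quick self-check: ]

#include <assert.h>

static int clamp_ok(int ok_count)
{
	return ok_count < 0 ? ok_count : 0;
}

int main(void)
{
	assert(clamp_ok(3) == 0);	/* three callbacks accepted the rule */
	assert(clamp_ok(0) == 0);	/* nothing bound, still not an error */
	assert(clamp_ok(-95) == -95);	/* -EOPNOTSUPP propagates */
	return 0;
}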
} else if (is_tcf_gact_trap(act)) { + entry->id = FLOW_ACTION_TRAP; + } else if (is_tcf_gact_goto_chain(act)) { + entry->id = FLOW_ACTION_GOTO; + entry->chain_index = tcf_gact_goto_chain_index(act); + } else if (is_tcf_mirred_egress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_egress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT_INGRESS; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED_INGRESS; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_vlan(act)) { + switch (tcf_vlan_action(act)) { + case TCA_VLAN_ACT_PUSH: + entry->id = FLOW_ACTION_VLAN_PUSH; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + case TCA_VLAN_ACT_POP: + entry->id = FLOW_ACTION_VLAN_POP; + break; + case TCA_VLAN_ACT_MODIFY: + entry->id = FLOW_ACTION_VLAN_MANGLE; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + default: + err = -EOPNOTSUPP; + goto err_out_locked; + } + } else if (is_tcf_tunnel_set(act)) { + entry->id = FLOW_ACTION_TUNNEL_ENCAP; + err = tcf_tunnel_encap_get_tunnel(entry, act); + if (err) + goto err_out_locked; + } else if (is_tcf_tunnel_release(act)) { + entry->id = FLOW_ACTION_TUNNEL_DECAP; + } else if (is_tcf_pedit(act)) { + for (k = 0; k < tcf_pedit_nkeys(act); k++) { + switch (tcf_pedit_cmd(act, k)) { + case TCA_PEDIT_KEY_EX_CMD_SET: + entry->id = FLOW_ACTION_MANGLE; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + entry->id = FLOW_ACTION_ADD; + break; + default: + err = -EOPNOTSUPP; + goto err_out_locked; + } + entry->mangle.htype = tcf_pedit_htype(act, k); + entry->mangle.mask = tcf_pedit_mask(act, k); + entry->mangle.val = tcf_pedit_val(act, k); + entry->mangle.offset = tcf_pedit_offset(act, k); + entry->hw_stats = tc_act_hw_stats(act->hw_stats); + entry = &flow_action->entries[++j]; + } + } else if (is_tcf_csum(act)) { + entry->id = FLOW_ACTION_CSUM; + entry->csum_flags = tcf_csum_update_flags(act); + } else if (is_tcf_skbedit_mark(act)) { + entry->id = FLOW_ACTION_MARK; + entry->mark = tcf_skbedit_mark(act); + } else if (is_tcf_sample(act)) { + entry->id = FLOW_ACTION_SAMPLE; + entry->sample.trunc_size = tcf_sample_trunc_size(act); + entry->sample.truncate = tcf_sample_truncate(act); + entry->sample.rate = tcf_sample_rate(act); + tcf_sample_get_group(entry, act); + } else if (is_tcf_police(act)) { + entry->id = FLOW_ACTION_POLICE; + entry->police.burst = tcf_police_burst(act); + entry->police.rate_bytes_ps = + tcf_police_rate_bytes_ps(act); + entry->police.burst_pkt = tcf_police_burst_pkt(act); + entry->police.rate_pkt_ps = + tcf_police_rate_pkt_ps(act); + entry->police.mtu = tcf_police_tcfp_mtu(act); + entry->police.index = act->tcfa_index; + } else if (is_tcf_ct(act)) { + entry->id = FLOW_ACTION_CT; + entry->ct.action = tcf_ct_action(act); + entry->ct.zone = tcf_ct_zone(act); + entry->ct.flow_table = tcf_ct_ft(act); + } else if (is_tcf_mpls(act)) { + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + entry->id = FLOW_ACTION_MPLS_PUSH; + entry->mpls_push.proto = tcf_mpls_proto(act); + entry->mpls_push.label = tcf_mpls_label(act); + entry->mpls_push.tc = tcf_mpls_tc(act); + entry->mpls_push.bos = tcf_mpls_bos(act); + 
entry->mpls_push.ttl = tcf_mpls_ttl(act); + break; + case TCA_MPLS_ACT_POP: + entry->id = FLOW_ACTION_MPLS_POP; + entry->mpls_pop.proto = tcf_mpls_proto(act); + break; + case TCA_MPLS_ACT_MODIFY: + entry->id = FLOW_ACTION_MPLS_MANGLE; + entry->mpls_mangle.label = tcf_mpls_label(act); + entry->mpls_mangle.tc = tcf_mpls_tc(act); + entry->mpls_mangle.bos = tcf_mpls_bos(act); + entry->mpls_mangle.ttl = tcf_mpls_ttl(act); + break; + default: + err = -EOPNOTSUPP; + goto err_out_locked; + } + } else if (is_tcf_skbedit_ptype(act)) { + entry->id = FLOW_ACTION_PTYPE; + entry->ptype = tcf_skbedit_ptype(act); + } else if (is_tcf_skbedit_priority(act)) { + entry->id = FLOW_ACTION_PRIORITY; + entry->priority = tcf_skbedit_priority(act); + } else if (is_tcf_gate(act)) { + entry->id = FLOW_ACTION_GATE; + entry->gate.index = tcf_gate_index(act); + entry->gate.prio = tcf_gate_prio(act); + entry->gate.basetime = tcf_gate_basetime(act); + entry->gate.cycletime = tcf_gate_cycletime(act); + entry->gate.cycletimeext = tcf_gate_cycletimeext(act); + entry->gate.num_entries = tcf_gate_num_entries(act); + err = tcf_gate_get_entries(entry, act); + if (err) + goto err_out_locked; + } else { + err = -EOPNOTSUPP; goto err_out_locked; + } spin_unlock_bh(&act->tcfa_lock); + + if (!is_tcf_pedit(act)) + j++; } err_out: if (err) - tc_cleanup_offload_action(flow_action); + tc_cleanup_flow_action(flow_action); return err; err_out_locked: spin_unlock_bh(&act->tcfa_lock); goto err_out; } - -int tc_setup_offload_action(struct flow_action *flow_action, - const struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - if (!exts) - return 0; - - return tc_setup_action(flow_action, exts->actions); -#else - return 0; -#endif -} -EXPORT_SYMBOL(tc_setup_offload_action); +EXPORT_SYMBOL(tc_setup_flow_action); unsigned int tcf_exts_num_actions(struct tcf_exts *exts) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1a9b1f140f..7093482624 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -331,8 +331,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, ARRAY_SIZE(fl_ct_info_to_flower_map), post_ct, zone); skb_flow_dissect_hash(skb, &mask->dissector, &skb_key); - skb_flow_dissect(skb, &mask->dissector, &skb_key, - FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); + skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); f = fl_mask_lookup(mask, &skb_key); if (f && !tc_skip_sw(f->flags)) { @@ -463,7 +462,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); if (skip_sw) { @@ -475,7 +474,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw, &f->flags, &f->in_hw_count, rtnl_held); - tc_cleanup_offload_action(&cls_flower.rule->action); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { @@ -503,12 +502,12 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, rtnl_held); - tcf_exts_hw_stats_update(&f->exts, cls_flower.stats.bytes, - cls_flower.stats.pkts, - cls_flower.stats.drops, - cls_flower.stats.lastused, - cls_flower.stats.used_hw_stats, - cls_flower.stats.used_hw_stats_valid); + tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, + 
cls_flower.stats.pkts, + cls_flower.stats.drops, + cls_flower.stats.lastused, + cls_flower.stats.used_hw_stats, + cls_flower.stats.used_hw_stats_valid); } static void __fl_put(struct cls_fl_filter *f) @@ -1919,14 +1918,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, struct nlattr *est, - struct fl_flow_tmplt *tmplt, - u32 flags, u32 fl_flags, + struct fl_flow_tmplt *tmplt, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate_ex(net, tp, tb, est, &f->exts, flags, - fl_flags, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; @@ -2040,8 +2037,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], - tp->chain->tmplt_priv, flags, fnew->flags, - extack); + tp->chain->tmplt_priv, flags, extack); if (err) goto errout; @@ -2271,7 +2267,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { @@ -2288,7 +2284,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_SETUP_CLSFLOWER, &cls_flower, cb_priv, &f->flags, &f->in_hw_count); - tc_cleanup_offload_action(&cls_flower.rule->action); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index ca5670fd52..24f0046ce0 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -111,7 +111,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw, &head->flags, &head->in_hw_count, true); - tc_cleanup_offload_action(&cls_mall.rule->action); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); if (err) { @@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { static int mall_set_parms(struct net *net, struct tcf_proto *tp, struct cls_mall_head *head, unsigned long base, struct nlattr **tb, - struct nlattr *est, u32 flags, u32 fl_flags, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags, - fl_flags, extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack); if (err < 0) return err; @@ -227,8 +226,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, goto err_alloc_percpu; } - err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], - flags, new->flags, extack); + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags, + extack); if (err) goto err_set_parms; @@ -302,7 +301,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = 
(unsigned long)head; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); if (err) { kfree(cls_mall.rule); if (add && tc_skip_sw(head->flags)) { @@ -315,7 +314,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv, &head->flags, &head->in_hw_count); - tc_cleanup_offload_action(&cls_mall.rule->action); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); if (err) @@ -337,11 +336,11 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); - tcf_exts_hw_stats_update(&head->exts, cls_mall.stats.bytes, - cls_mall.stats.pkts, cls_mall.stats.drops, - cls_mall.stats.lastused, - cls_mall.stats.used_hw_stats, - cls_mall.stats.used_hw_stats_valid); + tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, + cls_mall.stats.pkts, cls_mall.stats.drops, + cls_mall.stats.lastused, + cls_mall.stats.used_hw_stats, + cls_mall.stats.used_hw_stats_valid); } static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index cf5649292e..4272814487 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,13 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est, u32 flags, u32 fl_flags, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags, - fl_flags, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack); if (err < 0) return err; @@ -896,8 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -ENOMEM; err = u32_set_parms(net, tp, base, new, tb, - tca[TCA_RATE], flags, new->flags, - extack); + tca[TCA_RATE], flags, extack); if (err) { u32_destroy_key(new, false); @@ -1062,8 +1060,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], - flags, n->flags, extack); + err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags, + extack); if (err == 0) { struct tc_u_knode __rcu **ins; struct tc_u_knode *pins; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 0a04468b73..46254968d3 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -457,7 +457,7 @@ META_COLLECTOR(int_sk_fwd_alloc) *err = -1; return; } - dst->value = sk_forward_alloc_get(sk); + dst->value = sk->sk_forward_alloc; } META_COLLECTOR(int_sk_sndbuf) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3c0e8ea2d..0fb387c9d7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -507,8 +507,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) continue; - if (tsize > 0 && - memcmp(stab->data, tab, flex_array_size(stab, data, tsize))) + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; return stab; @@ -520,14 +519,14 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, return ERR_PTR(-EINVAL); } - stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL); + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), 
GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); stab->refcnt = 1; stab->szopts = *s; if (tsize > 0) - memcpy(stab->data, tab, flex_array_size(stab, data, tsize)); + memcpy(stab->data, tab, tsize * sizeof(u16)); list_add_tail(&stab->list, &qdisc_stab_list); @@ -885,7 +884,7 @@ static void qdisc_offload_graft_root(struct net_device *dev, static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event) { - struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; + struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; struct gnet_stats_queue __percpu *cpu_qstats = NULL; struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -943,7 +942,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, cpu_qstats = q->cpu_qstats; } - if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), + &d, cpu_bstats, &q->bstats) < 0 || gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; @@ -1264,17 +1264,26 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { + seqcount_t *running; + err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); goto err_out4; } + if (sch->parent != TC_H_ROOT && + !(sch->flags & TCQ_F_INGRESS) && + (!p || !(p->flags & TCQ_F_MQROOT))) + running = qdisc_root_sleeping_running(sch); + else + running = &sch->running; + err = gen_new_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, NULL, - true, + running, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); @@ -1292,7 +1301,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, if (ops->destroy) ops->destroy(sch); err_out3: - dev_put_track(dev, &sch->dev_tracker); + dev_put(dev); qdisc_free(sch); err_out2: module_put(ops->owner); @@ -1350,7 +1359,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, sch->cpu_bstats, &sch->rate_est, NULL, - true, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); } out: diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 4c8e994cf0..7d8518176b 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -52,7 +52,7 @@ struct atm_flow_data { struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ int ref; /* reference count */ - struct gnet_stats_basic_sync bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct list_head list; struct atm_flow_data *excess; /* flow for excess traffic; @@ -548,7 +548,6 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); INIT_LIST_HEAD(&p->flows); INIT_LIST_HEAD(&p->link.list); - gnet_stats_basic_sync_init(&p->link.bstats); list_add(&p->link.list, &p->flows); p->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, extack); @@ -653,7 +652,8 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct atm_flow_data *flow = (struct atm_flow_data *)arg; - if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &flow->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a43a58a73d..857aaebd49 
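[ The qdisc_get_stab() hunks above restore open-coded sizeof(*stab) + tsize * sizeof(u16) in place of struct_size()/flex_array_size(). What those helpers add is an overflow check on the size arithmetic; a user-space rendering of that guard, with the stab layout reduced to the flexible-array essentials: ]

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct stab {
	unsigned int refcnt;
	uint16_t data[];	/* flexible array member */
};

static struct stab *stab_alloc(size_t tsize)
{
	/* struct_size()-style guard: refuse a tsize whose byte count
	 * would wrap around SIZE_MAX. */
	if (tsize > (SIZE_MAX - sizeof(struct stab)) / sizeof(uint16_t))
		return NULL;

	return calloc(1, sizeof(struct stab) + tsize * sizeof(uint16_t));
}

int main(void)
{
	struct stab *s = stab_alloc(512);

	printf("alloc %s\n", s ? "ok" : "failed");
	free(s);
	return 0;
}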
100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2342,7 +2342,9 @@ static int cake_config_precedence(struct Qdisc *sch) /* List of known Diffserv codepoints: * - * Default Forwarding (DF/CS0) - Best Effort + * Least Effort (CS1, LE) + * Best Effort (CS0) + * Max Reliability & LLT "Lo" (TOS1) * Max Throughput (TOS2) * Min Delay (TOS4) * LLT "La" (TOS5) @@ -2350,7 +2352,6 @@ static int cake_config_precedence(struct Qdisc *sch) * Assured Forwarding 2 (AF2x) - x3 * Assured Forwarding 3 (AF3x) - x3 * Assured Forwarding 4 (AF4x) - x3 - * Precedence Class 1 (CS1) * Precedence Class 2 (CS2) * Precedence Class 3 (CS3) * Precedence Class 4 (CS4) @@ -2359,9 +2360,8 @@ static int cake_config_precedence(struct Qdisc *sch) * Precedence Class 7 (CS7) * Voice Admit (VA) * Expedited Forwarding (EF) - * Lower Effort (LE) - * - * Total 26 codepoints. + + * Total 25 codepoints. */ /* List of traffic classes in RFC 4594, updated by RFC 8622: @@ -2375,12 +2375,12 @@ static int cake_config_precedence(struct Qdisc *sch) * Realtime Interactive (CS4) - eg. games * Multimedia Streaming (AF3x) - eg. YouTube, NetFlix, Twitch * Broadcast Video (CS3) - * Low-Latency Data (AF2x,TOS4) - eg. database - * Ops, Admin, Management (CS2) - eg. ssh - * Standard Service (DF & unrecognised codepoints) - * High-Throughput Data (AF1x,TOS2) - eg. web traffic - * Low-Priority Data (LE,CS1) - eg. BitTorrent - * + * Low Latency Data (AF2x,TOS4) - eg. database + * Ops, Admin, Management (CS2,TOS1) - eg. ssh + * Standard Service (CS0 & unrecognised codepoints) + * High Throughput Data (AF1x,TOS2) - eg. web traffic + * Low Priority Data (CS1,LE) - eg. BitTorrent + * Total 12 traffic classes. */ @@ -2390,12 +2390,12 @@ static int cake_config_diffserv8(struct Qdisc *sch) * * Network Control (CS6, CS7) * Minimum Latency (EF, VA, CS5, CS4) - * Interactive Shell (CS2) + * Interactive Shell (CS2, TOS1) * Low Latency Transactions (AF2x, TOS4) * Video Streaming (AF4x, AF3x, CS3) - * Bog Standard (DF etc.) - * High Throughput (AF1x, TOS2, CS1) - * Background Traffic (LE) + * Bog Standard (CS0 etc.) + * High Throughput (AF1x, TOS2) + * Background Traffic (CS1, LE) * * Total 8 traffic classes. */ @@ -2437,9 +2437,9 @@ static int cake_config_diffserv4(struct Qdisc *sch) /* Further pruned list of traffic classes for four-class system: * * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4) - * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2) - * Best Effort (DF, AF1x, TOS2, and those not specified) - * Background Traffic (LE, CS1) + * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1) + * Best Effort (CS0, AF1x, TOS2, and those not specified) + * Background Traffic (CS1, LE) * * Total 4 traffic classes. */ @@ -2477,9 +2477,9 @@ static int cake_config_diffserv4(struct Qdisc *sch) static int cake_config_diffserv3(struct Qdisc *sch) { /* Simplified Diffserv structure with 3 tins. 
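[ The sch_cake comment hunks above shuffle Diffserv codepoint names between tins. For orientation only: DSCP is the upper six bits of the IPv4 TOS / IPv6 traffic-class octet, so the names in those lists map to small integers. A tiny extraction example: ]

#include <stdint.h>
#include <stdio.h>

static uint8_t tos_to_dscp(uint8_t tos)
{
	return tos >> 2;	/* drop the two ECN bits */
}

int main(void)
{
	printf("EF  = %u\n", tos_to_dscp(0xb8));	/* 46 */
	printf("CS1 = %u\n", tos_to_dscp(0x20));	/* 8 */
	return 0;
}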
- * Latency Sensitive (CS7, CS6, EF, VA, TOS4) + * Low Priority (CS1, LE) * Best Effort - * Low Priority (LE, CS1) + * Latency Sensitive (TOS4, VA, EF, CS6, CS7) */ struct cake_sched_data *q = qdisc_priv(sch); u32 mtu = psched_mtu(qdisc_dev(sch)); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 02d9f0dfe3..e0da15530f 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -116,7 +116,7 @@ struct cbq_class { long avgidle; long deficit; /* Saved deficit for WRR */ psched_time_t penalized; - struct gnet_stats_basic_sync bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tc_cbq_xstats xstats; @@ -565,7 +565,8 @@ cbq_update(struct cbq_sched_data *q) long avgidle = cl->avgidle; long idle; - _bstats_update(&cl->bstats, len, 1); + cl->bstats.packets++; + cl->bstats.bytes += len; /* * (now - last) is total time between packet right edges. @@ -1383,7 +1384,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; @@ -1517,7 +1519,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - true, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); @@ -1609,7 +1611,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; - gnet_stats_basic_sync_init(&cl->bstats); err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); @@ -1618,7 +1619,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, tca[TCA_RATE]); + NULL, + qdisc_root_sleeping_running(sch), + tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); tcf_block_put(cl->block); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 18e4f7a0b2..642cd179b7 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -19,7 +19,7 @@ struct drr_class { struct Qdisc_class_common common; unsigned int filter_cnt; - struct gnet_stats_basic_sync bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct list_head alist; @@ -85,7 +85,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); @@ -105,7 +106,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -118,7 +118,9 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, tca[TCA_RATE]); + NULL, + 
+					    qdisc_root_sleeping_running(sch),
+					    tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Failed to replace estimator");
 			qdisc_put(cl->qdisc);
@@ -265,7 +267,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	if (qlen)
 		xstats.deficit = cl->deficit;
-	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
 		return -1;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index d733934935..44fa2532a8 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -41,7 +41,7 @@ struct ets_class {
 	struct Qdisc *qdisc;
 	u32 quantum;
 	u32 deficit;
-	struct gnet_stats_basic_sync bstats;
+	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
 };
@@ -325,7 +325,8 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
 	struct ets_class *cl = ets_class_from_arg(sch, arg);
 	struct Qdisc *cl_q = cl->qdisc;
-	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+				  d, NULL, &cl_q->bstats) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
@@ -660,6 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 	q->nbands = nbands;
 	for (i = nstrict; i < q->nstrict; i++) {
+		INIT_LIST_HEAD(&q->classes[i].alist);
 		if (q->classes[i].qdisc->q.qlen) {
 			list_add_tail(&q->classes[i].alist, &q->active);
 			q->classes[i].deficit = quanta[i];
@@ -687,11 +689,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 	ets_offload_change(sch);
 	for (i = q->nbands; i < oldbands; i++) {
 		qdisc_put(q->classes[i].qdisc);
-		q->classes[i].qdisc = NULL;
-		q->classes[i].quantum = 0;
-		q->classes[i].deficit = 0;
-		gnet_stats_basic_sync_init(&q->classes[i].bstats);
-		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
+		memset(&q->classes[i], 0, sizeof(q->classes[i]));
 	}
 	return 0;
 }
@@ -700,7 +698,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 			  struct netlink_ext_ack *extack)
 {
 	struct ets_sched *q = qdisc_priv(sch);
-	int err, i;
+	int err;
 	if (!opt)
 		return -EINVAL;
@@ -710,9 +708,6 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 		return err;
 	INIT_LIST_HEAD(&q->active);
-	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
-		INIT_LIST_HEAD(&q->classes[i].alist);
-
 	return ets_qdisc_change(sch, opt, extack);
 }
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 839e1235db..bb0cd6d3d2 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -362,8 +362,6 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
 	[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
 	[TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 },
 	[TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
-	[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR] = { .type = NLA_U8 },
-	[TCA_FQ_CODEL_CE_THRESHOLD_MASK] = { .type = NLA_U8 },
 };
 static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
@@ -410,11 +408,6 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
 		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
 	}
-	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])
-		q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]);
-	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])
-		q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]);
-
 	if (tb[TCA_FQ_CODEL_INTERVAL]) {
 		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
@@ -551,15 +544,10 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 			q->flows_cnt))
 		goto nla_put_failure;
-	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) {
-		if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
-				codel_time_to_us(q->cparams.ce_threshold)))
-			goto nla_put_failure;
-		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector))
-			goto nla_put_failure;
-		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask))
-			goto nla_put_failure;
-	}
+	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	    nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+			codel_time_to_us(q->cparams.ce_threshold)))
+		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index a9bd0a2358..5ded4c8672 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-#include
 #include
 #include
 #include
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5bab9f8b8f..30c29a9a2e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -304,8 +304,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 /*
  * Transmit possibly several skbs, and handle the return status as
- * required. Owning qdisc running bit guarantees that only one CPU
- * can execute this function.
+ * required. Owning running seqcount bit guarantees that
+ * only one CPU can execute this function.
  *
  * Returns to the caller:
  *	false - hardware queue frozen backoff
@@ -434,9 +434,9 @@ unsigned long dev_trans_start(struct net_device *dev)
 		dev = vlan_dev_real_dev(dev);
 	else if (netif_is_macvlan(dev))
 		dev = macvlan_dev_real_dev(dev);
-	res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
+	res = netdev_get_tx_queue(dev, 0)->trans_start;
 	for (i = 1; i < dev->num_tx_queues; i++) {
-		val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
+		val = netdev_get_tx_queue(dev, i)->trans_start;
 		if (val && time_after(val, res))
 			res = val;
 	}
@@ -445,63 +445,11 @@ unsigned long dev_trans_start(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_trans_start);
-static void netif_freeze_queues(struct net_device *dev)
-{
-	unsigned int i;
-	int cpu;
-
-	cpu = smp_processor_id();
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-
-		/* We are the only thread of execution doing a
-		 * freeze, but we have to grab the _xmit_lock in
-		 * order to synchronize with threads which are in
-		 * the ->hard_start_xmit() handler and already
-		 * checked the frozen bit.
-		 */
-		__netif_tx_lock(txq, cpu);
-		set_bit(__QUEUE_STATE_FROZEN, &txq->state);
-		__netif_tx_unlock(txq);
-	}
-}
-
-void netif_tx_lock(struct net_device *dev)
-{
-	spin_lock(&dev->tx_global_lock);
-	netif_freeze_queues(dev);
-}
-EXPORT_SYMBOL(netif_tx_lock);
-
-static void netif_unfreeze_queues(struct net_device *dev)
-{
-	unsigned int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-
-		/* No need to grab the _xmit_lock here. If the
-		 * queue is not stopped for another reason, we
-		 * force a schedule.
-		 */
-		clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
-		netif_schedule_queue(txq);
-	}
-}
-
-void netif_tx_unlock(struct net_device *dev)
-{
-	netif_unfreeze_queues(dev);
-	spin_unlock(&dev->tx_global_lock);
-}
-EXPORT_SYMBOL(netif_tx_unlock);
-
 static void dev_watchdog(struct timer_list *t)
 {
 	struct net_device *dev = from_timer(dev, t, watchdog_timer);
-	bool release = true;
-	spin_lock(&dev->tx_global_lock);
+	netif_tx_lock(dev);
 	if (!qdisc_tx_is_noop(dev)) {
 		if (netif_device_present(dev) &&
 		    netif_running(dev) &&
@@ -514,34 +462,31 @@ static void dev_watchdog(struct timer_list *t)
 				struct netdev_queue *txq;
 				txq = netdev_get_tx_queue(dev, i);
-				trans_start = READ_ONCE(txq->trans_start);
+				trans_start = txq->trans_start;
 				if (netif_xmit_stopped(txq) &&
 				    time_after(jiffies, (trans_start +
 							 dev->watchdog_timeo))) {
 					some_queue_timedout = 1;
-					atomic_long_inc(&txq->trans_timeout);
+					txq->trans_timeout++;
 					break;
 				}
 			}
-			if (unlikely(some_queue_timedout)) {
+			if (some_queue_timedout) {
 				trace_net_dev_xmit_timeout(dev, i);
 				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
 					  dev->name, netdev_drivername(dev), i);
-				netif_freeze_queues(dev);
 				dev->netdev_ops->ndo_tx_timeout(dev, i);
-				netif_unfreeze_queues(dev);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
 				       round_jiffies(jiffies +
 						     dev->watchdog_timeo)))
-				release = false;
+				dev_hold(dev);
 		}
 	}
-	spin_unlock(&dev->tx_global_lock);
+	netif_tx_unlock(dev);
-	if (release)
-		dev_put_track(dev, &dev->watchdog_dev_tracker);
+	dev_put(dev);
 }
 void __netdev_watchdog_up(struct net_device *dev)
@@ -551,7 +496,7 @@ void __netdev_watchdog_up(struct net_device *dev)
 			dev->watchdog_timeo = 5*HZ;
 		if (!mod_timer(&dev->watchdog_timer,
 			       round_jiffies(jiffies + dev->watchdog_timeo)))
-			dev_hold_track(dev, &dev->watchdog_dev_tracker, GFP_ATOMIC);
+			dev_hold(dev);
 	}
 }
 EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
@@ -565,7 +510,7 @@ static void dev_watchdog_down(struct net_device *dev)
 {
 	netif_tx_lock_bh(dev);
 	if (del_timer(&dev->watchdog_timer))
-		dev_put_track(dev, &dev->watchdog_dev_tracker);
+		dev_put(dev);
 	netif_tx_unlock_bh(dev);
 }
@@ -661,6 +606,7 @@ struct Qdisc noop_qdisc = {
 	.ops		=	&noop_qdisc_ops,
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
+	.running	=	SEQCNT_ZERO(noop_qdisc.running),
 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
 	.gso_skb = {
 		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
@@ -921,6 +867,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 EXPORT_SYMBOL(pfifo_fast_ops);
 static struct lock_class_key qdisc_tx_busylock;
+static struct lock_class_key qdisc_running_key;
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  const struct Qdisc_ops *ops,
@@ -945,12 +892,11 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	__skb_queue_head_init(&sch->gso_skb);
 	__skb_queue_head_init(&sch->skb_bad_txq);
 	qdisc_skb_head_init(&sch->q);
-	gnet_stats_basic_sync_init(&sch->bstats);
 	spin_lock_init(&sch->q.lock);
 	if (ops->static_flags & TCQ_F_CPUSTATS) {
 		sch->cpu_bstats =
-			netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
 		if (!sch->cpu_bstats)
 			goto errout1;
@@ -970,12 +916,16 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	lockdep_set_class(&sch->seqlock,
 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+	seqcount_init(&sch->running);
+	lockdep_set_class(&sch->running,
+			  dev->qdisc_running_key ?: &qdisc_running_key);
+
 	sch->ops = ops;
 	sch->flags = ops->static_flags;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
-	dev_hold_track(dev, &sch->dev_tracker, GFP_KERNEL);
+	dev_hold(dev);
 	refcount_set(&sch->refcnt, 1);
 	return sch;
@@ -1075,7 +1025,7 @@ static void qdisc_destroy(struct Qdisc *qdisc)
 		ops->destroy(qdisc);
 	module_put(ops->owner);
-	dev_put_track(qdisc_dev(qdisc), &qdisc->dev_tracker);
+	dev_put(qdisc_dev(qdisc));
 	trace_qdisc_destroy(qdisc);
@@ -1206,7 +1156,7 @@ static void transition_one_qdisc(struct net_device *dev,
 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
 	if (need_watchdog_p) {
-		WRITE_ONCE(dev_queue->trans_start, 0);
+		dev_queue->trans_start = 0;
 		*need_watchdog_p = 1;
 	}
 }
@@ -1392,30 +1342,6 @@ void dev_qdisc_change_real_num_tx(struct net_device *dev,
 		qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
 }
-void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
-{
-#ifdef CONFIG_NET_SCHED
-	struct net_device *dev = qdisc_dev(sch);
-	struct Qdisc *qdisc;
-	unsigned int i;
-
-	for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
-		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
-		/* Only update the default qdiscs we created,
-		 * qdiscs with handles are always hashed.
-		 */
-		if (qdisc != &noop_qdisc && !qdisc->handle)
-			qdisc_hash_del(qdisc);
-	}
-	for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
-		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
-		if (qdisc != &noop_qdisc && !qdisc->handle)
-			qdisc_hash_add(qdisc, false);
-	}
-#endif
-}
-EXPORT_SYMBOL(mq_change_real_num_tx);
-
 int dev_qdisc_change_tx_queue_len(struct net_device *dev)
 {
 	bool up = dev->flags & IFF_UP;
@@ -1546,6 +1472,10 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
 }
 EXPORT_SYMBOL(psched_ppscfg_precompute);
+static void mini_qdisc_rcu_func(struct rcu_head *head)
+{
+}
+
 void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 			  struct tcf_proto *tp_head)
 {
@@ -1558,30 +1488,28 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 	if (!tp_head) {
 		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
-	} else {
-		miniq = miniq_old != &miniqp->miniq1 ?
-			&miniqp->miniq1 : &miniqp->miniq2;
-
-		/* We need to make sure that readers won't see the miniq
-		 * we are about to modify. So ensure that at least one RCU
-		 * grace period has elapsed since the miniq was made
-		 * inactive.
-		 */
-		if (IS_ENABLED(CONFIG_PREEMPT_RT))
-			cond_synchronize_rcu(miniq->rcu_state);
-		else if (!poll_state_synchronize_rcu(miniq->rcu_state))
-			synchronize_rcu_expedited();
-
-		miniq->filter_list = tp_head;
-		rcu_assign_pointer(*miniqp->p_miniq, miniq);
+		/* Wait for flying RCU callback before it is freed. */
+		rcu_barrier();
+		return;
 	}
+	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
+		&miniqp->miniq1 : &miniqp->miniq2;
+
+	/* We need to make sure that readers won't see the miniq
+	 * we are about to modify. So wait until previous call_rcu callback
+	 * is done.
+	 */
+	rcu_barrier();
+	miniq->filter_list = tp_head;
+	rcu_assign_pointer(*miniqp->p_miniq, miniq);
+
 	if (miniq_old)
-		/* This is counterpart of the rcu sync above. We need to
+		/* This is counterpart of the rcu barriers above. We need to
 		 * block potential new user of miniq_old until all readers
 		 * are not seeing it.
 		 */
-		miniq_old->rcu_state = start_poll_synchronize_rcu();
+		call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
 }
 EXPORT_SYMBOL(mini_qdisc_pair_swap);
@@ -1600,8 +1528,6 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
 	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
 	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
 	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
-	miniqp->miniq1.rcu_state = get_state_synchronize_rcu();
-	miniqp->miniq2.rcu_state = miniqp->miniq1.rcu_state;
 	miniqp->p_miniq = p_miniq;
 }
 EXPORT_SYMBOL(mini_qdisc_pair_init);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 1073c76d05..621dc6afde 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -56,7 +56,6 @@ struct gred_sched {
 	u32		DPs;
 	u32		def;
 	struct red_vars wred_set;
-	struct tc_gred_qopt_offload *opt;
 };
 static inline int gred_wred_mode(struct gred_sched *table)
@@ -312,50 +311,48 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
-	struct tc_gred_qopt_offload *opt = table->opt;
+	struct tc_gred_qopt_offload opt = {
+		.command	= command,
+		.handle		= sch->handle,
+		.parent		= sch->parent,
+	};
 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 		return;
-	memset(opt, 0, sizeof(*opt));
-	opt->command = command;
-	opt->handle = sch->handle;
-	opt->parent = sch->parent;
-
 	if (command == TC_GRED_REPLACE) {
 		unsigned int i;
-		opt->set.grio_on = gred_rio_mode(table);
-		opt->set.wred_on = gred_wred_mode(table);
-		opt->set.dp_cnt = table->DPs;
-		opt->set.dp_def = table->def;
+		opt.set.grio_on = gred_rio_mode(table);
+		opt.set.wred_on = gred_wred_mode(table);
+		opt.set.dp_cnt = table->DPs;
+		opt.set.dp_def = table->def;
 		for (i = 0; i < table->DPs; i++) {
 			struct gred_sched_data *q = table->tab[i];
 			if (!q)
 				continue;
-			opt->set.tab[i].present = true;
-			opt->set.tab[i].limit = q->limit;
-			opt->set.tab[i].prio = q->prio;
-			opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
-			opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
-			opt->set.tab[i].is_ecn = gred_use_ecn(q);
-			opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
-			opt->set.tab[i].probability = q->parms.max_P;
-			opt->set.tab[i].backlog = &q->backlog;
+			opt.set.tab[i].present = true;
+			opt.set.tab[i].limit = q->limit;
+			opt.set.tab[i].prio = q->prio;
+			opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+			opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+			opt.set.tab[i].is_ecn = gred_use_ecn(q);
+			opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
+			opt.set.tab[i].probability = q->parms.max_P;
+			opt.set.tab[i].backlog = &q->backlog;
 		}
-		opt->set.qstats = &sch->qstats;
+		opt.set.qstats = &sch->qstats;
 	}
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
 }
 static int gred_offload_dump_stats(struct Qdisc *sch)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct tc_gred_qopt_offload *hw_stats;
-	u64 bytes = 0, packets = 0;
 	unsigned int i;
 	int ret;
@@ -367,11 +364,9 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
 	hw_stats->handle = sch->handle;
 	hw_stats->parent = sch->parent;
-	for (i = 0; i < MAX_DPs; i++) {
-		gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
+	for (i = 0; i < MAX_DPs; i++)
 		if (table->tab[i])
 			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
-	}
 	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
 	/* Even if driver returns failure adjust the stats - in case offload
@@ -380,19 +375,19 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
 	for (i = 0; i < MAX_DPs; i++) {
 		if (!table->tab[i])
 			continue;
-		table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
-		table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
+		table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
+		table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
 		table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
-		bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
-		packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
+		_bstats_update(&sch->bstats,
+			       hw_stats->stats.bstats[i].bytes,
+			       hw_stats->stats.bstats[i].packets);
 		sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
 		sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
 		sch->qstats.drops += hw_stats->stats.qstats[i].drops;
 		sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
 		sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
 	}
-	_bstats_update(&sch->bstats, bytes, packets);
 	kfree(hw_stats);
 	return ret;
@@ -733,7 +728,6 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
 static int gred_init(struct Qdisc *sch, struct nlattr *opt,
 		     struct netlink_ext_ack *extack)
 {
-	struct gred_sched *table = qdisc_priv(sch);
 	struct nlattr *tb[TCA_GRED_MAX + 1];
 	int err;
@@ -757,12 +751,6 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
 		sch->limit = qdisc_dev(sch)->tx_queue_len
 		             * psched_mtu(qdisc_dev(sch));
-	if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
-		table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
-		if (!table->opt)
-			return -ENOMEM;
-	}
-
 	return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
 }
@@ -919,7 +907,6 @@ static void gred_destroy(struct Qdisc *sch)
 			gred_destroy_vq(table->tab[i]);
 	}
 	gred_offload(sch, TC_GRED_DESTROY);
-	kfree(table->opt);
 }
 static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d3979a6000..b7ac30cca0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -111,7 +111,7 @@ enum hfsc_class_flags {
 struct hfsc_class {
 	struct Qdisc_class_common cl_common;
-	struct gnet_stats_basic_sync bstats;
+	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct tcf_proto __rcu *filter_list; /* filter list */
@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    true,
+						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
@@ -1033,7 +1033,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL, true, tca[TCA_RATE]);
+					NULL,
+					qdisc_root_sleeping_running(sch),
+					tca[TCA_RATE]);
 		if (err) {
 			tcf_block_put(cl->block);
 			kfree(cl);
@@ -1326,7 +1328,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	xstats.work    = cl->cl_total;
 	xstats.rtwork  = cl->cl_cumul;
-	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
@@ -1404,7 +1406,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
 	if (err)
 		return err;
-	gnet_stats_basic_sync_init(&q->root.bstats);
 	q->root.cl_common.classid = sch->handle;
 	q->root.sched   = q;
 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 23a9d62424..5cbc32fee8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -113,8 +113,8 @@ struct htb_class {
 	/*
 	 * Written often fields
 	 */
-	struct gnet_stats_basic_sync bstats;
-	struct gnet_stats_basic_sync bstats_bias;
+	struct gnet_stats_basic_packed bstats;
+	struct gnet_stats_basic_packed bstats_bias;
 	struct tc_htb_xstats	xstats;	/* our special stats */
 	/* token bucket parameters */
@@ -1084,15 +1084,11 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
 	if (offload) {
-		if (sch->parent != TC_H_ROOT) {
-			NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
+		if (sch->parent != TC_H_ROOT)
 			return -EOPNOTSUPP;
-		}
-		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
-			NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
+		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 			return -EOPNOTSUPP;
-		}
 		q->num_direct_qdiscs = dev->real_num_tx_queues;
 		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
@@ -1312,11 +1308,10 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 static void htb_offload_aggregate_stats(struct htb_sched *q,
 					struct htb_class *cl)
 {
-	u64 bytes = 0, packets = 0;
 	struct htb_class *c;
 	unsigned int i;
-	gnet_stats_basic_sync_init(&cl->bstats);
+	memset(&cl->bstats, 0, sizeof(cl->bstats));
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
@@ -1328,15 +1323,14 @@ static void htb_offload_aggregate_stats(struct htb_sched *q,
 			if (p != cl)
 				continue;
-			bytes += u64_stats_read(&c->bstats_bias.bytes);
-			packets += u64_stats_read(&c->bstats_bias.packets);
+			cl->bstats.bytes += c->bstats_bias.bytes;
+			cl->bstats.packets += c->bstats_bias.packets;
 			if (c->level == 0) {
-				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
-				packets += u64_stats_read(&c->leaf.q->bstats.packets);
+				cl->bstats.bytes += c->leaf.q->bstats.bytes;
+				cl->bstats.packets += c->leaf.q->bstats.packets;
 			}
 		}
 	}
-	_bstats_update(&cl->bstats, bytes, packets);
 }
 static int
@@ -1363,16 +1357,16 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 			if (cl->leaf.q)
 				cl->bstats = cl->leaf.q->bstats;
 			else
-				gnet_stats_basic_sync_init(&cl->bstats);
-			_bstats_update(&cl->bstats,
-				       u64_stats_read(&cl->bstats_bias.bytes),
-				       u64_stats_read(&cl->bstats_bias.packets));
+				memset(&cl->bstats, 0, sizeof(cl->bstats));
+			cl->bstats.bytes += cl->bstats_bias.bytes;
+			cl->bstats.packets += cl->bstats_bias.packets;
 		} else {
 			htb_offload_aggregate_stats(q, cl);
 		}
 	}
-	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
 		return -1;
@@ -1584,9 +1578,8 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
 		WARN_ON(old != q);
 	if (cl->parent) {
-		_bstats_update(&cl->parent->bstats_bias,
-			       u64_stats_read(&q->bstats.bytes),
-			       u64_stats_read(&q->bstats.packets));
+		cl->parent->bstats_bias.bytes += q->bstats.bytes;
+		cl->parent->bstats_bias.packets += q->bstats.packets;
 	}
 	offload_opt = (struct tc_htb_qopt_offload) {
@@ -1876,9 +1869,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (!cl)
 			goto failure;
-		gnet_stats_basic_sync_init(&cl->bstats);
-		gnet_stats_basic_sync_init(&cl->bstats_bias);
-
 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
 		if (err) {
 			kfree(cl);
@@ -1888,7 +1878,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			err = gen_new_estimator(&cl->bstats, NULL,
 						&cl->rate_est,
 						NULL,
-						true,
+						qdisc_root_sleeping_running(sch),
 						tca[TCA_RATE] ? : &est.nla);
 			if (err)
 				goto err_block_put;
@@ -1952,9 +1942,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 				htb_graft_helper(dev_queue, old_q);
 				goto err_kill_estimator;
 			}
-			_bstats_update(&parent->bstats_bias,
-				       u64_stats_read(&old_q->bstats.bytes),
-				       u64_stats_read(&old_q->bstats.packets));
+			parent->bstats_bias.bytes += old_q->bstats.bytes;
+			parent->bstats_bias.packets += old_q->bstats.packets;
 			qdisc_put(old_q);
 		}
 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
@@ -2014,7 +2003,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    true,
+						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 83d2e54bf3..db18d8a860 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -125,14 +125,38 @@ static void mq_attach(struct Qdisc *sch)
 	priv->qdiscs = NULL;
 }
+static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
+{
+#ifdef CONFIG_NET_SCHED
+	struct net_device *dev = qdisc_dev(sch);
+	struct Qdisc *qdisc;
+	unsigned int i;
+
+	for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		/* Only update the default qdiscs we created,
+		 * qdiscs with handles are always hashed.
+		 */
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_del(qdisc);
+	}
+	for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_add(qdisc, false);
+	}
+#endif
+}
+
 static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	struct Qdisc *qdisc;
 	unsigned int ntx;
+	__u32 qlen = 0;
 	sch->q.qlen = 0;
-	gnet_stats_basic_sync_init(&sch->bstats);
+	memset(&sch->bstats, 0, sizeof(sch->bstats));
 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -144,11 +168,25 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
 		spin_lock_bh(qdisc_lock(qdisc));
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
-				     &qdisc->qstats);
-		sch->q.qlen += qdisc_qlen(qdisc);
+		if (qdisc_is_percpu_stats(qdisc)) {
+			qlen = qdisc_qlen_sum(qdisc);
+			__gnet_stats_copy_basic(NULL, &sch->bstats,
+						qdisc->cpu_bstats,
+						&qdisc->bstats);
+			__gnet_stats_copy_queue(&sch->qstats,
+						qdisc->cpu_qstats,
+						&qdisc->qstats, qlen);
+			sch->q.qlen		+= qlen;
+		} else {
+			sch->q.qlen		+= qdisc->q.qlen;
+			sch->bstats.bytes	+= qdisc->bstats.bytes;
+			sch->bstats.packets	+= qdisc->bstats.packets;
+			sch->qstats.qlen	+= qdisc->qstats.qlen;
+			sch->qstats.backlog	+= qdisc->qstats.backlog;
+			sch->qstats.drops	+= qdisc->qstats.drops;
+			sch->qstats.requeues	+= qdisc->qstats.requeues;
+			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+		}
 		spin_unlock_bh(qdisc_lock(qdisc));
 	}
@@ -231,7 +269,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 	sch = dev_queue->qdisc_sleeping;
-	if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
+				  &sch->bstats) < 0 ||
 	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b29f3453c6..50e15add60 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -306,6 +306,28 @@ static void mqprio_attach(struct Qdisc *sch)
 	priv->qdiscs = NULL;
 }
+static void mqprio_change_real_num_tx(struct Qdisc *sch,
+				      unsigned int new_real_tx)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct Qdisc *qdisc;
+	unsigned int i;
+
+	for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		/* Only update the default qdiscs we created,
+		 * qdiscs with handles are always hashed.
+		 */
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_del(qdisc);
+	}
+	for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_add(qdisc, false);
+	}
+}
+
 static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
 					     unsigned long cl)
 {
@@ -390,7 +412,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	unsigned int ntx, tc;
 	sch->q.qlen = 0;
-	gnet_stats_basic_sync_init(&sch->bstats);
+	memset(&sch->bstats, 0, sizeof(sch->bstats));
 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -402,11 +424,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
 		spin_lock_bh(qdisc_lock(qdisc));
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
-				     &qdisc->qstats);
-		sch->q.qlen += qdisc_qlen(qdisc);
+		if (qdisc_is_percpu_stats(qdisc)) {
+			__u32 qlen = qdisc_qlen_sum(qdisc);
+
+			__gnet_stats_copy_basic(NULL, &sch->bstats,
+						qdisc->cpu_bstats,
+						&qdisc->bstats);
+			__gnet_stats_copy_queue(&sch->qstats,
+						qdisc->cpu_qstats,
+						&qdisc->qstats, qlen);
+			sch->q.qlen		+= qlen;
+		} else {
+			sch->q.qlen		+= qdisc->q.qlen;
+			sch->bstats.bytes	+= qdisc->bstats.bytes;
+			sch->bstats.packets	+= qdisc->bstats.packets;
+			sch->qstats.backlog	+= qdisc->qstats.backlog;
+			sch->qstats.drops	+= qdisc->qstats.drops;
+			sch->qstats.requeues	+= qdisc->qstats.requeues;
+			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+		}
 		spin_unlock_bh(qdisc_lock(qdisc));
 	}
@@ -498,13 +534,12 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 {
 	if (cl >= TC_H_MIN_PRIORITY) {
 		int i;
-		__u32 qlen;
+		__u32 qlen = 0;
 		struct gnet_stats_queue qstats = {0};
-		struct gnet_stats_basic_sync bstats;
+		struct gnet_stats_basic_packed bstats = {0};
 		struct net_device *dev = qdisc_dev(sch);
 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
-		gnet_stats_basic_sync_init(&bstats);
 		/* Drop lock here it will be reclaimed before touching
 		 * statistics this is required because the d->lock we
 		 * hold here is the look on dev_queue->qdisc_sleeping
@@ -519,28 +554,40 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			spin_lock_bh(qdisc_lock(qdisc));
-			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
-					     &qdisc->bstats, false);
-			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
-					     &qdisc->qstats);
-			sch->q.qlen += qdisc_qlen(qdisc);
+			if (qdisc_is_percpu_stats(qdisc)) {
+				qlen = qdisc_qlen_sum(qdisc);
+				__gnet_stats_copy_basic(NULL, &bstats,
+							qdisc->cpu_bstats,
+							&qdisc->bstats);
+				__gnet_stats_copy_queue(&qstats,
+							qdisc->cpu_qstats,
+							&qdisc->qstats,
+							qlen);
+			} else {
+				qlen		+= qdisc->q.qlen;
+				bstats.bytes	+= qdisc->bstats.bytes;
+				bstats.packets	+= qdisc->bstats.packets;
+				qstats.backlog	+= qdisc->qstats.backlog;
+				qstats.drops	+= qdisc->qstats.drops;
+				qstats.requeues	+= qdisc->qstats.requeues;
+				qstats.overlimits += qdisc->qstats.overlimits;
+			}
 			spin_unlock_bh(qdisc_lock(qdisc));
 		}
-		qlen = qdisc_qlen(sch) + qstats.qlen;
 		/* Reclaim root sleeping lock before completing stats */
 		if (d->lock)
 			spin_lock_bh(d->lock);
-		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
+		if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
 			return -1;
 	} else {
 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 		sch = dev_queue->qdisc_sleeping;
-		if (gnet_stats_copy_basic(d, sch->cpu_bstats,
-					  &sch->bstats, true) < 0 ||
+		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
+					  sch->cpu_bstats, &sch->bstats) < 0 ||
 		    qdisc_qstats_copy(d, sch) < 0)
 			return -1;
 	}
@@ -604,7 +651,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
 	.init		= mqprio_init,
 	.destroy	= mqprio_destroy,
 	.attach		= mqprio_attach,
-	.change_real_num_tx = mq_change_real_num_tx,
+	.change_real_num_tx = mqprio_change_real_num_tx,
 	.dump		= mqprio_dump,
 	.owner		= THIS_MODULE,
 };
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index cd8ab90c47..e282e73821 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -338,7 +338,8 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct Qdisc *cl_q;
 	cl_q = q->queues[cl - 1];
-	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ed4ccef5d6..0c345e43a0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -208,17 +208,17 @@ static bool loss_4state(struct netem_sched_data *q)
 	 * next state and if the next packet has to be transmitted or lost.
 	 * The four states correspond to:
 	 *	TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
-	 *	LOST_IN_GAP_PERIOD => isolated losses within a gap period
-	 *	LOST_IN_BURST_PERIOD => lost packets within a burst period
-	 *	TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
+	 *	LOST_IN_BURST_PERIOD => isolated losses within a gap period
+	 *	LOST_IN_GAP_PERIOD => lost packets within a burst period
+	 *	TX_IN_GAP_PERIOD => successfully transmitted packets within a burst period
 	 */
 	switch (clg->state) {
 	case TX_IN_GAP_PERIOD:
 		if (rnd < clg->a4) {
-			clg->state = LOST_IN_GAP_PERIOD;
+			clg->state = LOST_IN_BURST_PERIOD;
 			return true;
 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
-			clg->state = LOST_IN_BURST_PERIOD;
+			clg->state = LOST_IN_GAP_PERIOD;
 			return true;
 		} else if (clg->a1 + clg->a4 < rnd) {
 			clg->state = TX_IN_GAP_PERIOD;
@@ -227,24 +227,24 @@ static bool loss_4state(struct netem_sched_data *q)
 		break;
 	case TX_IN_BURST_PERIOD:
 		if (rnd < clg->a5) {
-			clg->state = LOST_IN_BURST_PERIOD;
+			clg->state = LOST_IN_GAP_PERIOD;
 			return true;
 		} else {
 			clg->state = TX_IN_BURST_PERIOD;
 		}
 		break;
-	case LOST_IN_BURST_PERIOD:
+	case LOST_IN_GAP_PERIOD:
 		if (rnd < clg->a3)
 			clg->state = TX_IN_BURST_PERIOD;
 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
 			clg->state = TX_IN_GAP_PERIOD;
 		} else if (clg->a2 + clg->a3 < rnd) {
-			clg->state = LOST_IN_BURST_PERIOD;
+			clg->state = LOST_IN_GAP_PERIOD;
 			return true;
 		}
 		break;
-	case LOST_IN_GAP_PERIOD:
+	case LOST_IN_BURST_PERIOD:
 		clg->state = TX_IN_GAP_PERIOD;
 		break;
 	}
@@ -785,7 +785,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
 	if (!n || n > NETEM_DIST_MAX)
 		return -EINVAL;
-	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
+	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
 	if (!d)
 		return -ENOMEM;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3b8d7197c0..03fdf31ccb 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct Qdisc *cl_q;
 	cl_q = q->queues[cl - 1];
-	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
-				  &cl_q->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d4ce58c90f..aea435b0ae 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -131,7 +131,7 @@ struct qfq_class {
 	unsigned int filter_cnt;
-	struct gnet_stats_basic_sync bstats;
+	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct Qdisc *qdisc;
@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    true,
+						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
@@ -465,7 +465,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
-	gnet_stats_basic_sync_init(&cl->bstats);
 	cl->common.classid = classid;
 	cl->deficit = lmax;
@@ -478,7 +477,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		err = gen_new_estimator(&cl->bstats, NULL,
 					&cl->rate_est,
 					NULL,
-					true,
+					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err)
 			goto destroy_class;
@@ -640,7 +639,8 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	xstats.weight = cl->agg->class_weight;
 	xstats.lmax = cl->agg->lmax;
-	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    qdisc_qstats_copy(d, cl->qdisc) < 0)
 		return -1;
@@ -1234,7 +1234,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		return err;
 	}
-	_bstats_update(&cl->bstats, len, gso_segs);
+	cl->bstats.bytes += len;
+	cl->bstats.packets += gso_segs;
 	sch->qstats.backlog += len;
 	++sch->q.qlen;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 377f896bde..a66398fb2d 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1984,7 +1984,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
 	sch = dev_queue->qdisc_sleeping;
-	if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
+	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
 	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7210227744..78e79029dc 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -184,20 +184,6 @@ static int tbf_offload_dump(struct Qdisc *sch)
 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
 }
-static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new,
-			      struct Qdisc *old, struct netlink_ext_ack *extack)
-{
-	struct tc_tbf_qopt_offload graft_offload = {
-		.handle		= sch->handle,
-		.parent		= sch->parent,
-		.child_handle	= new->handle,
-		.command	= TC_TBF_GRAFT,
-	};
-
-	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
-				   TC_SETUP_QDISC_TBF, &graft_offload, extack);
-}
-
 /* GSO packet is too big, segment it so that tbf can transmit
  * each segment in time
  */
@@ -561,8 +547,6 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		new = &noop_qdisc;
 	*old = qdisc_replace(sch, new, &q->qdisc);
-
-	tbf_offload_graft(sch, new, *old, extack);
 	return 0;
 }
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 90e12bafdd..1f1786021d 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -746,21 +746,23 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 	struct sock *sk = ep->base.sk;
 	struct net *net = sock_net(sk);
 	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
-	ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port);
-	head = &sctp_ep_hashtable[ep->hashent];
+	epb = &ep->base;
+	epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
+	head = &sctp_ep_hashtable[epb->hashent];
 	if (sk->sk_reuseport) {
 		bool any = sctp_is_ep_boundall(sk);
-		struct sctp_endpoint *ep2;
+		struct sctp_ep_common *epb2;
 		struct list_head *list;
 		int cnt = 0, err = 1;
 		list_for_each(list, &ep->base.bind_addr.address_list)
 			cnt++;
-		sctp_for_each_hentry(ep2, &head->chain) {
-			struct sock *sk2 = ep2->base.sk;
+		sctp_for_each_hentry(epb2, &head->chain) {
+			struct sock *sk2 = epb2->sk;
 			if (!net_eq(sock_net(sk2), net) || sk2 == sk ||
 			    !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) ||
@@ -787,7 +789,7 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 	}
 	write_lock(&head->lock);
-	hlist_add_head(&ep->node, &head->chain);
+	hlist_add_head(&epb->node, &head->chain);
 	write_unlock(&head->lock);
 	return 0;
 }
@@ -809,16 +811,19 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
 {
 	struct sock *sk = ep->base.sk;
 	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
-	ep->hashent = sctp_ep_hashfn(sock_net(sk), ep->base.bind_addr.port);
+	epb = &ep->base;
-	head = &sctp_ep_hashtable[ep->hashent];
+	epb->hashent = sctp_ep_hashfn(sock_net(sk), epb->bind_addr.port);
+
+	head = &sctp_ep_hashtable[epb->hashent];
 	if (rcu_access_pointer(sk->sk_reuseport_cb))
 		reuseport_detach_sock(sk);
 	write_lock(&head->lock);
-	hlist_del_init(&ep->node);
+	hlist_del_init(&epb->node);
 	write_unlock(&head->lock);
 }
@@ -851,6 +856,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
 					const union sctp_addr *paddr)
 {
 	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
 	struct sctp_endpoint *ep;
 	struct sock *sk;
 	__be16 lport;
@@ -860,7 +866,8 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
 	hash = sctp_ep_hashfn(net, ntohs(lport));
 	head = &sctp_ep_hashtable[hash];
 	read_lock(&head->lock);
-	sctp_for_each_hentry(ep, &head->chain) {
+	sctp_for_each_hentry(epb, &head->chain) {
+		ep = sctp_ep(epb);
 		if (sctp_endpoint_is_match(ep, net, laddr))
 			goto hit;
 	}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 72fe6669c5..cdfdbd353c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -134,7 +134,7 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 			dst_hold(tp->dst);
 			sk_setup_caps(sk, tp->dst);
 		}
-		packet->max_size = sk_can_gso(sk) ? READ_ONCE(tp->dst->dev->gso_max_size)
+		packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size
 						  : asoc->pathmtu;
 		rcu_read_unlock();
 	}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a18609f608..ff47091c38 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -547,9 +547,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 		sctp_assoc_update_retran_path(transport->asoc);
 		transport->asoc->rtx_data_chunks += transport->asoc->unack_data;
-		if (transport->pl.state == SCTP_PL_COMPLETE &&
-		    transport->asoc->unack_data)
-			sctp_transport_reset_probe_timer(transport);
 		break;
 	case SCTP_RTXR_FAST_RTX:
 		SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index f13d6a34f3..982a87b3e1 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -161,6 +161,7 @@ static void *sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 {
 	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
 	struct sctp_endpoint *ep;
 	struct sock *sk;
 	int    hash = *(loff_t *)v;
@@ -170,17 +171,18 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 	head = &sctp_ep_hashtable[hash];
 	read_lock_bh(&head->lock);
-	sctp_for_each_hentry(ep, &head->chain) {
-		sk = ep->base.sk;
+	sctp_for_each_hentry(epb, &head->chain) {
+		ep = sctp_ep(epb);
+		sk = epb->sk;
 		if (!net_eq(sock_net(sk), seq_file_net(seq)))
 			continue;
 		seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk,
 			   sctp_sk(sk)->type, sk->sk_state, hash,
-			   ep->base.bind_addr.port,
+			   epb->bind_addr.port,
 			   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
 			   sock_i_ino(sk));
-		sctp_seq_dump_local_addrs(seq, &ep->base);
+		sctp_seq_dump_local_addrs(seq, epb);
 		seq_printf(seq, "\n");
 	}
 	read_unlock_bh(&head->lock);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 35928fefae..ec0f52567c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index cc544a97c4..fb3da4d8f4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -326,6 +326,11 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
 	struct sctp_packet *packet;
 	int len;
+	/* Update socket peer label if first association. */
+	if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+					chunk->skb))
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
 	/* 6.10 Bundling
 	 * An endpoint MUST NOT bundle INIT, INIT ACK or
 	 * SHUTDOWN COMPLETE with any other chunks.
@@ -410,12 +415,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
 	if (!new_asoc)
 		goto nomem;
-	/* Update socket peer label if first association. */
-	if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
-		sctp_association_free(new_asoc);
-		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-	}
-
 	if (sctp_assoc_set_bind_addr_from_ep(new_asoc,
 					     sctp_scope(sctp_source(chunk)),
 					     GFP_ATOMIC) < 0)
@@ -781,10 +780,6 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 		}
 	}
-	if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
-		sctp_association_free(new_asoc);
-		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-	}
 	/* Delay state machine commands until later.
 	 *
@@ -1124,11 +1119,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net,
 	if (!sctp_transport_pl_enabled(transport))
 		return SCTP_DISPOSITION_CONSUME;
-	sctp_transport_pl_send(transport);
-	reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
-	if (!reply)
-		return SCTP_DISPOSITION_NOMEM;
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	if (sctp_transport_pl_send(transport)) {
+		reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+		if (!reply)
+			return SCTP_DISPOSITION_NOMEM;
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	}
 	sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
 			SCTP_TRANSPORT(transport));
@@ -1521,6 +1517,11 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
 	struct sctp_packet *packet;
 	int len;
+	/* Update socket peer label if first association. */
+	if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+					chunk->skb))
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
 	/* 6.10 Bundling
 	 * An endpoint MUST NOT bundle INIT, INIT ACK or
 	 * SHUTDOWN COMPLETE with any other chunks.
@@ -1593,12 +1594,6 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
 	if (!new_asoc)
 		goto nomem;
-	/* Update socket peer label if first association. */
-	if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
-		sctp_association_free(new_asoc);
-		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-	}
-
 	if (sctp_assoc_set_bind_addr_from_ep(new_asoc, sctp_scope(sctp_source(chunk)),
 					     GFP_ATOMIC) < 0)
 		goto nomem;
@@ -2260,7 +2255,8 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
 	}
 	/* Update socket peer label if first association. */
-	if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+	if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+					chunk->skb)) {
 		sctp_association_free(new_asoc);
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
@@ -4897,6 +4893,9 @@ static enum sctp_disposition sctp_sf_violation_chunk(
 {
 	static const char err_str[] = "The following chunk violates protocol:";
+	if (!asoc)
+		return sctp_sf_violation(net, ep, asoc, type, arg, commands);
+
 	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
 				       sizeof(err_str));
 }
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3e1a9600be..6b3c32264c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5068,9 +5068,12 @@ static int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
+	local_bh_disable();
 	sk_sockets_allocated_inc(sk);
 	sock_prot_inuse_add(net, sk->sk_prot, 1);
+	local_bh_enable();
+
 	return 0;
 }
@@ -5096,8 +5099,10 @@ static void sctp_destroy_sock(struct sock *sk)
 		list_del(&sp->auto_asconf_list);
 	}
 	sctp_endpoint_free(sp->ep);
+	local_bh_disable();
 	sk_sockets_allocated_dec(sk);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	local_bh_enable();
 }
 /* Triggered when there are no references on the socket anymore */
@@ -5294,14 +5299,14 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
 			   void *p) {
 	int err = 0;
 	int hash = 0;
-	struct sctp_endpoint *ep;
+	struct sctp_ep_common *epb;
 	struct sctp_hashbucket *head;
 	for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize;
 	     hash++, head++) {
 		read_lock_bh(&head->lock);
-		sctp_for_each_hentry(ep, &head->chain) {
-			err = cb(ep, p);
+		sctp_for_each_hentry(epb, &head->chain) {
+			err = cb(sctp_ep(epb), p);
 			if (err)
 				break;
 		}
@@ -9422,6 +9427,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_sock *newinet;
 	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_endpoint *ep = sp->ep;
 	newsk->sk_type = sk->sk_type;
 	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
@@ -9466,9 +9472,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 		net_enable_timestamp();
 	/* Set newsk security attributes from original sk and connection
-	 * security attribute from asoc.
+	 * security attribute from ep.
 	 */
-	security_sctp_sk_clone(asoc, sk, newsk);
+	security_sctp_sk_clone(ep, sk, newsk);
 }
 static inline void sctp_copy_descendant(struct sock *sk_to,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f8fd987849..133f1719bf 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -213,18 +213,13 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
 void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
 {
+	if (timer_pending(&transport->probe_timer))
+		return;
 	if (!mod_timer(&transport->probe_timer,
 		       jiffies + transport->probe_interval))
 		sctp_transport_hold(transport);
 }
-void sctp_transport_reset_raise_timer(struct sctp_transport *transport)
-{
-	if (!mod_timer(&transport->probe_timer,
-		       jiffies + transport->probe_interval * 30))
-		sctp_transport_hold(transport);
-}
-
 /* This transport has been assigned to an association.
  * Initialize fields from the association or from the sock itself.
  * Register the reference count in the association.
@@ -263,11 +258,12 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 	sctp_transport_pl_update(transport);
 }
-void sctp_transport_pl_send(struct sctp_transport *t)
+bool sctp_transport_pl_send(struct sctp_transport *t)
 {
 	if (t->pl.probe_count < SCTP_MAX_PROBES)
 		goto out;
+	t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
 	t->pl.probe_count = 0;
 	if (t->pl.state == SCTP_PL_BASE) {
 		if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
@@ -302,9 +298,17 @@ void sctp_transport_pl_send(struct sctp_transport *t)
 	}
 out:
+	if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 &&
+	    !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) {
+		t->pl.raise_count++;
+		return false;
+	}
+
 	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
 		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+	t->pl.probe_count++;
+	return true;
 }
 bool sctp_transport_pl_recv(struct sctp_transport *t)
@@ -312,6 +316,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
 	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
 		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+	t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
 	t->pl.pmtu = t->pl.probe_size;
 	t->pl.probe_count = 0;
 	if (t->pl.state == SCTP_PL_BASE) {
@@ -333,14 +338,14 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
 		t->pl.probe_size += SCTP_PL_MIN_STEP;
 		if (t->pl.probe_size >= t->pl.probe_high) {
 			t->pl.probe_high = 0;
+			t->pl.raise_count = 0;
 			t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */
 			t->pl.probe_size = t->pl.pmtu;
 			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
 			sctp_assoc_sync_pmtu(t->asoc);
-			sctp_transport_reset_raise_timer(t);
 		}
-	} else if (t->pl.state == SCTP_PL_COMPLETE) {
+	} else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) {
 		/* Raise probe_size again after 30 * interval in Search Complete */
 		t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
 		t->pl.probe_size += SCTP_PL_MIN_STEP;
@@ -388,7 +393,6 @@ static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
 			t->pl.probe_high = 0;
 			t->pl.pmtu = SCTP_BASE_PLPMTU;
 			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
-			sctp_transport_reset_probe_timer(t);
 			return true;
 		}
 	}
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 196fb6f01b..99a0186cba 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-ccflags-y += -I$(src)
 obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
 smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
-smc-y += smc_tracepoint.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 306d9e8cd1..5c4c0320e8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -50,7 +50,6 @@
 #include "smc_rx.h"
 #include "smc_close.h"
 #include "smc_stats.h"
-#include "smc_tracepoint.h"
 static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
 						 * creation on server
@@ -89,8 +88,8 @@ int smc_hash_sk(struct sock *sk)
 	write_lock_bh(&h->lock);
 	sk_add_node(sk, head);
-	write_unlock_bh(&h->lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	write_unlock_bh(&h->lock);
 	return 0;
 }
@@ -183,7 +182,7 @@ static int smc_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 	struct smc_sock *smc;
-	int rc = 0;
+	int old_state, rc = 0;
 	if (!sk)
 		goto out;
@@ -191,8 +190,10 @@ static int smc_release(struct socket *sock)
 	sock_hold(sk); /* sock_put below */
 	smc = smc_sk(sk);
+	old_state = sk->sk_state;
+
 	/* cleanup for a dangling non-blocking connect */
-	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+	if (smc->connect_nonblock && old_state == SMC_INIT)
 		tcp_abort(smc->clcsock->sk, ECONNABORTED);
 	if (cancel_work_sync(&smc->connect_work))
@@ -206,6 +207,10 @@ static int smc_release(struct socket *sock)
 	else
 		lock_sock(sk);
+	if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
+	    !smc->use_fallback)
+		smc_close_active_abort(smc);
+
 	rc = __smc_release(smc);
 	/* detach socket */
@@ -446,47 +451,6 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 	return 0;
 }
-static bool smc_isascii(char *hostname)
-{
-	int i;
-
-	for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
-		if (!isascii(hostname[i]))
-			return false;
-	return true;
-}
-
-static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
-					struct smc_clc_msg_accept_confirm *clc)
-{
-	struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
-		(struct smc_clc_msg_accept_confirm_v2 *)clc;
-	struct smc_clc_first_contact_ext *fce;
-	int clc_v2_len;
-
-	if (clc->hdr.version == SMC_V1 ||
-	    !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
-		return;
-
-	if (smc->conn.lgr->is_smcd) {
-		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
-		       SMC_MAX_EID_LEN);
-		clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
-					 d1);
-	} else {
-		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
-		       SMC_MAX_EID_LEN);
-		clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
-					 r1);
-	}
-	fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
-	smc->conn.lgr->peer_os = fce->os_type;
-	smc->conn.lgr->peer_smc_release = fce->release;
-	if (smc_isascii(fce->hostname))
-		memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
-		       SMC_MAX_HOSTNAME_LEN);
-}
-
 static void smcr_conn_save_peer_info(struct smc_sock *smc,
 				     struct smc_clc_msg_accept_confirm *clc)
 {
@@ -499,6 +463,16 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
 	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 }
+static bool smc_isascii(char *hostname)
+{
+	int i;
+
+	for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
+		if (!isascii(hostname[i]))
+			return false;
+	return true;
+}
+
 static void smcd_conn_save_peer_info(struct smc_sock *smc,
 				     struct smc_clc_msg_accept_confirm *clc)
 {
@@ -510,6 +484,22 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
 	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
 	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
 	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
+	if (clc->hdr.version > SMC_V1 &&
+	    (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
+		struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+			(struct smc_clc_msg_accept_confirm_v2 *)clc;
+		struct smc_clc_first_contact_ext *fce =
+			(struct smc_clc_first_contact_ext *)
+				(((u8 *)clc_v2) + sizeof(*clc_v2));
+
+		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
+		       SMC_MAX_EID_LEN);
+		smc->conn.lgr->peer_os = fce->os_type;
+		smc->conn.lgr->peer_smc_release = fce->release;
+		if (smc_isascii(fce->hostname))
+			memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
+			       SMC_MAX_HOSTNAME_LEN);
+	}
 }
 static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -519,16 +509,14 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
 		smcd_conn_save_peer_info(smc, clc);
 	else
 		smcr_conn_save_peer_info(smc, clc);
-	smc_conn_save_peer_info_fce(smc, clc);
 }
 static void smc_link_save_peer_info(struct smc_link *link,
-				    struct smc_clc_msg_accept_confirm *clc,
-				    struct smc_init_info *ini)
+				    struct smc_clc_msg_accept_confirm *clc)
 {
 	link->peer_qpn = ntoh24(clc->r0.qpn);
-	memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
-	memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
+	memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
+	memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
 	link->peer_psn = ntoh24(clc->r0.psn);
 	link->peer_mtu = clc->r0.qp_mtu;
 }
@@ -681,7 +669,6 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
 	smc->use_fallback = true;
 	smc->fallback_rsn = reason_code;
 	smc_stat_fallback(smc);
-	trace_smc_switch_to_fallback(smc, reason_code);
 	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
 		smc->clcsock->file = smc->sk.sk_socket->file;
 		smc->clcsock->file->private_data = smc->clcsock;
@@ -759,13 +746,9 @@ static void smc_conn_abort(struct smc_sock *smc, int local_first)
 {
 	struct smc_connection *conn = &smc->conn;
 	struct smc_link_group *lgr = conn->lgr;
-	bool lgr_valid = false;
-
-	if (smc_conn_lgr_valid(conn))
-		lgr_valid = true;
 	smc_conn_free(conn);
-	if (local_first && lgr_valid)
+	if (local_first)
 		smc_lgr_cleanup_early(lgr);
 }
@@ -778,9 +761,7 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
 	 * used for the internal TCP socket
 	 */
 	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
-	if (!ini->check_smcrv2 && !ini->ib_dev)
-		return SMC_CLC_DECL_NOSMCRDEV;
-	if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
+	if (!ini->ib_dev)
 		return SMC_CLC_DECL_NOSMCRDEV;
 	return 0;
 }
@@ -864,42 +845,27 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
 	int rc = 0;
 	/* check if there is an ism device available */
-	if (!(ini->smcd_version & SMC_V1) ||
-	    smc_find_ism_device(smc, ini) ||
-	    smc_connect_ism_vlan_setup(smc, ini))
-		ini->smcd_version &= ~SMC_V1;
-	/* else ISM V1 is supported for this connection */
-
-	/* check if there is an rdma device available */
-	if (!(ini->smcr_version & SMC_V1) ||
-	    smc_find_rdma_device(smc, ini))
-		ini->smcr_version &= ~SMC_V1;
-	/* else RDMA is supported for this connection */
-
smc_indicated_type(ini->smcd_version & SMC_V1, - ini->smcr_version & SMC_V1); - - /* check if there is an ism v2 device available */ - if (!(ini->smcd_version & SMC_V2) || - !smc_ism_is_v2_capable() || - smc_find_ism_v2_device_clnt(smc, ini)) - ini->smcd_version &= ~SMC_V2; - - /* check if there is an rdma v2 device available */ - ini->check_smcrv2 = true; - ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr; - if (!(ini->smcr_version & SMC_V2) || - smc->clcsock->sk->sk_family != AF_INET || - !smc_clc_ueid_count() || - smc_find_rdma_device(smc, ini)) - ini->smcr_version &= ~SMC_V2; - ini->check_smcrv2 = false; - - ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2, - ini->smcr_version & SMC_V2); + if (ini->smcd_version & SMC_V1) { + if (smc_find_ism_device(smc, ini) || + smc_connect_ism_vlan_setup(smc, ini)) { + if (ini->smc_type_v1 == SMC_TYPE_B) + ini->smc_type_v1 = SMC_TYPE_R; + else + ini->smc_type_v1 = SMC_TYPE_N; + } /* else ISM V1 is supported for this connection */ + if (smc_find_rdma_device(smc, ini)) { + if (ini->smc_type_v1 == SMC_TYPE_B) + ini->smc_type_v1 = SMC_TYPE_D; + else + ini->smc_type_v1 = SMC_TYPE_N; + } /* else RDMA is supported for this connection */ + } + if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini)) + ini->smc_type_v2 = SMC_TYPE_N; /* if neither ISM nor RDMA are supported, fallback */ - if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) + if (!smcr_indicated(ini->smc_type_v1) && + ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) rc = SMC_CLC_DECL_NOSMCDEV; return rc; @@ -939,64 +905,6 @@ static int smc_connect_clc(struct smc_sock *smc, SMC_CLC_ACCEPT, CLC_WAIT_TIME); } -void smc_fill_gid_list(struct smc_link_group *lgr, - struct smc_gidlist *gidlist, - struct smc_ib_device *known_dev, u8 *known_gid) -{ - struct smc_init_info *alt_ini = NULL; - - memset(gidlist, 0, sizeof(*gidlist)); - memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE); - - alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL); - if (!alt_ini) - goto out; - - alt_ini->vlan_id = lgr->vlan_id; - alt_ini->check_smcrv2 = true; - alt_ini->smcrv2.saddr = lgr->saddr; - smc_pnet_find_alt_roce(lgr, alt_ini, known_dev); - - if (!alt_ini->smcrv2.ib_dev_v2) - goto out; - - memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2, - SMC_GID_SIZE); - -out: - kfree(alt_ini); -} - -static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm *aclc, - struct smc_init_info *ini) -{ - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - struct smc_clc_first_contact_ext *fce = - (struct smc_clc_first_contact_ext *) - (((u8 *)clc_v2) + sizeof(*clc_v2)); - - if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) - return 0; - - if (fce->v2_direct) { - memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN); - ini->smcrv2.uses_gateway = false; - } else { - if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr, - smc_ib_gid_to_ipv4(aclc->r0.lcl.gid), - ini->smcrv2.nexthop_mac, - &ini->smcrv2.uses_gateway)) - return SMC_CLC_DECL_NOROUTE; - if (!ini->smcrv2.uses_gateway) { - /* mismatch: peer claims indirect, but its direct */ - return SMC_CLC_DECL_NOINDIRECT; - } - } - return 0; -} - /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, @@ -1004,18 +912,11 @@ static int smc_connect_rdma(struct smc_sock *smc, { int i, reason_code = 0; struct smc_link *link; - u8 *eid = 
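The ISM/RDMA fallback ladder in smc_find_proposal_devices() encodes a small decision matrix: which CLC type to indicate given which transports survived probing. A standalone sketch of that mapping (the SMC_TYPE_* values are assumed to mirror net/smc/smc_clc.h):

#include <stdbool.h>

enum smc_type { SMC_TYPE_R = 0, SMC_TYPE_D = 1, SMC_TYPE_N = 2, SMC_TYPE_B = 3 };

/* which CLC type to indicate, given which transports survived probing */
static enum smc_type indicated_type(bool have_ism, bool have_rdma)
{
	if (have_ism && have_rdma)
		return SMC_TYPE_B;	/* both SMC-D and SMC-R are possible */
	if (have_ism)
		return SMC_TYPE_D;	/* ISM device only */
	if (have_rdma)
		return SMC_TYPE_R;	/* RoCE device only */
	return SMC_TYPE_N;		/* neither: decline or TCP fallback */
}
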
NULL; ini->is_smcd = false; + ini->ib_lcl = &aclc->r0.lcl; ini->ib_clcqpn = ntoh24(aclc->r0.qpn); ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; - memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN); - memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE); - memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN); - - reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini); - if (reason_code) - return reason_code; mutex_lock(&smc_client_lgr_pending); reason_code = smc_conn_create(smc, ini); @@ -1037,9 +938,8 @@ static int smc_connect_rdma(struct smc_sock *smc, if (l->peer_qpn == ntoh24(aclc->r0.qpn) && !memcmp(l->peer_gid, &aclc->r0.lcl.gid, SMC_GID_SIZE) && - (aclc->hdr.version > SMC_V1 || - !memcmp(l->peer_mac, &aclc->r0.lcl.mac, - sizeof(l->peer_mac)))) { + !memcmp(l->peer_mac, &aclc->r0.lcl.mac, + sizeof(l->peer_mac))) { link = l; break; } @@ -1058,7 +958,7 @@ static int smc_connect_rdma(struct smc_sock *smc, } if (ini->first_contact_local) - smc_link_save_peer_info(link, aclc, ini); + smc_link_save_peer_info(link, aclc); if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) { reason_code = SMC_CLC_DECL_ERR_RTOK; @@ -1081,18 +981,8 @@ static int smc_connect_rdma(struct smc_sock *smc, } smc_rmb_sync_sg_for_device(&smc->conn); - if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->r1.eid; - if (ini->first_contact_local) - smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist, - link->smcibdev, link->gid); - } - reason_code = smc_clc_send_confirm(smc, ini->first_contact_local, - aclc->hdr.version, eid, ini); + SMC_V1); if (reason_code) goto connect_abort; @@ -1132,7 +1022,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, int i; for (i = 0; i < ini->ism_offered_cnt + 1; i++) { - if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) { + if (ini->ism_chid[i] == ntohs(aclc->chid)) { ini->ism_selected = i; return 0; } @@ -1146,7 +1036,6 @@ static int smc_connect_ism(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { - u8 *eid = NULL; int rc = 0; ini->is_smcd = true; @@ -1182,15 +1071,8 @@ static int smc_connect_ism(struct smc_sock *smc, smc_rx_init(smc); smc_tx_init(smc); - if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->d1.eid; - } - rc = smc_clc_send_confirm(smc, ini->first_contact_local, - aclc->hdr.version, eid, NULL); + aclc->hdr.version); if (rc) goto connect_abort; mutex_unlock(&smc_server_lgr_pending); @@ -1213,24 +1095,17 @@ static int smc_connect_ism(struct smc_sock *smc, static int smc_connect_check_aclc(struct smc_init_info *ini, struct smc_clc_msg_accept_confirm *aclc) { - if (aclc->hdr.typev1 != SMC_TYPE_R && - aclc->hdr.typev1 != SMC_TYPE_D) + if ((aclc->hdr.typev1 == SMC_TYPE_R && + !smcr_indicated(ini->smc_type_v1)) || + (aclc->hdr.typev1 == SMC_TYPE_D && + ((!smcd_indicated(ini->smc_type_v1) && + !smcd_indicated(ini->smc_type_v2)) || + (aclc->hdr.version == SMC_V1 && + !smcd_indicated(ini->smc_type_v1)) || + (aclc->hdr.version == SMC_V2 && + !smcd_indicated(ini->smc_type_v2))))) return SMC_CLC_DECL_MODEUNSUPP; - if (aclc->hdr.version >= SMC_V2) { - if ((aclc->hdr.typev1 == SMC_TYPE_R && - !smcr_indicated(ini->smc_type_v2)) || - (aclc->hdr.typev1 == SMC_TYPE_D && - !smcd_indicated(ini->smc_type_v2))) - return SMC_CLC_DECL_MODEUNSUPP; - } else { - if ((aclc->hdr.typev1 == SMC_TYPE_R && - 
!smcr_indicated(ini->smc_type_v1)) || - (aclc->hdr.typev1 == SMC_TYPE_D && - !smcd_indicated(ini->smc_type_v1))) - return SMC_CLC_DECL_MODEUNSUPP; - } - return 0; } @@ -1261,15 +1136,14 @@ static int __smc_connect(struct smc_sock *smc) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, version); - ini->smcd_version = SMC_V1 | SMC_V2; - ini->smcr_version = SMC_V1 | SMC_V2; + ini->smcd_version = SMC_V1; + ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0; ini->smc_type_v1 = SMC_TYPE_B; - ini->smc_type_v2 = SMC_TYPE_B; + ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N; /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { ini->smcd_version &= ~SMC_V1; - ini->smcr_version = 0; ini->smc_type_v1 = SMC_TYPE_N; if (!ini->smcd_version) { rc = SMC_CLC_DECL_GETVLANERR; @@ -1297,17 +1171,15 @@ static int __smc_connect(struct smc_sock *smc) /* check if smc modes and versions of CLC proposal and accept match */ rc = smc_connect_check_aclc(ini, aclc); version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2; + ini->smcd_version = version; if (rc) goto vlan_cleanup; /* depending on previous steps, connect using rdma or ism */ - if (aclc->hdr.typev1 == SMC_TYPE_R) { - ini->smcr_version = version; + if (aclc->hdr.typev1 == SMC_TYPE_R) rc = smc_connect_rdma(smc, aclc, ini); - } else if (aclc->hdr.typev1 == SMC_TYPE_D) { - ini->smcd_version = version; + else if (aclc->hdr.typev1 == SMC_TYPE_D) rc = smc_connect_ism(smc, aclc, ini); - } if (rc) goto vlan_cleanup; @@ -1588,7 +1460,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); /* initial contact - try to establish second link */ - smc_llc_srv_add_link(link, NULL); + smc_llc_srv_add_link(link); return 0; } @@ -1669,48 +1541,33 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, ini->smc_type_v1 = pclc->hdr.typev1; ini->smc_type_v2 = pclc->hdr.typev2; - ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0; - ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0; - if (pclc->hdr.version > SMC_V1) { - if (smcd_indicated(ini->smc_type_v2)) - ini->smcd_version |= SMC_V2; - if (smcr_indicated(ini->smc_type_v2)) - ini->smcr_version |= SMC_V2; - } - if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) { + ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; + if (pclc->hdr.version > SMC_V1) + ini->smcd_version |= + ini->smc_type_v2 != SMC_TYPE_N ? 
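The restored smc_connect_check_aclc() condition is dense; unrolled, it asks whether the type the server accepted was actually proposed for the negotiated version. A hedged sketch of the predicate, with constants assumed to match net/smc (SMC-R is V1-only after this change):

#include <stdbool.h>

enum smc_type { SMC_TYPE_R = 0, SMC_TYPE_D = 1, SMC_TYPE_N = 2, SMC_TYPE_B = 3 };
#define SMC_V1 1
#define SMC_V2 2

/* was the type the server accepted actually proposed for that version? */
static bool aclc_mode_ok(enum smc_type typev1, int version,
			 enum smc_type proposed_v1, enum smc_type proposed_v2)
{
	bool r1 = proposed_v1 == SMC_TYPE_R || proposed_v1 == SMC_TYPE_B;
	bool d1 = proposed_v1 == SMC_TYPE_D || proposed_v1 == SMC_TYPE_B;
	bool d2 = proposed_v2 == SMC_TYPE_D || proposed_v2 == SMC_TYPE_B;

	if (typev1 == SMC_TYPE_R)
		return r1;
	if (typev1 == SMC_TYPE_D)
		return version == SMC_V1 ? d1 : d2;
	return false;			/* anything else is declined */
}
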
SMC_V2 : 0; + if (!(ini->smcd_version & SMC_V2)) { rc = SMC_CLC_DECL_PEERNOSMC; goto out; } + if (!smc_ism_is_v2_capable()) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOISM2SUPP; + goto out; + } pclc_v2_ext = smc_get_clc_v2_ext(pclc); if (!pclc_v2_ext) { ini->smcd_version &= ~SMC_V2; - ini->smcr_version &= ~SMC_V2; rc = SMC_CLC_DECL_NOV2EXT; goto out; } pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); - if (ini->smcd_version & SMC_V2) { - if (!smc_ism_is_v2_capable()) { - ini->smcd_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOISM2SUPP; - } else if (!pclc_smcd_v2_ext) { - ini->smcd_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOV2DEXT; - } else if (!pclc_v2_ext->hdr.eid_cnt && - !pclc_v2_ext->hdr.flag.seid) { - ini->smcd_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOUEID; - } - } - if (ini->smcr_version & SMC_V2) { - if (!pclc_v2_ext->hdr.eid_cnt) { - ini->smcr_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOUEID; - } + if (!pclc_smcd_v2_ext) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOV2DEXT; } out: - if (!ini->smcd_version && !ini->smcr_version) + if (!ini->smcd_version) return rc; return 0; @@ -1830,6 +1687,11 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, pclc_smcd = smc_get_clc_msg_smcd(pclc); smc_v2_ext = smc_get_clc_v2_ext(pclc); smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); + if (!smcd_v2_ext || + !smc_v2_ext->hdr.flag.seid) { /* no system EID support for SMCD */ + smc_find_ism_store_rc(SMC_CLC_DECL_NOSEID, ini); + goto not_found; + } mutex_lock(&smcd_dev_list.mutex); if (pclc_smcd->ism.chid) @@ -1847,16 +1709,14 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, } mutex_unlock(&smcd_dev_list.mutex); - if (!ini->ism_dev[0]) { - smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); + if (ini->ism_dev[0]) { + smc_ism_get_system_eid(ini->ism_dev[0], &eid); + if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN)) + goto not_found; + } else { goto not_found; } - smc_ism_get_system_eid(&eid); - if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, - smcd_v2_ext->system_eid, eid)) - goto not_found; - /* separate - outside the smcd_dev_list.lock */ smcd_version = ini->smcd_version; for (i = 0; i < matches; i++) { @@ -1873,7 +1733,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, } /* no V2 ISM device could be initialized */ ini->smcd_version = smcd_version; /* restore original value */ - ini->negotiated_eid[0] = 0; not_found: ini->smcd_version &= ~SMC_V2; @@ -1903,7 +1762,6 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, not_found: smc_find_ism_store_rc(rc, ini); - ini->smcd_version &= ~SMC_V1; ini->ism_dev[0] = NULL; ini->is_smcd = false; } @@ -1922,69 +1780,24 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) return 0; } -static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, - struct smc_clc_msg_proposal *pclc, - struct smc_init_info *ini) -{ - struct smc_clc_v2_extension *smc_v2_ext; - u8 smcr_version; - int rc; - - if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2)) - goto not_found; - - smc_v2_ext = smc_get_clc_v2_ext(pclc); - if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL)) - goto not_found; - - /* prepare RDMA check */ - memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN); - memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE); - memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN); - ini->check_smcrv2 = true; - ini->smcrv2.clc_sk = new_smc->clcsock->sk; - ini->smcrv2.saddr = 
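On the listen side, the version handling above boils the proposal header down to a two-bit mask, with ISM-v2 capability gating the V2 bit. A compact sketch of that derivation, treating SMC_V1/SMC_V2 as the bit flags the & tests above imply:

#include <stdbool.h>

#define SMC_TYPE_N 2
#define SMC_V1 1		/* bit 0 of the version mask */
#define SMC_V2 2		/* bit 1 of the version mask */

/* derive the set of usable versions from the client proposal header */
static unsigned int smcd_version_mask(int type_v1, int type_v2,
				      int pclc_version, bool ism_v2_capable)
{
	unsigned int mask = 0;

	if (type_v1 != SMC_TYPE_N)
		mask |= SMC_V1;
	if (pclc_version > SMC_V1 && type_v2 != SMC_TYPE_N && ism_v2_capable)
		mask |= SMC_V2;
	return mask;
}
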
new_smc->clcsock->sk->sk_rcv_saddr; - ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce); - rc = smc_find_rdma_device(new_smc, ini); - if (rc) { - smc_find_ism_store_rc(rc, ini); - goto not_found; - } - if (!ini->smcrv2.uses_gateway) - memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN); - - smcr_version = ini->smcr_version; - ini->smcr_version = SMC_V2; - rc = smc_listen_rdma_init(new_smc, ini); - if (!rc) - rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local); - if (!rc) - return; - ini->smcr_version = smcr_version; - smc_find_ism_store_rc(rc, ini); - -not_found: - ini->smcr_version &= ~SMC_V2; - ini->check_smcrv2 = false; -} - static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { int rc; - if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1)) + if (!smcr_indicated(ini->smc_type_v1)) return SMC_CLC_DECL_NOSMCDEV; /* prepare RDMA check */ - memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN); - memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE); - memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN); + ini->ib_lcl = &pclc->lcl; rc = smc_find_rdma_device(new_smc, ini); if (rc) { /* no RDMA device found */ - return SMC_CLC_DECL_NOSMCDEV; + if (ini->smc_type_v1 == SMC_TYPE_B) + /* neither ISM nor RDMA device found */ + rc = SMC_CLC_DECL_NOSMCDEV; + return rc; } rc = smc_listen_rdma_init(new_smc, ini); if (rc) @@ -1997,60 +1810,51 @@ static int smc_listen_find_device(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { - int prfx_rc; + int rc; /* check for ISM device matching V2 proposed device */ smc_find_ism_v2_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0; - /* check for matching IP prefix and subnet length (V1) */ - prfx_rc = smc_listen_prfx_check(new_smc, pclc); - if (prfx_rc) - smc_find_ism_store_rc(prfx_rc, ini); + if (!(ini->smcd_version & SMC_V1)) + return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV; + + /* check for matching IP prefix and subnet length */ + rc = smc_listen_prfx_check(new_smc, pclc); + if (rc) + return ini->rc ?: rc; /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) return ini->rc ?: SMC_CLC_DECL_GETVLANERR; /* check for ISM device matching V1 proposed device */ - if (!prfx_rc) - smc_find_ism_v1_device_serv(new_smc, pclc, ini); + smc_find_ism_v1_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0; - if (!smcr_indicated(pclc->hdr.typev1) && - !smcr_indicated(pclc->hdr.typev2)) + if (pclc->hdr.typev1 == SMC_TYPE_D) /* skip RDMA and decline */ return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV; - /* check if RDMA V2 is available */ - smc_find_rdma_v2_device_serv(new_smc, pclc, ini); - if (ini->smcrv2.ib_dev_v2) - return 0; + /* check if RDMA is available */ + rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); + smc_find_ism_store_rc(rc, ini); - /* check if RDMA V1 is available */ - if (!prfx_rc) { - int rc; - - rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); - smc_find_ism_store_rc(rc, ini); - return (!rc) ? 0 : ini->rc; - } - return SMC_CLC_DECL_NOSMCDEV; + return (!rc) ? 
0 : ini->rc; } /* listen worker: finish RDMA setup */ static int smc_listen_rdma_finish(struct smc_sock *new_smc, struct smc_clc_msg_accept_confirm *cclc, - bool local_first, - struct smc_init_info *ini) + bool local_first) { struct smc_link *link = new_smc->conn.lnk; int reason_code = 0; if (local_first) - smc_link_save_peer_info(link, cclc, ini); + smc_link_save_peer_info(link, cclc); if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) return SMC_CLC_DECL_ERR_RTOK; @@ -2071,13 +1875,12 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; struct socket *newclcsock = new_smc->clcsock; struct smc_clc_msg_accept_confirm *cclc; struct smc_clc_msg_proposal_area *buf; struct smc_clc_msg_proposal *pclc; struct smc_init_info *ini = NULL; - u8 proposal_version = SMC_V1; - u8 accept_version; int rc = 0; if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) @@ -2111,9 +1914,7 @@ static void smc_listen_work(struct work_struct *work) SMC_CLC_PROPOSAL, CLC_WAIT_TIME); if (rc) goto out_decl; - - if (pclc->hdr.version > SMC_V1) - proposal_version = SMC_V2; + version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version; /* IPSec connections opt out of SMC optimizations */ if (using_ipsec(new_smc)) { @@ -2143,9 +1944,8 @@ static void smc_listen_work(struct work_struct *work) goto out_unlock; /* send SMC Accept CLC message */ - accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version; rc = smc_clc_send_accept(new_smc, ini->first_contact_local, - accept_version, ini->negotiated_eid); + ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1); if (rc) goto out_unlock; @@ -2167,7 +1967,7 @@ static void smc_listen_work(struct work_struct *work) /* finish worker */ if (!ini->is_smcd) { rc = smc_listen_rdma_finish(new_smc, cclc, - ini->first_contact_local, ini); + ini->first_contact_local); if (rc) goto out_unlock; mutex_unlock(&smc_server_lgr_pending); @@ -2181,7 +1981,7 @@ static void smc_listen_work(struct work_struct *work) mutex_unlock(&smc_server_lgr_pending); out_decl: smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0, - proposal_version); + version); out_free: kfree(ini); kfree(buf); @@ -2852,8 +2652,8 @@ static const struct proto_ops smc_sock_ops = { .splice_read = smc_splice_read, }; -static int __smc_create(struct net *net, struct socket *sock, int protocol, - int kern, struct socket *clcsock) +static int smc_create(struct net *net, struct socket *sock, int protocol, + int kern) { int family = (protocol == SMCPROTO_SMC6) ? 
PF_INET6 : PF_INET; struct smc_sock *smc; @@ -2878,19 +2678,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, smc = smc_sk(sk); smc->use_fallback = false; /* assume rdma capability first */ smc->fallback_rsn = 0; - - rc = 0; - if (!clcsock) { - rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, - &smc->clcsock); - if (rc) { - sk_common_release(sk); - goto out; - } - } else { - smc->clcsock = clcsock; + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, + &smc->clcsock); + if (rc) { + sk_common_release(sk); + goto out; } - smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); @@ -2898,76 +2691,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, return rc; } -static int smc_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - return __smc_create(net, sock, protocol, kern, NULL); -} - static const struct net_proto_family smc_sock_family_ops = { .family = PF_SMC, .owner = THIS_MODULE, .create = smc_create, }; -static int smc_ulp_init(struct sock *sk) -{ - struct socket *tcp = sk->sk_socket; - struct net *net = sock_net(sk); - struct socket *smcsock; - int protocol, ret; - - /* only TCP can be replaced */ - if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP || - (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) - return -ESOCKTNOSUPPORT; - /* don't handle wq now */ - if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list) - return -ENOTCONN; - - if (sk->sk_family == AF_INET) - protocol = SMCPROTO_SMC; - else - protocol = SMCPROTO_SMC6; - - smcsock = sock_alloc(); - if (!smcsock) - return -ENFILE; - - smcsock->type = SOCK_STREAM; - __module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */ - ret = __smc_create(net, smcsock, protocol, 1, tcp); - if (ret) { - sock_release(smcsock); /* module_put() which ops won't be NULL */ - return ret; - } - - /* replace tcp socket to smc */ - smcsock->file = tcp->file; - smcsock->file->private_data = smcsock; - smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */ - smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */ - tcp->file = NULL; - - return ret; -} - -static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk, - const gfp_t priority) -{ - struct inet_connection_sock *icsk = inet_csk(newsk); - - /* don't inherit ulp ops to child when listen */ - icsk->icsk_ulp_ops = NULL; -} - -static struct tcp_ulp_ops smc_ulp_ops __read_mostly = { - .name = "smc", - .owner = THIS_MODULE, - .init = smc_ulp_init, - .clone = smc_ulp_clone, -}; - unsigned int smc_net_id; static __net_init int smc_net_init(struct net *net) @@ -3078,12 +2807,6 @@ static int __init smc_init(void) goto out_sock; } - rc = tcp_register_ulp(&smc_ulp_ops); - if (rc) { - pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); - goto out_sock; - } - static_branch_enable(&tcp_have_smc); return 0; @@ -3112,7 +2835,6 @@ static int __init smc_init(void) static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); - tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); smc_ib_unregister_client(); @@ -3122,7 +2844,6 @@ static void __exit smc_exit(void) proto_unregister(&smc_proto); smc_pnet_exit(); smc_nl_exit(); - smc_clc_exit(); unregister_pernet_subsys(&smc_net_stat_ops); unregister_pernet_subsys(&smc_net_ops); rcu_barrier(); @@ -3135,4 +2856,3 @@ 
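From userspace, the smc_create() entry point restored here is reached through an ordinary socket(2) call; the kernel creates the internal TCP clcsock behind it. A small usage example, assuming the AF_SMC and SMCPROTO_* values from the kernel headers:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_SMC
#define AF_SMC 43		/* as in linux/socket.h */
#endif
#define SMCPROTO_SMC 0		/* SMC over IPv4 */
#define SMCPROTO_SMC6 1		/* SMC over IPv6 */

int main(void)
{
	int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);

	if (fd < 0) {
		perror("socket(AF_SMC)");
		return 1;
	}
	printf("SMC socket created, fd=%d\n", fd);
	close(fd);
	return 0;
}
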
MODULE_AUTHOR("Ursula Braun "); MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); -MODULE_ALIAS_TCP_ULP("smc"); diff --git a/net/smc/smc.h b/net/smc/smc.h index 37b2001a02..930544f7b2 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -29,6 +29,9 @@ * devices */ +#define SMC_MAX_HOSTNAME_LEN 32 +#define SMC_MAX_EID_LEN 32 + extern struct proto smc_proto; extern struct proto smc_proto6; @@ -56,20 +59,7 @@ enum smc_state { /* possible states of an SMC socket */ struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ - union { - u8 type; -#if defined(__BIG_ENDIAN_BITFIELD) - struct { - u8 llc_version:4, - llc_type:4; - }; -#elif defined(__LITTLE_ENDIAN_BITFIELD) - struct { - u8 llc_type:4, - llc_version:4; - }; -#endif - }; + u8 type; } __aligned(1); struct smc_cdc_conn_state_flags { @@ -227,7 +217,6 @@ struct smc_connection { */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ - u8 freed : 1; /* normal termiation */ u8 out_of_sync : 1; /* out of sync with peer */ }; @@ -323,12 +312,7 @@ static inline bool using_ipsec(struct smc_sock *smc) } #endif -struct smc_gidlist; - struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); -void smc_fill_gid_list(struct smc_link_group *lgr, - struct smc_gidlist *gidlist, - struct smc_ib_device *known_dev, u8 *known_gid); #endif /* __SMC_H */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 9d5a971689..84c8a4374f 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -197,8 +197,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) { int rc; - if (!smc_conn_lgr_valid(conn) || - (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) + if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) return -EPIPE; if (conn->lgr->is_smcd) { diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ce27399b38..6ec1ebe878 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -26,12 +26,10 @@ #include "smc_clc.h" #include "smc_ib.h" #include "smc_ism.h" -#include "smc_netlink.h" #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 -#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108 #define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ @@ -41,297 +39,6 @@ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; -struct smc_clc_eid_table { - rwlock_t lock; - struct list_head list; - u8 ueid_cnt; - u8 seid_enabled; -}; - -static struct smc_clc_eid_table smc_clc_eid_table; - -struct smc_clc_eid_entry { - struct list_head list; - u8 eid[SMC_MAX_EID_LEN]; -}; - -/* The size of a user EID is 32 characters. - * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'. - * Blanks should only be used to pad to the expected size. - * First character must be alphanumeric. - */ -static bool smc_clc_ueid_valid(char *ueid) -{ - char *end = ueid + SMC_MAX_EID_LEN; - - while (--end >= ueid && isspace(*end)) - ; - if (end < ueid) - return false; - if (!isalnum(*ueid) || islower(*ueid)) - return false; - while (ueid <= end) { - if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' 
&& - *ueid != '-') - return false; - ueid++; - } - return true; -} - -static int smc_clc_ueid_add(char *ueid) -{ - struct smc_clc_eid_entry *new_ueid, *tmp_ueid; - int rc; - - if (!smc_clc_ueid_valid(ueid)) - return -EINVAL; - - /* add a new ueid entry to the ueid table if there isn't one */ - new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL); - if (!new_ueid) - return -ENOMEM; - memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN); - - write_lock(&smc_clc_eid_table.lock); - if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) { - rc = -ERANGE; - goto err_out; - } - list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { - if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) { - rc = -EEXIST; - goto err_out; - } - } - list_add_tail(&new_ueid->list, &smc_clc_eid_table.list); - smc_clc_eid_table.ueid_cnt++; - write_unlock(&smc_clc_eid_table.lock); - return 0; - -err_out: - write_unlock(&smc_clc_eid_table.lock); - kfree(new_ueid); - return rc; -} - -int smc_clc_ueid_count(void) -{ - int count; - - read_lock(&smc_clc_eid_table.lock); - count = smc_clc_eid_table.ueid_cnt; - read_unlock(&smc_clc_eid_table.lock); - - return count; -} - -int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info) -{ - struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; - char *ueid; - - if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) - return -EINVAL; - ueid = (char *)nla_data(nla_ueid); - - return smc_clc_ueid_add(ueid); -} - -/* remove one or all ueid entries from the table */ -static int smc_clc_ueid_remove(char *ueid) -{ - struct smc_clc_eid_entry *lst_ueid, *tmp_ueid; - int rc = -ENOENT; - - /* remove table entry */ - write_lock(&smc_clc_eid_table.lock); - list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list, - list) { - if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) { - list_del(&lst_ueid->list); - smc_clc_eid_table.ueid_cnt--; - kfree(lst_ueid); - rc = 0; - } - } - if (!rc && !smc_clc_eid_table.ueid_cnt) { - smc_clc_eid_table.seid_enabled = 1; - rc = -EAGAIN; /* indicate success and enabling of seid */ - } - write_unlock(&smc_clc_eid_table.lock); - return rc; -} - -int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info) -{ - struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; - char *ueid; - - if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) - return -EINVAL; - ueid = (char *)nla_data(nla_ueid); - - return smc_clc_ueid_remove(ueid); -} - -int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info) -{ - smc_clc_ueid_remove(NULL); - return 0; -} - -static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, - u32 flags, char *ueid) -{ - char ueid_str[SMC_MAX_EID_LEN + 1]; - void *hdr; - - hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family, - flags, SMC_NETLINK_DUMP_UEID); - if (!hdr) - return -ENOMEM; - snprintf(ueid_str, sizeof(ueid_str), "%s", ueid); - if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { - genlmsg_cancel(skb, hdr); - return -EMSGSIZE; - } - genlmsg_end(skb, hdr); - return 0; -} - -static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq, - int start_idx) -{ - struct smc_clc_eid_entry *lst_ueid; - int idx = 0; - - read_lock(&smc_clc_eid_table.lock); - list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) { - if (idx++ < start_idx) - continue; - if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI, - lst_ueid->eid)) { - --idx; - break; - } - } - read_unlock(&smc_clc_eid_table.lock); - return idx; -} - -int smc_nl_dump_ueid(struct sk_buff *skb, struct 
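The comment in the removed table code spells out the UEID syntax: 32 bytes, upper-case (single-byte) alphanumerics plus '.' and '-', blanks only as right padding, first character alphanumeric. A standalone checker mirroring that logic:

#include <ctype.h>
#include <stdbool.h>

#define SMC_MAX_EID_LEN 32

/* 32 bytes, upper-case alphanumerics plus '.'/'-', blank padding on
 * the right, first character alphanumeric */
static bool ueid_valid(const char *ueid)
{
	const char *end = ueid + SMC_MAX_EID_LEN;
	const char *p;

	while (end > ueid && isspace((unsigned char)end[-1]))
		end--;				/* strip trailing blanks */
	if (end == ueid)
		return false;			/* all blanks */
	if (!isalnum((unsigned char)*ueid) || islower((unsigned char)*ueid))
		return false;			/* first char alphanumeric */
	for (p = ueid; p < end; p++) {
		unsigned char c = (unsigned char)*p;

		if ((!isalnum(c) || islower(c)) && c != '.' && c != '-')
			return false;
	}
	return true;
}
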
netlink_callback *cb) -{ - struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); - int idx; - - idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, cb_ctx->pos[0]); - - cb_ctx->pos[0] = idx; - return skb->len; -} - -int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); - char seid_str[SMC_MAX_EID_LEN + 1]; - u8 seid_enabled; - void *hdr; - u8 *seid; - - if (cb_ctx->pos[0]) - return skb->len; - - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &smc_gen_nl_family, NLM_F_MULTI, - SMC_NETLINK_DUMP_SEID); - if (!hdr) - return -ENOMEM; - if (!smc_ism_is_v2_capable()) - goto end; - - smc_ism_get_system_eid(&seid); - snprintf(seid_str, sizeof(seid_str), "%s", seid); - if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) - goto err; - read_lock(&smc_clc_eid_table.lock); - seid_enabled = smc_clc_eid_table.seid_enabled; - read_unlock(&smc_clc_eid_table.lock); - if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled)) - goto err; -end: - genlmsg_end(skb, hdr); - cb_ctx->pos[0]++; - return skb->len; -err: - genlmsg_cancel(skb, hdr); - return -EMSGSIZE; -} - -int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) -{ - write_lock(&smc_clc_eid_table.lock); - smc_clc_eid_table.seid_enabled = 1; - write_unlock(&smc_clc_eid_table.lock); - return 0; -} - -int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) -{ - int rc = 0; - - write_lock(&smc_clc_eid_table.lock); - if (!smc_clc_eid_table.ueid_cnt) - rc = -ENOENT; - else - smc_clc_eid_table.seid_enabled = 0; - write_unlock(&smc_clc_eid_table.lock); - return rc; -} - -static bool _smc_clc_match_ueid(u8 *peer_ueid) -{ - struct smc_clc_eid_entry *tmp_ueid; - - list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { - if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN)) - return true; - } - return false; -} - -bool smc_clc_match_eid(u8 *negotiated_eid, - struct smc_clc_v2_extension *smc_v2_ext, - u8 *peer_eid, u8 *local_eid) -{ - bool match = false; - int i; - - negotiated_eid[0] = 0; - read_lock(&smc_clc_eid_table.lock); - if (peer_eid && local_eid && - smc_clc_eid_table.seid_enabled && - smc_v2_ext->hdr.flag.seid && - !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) { - memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN); - match = true; - goto out; - } - - for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) { - if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) { - memcpy(negotiated_eid, smc_v2_ext->user_eids[i], - SMC_MAX_EID_LEN); - match = true; - goto out; - } - } -out: - read_unlock(&smc_clc_eid_table.lock); - return match; -} - /* check arriving CLC proposal */ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) { @@ -393,27 +100,6 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + sizeof(struct smc_clc_first_contact_ext))) return false; - if (hdr->typev1 == SMC_TYPE_R && - ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) - return false; - } - return true; -} - -/* check arriving CLC decline */ -static bool -smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) -{ - struct smc_clc_msg_hdr *hdr = &dclc->hdr; - - if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) - return false; - if (hdr->version == SMC_V1) { - if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline)) - return false; - } else { - if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2)) - return false; } return 
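_smc_nl_ueid_dump() above is the classic netlink dump idiom: skip already-delivered entries, emit until the message buffer fills, and hand back a resume index. The same pattern over a plain array, as a sketch:

#include <stdbool.h>

/* skip start_idx entries, stop when emit() reports a full buffer,
 * return the index to resume from on the next dump call */
static int dump_from(const char *const *eids, int count, int start_idx,
		     bool (*emit)(const char *eid))
{
	int idx = 0;
	int i;

	for (i = 0; i < count; i++) {
		if (idx++ < start_idx)
			continue;
		if (!emit(eids[i])) {		/* no room: revisit entry */
			--idx;
			break;
		}
	}
	return idx;				/* cb_ctx->pos[0] upstream */
}
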
true; } @@ -459,9 +145,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; - if (!smc_clc_msg_decl_valid(dclc)) + if (ntohs(dclc->hdr.length) != sizeof(*dclc)) return false; - check_trl = false; + trl = &dclc->trl; break; default: return false; @@ -760,41 +446,32 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, /* send CLC DECLINE message across internal TCP socket */ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) { - struct smc_clc_msg_decline *dclc_v1; - struct smc_clc_msg_decline_v2 dclc; + struct smc_clc_msg_decline dclc; struct msghdr msg; - int len, send_len; struct kvec vec; + int len; - dclc_v1 = (struct smc_clc_msg_decline *)&dclc; memset(&dclc, 0, sizeof(dclc)); memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); dclc.hdr.type = SMC_CLC_DECLINE; + dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = version; dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? SMC_FIRST_CONTACT_MASK : 0; - if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) && + if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); - if (version == SMC_V1) { - memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); - send_len = sizeof(*dclc_v1); - } else { - memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); - send_len = sizeof(dclc); - } - dclc.hdr.length = htons(send_len); + memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); vec.iov_base = &dclc; - vec.iov_len = send_len; - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len); - if (len < 0 || len < send_len) + vec.iov_len = sizeof(struct smc_clc_msg_decline); + len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, + sizeof(struct smc_clc_msg_decline)); + if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) len = -EPROTO; return len > 0 ? 
0 : len; } @@ -874,10 +551,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) if (ini->smc_type_v2 == SMC_TYPE_N) { pclc_smcd->v2_ext_offset = 0; } else { - struct smc_clc_eid_entry *ueident; u16 v2_ext_offset; + u8 *eid = NULL; - v2_ext->hdr.flag.release = SMC_RELEASE; v2_ext_offset = sizeof(*pclc_smcd) - offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); if (ini->smc_type_v1 != SMC_TYPE_N) @@ -885,31 +561,21 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0]); pclc_smcd->v2_ext_offset = htons(v2_ext_offset); - plen += sizeof(*v2_ext); - - read_lock(&smc_clc_eid_table.lock); - v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; - plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; - i = 0; - list_for_each_entry(ueident, &smc_clc_eid_table.list, list) { - memcpy(v2_ext->user_eids[i++], ueident->eid, - sizeof(ueident->eid)); - } - read_unlock(&smc_clc_eid_table.lock); - } - if (smcd_indicated(ini->smc_type_v2)) { - u8 *eid = NULL; - - v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; + v2_ext->hdr.eid_cnt = 0; v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; + v2_ext->hdr.flag.release = SMC_RELEASE; + v2_ext->hdr.flag.seid = 1; v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); - smc_ism_get_system_eid(&eid); - if (eid && v2_ext->hdr.flag.seid) + if (ini->ism_dev[0]) + smc_ism_get_system_eid(ini->ism_dev[0], &eid); + else + smc_ism_get_system_eid(ini->ism_dev[1], &eid); + if (eid) memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); - plen += sizeof(*smcd_v2_ext); + plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { for (i = 1; i <= ini->ism_offered_cnt; i++) { gidchids[i - 1].gid = @@ -921,9 +587,6 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) sizeof(struct smc_clc_smcd_gid_chid); } } - if (smcr_indicated(ini->smc_type_v2)) - memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); - pclc_base->hdr.length = htons(plen); memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -945,16 +608,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) } if (ini->smc_type_v2 != SMC_TYPE_N) { vec[i].iov_base = v2_ext; - vec[i++].iov_len = sizeof(*v2_ext) + - (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); - if (smcd_indicated(ini->smc_type_v2)) { - vec[i].iov_base = smcd_v2_ext; - vec[i++].iov_len = sizeof(*smcd_v2_ext); - if (ini->ism_offered_cnt) { - vec[i].iov_base = gidchids; - vec[i++].iov_len = ini->ism_offered_cnt * + vec[i++].iov_len = sizeof(*v2_ext); + vec[i].iov_base = smcd_v2_ext; + vec[i++].iov_len = sizeof(*smcd_v2_ext); + if (ini->ism_offered_cnt) { + vec[i].iov_base = gidchids; + vec[i++].iov_len = ini->ism_offered_cnt * sizeof(struct smc_clc_smcd_gid_chid); - } } } vec[i].iov_base = trl; @@ -976,15 +636,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) /* build and send CLC CONFIRM / ACCEPT message */ static int smc_clc_send_confirm_accept(struct smc_sock *smc, struct smc_clc_msg_accept_confirm_v2 *clc_v2, - int first_contact, u8 version, - u8 *eid, struct smc_init_info *ini) + int first_contact, u8 version) { struct smc_connection *conn = &smc->conn; struct smc_clc_msg_accept_confirm *clc; struct smc_clc_first_contact_ext fce; - struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; - struct kvec vec[5]; + struct kvec vec[3]; struct 
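Both the decline path (a single kvec) and the confirm/accept path below (body, optional first-contact extension, trailer) funnel through one gather-write. A userspace analogue with writev(2), using a stand-in trailer struct rather than the real wire layout; the EBCDIC eyecatcher bytes extrapolate from the SMCD example shown above, so treat them as an assumption:

#include <string.h>
#include <sys/types.h>
#include <sys/uio.h>

struct wire_trl {
	unsigned char eyecatcher[4];	/* stand-in trailer, not the real layout */
};

/* gather body, optional first-contact extension and trailer into one
 * send, mirroring the kvec assembly in the functions here */
static ssize_t send_gathered(int fd, const void *body, size_t body_len,
			     const void *ext, size_t ext_len)
{
	struct wire_trl trl;
	struct iovec vec[3];
	int i = 0;

	memcpy(trl.eyecatcher, "\xe2\xd4\xc3\xd9", 4);	/* "SMCR" EBCDIC */
	vec[i].iov_base = (void *)body;
	vec[i++].iov_len = body_len;
	if (ext) {				/* only on first contact */
		vec[i].iov_base = (void *)ext;
		vec[i++].iov_len = ext_len;
	}
	vec[i].iov_base = &trl;
	vec[i++].iov_len = sizeof(trl);
	return writev(fd, vec, i);
}
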
msghdr msg; int i, len; @@ -1006,10 +664,12 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { - clc_v2->d1.chid = - htons(smc_ism_get_chid(conn->lgr->smcd)); - if (eid && eid[0]) - memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); + u8 *eid = NULL; + + clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); + smc_ism_get_system_eid(conn->lgr->smcd, &eid); + if (eid) + memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; if (first_contact) smc_clc_fill_fce(&fce, &len); @@ -1021,6 +681,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, struct smc_link *link = conn->lnk; /* SMC-R specific settings */ + link = conn->lnk; memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); clc->hdr.typev1 = SMC_TYPE_R; @@ -1047,26 +708,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(clc->r0.psn, link->psn_initial); - if (version == SMC_V1) { - clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - } else { - if (eid && eid[0]) - memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN); - len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) { - smc_clc_fill_fce(&fce, &len); - fce.v2_direct = !link->lgr->uses_gateway; - memset(&gle, 0, sizeof(gle)); - if (ini && clc->hdr.type == SMC_CLC_CONFIRM) { - gle.gid_cnt = ini->smcrv2.gidlist.len; - len += sizeof(gle); - len += gle.gid_cnt * sizeof(gle.gid[0]); - } else { - len += sizeof(gle.reserved); - } - } - clc_v2->hdr.length = htons(len); - } memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); } @@ -1074,10 +715,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, i = 0; vec[i].iov_base = clc_v2; if (version > SMC_V1) - vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? - SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 : - SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) - - sizeof(trl); + vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); else vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? 
SMCD_CLC_ACCEPT_CONFIRM_LEN : @@ -1086,18 +724,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, if (version > SMC_V1 && first_contact) { vec[i].iov_base = &fce; vec[i++].iov_len = sizeof(fce); - if (!conn->lgr->is_smcd) { - if (clc->hdr.type == SMC_CLC_CONFIRM) { - vec[i].iov_base = &gle; - vec[i++].iov_len = sizeof(gle); - vec[i].iov_base = &ini->smcrv2.gidlist.list; - vec[i++].iov_len = gle.gid_cnt * - sizeof(gle.gid[0]); - } else { - vec[i].iov_base = &gle.reserved; - vec[i++].iov_len = sizeof(gle.reserved); - } - } } vec[i].iov_base = &trl; vec[i++].iov_len = sizeof(trl); @@ -1107,7 +733,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, /* send CLC CONFIRM message across internal TCP socket */ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, - u8 version, u8 *eid, struct smc_init_info *ini) + u8 version) { struct smc_clc_msg_accept_confirm_v2 cclc_v2; int reason_code = 0; @@ -1117,7 +743,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, memset(&cclc_v2, 0, sizeof(cclc_v2)); cclc_v2.hdr.type = SMC_CLC_CONFIRM; len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, - version, eid, ini); + version); if (len < ntohs(cclc_v2.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; @@ -1132,7 +758,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, /* send CLC ACCEPT message across internal TCP socket */ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, - u8 version, u8 *negotiated_eid) + u8 version) { struct smc_clc_msg_accept_confirm_v2 aclc_v2; int len; @@ -1140,7 +766,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, memset(&aclc_v2, 0, sizeof(aclc_v2)); aclc_v2.hdr.type = SMC_CLC_ACCEPT; len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, - version, negotiated_eid, NULL); + version); if (len < ntohs(aclc_v2.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; @@ -1160,14 +786,4 @@ void __init smc_clc_init(void) u = utsname(); memcpy(smc_hostname, u->nodename, min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); - - INIT_LIST_HEAD(&smc_clc_eid_table.list); - rwlock_init(&smc_clc_eid_table.lock); - smc_clc_eid_table.ueid_cnt = 0; - smc_clc_eid_table.seid_enabled = 1; -} - -void smc_clc_exit(void) -{ - smc_clc_ueid_remove(NULL); } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 83f02f131f..32d37f7b70 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -14,10 +14,8 @@ #define _SMC_CLC_H #include -#include #include "smc.h" -#include "smc_netlink.h" #define SMC_CLC_PROPOSAL 0x01 #define SMC_CLC_ACCEPT 0x02 @@ -44,7 +42,6 @@ #define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */ #define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */ #define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */ -#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ @@ -55,8 +52,6 @@ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ #define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ #define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */ -#define SMC_CLC_DECL_NOROUTE 0x030e0000 /* SMC-Rv2 conn. 
no route to peer */ -#define SMC_CLC_DECL_NOINDIRECT 0x030f0000 /* SMC-Rv2 conn. indirect mismatch*/ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ @@ -163,7 +158,6 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ } __aligned(4); #define SMC_CLC_MAX_V6_PREFIX 8 -#define SMC_CLC_MAX_UEID 8 struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal pclc_base; @@ -171,7 +165,6 @@ struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_v2_extension pclc_v2_ext; - u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN]; struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; struct smc_clc_msg_trail pclc_trl; @@ -216,14 +209,11 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ #define SMC_CLC_OS_AIX 3 struct smc_clc_first_contact_ext { + u8 reserved1; #if defined(__BIG_ENDIAN_BITFIELD) - u8 v2_direct : 1, - reserved : 7; u8 os_type : 4, release : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 reserved : 7, - v2_direct : 1; u8 release : 4, os_type : 4; #endif @@ -231,13 +221,6 @@ struct smc_clc_first_contact_ext { u8 hostname[SMC_MAX_HOSTNAME_LEN]; }; -struct smc_clc_fce_gid_ext { - u8 reserved[16]; - u8 gid_cnt; - u8 reserved2[3]; - u8 gid[][SMC_GID_SIZE]; -}; - struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { @@ -252,17 +235,13 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { - struct { /* SMC-R */ - struct smcr_clc_msg_accept_confirm r0; - u8 eid[SMC_MAX_EID_LEN]; - u8 reserved6[8]; - } r1; + struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ struct { /* SMC-D */ struct smcd_clc_msg_accept_confirm_common d0; __be16 chid; u8 eid[SMC_MAX_EID_LEN]; u8 reserved5[8]; - } d1; + }; }; }; @@ -281,24 +260,6 @@ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ } __aligned(4); -#define SMC_DECL_DIAG_COUNT_V2 4 /* no. 
of additional peer diagnosis codes */ - -struct smc_clc_msg_decline_v2 { /* clc decline message */ - struct smc_clc_msg_hdr hdr; - u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */ - __be32 peer_diagnosis; /* diagnosis information */ -#if defined(__BIG_ENDIAN_BITFIELD) - u8 os_type : 4, - reserved : 4; -#elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 reserved : 4, - os_type : 4; -#endif - u8 reserved2[3]; - __be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2]; - struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ -} __aligned(4); - /* determine start of the prefix area within the proposal message */ static inline struct smc_clc_msg_proposal_prefix * smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) @@ -317,17 +278,6 @@ static inline bool smcd_indicated(int smc_type) return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B; } -static inline u8 smc_indicated_type(int is_smcd, int is_smcr) -{ - if (is_smcd && is_smcr) - return SMC_TYPE_B; - if (is_smcd) - return SMC_TYPE_D; - if (is_smcr) - return SMC_TYPE_R; - return SMC_TYPE_N; -} - /* get SMC-D info from proposal message */ static inline struct smc_clc_msg_smcd * smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) @@ -380,22 +330,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version); int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, - u8 version, u8 *eid, struct smc_init_info *ini); + u8 version); int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, - u8 version, u8 *negotiated_eid); + u8 version); void smc_clc_init(void) __init; -void smc_clc_exit(void); void smc_clc_get_hostname(u8 **host); -bool smc_clc_match_eid(u8 *negotiated_eid, - struct smc_clc_v2_extension *smc_v2_ext, - u8 *peer_eid, u8 *local_eid); -int smc_clc_ueid_count(void); -int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb); -int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info); -int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info); -int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info); -int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb); -int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info); -int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 292e4d904a..84102db5bb 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -364,9 +364,9 @@ static void smc_close_passive_work(struct work_struct *work) if (rxflags->peer_conn_abort) { /* peer has not received all data */ smc_close_passive_abort_received(smc); - release_sock(sk); + release_sock(&smc->sk); cancel_delayed_work_sync(&conn->tx_work); - lock_sock(sk); + lock_sock(&smc->sk); goto wakeup; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 29525d03b2..dee336eef6 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -34,7 +34,6 @@ #include "smc_ism.h" #include "smc_netlink.h" #include "smc_stats.h" -#include "smc_tracepoint.h" #define SMC_LGR_NUM_INCR 256 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) @@ -211,13 +210,14 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - if (!smc_conn_lgr_valid(conn)) + if (!lgr) return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); } 
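The smc_close.c hunk keeps the existing discipline of dropping the socket lock around cancel_delayed_work_sync(): the tx worker takes that same lock, so waiting for it while holding the lock would deadlock. A pthreads analogue of the hazard, as a sketch:

#include <pthread.h>

static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER;

static void *tx_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&sk_lock);	/* the worker takes the "socket lock" */
	/* ... flush pending tx ... */
	pthread_mutex_unlock(&sk_lock);
	return NULL;
}

static void close_path(pthread_t worker)
{
	pthread_mutex_lock(&sk_lock);
	/* ... state changes under the lock ... */
	pthread_mutex_unlock(&sk_lock);	/* must drop it here ... */
	pthread_join(worker, NULL);	/* ... or the worker never finishes */
	pthread_mutex_lock(&sk_lock);
	/* ... resume teardown ... */
	pthread_mutex_unlock(&sk_lock);
}
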
write_unlock_bh(&lgr->conns_lock); + conn->lgr = NULL; } int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) @@ -225,6 +225,7 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); char hostname[SMC_MAX_HOSTNAME_LEN + 1]; char smc_seid[SMC_MAX_EID_LEN + 1]; + struct smcd_dev *smcd_dev; struct nlattr *attrs; u8 *seid = NULL; u8 *host = NULL; @@ -246,8 +247,6 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) goto errattr; if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable())) goto errattr; - if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true)) - goto errattr; smc_clc_get_hostname(&host); if (host) { memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); @@ -255,8 +254,13 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname)) goto errattr; } - if (smc_ism_is_v2_capable()) { - smc_ism_get_system_eid(&seid); + mutex_lock(&smcd_dev_list.mutex); + smcd_dev = list_first_entry_or_null(&smcd_dev_list.list, + struct smcd_dev, list); + if (smcd_dev) + smc_ism_get_system_eid(smcd_dev, &seid); + mutex_unlock(&smcd_dev_list.mutex); + if (seid && smc_ism_is_v2_capable()) { memcpy(smc_seid, seid, SMC_MAX_EID_LEN); smc_seid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid)) @@ -275,65 +279,12 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */ -static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr, - struct sk_buff *skb, - struct netlink_callback *cb, - struct nlattr *v2_attrs) -{ - char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; - char smc_eid[SMC_MAX_EID_LEN + 1]; - - if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) - goto errv2attr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) - goto errv2attr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) - goto errv2attr; - memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); - smc_host[SMC_MAX_HOSTNAME_LEN] = 0; - if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) - goto errv2attr; - memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); - smc_eid[SMC_MAX_EID_LEN] = 0; - if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) - goto errv2attr; - - nla_nest_end(skb, v2_attrs); - return 0; - -errv2attr: - nla_nest_cancel(skb, v2_attrs); - return -EMSGSIZE; -} - -static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr, - struct sk_buff *skb, - struct netlink_callback *cb) -{ - struct nlattr *v2_attrs; - - v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2); - if (!v2_attrs) - goto errattr; - if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway)) - goto errv2attr; - - nla_nest_end(skb, v2_attrs); - return 0; - -errv2attr: - nla_nest_cancel(skb, v2_attrs); -errattr: - return -EMSGSIZE; -} - static int smc_nl_fill_lgr(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { char smc_target[SMC_MAX_PNETID_LEN + 1]; - struct nlattr *attrs, *v2_attrs; + struct nlattr *attrs; attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR); if (!attrs) @@ -349,22 +300,10 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) goto errattr; - if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE, - lgr->net->net_cookie, SMC_NLA_LGR_R_PAD)) - goto errattr; memcpy(smc_target, 
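Several netlink hunks here share one idiom for exporting fixed-width ID fields (hostname, SEID, pnetid): copy into a len+1 scratch buffer, force a terminator, then emit as a string. A standalone sketch, assuming SMC_MAX_PNETID_LEN matches include/net/smc.h:

#include <stdio.h>
#include <string.h>

#define SMC_MAX_PNETID_LEN 16

/* fixed-width ID fields carry no NUL terminator; copy into a len+1
 * scratch buffer and terminate before treating them as C strings */
static void print_fixed_field(const char *field, size_t len)
{
	char buf[SMC_MAX_PNETID_LEN + 1];

	if (len > SMC_MAX_PNETID_LEN)
		len = SMC_MAX_PNETID_LEN;
	memcpy(buf, field, len);
	buf[len] = '\0';
	printf("%s\n", buf);		/* nla_put_string() in the kernel */
}
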
lgr->pnet_id, SMC_MAX_PNETID_LEN); smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) goto errattr; - if (lgr->smc_version > SMC_V1) { - v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON); - if (!v2_attrs) - goto errattr; - if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) - goto errattr; - if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb)) - goto errattr; - } nla_nest_end(skb, attrs); return 0; @@ -497,7 +436,10 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { + char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; char smc_pnet[SMC_MAX_PNETID_LEN + 1]; + char smc_eid[SMC_MAX_EID_LEN + 1]; + struct nlattr *v2_attrs; struct nlattr *attrs; void *nlh; @@ -529,19 +471,32 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) goto errattr; - if (lgr->smc_version > SMC_V1) { - struct nlattr *v2_attrs; - v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON); - if (!v2_attrs) - goto errattr; - if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) - goto errattr; - } + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2); + if (!v2_attrs) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) + goto errv2attr; + memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); + smc_host[SMC_MAX_HOSTNAME_LEN] = 0; + if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) + goto errv2attr; + memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); + smc_eid[SMC_MAX_EID_LEN] = 0; + if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) + goto errv2attr; + + nla_nest_end(skb, v2_attrs); nla_nest_end(skb, attrs); genlmsg_end(skb, nlh); return 0; +errv2attr: + nla_nest_cancel(skb, v2_attrs); errattr: nla_nest_cancel(skb, attrs); errout: @@ -735,33 +690,24 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link) int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini) { - struct smc_ib_device *smcibdev; u8 rndvec[3]; int rc; - if (lgr->smc_version == SMC_V2) { - lnk->smcibdev = ini->smcrv2.ib_dev_v2; - lnk->ibport = ini->smcrv2.ib_port_v2; - } else { - lnk->smcibdev = ini->ib_dev; - lnk->ibport = ini->ib_port; - } - get_device(&lnk->smcibdev->ibdev->dev); - atomic_inc(&lnk->smcibdev->lnk_cnt); - refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */ - lnk->clearing = 0; - lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; + get_device(&ini->ib_dev->ibdev->dev); + atomic_inc(&ini->ib_dev->lnk_cnt); lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; - smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; + lnk->smcibdev = ini->ib_dev; + lnk->ibport = ini->ib_port; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); + lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; atomic_set(&lnk->conn_cnt, 0); smc_llc_link_set_uid(lnk); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); - if (!lnk->smcibdev->initialized) { - rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev); + if (!ini->ib_dev->initialized) { + rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); if (rc) goto out; } @@ -769,9 +715,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] 
@@ -735,33 +690,24 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link)
 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 		   u8 link_idx, struct smc_init_info *ini)
 {
-	struct smc_ib_device *smcibdev;
 	u8 rndvec[3];
 	int rc;

-	if (lgr->smc_version == SMC_V2) {
-		lnk->smcibdev = ini->smcrv2.ib_dev_v2;
-		lnk->ibport = ini->smcrv2.ib_port_v2;
-	} else {
-		lnk->smcibdev = ini->ib_dev;
-		lnk->ibport = ini->ib_port;
-	}
-	get_device(&lnk->smcibdev->ibdev->dev);
-	atomic_inc(&lnk->smcibdev->lnk_cnt);
-	refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
-	lnk->clearing = 0;
-	lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
+	get_device(&ini->ib_dev->ibdev->dev);
+	atomic_inc(&ini->ib_dev->lnk_cnt);
 	lnk->link_id = smcr_next_link_id(lgr);
 	lnk->lgr = lgr;
-	smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
 	lnk->link_idx = link_idx;
+	lnk->smcibdev = ini->ib_dev;
+	lnk->ibport = ini->ib_port;
 	smc_ibdev_cnt_inc(lnk);
 	smcr_copy_dev_info_to_link(lnk);
+	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
 	atomic_set(&lnk->conn_cnt, 0);
 	smc_llc_link_set_uid(lnk);
 	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
-	if (!lnk->smcibdev->initialized) {
-		rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
+	if (!ini->ib_dev->initialized) {
+		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
 		if (rc)
 			goto out;
 	}
@@ -769,9 +715,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
 	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
-				  ini->vlan_id, lnk->gid, &lnk->sgid_index,
-				  lgr->smc_version == SMC_V2 ?
-				  &ini->smcrv2 : NULL);
+				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
 	if (rc)
 		goto out;
 	rc = smc_llc_link_init(lnk);
@@ -802,13 +746,11 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 	smc_llc_link_clear(lnk, false);
 out:
 	smc_ibdev_cnt_dec(lnk);
-	put_device(&lnk->smcibdev->ibdev->dev);
-	smcibdev = lnk->smcibdev;
+	put_device(&ini->ib_dev->ibdev->dev);
 	memset(lnk, 0, sizeof(struct smc_link));
 	lnk->state = SMC_LNK_UNUSED;
-	if (!atomic_dec_return(&smcibdev->lnk_cnt))
-		wake_up(&smcibdev->lnks_deleted);
-	smc_lgr_put(lgr); /* lgr_hold above */
+	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
+		wake_up(&ini->ib_dev->lnks_deleted);
 	return rc;
 }
@@ -847,7 +789,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	lgr->terminating = 0;
 	lgr->freeing = 0;
 	lgr->vlan_id = ini->vlan_id;
-	refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
 	mutex_init(&lgr->sndbufs_lock);
 	mutex_init(&lgr->rmbs_lock);
 	rwlock_init(&lgr->conns_lock);
@@ -873,38 +814,18 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
 	} else {
 		/* SMC-R specific settings */
-		struct smc_ib_device *ibdev;
-		int ibport;
-
 		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
-		lgr->smc_version = ini->smcr_version;
-		memcpy(lgr->peer_systemid, ini->peer_systemid,
+		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN);
-		if (lgr->smc_version == SMC_V2) {
-			ibdev = ini->smcrv2.ib_dev_v2;
-			ibport = ini->smcrv2.ib_port_v2;
-			lgr->saddr = ini->smcrv2.saddr;
-			lgr->uses_gateway = ini->smcrv2.uses_gateway;
-			memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
-			       ETH_ALEN);
-		} else {
-			ibdev = ini->ib_dev;
-			ibport = ini->ib_port;
-		}
-		memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
+		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
 		       SMC_MAX_PNETID_LEN);
-		if (smc_wr_alloc_lgr_mem(lgr))
-			goto free_wq;
 		smc_llc_lgr_init(lgr, smc);

 		link_idx = SMC_SINGLE_LINK;
 		lnk = &lgr->lnk[link_idx];
 		rc = smcr_link_init(lgr, lnk, link_idx, ini);
-		if (rc) {
-			smc_wr_free_lgr_mem(lgr);
+		if (rc)
 			goto free_wq;
-		}
-		lgr->net = smc_ib_net(lnk->smcibdev);
 		lgr_list = &smc_lgr_list.list;
 		lgr_lock = &smc_lgr_list.lock;
 		atomic_inc(&lgr_cnt);
@@ -1000,12 +921,8 @@ void smc_switch_link_and_count(struct smc_connection *conn,
 			       struct smc_link *to_lnk)
 {
 	atomic_dec(&conn->lnk->conn_cnt);
-	/* link_hold in smc_conn_create() */
-	smcr_link_put(conn->lnk);
 	conn->lnk = to_lnk;
 	atomic_inc(&conn->lnk->conn_cnt);
-	/* link_put in smc_conn_free() */
-	smcr_link_hold(conn->lnk);
 }

 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
@@ -1113,24 +1030,18 @@ static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
 		smc_buf_free(lgr, true, rmb_desc);
 	} else {
 		rmb_desc->used = 0;
-		memset(rmb_desc->cpu_addr, 0, rmb_desc->len);
 	}
 }

 static void smc_buf_unuse(struct smc_connection *conn,
 			  struct smc_link_group *lgr)
 {
-	if (conn->sndbuf_desc) {
+	if (conn->sndbuf_desc)
 		conn->sndbuf_desc->used = 0;
-		memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len);
-	}
-	if (conn->rmb_desc && lgr->is_smcd) {
+	if (conn->rmb_desc && lgr->is_smcd)
 		conn->rmb_desc->used = 0;
-		memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len +
-		       sizeof(struct smcd_cdc_msg));
-	} else if (conn->rmb_desc) {
+	else if (conn->rmb_desc)
 		smcr_buf_unuse(conn->rmb_desc, lgr);
-	}
 }

 /* remove a finished connection from its link group */
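The smcr_link_init() hunk above keeps the way the initial packet sequence number is derived: three random bytes assembled into a 24-bit PSN. A standalone C sketch of just that arithmetic (rand() stands in for the kernel's get_random_bytes(); names are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
    	uint8_t rndvec[3];
    	uint32_t psn;
    	int i;

    	srand(42);
    	for (i = 0; i < 3; i++)
    		rndvec[i] = rand() & 0xff;
    	/* little-endian assembly of a 24-bit value, as in the hunk above */
    	psn = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
    	printf("psn_initial = 0x%06x\n", psn);	/* always fits in 24 bits */
    	return 0;
    }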
@@ -1138,19 +1049,8 @@ void smc_conn_free(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;

-	if (!lgr || conn->freed)
-		/* Connection has never been registered in a
-		 * link group, or has already been freed.
-		 */
+	if (!lgr)
 		return;
-
-	conn->freed = 1;
-	if (!smc_conn_lgr_valid(conn))
-		/* Connection has already unregistered from
-		 * link group.
-		 */
-		goto lgr_put;
-
 	if (lgr->is_smcd) {
 		if (!list_empty(&lgr->list))
 			smc_ism_unset_conn(conn);
@@ -1161,16 +1061,12 @@ void smc_conn_free(struct smc_connection *conn)
 		cancel_work_sync(&conn->abort_work);
 	}
 	if (!list_empty(&lgr->list)) {
-		smc_lgr_unregister_conn(conn);
 		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+		smc_lgr_unregister_conn(conn);
 	}

 	if (!lgr->conns_num)
 		smc_lgr_schedule_free_work(lgr);
-lgr_put:
-	if (!lgr->is_smcd)
-		smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
-	smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
 }

 /* unregister a link from a buf_desc */
@@ -1226,29 +1122,13 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
 	}
 }

-static void __smcr_link_clear(struct smc_link *lnk)
-{
-	struct smc_link_group *lgr = lnk->lgr;
-	struct smc_ib_device *smcibdev;
-
-	smc_wr_free_link_mem(lnk);
-	smc_ibdev_cnt_dec(lnk);
-	put_device(&lnk->smcibdev->ibdev->dev);
-	smcibdev = lnk->smcibdev;
-	memset(lnk, 0, sizeof(struct smc_link));
-	lnk->state = SMC_LNK_UNUSED;
-	if (!atomic_dec_return(&smcibdev->lnk_cnt))
-		wake_up(&smcibdev->lnks_deleted);
-	smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
-}
-
 /* must be called under lgr->llc_conf_mutex lock */
 void smcr_link_clear(struct smc_link *lnk, bool log)
 {
-	if (!lnk->lgr || lnk->clearing ||
-	    lnk->state == SMC_LNK_UNUSED)
+	struct smc_ib_device *smcibdev;
+
+	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
 		return;
-	lnk->clearing = 1;
 	lnk->peer_qpn = 0;
 	smc_llc_link_clear(lnk, log);
 	smcr_buf_unmap_lgr(lnk);
@@ -1257,18 +1137,14 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
 	smc_wr_free_link(lnk);
 	smc_ib_destroy_queue_pair(lnk);
 	smc_ib_dealloc_protection_domain(lnk);
-	smcr_link_put(lnk); /* theoretically last link_put */
-}
-
-void smcr_link_hold(struct smc_link *lnk)
-{
-	refcount_inc(&lnk->refcnt);
-}
-
-void smcr_link_put(struct smc_link *lnk)
-{
-	if (refcount_dec_and_test(&lnk->refcnt))
-		__smcr_link_clear(lnk);
+	smc_wr_free_link_mem(lnk);
+	smc_ibdev_cnt_dec(lnk);
+	put_device(&lnk->smcibdev->ibdev->dev);
+	smcibdev = lnk->smcibdev;
+	memset(lnk, 0, sizeof(struct smc_link));
+	lnk->state = SMC_LNK_UNUSED;
+	if (!atomic_dec_return(&smcibdev->lnk_cnt))
+		wake_up(&smcibdev->lnks_deleted);
 }

 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
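The smcr_link_hold()/smcr_link_put() pair removed above is the classic hold/put lifetime pattern: the creator sets the count to 1, every user takes an extra reference, and the final put runs the real teardown. A minimal standalone sketch of the same idea using C11 atomics in place of the kernel's refcount_t (names illustrative, not from this patch):

    #include <stdatomic.h>
    #include <stdio.h>

    struct link {
    	atomic_int refcnt;
    };

    static void link_hold(struct link *l)
    {
    	atomic_fetch_add(&l->refcnt, 1);
    }

    static void link_put(struct link *l)
    {
    	/* the thread that drops the last reference does the teardown,
    	 * playing the role of __smcr_link_clear() above */
    	if (atomic_fetch_sub(&l->refcnt, 1) == 1)
    		printf("last put: run teardown\n");
    }

    int main(void)
    {
    	struct link l;

    	atomic_init(&l.refcnt, 1);	/* creation reference */
    	link_hold(&l);			/* e.g. one reference per connection */
    	link_put(&l);
    	link_put(&l);			/* drops the creation reference */
    	return 0;
    }

The revert replaces this scheme with direct, single-owner teardown in smcr_link_clear(), which is simpler but relies on the llc_conf_mutex to exclude concurrent users.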
@@ -1333,21 +1209,6 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 	__smc_lgr_free_bufs(lgr, true);
 }

-/* won't be freed until no one accesses to lgr anymore */
-static void __smc_lgr_free(struct smc_link_group *lgr)
-{
-	smc_lgr_free_bufs(lgr);
-	if (lgr->is_smcd) {
-		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
-			wake_up(&lgr->smcd->lgrs_deleted);
-	} else {
-		smc_wr_free_lgr_mem(lgr);
-		if (!atomic_dec_return(&lgr_cnt))
-			wake_up(&lgrs_deleted);
-	}
-	kfree(lgr);
-}
-
 /* remove a link group */
 static void smc_lgr_free(struct smc_link_group *lgr)
 {
@@ -1363,23 +1224,18 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 		smc_llc_lgr_clear(lgr);
 	}

+	smc_lgr_free_bufs(lgr);
 	destroy_workqueue(lgr->tx_wq);
 	if (lgr->is_smcd) {
 		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
 		put_device(&lgr->smcd->dev);
+		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
+			wake_up(&lgr->smcd->lgrs_deleted);
+	} else {
+		if (!atomic_dec_return(&lgr_cnt))
+			wake_up(&lgrs_deleted);
 	}
-	smc_lgr_put(lgr); /* theoretically last lgr_put */
-}
-
-void smc_lgr_hold(struct smc_link_group *lgr)
-{
-	refcount_inc(&lgr->refcnt);
-}
-
-void smc_lgr_put(struct smc_link_group *lgr)
-{
-	if (refcount_dec_and_test(&lgr->refcnt))
-		__smc_lgr_free(lgr);
+	kfree(lgr);
 }

 static void smc_sk_wake_ups(struct smc_sock *smc)
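Both variants of smc_lgr_free() above end with the "last one out wakes the waiter" idiom: a global object counter is decremented, and whoever brings it to zero wakes a thread blocked until all objects are gone (used at module unload). A standalone sketch of the same shape with pthreads in place of atomic_dec_return()/wake_up()/wait_event (all names illustrative):

    #include <pthread.h>
    #include <stdio.h>

    static int lgr_cnt = 2;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t lgrs_deleted = PTHREAD_COND_INITIALIZER;

    static void *free_one(void *arg)
    {
    	(void)arg;
    	pthread_mutex_lock(&lock);
    	if (--lgr_cnt == 0)			/* atomic_dec_return() == 0 */
    		pthread_cond_signal(&lgrs_deleted);	/* wake_up() */
    	pthread_mutex_unlock(&lock);
    	return NULL;
    }

    int main(void)
    {
    	pthread_t t[2];
    	int i;

    	for (i = 0; i < 2; i++)
    		pthread_create(&t[i], NULL, free_one, NULL);
    	pthread_mutex_lock(&lock);
    	while (lgr_cnt)			/* wait until every group is freed */
    		pthread_cond_wait(&lgrs_deleted, &lock);
    	pthread_mutex_unlock(&lock);
    	for (i = 0; i < 2; i++)
    		pthread_join(t[i], NULL);
    	printf("all link groups freed, safe to tear down\n");
    	return 0;
    }

Build with -lpthread; the kernel version gets the same effect lock-free via atomics plus a wait queue.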
@@ -1599,9 +1455,9 @@ void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
 		lgr_type = "ASYMMETRIC_LOCAL";
 		break;
 	}
-	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
+	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
 			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
-			    lgr->net->net_cookie, lgr_type, lgr->pnet_id);
+			    lgr_type, lgr->pnet_id);
 }

 /* set new lgr type and tag a link as asymmetric */
@@ -1636,8 +1492,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
 		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
 			    SMC_MAX_PNETID_LEN) ||
 		    lgr->type == SMC_LGR_SYMMETRIC ||
-		    lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
-		    !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
+		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
 			continue;

 		/* trigger local add link processing */
@@ -1693,19 +1548,15 @@ static void smcr_link_down(struct smc_link *lnk)
 /* must be called under lgr->llc_conf_mutex lock */
 void smcr_link_down_cond(struct smc_link *lnk)
 {
-	if (smc_link_downing(&lnk->state)) {
-		trace_smcr_link_down(lnk, __builtin_return_address(0));
+	if (smc_link_downing(&lnk->state))
 		smcr_link_down(lnk);
-	}
 }

 /* will get the lgr->llc_conf_mutex lock */
 void smcr_link_down_cond_sched(struct smc_link *lnk)
 {
-	if (smc_link_downing(&lnk->state)) {
-		trace_smcr_link_down(lnk, __builtin_return_address(0));
+	if (smc_link_downing(&lnk->state))
 		schedule_work(&lnk->link_down_wrk);
-	}
 }

 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
@@ -1791,32 +1642,22 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
 	return rc;
 }

-static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
-			   u8 peer_systemid[],
-			   u8 peer_gid[],
-			   u8 peer_mac_v1[],
-			   enum smc_lgr_role role, u32 clcqpn,
-			   struct net *net)
+static bool smcr_lgr_match(struct smc_link_group *lgr,
+			   struct smc_clc_msg_local *lcl,
+			   enum smc_lgr_role role, u32 clcqpn)
 {
-	struct smc_link *lnk;
 	int i;

-	if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
+	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
 	    lgr->role != role)
 		return false;

 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-		lnk = &lgr->lnk[i];
-
-		if (!smc_link_active(lnk))
+		if (!smc_link_active(&lgr->lnk[i]))
 			continue;
-		/* use verbs API to check netns, instead of lgr->net */
-		if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
-			return false;
-		if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
-		    !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
-		    (smcr_version == SMC_V2 ||
-		     !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
+		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
+		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
+		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
 			return true;
 	}
 	return false;
@@ -1832,7 +1673,6 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
 	struct smc_connection *conn = &smc->conn;
-	struct net *net = sock_net(&smc->sk);
 	struct list_head *lgr_list;
 	struct smc_link_group *lgr;
 	enum smc_lgr_role role;
@@ -1856,15 +1696,13 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 		if ((ini->is_smcd ?
 		     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
 				    ini->ism_peer_gid[ini->ism_selected]) :
-		     smcr_lgr_match(lgr, ini->smcr_version,
-				    ini->peer_systemid,
-				    ini->peer_gid, ini->peer_mac, role,
-				    ini->ib_clcqpn, net)) &&
+		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
 		    !lgr->sync_err &&
 		    (ini->smcd_version == SMC_V2 ||
 		     lgr->vlan_id == ini->vlan_id) &&
 		    (role == SMC_CLNT || ini->is_smcd ||
-		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
+		     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+		      !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
 			/* link group found */
 			ini->first_contact_local = 0;
 			conn->lgr = lgr;
@@ -1903,10 +1741,6 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 			goto out;
 		}
 	}
-	smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
-	if (!conn->lgr->is_smcd)
-		smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
-	conn->freed = 0;
 	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
 	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
 	conn->urg_state = SMC_URG_READ;
@@ -2235,6 +2069,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 		if (buf_desc) {
 			SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
 			SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
+			memset(buf_desc->cpu_addr, 0, bufsize);
 			break; /* found reusable slot */
 		}
@@ -2291,16 +2126,14 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)

 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
-	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
-	    !smc_link_active(conn->lnk))
+	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
 		return;
 	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }

 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
-	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
-	    !smc_link_active(conn->lnk))
+	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
 		return;
 	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -2309,7 +2142,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
 	int i;

-	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
+	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		if (!smc_link_active(&conn->lgr->lnk[i]))
@@ -2323,7 +2156,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 {
 	int i;

-	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
+	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		if (!smc_link_active(&conn->lgr->lnk[i]))
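The reuse gate restored in smc_conn_create() above requires a server-side link group to have both a free connection slot and a free rtoken slot before it accepts another connection. A standalone sketch of that two-part check, with a plain 32-bit word standing in for the kernel bitmap and bitmap_full() (sizes and names here are illustrative, not the kernel's actual configuration):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RMBS_PER_LGR_MAX 32	/* stand-in for SMC_RMBS_PER_LGR_MAX */

    static bool lgr_has_room(unsigned int conns_num, uint32_t rtokens_used_mask)
    {
    	/* both a free connection slot and a free rtoken slot are required */
    	return conns_num < RMBS_PER_LGR_MAX &&
    	       rtokens_used_mask != UINT32_MAX;	/* i.e. !bitmap_full() */
    }

    int main(void)
    {
    	printf("%d\n", lgr_has_room(3, 0x0000ffffu));	/* 1: room left */
    	printf("%d\n", lgr_has_room(3, UINT32_MAX));	/* 0: rtokens exhausted */
    	return 0;
    }

Checking only conns_num is not enough because rtoken slots can stay occupied after their connections are gone, which is exactly why the bitmap_full() test comes back with this revert.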
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 4cb03e9423..9a0523f4c7 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -42,16 +42,11 @@ enum smc_link_state {	/* possible states of a link */
 };

 #define SMC_WR_BUF_SIZE		48	/* size of work request buffer */
-#define SMC_WR_BUF_V2_SIZE	8192	/* size of v2 work request buffer */

 struct smc_wr_buf {
 	u8	raw[SMC_WR_BUF_SIZE];
 };

-struct smc_wr_v2_buf {
-	u8	raw[SMC_WR_BUF_V2_SIZE];
-};
-
 #define SMC_WR_REG_MR_WAIT_TIME	(5 * HZ)/* wait time for ib_wr_reg_mr result */

 enum smc_wr_reg_state {
@@ -97,11 +92,7 @@ struct smc_link {
 	struct smc_wr_tx_pend	*wr_tx_pends;	/* WR send waiting for CQE */
 	struct completion	*wr_tx_compl;	/* WR send CQE completion */
 	/* above four vectors have wr_tx_cnt elements and use the same index */
-	struct ib_send_wr	*wr_tx_v2_ib;	/* WR send v2 meta data */
-	struct ib_sge		*wr_tx_v2_sge;	/* WR send v2 gather meta data*/
-	struct smc_wr_tx_pend	*wr_tx_v2_pend;	/* WR send v2 waiting for CQE */
 	dma_addr_t		wr_tx_dma_addr;	/* DMA address of wr_tx_bufs */
-	dma_addr_t		wr_tx_v2_dma_addr; /* DMA address of v2 tx buf*/
 	atomic_long_t		wr_tx_id;	/* seq # of last sent WR */
 	unsigned long		*wr_tx_mask;	/* bit mask of used indexes */
 	u32			wr_tx_cnt;	/* number of WR send buffers */
@@ -113,7 +104,6 @@ struct smc_link {
 	struct ib_sge		*wr_rx_sges;	/* WR recv scatter meta data */
 	/* above three vectors have wr_rx_cnt elements and use the same index */
 	dma_addr_t		wr_rx_dma_addr;	/* DMA address of wr_rx_bufs */
-	dma_addr_t		wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/
 	u64			wr_rx_id;	/* seq # of last recv WR */
 	u32			wr_rx_cnt;	/* number of WR recv buffers */
 	unsigned long		wr_rx_tstamp;	/* jiffies when last buf rx */
@@ -137,8 +127,6 @@ struct smc_link {
 	u8			peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
 	u8			link_idx;	/* index in lgr link array */
 	u8			link_is_asym;	/* is link asymmetric? */
-	u8			clearing : 1;	/* link is being cleared */
-	refcount_t		refcnt;		/* link reference count */
 	struct smc_link_group	*lgr;		/* parent link group */
 	struct work_struct	link_down_wrk;	/* wrk to bring link down */
 	char			ibname[IB_DEVICE_NAME_MAX]; /* ib device name */
@@ -220,7 +208,6 @@ enum smc_llc_flowtype {
 	SMC_LLC_FLOW_NONE	= 0,
 	SMC_LLC_FLOW_ADD_LINK	= 2,
 	SMC_LLC_FLOW_DEL_LINK	= 4,
-	SMC_LLC_FLOW_REQ_ADD_LINK = 5,
 	SMC_LLC_FLOW_RKEY	= 6,
 };

@@ -251,7 +238,6 @@ struct smc_link_group {
 	u8			terminating : 1;/* lgr is terminating */
 	u8			freeing : 1;	/* lgr is being freed */
-	refcount_t		refcnt;		/* lgr reference count */
 	bool			is_smcd;	/* SMC-R or SMC-D */
 	u8			smc_version;
 	u8			negotiated_eid[SMC_MAX_EID_LEN];
@@ -264,10 +250,6 @@ struct smc_link_group {
 			/* client or server */
 			struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX];
 						/* smc link */
-			struct smc_wr_v2_buf	*wr_rx_buf_v2;
-						/* WR v2 recv payload buffer */
-			struct smc_wr_v2_buf	*wr_tx_buf_v2;
-						/* WR v2 send payload buffer */
 			char			peer_systemid[SMC_SYSTEMID_LEN];
 						/* unique system_id of peer */
 			struct smc_rtoken	rtokens[SMC_RMBS_PER_LGR_MAX]
@@ -306,11 +288,6 @@ struct smc_link_group {
 						/* link keep alive time */
 			u32			llc_termination_rsn;
 						/* rsn code for termination */
-			u8			nexthop_mac[ETH_ALEN];
-			u8			uses_gateway;
-			__be32			saddr;
-						/* net namespace */
-			struct net		*net;
 		};
 		struct { /* SMC-D */
 			u64			peer_gid;
@@ -325,31 +302,6 @@ struct smc_link_group {

 struct smc_clc_msg_local;

-#define GID_LIST_SIZE	2
-
-struct smc_gidlist {
-	u8			len;
-	u8			list[GID_LIST_SIZE][SMC_GID_SIZE];
-};
-
-struct smc_init_info_smcrv2 {
-	/* Input fields */
-	__be32			saddr;
-	struct sock		*clc_sk;
-	__be32			daddr;
-
-	/* Output fields when saddr is set */
-	struct smc_ib_device	*ib_dev_v2;
-	u8			ib_port_v2;
-	u8			ib_gid_v2[SMC_GID_SIZE];
-
-	/* Additional output fields when clc_sk and daddr is set as well */
-	u8			uses_gateway;
-	u8			nexthop_mac[ETH_ALEN];
-
-	struct smc_gidlist	gidlist;
-};
-
 struct smc_init_info {
 	u8			is_smcd;
 	u8			smc_type_v1;
@@ -358,18 +310,12 @@ struct smc_init_info {
 	u8			first_contact_local;
 	unsigned short		vlan_id;
 	u32			rc;
-	u8			negotiated_eid[SMC_MAX_EID_LEN];
 	/* SMC-R */
-	u8			smcr_version;
-	u8			check_smcrv2;
-	u8			peer_gid[SMC_GID_SIZE];
-	u8			peer_mac[ETH_ALEN];
-	u8			peer_systemid[SMC_SYSTEMID_LEN];
+	struct smc_clc_msg_local *ib_lcl;
 	struct smc_ib_device	*ib_dev;
 	u8			ib_gid[SMC_GID_SIZE];
 	u8			ib_port;
 	u32			ib_clcqpn;
-	struct smc_init_info_smcrv2 smcrv2;
 	/* SMC-D */
 	u64			ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
 	struct smcd_dev		*ism_dev[SMC_MAX_ISM_DEVS + 1];
@@ -412,18 +358,7 @@ static inline struct smc_connection *smc_lgr_find_conn(
 	return res;
 }

-static inline bool smc_conn_lgr_valid(struct smc_connection *conn)
-{
-	return conn->lgr && conn->alert_token_local;
-}
-
-/*
- * Returns true if the specified link is usable.
- *
- * usable means the link is ready to receive RDMA messages, map memory
- * on the link, etc. This doesn't ensure we are able to send RDMA messages
- * on this link, if sending RDMA messages is needed, use smc_link_sendable()
- */
+/* returns true if the specified link is usable */
 static inline bool smc_link_usable(struct smc_link *lnk)
 {
 	if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE)
@@ -431,15 +366,6 @@ static inline bool smc_link_usable(struct smc_link *lnk)
 	return true;
 }

-/*
- * Returns true if the specified link is ready to receive AND send RDMA
- * messages.
- *
- * For the client side in first contact, the underlying QP may still in
- * RESET or RTR when the link state is ACTIVATING, checks in smc_link_usable()
- * is not strong enough. For those places that need to send any CDC or LLC
- * messages, use smc_link_sendable(), otherwise, use smc_link_usable() instead
- */
 static inline bool smc_link_sendable(struct smc_link *lnk)
 {
 	return smc_link_usable(lnk) &&
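The comments deleted above explain a real asymmetry: smc_link_usable() only screens out UNUSED/INACTIVE links, while smc_link_sendable() additionally demands that the underlying queue pair is far enough along to carry messages. A small standalone sketch of the two predicates; the qp_ready flag merely stands in for the kernel's QP-state check and every name here is illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    enum link_state { LNK_UNUSED, LNK_INACTIVE, LNK_ACTIVATING, LNK_ACTIVE };

    struct link {
    	enum link_state state;
    	bool qp_ready;		/* stand-in for "QP has reached RTS" */
    };

    /* usable: ok for setup work such as mapping memory on the link */
    static bool link_usable(const struct link *l)
    {
    	return l->state != LNK_UNUSED && l->state != LNK_INACTIVE;
    }

    /* sendable: additionally safe for sending CDC/LLC messages */
    static bool link_sendable(const struct link *l)
    {
    	return link_usable(l) && l->qp_ready;
    }

    int main(void)
    {
    	struct link l = { .state = LNK_ACTIVATING, .qp_ready = false };

    	printf("usable=%d sendable=%d\n", link_usable(&l), link_sendable(&l));
    	return 0;
    }

As the removed comment notes, an ACTIVATING client-side link can pass the usable test while its QP is still in RESET or RTR, which is why senders need the stricter predicate.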
@@ -495,8 +421,6 @@ struct smc_clc_msg_accept_confirm;

 void smc_lgr_cleanup_early(struct smc_link_group *lgr);
 void smc_lgr_terminate_sched(struct smc_link_group *lgr);
-void smc_lgr_hold(struct smc_link_group *lgr);
-void smc_lgr_put(struct smc_link_group *lgr);
 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -528,8 +452,6 @@ void smc_core_exit(void);
 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 		   u8 link_idx, struct smc_init_info *ini);
 void smcr_link_clear(struct smc_link *lnk, bool log);
-void smcr_link_hold(struct smc_link *lnk);
-void smcr_link_put(struct smc_link *lnk);
 void smc_switch_link_and_count(struct smc_connection *conn,
 			       struct smc_link *to_lnk);
 int smcr_buf_map_lgr(struct smc_link *lnk);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 1fca2f90a9..c952986a6a 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 	r->diag_state = sk->sk_state;
 	if (smc->use_fallback)
 		r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
-	else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd)
+	else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
 		r->diag_mode = SMC_DIAG_MODE_SMCD;
 	else
 		r->diag_mode = SMC_DIAG_MODE_SMCR;
@@ -142,27 +142,27 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 			goto errout;
 	}

-	if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd &&
+	if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
 	    (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
 	    !list_empty(&smc->conn.lgr->list)) {
-		struct smc_link *link = smc->conn.lnk;
-
 		struct smc_diag_lgrinfo linfo = {
 			.role = smc->conn.lgr->role,
-			.lnk[0].ibport = link->ibport,
-			.lnk[0].link_id = link->link_id,
+			.lnk[0].ibport = smc->conn.lnk->ibport,
+			.lnk[0].link_id = smc->conn.lnk->link_id,
 		};

 		memcpy(linfo.lnk[0].ibname,
 		       smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
-		       sizeof(link->smcibdev->ibdev->name));
-		smc_gid_be16_convert(linfo.lnk[0].gid, link->gid);
-		smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid);
+		       sizeof(smc->conn.lnk->smcibdev->ibdev->name));
+		smc_gid_be16_convert(linfo.lnk[0].gid,
+				     smc->conn.lnk->gid);
+		smc_gid_be16_convert(linfo.lnk[0].peer_gid,
+				     smc->conn.lnk->peer_gid);

 		if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
 			goto errout;
 	}
-	if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
+	if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
 	    (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
 	    !list_empty(&smc->conn.lgr->list)) {
 		struct smc_connection *conn = &smc->conn;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index a3e2d3b895..f0ec1f1d50 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -12,14 +12,11 @@
  *  Author(s):  Ursula Braun
 */

-#include
-#include
 #include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
@@ -65,23 +62,16 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
 		IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
 		IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
 	struct ib_qp_attr qp_attr;
-	u8 hop_lim = 1;

 	memset(&qp_attr, 0, sizeof(qp_attr));
 	qp_attr.qp_state = IB_QPS_RTR;
 	qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
 	qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
 	rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
-	if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
-		hop_lim = IPV6_DEFAULT_HOPLIMIT;
-	rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0);
+	rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
 	rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
-	if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
-		memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac,
-		       sizeof(lnk->lgr->nexthop_mac));
-	else
-		memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
-		       sizeof(lnk->peer_mac));
+	memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
+	       sizeof(lnk->peer_mac));
 	qp_attr.dest_qp_num = lnk->peer_qpn;
 	qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
 	qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming
@@ -193,81 +183,9 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
 	return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
 }

-int smc_ib_find_route(__be32 saddr, __be32 daddr,
-		      u8 nexthop_mac[], u8 *uses_gateway)
-{
-	struct neighbour *neigh = NULL;
-	struct rtable *rt = NULL;
-	struct flowi4 fl4 = {
-		.saddr = saddr,
-		.daddr = daddr
-	};
-
-	if (daddr == cpu_to_be32(INADDR_NONE))
-		goto out;
-	rt = ip_route_output_flow(&init_net, &fl4, NULL);
-	if (IS_ERR(rt))
-		goto out;
-	if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
-		goto out;
-	neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
-	if (neigh) {
-		memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
-		*uses_gateway = rt->rt_uses_gateway;
-		return 0;
-	}
-out:
-	return -ENOENT;
-}
-
-static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
-				    const struct ib_gid_attr *attr,
-				    u8 gid[], u8 *sgid_index,
-				    struct smc_init_info_smcrv2 *smcrv2)
-{
-	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
-		if (gid)
-			memcpy(gid, &attr->gid, SMC_GID_SIZE);
-		if (sgid_index)
-			*sgid_index = attr->index;
-		return 0;
-	}
-	if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
-	    smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
-		struct in_device *in_dev = __in_dev_get_rcu(ndev);
-		const struct in_ifaddr *ifa;
-		bool subnet_match = false;
-
-		if (!in_dev)
-			goto out;
-		in_dev_for_each_ifa_rcu(ifa, in_dev) {
-			if (!inet_ifa_match(smcrv2->saddr, ifa))
-				continue;
-			subnet_match = true;
-			break;
-		}
-		if (!subnet_match)
-			goto out;
-		if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
-						       smcrv2->daddr,
-						       smcrv2->nexthop_mac,
-						       &smcrv2->uses_gateway))
-			goto out;
-
-		if (gid)
-			memcpy(gid, &attr->gid, SMC_GID_SIZE);
-		if (sgid_index)
-			*sgid_index = attr->index;
-		return 0;
-	}
-out:
-	return -ENODEV;
-}
-
 /* determine the gid for an ib-device port and vlan id */
 int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
-			 unsigned short vlan_id, u8 gid[], u8 *sgid_index,
-			 struct smc_init_info_smcrv2 *smcrv2)
+			 unsigned short vlan_id, u8 gid[], u8 *sgid_index)
 {
 	const struct ib_gid_attr *attr;
 	const struct net_device *ndev;
@@ -283,13 +201,15 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 		if (!IS_ERR(ndev) &&
 		    ((!vlan_id && !is_vlan_dev(ndev)) ||
 		     (vlan_id && is_vlan_dev(ndev) &&
-		      vlan_dev_vlan_id(ndev) == vlan_id))) {
-			if (!smc_ib_determine_gid_rcu(ndev, attr, gid,
-						      sgid_index, smcrv2)) {
-				rcu_read_unlock();
-				rdma_put_gid_attr(attr);
-				return 0;
-			}
+		      vlan_dev_vlan_id(ndev) == vlan_id)) &&
+		    attr->gid_type == IB_GID_TYPE_ROCE) {
+			rcu_read_unlock();
+			if (gid)
+				memcpy(gid, &attr->gid, SMC_GID_SIZE);
+			if (sgid_index)
+				*sgid_index = attr->index;
+			rdma_put_gid_attr(attr);
+			return 0;
 		}
 		rcu_read_unlock();
 		rdma_put_gid_attr(attr);
@@ -297,58 +217,6 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 	return -ENODEV;
 }

-/* check if gid is still defined on smcibdev */
-static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2,
-				  struct smc_ib_device *smcibdev, u8 ibport)
-{
-	const struct ib_gid_attr *attr;
-	bool rc = false;
-	int i;
-
-	for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
-		attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
-		if (IS_ERR(attr))
-			continue;
-
-		rcu_read_lock();
-		if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) ||
-		    (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
-		     !(ipv6_addr_type((const struct in6_addr *)&attr->gid)
-				     & IPV6_ADDR_LINKLOCAL)))
-			if (!memcmp(gid, &attr->gid, SMC_GID_SIZE))
-				rc = true;
-		rcu_read_unlock();
-		rdma_put_gid_attr(attr);
-	}
-	return rc;
-}
-
-/* check all links if the gid is still defined on smcibdev */
-static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport)
-{
-	struct smc_link_group *lgr;
-	int i;
-
-	spin_lock_bh(&smc_lgr_list.lock);
-	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
-		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
-			    SMC_MAX_PNETID_LEN))
-			continue; /* lgr is not affected */
-		if (list_empty(&lgr->list))
-			continue;
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
-			    lgr->lnk[i].smcibdev != smcibdev)
-				continue;
-			if (!smc_ib_check_link_gid(lgr->lnk[i].gid,
-						   lgr->smc_version == SMC_V2,
-						   smcibdev, ibport))
-				smcr_port_err(smcibdev, ibport);
-		}
-	}
-	spin_unlock_bh(&smc_lgr_list.lock);
-}
-
 static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
 {
 	int rc;
@@ -387,7 +255,6 @@ static void smc_ib_port_event_work(struct work_struct *work)
 		} else {
 			clear_bit(port_idx, smcibdev->ports_going_away);
 			smcr_port_add(smcibdev, port_idx + 1);
-			smc_ib_gid_check(smcibdev, port_idx + 1);
 		}
 	}
 }
@@ -656,7 +523,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk)
 /* create a queue pair within the protection domain for a link */
 int smc_ib_create_queue_pair(struct smc_link *lnk)
 {
-	int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
 	struct ib_qp_init_attr qp_attr = {
 		.event_handler = smc_ib_qp_event_handler,
 		.qp_context = lnk,
@@ -670,7 +536,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
 			.max_send_wr = SMC_WR_BUF_CNT * 3,
 			.max_recv_wr = SMC_WR_BUF_CNT * 3,
 			.max_send_sge = SMC_IB_MAX_SEND_SGE,
-			.max_recv_sge = sges_per_buf,
+			.max_recv_sge = 1,
 		},
 		.sq_sig_type = IB_SIGNAL_REQ_WR,
 		.qp_type = IB_QPT_RC,
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 5d8b49c57f..6967c3d52b 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -59,24 +59,6 @@ struct smc_ib_device {	/* ib-device infos for smc */
 	int			ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */
 };

-static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
-{
-	struct in6_addr *addr6 = (struct in6_addr *)gid;
-
-	if (ipv6_addr_v4mapped(addr6) ||
-	    !(addr6->s6_addr32[0] | addr6->s6_addr32[1] | addr6->s6_addr32[2]))
-		return addr6->s6_addr32[3];
-	return cpu_to_be32(INADDR_NONE);
-}
-
-static inline struct net *smc_ib_net(struct smc_ib_device *smcibdev)
-{
-	if (smcibdev && smcibdev->ibdev)
-		return read_pnet(&smcibdev->ibdev->coredev.rdma_net);
-	return NULL;
-}
-
-struct smc_init_info_smcrv2;
 struct smc_buf_desc;
 struct smc_link;
@@ -109,10 +91,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
 			       struct smc_buf_desc *buf_slot,
 			       enum dma_data_direction data_direction);
 int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
-			 unsigned short vlan_id, u8 gid[], u8 *sgid_index,
-			 struct smc_init_info_smcrv2 *smcrv2);
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
-		      u8 nexthop_mac[], u8 *uses_gateway);
+			 unsigned short vlan_id, u8 gid[], u8 *sgid_index);
 bool smc_ib_is_valid_local_systemid(void);
 int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
 #endif
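The smc_ib_gid_to_ipv4() helper removed from smc_ib.h extracts an IPv4 address from a 16-byte RoCE GID when the GID is IPv4-mapped (::ffff:a.b.c.d) or IPv4-compatible (leading 96 bits zero). A standalone userspace re-implementation of the same logic, for illustration only:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint32_t gid_to_ipv4(const uint8_t gid[16])
    {
    	static const uint8_t v4mapped[12] =
    		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
    	static const uint8_t zero96[12];
    	uint32_t v4;

    	if (memcmp(gid, v4mapped, 12) && memcmp(gid, zero96, 12))
    		return INADDR_NONE;	/* not representable as IPv4 */
    	memcpy(&v4, gid + 12, 4);	/* already big-endian, like __be32 */
    	return v4;
    }

    int main(void)
    {
    	uint8_t gid[16] = { [10] = 0xff, [11] = 0xff,
    			    [12] = 192, [13] = 0, [14] = 2, [15] = 1 };
    	struct in_addr a = { .s_addr = gid_to_ipv4(gid) };

    	printf("%s\n", inet_ntoa(a));	/* prints 192.0.2.1 */
    	return 0;
    }

SMC-Rv2 needed this because RoCEv2 GIDs encode routable IP addresses; once v2 is reverted, only link-local RoCEv1 GIDs are matched and the helper has no caller.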
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index a2084ecdb9..9cb2df2899 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -6,7 +6,6 @@
  * Copyright IBM Corp. 2018
 */

-#include
 #include
 #include
 #include
@@ -24,7 +23,6 @@ struct smcd_dev_list smcd_dev_list = {
 };

 static bool smc_ism_v2_capable;
-static u8 smc_ism_v2_system_eid[SMC_MAX_EID_LEN];

 /* Test if an ISM communication is possible - same CPC */
 int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
@@ -44,12 +42,9 @@ int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
 	return rc < 0 ? rc : 0;
 }

-void smc_ism_get_system_eid(u8 **eid)
+void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid)
 {
-	if (!smc_ism_v2_capable)
-		*eid = NULL;
-	else
-		*eid = smc_ism_v2_system_eid;
+	smcd->ops->get_system_eid(smcd, eid);
 }

 u16 smc_ism_get_chid(struct smcd_dev *smcd)
@@ -440,12 +435,9 @@ int smcd_register_dev(struct smcd_dev *smcd)
 	if (list_empty(&smcd_dev_list.list)) {
 		u8 *system_eid = NULL;

-		smcd->ops->get_system_eid(smcd, &system_eid);
-		if (system_eid[24] != '0' || system_eid[28] != '0') {
+		smc_ism_get_system_eid(smcd, &system_eid);
+		if (system_eid[24] != '0' || system_eid[28] != '0')
 			smc_ism_v2_capable = true;
-			memcpy(smc_ism_v2_system_eid, system_eid,
-			       SMC_MAX_EID_LEN);
-		}
 	}
 	/* sort list: devices without pnetid before devices with pnetid */
 	if (smcd->pnetid[0])
@@ -541,5 +533,4 @@ EXPORT_SYMBOL_GPL(smcd_handle_irq);
 void __init smc_ism_init(void)
 {
 	smc_ism_v2_capable = false;
-	memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN);
 }
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 004b22a13f..113efc7352 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -48,7 +48,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *dev,
 			   struct smc_buf_desc *dmb_desc);
 int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
 		  void *data, size_t len);
 int smc_ism_signal_shutdown(struct smc_link_group *lgr);
-void smc_ism_get_system_eid(u8 **eid);
+void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid);
 u16 smc_ism_get_chid(struct smcd_dev *dev);
 bool smc_ism_is_v2_capable(void);
 void smc_ism_init(void);
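Both sides of the smcd_register_dev() hunk keep the same v2-capability probe: the device is treated as ISM-v2 capable unless the system EID has '0' at both byte offsets 24 and 28. A trivial standalone sketch of just that test (the meaning of those two digits is not spelled out in this hunk, so treat the offsets as opaque protocol constants; all names are illustrative):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_EID_LEN 32	/* stand-in for SMC_MAX_EID_LEN */

    static bool seid_is_v2_capable(const char eid[MAX_EID_LEN])
    {
    	return eid[24] != '0' || eid[28] != '0';
    }

    int main(void)
    {
    	char eid[MAX_EID_LEN];

    	memset(eid, '0', sizeof(eid));
    	printf("%d\n", seid_is_v2_capable(eid));	/* 0: not capable */
    	eid[24] = '1';
    	printf("%d\n", seid_is_v2_capable(eid));	/* 1: capable */
    	return 0;
    }

What the revert drops is only the cached copy of the EID (smc_ism_v2_system_eid); after it, callers ask the device's ops for the EID each time instead.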
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c4d057b294..ee1f0fdba0 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -23,24 +23,16 @@
 struct smc_llc_hdr {
 	struct smc_wr_rx_hdr common;
-	union {
-		struct {
-			u8 length;	/* 44 */
-#if defined(__BIG_ENDIAN_BITFIELD)
-			u8 reserved:4,
-			   add_link_rej_rsn:4;
+	u8 length;	/* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u8 reserved:4,
+	   add_link_rej_rsn:4;
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-			u8 add_link_rej_rsn:4,
-			   reserved:4;
+	u8 add_link_rej_rsn:4,
+	   reserved:4;
 #endif
-		};
-		u16 length_v2;	/* 44 - 8192*/
-	};
 	u8 flags;
-} __packed;		/* format defined in
-			 * IBM Shared Memory Communications Version 2
-			 * (https://www.ibm.com/support/pages/node/6326337)
-			 */
+};

 #define SMC_LLC_FLAG_NO_RMBE_EYEC	0x03

@@ -84,32 +76,6 @@ struct smc_llc_msg_add_link_cont_rt {
 	__be64 rmb_vaddr_new;
 };

-struct smc_llc_msg_add_link_v2_ext {
-#if defined(__BIG_ENDIAN_BITFIELD)
-	u8 v2_direct : 1,
-	   reserved : 7;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
-	u8 reserved : 7,
-	   v2_direct : 1;
-#endif
-	u8 reserved2;
-	u8 client_target_gid[SMC_GID_SIZE];
-	u8 reserved3[8];
-	u16 num_rkeys;
-	struct smc_llc_msg_add_link_cont_rt rt[];
-} __packed;		/* format defined in
-			 * IBM Shared Memory Communications Version 2
-			 * (https://www.ibm.com/support/pages/node/6326337)
-			 */
-
-struct smc_llc_msg_req_add_link_v2 {
-	struct smc_llc_hdr hd;
-	u8 reserved[20];
-	u8 gid_cnt;
-	u8 reserved2[3];
-	u8 gid[][SMC_GID_SIZE];
-};
-
 #define SMC_LLC_RKEYS_PER_CONT_MSG	2

 struct smc_llc_msg_add_link_cont {	/* type 0x03 */
@@ -148,8 +114,7 @@ struct smc_rmb_rtoken {
 	__be64 rmb_vaddr;
 } __packed;			/* format defined in RFC7609 */

-#define SMC_LLC_RKEYS_PER_MSG		3
-#define SMC_LLC_RKEYS_PER_MSG_V2	255
+#define SMC_LLC_RKEYS_PER_MSG	3

 struct smc_llc_msg_confirm_rkey {	/* type 0x06 */
 	struct smc_llc_hdr hd;
@@ -170,18 +135,9 @@ struct smc_llc_msg_delete_rkey {	/* type 0x09 */
 	u8 reserved2[4];
 };

-struct smc_llc_msg_delete_rkey_v2 {	/* type 0x29 */
-	struct smc_llc_hdr hd;
-	u8 num_rkeys;
-	u8 num_inval_rkeys;
-	u8 reserved[2];
-	__be32 rkey[];
-};
-
 union smc_llc_msg {
 	struct smc_llc_msg_confirm_link confirm_link;
 	struct smc_llc_msg_add_link add_link;
-	struct smc_llc_msg_req_add_link_v2 req_add_link;
 	struct smc_llc_msg_add_link_cont add_link_cont;
 	struct smc_llc_msg_del_link delete_link;
@@ -233,7 +189,7 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
 static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
 				  struct smc_llc_qentry *qentry)
 {
-	u8 msg_type = qentry->msg.raw.hdr.common.llc_type;
+	u8 msg_type = qentry->msg.raw.hdr.common.type;

 	if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) &&
 	    flow_type != msg_type && !lgr->delayed_event) {
@@ -242,10 +198,9 @@ static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
 	}
 	/* drop parallel or already-in-progress llc requests */
 	if (flow_type != msg_type)
-		pr_warn_once("smc: SMC-R lg %*phN net %llu dropped parallel "
+		pr_warn_once("smc: SMC-R lg %*phN dropped parallel "
 			     "LLC msg: msg %d flow %d role %d\n",
 			     SMC_LGR_ID_SIZE, &lgr->id,
-			     lgr->net->net_cookie,
 			     qentry->msg.raw.hdr.common.type,
 			     flow_type, lgr->role);
 	kfree(qentry);
@@ -264,7 +219,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
 		spin_unlock_bh(&lgr->llc_flow_lock);
 		return false;
 	}
-	switch (qentry->msg.raw.hdr.common.llc_type) {
+	switch (qentry->msg.raw.hdr.common.type) {
 	case SMC_LLC_ADD_LINK:
 		flow->type = SMC_LLC_FLOW_ADD_LINK;
 		break;
@@ -351,7 +306,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 			smc_llc_flow_qentry_del(flow);
 			goto out;
 		}
-		rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type;
+		rcv_msg = flow->qentry->msg.raw.hdr.common.type;
 		if (exp_msg && rcv_msg != exp_msg) {
 			if (exp_msg == SMC_LLC_ADD_LINK &&
 			    rcv_msg == SMC_LLC_DELETE_LINK) {
@@ -360,10 +315,9 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 				smc_llc_flow_qentry_clr(flow));
 				return NULL;
 			}
-			pr_warn_once("smc: SMC-R lg %*phN net %llu dropped unexpected LLC msg: "
+			pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: "
 				     "msg %d exp %d flow %d role %d flags %x\n",
-				     SMC_LGR_ID_SIZE, &lgr->id, lgr->net->net_cookie,
-				     rcv_msg, exp_msg,
+				     SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg,
 				     flow->type, lgr->role,
 				     flow->qentry->msg.raw.hdr.flags);
 			smc_llc_flow_qentry_del(flow);
@@ -420,30 +374,6 @@ static int smc_llc_add_pending_send(struct smc_link *link,
 	return 0;
 }

-static int smc_llc_add_pending_send_v2(struct smc_link *link,
-				       struct smc_wr_v2_buf **wr_buf,
-				       struct smc_wr_tx_pend_priv **pend)
-{
-	int rc;
-
-	rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend);
-	if (rc < 0)
-		return rc;
-	return 0;
-}
-
-static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr,
-				 struct smc_link_group *lgr, size_t len)
-{
-	if (lgr->smc_version == SMC_V2) {
-		hdr->common.llc_version = SMC_V2;
-		hdr->length_v2 = len;
-	} else {
-		hdr->common.llc_version = 0;
-		hdr->length = len;
-	}
-}
-
 /* high-level API to send LLC confirm link */
 int smc_llc_send_confirm_link(struct smc_link *link,
 			      enum smc_llc_reqresp reqresp)
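The removed smc_llc_init_msg_hdr() and the header union above encode one idea: v1 LLC messages carry an 8-bit length (always 44), while v2 overlays a 16-bit length_v2 on the same bytes so a message can fill the 8192-byte v2 work request buffer, with a version field selecting which interpretation applies. A simplified standalone sketch of that dispatch; the field order and packing here are deliberately simplified and do not reproduce the wire format:

    #include <stdint.h>
    #include <stdio.h>

    struct llc_hdr {
    	uint8_t type;
    	union {			/* the overlay this hunk removes */
    		uint8_t length;		/* v1: always 44 */
    		uint16_t length_v2;	/* v2: 44 up to 8192 */
    	};
    	uint8_t version;	/* 0 for v1 messages, 2 for v2 */
    };

    static unsigned int msg_len(const struct llc_hdr *h)
    {
    	return h->version ? h->length_v2 : h->length;
    }

    int main(void)
    {
    	struct llc_hdr v1 = { .length = 44, .version = 0 };
    	struct llc_hdr v2 = { .length_v2 = 8192, .version = 2 };

    	printf("v1=%u v2=%u\n", msg_len(&v1), msg_len(&v2));
    	return 0;
    }

With the union gone, every sender below can go back to assigning hd.length directly, which is why so many hunks in this file swap smc_llc_init_msg_hdr() calls for plain assignments.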
@@ -460,8 +390,8 @@ int smc_llc_send_confirm_link(struct smc_link *link,
 		goto put_out;
 	confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
 	memset(confllc, 0, sizeof(*confllc));
-	confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK;
-	smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc));
+	confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
+	confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
 	confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
 	if (reqresp == SMC_LLC_RESP)
 		confllc->hd.flags |= SMC_LLC_FLAG_RESP;
@@ -496,8 +426,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
 		goto put_out;
 	rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
 	memset(rkeyllc, 0, sizeof(*rkeyllc));
-	rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY;
-	smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc));
+	rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
+	rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);

 	rtok_ix = 1;
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
@@ -541,8 +471,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
 		goto put_out;
 	rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
 	memset(rkeyllc, 0, sizeof(*rkeyllc));
-	rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
-	smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
+	rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
+	rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
 	rkeyllc->num_rkeys = 1;
 	rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
 	/* send llc message */
@@ -552,116 +482,26 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
 	return rc;
 }

-/* return first buffer from any of the next buf lists */
-static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
-						  int *buf_lst)
-{
-	struct smc_buf_desc *buf_pos;
-
-	while (*buf_lst < SMC_RMBE_SIZES) {
-		buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
-						   struct smc_buf_desc, list);
-		if (buf_pos)
-			return buf_pos;
-		(*buf_lst)++;
-	}
-	return NULL;
-}
-
-/* return next rmb from buffer lists */
-static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
-						 int *buf_lst,
-						 struct smc_buf_desc *buf_pos)
-{
-	struct smc_buf_desc *buf_next;
-
-	if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
-		(*buf_lst)++;
-		return _smc_llc_get_next_rmb(lgr, buf_lst);
-	}
-	buf_next = list_next_entry(buf_pos, list);
-	return buf_next;
-}
-
-static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
-						  int *buf_lst)
-{
-	*buf_lst = 0;
-	return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
-}
-
-static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
-			       struct smc_link *link, struct smc_link *link_new)
-{
-	struct smc_link_group *lgr = link->lgr;
-	struct smc_buf_desc *buf_pos;
-	int prim_lnk_idx, lnk_idx, i;
-	struct smc_buf_desc *rmb;
-	int len = sizeof(*ext);
-	int buf_lst;
-
-	ext->v2_direct = !lgr->uses_gateway;
-	memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE);
-
-	prim_lnk_idx = link->link_idx;
-	lnk_idx = link_new->link_idx;
-	mutex_lock(&lgr->rmbs_lock);
-	ext->num_rkeys = lgr->conns_num;
-	if (!ext->num_rkeys)
-		goto out;
-	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
-	for (i = 0; i < ext->num_rkeys; i++) {
-		if (!buf_pos)
-			break;
-		rmb = buf_pos;
-		ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
-		ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
-		ext->rt[i].rmb_vaddr_new =
-			cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
-		buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
-		while (buf_pos && !(buf_pos)->used)
-			buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
-	}
-	len += i * sizeof(ext->rt[0]);
-out:
-	mutex_unlock(&lgr->rmbs_lock);
-	return len;
-}
-
 /* send ADD LINK request or response */
 int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
 			  struct smc_link *link_new,
 			  enum smc_llc_reqresp reqresp)
 {
-	struct smc_llc_msg_add_link_v2_ext *ext = NULL;
 	struct smc_llc_msg_add_link *addllc;
 	struct smc_wr_tx_pend_priv *pend;
-	int len = sizeof(*addllc);
+	struct smc_wr_buf *wr_buf;
 	int rc;

 	if (!smc_wr_tx_link_hold(link))
 		return -ENOLINK;
-	if (link->lgr->smc_version == SMC_V2) {
-		struct smc_wr_v2_buf *wr_buf;
-
-		rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
-		if (rc)
-			goto put_out;
-		addllc = (struct smc_llc_msg_add_link *)wr_buf;
-		ext = (struct smc_llc_msg_add_link_v2_ext *)
-						&wr_buf->raw[sizeof(*addllc)];
-		memset(ext, 0, SMC_WR_TX_SIZE);
-	} else {
-		struct smc_wr_buf *wr_buf;
-
-		rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
-		if (rc)
-			goto put_out;
-		addllc = (struct smc_llc_msg_add_link *)wr_buf;
-	}
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		goto put_out;
+	addllc = (struct smc_llc_msg_add_link *)wr_buf;

 	memset(addllc, 0, sizeof(*addllc));
-	addllc->hd.common.llc_type = SMC_LLC_ADD_LINK;
+	addllc->hd.common.type = SMC_LLC_ADD_LINK;
+	addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
 	if (reqresp == SMC_LLC_RESP)
 		addllc->hd.flags |= SMC_LLC_FLAG_RESP;
 	memcpy(addllc->sender_mac, mac, ETH_ALEN);
@@ -676,14 +516,8 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
 		addllc->qp_mtu = min(link_new->path_mtu,
 				     link_new->peer_mtu);
 	}
-	if (ext && link_new)
-		len += smc_llc_fill_ext_v2(ext, link, link_new);
-	smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len);
 	/* send llc message */
-	if (link->lgr->smc_version == SMC_V2)
-		rc = smc_wr_tx_v2_send(link, pend, len);
-	else
-		rc = smc_wr_tx_send(link, pend);
+	rc = smc_wr_tx_send(link, pend);
 put_out:
 	smc_wr_tx_link_put(link);
 	return rc;
@@ -707,8 +541,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,

 	delllc = (struct smc_llc_msg_del_link *)wr_buf;
 	memset(delllc, 0, sizeof(*delllc));
-	delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK;
-	smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc));
+	delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+	delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
 	if (reqresp == SMC_LLC_RESP)
 		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
 	if (orderly)
@@ -740,8 +574,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
 		goto put_out;
 	testllc = (struct smc_llc_msg_test_link *)wr_buf;
 	memset(testllc, 0, sizeof(*testllc));
-	testllc->hd.common.llc_type = SMC_LLC_TEST_LINK;
-	smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc));
+	testllc->hd.common.type = SMC_LLC_TEST_LINK;
+	testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
 	memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
@@ -817,6 +651,44 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
 	return -EMLINK;
 }

+/* return first buffer from any of the next buf lists */
+static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
+						  int *buf_lst)
+{
+	struct smc_buf_desc *buf_pos;
+
+	while (*buf_lst < SMC_RMBE_SIZES) {
+		buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
+						   struct smc_buf_desc, list);
+		if (buf_pos)
+			return buf_pos;
+		(*buf_lst)++;
+	}
+	return NULL;
+}
+
+/* return next rmb from buffer lists */
+static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
+						 int *buf_lst,
+						 struct smc_buf_desc *buf_pos)
+{
+	struct smc_buf_desc *buf_next;
+
+	if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
+		(*buf_lst)++;
+		return _smc_llc_get_next_rmb(lgr, buf_lst);
+	}
+	buf_next = list_next_entry(buf_pos, list);
+	return buf_next;
+}
+
+static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
+						  int *buf_lst)
+{
+	*buf_lst = 0;
+	return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
+}
+
 /* send one add_link_continue msg */
 static int smc_llc_add_link_cont(struct smc_link *link,
 				 struct smc_link *link_new, u8 *num_rkeys_todo,
@@ -862,7 +734,7 @@ static int smc_llc_add_link_cont(struct smc_link *link,
 		while (*buf_pos && !(*buf_pos)->used)
 			*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
 	}
-	addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
+	addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
 	addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
 	if (lgr->role == SMC_CLNT)
 		addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
@@ -921,8 +793,6 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
 	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
 	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
 	qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
-	smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
-			     sizeof(qentry->msg));
 	return smc_llc_send_message(qentry->link, &qentry->msg);
 }

@@ -943,7 +813,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
 					 SMC_LLC_DEL_LOST_PATH);
 		return -ENOLINK;
 	}
-	if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
+	if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
 		/* received DELETE_LINK instead */
 		qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
 		smc_llc_send_message(link, &qentry->msg);
@@ -984,26 +854,6 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
 	return 0;
 }

-static void smc_llc_save_add_link_rkeys(struct smc_link *link,
-					struct smc_link *link_new)
-{
-	struct smc_llc_msg_add_link_v2_ext *ext;
-	struct smc_link_group *lgr = link->lgr;
-	int max, i;
-
-	ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
-						     SMC_WR_TX_SIZE);
-	max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
-	mutex_lock(&lgr->rmbs_lock);
-	for (i = 0; i < max; i++) {
-		smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
-			       ext->rt[i].rmb_key,
-			       ext->rt[i].rmb_vaddr_new,
-			       ext->rt[i].rmb_key_new);
-	}
-	mutex_unlock(&lgr->rmbs_lock);
-}
-
 static void smc_llc_save_add_link_info(struct smc_link *link,
 				       struct smc_llc_msg_add_link *add_llc)
 {
@@ -1020,47 +870,31 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 	struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
 	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
 	struct smc_link_group *lgr = smc_get_lgr(link);
-	struct smc_init_info *ini = NULL;
 	struct smc_link *lnk_new = NULL;
+	struct smc_init_info ini;
 	int lnk_idx, rc = 0;

 	if (!llc->qp_mtu)
 		goto out_reject;
-	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
-	if (!ini) {
-		rc = -ENOMEM;
-		goto out_reject;
-	}
-
-	ini->vlan_id = lgr->vlan_id;
-	if (lgr->smc_version == SMC_V2) {
-		ini->check_smcrv2 = true;
-		ini->smcrv2.saddr = lgr->saddr;
-		ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
-	}
-	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
+	ini.vlan_id = lgr->vlan_id;
+	smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
 	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
-	    (lgr->smc_version == SMC_V2 ||
-	     !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) {
-		if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2)
+	    !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) {
+		if (!ini.ib_dev)
 			goto out_reject;
 		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
 	}
-	if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
+	if (!ini.ib_dev) {
 		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
-		ini->smcrv2.ib_dev_v2 = link->smcibdev;
-		ini->smcrv2.ib_port_v2 = link->ibport;
-	} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
-		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
-		ini->ib_dev = link->smcibdev;
-		ini->ib_port = link->ibport;
+		ini.ib_dev = link->smcibdev;
+		ini.ib_port = link->ibport;
 	}
 	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
 	if (lnk_idx < 0)
 		goto out_reject;
 	lnk_new = &lgr->lnk[lnk_idx];
-	rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini);
+	rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini);
 	if (rc)
 		goto out_reject;
 	smc_llc_save_add_link_info(lnk_new, llc);
@@ -1076,20 +910,16 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 		goto out_clear_lnk;

 	rc = smc_llc_send_add_link(link,
-				   lnk_new->smcibdev->mac[lnk_new->ibport - 1],
+				   lnk_new->smcibdev->mac[ini.ib_port - 1],
 				   lnk_new->gid, lnk_new, SMC_LLC_RESP);
 	if (rc)
 		goto out_clear_lnk;
-	if (lgr->smc_version == SMC_V2) {
-		smc_llc_save_add_link_rkeys(link, lnk_new);
-	} else {
-		rc = smc_llc_cli_rkey_exchange(link, lnk_new);
-		if (rc) {
-			rc = 0;
-			goto out_clear_lnk;
-		}
+	rc = smc_llc_cli_rkey_exchange(link, lnk_new);
+	if (rc) {
+		rc = 0;
+		goto out_clear_lnk;
 	}
-	rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t);
+	rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t);
 	if (!rc)
 		goto out;
 out_clear_lnk:
@@ -1098,78 +928,29 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 out_reject:
 	smc_llc_cli_add_link_reject(qentry);
 out:
-	kfree(ini);
 	kfree(qentry);
 	return rc;
 }

-static void smc_llc_send_request_add_link(struct smc_link *link)
-{
-	struct smc_llc_msg_req_add_link_v2 *llc;
-	struct smc_wr_tx_pend_priv *pend;
-	struct smc_wr_v2_buf *wr_buf;
-	struct smc_gidlist gidlist;
-	int rc, len, i;
-
-	if (!smc_wr_tx_link_hold(link))
-		return;
-	if (link->lgr->type == SMC_LGR_SYMMETRIC ||
-	    link->lgr->type == SMC_LGR_ASYMMETRIC_PEER)
-		goto put_out;
-
-	smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid);
-	if (gidlist.len <= 1)
-		goto put_out;
-
-	rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
-	if (rc)
-		goto put_out;
-	llc = (struct smc_llc_msg_req_add_link_v2 *)wr_buf;
-	memset(llc, 0, SMC_WR_TX_SIZE);
-
-	llc->hd.common.llc_type = SMC_LLC_REQ_ADD_LINK;
-	for (i = 0; i < gidlist.len; i++)
-		memcpy(llc->gid[i], gidlist.list[i], sizeof(gidlist.list[0]));
-	llc->gid_cnt = gidlist.len;
-	len = sizeof(*llc) + (gidlist.len * sizeof(gidlist.list[0]));
-	smc_llc_init_msg_hdr(&llc->hd, link->lgr, len);
-	rc = smc_wr_tx_v2_send(link, pend, len);
-	if (!rc)
-		/* set REQ_ADD_LINK flow and wait for response from peer */
-		link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK;
-put_out:
-	smc_wr_tx_link_put(link);
-}
-
 /* as an SMC client, invite server to start the add_link processing */
 static void smc_llc_cli_add_link_invite(struct smc_link *link,
 					struct smc_llc_qentry *qentry)
 {
 	struct smc_link_group *lgr = smc_get_lgr(link);
-	struct smc_init_info *ini = NULL;
-
-	if (lgr->smc_version == SMC_V2) {
-		smc_llc_send_request_add_link(link);
-		goto out;
-	}
+	struct smc_init_info ini;

 	if (lgr->type == SMC_LGR_SYMMETRIC ||
 	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
 		goto out;

-	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
-	if (!ini)
+	ini.vlan_id = lgr->vlan_id;
+	smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+	if (!ini.ib_dev)
 		goto out;

-	ini->vlan_id = lgr->vlan_id;
-	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
-	if (!ini->ib_dev)
-		goto out;
-
-	smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1],
-			      ini->ib_gid, NULL, SMC_LLC_REQ);
+	smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1],
+			      ini.ib_gid, NULL, SMC_LLC_REQ);
 out:
-	kfree(ini);
 	kfree(qentry);
 }

@@ -1185,7 +966,7 @@ static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc)

 static bool smc_llc_is_local_add_link(union smc_llc_msg *llc)
 {
-	if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK &&
+	if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK &&
 	    smc_llc_is_empty_llc_message(llc))
 		return true;
 	return false;
@@ -1352,7 +1133,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
 	/* receive CONFIRM LINK response over the RoCE fabric */
 	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0);
 	if (!qentry ||
-	    qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
+	    qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
 		/* send DELETE LINK */
 		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
 					 false, SMC_LLC_DEL_LOST_PATH);
@@ -1371,80 +1152,37 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
 	return 0;
 }

-static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry)
-{
-	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
-	smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
-			     sizeof(qentry->msg));
-	memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data));
-	smc_llc_send_message(qentry->link, &qentry->msg);
-}
-
-int smc_llc_srv_add_link(struct smc_link *link,
-			 struct smc_llc_qentry *req_qentry)
+int smc_llc_srv_add_link(struct smc_link *link)
 {
 	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
 	struct smc_link_group *lgr = link->lgr;
 	struct smc_llc_msg_add_link *add_llc;
 	struct smc_llc_qentry *qentry = NULL;
-	bool send_req_add_link_resp = false;
-	struct smc_link *link_new = NULL;
-	struct smc_init_info *ini = NULL;
+	struct smc_link *link_new;
+	struct smc_init_info ini;
 	int lnk_idx, rc = 0;

-	if (req_qentry &&
-	    req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK)
-		send_req_add_link_resp = true;
-
-	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
-	if (!ini) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
 	/* ignore client add link recommendation, start new flow */
-	ini->vlan_id = lgr->vlan_id;
-	if (lgr->smc_version == SMC_V2) {
-		ini->check_smcrv2 = true;
-		ini->smcrv2.saddr = lgr->saddr;
-		if (send_req_add_link_resp) {
-			struct smc_llc_msg_req_add_link_v2 *req_add =
-				&req_qentry->msg.req_add_link;
-
-			ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
-		}
-	}
-	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
-	if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
+	ini.vlan_id = lgr->vlan_id;
+	smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+	if (!ini.ib_dev) {
 		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
-		ini->smcrv2.ib_dev_v2 = link->smcibdev;
-		ini->smcrv2.ib_port_v2 = link->ibport;
-	} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
-		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
-		ini->ib_dev = link->smcibdev;
-		ini->ib_port = link->ibport;
+		ini.ib_dev = link->smcibdev;
+		ini.ib_port = link->ibport;
 	}
 	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
-	if (lnk_idx < 0) {
-		rc = 0;
-		goto out;
-	}
+	if (lnk_idx < 0)
+		return 0;

-	rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini);
+	rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini);
 	if (rc)
-		goto out;
+		return rc;
 	link_new = &lgr->lnk[lnk_idx];
-
-	rc = smcr_buf_map_lgr(link_new);
-	if (rc)
-		goto out_err;
-
 	rc = smc_llc_send_add_link(link,
-				   link_new->smcibdev->mac[link_new->ibport-1],
+				   link_new->smcibdev->mac[ini.ib_port - 1],
 				   link_new->gid, link_new, SMC_LLC_REQ);
 	if (rc)
 		goto out_err;
-	send_req_add_link_resp = false;
 	/* receive ADD LINK response over the RoCE fabric */
 	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
 	if (!qentry) {
@@ -1459,59 +1197,48 @@ int smc_llc_srv_add_link(struct smc_link *link,
 	}
 	if (lgr->type == SMC_LGR_SINGLE &&
 	    (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
-	     (lgr->smc_version == SMC_V2 ||
-	      !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) {
+	     !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) {
 		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
 	}
 	smc_llc_save_add_link_info(link_new, add_llc);
 	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 	rc = smc_ib_ready_link(link_new);
+	if (rc)
+		goto out_err;
+	rc = smcr_buf_map_lgr(link_new);
 	if (rc)
 		goto out_err;
 	rc = smcr_buf_reg_lgr(link_new);
 	if (rc)
 		goto out_err;
-	if (lgr->smc_version == SMC_V2) {
-		smc_llc_save_add_link_rkeys(link, link_new);
-	} else {
-		rc = smc_llc_srv_rkey_exchange(link, link_new);
-		if (rc)
-			goto out_err;
-	}
+	rc = smc_llc_srv_rkey_exchange(link, link_new);
+	if (rc)
+		goto out_err;
 	rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
 	if (rc)
 		goto out_err;
-	kfree(ini);
 	return 0;
out_err:
-	if (link_new) {
-		link_new->state = SMC_LNK_INACTIVE;
-		smcr_link_clear(link_new, false);
-	}
-out:
-	kfree(ini);
-	if (send_req_add_link_resp)
-		smc_llc_send_req_add_link_response(req_qentry);
+	link_new->state = SMC_LNK_INACTIVE;
+	smcr_link_clear(link_new, false);
 	return rc;
 }

 static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
 {
 	struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
-	struct smc_llc_qentry *qentry;
 	int rc;

-	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
+	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);

 	mutex_lock(&lgr->llc_conf_mutex);
-	rc = smc_llc_srv_add_link(link, qentry);
+	rc = smc_llc_srv_add_link(link);
 	if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
 		/* delete any asymmetric link */
 		smc_llc_delete_asym_link(lgr);
 	}
 	mutex_unlock(&lgr->llc_conf_mutex);
-	kfree(qentry);
 }

 /* enqueue a local add_link req to trigger a new add_link flow */
@@ -1519,8 +1246,8 @@ void smc_llc_add_link_local(struct smc_link *link)
 {
 	struct smc_llc_msg_add_link add_llc = {};

-	add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK;
-	smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc));
+	add_llc.hd.length = sizeof(add_llc);
+	add_llc.hd.common.type = SMC_LLC_ADD_LINK;
 	/* no dev and port needed */
 	smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
 }
@@ -1542,8 +1269,7 @@ static void smc_llc_add_link_work(struct work_struct *work)
 	else
 		smc_llc_process_srv_add_link(lgr);
out:
-	if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK)
-		smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }

 /* enqueue a local del_link msg to trigger a new del_link flow,
@@ -1553,8 +1279,8 @@ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
 {
 	struct smc_llc_msg_del_link del_llc = {};

-	del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
-	smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc));
+	del_llc.hd.length = sizeof(del_llc);
+	del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
 	del_llc.link_num = del_link_id;
 	del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
 	del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
@@ -1624,8 +1350,8 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
 	struct smc_llc_msg_del_link delllc = {};
 	int i;

-	delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
-	smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc));
+	delllc.hd.common.type = SMC_LLC_DELETE_LINK;
+	delllc.hd.length = sizeof(delllc);
 	if (ord)
 		delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
 	delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
@@ -1741,8 +1467,6 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)

 	link = qentry->link;
 	num_entries = llc->rtoken[0].num_rkeys;
-	if (num_entries > SMC_LLC_RKEYS_PER_MSG)
-		goto out_err;
 	/* first rkey entry is for receiving link */
 	rk_idx = smc_rtoken_add(link,
 				llc->rtoken[0].rmb_vaddr,
@@ -1761,7 +1485,6 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
 	llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
out:
 	llc->hd.flags |= SMC_LLC_FLAG_RESP;
-	smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
 	smc_llc_send_message(link, &qentry->msg);
 	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
 }
@@ -1779,28 +1502,6 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
 	llc = &qentry->msg.delete_rkey;
 	link = qentry->link;

-	if (lgr->smc_version == SMC_V2) {
-		struct smc_llc_msg_delete_rkey_v2 *llcv2;
-
-		memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
-		llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
-		llcv2->num_inval_rkeys = 0;
-
-		max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
-		for (i = 0; i < max; i++) {
-			if (smc_rtoken_delete(link, llcv2->rkey[i]))
-				llcv2->num_inval_rkeys++;
-		}
-		memset(&llc->rkey[0], 0, sizeof(llc->rkey));
-		memset(&llc->reserved2, 0, sizeof(llc->reserved2));
-		smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
-		if (llcv2->num_inval_rkeys) {
-			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
-			llc->err_mask = llcv2->num_inval_rkeys;
-		}
-		goto finish;
-	}
-
 	max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
 	for (i = 0; i < max; i++) {
 		if (smc_rtoken_delete(link, llc->rkey[i]))
@@ -1810,7 +1511,6 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
 		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
 		llc->err_mask = err_mask;
 	}
-finish:
 	llc->hd.flags |= SMC_LLC_FLAG_RESP;
 	smc_llc_send_message(link, &qentry->msg);
 	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
@@ -1818,9 +1518,8 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)

 static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
 {
-	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu LLC protocol violation: "
-			    "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id,
-			    lgr->net->net_cookie, type);
+	pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: "
+			    "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type);
 	smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
 	smc_lgr_terminate_sched(lgr);
 }
(smc_llc_flow_start(&lgr->llc_flow_lcl, + qentry)) { schedule_work(&lgr->llc_add_link_work); } } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { @@ -1931,23 +1620,6 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; - case SMC_LLC_REQ_ADD_LINK: - /* handle response here, smc_llc_flow_stop() cannot be called - * in tasklet context - */ - if (lgr->role == SMC_CLNT && - lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK && - (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) { - smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl); - } else if (lgr->role == SMC_SERV) { - if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { - /* as smc server, handle client suggestion */ - lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK; - schedule_work(&lgr->llc_add_link_work); - } - return; - } - break; default: smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); break; @@ -1991,7 +1663,7 @@ static void smc_llc_rx_response(struct smc_link *link, { enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; - u8 llc_type = qentry->msg.raw.hdr.common.llc_type; + u8 llc_type = qentry->msg.raw.hdr.common.type; switch (llc_type) { case SMC_LLC_TEST_LINK: @@ -2017,8 +1689,7 @@ static void smc_llc_rx_response(struct smc_link *link, /* not used because max links is 3 */ break; default: - smc_llc_protocol_violation(link->lgr, - qentry->msg.raw.hdr.common.type); + smc_llc_protocol_violation(link->lgr, llc_type); break; } kfree(qentry); @@ -2043,8 +1714,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg)); /* process responses immediately */ - if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) && - llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) { + if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) { smc_llc_rx_response(link, qentry); return; } @@ -2064,13 +1734,8 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) if (wc->byte_len < sizeof(*llc)) return; /* short message */ - if (!llc->raw.hdr.common.llc_version) { - if (llc->raw.hdr.length != sizeof(*llc)) - return; /* invalid message */ - } else { - if (llc->raw.hdr.length_v2 < sizeof(*llc)) - return; /* invalid message */ - } + if (llc->raw.hdr.length != sizeof(*llc)) + return; /* invalid message */ smc_llc_enqueue(link, llc); } @@ -2149,10 +1814,9 @@ int smc_llc_link_init(struct smc_link *link) void smc_llc_link_active(struct smc_link *link) { - pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link added: id %*phN, " + pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, " "peerid %*phN, ibdev %s, ibport %d\n", SMC_LGR_ID_SIZE, &link->lgr->id, - link->lgr->net->net_cookie, SMC_LGR_ID_SIZE, &link->link_uid, SMC_LGR_ID_SIZE, &link->peer_link_uid, link->smcibdev->ibdev->name, link->ibport); @@ -2168,10 +1832,9 @@ void smc_llc_link_active(struct smc_link *link) void smc_llc_link_clear(struct smc_link *link, bool log) { if (log) - pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link removed: id %*phN" + pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN" ", peerid %*phN, ibdev %s, ibport %d\n", SMC_LGR_ID_SIZE, &link->lgr->id, - link->lgr->net->net_cookie, SMC_LGR_ID_SIZE, &link->link_uid, SMC_LGR_ID_SIZE, &link->peer_link_uid, link->smcibdev->ibdev->name, link->ibport); @@ -2291,35 +1954,6 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_RKEY }, - /* V2 types */ 
- { - .handler = smc_llc_rx_handler, - .type = SMC_LLC_CONFIRM_LINK_V2 - }, - { - .handler = smc_llc_rx_handler, - .type = SMC_LLC_TEST_LINK_V2 - }, - { - .handler = smc_llc_rx_handler, - .type = SMC_LLC_ADD_LINK_V2 - }, - { - .handler = smc_llc_rx_handler, - .type = SMC_LLC_DELETE_LINK_V2 - }, - { - .handler = smc_llc_rx_handler, - .type = SMC_LLC_REQ_ADD_LINK_V2 - }, - { - .handler = smc_llc_rx_handler, - .type = SMC_LLC_CONFIRM_RKEY_V2 - }, - { - .handler = smc_llc_rx_handler, - .type = SMC_LLC_DELETE_RKEY_V2 - }, { .handler = NULL, } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 4404e52b33..cc00a2ec4e 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -30,19 +30,10 @@ enum smc_llc_msg_type { SMC_LLC_ADD_LINK = 0x02, SMC_LLC_ADD_LINK_CONT = 0x03, SMC_LLC_DELETE_LINK = 0x04, - SMC_LLC_REQ_ADD_LINK = 0x05, SMC_LLC_CONFIRM_RKEY = 0x06, SMC_LLC_TEST_LINK = 0x07, SMC_LLC_CONFIRM_RKEY_CONT = 0x08, SMC_LLC_DELETE_RKEY = 0x09, - /* V2 types */ - SMC_LLC_CONFIRM_LINK_V2 = 0x21, - SMC_LLC_ADD_LINK_V2 = 0x22, - SMC_LLC_DELETE_LINK_V2 = 0x24, - SMC_LLC_REQ_ADD_LINK_V2 = 0x25, - SMC_LLC_CONFIRM_RKEY_V2 = 0x26, - SMC_LLC_TEST_LINK_V2 = 0x27, - SMC_LLC_DELETE_RKEY_V2 = 0x29, }; #define smc_link_downing(state) \ @@ -111,8 +102,7 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow); void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); -int smc_llc_srv_add_link(struct smc_link *link, - struct smc_llc_qentry *req_qentry); +int smc_llc_srv_add_link(struct smc_link *link); void smc_llc_add_link_local(struct smc_link *link); int smc_llc_init(void) __init; diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c index f13ab0661e..6fb6f96c1d 100644 --- a/net/smc/smc_netlink.c +++ b/net/smc/smc_netlink.c @@ -19,19 +19,11 @@ #include "smc_core.h" #include "smc_ism.h" #include "smc_ib.h" -#include "smc_clc.h" #include "smc_stats.h" #include "smc_netlink.h" -const struct nla_policy -smc_gen_ueid_policy[SMC_NLA_EID_TABLE_MAX + 1] = { - [SMC_NLA_EID_TABLE_UNSPEC] = { .type = NLA_UNSPEC }, - [SMC_NLA_EID_TABLE_ENTRY] = { .type = NLA_STRING, - .len = SMC_MAX_EID_LEN, - }, -}; - #define SMC_CMD_MAX_ATTR 1 + /* SMC_GENL generic netlink operation definition */ static const struct genl_ops smc_gen_nl_ops[] = { { @@ -74,43 +66,6 @@ static const struct genl_ops smc_gen_nl_ops[] = { /* can be retrieved by unprivileged users */ .dumpit = smc_nl_get_fback_stats, }, - { - .cmd = SMC_NETLINK_DUMP_UEID, - /* can be retrieved by unprivileged users */ - .dumpit = smc_nl_dump_ueid, - }, - { - .cmd = SMC_NETLINK_ADD_UEID, - .flags = GENL_ADMIN_PERM, - .doit = smc_nl_add_ueid, - .policy = smc_gen_ueid_policy, - }, - { - .cmd = SMC_NETLINK_REMOVE_UEID, - .flags = GENL_ADMIN_PERM, - .doit = smc_nl_remove_ueid, - .policy = smc_gen_ueid_policy, - }, - { - .cmd = SMC_NETLINK_FLUSH_UEID, - .flags = GENL_ADMIN_PERM, - .doit = smc_nl_flush_ueid, - }, - { - .cmd = SMC_NETLINK_DUMP_SEID, - /* can be retrieved by unprivileged users */ - .dumpit = smc_nl_dump_seid, - }, - { - .cmd = SMC_NETLINK_ENABLE_SEID, - .flags = GENL_ADMIN_PERM, - .doit = smc_nl_enable_seid, - }, - { - .cmd = SMC_NETLINK_DISABLE_SEID, - .flags = GENL_ADMIN_PERM, - .doit = smc_nl_disable_seid, - }, }; static const struct nla_policy smc_gen_nl_policy[2] = { diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h index e8c6c3f0e9..5ce2c0a89c 100644 --- a/net/smc/smc_netlink.h +++ b/net/smc/smc_netlink.h @@ -17,8 +17,6 @@ extern struct 
genl_family smc_gen_nl_family; -extern const struct nla_policy smc_gen_ueid_policy[]; - struct smc_nl_dmp_ctx { int pos[3]; }; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 29f0a559d8..707615809e 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -64,7 +64,6 @@ struct smc_pnetentry { struct { char eth_name[IFNAMSIZ + 1]; struct net_device *ndev; - netdevice_tracker dev_tracker; }; struct { char ib_name[IB_DEVICE_NAME_MAX + 1]; @@ -120,7 +119,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + dev_put(pnetelem->ndev); pr_warn_ratelimited("smc: net device %s " "erased user defined " "pnetid %.16s\n", @@ -196,7 +195,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { - dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); + dev_hold(ndev); pnetelem->ndev = ndev; rc = 0; pr_warn_ratelimited("smc: adding net device %s with " @@ -227,7 +226,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + dev_put(pnetelem->ndev); pnetelem->ndev = NULL; rc = 0; pr_warn_ratelimited("smc: removing net device %s with " @@ -368,6 +367,8 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, new_pe->type = SMC_PNET_ETH; memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); + new_pe->ndev = ndev; + rc = -EEXIST; new_netdev = true; mutex_lock(&pnettable->lock); @@ -379,11 +380,6 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, } } if (new_netdev) { - if (ndev) { - new_pe->ndev = ndev; - netdev_tracker_alloc(ndev, &new_pe->dev_tracker, - GFP_ATOMIC); - } list_add_tail(&new_pe->list, &pnettable->pnetlist); mutex_unlock(&pnettable->lock); } else { @@ -957,48 +953,30 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, return rc; } -static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i, - struct smc_init_info *ini) -{ - if (!ini->check_smcrv2 && - !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL, - NULL)) { - ini->ib_dev = ibdev; - ini->ib_port = i; - return 0; - } - if (ini->check_smcrv2 && - !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2, - NULL, &ini->smcrv2)) { - ini->smcrv2.ib_dev_v2 = ibdev; - ini->smcrv2.ib_port_v2 = i; - return 0; - } - return -ENODEV; -} - /* find a roce device for the given pnetid */ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_init_info *ini, - struct smc_ib_device *known_dev, - struct net *net) + struct smc_ib_device *known_dev) { struct smc_ib_device *ibdev; int i; + ini->ib_dev = NULL; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { - if (ibdev == known_dev || - !rdma_dev_access_netns(ibdev->ibdev, net)) + if (ibdev == known_dev) continue; for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && 
smc_ib_port_active(ibdev, i) && - !test_bit(i - 1, ibdev->ports_going_away)) { - if (!smc_pnet_determine_gid(ibdev, i, ini)) - goto out; + !test_bit(i - 1, ibdev->ports_going_away) && + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, + ini->ib_gid, NULL)) { + ini->ib_dev = ibdev; + ini->ib_port = i; + goto out; } } } @@ -1006,14 +984,12 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, mutex_unlock(&smc_ib_devices.mutex); } -/* find alternate roce device with same pnet_id, vlan_id and net namespace */ +/* find alternate roce device with same pnet_id and vlan_id */ void smc_pnet_find_alt_roce(struct smc_link_group *lgr, struct smc_init_info *ini, struct smc_ib_device *known_dev) { - struct net *net = lgr->net; - - _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev, net); + _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); } /* if handshake network device belongs to a roce device, return its @@ -1022,7 +998,6 @@ void smc_pnet_find_alt_roce(struct smc_link_group *lgr, static void smc_pnet_find_rdma_dev(struct net_device *netdev, struct smc_init_info *ini) { - struct net *net = dev_net(netdev); struct smc_ib_device *ibdev; mutex_lock(&smc_ib_devices.mutex); @@ -1030,10 +1005,6 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, struct net_device *ndev; int i; - /* check rdma net namespace */ - if (!rdma_dev_access_netns(ibdev->ibdev, net)) - continue; - for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; @@ -1045,9 +1016,12 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, dev_put(ndev); if (netdev == ndev && smc_ib_port_active(ibdev, i) && - !test_bit(i - 1, ibdev->ports_going_away)) { - if (!smc_pnet_determine_gid(ibdev, i, ini)) - break; + !test_bit(i - 1, ibdev->ports_going_away) && + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, + ini->ib_gid, NULL)) { + ini->ib_dev = ibdev; + ini->ib_port = i; + break; } } } @@ -1064,17 +1038,15 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; - struct net *net; ndev = pnet_find_base_ndev(ndev); - net = dev_net(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { smc_pnet_find_rdma_dev(ndev, ini); return; /* pnetid could not be determined */ } - _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net); + _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); } static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, @@ -1111,6 +1083,8 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); + ini->ib_dev = NULL; + ini->ib_port = 0; if (!dst) goto out; if (!dst->dev) diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 51e8eb2933..170b733bc7 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -22,7 +22,6 @@ #include "smc_tx.h" /* smc_tx_consumer_update() */ #include "smc_rx.h" #include "smc_stats.h" -#include "smc_tracepoint.h" /* callback implementation to wakeup consumers blocked with smc_rx_wait(). * indirectly called by smc_cdc_msg_recv_action(). 
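The smc_pnet.c hunks above fold smc_pnet_determine_gid() back into its callers: after this change a RoCE port is selected purely on pnetid match, port state and GID resolution, with no rdma_dev_access_netns() namespace filter. A minimal standalone sketch of that per-port predicate, using stub types rather than the kernel's, in case the control flow is easier to read outside the device loop:

#include <stdbool.h>

/* Stub of the facts gathered per candidate port; the field names are
 * illustrative, not the kernel's.
 */
struct port_candidate {
	bool pnetid_match;	/* smc_pnet_match() succeeded */
	bool port_active;	/* smc_ib_port_active() succeeded */
	bool going_away;	/* ports_going_away bit set */
	int  gid_rc;		/* smc_ib_determine_gid() rc, 0 on success */
};

/* True when the port may carry the (alternate) RoCE link. */
static bool port_usable(const struct port_candidate *c)
{
	return c->pnetid_match && c->port_active &&
	       !c->going_away && !c->gid_rc;
}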
@@ -439,8 +438,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, if (msg && smc_rx_update_consumer(smc, cons, copylen)) goto out; } - - trace_smc_rx_recvmsg(smc, copylen); } while (read_remaining); out: return read_done; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index be241d5302..738a4a99c8 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -28,7 +28,6 @@ #include "smc_ism.h" #include "smc_tx.h" #include "smc_stats.h" -#include "smc_tracepoint.h" #define SMC_TX_WORK_DELAY 0 #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ @@ -246,8 +245,6 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) SMC_TX_CORK_DELAY); else smc_tx_sndbuf_nonempty(conn); - - trace_smc_tx_sendmsg(smc, copylen); } /* while (msg_data_left(msg)) */ return send_done; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 24be1d03fe..59ca1a2d5c 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -54,7 +54,11 @@ struct smc_wr_tx_pend { /* control data for a pending send request */ /* returns true if at least one tx work request is pending on the given link */ static inline bool smc_wr_is_tx_pend(struct smc_link *link) { - return !bitmap_empty(link->wr_tx_mask, link->wr_tx_cnt); + if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) != + link->wr_tx_cnt) { + return true; + } + return false; } /* wait till all pending tx work requests on the given link are completed */ @@ -92,39 +96,20 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); - if (pnd_snd_idx == link->wr_tx_cnt) { - if (link->lgr->smc_version != SMC_V2 || - link->wr_tx_v2_pend->wr_id != wc->wr_id) - return; - link->wr_tx_v2_pend->wc_status = wc->status; - memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd)); - /* clear the full struct smc_wr_tx_pend including .priv */ - memset(link->wr_tx_v2_pend, 0, - sizeof(*link->wr_tx_v2_pend)); - memset(link->lgr->wr_tx_buf_v2, 0, - sizeof(*link->lgr->wr_tx_buf_v2)); - } else { - link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; - if (link->wr_tx_pends[pnd_snd_idx].compl_requested) - complete(&link->wr_tx_compl[pnd_snd_idx]); - memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], - sizeof(pnd_snd)); - /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pnd_snd_idx], 0, - sizeof(link->wr_tx_pends[pnd_snd_idx])); - memset(&link->wr_tx_bufs[pnd_snd_idx], 0, - sizeof(link->wr_tx_bufs[pnd_snd_idx])); - if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) - return; - } - + if (pnd_snd_idx == link->wr_tx_cnt) + return; + link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; + if (link->wr_tx_pends[pnd_snd_idx].compl_requested) + complete(&link->wr_tx_compl[pnd_snd_idx]); + memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd)); + /* clear the full struct smc_wr_tx_pend including .priv */ + memset(&link->wr_tx_pends[pnd_snd_idx], 0, + sizeof(link->wr_tx_pends[pnd_snd_idx])); + memset(&link->wr_tx_bufs[pnd_snd_idx], 0, + sizeof(link->wr_tx_bufs[pnd_snd_idx])); + if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) + return; if (wc->status) { - if (link->lgr->smc_version == SMC_V2) { - memset(link->wr_tx_v2_pend, 0, - sizeof(*link->wr_tx_v2_pend)); - memset(link->lgr->wr_tx_buf_v2, 0, - sizeof(*link->lgr->wr_tx_buf_v2)); - } /* terminate link */ smcr_link_down_cond_sched(link); } @@ -241,33 +226,6 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, return 0; } -int smc_wr_tx_get_v2_slot(struct smc_link *link, - smc_wr_tx_handler handler, - struct 
smc_wr_v2_buf **wr_buf, - struct smc_wr_tx_pend_priv **wr_pend_priv) -{ - struct smc_wr_tx_pend *wr_pend; - struct ib_send_wr *wr_ib; - u64 wr_id; - - if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt) - return -EBUSY; - - *wr_buf = NULL; - *wr_pend_priv = NULL; - wr_id = smc_wr_tx_get_next_wr_id(link); - wr_pend = link->wr_tx_v2_pend; - wr_pend->wr_id = wr_id; - wr_pend->handler = handler; - wr_pend->link = link; - wr_pend->idx = link->wr_tx_cnt; - wr_ib = link->wr_tx_v2_ib; - wr_ib->wr_id = wr_id; - *wr_buf = link->lgr->wr_tx_buf_v2; - *wr_pend_priv = &wr_pend->priv; - return 0; -} - int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv) { @@ -285,14 +243,6 @@ int smc_wr_tx_put_slot(struct smc_link *link, test_and_clear_bit(idx, link->wr_tx_mask); wake_up(&link->wr_tx_wait); return 1; - } else if (link->lgr->smc_version == SMC_V2 && - pend->idx == link->wr_tx_cnt) { - /* Large v2 buffer */ - memset(&link->wr_tx_v2_pend, 0, - sizeof(link->wr_tx_v2_pend)); - memset(&link->lgr->wr_tx_buf_v2, 0, - sizeof(link->lgr->wr_tx_buf_v2)); - return 1; } return 0; @@ -317,22 +267,6 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } -int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, - int len) -{ - int rc; - - link->wr_tx_v2_ib->sg_list[0].length = len; - ib_req_notify_cq(link->smcibdev->roce_cq_send, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); - rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL); - if (rc) { - smc_wr_tx_put_slot(link, priv); - smcr_link_down_cond_sched(link); - } - return rc; -} - /* Send prepared WR slot via ib_post_send and wait for send completion * notification. * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer @@ -553,7 +487,6 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) static void smc_wr_init_sge(struct smc_link *lnk) { - int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; u32 i; for (i = 0; i < lnk->wr_tx_cnt; i++) { @@ -582,44 +515,14 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; } - - if (lnk->lgr->smc_version == SMC_V2) { - lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr; - lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE; - lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey; - - lnk->wr_tx_v2_ib->next = NULL; - lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge; - lnk->wr_tx_v2_ib->num_sge = 1; - lnk->wr_tx_v2_ib->opcode = IB_WR_SEND; - lnk->wr_tx_v2_ib->send_flags = - IB_SEND_SIGNALED | IB_SEND_SOLICITED; - } - - /* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE. - * Each ib_recv_wr gets 2 sges, the second one is a spillover buffer - * and the same buffer for all sges. When a larger message arrived then - * the content of the first small sge is copied to the beginning of - * the larger spillover buffer, allowing easy data mapping. 
- */ for (i = 0; i < lnk->wr_rx_cnt; i++) { - int x = i * sges_per_buf; - - lnk->wr_rx_sges[x].addr = + lnk->wr_rx_sges[i].addr = lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE; - lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE; - lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey; - if (lnk->lgr->smc_version == SMC_V2) { - lnk->wr_rx_sges[x + 1].addr = - lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE; - lnk->wr_rx_sges[x + 1].length = - SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE; - lnk->wr_rx_sges[x + 1].lkey = - lnk->roce_pd->local_dma_lkey; - } + lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE; + lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; lnk->wr_rx_ibs[i].next = NULL; - lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x]; - lnk->wr_rx_ibs[i].num_sge = sges_per_buf; + lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; + lnk->wr_rx_ibs[i].num_sge = 1; } lnk->wr_reg.wr.next = NULL; lnk->wr_reg.wr.num_sge = 0; @@ -649,50 +552,21 @@ void smc_wr_free_link(struct smc_link *lnk) DMA_FROM_DEVICE); lnk->wr_rx_dma_addr = 0; } - if (lnk->wr_rx_v2_dma_addr) { - ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr, - SMC_WR_BUF_V2_SIZE, - DMA_FROM_DEVICE); - lnk->wr_rx_v2_dma_addr = 0; - } if (lnk->wr_tx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, DMA_TO_DEVICE); lnk->wr_tx_dma_addr = 0; } - if (lnk->wr_tx_v2_dma_addr) { - ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr, - SMC_WR_BUF_V2_SIZE, - DMA_TO_DEVICE); - lnk->wr_tx_v2_dma_addr = 0; - } -} - -void smc_wr_free_lgr_mem(struct smc_link_group *lgr) -{ - if (lgr->smc_version < SMC_V2) - return; - - kfree(lgr->wr_rx_buf_v2); - lgr->wr_rx_buf_v2 = NULL; - kfree(lgr->wr_tx_buf_v2); - lgr->wr_tx_buf_v2 = NULL; } void smc_wr_free_link_mem(struct smc_link *lnk) { - kfree(lnk->wr_tx_v2_ib); - lnk->wr_tx_v2_ib = NULL; - kfree(lnk->wr_tx_v2_sge); - lnk->wr_tx_v2_sge = NULL; - kfree(lnk->wr_tx_v2_pend); - lnk->wr_tx_v2_pend = NULL; kfree(lnk->wr_tx_compl); lnk->wr_tx_compl = NULL; kfree(lnk->wr_tx_pends); lnk->wr_tx_pends = NULL; - bitmap_free(lnk->wr_tx_mask); + kfree(lnk->wr_tx_mask); lnk->wr_tx_mask = NULL; kfree(lnk->wr_tx_sges); lnk->wr_tx_sges = NULL; @@ -712,26 +586,8 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_rx_bufs = NULL; } -int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr) -{ - if (lgr->smc_version < SMC_V2) - return 0; - - lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL); - if (!lgr->wr_rx_buf_v2) - return -ENOMEM; - lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL); - if (!lgr->wr_tx_buf_v2) { - kfree(lgr->wr_rx_buf_v2); - return -ENOMEM; - } - return 0; -} - int smc_wr_alloc_link_mem(struct smc_link *link) { - int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 
2 : 1; - /* allocate link related memory */ link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) @@ -764,11 +620,13 @@ int smc_wr_alloc_link_mem(struct smc_link *link) if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, - sizeof(link->wr_rx_sges[0]) * sges_per_buf, + sizeof(link->wr_rx_sges[0]), GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; - link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL); + link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT), + sizeof(*link->wr_tx_mask), + GFP_KERNEL); if (!link->wr_tx_mask) goto no_mem_wr_rx_sges; link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT, @@ -781,29 +639,8 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_tx_compl) goto no_mem_wr_tx_pends; - - if (link->lgr->smc_version == SMC_V2) { - link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib), - GFP_KERNEL); - if (!link->wr_tx_v2_ib) - goto no_mem_tx_compl; - link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge), - GFP_KERNEL); - if (!link->wr_tx_v2_sge) - goto no_mem_v2_ib; - link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend), - GFP_KERNEL); - if (!link->wr_tx_v2_pend) - goto no_mem_v2_sge; - } return 0; -no_mem_v2_sge: - kfree(link->wr_tx_v2_sge); -no_mem_v2_ib: - kfree(link->wr_tx_v2_ib); -no_mem_tx_compl: - kfree(link->wr_tx_compl); no_mem_wr_tx_pends: kfree(link->wr_tx_pends); no_mem_wr_tx_mask: @@ -855,24 +692,6 @@ int smc_wr_create_link(struct smc_link *lnk) rc = -EIO; goto out; } - if (lnk->lgr->smc_version == SMC_V2) { - lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev, - lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE, - DMA_FROM_DEVICE); - if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) { - lnk->wr_rx_v2_dma_addr = 0; - rc = -EIO; - goto dma_unmap; - } - lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev, - lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) { - lnk->wr_tx_v2_dma_addr = 0; - rc = -EIO; - goto dma_unmap; - } - } lnk->wr_tx_dma_addr = ib_dma_map_single( ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, DMA_TO_DEVICE); @@ -881,7 +700,8 @@ int smc_wr_create_link(struct smc_link *lnk) goto dma_unmap; } smc_wr_init_sge(lnk); - bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT); + memset(lnk->wr_tx_mask, 0, + BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); init_waitqueue_head(&lnk->wr_tx_wait); atomic_set(&lnk->wr_tx_refcnt, 0); init_waitqueue_head(&lnk->wr_reg_wait); @@ -889,18 +709,6 @@ int smc_wr_create_link(struct smc_link *lnk) return rc; dma_unmap: - if (lnk->wr_rx_v2_dma_addr) { - ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr, - SMC_WR_BUF_V2_SIZE, - DMA_FROM_DEVICE); - lnk->wr_rx_v2_dma_addr = 0; - } - if (lnk->wr_tx_v2_dma_addr) { - ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr, - SMC_WR_BUF_V2_SIZE, - DMA_TO_DEVICE); - lnk->wr_tx_v2_dma_addr = 0; - } ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, DMA_FROM_DEVICE); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index a54e90a111..cb58e60078 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -100,10 +100,8 @@ static inline int smc_wr_rx_post(struct smc_link *link) int smc_wr_create_link(struct smc_link *lnk); int smc_wr_alloc_link_mem(struct smc_link *lnk); -int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr); void smc_wr_free_link(struct smc_link *lnk); void smc_wr_free_link_mem(struct smc_link *lnk); -void smc_wr_free_lgr_mem(struct 
smc_link_group *lgr); void smc_wr_remember_qp_attr(struct smc_link *lnk); void smc_wr_remove_dev(struct smc_ib_device *smcibdev); void smc_wr_add_dev(struct smc_ib_device *smcibdev); @@ -112,19 +110,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wrs, struct smc_wr_tx_pend_priv **wr_pend_priv); -int smc_wr_tx_get_v2_slot(struct smc_link *link, - smc_wr_tx_handler handler, - struct smc_wr_v2_buf **wr_buf, - struct smc_wr_tx_pend_priv **wr_pend_priv); int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); -int smc_wr_tx_v2_send(struct smc_link *link, - struct smc_wr_tx_pend_priv *priv, int len); int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); +void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, + smc_wr_tx_filter filter, + smc_wr_tx_dismisser dismisser, + unsigned long data); void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); diff --git a/net/socket.c b/net/socket.c index 982eecad46..5053eb0100 100644 --- a/net/socket.c +++ b/net/socket.c @@ -52,7 +52,6 @@ * Based upon Swansea University Computer Society NET3.039 */ -#include #include #include #include @@ -1950,7 +1949,7 @@ int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, err = sock->ops->getname(sock, (struct sockaddr *)&address, 0); if (err < 0) goto out_put; - /* "err" is actually length in this case */ + /* "err" is actually length in this case */ err = move_addr_to_user(&address, err, usockaddr, usockaddr_len); out_put: @@ -3237,6 +3236,21 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, &ifreq, data, NULL); } +/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE + * for some operations; this forces use of the newer bridge-utils that + * use compatible ioctls + */ +static int old_bridge_ioctl(compat_ulong_t __user *argp) +{ + compat_ulong_t tmp; + + if (get_user(tmp, argp)) + return -EFAULT; + if (tmp == BRCTL_GET_VERSION) + return BRCTL_VERSION + 1; + return -EINVAL; +} + static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, unsigned int cmd, unsigned long arg) { @@ -3248,6 +3262,9 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return sock_ioctl(file, cmd, (unsigned long)argp); switch (cmd) { + case SIOCSIFBR: + case SIOCGIFBR: + return old_bridge_ioctl(argp); case SIOCWANDEV: return compat_siocwandev(net, argp); case SIOCGSTAMP_OLD: @@ -3276,8 +3293,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGSTAMP_NEW: case SIOCGSTAMPNS_NEW: case SIOCGIFCONF: - case SIOCSIFBR: - case SIOCGIFBR: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: @@ -3448,7 +3463,7 @@ EXPORT_SYMBOL(kernel_connect); * @addr: address holder * * Fills the @addr pointer with the address which the socket is bound. - * Returns the length of the address in bytes or an error code. + * Returns 0 or an error code. */ int kernel_getsockname(struct socket *sock, struct sockaddr *addr) @@ -3463,7 +3478,7 @@ EXPORT_SYMBOL(kernel_getsockname); * @addr: address holder * * Fills the @addr pointer with the address which the socket is connected. 
- * Returns the length of the address in bytes or an error code. + * Returns 0 or an error code. */ int kernel_getpeername(struct socket *sock, struct sockaddr *addr) diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index 4a4082bb22..fe97f31065 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -222,8 +222,10 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, if (ret) return ret; - *buf_in = buf; - *body_size = toksize; + if (!ret) { + *buf_in = buf; + *body_size = toksize; + } return ret; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c2ba9d4cd2..1f28171955 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -781,7 +781,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); xdr_seq = kmalloc(4, GFP_KERNEL); if (!xdr_seq) - return -ENOMEM; + return -1; *xdr_seq = htonl(seq); iov.iov_base = xdr_seq; @@ -1433,7 +1433,7 @@ static bool use_gss_proxy(struct net *net) static ssize_t write_gssp(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct net *net = pde_data(file_inode(file)); + struct net *net = PDE_DATA(file_inode(file)); char tbuf[20]; unsigned long i; int res; @@ -1461,7 +1461,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf, static ssize_t read_gssp(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct net *net = pde_data(file_inode(file)); + struct net *net = PDE_DATA(file_inode(file)); struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); unsigned long p = *ppos; char tbuf[10]; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c83fe61876..5da1d7e846 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1076,21 +1076,24 @@ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) static void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) { - rpc_task_set_transport(task, clnt); - task->tk_client = clnt; - refcount_inc(&clnt->cl_count); - if (clnt->cl_softrtry) - task->tk_flags |= RPC_TASK_SOFT; - if (clnt->cl_softerr) - task->tk_flags |= RPC_TASK_TIMEOUT; - if (clnt->cl_noretranstimeo) - task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (atomic_read(&clnt->cl_swapper)) - task->tk_flags |= RPC_TASK_SWAPPER; - /* Add to the client's list of all tasks */ - spin_lock(&clnt->cl_lock); - list_add_tail(&task->tk_task, &clnt->cl_tasks); - spin_unlock(&clnt->cl_lock); + + if (clnt != NULL) { + rpc_task_set_transport(task, clnt); + task->tk_client = clnt; + refcount_inc(&clnt->cl_count); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + if (clnt->cl_softerr) + task->tk_flags |= RPC_TASK_TIMEOUT; + if (clnt->cl_noretranstimeo) + task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; + if (atomic_read(&clnt->cl_swapper)) + task->tk_flags |= RPC_TASK_SWAPPER; + /* Add to the client's list of all tasks */ + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } } static void diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index e2c8354827..c045f63d11 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -277,17 +277,9 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { - struct rpc_clnt *clnt = 
task->tk_client; + static atomic_t rpc_pid; - /* Might be a task carrying a reverse-direction operation */ - if (!clnt) { - static atomic_t rpc_pid; - - task->tk_pid = atomic_inc_return(&rpc_pid); - return; - } - - task->tk_pid = atomic_inc_return(&clnt->cl_pid); + task->tk_pid = atomic_inc_return(&rpc_pid); } #else static inline void rpc_task_set_debuginfo(struct rpc_task *task) @@ -837,7 +829,6 @@ void rpc_exit_task(struct rpc_task *task) else if (task->tk_client) rpc_count_iostats(task, task->tk_client->cl_metrics); if (task->tk_ops->rpc_call_done != NULL) { - trace_rpc_task_call_done(task, task->tk_ops->rpc_call_done); task->tk_ops->rpc_call_done(task, task->tk_calldata); if (task->tk_action != NULL) { /* Always release the RPC slot and buffer memory */ @@ -912,10 +903,8 @@ static void __rpc_execute(struct rpc_task *task) /* * Lockless check for whether task is sleeping or not. */ - if (!RPC_IS_QUEUED(task)) { - cond_resched(); + if (!RPC_IS_QUEUED(task)) continue; - } /* * Signalled tasks should exit rather than sleep. @@ -1241,7 +1230,8 @@ static int rpciod_start(void) if (!wq) goto out_failed; rpciod_workqueue = wq; - wq = alloc_workqueue("xprtiod", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + /* Note: highpri because network receive is latency sensitive */ + wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0); if (!wq) goto free_rpciod; xprtiod_workqueue = wq; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2aabec2b4b..08ca797bb8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -37,37 +37,18 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL +#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) -/* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; +#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL /* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. */ - -struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -}; - -static struct svc_pool_map svc_pool_map = { +struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; +EXPORT_SYMBOL_GPL(svc_pool_map); static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ @@ -238,12 +219,10 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) /* * Add a reference to the global map of cpus to pools (and - * vice versa) if pools are in use. - * Initialise the map if we're the first user. - * Returns the number of pools. If this is '1', no reference - * was taken. + * vice versa). Initialise the map if we're the first user. + * Returns the number of pools. 
*/ -static unsigned int +unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -253,7 +232,6 @@ svc_pool_map_get(void) if (m->count++) { mutex_unlock(&svc_pool_map_mutex); - WARN_ON_ONCE(m->npools <= 1); return m->npools; } @@ -269,36 +247,30 @@ svc_pool_map_get(void) break; } - if (npools <= 0) { + if (npools < 0) { /* default, or memory allocation failure */ npools = 1; m->mode = SVC_POOL_GLOBAL; } m->npools = npools; - if (npools == 1) - /* service is unpooled, so doesn't hold a reference */ - m->count--; - mutex_unlock(&svc_pool_map_mutex); - return npools; + return m->npools; } +EXPORT_SYMBOL_GPL(svc_pool_map_get); /* - * Drop a reference to the global map of cpus to pools, if - * pools were in use, i.e. if npools > 1. + * Drop a reference to the global map of cpus to pools. * When the last reference is dropped, the map data is * freed; this allows the sysadmin to change the pool * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. */ -static void -svc_pool_map_put(int npools) +void +svc_pool_map_put(void) { struct svc_pool_map *m = &svc_pool_map; - if (npools <= 1) - return; mutex_lock(&svc_pool_map_mutex); if (!--m->count) { @@ -311,6 +283,7 @@ svc_pool_map_put(int npools) mutex_unlock(&svc_pool_map_mutex); } +EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { @@ -367,18 +340,21 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) struct svc_pool_map *m = &svc_pool_map; unsigned int pidx = 0; - if (serv->sv_nrpools <= 1) - return serv->sv_pools; - - switch (m->mode) { - case SVC_POOL_PERCPU: - pidx = m->to_pool[cpu]; - break; - case SVC_POOL_PERNODE: - pidx = m->to_pool[cpu_to_node(cpu)]; - break; + /* + * An uninitialised map happens in a pure client when + * lockd is brought up, so silently treat it the + * same as SVC_POOL_GLOBAL. + */ + if (svc_serv_is_pooled(serv)) { + switch (m->mode) { + case SVC_POOL_PERCPU: + pidx = m->to_pool[cpu]; + break; + case SVC_POOL_PERNODE: + pidx = m->to_pool[cpu_to_node(cpu)]; + break; + } } - return &serv->sv_pools[pidx % serv->sv_nrpools]; } @@ -459,7 +435,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return NULL; serv->sv_name = prog->pg_name; serv->sv_program = prog; - kref_init(&serv->sv_refcnt); + serv->sv_nrthreads = 1; serv->sv_stats = prog->pg_stats; if (bufsize > RPCSVC_MAXPAYLOAD) bufsize = RPCSVC_MAXPAYLOAD; @@ -531,7 +507,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, goto out_err; return serv; out_err: - svc_pool_map_put(npools); + svc_pool_map_put(); return NULL; } EXPORT_SYMBOL_GPL(svc_create_pooled); @@ -547,14 +523,23 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net); /* * Destroy an RPC service. Should be called with appropriate locking to - * protect sv_permsocks and sv_tempsocks. + * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. 
*/ void -svc_destroy(struct kref *ref) +svc_destroy(struct svc_serv *serv) { - struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt); + dprintk("svc: svc_destroy(%s, %d)\n", + serv->sv_program->pg_name, + serv->sv_nrthreads); + + if (serv->sv_nrthreads) { + if (--(serv->sv_nrthreads) != 0) { + svc_sock_update_bufs(serv); + return; + } + } else + printk("svc_destroy: no threads for serv=%p!\n", serv); - dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name); del_timer_sync(&serv->sv_temptimer); /* @@ -566,7 +551,8 @@ svc_destroy(struct kref *ref) cache_clean_deferred(serv); - svc_pool_map_put(serv->sv_nrpools); + if (svc_serv_is_pooled(serv)) + svc_pool_map_put(); kfree(serv->sv_pools); kfree(serv); @@ -652,7 +638,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) } EXPORT_SYMBOL_GPL(svc_rqst_alloc); -static struct svc_rqst * +struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; @@ -661,17 +647,14 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return ERR_PTR(-ENOMEM); - svc_get(serv); - spin_lock_bh(&serv->sv_lock); - serv->sv_nrthreads += 1; - spin_unlock_bh(&serv->sv_lock); - + serv->sv_nrthreads++; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); spin_unlock_bh(&pool->sp_lock); return rqstp; } +EXPORT_SYMBOL_GPL(svc_prepare_thread); /* * Choose a pool in which to create a new thread, for svc_set_num_threads @@ -765,13 +748,59 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } + +/* destroy old threads */ +static int +svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + + /* destroy old threads */ + do { + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; + send_sig(SIGINT, task, 1); + nrservs++; + } while (nrservs < 0); + + return 0; +} + /* * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Caller must ensure that mutual exclusion between this and * server startup or shutdown. + * + * Destroying threads relies on the service threads filling in + * rqstp->rq_task, which only the nfs ones do. Assumes the serv + * has been created using svc_create_pooled(). + * + * Based on code that used to be in nfsd_svc() but tweaked + * to be pool-aware. 
*/ +int +svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + if (pool == NULL) { + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); + } else { + spin_lock_bh(&pool->sp_lock); + nrservs -= pool->sp_nrthreads; + spin_unlock_bh(&pool->sp_lock); + } + + if (nrservs > 0) + return svc_start_kthreads(serv, pool, nrservs); + if (nrservs < 0) + return svc_signal_kthreads(serv, pool, nrservs); + return 0; +} +EXPORT_SYMBOL_GPL(svc_set_num_threads); /* destroy old threads */ static int @@ -792,10 +821,11 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) } int -svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { if (pool == NULL) { - nrservs -= serv->sv_nrthreads; + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); } else { spin_lock_bh(&pool->sp_lock); nrservs -= pool->sp_nrthreads; @@ -808,7 +838,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return svc_stop_kthreads(serv, pool, nrservs); return 0; } -EXPORT_SYMBOL_GPL(svc_set_num_threads); +EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); /** * svc_rqst_replace_page - Replace one page in rq_pages[] @@ -860,14 +890,11 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); - spin_lock_bh(&serv->sv_lock); - serv->sv_nrthreads -= 1; - spin_unlock_bh(&serv->sv_lock); - svc_sock_update_bufs(serv); - svc_rqst_free(rqstp); - svc_put(serv); + /* Release the server */ + if (serv) + svc_destroy(serv); } EXPORT_SYMBOL_GPL(svc_exit_thread); @@ -1159,6 +1186,45 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif +static int +svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + const struct svc_procedure *procp = rqstp->rq_procinfo; + + /* + * Decode arguments + * XXX: why do we ignore the return value? + */ + if (procp->pc_decode && + !procp->pc_decode(rqstp, argv->iov_base)) { + *statp = rpc_garbage_args; + return 1; + } + + *statp = procp->pc_func(rqstp); + + if (*statp == rpc_drop_reply || + test_bit(RQ_DROPME, &rqstp->rq_flags)) + return 0; + + if (rqstp->rq_auth_stat != rpc_auth_ok) + return 1; + + if (*statp != rpc_success) + return 1; + + /* Encode reply */ + if (procp->pc_encode && + !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) { + dprintk("svc: failed to encode reply\n"); + /* serv->sv_stats->rpcsystemerr++; */ + *statp = rpc_system_err; + } + return 1; +} + __be32 svc_generic_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, @@ -1225,7 +1291,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) __be32 *statp; u32 prog, vers; __be32 rpc_stat; - int auth_res, rc; + int auth_res; __be32 *reply_statp; rpc_stat = rpc_success; @@ -1326,18 +1392,28 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); /* Call the function that processes the request. 
*/ - rc = process.dispatch(rqstp, statp); - if (procp->pc_release) - procp->pc_release(rqstp); - if (!rc) - goto dropit; + if (!process.dispatch) { + if (!svc_generic_dispatch(rqstp, statp)) + goto release_dropit; + if (*statp == rpc_garbage_args) + goto err_garbage; + } else { + dprintk("svc: calling dispatcher\n"); + if (!process.dispatch(rqstp, statp)) + goto release_dropit; /* Release reply info */ + } + if (rqstp->rq_auth_stat != rpc_auth_ok) - goto err_bad_auth; + goto err_release_bad_auth; /* Check RPC status result */ if (*statp != rpc_success) resv->iov_len = ((void*)statp) - resv->iov_base + 4; + /* Release reply info */ + if (procp->pc_release) + procp->pc_release(rqstp); + if (procp->pc_encode == NULL) goto dropit; @@ -1346,6 +1422,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto close_xprt; return 1; /* Caller can now send it */ +release_dropit: + if (procp->pc_release) + procp->pc_release(rqstp); dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ dprintk("svc: svc_process dropit\n"); @@ -1372,6 +1451,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_putnl(resv, 2); goto sendit; +err_release_bad_auth: + if (procp->pc_release) + procp->pc_release(rqstp); err_bad_auth: dprintk("svc: authentication failed (%d)\n", be32_to_cpu(rqstp->rq_auth_stat)); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b21ad79941..d4b663401b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -265,6 +264,8 @@ void svc_xprt_received(struct svc_xprt *xprt) return; } + trace_svc_xprt_received(xprt); + /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_enqueue_xprt with: */ @@ -465,7 +466,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) out_unlock: rcu_read_unlock(); put_cpu(); - trace_svc_xprt_enqueue(xprt, rqstp); + trace_svc_xprt_do_enqueue(xprt, rqstp); } EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); @@ -686,8 +687,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) set_current_state(TASK_RUNNING); return -EINTR; } - trace_svc_alloc_arg_err(pages); - memalloc_retry_wait(GFP_KERNEL); + schedule_timeout(msecs_to_jiffies(500)); } rqstp->rq_page_end = &rqstp->rq_pages[pages]; rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */ @@ -841,8 +841,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } else svc_xprt_received(xprt); - out: + trace_svc_handle_xprt(xprt, len); return len; } diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 05c758da6a..326a31422a 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -109,20 +109,15 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, struct sock_xprt *sock; ssize_t ret = -1; - if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; - } + if (!xprt) + return 0; sock = container_of(xprt, struct sock_xprt, xprt); - mutex_lock(&sock->recv_mutex); - if (sock->sock == NULL || - kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) + if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) goto out; ret = sprintf(buf, "%pISc\n", &saddr); out: - mutex_unlock(&sock->recv_mutex); xprt_put(xprt); return ret + 1; } @@ -134,10 +129,8 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); ssize_t 
ret; - if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; - } + if (!xprt) + return 0; ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" @@ -430,7 +423,6 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_change_state.attr, NULL, }; -ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); @@ -439,7 +431,6 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, NULL, }; -ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); static struct kobj_type rpc_sysfs_client_type = { .release = rpc_sysfs_client_release, @@ -449,14 +440,14 @@ static struct kobj_type rpc_sysfs_client_type = { static struct kobj_type rpc_sysfs_xprt_switch_type = { .release = rpc_sysfs_xprt_switch_release, - .default_groups = rpc_sysfs_xprt_switch_groups, + .default_attrs = rpc_sysfs_xprt_switch_attrs, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_switch_namespace, }; static struct kobj_type rpc_sysfs_xprt_type = { .release = rpc_sysfs_xprt_release, - .default_groups = rpc_sysfs_xprt_groups, + .default_attrs = rpc_sysfs_xprt_attrs, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_namespace, }; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index df194cc070..ca10ba2626 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1633,7 +1633,7 @@ EXPORT_SYMBOL_GPL(xdr_buf_subsegment); * Sets up @subbuf to represent a portion of @xdr. The portion * starts at the current offset in @xdr, and extends for a length * of @nbytes. If this is successful, @xdr is advanced to the next - * XDR data item following that portion. + * position following that portion. * * Return values: * %true: @subbuf has been initialized, and @xdr has been advanced. 
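The xdr_stream_subsegment() rewrite below replaces the position-arithmetic walk with per-kvec __xdr_inline_decode() calls, but both variants rely on the same invariant: XDR data items are 32-bit aligned, so consuming an nbytes sub-buffer must advance the cursor by nbytes plus any trailing pad. A small freestanding sketch of that arithmetic (plain C, not kernel code):

#include <stddef.h>

#define XDR_UNIT 4	/* XDR quantum: every item is 4-byte aligned */

/* Bytes of zero padding that follow an nbytes-long XDR item. */
static size_t xdr_pad_size(size_t nbytes)
{
	return (XDR_UNIT - nbytes % XDR_UNIT) % XDR_UNIT;
}

/* Advance a cursor past an nbytes item and its pad; returns the new
 * offset, or (size_t)-1 when the remaining buffer is too short.
 */
static size_t xdr_advance(size_t pos, size_t buflen, size_t nbytes)
{
	size_t step = nbytes + xdr_pad_size(nbytes);

	if (pos > buflen || buflen - pos < step)
		return (size_t)-1;
	return pos + step;
}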
@@ -1642,31 +1642,29 @@ EXPORT_SYMBOL_GPL(xdr_buf_subsegment); bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, unsigned int nbytes) { - unsigned int start = xdr_stream_pos(xdr); - unsigned int remaining, len; + unsigned int remaining, offset, len; - /* Extract @subbuf and bounds-check the fn arguments */ - if (xdr_buf_subsegment(xdr->buf, subbuf, start, nbytes)) + if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes)) return false; - /* Advance @xdr by @nbytes */ - for (remaining = nbytes; remaining;) { - if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) + if (subbuf->head[0].iov_len) + if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len)) return false; - len = (char *)xdr->end - (char *)xdr->p; - if (remaining <= len) { - xdr->p = (__be32 *)((char *)xdr->p + - (remaining + xdr_pad_size(nbytes))); - break; - } + remaining = subbuf->page_len; + offset = subbuf->page_base; + while (remaining) { + len = min_t(unsigned int, remaining, PAGE_SIZE) - offset; + + if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) + return false; + if (!__xdr_inline_decode(xdr, len)) + return false; - xdr->p = (__be32 *)((char *)xdr->p + len); - xdr->end = xdr->p; remaining -= len; + offset = 0; } - xdr_stream_set_pos(xdr, start + nbytes); return true; } EXPORT_SYMBOL_GPL(xdr_stream_subsegment); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a02de2bddb..d4aeee8376 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -246,9 +246,11 @@ EXPORT_SYMBOL_GPL(xprt_find_transport_ident); static void xprt_clear_locked(struct rpc_xprt *xprt) { xprt->snd_task = NULL; - if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) - clear_bit_unlock(XPRT_LOCKED, &xprt->state); - else + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { + smp_mb__before_atomic(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_atomic(); + } else queue_work(xprtiod_workqueue, &xprt->task_cleanup); } @@ -735,8 +737,6 @@ static void xprt_autoclose(struct work_struct *work) unsigned int pflags = memalloc_nofs_save(); trace_xprt_disconnect_auto(xprt); - xprt->connect_cookie++; - smp_mb__before_atomic(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt->ops->close(xprt); xprt_release_write(xprt, NULL); @@ -767,8 +767,7 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done); */ static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt) { - if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state)) - return; + set_bit(XPRT_CLOSE_WAIT, &xprt->state); if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(xprtiod_workqueue, &xprt->task_cleanup); else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index faba7136dd..17f174d6ea 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -13,6 +13,10 @@ #include "xprt_rdma.h" #include +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + #undef RPCRDMA_BACKCHANNEL_DEBUG /** diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 515dd7a66a..f700b34a5b 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -45,6 +45,10 @@ #include "xprt_rdma.h" #include +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + static void frwr_cid_init(struct rpcrdma_ep *ep, struct rpcrdma_mr *mr) { @@ -511,8 +515,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call. 
*/ prev = &first; - mr = rpcrdma_mr_pop(&req->rl_registered); - do { + while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { + trace_xprtrdma_mr_localinv(mr); r_xprt->rx_stats.local_inv_needed++; @@ -529,8 +533,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) *prev = last; prev = &last->next; - } while ((mr = rpcrdma_mr_pop(&req->rl_registered))); - + } mr = container_of(last, struct rpcrdma_mr, mr_invwr); /* Strong send queue ordering guarantees that when the @@ -614,8 +617,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call. */ prev = &first; - mr = rpcrdma_mr_pop(&req->rl_registered); - do { + while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { + trace_xprtrdma_mr_localinv(mr); r_xprt->rx_stats.local_inv_needed++; @@ -632,7 +635,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) *prev = last; prev = &last->next; - } while ((mr = rpcrdma_mr_pop(&req->rl_registered))); + } /* Strong send queue ordering guarantees that when the * last WR in the chain completes, all WRs in the chain @@ -663,38 +666,3 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ rpcrdma_force_disconnect(ep); } - -/** - * frwr_wp_create - Create an MR for padding Write chunks - * @r_xprt: transport resources to use - * - * Return 0 on success, negative errno on failure. - */ -int frwr_wp_create(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_ep *ep = r_xprt->rx_ep; - struct rpcrdma_mr_seg seg; - struct rpcrdma_mr *mr; - - mr = rpcrdma_mr_get(r_xprt); - if (!mr) - return -EAGAIN; - mr->mr_req = NULL; - ep->re_write_pad_mr = mr; - - seg.mr_len = XDR_UNIT; - seg.mr_page = virt_to_page(ep->re_write_pad); - seg.mr_offset = offset_in_page(ep->re_write_pad); - if (IS_ERR(frwr_map(r_xprt, &seg, 1, true, xdr_zero, mr))) - return -EIO; - trace_xprtrdma_mr_fastreg(mr); - - mr->mr_cqe.done = frwr_wc_fastreg; - mr->mr_regwr.wr.next = NULL; - mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe; - mr->mr_regwr.wr.num_sge = 0; - mr->mr_regwr.wr.opcode = IB_WR_REG_MR; - mr->mr_regwr.wr.send_flags = 0; - - return ib_post_send(ep->re_id->qp, &mr->mr_regwr.wr, NULL); -} diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 281ddb87ac..c335c13615 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -54,6 +54,10 @@ #include "xprt_rdma.h" #include +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + /* Returns size of largest RPC-over-RDMA header in a Call message * * The largest Call header contains a full-size Read list and a @@ -251,7 +255,15 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, page_base = 0; } - if (type == rpcrdma_readch || type == rpcrdma_writech) + if (type == rpcrdma_readch) + goto out; + + /* When encoding a Write chunk, some servers need to see an + * extra segment for non-XDR-aligned Write chunks. The upper + * layer provides space in the tail iovec that may be used + * for this purpose. 
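The frwr_unmap_sync()/frwr_unmap_async() hunks above rewrite a primed do/while into the plain while-pop form. A self-contained sketch of the drain idiom on a toy singly linked list (names are illustrative):

#include <stdio.h>

struct node { struct node *next; int val; };

/* Detach and return the head, or NULL once the list is empty. */
static struct node *list_pop(struct node **head)
{
	struct node *n = *head;

	if (n)
		*head = n->next;
	return n;
}

static void drain(struct node **head)
{
	struct node *n;

	/* One test per iteration also covers an initially empty list,
	 * which the do/while form needed a priming pop to handle. */
	while ((n = list_pop(head)))
		printf("%d\n", n->val);
}

int main(void)
{
	struct node b = { .next = NULL, .val = 2 };
	struct node a = { .next = &b, .val = 1 };
	struct node *head = &a;

	drain(&head);
	return 0;
}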
+ */ + if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup) goto out; if (xdrbuf->tail[0].iov_len) @@ -393,7 +405,6 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, enum rpcrdma_chunktype wtype) { struct xdr_stream *xdr = &req->rl_stream; - struct rpcrdma_ep *ep = r_xprt->rx_ep; struct rpcrdma_mr_seg *seg; struct rpcrdma_mr *mr; int nsegs, nchunks; @@ -432,18 +443,6 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, nsegs -= mr->mr_nents; } while (nsegs); - if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) { - if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0) - return -EMSGSIZE; - - trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr, - nsegs); - r_xprt->rx_stats.write_chunk_count++; - r_xprt->rx_stats.total_rdma_request += mr->mr_length; - nchunks++; - nsegs -= mr->mr_nents; - } - /* Update count of segments in this Write chunk */ *segcount = cpu_to_be32(nchunks); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cf76a6ad12..6be23ce7a9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); + trace_svcrdma_wc_receive(wc, &ctxt->rc_cid); if (wc->status != IB_WC_SUCCESS) goto flushed; - trace_svcrdma_wc_recv(wc, &ctxt->rc_cid); /* If receive posting fails, the connection is about to be * lost anyway. The server will not be able to send a reply @@ -345,7 +345,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) */ if (rdma->sc_pending_recvs < rdma->sc_max_requests) if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) - goto dropped; + goto flushed; /* All wc fields are now known to be valid */ ctxt->rc_byte_len = wc->byte_len; @@ -360,11 +360,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; flushed: - if (wc->status == IB_WC_WR_FLUSH_ERR) - trace_svcrdma_wc_recv_flush(wc, &ctxt->rc_cid); - else - trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid); -dropped: svc_rdma_recv_ctxt_put(rdma, ctxt); svc_xprt_deferred_close(&rdma->sc_xprt); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 5f0155fdef..e27433f08c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -155,7 +155,6 @@ struct svc_rdma_chunk_ctxt { struct ib_cqe cc_cqe; struct svcxprt_rdma *cc_rdma; struct list_head cc_rwctxts; - ktime_t cc_posttime; int cc_sqecount; enum ib_wc_status cc_status; struct completion cc_done; @@ -268,16 +267,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_write_info *info = container_of(cc, struct svc_rdma_write_info, wi_cc); - switch (wc->status) { - case IB_WC_SUCCESS: - trace_svcrdma_wc_write(wc, &cc->cc_cid); - break; - case IB_WC_WR_FLUSH_ERR: - trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); - break; - default: - trace_svcrdma_wc_write_err(wc, &cc->cc_cid); - } + trace_svcrdma_wc_write(wc, &cc->cc_cid); svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); @@ -330,22 +320,11 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); - struct svc_rdma_read_info *info; + struct svcxprt_rdma *rdma = cc->cc_rdma; - switch (wc->status) { - case 
IB_WC_SUCCESS: - info = container_of(cc, struct svc_rdma_read_info, ri_cc); - trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes, - cc->cc_posttime); - break; - case IB_WC_WR_FLUSH_ERR: - trace_svcrdma_wc_read_flush(wc, &cc->cc_cid); - break; - default: - trace_svcrdma_wc_read_err(wc, &cc->cc_cid); - } + trace_svcrdma_wc_read(wc, &cc->cc_cid); - svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount); + svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); cc->cc_status = wc->status; complete(&cc->cc_done); return; @@ -384,7 +363,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) do { if (atomic_sub_return(cc->cc_sqecount, &rdma->sc_sq_avail) > 0) { - cc->cc_posttime = ktime_get(); ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); if (ret) break; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 22a871e6fe..599021b239 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -280,21 +280,13 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_send_ctxt *ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); + trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + svc_rdma_wake_send_waiters(rdma, 1); complete(&ctxt->sc_done); if (unlikely(wc->status != IB_WC_SUCCESS)) - goto flushed; - - trace_svcrdma_wc_send(wc, &ctxt->sc_cid); - return; - -flushed: - if (wc->status != IB_WC_WR_FLUSH_ERR) - trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid); - else - trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid); - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_xprt_deferred_close(&rdma->sc_xprt); } /** diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 42e375dbda..16e5696314 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -60,6 +60,10 @@ #include "xprt_rdma.h" #include +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + /* * tunables */ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 7b5fce2faa..1295f9ab83 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -63,6 +63,17 @@ #include "xprt_rdma.h" #include +/* + * Globals/Macros + */ + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +/* + * internal functions + */ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, @@ -194,12 +205,14 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, unsigned int rsize, wsize; /* Default settings for RPC-over-RDMA Version One */ + ep->re_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; if (pmsg && pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { + ep->re_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } @@ -263,6 +276,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_connect_status = -ENETUNREACH; goto wake_connect_worker; case RDMA_CM_EVENT_REJECTED: + dprintk("rpcrdma: connection to %pISpc rejected: %s\n", + sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) ep->re_connect_status = -ENOTCONN; @@ -278,6 +293,8 @@ 
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) break; } + dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, + ep->re_id->device->name, rdma_event_msg(event->event)); return 0; } @@ -404,6 +421,14 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_attr.qp_type = IB_QPT_RC; ep->re_attr.port_num = ~0; + dprintk("RPC: %s: requested max: dtos: send %d recv %d; " + "iovs: send %d recv %d\n", + __func__, + ep->re_attr.cap.max_send_wr, + ep->re_attr.cap.max_recv_wr, + ep->re_attr.cap.max_send_sge, + ep->re_attr.cap.max_recv_sge); + ep->re_send_batch = ep->re_max_requests >> 3; ep->re_send_count = ep->re_send_batch; init_waitqueue_head(&ep->re_connect_wait); @@ -529,7 +554,6 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) goto out; } rpcrdma_mrs_create(r_xprt); - frwr_wp_create(r_xprt); out: trace_xprtrdma_connect(r_xprt, rc); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c79f92eeda..d91f54eae0 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -68,14 +68,13 @@ /* * RDMA Endpoint -- connection endpoint details */ -struct rpcrdma_mr; struct rpcrdma_ep { struct kref re_kref; struct rdma_cm_id *re_id; struct ib_pd *re_pd; unsigned int re_max_rdma_segs; unsigned int re_max_fr_depth; - struct rpcrdma_mr *re_write_pad_mr; + bool re_implicit_roundup; enum ib_mr_type re_mrtype; struct completion re_done; unsigned int re_send_count; @@ -98,8 +97,6 @@ struct rpcrdma_ep { unsigned int re_inline_recv; /* negotiated */ atomic_t re_completion_ids; - - char re_write_pad[XDR_UNIT]; }; /* Pre-allocate extra Work Requests for handling reverse-direction @@ -538,7 +535,6 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); -int frwr_wp_create(struct rpcrdma_xprt *r_xprt); /* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0f39e08ee5..04f1b78bcb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1134,7 +1134,6 @@ static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) { - xprt->connect_cookie++; smp_mb__before_atomic(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); @@ -1154,13 +1153,14 @@ static void xs_error_report(struct sock *sk) struct sock_xprt *transport; struct rpc_xprt *xprt; + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) - return; + goto out; transport = container_of(xprt, struct sock_xprt, xprt); transport->xprt_err = -sk->sk_err; if (transport->xprt_err == 0) - return; + goto out; dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -transport->xprt_err); trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err); @@ -1168,6 +1168,8 @@ static void xs_error_report(struct sock *sk) /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ smp_mb__before_atomic(); xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); + out: + read_unlock_bh(&sk->sk_callback_lock); } static void xs_reset_transport(struct sock_xprt *transport) @@ -1186,7 +1188,7 @@ static void xs_reset_transport(struct sock_xprt *transport) kernel_sock_shutdown(sock, SHUT_RDWR); mutex_lock(&transport->recv_mutex); - 
lock_sock(sk); + write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; transport->file = NULL; @@ -1195,10 +1197,10 @@ static void xs_reset_transport(struct sock_xprt *transport) xs_restore_old_callbacks(transport, sk); xprt_clear_connected(xprt); + write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); /* Reset stream record info */ xs_stream_reset_connect(transport); - release_sock(sk); mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); @@ -1362,6 +1364,7 @@ static void xs_data_ready(struct sock *sk) { struct rpc_xprt *xprt; + read_lock_bh(&sk->sk_callback_lock); dprintk("RPC: xs_data_ready...\n"); xprt = xprt_from_sock(sk); if (xprt != NULL) { @@ -1376,6 +1379,7 @@ static void xs_data_ready(struct sock *sk) if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) queue_work(xprtiod_workqueue, &transport->recv_worker); } + read_unlock_bh(&sk->sk_callback_lock); } /* @@ -1404,8 +1408,9 @@ static void xs_tcp_state_change(struct sock *sk) struct rpc_xprt *xprt; struct sock_xprt *transport; + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) - return; + goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", sk->sk_state, xprt_connected(xprt), @@ -1466,6 +1471,8 @@ static void xs_tcp_state_change(struct sock *sk) /* Trigger the socket release */ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); } + out: + read_unlock_bh(&sk->sk_callback_lock); } static void xs_write_space(struct sock *sk) @@ -1504,9 +1511,13 @@ static void xs_write_space(struct sock *sk) */ static void xs_udp_write_space(struct sock *sk) { + read_lock_bh(&sk->sk_callback_lock); + /* from net/core/sock.c:sock_def_write_space */ if (sock_writeable(sk)) xs_write_space(sk); + + read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1521,9 +1532,13 @@ static void xs_udp_write_space(struct sock *sk) */ static void xs_tcp_write_space(struct sock *sk) { + read_lock_bh(&sk->sk_callback_lock); + /* from net/core/stream.c:sk_stream_write_space */ if (sk_stream_is_writeable(sk)) xs_write_space(sk); + + read_unlock_bh(&sk->sk_callback_lock); } static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) @@ -1641,12 +1656,7 @@ static int xs_get_srcport(struct sock_xprt *transport) unsigned short get_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); - unsigned short ret = 0; - mutex_lock(&sock->recv_mutex); - if (sock->sock) - ret = xs_sock_getport(sock->sock); - mutex_unlock(&sock->recv_mutex); - return ret; + return xs_sock_getport(sock->sock); } EXPORT_SYMBOL(get_srcport); @@ -1725,15 +1735,15 @@ static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) } #ifdef CONFIG_DEBUG_LOCK_ALLOC -static struct lock_class_key xs_key[3]; -static struct lock_class_key xs_slock_key[3]; +static struct lock_class_key xs_key[2]; +static struct lock_class_key xs_slock_key[2]; static inline void xs_reclassify_socketu(struct socket *sock) { struct sock *sk = sock->sk; sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", - &xs_slock_key[0], "sk_lock-AF_LOCAL-RPC", &xs_key[0]); + &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); } static inline void xs_reclassify_socket4(struct socket *sock) @@ -1741,7 +1751,7 @@ static inline void xs_reclassify_socket4(struct socket *sock) struct sock *sk = sock->sk; sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", - &xs_slock_key[1], 
"sk_lock-AF_INET-RPC", &xs_key[1]); + &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); } static inline void xs_reclassify_socket6(struct socket *sock) @@ -1749,7 +1759,7 @@ static inline void xs_reclassify_socket6(struct socket *sock) struct sock *sk = sock->sk; sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", - &xs_slock_key[2], "sk_lock-AF_INET6-RPC", &xs_key[2]); + &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } static inline void xs_reclassify_socket(int family, struct socket *sock) @@ -1823,7 +1833,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, if (!transport->inet) { struct sock *sk = sock->sk; - lock_sock(sk); + write_lock_bh(&sk->sk_callback_lock); xs_save_old_callbacks(transport, sk); @@ -1839,7 +1849,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, transport->sock = sock; transport->inet = sk; - release_sock(sk); + write_unlock_bh(&sk->sk_callback_lock); } xs_stream_start_connect(transport); @@ -1915,7 +1925,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; - if (RPC_IS_ASYNC(task)) { + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the * filesystem namespace of the process making the rpc @@ -2021,7 +2031,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; - lock_sock(sk); + write_lock_bh(&sk->sk_callback_lock); xs_save_old_callbacks(transport, sk); @@ -2038,7 +2048,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); - release_sock(sk); + write_unlock_bh(&sk->sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -2164,6 +2174,7 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = -ENOTCONN; if (!transport->inet) { struct sock *sk = sock->sk; @@ -2183,7 +2194,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_tcp_set_socket_timeouts(xprt, sock); tcp_sock_set_nodelay(sk); - lock_sock(sk); + write_lock_bh(&sk->sk_callback_lock); xs_save_old_callbacks(transport, sk); @@ -2203,11 +2214,11 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->sock = sock; transport->inet = sk; - release_sock(sk); + write_unlock_bh(&sk->sk_callback_lock); } if (!xprt_bound(xprt)) - return -ENOTCONN; + goto out; xs_set_memalloc(xprt); @@ -2215,7 +2226,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* Tell the socket layer to start connecting... */ set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + switch (ret) { + case 0: + xs_set_srcport(transport, sock); + fallthrough; + case -EINPROGRESS: + /* SYN_SENT! */ + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + break; + case -EADDRNOTAVAIL: + /* Source port number is unavailable. Try a new one! 
*/ + transport->srcport = 0; + } +out: + return ret; } /** @@ -2230,14 +2256,14 @@ static void xs_tcp_setup_socket(struct work_struct *work) container_of(work, struct sock_xprt, connect_worker.work); struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; - int status; + int status = -EIO; if (!sock) { sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { - xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); + status = PTR_ERR(sock); goto out; } } @@ -2254,21 +2280,21 @@ static void xs_tcp_setup_socket(struct work_struct *work) xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { - case 0: - xs_set_srcport(transport, sock); + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); fallthrough; - case -EINPROGRESS: - /* SYN_SENT! */ - if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; - fallthrough; - case -EALREADY: - goto out_unlock; case -EADDRNOTAVAIL: - /* Source port number is unavailable. Try a new one! */ - transport->srcport = 0; - status = -EAGAIN; + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + xs_tcp_force_close(xprt); break; + case 0: + case -EINPROGRESS: + case -EALREADY: + xprt_unlock_connect(xprt, transport); + return; case -EINVAL: /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. @@ -2280,22 +2306,18 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: - break; - default: - printk("%s: connect returned unhandled error %d\n", - __func__, status); - status = -EAGAIN; + /* xs_tcp_force_close() wakes tasks with a fixed error code. + * We need to wake them first to ensure the correct error code. + */ + xprt_wake_pending_tasks(xprt, status); + xs_tcp_force_close(xprt); + goto out; } - - /* xs_tcp_force_close() wakes tasks with a fixed error code. - * We need to wake them first to ensure the correct error code. 
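The consolidated error path in xs_tcp_setup_socket() above keeps a subtle ordering: pending tasks are woken with the observed status before xs_tcp_force_close() runs, because the close path can only wake remaining waiters with a fixed code. A toy model of why that order preserves the specific errno (pure illustration, not the RPC wait-queue API):

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct waiter { int woken; int err; };

/* Wake every still-pending waiter with @err; already-woken waiters
 * keep the code they were first given. */
static void wake_pending(struct waiter *w, size_t n, int err)
{
	for (size_t i = 0; i < n; i++) {
		if (!w[i].woken) {
			w[i].woken = 1;
			w[i].err = err;
		}
	}
}

static void force_close(struct waiter *w, size_t n)
{
	wake_pending(w, n, -EAGAIN);	/* close can only say "retry" */
}

int main(void)
{
	struct waiter tasks[2] = { 0 };

	wake_pending(tasks, 2, -EHOSTUNREACH);	/* specific cause first */
	force_close(tasks, 2);			/* no-op for those tasks */
	printf("%d %d\n", tasks[0].err, tasks[1].err);
	return 0;
}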
- */ - xprt_wake_pending_tasks(xprt, status); - xs_tcp_force_close(xprt); + status = -EAGAIN; out: xprt_clear_connecting(xprt); -out_unlock: xprt_unlock_connect(xprt, transport); + xprt_wake_pending_tasks(xprt, status); } /** @@ -2319,7 +2341,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL && !xprt_connecting(xprt)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index b62565278f..0b2c18efc0 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -28,7 +28,6 @@ typedef void switchdev_deferred_func_t(struct net_device *dev, struct switchdev_deferred_item { struct list_head list; struct net_device *dev; - netdevice_tracker dev_tracker; switchdev_deferred_func_t *func; unsigned long data[]; }; @@ -64,7 +63,7 @@ void switchdev_deferred_process(void) while ((dfitem = switchdev_deferred_dequeue())) { dfitem->func(dfitem->dev, dfitem->data); - dev_put_track(dfitem->dev, &dfitem->dev_tracker); + dev_put(dfitem->dev); kfree(dfitem); } } @@ -91,7 +90,7 @@ static int switchdev_deferred_enqueue(struct net_device *dev, dfitem->dev = dev; dfitem->func = func; memcpy(dfitem->data, data, data_len); - dev_hold_track(dev, &dfitem->dev_tracker, GFP_ATOMIC); + dev_hold(dev); spin_lock_bh(&deferred_lock); list_add_tail(&dfitem->list, &deferred); spin_unlock_bh(&deferred_lock); @@ -429,17 +428,17 @@ switchdev_lower_dev_find(struct net_device *dev, return switchdev_priv.lower_dev; } -static int __switchdev_handle_fdb_event_to_device(struct net_device *dev, - struct net_device *orig_dev, unsigned long event, +static int __switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct net_device *orig_dev, const struct switchdev_notifier_fdb_info *fdb_info, bool (*check_cb)(const struct net_device *dev), bool (*foreign_dev_check_cb)(const struct net_device *dev, const struct net_device *foreign_dev), - int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info)) { const struct switchdev_notifier_info *info = &fdb_info->info; @@ -448,17 +447,17 @@ static int __switchdev_handle_fdb_event_to_device(struct net_device *dev, int err = -EOPNOTSUPP; if (check_cb(dev)) - return mod_cb(dev, orig_dev, event, info->ctx, fdb_info); + return add_cb(dev, orig_dev, info->ctx, fdb_info); if (netif_is_lag_master(dev)) { if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) goto maybe_bridged_with_us; /* This is a LAG interface that we offload */ - if (!lag_mod_cb) + if (!lag_add_cb) return -EOPNOTSUPP; - return lag_mod_cb(dev, orig_dev, event, info->ctx, fdb_info); + return lag_add_cb(dev, orig_dev, info->ctx, fdb_info); } /* Recurse through lower interfaces in case the FDB entry is pointing @@ -482,10 +481,10 @@ static int __switchdev_handle_fdb_event_to_device(struct net_device *dev, foreign_dev_check_cb)) continue; - err = __switchdev_handle_fdb_event_to_device(lower_dev, 
orig_dev, - event, fdb_info, check_cb, - foreign_dev_check_cb, - mod_cb, lag_mod_cb); + err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); if (err && err != -EOPNOTSUPP) return err; } @@ -504,34 +503,140 @@ static int __switchdev_handle_fdb_event_to_device(struct net_device *dev, if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) return 0; - return __switchdev_handle_fdb_event_to_device(br, orig_dev, event, fdb_info, - check_cb, foreign_dev_check_cb, - mod_cb, lag_mod_cb); + return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info, + check_cb, foreign_dev_check_cb, + add_cb, lag_add_cb); } -int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event, +int switchdev_handle_fdb_add_to_device(struct net_device *dev, const struct switchdev_notifier_fdb_info *fdb_info, bool (*check_cb)(const struct net_device *dev), bool (*foreign_dev_check_cb)(const struct net_device *dev, const struct net_device *foreign_dev), - int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info)) { int err; - err = __switchdev_handle_fdb_event_to_device(dev, dev, event, fdb_info, - check_cb, foreign_dev_check_cb, - mod_cb, lag_mod_cb); + err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); if (err == -EOPNOTSUPP) err = 0; return err; } -EXPORT_SYMBOL_GPL(switchdev_handle_fdb_event_to_device); +EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device); + +static int __switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct net_device *orig_dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + const struct switchdev_notifier_info *info = &fdb_info->info; + struct net_device *br, *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) + return del_cb(dev, orig_dev, info->ctx, fdb_info); + + if (netif_is_lag_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + goto maybe_bridged_with_us; + + /* This is a LAG interface that we offload */ + if (!lag_del_cb) + return -EOPNOTSUPP; + + return lag_del_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* Recurse through lower interfaces in case the FDB entry is pointing + * towards a bridge device. 
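The two switchdev handlers above share one recursion: call the driver directly for a port we offload, hand LAG uppers to the LAG callback, and otherwise descend through a bridge's lower devices without crossing into nested bridges. A condensed standalone model of that dispatch, with a toy device struct in place of net_device and the LAG lower-dev search elided:

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

struct dev {
	bool ours;		/* a port we offload */
	bool is_lag, is_bridge;
	struct dev **lowers;	/* NULL-terminated; bridges only here */
};

static int dispatch(struct dev *d, int (*cb)(struct dev *),
		    int (*lag_cb)(struct dev *))
{
	if (d->ours)
		return cb(d);

	if (d->is_lag)			/* a LAG we offload, if supported */
		return lag_cb ? lag_cb(d) : -EOPNOTSUPP;

	if (d->is_bridge && d->lowers) {
		for (struct dev **l = d->lowers; *l; l++) {
			if ((*l)->is_bridge)	/* don't cross bridges */
				continue;
			int err = dispatch(*l, cb, lag_cb);
			if (err && err != -EOPNOTSUPP)
				return err;
		}
	}
	return 0;
}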
+ */ + if (netif_is_bridge_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + return 0; + + /* This is a bridge interface that we offload */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; + + /* Bridge ports might be either us, or LAG interfaces + * that we offload. + */ + if (!check_cb(lower_dev) && + !switchdev_lower_dev_find(lower_dev, check_cb, + foreign_dev_check_cb)) + continue; + + err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return 0; + } + +maybe_bridged_with_us: + /* Event is neither on a bridge nor a LAG. Check whether it is on an + * interface that is in a bridge with us. + */ + br = netdev_master_upper_dev_get_rcu(dev); + if (!br || !netif_is_bridge_master(br)) + return 0; + + if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) + return 0; + + return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info, + check_cb, foreign_dev_check_cb, + del_cb, lag_del_cb); +} + +int switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + int err; + + err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err == -EOPNOTSUPP) + err = 0; + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device); static int __switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 4b45ed631e..f6cb0d4d11 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -144,7 +144,7 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path, addr = (unsigned long)ent->data; if (is_module_address(addr)) where = "module"; - else if (is_kernel_core_data(addr)) + else if (core_kernel_data(addr)) where = "kernel"; else continue; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 473a790f58..443f8e5b94 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -462,7 +462,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; - b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } @@ -703,7 +703,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, - (const char *)dev->dev_addr); + (char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: @@ -787,7 +787,7 @@ int tipc_attach_loopback(struct net *net) if (!dev) return -ENODEV; - dev_hold_track(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); + dev_hold(dev); tn->loopback_pt.dev = dev; tn->loopback_pt.type 
= htons(ETH_P_TIPC); tn->loopback_pt.func = tipc_loopback_rcv_pkt; @@ -800,7 +800,7 @@ void tipc_detach_loopback(struct net *net) struct tipc_net *tn = tipc_net(net); dev_remove_pack(&tn->loopback_pt); - dev_put_track(net->loopback_dev, &tn->loopback_pt.dev_tracker); + dev_put(net->loopback_dev); } /* Caller should hold rtnl_lock to protect the bearer */ diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 490ad6e5f7..57c6a1a719 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -117,7 +117,7 @@ struct tipc_media { char *msg); int (*raw2addr)(struct tipc_bearer *b, struct tipc_media_addr *addr, - const char *raw); + char *raw); u32 priority; u32 tolerance; u32 min_win; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index f09316a903..b5074957e8 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -761,10 +761,21 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, skb_tailroom(skb), tailen); } - nsg = skb_cow_data(skb, tailen, &trailer); - if (unlikely(nsg < 0)) { - pr_err("TX: skb_cow_data() returned %d\n", nsg); - return nsg; + if (unlikely(!skb_cloned(skb) && tailen <= skb_tailroom(skb))) { + nsg = 1; + trailer = skb; + } else { + /* TODO: We could avoid skb_cow_data() if skb has no frag_list + * e.g. by skb_fill_page_desc() to add another page to the skb + * with the wanted tailen... However, page skbs look not often, + * so take it easy now! + * Cloned skbs e.g. from link_xmit() seems no choice though :( + */ + nsg = skb_cow_data(skb, tailen, &trailer); + if (unlikely(nsg < 0)) { + pr_err("TX: skb_cow_data() returned %d\n", nsg); + return nsg; + } } pskb_put(skb, trailer, tailen); diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index cb0d185e06..c68019697c 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -60,7 +60,7 @@ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw mac address format to media addr format */ static int tipc_eth_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - const char *msg) + char *msg) { memset(addr, 0, sizeof(*addr)); ether_addr_copy(addr->value, msg); diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index b9ad0434c3..7aa9ff8845 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -67,7 +67,7 @@ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw InfiniBand address format to media addr format */ static int tipc_ib_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - const char *msg) + char *msg) { memset(addr, 0, sizeof(*addr)); memcpy(addr->value, msg, INFINIBAND_ALEN); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1e14d7f8f2..4e7936d9b4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1298,8 +1298,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, return false; #ifdef CONFIG_TIPC_CRYPTO case MSG_CRYPTO: - if (sysctl_tipc_key_exchange_enabled && - TIPC_SKB_CB(skb)->decrypted) { + if (TIPC_SKB_CB(skb)->decrypted) { tipc_crypto_msg_rcv(l->net, skb); return true; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 6bc2879ba6..9aac9c60d7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -423,88 +423,6 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EFAULT; break; } - case TLS_CIPHER_AES_CCM_128: { - struct tls12_crypto_info_aes_ccm_128 *aes_ccm_128 = - container_of(crypto_info, - struct tls12_crypto_info_aes_ccm_128, info); - - if (len != sizeof(*aes_ccm_128)) { - rc = -EINVAL; - goto out; - } - 
lock_sock(sk); - memcpy(aes_ccm_128->iv, - cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE, - TLS_CIPHER_AES_CCM_128_IV_SIZE); - memcpy(aes_ccm_128->rec_seq, cctx->rec_seq, - TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_CHACHA20_POLY1305: { - struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305 = - container_of(crypto_info, - struct tls12_crypto_info_chacha20_poly1305, - info); - - if (len != sizeof(*chacha20_poly1305)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(chacha20_poly1305->iv, - cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE, - TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE); - memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq, - TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, chacha20_poly1305, - sizeof(*chacha20_poly1305))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_SM4_GCM: { - struct tls12_crypto_info_sm4_gcm *sm4_gcm_info = - container_of(crypto_info, - struct tls12_crypto_info_sm4_gcm, info); - - if (len != sizeof(*sm4_gcm_info)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(sm4_gcm_info->iv, - cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE, - TLS_CIPHER_SM4_GCM_IV_SIZE); - memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq, - TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_SM4_CCM: { - struct tls12_crypto_info_sm4_ccm *sm4_ccm_info = - container_of(crypto_info, - struct tls12_crypto_info_sm4_ccm, info); - - if (len != sizeof(*sm4_ccm_info)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(sm4_ccm_info->iv, - cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE, - TLS_CIPHER_SM4_CCM_IV_SIZE); - memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq, - TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info))) - rc = -EFAULT; - break; - } default: rc = -EINVAL; } @@ -608,12 +526,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, case TLS_CIPHER_CHACHA20_POLY1305: optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305); break; - case TLS_CIPHER_SM4_GCM: - optsize = sizeof(struct tls12_crypto_info_sm4_gcm); - break; - case TLS_CIPHER_SM4_CCM: - optsize = sizeof(struct tls12_crypto_info_sm4_ccm); - break; default: rc = -EINVAL; goto err_crypto_info; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index efc84845bb..bd96ec26f4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -507,15 +507,9 @@ static int tls_do_encryption(struct sock *sk, int rc, iv_offset = 0; /* For CCM based ciphers, first byte of IV is a constant */ - switch (prot->cipher_type) { - case TLS_CIPHER_AES_CCM_128: + if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE; iv_offset = 1; - break; - case TLS_CIPHER_SM4_CCM: - rec->iv_data[0] = TLS_SM4_CCM_IV_B0_BYTE; - iv_offset = 1; - break; } memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, @@ -1472,16 +1466,10 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aad = (u8 *)(sgout + n_sgout); iv = aad + prot->aad_size; - /* For CCM based ciphers, first byte of nonce+iv is a constant */ - switch (prot->cipher_type) { - case TLS_CIPHER_AES_CCM_128: - iv[0] = TLS_AES_CCM_IV_B0_BYTE; + /* For CCM based ciphers, first byte of nonce+iv is always '2' */ + if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { + iv[0] = 2; iv_offset = 1; - break; - 
case TLS_CIPHER_SM4_CCM: - iv[0] = TLS_SM4_CCM_IV_B0_BYTE; - iv_offset = 1; - break; } /* Prepare IV */ @@ -1990,7 +1978,6 @@ int tls_sw_recvmsg(struct sock *sk, end: release_sock(sk); - sk_defer_free_flush(sk); if (psock) sk_psock_put(sk, psock); return copied ? : err; @@ -2059,7 +2046,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, splice_read_end: release_sock(sk); - sk_defer_free_flush(sk); return copied ? : err; } @@ -2330,6 +2316,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_crypto_info *crypto_info; + struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; + struct tls12_crypto_info_aes_gcm_256 *gcm_256_info; + struct tls12_crypto_info_aes_ccm_128 *ccm_128_info; + struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; struct cipher_context *cctx; @@ -2392,15 +2382,15 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { - struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; - - gcm_128_info = (void *)crypto_info; nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; - iv = gcm_128_info->iv; + iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE; - rec_seq = gcm_128_info->rec_seq; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; + gcm_128_info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE; key = gcm_128_info->key; salt = gcm_128_info->salt; @@ -2409,15 +2399,15 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) break; } case TLS_CIPHER_AES_GCM_256: { - struct tls12_crypto_info_aes_gcm_256 *gcm_256_info; - - gcm_256_info = (void *)crypto_info; nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE; tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE; iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE; - iv = gcm_256_info->iv; + iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv; rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE; - rec_seq = gcm_256_info->rec_seq; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq; + gcm_256_info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE; key = gcm_256_info->key; salt = gcm_256_info->salt; @@ -2426,15 +2416,15 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) break; } case TLS_CIPHER_AES_CCM_128: { - struct tls12_crypto_info_aes_ccm_128 *ccm_128_info; - - ccm_128_info = (void *)crypto_info; nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE; tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE; iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE; - iv = ccm_128_info->iv; + iv = ((struct tls12_crypto_info_aes_ccm_128 *)crypto_info)->iv; rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE; - rec_seq = ccm_128_info->rec_seq; + rec_seq = + ((struct tls12_crypto_info_aes_ccm_128 *)crypto_info)->rec_seq; + ccm_128_info = + (struct tls12_crypto_info_aes_ccm_128 *)crypto_info; keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE; key = ccm_128_info->key; salt = ccm_128_info->salt; @@ -2443,8 +2433,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) break; } case TLS_CIPHER_CHACHA20_POLY1305: { - struct 
tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info; - chacha20_poly1305_info = (void *)crypto_info; nonce_size = 0; tag_size = TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE; @@ -2459,40 +2447,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) cipher_name = "rfc7539(chacha20,poly1305)"; break; } - case TLS_CIPHER_SM4_GCM: { - struct tls12_crypto_info_sm4_gcm *sm4_gcm_info; - - sm4_gcm_info = (void *)crypto_info; - nonce_size = TLS_CIPHER_SM4_GCM_IV_SIZE; - tag_size = TLS_CIPHER_SM4_GCM_TAG_SIZE; - iv_size = TLS_CIPHER_SM4_GCM_IV_SIZE; - iv = sm4_gcm_info->iv; - rec_seq_size = TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE; - rec_seq = sm4_gcm_info->rec_seq; - keysize = TLS_CIPHER_SM4_GCM_KEY_SIZE; - key = sm4_gcm_info->key; - salt = sm4_gcm_info->salt; - salt_size = TLS_CIPHER_SM4_GCM_SALT_SIZE; - cipher_name = "gcm(sm4)"; - break; - } - case TLS_CIPHER_SM4_CCM: { - struct tls12_crypto_info_sm4_ccm *sm4_ccm_info; - - sm4_ccm_info = (void *)crypto_info; - nonce_size = TLS_CIPHER_SM4_CCM_IV_SIZE; - tag_size = TLS_CIPHER_SM4_CCM_TAG_SIZE; - iv_size = TLS_CIPHER_SM4_CCM_IV_SIZE; - iv = sm4_ccm_info->iv; - rec_seq_size = TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE; - rec_seq = sm4_ccm_info->rec_seq; - keysize = TLS_CIPHER_SM4_CCM_KEY_SIZE; - key = sm4_ccm_info->key; - salt = sm4_ccm_info->salt; - salt_size = TLS_CIPHER_SM4_CCM_SALT_SIZE; - cipher_name = "ccm(sm4)"; - break; - } default: rc = -EINVAL; goto free_priv; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c195698198..b0bfc78e42 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -89,7 +89,6 @@ #include #include #include -#include #include #include #include @@ -118,64 +117,24 @@ #include "scm.h" -spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; -EXPORT_SYMBOL_GPL(unix_table_locks); struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); +DEFINE_SPINLOCK(unix_table_lock); +EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -/* SMP locking strategy: - * hash table is protected with spinlock unix_table_locks - * each socket state is protected by separate spin lock. - */ -static unsigned int unix_unbound_hash(struct sock *sk) +static struct hlist_head *unix_sockets_unbound(void *addr) { - unsigned long hash = (unsigned long)sk; + unsigned long hash = (unsigned long)addr; hash ^= hash >> 16; hash ^= hash >> 8; - hash ^= sk->sk_type; - - return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1)); + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; } -static unsigned int unix_bsd_hash(struct inode *i) -{ - return i->i_ino & (UNIX_HASH_SIZE - 1); -} - -static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, - int addr_len, int type) -{ - __wsum csum = csum_partial(sunaddr, addr_len, 0); - unsigned int hash; - - hash = (__force unsigned int)csum_fold(csum); - hash ^= hash >> 8; - hash ^= type; - - return hash & (UNIX_HASH_SIZE - 1); -} - -static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) -{ - /* hash1 and hash2 is never the same because - * one is between 0 and UNIX_HASH_SIZE - 1, and - * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2. 
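For contrast with the single unix_table_lock this hunk reinstates: the per-bucket scheme being removed relies on an ordered double-lock so that two re-hashing paths can never take the same pair of bucket locks in opposite order. A pthread sketch of that idiom (bucket count illustrative; the kernel variant's two hashes come from disjoint ranges, so they are never equal):

#include <pthread.h>

#define NBUCKETS 512

static pthread_mutex_t table_locks[NBUCKETS];

static void table_locks_init(void)
{
	for (int i = 0; i < NBUCKETS; i++)
		pthread_mutex_init(&table_locks[i], NULL);
}

/* Always lock the lower-indexed bucket first: any two threads that
 * need the same pair acquire it in the same order, so no ABBA. */
static void table_double_lock(unsigned int h1, unsigned int h2)
{
	if (h1 > h2) {
		unsigned int tmp = h1;

		h1 = h2;
		h2 = tmp;
	}
	pthread_mutex_lock(&table_locks[h1]);
	if (h2 != h1)
		pthread_mutex_lock(&table_locks[h2]);
}

static void table_double_unlock(unsigned int h1, unsigned int h2)
{
	pthread_mutex_unlock(&table_locks[h1]);
	if (h2 != h1)
		pthread_mutex_unlock(&table_locks[h2]);
}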
- */ - if (hash1 > hash2) - swap(hash1, hash2); - - spin_lock(&unix_table_locks[hash1]); - spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); -} - -static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2) -{ - spin_unlock(&unix_table_locks[hash1]); - spin_unlock(&unix_table_locks[hash2]); -} +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -205,6 +164,20 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */ +/* + * SMP locking strategy: + * hash table is protected with spinlock unix_table_lock + * each socket state is protected by separate spin lock. + */ + +static inline unsigned int unix_hash_fold(__wsum n) +{ + unsigned int hash = (__force unsigned int)csum_fold(n); + + hash ^= hash>>8; + return hash&(UNIX_HASH_SIZE-1); +} + #define unix_peer(sk) (unix_sk(sk)->peer) static inline int unix_our_peer(struct sock *sk, struct sock *osk) @@ -241,22 +214,6 @@ struct sock *unix_peer_get(struct sock *s) } EXPORT_SYMBOL_GPL(unix_peer_get); -static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, - int addr_len) -{ - struct unix_address *addr; - - addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); - if (!addr) - return NULL; - - refcount_set(&addr->refcnt, 1); - addr->len = addr_len; - memcpy(addr->name, sunaddr, addr_len); - - return addr; -} - static inline void unix_release_addr(struct unix_address *addr) { if (refcount_dec_and_test(&addr->refcnt)) @@ -270,29 +227,29 @@ static inline void unix_release_addr(struct unix_address *addr) * - if started by zero, it is abstract name. */ -static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) +static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { - if (addr_len <= offsetof(struct sockaddr_un, sun_path) || - addr_len > sizeof(*sunaddr)) + *hashp = 0; + + if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; - - if (sunaddr->sun_family != AF_UNIX) + if (!sunaddr || sunaddr->sun_family != AF_UNIX) return -EINVAL; + if (sunaddr->sun_path[0]) { + /* + * This may look like an off by one error but it is a bit more + * subtle. 108 is the longest valid AF_UNIX path for a binding. + * sun_path[108] doesn't as such exist. However in kernel space + * we are guaranteed that it is a valid memory location in our + * kernel address buffer. + */ + ((char *)sunaddr)[len] = 0; + len = strlen(sunaddr->sun_path)+1+sizeof(short); + return len; + } - return 0; -} - -static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) -{ - /* This may look like an off by one error but it is a bit more - * subtle. 108 is the longest valid AF_UNIX path for a binding. - * sun_path[108] doesn't as such exist. However in kernel space - * we are guaranteed that it is a valid memory location in our - * kernel address buffer because syscall functions always pass - * a pointer of struct sockaddr_storage which has a bigger buffer - * than 108. 
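Both naming schemes in this hunk finish the same way: fold a wide checksum down, mix, and mask into the hash table. A standalone version of that fold-and-mask step, using a plain 32-bit sum where the kernel uses csum_partial()/csum_fold() (the kernel's csum_fold() also complements the result, which doesn't matter for bucket selection):

#include <stdint.h>

#define UNIX_HASH_SIZE 256	/* power of two, as the masking assumes */

static unsigned int hash_fold(uint32_t sum)
{
	unsigned int hash = (sum & 0xffff) + (sum >> 16);

	hash = (hash & 0xffff) + (hash >> 16);	/* fold the carry back in */
	hash ^= hash >> 8;			/* mix high byte into low */
	return hash & (UNIX_HASH_SIZE - 1);	/* bucket index */
}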
- */ - ((char *)sunaddr)[addr_len] = 0; + *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); + return len; } static void __unix_remove_socket(struct sock *sk) @@ -300,34 +257,32 @@ static void __unix_remove_socket(struct sock *sk) sk_del_node_init(sk); } -static void __unix_insert_socket(struct sock *sk) +static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) { WARN_ON(!sk_unhashed(sk)); - sk_add_node(sk, &unix_socket_table[sk->sk_hash]); + sk_add_node(sk, list); } -static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, - unsigned int hash) +static void __unix_set_addr(struct sock *sk, struct unix_address *addr, + unsigned hash) { __unix_remove_socket(sk); smp_store_release(&unix_sk(sk)->addr, addr); - - sk->sk_hash = hash; - __unix_insert_socket(sk); + __unix_insert_socket(&unix_socket_table[hash], sk); } -static void unix_remove_socket(struct sock *sk) +static inline void unix_remove_socket(struct sock *sk) { - spin_lock(&unix_table_locks[sk->sk_hash]); + spin_lock(&unix_table_lock); __unix_remove_socket(sk); - spin_unlock(&unix_table_locks[sk->sk_hash]); + spin_unlock(&unix_table_lock); } -static void unix_insert_unbound_socket(struct sock *sk) +static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) { - spin_lock(&unix_table_locks[sk->sk_hash]); - __unix_insert_socket(sk); - spin_unlock(&unix_table_locks[sk->sk_hash]); + spin_lock(&unix_table_lock); + __unix_insert_socket(list, sk); + spin_unlock(&unix_table_lock); } static struct sock *__unix_find_socket_byname(struct net *net, @@ -355,31 +310,32 @@ static inline struct sock *unix_find_socket_byname(struct net *net, { struct sock *s; - spin_lock(&unix_table_locks[hash]); + spin_lock(&unix_table_lock); s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); - spin_unlock(&unix_table_locks[hash]); + spin_unlock(&unix_table_lock); return s; } static struct sock *unix_find_socket_byinode(struct inode *i) { - unsigned int hash = unix_bsd_hash(i); struct sock *s; - spin_lock(&unix_table_locks[hash]); - sk_for_each(s, &unix_socket_table[hash]) { + spin_lock(&unix_table_lock); + sk_for_each(s, + &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); - spin_unlock(&unix_table_locks[hash]); - return s; + goto found; } } - spin_unlock(&unix_table_locks[hash]); - return NULL; + s = NULL; +found: + spin_unlock(&unix_table_lock); + return s; } /* Support code for asymmetrically connected dgram sockets @@ -566,7 +522,9 @@ static void unix_sock_destructor(struct sock *sk) unix_release_addr(u->addr); atomic_long_dec(&unix_nr_socks); + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, atomic_long_read(&unix_nr_socks)); @@ -914,7 +872,6 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sock_init_data(sock, sk); - sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; @@ -930,9 +887,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); - unix_insert_unbound_socket(sk); + 
unix_insert_socket(unix_sockets_unbound(sk), sk); + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); return sk; @@ -993,90 +952,15 @@ static int unix_release(struct socket *sock) return 0; } -static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, - int addr_len, int type) +static int unix_autobind(struct socket *sock) { - struct inode *inode; - struct path path; - struct sock *sk; - int err; - - unix_mkname_bsd(sunaddr, addr_len); - err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); - if (err) - goto fail; - - err = path_permission(&path, MAY_WRITE); - if (err) - goto path_put; - - err = -ECONNREFUSED; - inode = d_backing_inode(path.dentry); - if (!S_ISSOCK(inode->i_mode)) - goto path_put; - - sk = unix_find_socket_byinode(inode); - if (!sk) - goto path_put; - - err = -EPROTOTYPE; - if (sk->sk_type == type) - touch_atime(&path); - else - goto sock_put; - - path_put(&path); - - return sk; - -sock_put: - sock_put(sk); -path_put: - path_put(&path); -fail: - return ERR_PTR(err); -} - -static struct sock *unix_find_abstract(struct net *net, - struct sockaddr_un *sunaddr, - int addr_len, int type) -{ - unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); - struct dentry *dentry; - struct sock *sk; - - sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); - if (!sk) - return ERR_PTR(-ECONNREFUSED); - - dentry = unix_sk(sk)->path.dentry; - if (dentry) - touch_atime(&unix_sk(sk)->path); - - return sk; -} - -static struct sock *unix_find_other(struct net *net, - struct sockaddr_un *sunaddr, - int addr_len, int type) -{ - struct sock *sk; - - if (sunaddr->sun_path[0]) - sk = unix_find_bsd(net, sunaddr, addr_len, type); - else - sk = unix_find_abstract(net, sunaddr, addr_len, type); - - return sk; -} - -static int unix_autobind(struct sock *sk) -{ - unsigned int new_hash, old_hash = sk->sk_hash; + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); + static u32 ordernum = 1; struct unix_address *addr; - u32 lastnum, ordernum; int err; + unsigned int retries = 0; err = mutex_lock_interruptible(&u->bindlock); if (err) @@ -1086,103 +970,141 @@ static int unix_autobind(struct sock *sk) goto out; err = -ENOMEM; - addr = kzalloc(sizeof(*addr) + - offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); + addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); if (!addr) goto out; - addr->len = offsetof(struct sockaddr_un, sun_path) + 6; addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1); - ordernum = prandom_u32(); - lastnum = ordernum & 0xFFFFF; retry: - ordernum = (ordernum + 1) & 0xFFFFF; - sprintf(addr->name->sun_path + 1, "%05x", ordernum); + addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); + addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); + addr->hash ^= sk->sk_type; - new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - unix_table_double_lock(old_hash, new_hash); + spin_lock(&unix_table_lock); + ordernum = (ordernum+1)&0xFFFFF; - if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - new_hash)) { - unix_table_double_unlock(old_hash, new_hash); - - /* __unix_find_socket_byname() may take long time if many names + if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) { + spin_unlock(&unix_table_lock); + /* + * __unix_find_socket_byname() may take long time if many names * are already in use. 
*/ cond_resched(); - - if (ordernum == lastnum) { - /* Give up if all names seems to be in use. */ + /* Give up if all names seems to be in use. */ + if (retries++ == 0xFFFFF) { err = -ENOSPC; - unix_release_addr(addr); + kfree(addr); goto out; } - goto retry; } - __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + __unix_set_addr(sk, addr, addr->hash); + spin_unlock(&unix_table_lock); err = 0; out: mutex_unlock(&u->bindlock); return err; } -static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, - int addr_len) +static struct sock *unix_find_other(struct net *net, + struct sockaddr_un *sunname, int len, + int type, unsigned int hash, int *error) { + struct sock *u; + struct path path; + int err = 0; + + if (sunname->sun_path[0]) { + struct inode *inode; + err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); + if (err) + goto fail; + inode = d_backing_inode(path.dentry); + err = path_permission(&path, MAY_WRITE); + if (err) + goto put_fail; + + err = -ECONNREFUSED; + if (!S_ISSOCK(inode->i_mode)) + goto put_fail; + u = unix_find_socket_byinode(inode); + if (!u) + goto put_fail; + + if (u->sk_type == type) + touch_atime(&path); + + path_put(&path); + + err = -EPROTOTYPE; + if (u->sk_type != type) { + sock_put(u); + goto fail; + } + } else { + err = -ECONNREFUSED; + u = unix_find_socket_byname(net, sunname, len, type ^ hash); + if (u) { + struct dentry *dentry; + dentry = unix_sk(u)->path.dentry; + if (dentry) + touch_atime(&unix_sk(u)->path); + } else + goto fail; + } + return u; + +put_fail: + path_put(&path); +fail: + *error = err; + return NULL; +} + +static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) +{ + struct unix_sock *u = unix_sk(sk); umode_t mode = S_IFSOCK | (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); - unsigned int new_hash, old_hash = sk->sk_hash; - struct unix_sock *u = unix_sk(sk); struct user_namespace *ns; // barf... - struct unix_address *addr; - struct dentry *dentry; struct path parent; + struct dentry *dentry; + unsigned int hash; int err; - unix_mkname_bsd(sunaddr, addr_len); - addr_len = strlen(sunaddr->sun_path) + - offsetof(struct sockaddr_un, sun_path) + 1; - - addr = unix_create_addr(sunaddr, addr_len); - if (!addr) - return -ENOMEM; - /* * Get the parent directory, calculate the hash for last * component. */ dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); - goto out; - } + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + ns = mnt_user_ns(parent.mnt); /* * All right, let's create it. 
*/ - ns = mnt_user_ns(parent.mnt); err = security_path_mknod(&parent, dentry, mode, 0); if (!err) err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); if (err) - goto out_path; + goto out; err = mutex_lock_interruptible(&u->bindlock); if (err) goto out_unlink; if (u->addr) goto out_unlock; - new_hash = unix_bsd_hash(d_backing_inode(dentry)); - unix_table_double_lock(old_hash, new_hash); + addr->hash = UNIX_HASH_SIZE; + hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + spin_lock(&unix_table_lock); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); - __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + __unix_set_addr(sk, addr, hash); + spin_unlock(&unix_table_lock); mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); return 0; @@ -1193,76 +1115,74 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, out_unlink: /* failed after successful mknod? unlink what we'd created... */ vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); -out_path: - done_path_create(&parent, dentry); out: - unix_release_addr(addr); - return err == -EEXIST ? -EADDRINUSE : err; + done_path_create(&parent, dentry); + return err; } -static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, - int addr_len) +static int unix_bind_abstract(struct sock *sk, struct unix_address *addr) { - unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); - struct unix_address *addr; int err; - addr = unix_create_addr(sunaddr, addr_len); - if (!addr) - return -ENOMEM; - err = mutex_lock_interruptible(&u->bindlock); if (err) - goto out; + return err; if (u->addr) { - err = -EINVAL; - goto out_mutex; + mutex_unlock(&u->bindlock); + return -EINVAL; } - new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - unix_table_double_lock(old_hash, new_hash); - + spin_lock(&unix_table_lock); if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - new_hash)) - goto out_spin; - - __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + addr->hash)) { + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return -EADDRINUSE; + } + __unix_set_addr(sk, addr, addr->hash); + spin_unlock(&unix_table_lock); mutex_unlock(&u->bindlock); return 0; - -out_spin: - unix_table_double_unlock(old_hash, new_hash); - err = -EADDRINUSE; -out_mutex: - mutex_unlock(&u->bindlock); -out: - unix_release_addr(addr); - return err; } static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; + struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; + char *sun_path = sunaddr->sun_path; int err; + unsigned int hash; + struct unix_address *addr; - if (addr_len == offsetof(struct sockaddr_un, sun_path) && - sunaddr->sun_family == AF_UNIX) - return unix_autobind(sk); + if (addr_len < offsetofend(struct sockaddr_un, sun_family) || + sunaddr->sun_family != AF_UNIX) + return -EINVAL; - err = unix_validate_addr(sunaddr, addr_len); - if (err) + if (addr_len == sizeof(short)) + return unix_autobind(sock); + + err = unix_mkname(sunaddr, addr_len, &hash); + if (err < 0) return err; + addr_len = err; + addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); + if (!addr) + return -ENOMEM; - if (sunaddr->sun_path[0]) - err = unix_bind_bsd(sk, sunaddr, addr_len); + memcpy(addr->name, sunaddr, addr_len); + addr->len = addr_len; + addr->hash = hash ^ sk->sk_type; + 
refcount_set(&addr->refcnt, 1); + + if (sun_path[0]) + err = unix_bind_bsd(sk, addr); else - err = unix_bind_abstract(sk, sunaddr, addr_len); - - return err; + err = unix_bind_abstract(sk, addr); + if (err) + unix_release_addr(addr); + return err == -EEXIST ? -EADDRINUSE : err; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) @@ -1297,6 +1217,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, struct net *net = sock_net(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; struct sock *other; + unsigned int hash; int err; err = -EINVAL; @@ -1304,23 +1225,19 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, goto out; if (addr->sa_family != AF_UNSPEC) { - err = unix_validate_addr(sunaddr, alen); - if (err) + err = unix_mkname(sunaddr, alen, &hash); + if (err < 0) goto out; + alen = err; if (test_bit(SOCK_PASSCRED, &sock->flags) && - !unix_sk(sk)->addr) { - err = unix_autobind(sk); - if (err) - goto out; - } + !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) + goto out; restart: - other = unix_find_other(net, sunaddr, alen, sock->type); - if (IS_ERR(other)) { - err = PTR_ERR(other); + other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); + if (!other) goto out; - } unix_state_double_lock(sk, other); @@ -1410,19 +1327,19 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *newsk = NULL; struct sock *other = NULL; struct sk_buff *skb = NULL; + unsigned int hash; int st; int err; long timeo; - err = unix_validate_addr(sunaddr, addr_len); - if (err) + err = unix_mkname(sunaddr, addr_len, &hash); + if (err < 0) goto out; + addr_len = err; - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { - err = unix_autobind(sk); - if (err) - goto out; - } + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && + (err = unix_autobind(sock)) != 0) + goto out; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); @@ -1448,12 +1365,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, restart: /* Find listening sock. */ - other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); - if (IS_ERR(other)) { - err = PTR_ERR(other); - other = NULL; + other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); + if (!other) goto out; - } /* Latch state of peer */ unix_state_lock(other); @@ -1541,9 +1455,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, * * The contents of *(otheru->addr) and otheru->path * are seen fully set up here, since we have found - * otheru in hash under unix_table_locks. Insertion + * otheru in hash under unix_table_lock. Insertion * into the hash chain we'd found it in had been done - * in an earlier critical area protected by unix_table_locks, + * in an earlier critical area protected by unix_table_lock, * the same one where we'd set *(otheru->addr) contents, * as well as otheru->path and otheru->addr itself. 
* @@ -1690,7 +1604,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; - err = offsetof(struct sockaddr_un, sun_path); + err = sizeof(short); } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); @@ -1846,7 +1760,9 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *other = NULL; + int namelen = 0; /* fake GCC */ int err; + unsigned int hash; struct sk_buff *skb; long timeo; struct scm_cookie scm; @@ -1863,9 +1779,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; if (msg->msg_namelen) { - err = unix_validate_addr(sunaddr, msg->msg_namelen); - if (err) + err = unix_mkname(sunaddr, msg->msg_namelen, &hash); + if (err < 0) goto out; + namelen = err; } else { sunaddr = NULL; err = -ENOTCONN; @@ -1874,11 +1791,9 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { - err = unix_autobind(sk); - if (err) - goto out; - } + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr + && (err = unix_autobind(sock)) != 0) + goto out; err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) @@ -1918,13 +1833,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (sunaddr == NULL) goto out_free; - other = unix_find_other(net, sunaddr, msg->msg_namelen, - sk->sk_type); - if (IS_ERR(other)) { - err = PTR_ERR(other); - other = NULL; + other = unix_find_other(net, sunaddr, namelen, sk->sk_type, + hash, &err); + if (other == NULL) goto out_free; - } } if (sk_filter(other, skb) < 0) { @@ -3220,7 +3132,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) @@ -3244,7 +3156,7 @@ static struct sock *unix_next_socket(struct seq_file *seq, struct sock *sk, loff_t *pos) { - unsigned long bucket = get_bucket(*pos); + unsigned long bucket; while (sk > (struct sock *)SEQ_START_TOKEN) { sk = sk_next(sk); @@ -3255,13 +3167,12 @@ static struct sock *unix_next_socket(struct seq_file *seq, } do { - spin_lock(&unix_table_locks[bucket]); sk = unix_from_bucket(seq, pos); if (sk) return sk; next_bucket: - spin_unlock(&unix_table_locks[bucket++]); + bucket = get_bucket(*pos) + 1; *pos = set_bucket_offset(bucket, 1); } while (bucket < ARRAY_SIZE(unix_socket_table)); @@ -3269,7 +3180,10 @@ static struct sock *unix_next_socket(struct seq_file *seq, } static void *unix_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(unix_table_lock) { + spin_lock(&unix_table_lock); + if (!*pos) return SEQ_START_TOKEN; @@ -3286,11 +3200,9 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void unix_seq_stop(struct seq_file *seq, void *v) + __releases(unix_table_lock) { - struct sock *sk = v; - - if (sk) - spin_unlock(&unix_table_locks[sk->sk_hash]); + spin_unlock(&unix_table_lock); } static int unix_seq_show(struct seq_file *seq, void *v) @@ -3315,16 +3227,15 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); - if (u->addr) { // under unix_table_locks here + if (u->addr) { // under unix_table_lock here int i, len; seq_putc(seq, ' '); i = 0; - len = u->addr->len - - offsetof(struct sockaddr_un, sun_path); - if (u->addr->name->sun_path[0]) { + len = u->addr->len - sizeof(short); + if (!UNIX_ABSTRACT(s)) len--; - } else { + else { seq_putc(seq, '@'); i++; } @@ -3474,13 +3385,10 @@ static void __init bpf_iter_register(void) static int __init af_unix_init(void) { - int i, rc = -1; + int rc = -1; BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); - for (i = 0; i < 2 * UNIX_HASH_SIZE; i++) - spin_lock_init(&unix_table_locks[i]); - rc = proto_register(&unix_dgram_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); diff --git a/net/unix/diag.c b/net/unix/diag.c index bb0b5ea165..7e7d7f4568 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -13,14 +13,13 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - /* might or might not have unix_table_locks */ + /* might or might not have unix_table_lock */ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) return 0; - return nla_put(nlskb, UNIX_DIAG_NAME, - addr->len - offsetof(struct sockaddr_un, sun_path), + return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short), addr->name->sun_path); } @@ -204,13 +203,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) s_slot = cb->args[0]; num = s_num = cb->args[1]; + spin_lock(&unix_table_lock); for (slot = s_slot; slot < ARRAY_SIZE(unix_socket_table); s_num = 0, slot++) { struct sock *sk; num = 0; - spin_lock(&unix_table_locks[slot]); sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; @@ -221,16 +220,14 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (sk_diag_dump(sk, skb, req, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI) < 0) { - spin_unlock(&unix_table_locks[slot]); + NLM_F_MULTI) < 0) goto done; - } next: num++; } - spin_unlock(&unix_table_locks[slot]); } done: + spin_unlock(&unix_table_lock); cb->args[0] = slot; cb->args[1] = num; @@ -239,19 +236,21 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) static struct sock *unix_lookup_by_ino(unsigned int ino) { - struct sock *sk; int i; + struct sock *sk; + spin_lock(&unix_table_lock); for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { - spin_lock(&unix_table_locks[i]); sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); - spin_unlock(&unix_table_locks[i]); + spin_unlock(&unix_table_lock); + return sk; } - spin_unlock(&unix_table_locks[i]); } + + spin_unlock(&unix_table_lock); return NULL; } diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 01d44e2598..c09bea8915 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -30,6 +30,10 @@ int __net_init unix_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + table[0].data = &net->unx.sysctl_max_dgram_qlen; net->unx.ctl = register_net_sysctl(net, "net/unix", table); if (net->unx.ctl == NULL) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d..91a5c65707 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -85,7 +85,6 @@ * TCP_LISTEN - listening */ 
-#include #include #include #include @@ -1618,18 +1617,13 @@ static int vsock_connectible_setsockopt(struct socket *sock, vsock_update_buffer_size(vsk, transport, vsk->buffer_size); break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: - case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: { - struct __kernel_sock_timeval tv; - - err = sock_copy_user_timeval(&tv, optval, optlen, - optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); - if (err) - break; + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct __kernel_old_timeval tv; + COPY_IN(tv); if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { vsk->connect_timeout = tv.tv_sec * HZ + - DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ)); + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); if (vsk->connect_timeout == 0) vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; @@ -1657,59 +1651,68 @@ static int vsock_connectible_getsockopt(struct socket *sock, char __user *optval, int __user *optlen) { - struct sock *sk = sock->sk; - struct vsock_sock *vsk = vsock_sk(sk); - - union { - u64 val64; - struct old_timeval32 tm32; - struct __kernel_old_timeval tm; - struct __kernel_sock_timeval stm; - } v; - - int lv = sizeof(v.val64); + int err; int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; if (level != AF_VSOCK) return -ENOPROTOOPT; - if (get_user(len, optlen)) - return -EFAULT; + err = get_user(len, optlen); + if (err != 0) + return err; - memset(&v, 0, sizeof(v)); +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: - v.val64 = vsk->buffer_size; + val = vsk->buffer_size; + COPY_OUT(val); break; case SO_VM_SOCKETS_BUFFER_MAX_SIZE: - v.val64 = vsk->buffer_max_size; + val = vsk->buffer_max_size; + COPY_OUT(val); break; case SO_VM_SOCKETS_BUFFER_MIN_SIZE: - v.val64 = vsk->buffer_min_size; + val = vsk->buffer_min_size; + COPY_OUT(val); break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: - case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: - lv = sock_get_timeout(vsk->connect_timeout, &v, - optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct __kernel_old_timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); break; - + } default: return -ENOPROTOOPT; } - if (len < lv) - return -EINVAL; - if (len > lv) - len = lv; - if (copy_to_user(optval, &v, len)) + err = put_user(len, optlen); + if (err != 0) return -EFAULT; - if (put_user(len, optlen)) - return -EFAULT; +#undef COPY_OUT return 0; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index e111e13b66..19189cf30a 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -225,20 +225,14 @@ static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan) return round_down(ret, 8); } -static int __hvs_send_data(struct vmbus_channel *chan, - struct vmpipe_proto_header *hdr, - size_t to_write) -{ - hdr->pkt_type = 1; - hdr->data_size = to_write; - return vmbus_sendpacket(chan, hdr, sizeof(*hdr) + to_write, - 0, VM_PKT_DATA_INBAND, 0); -} - static int hvs_send_data(struct vmbus_channel *chan, struct hvs_send_buf *send_buf, size_t to_write) { - return __hvs_send_data(chan, &send_buf->hdr, to_write); + send_buf->hdr.pkt_type = 1; + send_buf->hdr.data_size = 
to_write; + return vmbus_sendpacket(chan, &send_buf->hdr, + sizeof(send_buf->hdr) + to_write, + 0, VM_PKT_DATA_INBAND, 0); } static void hvs_channel_cb(void *ctx) @@ -474,7 +468,7 @@ static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) return; /* It can't fail: see hvs_channel_writable_bytes(). */ - (void)__hvs_send_data(hvs->chan, &hdr, 0); + (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0); hvs->fin_sent = true; } diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 1e9be50469..af590ae606 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -26,17 +26,17 @@ endif $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) @$(kecho) " GEN $@" - $(Q)(echo '#include "reg.h"'; \ + @(echo '#include "reg.h"'; \ echo 'const u8 shipped_regdb_certs[] = {'; \ echo | cat - $^ ; \ echo '};'; \ echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ ) > $@ -$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDI) \ - $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509) +$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ + $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) @$(kecho) " GEN $@" - $(Q)(set -e; \ + @(set -e; \ allf=""; \ for f in $^ ; do \ test -f $$f || continue;\ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index eb822052d3..869c43d441 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -245,7 +245,19 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) oper_freq - MHZ_TO_KHZ(oper_width) / 2) return false; break; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10) + return false; + if (chandef->center_freq2) + return false; + break; case NL80211_CHAN_WIDTH_80P80: + if (chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30) + return false; if (!chandef->center_freq2) return false; /* adjacent is not allowed -- that's a 160 MHz channel */ @@ -253,42 +265,28 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) chandef->center_freq2 - chandef->center_freq1 == 80) return false; break; - default: + case NL80211_CHAN_WIDTH_80: + if (chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30) + return false; if (chandef->center_freq2) return false; break; - } - - switch (chandef->width) { - case NL80211_CHAN_WIDTH_5: - case NL80211_CHAN_WIDTH_10: - case NL80211_CHAN_WIDTH_20: - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_1: - case NL80211_CHAN_WIDTH_2: - case NL80211_CHAN_WIDTH_4: - case NL80211_CHAN_WIDTH_8: - case NL80211_CHAN_WIDTH_16: - /* all checked above */ - break; case NL80211_CHAN_WIDTH_160: - if (chandef->center_freq1 == control_freq + 70 || - chandef->center_freq1 == control_freq + 50 || - chandef->center_freq1 == control_freq - 50 || - chandef->center_freq1 == control_freq - 70) - break; - fallthrough; - case NL80211_CHAN_WIDTH_80P80: - case NL80211_CHAN_WIDTH_80: - if (chandef->center_freq1 == control_freq + 30 || - chandef->center_freq1 == control_freq - 30) - break; - fallthrough; - case NL80211_CHAN_WIDTH_40: - if (chandef->center_freq1 == control_freq + 10 || - chandef->center_freq1 == control_freq - 10) - break; - fallthrough; + if (chandef->center_freq1 != 
control_freq + 70 && + chandef->center_freq1 != control_freq + 50 && + chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30 && + chandef->center_freq1 != control_freq - 50 && + chandef->center_freq1 != control_freq - 70) + return false; + if (chandef->center_freq2) + return false; + break; default: return false; } @@ -714,19 +712,6 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, return false; } -static bool -cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev, - struct ieee80211_channel *channel) -{ - if (!rdev->background_radar_wdev) - return false; - - if (!cfg80211_chandef_valid(&rdev->background_radar_chandef)) - return false; - - return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel); -} - bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan) { @@ -743,9 +728,6 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan)) return true; - - if (cfg80211_offchan_chain_is_active(rdev, chan)) - return true; } return false; diff --git a/net/wireless/core.c b/net/wireless/core.c index f08d4b3bb1..441136646f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -536,10 +536,6 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); - INIT_WORK(&rdev->background_cac_abort_wk, - cfg80211_background_cac_abort_wk); - INIT_DELAYED_WORK(&rdev->background_cac_done_wk, - cfg80211_background_cac_done_wk); init_waitqueue_head(&rdev->dev_wait); @@ -728,7 +724,6 @@ int wiphy_register(struct wiphy *wiphy) if (wiphy->interface_modes & ~(BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_P2P_CLIENT) | BIT(NL80211_IFTYPE_AP) | - BIT(NL80211_IFTYPE_MESH_POINT) | BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_P2P_DEVICE) | @@ -1050,13 +1045,11 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); - cancel_delayed_work_sync(&rdev->background_cac_done_wk); flush_work(&rdev->destroy_work); flush_work(&rdev->sched_scan_stop_wk); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); flush_work(&rdev->mgmt_registrations_update_wk); - flush_work(&rdev->background_cac_abort_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -1205,8 +1198,6 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_pmsr_wdev_down(wdev); - cfg80211_stop_background_radar_detection(wdev); - switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: __cfg80211_leave_ibss(rdev, dev, true); diff --git a/net/wireless/core.h b/net/wireless/core.h index 3a7dbd63d8..1720abf36f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -84,11 +84,6 @@ struct cfg80211_registered_device { struct delayed_work dfs_update_channels_wk; - struct wireless_dev *background_radar_wdev; - struct cfg80211_chan_def background_radar_chandef; - struct delayed_work background_cac_done_wk; - struct work_struct background_cac_abort_wk; - /* netlink port which started critical protocol (0 means not started) */ u32 crit_proto_nlportid; @@ -496,17 +491,6 @@ cfg80211_chandef_dfs_cac_time(struct wiphy 
*wiphy, void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); -int -cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct cfg80211_chan_def *chandef); - -void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev); - -void cfg80211_background_cac_done_wk(struct work_struct *work); - -void cfg80211_background_cac_abort_wk(struct work_struct *work); - bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c8155a483e..783acd2c42 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -905,13 +905,13 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) } -void __cfg80211_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - bool offchan, gfp_t gfp) +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - trace_cfg80211_radar_event(wiphy, chandef, offchan); + trace_cfg80211_radar_event(wiphy, chandef); /* only set the chandef supplied channel to unavailable, in * case the radar is detected on only one of multiple channels @@ -919,9 +919,6 @@ void __cfg80211_radar_event(struct wiphy *wiphy, */ cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); - if (offchan) - queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); - cfg80211_sched_dfs_chan_update(rdev); nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); @@ -929,7 +926,7 @@ void __cfg80211_radar_event(struct wiphy *wiphy, memcpy(&rdev->radar_chandef, chandef, sizeof(struct cfg80211_chan_def)); queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk); } -EXPORT_SYMBOL(__cfg80211_radar_event); +EXPORT_SYMBOL(cfg80211_radar_event); void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, @@ -973,143 +970,3 @@ void cfg80211_cac_event(struct net_device *netdev, nl80211_radar_notify(rdev, chandef, event, netdev, gfp); } EXPORT_SYMBOL(cfg80211_cac_event); - -static void -__cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event) -{ - struct wiphy *wiphy = &rdev->wiphy; - struct net_device *netdev; - - lockdep_assert_wiphy(&rdev->wiphy); - - if (!cfg80211_chandef_valid(chandef)) - return; - - if (!rdev->background_radar_wdev) - return; - - switch (event) { - case NL80211_RADAR_CAC_FINISHED: - cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); - memcpy(&rdev->cac_done_chandef, chandef, sizeof(*chandef)); - queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); - cfg80211_sched_dfs_chan_update(rdev); - wdev = rdev->background_radar_wdev; - break; - case NL80211_RADAR_CAC_ABORTED: - if (!cancel_delayed_work(&rdev->background_cac_done_wk)) - return; - wdev = rdev->background_radar_wdev; - break; - case NL80211_RADAR_CAC_STARTED: - break; - default: - return; - } - - netdev = wdev ? 
wdev->netdev : NULL; - nl80211_radar_notify(rdev, chandef, event, netdev, GFP_KERNEL); -} - -static void -cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, - const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event) -{ - wiphy_lock(&rdev->wiphy); - __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, - chandef, event); - wiphy_unlock(&rdev->wiphy); -} - -void cfg80211_background_cac_done_wk(struct work_struct *work) -{ - struct delayed_work *delayed_work = to_delayed_work(work); - struct cfg80211_registered_device *rdev; - - rdev = container_of(delayed_work, struct cfg80211_registered_device, - background_cac_done_wk); - cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, - NL80211_RADAR_CAC_FINISHED); -} - -void cfg80211_background_cac_abort_wk(struct work_struct *work) -{ - struct cfg80211_registered_device *rdev; - - rdev = container_of(work, struct cfg80211_registered_device, - background_cac_abort_wk); - cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, - NL80211_RADAR_CAC_ABORTED); -} - -void cfg80211_background_cac_abort(struct wiphy *wiphy) -{ - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - - queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); -} -EXPORT_SYMBOL(cfg80211_background_cac_abort); - -int -cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct cfg80211_chan_def *chandef) -{ - unsigned int cac_time_ms; - int err; - - lockdep_assert_wiphy(&rdev->wiphy); - - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_RADAR_BACKGROUND)) - return -EOPNOTSUPP; - - /* Offchannel chain already locked by another wdev */ - if (rdev->background_radar_wdev && rdev->background_radar_wdev != wdev) - return -EBUSY; - - /* CAC already in progress on the offchannel chain */ - if (rdev->background_radar_wdev == wdev && - delayed_work_pending(&rdev->background_cac_done_wk)) - return -EBUSY; - - err = rdev_set_radar_background(rdev, chandef); - if (err) - return err; - - cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, chandef); - if (!cac_time_ms) - cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; - - rdev->background_radar_chandef = *chandef; - rdev->background_radar_wdev = wdev; /* Get offchain ownership */ - - __cfg80211_background_cac_event(rdev, wdev, chandef, - NL80211_RADAR_CAC_STARTED); - queue_delayed_work(cfg80211_wq, &rdev->background_cac_done_wk, - msecs_to_jiffies(cac_time_ms)); - - return 0; -} - -void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev) -{ - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - - lockdep_assert_wiphy(wiphy); - - if (wdev != rdev->background_radar_wdev) - return; - - rdev_set_radar_background(rdev, NULL); - rdev->background_radar_wdev = NULL; /* Release offchain ownership */ - - __cfg80211_background_cac_event(rdev, wdev, - &rdev->background_radar_chandef, - NL80211_RADAR_CAC_ABORTED); -} diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 578bff9c37..99564db14a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -437,16 +437,6 @@ sar_policy[NL80211_SAR_ATTR_MAX + 1] = { [NL80211_SAR_ATTR_SPECS] = NLA_POLICY_NESTED_ARRAY(sar_specs_policy), }; -static const struct nla_policy -nl80211_mbssid_config_policy[NL80211_MBSSID_CONFIG_ATTR_MAX + 1] = { - [NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES] = NLA_POLICY_MIN(NLA_U8, 2), - 
[NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY] = - NLA_POLICY_MIN(NLA_U8, 1), - [NL80211_MBSSID_CONFIG_ATTR_INDEX] = { .type = NLA_U8 }, - [NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX] = { .type = NLA_U32 }, - [NL80211_MBSSID_CONFIG_ATTR_EMA] = { .type = NLA_FLAG }, -}; - static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -773,11 +763,6 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy), - [NL80211_ATTR_MBSSID_CONFIG] = - NLA_POLICY_NESTED(nl80211_mbssid_config_policy), - [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, - [NL80211_ATTR_RADAR_BACKGROUND] = { .type = NLA_FLAG }, - [NL80211_ATTR_AP_SETTINGS_FLAGS] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -868,7 +853,6 @@ nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = { [NL80211_BAND_5GHZ] = { .type = NLA_S32 }, [NL80211_BAND_6GHZ] = { .type = NLA_S32 }, [NL80211_BAND_60GHZ] = { .type = NLA_S32 }, - [NL80211_BAND_LC] = { .type = NLA_S32 }, }; static const struct nla_policy @@ -2227,35 +2211,6 @@ nl80211_put_sar_specs(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -static int nl80211_put_mbssid_support(struct wiphy *wiphy, struct sk_buff *msg) -{ - struct nlattr *config; - - if (!wiphy->mbssid_max_interfaces) - return 0; - - config = nla_nest_start(msg, NL80211_ATTR_MBSSID_CONFIG); - if (!config) - return -ENOBUFS; - - if (nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES, - wiphy->mbssid_max_interfaces)) - goto fail; - - if (wiphy->ema_max_profile_periodicity && - nla_put_u8(msg, - NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY, - wiphy->ema_max_profile_periodicity)) - goto fail; - - nla_nest_end(msg, config); - return 0; - -fail: - nla_nest_cancel(msg, config); - return -ENOBUFS; -} - struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2841,9 +2796,6 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (nl80211_put_sar_specs(rdev, msg)) goto nla_put_failure; - if (nl80211_put_mbssid_support(&rdev->wiphy, msg)) - goto nla_put_failure; - /* done */ state->split_start = 0; break; @@ -3671,16 +3623,14 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_ADHOC: { - const struct element *ssid_elem; - + const u8 *ssid_ie; if (!wdev->current_bss) break; rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, - WLAN_EID_SSID); - if (ssid_elem && - nla_put(msg, NL80211_ATTR_SSID, ssid_elem->datalen, - ssid_elem->data)) + ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, + WLAN_EID_SSID); + if (ssid_ie && + nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) goto nla_put_failure_rcu_locked; rcu_read_unlock(); break; @@ -5035,96 +4985,6 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, return 0; } -static int nl80211_parse_mbssid_config(struct wiphy *wiphy, - struct net_device *dev, - struct nlattr *attrs, - struct cfg80211_mbssid_config *config, - u8 num_elems) -{ - struct nlattr *tb[NL80211_MBSSID_CONFIG_ATTR_MAX + 1]; - - if (!wiphy->mbssid_max_interfaces) - return -EOPNOTSUPP; - - if (nla_parse_nested(tb, NL80211_MBSSID_CONFIG_ATTR_MAX, attrs, NULL, - NULL) 
|| - !tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]) - return -EINVAL; - - config->ema = nla_get_flag(tb[NL80211_MBSSID_CONFIG_ATTR_EMA]); - if (config->ema) { - if (!wiphy->ema_max_profile_periodicity) - return -EOPNOTSUPP; - - if (num_elems > wiphy->ema_max_profile_periodicity) - return -EINVAL; - } - - config->index = nla_get_u8(tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]); - if (config->index >= wiphy->mbssid_max_interfaces || - (!config->index && !num_elems)) - return -EINVAL; - - if (tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]) { - u32 tx_ifindex = - nla_get_u32(tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]); - - if ((!config->index && tx_ifindex != dev->ifindex) || - (config->index && tx_ifindex == dev->ifindex)) - return -EINVAL; - - if (tx_ifindex != dev->ifindex) { - struct net_device *tx_netdev = - dev_get_by_index(wiphy_net(wiphy), tx_ifindex); - - if (!tx_netdev || !tx_netdev->ieee80211_ptr || - tx_netdev->ieee80211_ptr->wiphy != wiphy || - tx_netdev->ieee80211_ptr->iftype != - NL80211_IFTYPE_AP) { - dev_put(tx_netdev); - return -EINVAL; - } - - config->tx_wdev = tx_netdev->ieee80211_ptr; - } else { - config->tx_wdev = dev->ieee80211_ptr; - } - } else if (!config->index) { - config->tx_wdev = dev->ieee80211_ptr; - } else { - return -EINVAL; - } - - return 0; -} - -static struct cfg80211_mbssid_elems * -nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs) -{ - struct nlattr *nl_elems; - struct cfg80211_mbssid_elems *elems; - int rem_elems; - u8 i = 0, num_elems = 0; - - if (!wiphy->mbssid_max_interfaces) - return ERR_PTR(-EINVAL); - - nla_for_each_nested(nl_elems, attrs, rem_elems) - num_elems++; - - elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL); - if (!elems) - return ERR_PTR(-ENOMEM); - - nla_for_each_nested(nl_elems, attrs, rem_elems) { - elems->elem[i].data = nla_data(nl_elems); - elems->elem[i].len = nla_len(nl_elems); - i++; - } - elems->cnt = num_elems; - return elems; -} - static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) @@ -5205,17 +5065,6 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, bcn->ftm_responder = -1; } - if (attrs[NL80211_ATTR_MBSSID_ELEMS]) { - struct cfg80211_mbssid_elems *mbssid = - nl80211_parse_mbssid_elems(&rdev->wiphy, - attrs[NL80211_ATTR_MBSSID_ELEMS]); - - if (IS_ERR(mbssid)) - return PTR_ERR(mbssid); - - bcn->mbssid_ies = mbssid; - } - return 0; } @@ -5347,21 +5196,21 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, } static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, - const struct element *rates) + const u8 *rates) { int i; if (!rates) return; - for (i = 0; i < rates->datalen; i++) { - if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY) + for (i = 0; i < rates[1]; i++) { + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY) params->ht_required = true; - if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) params->vht_required = true; - if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY) + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY) params->he_required = true; - if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E) + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E) params->sae_h2e_required = true; } } @@ -5376,27 +5225,27 @@ static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) const struct cfg80211_beacon_data *bcn = ¶ms->beacon; size_t ies_len = bcn->tail_len; const 
u8 *ies = bcn->tail; - const struct element *rates; - const struct element *cap; + const u8 *rates; + const u8 *cap; - rates = cfg80211_find_elem(WLAN_EID_SUPP_RATES, ies, ies_len); + rates = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies, ies_len); nl80211_check_ap_rate_selectors(params, rates); - rates = cfg80211_find_elem(WLAN_EID_EXT_SUPP_RATES, ies, ies_len); + rates = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, ies, ies_len); nl80211_check_ap_rate_selectors(params, rates); - cap = cfg80211_find_elem(WLAN_EID_HT_CAPABILITY, ies, ies_len); - if (cap && cap->datalen >= sizeof(*params->ht_cap)) - params->ht_cap = (void *)cap->data; - cap = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY, ies, ies_len); - if (cap && cap->datalen >= sizeof(*params->vht_cap)) - params->vht_cap = (void *)cap->data; - cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len); - if (cap && cap->datalen >= sizeof(*params->he_cap) + 1) - params->he_cap = (void *)(cap->data + 1); - cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ies, ies_len); - if (cap && cap->datalen >= sizeof(*params->he_oper) + 1) - params->he_oper = (void *)(cap->data + 1); + cap = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->ht_cap)) + params->ht_cap = (void *)(cap + 2); + cap = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->vht_cap)) + params->vht_cap = (void *)(cap + 2); + cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->he_cap) + 1) + params->he_cap = (void *)(cap + 3); + cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->he_oper) + 1) + params->he_oper = (void *)(cap + 3); } static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, @@ -5478,7 +5327,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_ap_settings *params; + struct cfg80211_ap_settings params; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && @@ -5491,29 +5340,27 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (wdev->beacon_interval) return -EALREADY; + memset(¶ms, 0, sizeof(params)); + /* these are required for START_AP */ if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || !info->attrs[NL80211_ATTR_DTIM_PERIOD] || !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; - params = kzalloc(sizeof(*params), GFP_KERNEL); - if (!params) - return -ENOMEM; - - err = nl80211_parse_beacon(rdev, info->attrs, ¶ms->beacon); + err = nl80211_parse_beacon(rdev, info->attrs, ¶ms.beacon); if (err) - goto out; + return err; - params->beacon_interval = + params.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - params->dtim_period = + params.dtim_period = nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype, - params->beacon_interval); + params.beacon_interval); if (err) - goto out; + return err; /* * In theory, some of these attributes should be required here @@ -5523,157 +5370,129 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) * additional information -- drivers must check! 
*/ if (info->attrs[NL80211_ATTR_SSID]) { - params->ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); - params->ssid_len = + params.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + params.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); - if (params->ssid_len == 0) { - err = -EINVAL; - goto out; - } + if (params.ssid_len == 0) + return -EINVAL; } if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) - params->hidden_ssid = nla_get_u32( + params.hidden_ssid = nla_get_u32( info->attrs[NL80211_ATTR_HIDDEN_SSID]); - params->privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; + params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { - params->auth_type = nla_get_u32( + params.auth_type = nla_get_u32( info->attrs[NL80211_ATTR_AUTH_TYPE]); - if (!nl80211_valid_auth_type(rdev, params->auth_type, - NL80211_CMD_START_AP)) { - err = -EINVAL; - goto out; - } + if (!nl80211_valid_auth_type(rdev, params.auth_type, + NL80211_CMD_START_AP)) + return -EINVAL; } else - params->auth_type = NL80211_AUTHTYPE_AUTOMATIC; + params.auth_type = NL80211_AUTHTYPE_AUTOMATIC; - err = nl80211_crypto_settings(rdev, info, ¶ms->crypto, + err = nl80211_crypto_settings(rdev, info, ¶ms.crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) - goto out; + return err; if (info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]) { - if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) { - err = -EOPNOTSUPP; - goto out; - } - params->inactivity_timeout = nla_get_u16( + if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) + return -EOPNOTSUPP; + params.inactivity_timeout = nla_get_u16( info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); } if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { - err = -EINVAL; - goto out; - } - params->p2p_ctwindow = + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + params.p2p_ctwindow = nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); - if (params->p2p_ctwindow != 0 && - !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) { - err = -EINVAL; - goto out; - } + if (params.p2p_ctwindow != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) + return -EINVAL; } if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { u8 tmp; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { - err = -EINVAL; - goto out; - } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); - params->p2p_opp_ps = tmp; - if (params->p2p_opp_ps != 0 && - !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) { - err = -EINVAL; - goto out; - } + params.p2p_opp_ps = tmp; + if (params.p2p_opp_ps != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) + return -EINVAL; } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, ¶ms->chandef); + err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); if (err) - goto out; + return err; } else if (wdev->preset_chandef.chan) { - params->chandef = wdev->preset_chandef; - } else if (!nl80211_get_ap_channel(rdev, params)) { - err = -EINVAL; - goto out; - } + params.chandef = wdev->preset_chandef; + } else if (!nl80211_get_ap_channel(rdev, ¶ms)) + return -EINVAL; - if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms->chandef, - wdev->iftype)) { - err = -EINVAL; - goto out; - } + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) + return -EINVAL; if (info->attrs[NL80211_ATTR_TX_RATES]) { err = nl80211_parse_tx_bitrate_mask(info, 
info->attrs, NL80211_ATTR_TX_RATES, - ¶ms->beacon_rate, + ¶ms.beacon_rate, dev, false); if (err) - goto out; + return err; - err = validate_beacon_tx_rate(rdev, params->chandef.chan->band, - ¶ms->beacon_rate); + err = validate_beacon_tx_rate(rdev, params.chandef.chan->band, + ¶ms.beacon_rate); if (err) - goto out; + return err; } if (info->attrs[NL80211_ATTR_SMPS_MODE]) { - params->smps_mode = + params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); - switch (params->smps_mode) { + switch (params.smps_mode) { case NL80211_SMPS_OFF: break; case NL80211_SMPS_STATIC: if (!(rdev->wiphy.features & - NL80211_FEATURE_STATIC_SMPS)) { - err = -EINVAL; - goto out; - } + NL80211_FEATURE_STATIC_SMPS)) + return -EINVAL; break; case NL80211_SMPS_DYNAMIC: if (!(rdev->wiphy.features & - NL80211_FEATURE_DYNAMIC_SMPS)) { - err = -EINVAL; - goto out; - } + NL80211_FEATURE_DYNAMIC_SMPS)) + return -EINVAL; break; default: - err = -EINVAL; - goto out; + return -EINVAL; } } else { - params->smps_mode = NL80211_SMPS_OFF; + params.smps_mode = NL80211_SMPS_OFF; } - params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); - if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { - err = -EOPNOTSUPP; - goto out; - } + params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); + if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) + return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_ACL_POLICY]) { - params->acl = parse_acl_data(&rdev->wiphy, info); - if (IS_ERR(params->acl)) { - err = PTR_ERR(params->acl); - params->acl = NULL; - goto out; - } + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); } - params->twt_responder = + params.twt_responder = nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]); if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) { err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], - ¶ms->he_obss_pd); + ¶ms.he_obss_pd); if (err) goto out; } @@ -5681,7 +5500,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { err = nl80211_parse_he_bss_color( info->attrs[NL80211_ATTR_HE_BSS_COLOR], - ¶ms->he_bss_color); + ¶ms.he_bss_color); if (err) goto out; } @@ -5689,7 +5508,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { err = nl80211_parse_fils_discovery(rdev, info->attrs[NL80211_ATTR_FILS_DISCOVERY], - params); + ¶ms); if (err) goto out; } @@ -5697,38 +5516,24 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) { err = nl80211_parse_unsol_bcast_probe_resp( rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], - params); + ¶ms); if (err) goto out; } - if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) { - err = nl80211_parse_mbssid_config(&rdev->wiphy, dev, - info->attrs[NL80211_ATTR_MBSSID_CONFIG], - ¶ms->mbssid_config, - params->beacon.mbssid_ies ? 
- params->beacon.mbssid_ies->cnt : - 0); - if (err) - goto out; - } + nl80211_calculate_ap_params(¶ms); - nl80211_calculate_ap_params(params); - - if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]) - params->flags = nla_get_u32( - info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]); - else if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) - params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; + if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) + params.flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; wdev_lock(wdev); - err = rdev_start_ap(rdev, dev, params); + err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { - wdev->preset_chandef = params->chandef; - wdev->beacon_interval = params->beacon_interval; - wdev->chandef = params->chandef; - wdev->ssid_len = params->ssid_len; - memcpy(wdev->ssid, params->ssid, wdev->ssid_len); + wdev->preset_chandef = params.chandef; + wdev->beacon_interval = params.beacon_interval; + wdev->chandef = params.chandef; + wdev->ssid_len = params.ssid_len; + memcpy(wdev->ssid, params.ssid, wdev->ssid_len); if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->conn_owner_nlportid = info->snd_portid; @@ -5736,13 +5541,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev_unlock(wdev); out: - kfree(params->acl); - kfree(params->beacon.mbssid_ies); - if (params->mbssid_config.tx_wdev && - params->mbssid_config.tx_wdev->netdev && - params->mbssid_config.tx_wdev->netdev != dev) - dev_put(params->mbssid_config.tx_wdev->netdev); - kfree(params); + kfree(params.acl); return err; } @@ -5767,14 +5566,12 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_beacon(rdev, info->attrs, ¶ms); if (err) - goto out; + return err; wdev_lock(wdev); err = rdev_change_beacon(rdev, dev, ¶ms); wdev_unlock(wdev); -out: - kfree(params.mbssid_ies); return err; } @@ -9281,60 +9078,38 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; unsigned int cac_time_ms; - int err = -EINVAL; - - flush_delayed_work(&rdev->dfs_update_channels_wk); - - wiphy_lock(wiphy); + int err; dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) - goto unlock; + return -EINVAL; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) - goto unlock; + return err; + + if (netif_carrier_ok(dev)) + return -EBUSY; + + if (wdev->cac_started) + return -EBUSY; err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) - goto unlock; + return err; - if (err == 0) { - err = -EINVAL; - goto unlock; - } + if (err == 0) + return -EINVAL; - if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) { - err = -EINVAL; - goto unlock; - } - - if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) { - err = cfg80211_start_background_radar_detection(rdev, wdev, - &chandef); - goto unlock; - } - - if (netif_carrier_ok(dev)) { - err = -EBUSY; - goto unlock; - } - - if (wdev->cac_started) { - err = -EBUSY; - goto unlock; - } + if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) + return -EINVAL; /* CAC start is offloaded to HW and can't be started manually */ - if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) { - err = -EOPNOTSUPP; - goto unlock; - } + if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) + return -EOPNOTSUPP; - if (!rdev->ops->start_radar_detection) { - err = -EOPNOTSUPP; - goto unlock; - } + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; cac_time_ms = 
cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) @@ -9347,9 +9122,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->cac_start_time = jiffies; wdev->cac_time_ms = cac_time_ms; } -unlock: - wiphy_unlock(wiphy); - return err; } @@ -9476,14 +9248,12 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_beacon(rdev, info->attrs, ¶ms.beacon_after); if (err) - goto free; + return err; csa_attrs = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*csa_attrs), GFP_KERNEL); - if (!csa_attrs) { - err = -ENOMEM; - goto free; - } + if (!csa_attrs) + return -ENOMEM; err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_CSA_IES], @@ -9601,8 +9371,6 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) wdev_unlock(wdev); free: - kfree(params.beacon_after.mbssid_ies); - kfree(params.beacon_csa.mbssid_ies); kfree(csa_attrs); return err; } @@ -12003,9 +11771,8 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, if (n_thresholds) { struct cfg80211_cqm_config *cqm_config; - cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, - n_thresholds), - GFP_KERNEL); + cqm_config = kzalloc(sizeof(struct cfg80211_cqm_config) + + n_thresholds * sizeof(s32), GFP_KERNEL); if (!cqm_config) { err = -ENOMEM; goto unlock; @@ -12014,8 +11781,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, cqm_config->rssi_hyst = hysteresis; cqm_config->n_rssi_thresholds = n_thresholds; memcpy(cqm_config->rssi_thresholds, thresholds, - flex_array_size(cqm_config, rssi_thresholds, - n_thresholds)); + n_thresholds * sizeof(s32)); wdev->cqm_config = cqm_config; } @@ -13411,6 +13177,9 @@ static int handle_nan_filter(struct nlattr *attr_filter, i = 0; nla_for_each_nested(attr, attr_filter, rem) { filter[i].filter = nla_memdup(attr, GFP_KERNEL); + if (!filter[i].filter) + goto err; + filter[i].len = nla_len(attr); i++; } @@ -13423,6 +13192,15 @@ static int handle_nan_filter(struct nlattr *attr_filter, } return 0; + +err: + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + kfree(filter[i].filter); + i++; + } + kfree(filter); + return -ENOMEM; } static int nl80211_nan_add_func(struct sk_buff *skb, @@ -15138,35 +14916,10 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) wdev_unlock(wdev); out: - kfree(params.beacon_next.mbssid_ies); - kfree(params.beacon_color_change.mbssid_ies); kfree(tb); return err; } -static int nl80211_set_fils_aad(struct sk_buff *skb, - struct genl_info *info) -{ - struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; - struct cfg80211_fils_aad fils_aad = {}; - u8 *nonces; - - if (!info->attrs[NL80211_ATTR_MAC] || - !info->attrs[NL80211_ATTR_FILS_KEK] || - !info->attrs[NL80211_ATTR_FILS_NONCES]) - return -EINVAL; - - fils_aad.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); - fils_aad.kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); - fils_aad.kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); - nonces = nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); - fils_aad.snonce = nonces; - fils_aad.anonce = nonces + FILS_NONCE_LEN; - - return rdev_set_fils_aad(rdev, dev, &fils_aad); -} - #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -15344,7 +15097,9 @@ static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info) if (specs > rdev->wiphy.sar_capa->num_freq_ranges) return -EINVAL; - 
sar_spec = kzalloc(struct_size(sar_spec, sub_specs, specs), GFP_KERNEL); + sar_spec = kzalloc(sizeof(*sar_spec) + + specs * sizeof(struct cfg80211_sar_sub_specs), + GFP_KERNEL); if (!sar_spec) return -ENOMEM; @@ -15986,8 +15741,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NO_WIPHY_MTX, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, @@ -16169,13 +15923,6 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, - { - .cmd = NL80211_CMD_SET_FILS_AAD, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = nl80211_set_fils_aad, - .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, - }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -17068,44 +16815,6 @@ static void nl80211_send_remain_on_chan_event( nlmsg_free(msg); } -void cfg80211_assoc_comeback(struct net_device *netdev, - struct cfg80211_bss *bss, u32 timeout) -{ - struct wireless_dev *wdev = netdev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - struct sk_buff *msg; - void *hdr; - - trace_cfg80211_assoc_comeback(wdev, bss->bssid, timeout); - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return; - - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ASSOC_COMEBACK); - if (!hdr) { - nlmsg_free(msg); - return; - } - - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bss->bssid) || - nla_put_u32(msg, NL80211_ATTR_TIMEOUT, timeout)) - goto nla_put_failure; - - genlmsg_end(msg, hdr); - - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, - NL80211_MCGRP_MLME, GFP_KERNEL); - return; - - nla_put_failure: - nlmsg_free(msg); -} -EXPORT_SYMBOL(cfg80211_assoc_comeback); - void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, unsigned int duration, gfp_t gfp) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 439bcf5236..ce6bf218a1 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1381,35 +1381,4 @@ static inline int rdev_color_change(struct cfg80211_registered_device *rdev, return ret; } -static inline int -rdev_set_fils_aad(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct cfg80211_fils_aad *fils_aad) -{ - int ret = -EOPNOTSUPP; - - trace_rdev_set_fils_aad(&rdev->wiphy, dev, fils_aad); - if (rdev->ops->set_fils_aad) - ret = rdev->ops->set_fils_aad(&rdev->wiphy, dev, fils_aad); - trace_rdev_return_int(&rdev->wiphy, ret); - - return ret; -} - -static inline int -rdev_set_radar_background(struct cfg80211_registered_device *rdev, - struct cfg80211_chan_def *chandef) -{ - struct wiphy *wiphy = &rdev->wiphy; - int ret; - - if (!rdev->ops->set_radar_background) - return -EOPNOTSUPP; - - trace_rdev_set_radar_background(wiphy, chandef); - ret = rdev->ops->set_radar_background(wiphy, chandef); - trace_rdev_return_int(wiphy, ret); - - return ret; -} - #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ec25924a1c..795e86b371 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ 
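The rdev_set_fils_aad()/rdev_set_radar_background() wrappers removed above follow cfg80211's usual rdev-ops shape: trace the call, fall back to -EOPNOTSUPP when the driver leaves the op NULL, otherwise call through and trace the return value. A standalone sketch of that optional-op dispatch pattern (all names hypothetical):

#include <stdio.h>

#define EOPNOTSUPP 95

struct drv_ops {
	int (*set_param)(int val);	/* hypothetical optional driver op */
};

/* same shape as the rdev_*() wrappers: trace, test the op, call, trace */
static int call_set_param(const struct drv_ops *ops, int val)
{
	int ret = -EOPNOTSUPP;

	printf("trace: set_param(%d)\n", val);	/* stands in for trace_rdev_...() */
	if (ops->set_param)
		ret = ops->set_param(val);
	printf("trace: return %d\n", ret);	/* trace_rdev_return_int() */
	return ret;
}

static int demo_impl(int val)
{
	return val > 0 ? 0 : -1;
}

int main(void)
{
	struct drv_ops with = { .set_param = demo_impl };
	struct drv_ops without = { 0 };

	printf("with op: %d\n", call_set_param(&with, 1));
	printf("without op: %d\n", call_set_param(&without, 1));
	return 0;
}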
-133,7 +133,6 @@ static u32 reg_is_indoor_portid; static void restore_regulatory_settings(bool reset_user, bool cached); static void print_regdomain(const struct ieee80211_regdomain *rd); -static void reg_process_hint(struct regulatory_request *reg_request); static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { @@ -1099,8 +1098,6 @@ int reg_reload_regdb(void) const struct firmware *fw; void *db; int err; - const struct ieee80211_regdomain *current_regdomain; - struct regulatory_request *request; err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev); if (err) @@ -1121,26 +1118,8 @@ int reg_reload_regdb(void) if (!IS_ERR_OR_NULL(regdb)) kfree(regdb); regdb = db; - - /* reset regulatory domain */ - current_regdomain = get_cfg80211_regdom(); - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (!request) { - err = -ENOMEM; - goto out_unlock; - } - - request->wiphy_idx = WIPHY_IDX_INVALID; - request->alpha2[0] = current_regdomain->alpha2[0]; - request->alpha2[1] = current_regdomain->alpha2[1]; - request->initiator = NL80211_REGDOM_SET_BY_CORE; - request->user_reg_hint_type = NL80211_USER_REG_HINT_USER; - - reg_process_hint(request); - -out_unlock: rtnl_unlock(); + out: release_firmware(fw); return err; @@ -2371,7 +2350,6 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_MESH_POINT: if (!wdev->beacon_interval) goto wdev_inactive_unlock; chandef = wdev->chandef; @@ -2410,7 +2388,6 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: wiphy_lock(wiphy); ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); wiphy_unlock(wiphy); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b888522f13..adc0d14cfd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -383,7 +383,7 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, const u8 *ssid, size_t ssid_len) { const struct cfg80211_bss_ies *ies; - const struct element *ssid_elem; + const u8 *ssidie; if (bssid && !ether_addr_equal(a->bssid, bssid)) return false; @@ -394,32 +394,34 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, ies = rcu_access_pointer(a->ies); if (!ies) return false; - ssid_elem = cfg80211_find_elem(WLAN_EID_SSID, ies->data, ies->len); - if (!ssid_elem) + ssidie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + if (!ssidie) return false; - if (ssid_elem->datalen != ssid_len) + if (ssidie[1] != ssid_len) return false; - return memcmp(ssid_elem->data, ssid, ssid_len) == 0; + return memcmp(ssidie + 2, ssid, ssid_len) == 0; } static int cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, struct cfg80211_bss *nontrans_bss) { - const struct element *ssid_elem; + const u8 *ssid; + size_t ssid_len; struct cfg80211_bss *bss = NULL; rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(nontrans_bss, WLAN_EID_SSID); - if (!ssid_elem) { + ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID); + if (!ssid) { rcu_read_unlock(); return -EINVAL; } + ssid_len = ssid[1]; + ssid = ssid + 2; /* check if nontrans_bss is in the list */ list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) { - if (is_bss(bss, nontrans_bss->bssid, ssid_elem->data, - ssid_elem->datalen)) { + if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) { rcu_read_unlock(); return 0; } @@ -1792,76 +1794,44 @@
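The scan.c hunks above move from the struct element accessors back to raw IE pointers, where ie[0] is the element ID, ie[1] the payload length, and the payload starts at ie + 2. A self-contained sketch of a cfg80211_find_ie()-style TLV walk over that layout (simplified; the kernel version also handles extension elements):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* walk TLVs laid out as [id][len][len payload bytes]; NULL if absent */
static const uint8_t *find_ie(uint8_t eid, const uint8_t *ies, size_t len)
{
	while (len >= 2 && len >= (size_t)ies[1] + 2) {
		if (ies[0] == eid)
			return ies;
		len -= (size_t)ies[1] + 2;
		ies += (size_t)ies[1] + 2;
	}
	return NULL;	/* not found or buffer truncated mid-element */
}

int main(void)
{
	/* SSID element (id 0) "net", then DS Params (id 3) channel 6 */
	const uint8_t ies[] = { 0, 3, 'n', 'e', 't', 3, 1, 6 };
	const uint8_t *ssid = find_ie(0, ies, sizeof(ies));

	if (ssid)
		printf("ssid len %d: %.*s\n", ssid[1], ssid[1], ssid + 2);
	return 0;
}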
cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } -int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band, - enum cfg80211_bss_frame_type ftype) -{ - const struct element *tmp; - - if (band == NL80211_BAND_6GHZ) { - struct ieee80211_he_operation *he_oper; - - tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, - ielen); - if (tmp && tmp->datalen >= sizeof(*he_oper) && - tmp->datalen >= ieee80211_he_oper_size(&tmp->data[1])) { - const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - - he_oper = (void *)&tmp->data[1]; - - he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); - if (!he_6ghz_oper) - return -1; - - if (ftype != CFG80211_BSS_FTYPE_BEACON || - he_6ghz_oper->control & IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON) - return he_6ghz_oper->primary; - } - } else if (band == NL80211_BAND_S1GHZ) { - tmp = cfg80211_find_elem(WLAN_EID_S1G_OPERATION, ie, ielen); - if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) { - struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data; - - return s1gop->primary_ch; - } - } else { - tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen); - if (tmp && tmp->datalen == 1) - return tmp->data[0]; - - tmp = cfg80211_find_elem(WLAN_EID_HT_OPERATION, ie, ielen); - if (tmp && - tmp->datalen >= sizeof(struct ieee80211_ht_operation)) { - struct ieee80211_ht_operation *htop = (void *)tmp->data; - - return htop->primary_chan; - } - } - - return -1; -} -EXPORT_SYMBOL(cfg80211_get_ies_channel_number); - /* * Update RX channel information based on the available frame payload * information. This is mainly for the 2.4 GHz band where frames can be received * from neighboring channels and the Beacon frames use the DSSS Parameter Set * element to indicate the current (transmitting) channel, but this might also * be needed on other bands if RX frequency does not match with the actual - * operating channel of a BSS, or if the AP reports a different primary channel. + * operating channel of a BSS. */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype) + enum nl80211_bss_scan_width scan_width) { + const u8 *tmp; u32 freq; - int channel_number; + int channel_number = -1; struct ieee80211_channel *alt_channel; - channel_number = cfg80211_get_ies_channel_number(ie, ielen, - channel->band, ftype); + if (channel->band == NL80211_BAND_S1GHZ) { + tmp = cfg80211_find_ie(WLAN_EID_S1G_OPERATION, ie, ielen); + if (tmp && tmp[1] >= sizeof(struct ieee80211_s1g_oper_ie)) { + struct ieee80211_s1g_oper_ie *s1gop = (void *)(tmp + 2); + + channel_number = s1gop->primary_ch; + } + } else { + tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); + if (tmp && tmp[1] == 1) { + channel_number = tmp[2]; + } else { + tmp = cfg80211_find_ie(WLAN_EID_HT_OPERATION, ie, ielen); + if (tmp && tmp[1] >= sizeof(struct ieee80211_ht_operation)) { + struct ieee80211_ht_operation *htop = (void *)(tmp + 2); + + channel_number = htop->primary_chan; + } + } + } if (channel_number < 0) { /* No channel information in frame payload */ @@ -1869,16 +1839,6 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } freq = ieee80211_channel_to_freq_khz(channel_number, channel->band); - - /* - * In 6GHz, duplicated beacon indication is relevant for - * beacons only. 
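For the channel fallback logic above: once a channel number has been pulled from the DS Parameter Set or HT Operation element, it is mapped to a frequency via ieee80211_channel_to_freq_khz(). A sketch of the 2.4 GHz branch of that mapping, matching the util.c rules further down in this patch (channel 14 is the 2484 MHz special case):

#include <stdio.h>

#define MHZ_TO_KHZ(f) ((f) * 1000)

/* 2.4 GHz mapping as in ieee80211_channel_to_freq_khz() */
static unsigned int chan_2ghz_to_khz(int chan)
{
	if (chan == 14)
		return MHZ_TO_KHZ(2484);
	if (chan >= 1 && chan < 14)
		return MHZ_TO_KHZ(2407 + chan * 5);
	return 0;	/* not a valid 2.4 GHz channel number */
}

int main(void)
{
	int ds_params_chan = 6;	/* as read from a DS Parameter Set IE */

	printf("channel %d -> %u kHz\n", ds_params_chan,
	       chan_2ghz_to_khz(ds_params_chan));
	return 0;
}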
- */ - if (channel->band == NL80211_BAND_6GHZ && - (freq == channel->center_freq || - abs(freq - channel->center_freq) > 80)) - return channel; - alt_channel = ieee80211_get_channel_khz(wiphy, freq); if (!alt_channel) { if (channel->band == NL80211_BAND_2GHZ) { @@ -1940,7 +1900,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, - data->scan_width, ftype); + data->scan_width); if (!channel) return NULL; @@ -2115,12 +2075,12 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, if (!non_tx_data) return; - if (!cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) + if (!cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return; if (!wiphy->support_mbssid) return; if (wiphy->support_only_he_mbssid && - !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) + !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) return; new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp); @@ -2263,8 +2223,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len) { u8 *ie, *new_ie, *pos; - const struct element *nontrans_ssid; - const u8 *trans_ssid, *mbssid; + const u8 *nontrans_ssid, *trans_ssid, *mbssid; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); size_t new_ie_len; @@ -2291,11 +2250,11 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, return; new_ie_len -= mbssid[1]; - nontrans_ssid = ieee80211_bss_get_elem(nontrans_bss, WLAN_EID_SSID); + nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID); if (!nontrans_ssid) return; - new_ie_len += nontrans_ssid->datalen; + new_ie_len += nontrans_ssid[1]; /* generate new ie for nontrans BSS * 1. replace SSID with nontrans BSS' SSID @@ -2312,7 +2271,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, pos = new_ie; /* copy the nontransmitted SSID */ - cpy_len = nontrans_ssid->datalen + 2; + cpy_len = nontrans_ssid[1] + 2; memcpy(pos, nontrans_ssid, cpy_len); pos += cpy_len; /* copy the IEs between SSID and MBSSID */ @@ -2363,7 +2322,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); int bss_type; - enum cfg80211_bss_frame_type ftype; BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); @@ -2400,16 +2358,8 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, variable = ext->u.s1g_beacon.variable; } - if (ieee80211_is_beacon(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_BEACON; - else if (ieee80211_is_probe_resp(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_PRESP; - else - ftype = CFG80211_BSS_FTYPE_UNKNOWN; - channel = cfg80211_get_bss_channel(wiphy, variable, - ielen, data->chan, data->scan_width, - ftype); + ielen, data->chan, data->scan_width); if (!channel) return NULL; @@ -2497,10 +2447,10 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); if (!res || !wiphy->support_mbssid || - !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) + !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; if (wiphy->support_only_he_mbssid && - !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) + !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) return res; non_tx_data.tx_bss = res; @@ -2726,7 +2676,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct 
cfg80211_registered_device *rdev; struct wiphy *wiphy; struct iw_scan_req *wreq = NULL; - struct cfg80211_scan_request *creq; + struct cfg80211_scan_request *creq = NULL; int i, err, n_channels = 0; enum nl80211_band band; @@ -2741,8 +2691,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req || rdev->scan_msg) - return -EBUSY; + if (rdev->scan_req || rdev->scan_msg) { + err = -EBUSY; + goto out; + } wiphy = &rdev->wiphy; @@ -2755,8 +2707,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) + n_channels * sizeof(void *), GFP_ATOMIC); - if (!creq) - return -ENOMEM; + if (!creq) { + err = -ENOMEM; + goto out; + } creq->wiphy = wiphy; creq->wdev = dev->ieee80211_ptr; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ff4d48fcbf..08a70b4f09 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -680,9 +680,7 @@ void __cfg80211_connect_result(struct net_device *dev, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - const struct element *country_elem; - const u8 *country_data; - u8 country_datalen; + const u8 *country_ie; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -764,22 +762,26 @@ void __cfg80211_connect_result(struct net_device *dev, cfg80211_upload_connect_keys(wdev); rcu_read_lock(); - country_elem = ieee80211_bss_get_elem(cr->bss, WLAN_EID_COUNTRY); - if (!country_elem) { + country_ie = ieee80211_bss_get_ie(cr->bss, WLAN_EID_COUNTRY); + if (!country_ie) { rcu_read_unlock(); return; } - country_datalen = country_elem->datalen; - country_data = kmemdup(country_elem->data, country_datalen, GFP_ATOMIC); + country_ie = kmemdup(country_ie, 2 + country_ie[1], GFP_ATOMIC); rcu_read_unlock(); - if (!country_data) + if (!country_ie) return; + /* + * ieee80211_bss_get_ie() ensures we can access: + * - country_ie + 2, the start of the country ie data, and + * - and country_ie[1] which is the IE length + */ regulatory_hint_country_ie(wdev->wiphy, cr->bss->channel->band, - country_data, country_datalen); - kfree(country_data); + country_ie + 2, country_ie[1]); + kfree(country_ie); } /* Consumes bss object one way or another */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 228079d769..19b78d4722 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -167,19 +167,6 @@ __entry->center_freq1, __entry->freq1_offset, \ __entry->center_freq2 -#define FILS_AAD_ASSIGN(fa) \ - do { \ - if (fa) { \ - ether_addr_copy(__entry->macaddr, fa->macaddr); \ - __entry->kek_len = fa->kek_len; \ - } else { \ - eth_zero_addr(__entry->macaddr); \ - __entry->kek_len = 0; \ - } \ - } while (0) -#define FILS_AAD_PR_FMT \ - "macaddr: %pM, kek_len: %d" - #define SINFO_ENTRY __field(int, generation) \ __field(u32, connected_time) \ __field(u32, inactive_time) \ @@ -2627,24 +2614,6 @@ DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_abort_pmsr, TP_ARGS(wiphy, wdev, cookie) ); -TRACE_EVENT(rdev_set_fils_aad, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_fils_aad *fils_aad), - TP_ARGS(wiphy, netdev, fils_aad), - TP_STRUCT__entry(WIPHY_ENTRY - NETDEV_ENTRY - __array(u8, macaddr, ETH_ALEN) - __field(u8, kek_len) - ), - TP_fast_assign(WIPHY_ASSIGN; - NETDEV_ASSIGN; - FILS_AAD_ASSIGN(fils_aad); - ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " FILS_AAD_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, - __entry->kek_len) -); - /************************************************************* * cfg80211 exported 
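The sme.c hunk above duplicates the country IE with kmemdup() while the RCU read lock is held, so regulatory_hint_country_ie() can safely run after rcu_read_unlock(). A userspace stand-in for that copy-then-use pattern (the IE bytes below are a made-up example payload):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* userspace stand-in for kmemdup(): copy len bytes into fresh storage */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

int main(void)
{
	/* made-up country IE: id 7, len 6, "US", environment, one triplet */
	const uint8_t ie[] = { 7, 6, 'U', 'S', ' ', 1, 11, 30 };
	/* in the kernel this copy happens under rcu_read_lock(), so the
	 * duplicate can outlive the RCU-protected source buffer */
	uint8_t *copy = memdup(ie + 2, ie[1]);

	if (!copy)
		return 1;
	printf("alpha2 %c%c, %u payload bytes\n", copy[0], copy[1], ie[1]);
	free(copy);
	return 0;
}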
functions traces * *************************************************************/ @@ -3053,21 +3022,18 @@ TRACE_EVENT(cfg80211_ch_switch_started_notify, ); TRACE_EVENT(cfg80211_radar_event, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, - bool offchan), - TP_ARGS(wiphy, chandef, offchan), + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY - __field(bool, offchan) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); - __entry->offchan = offchan; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", offchan %d", - WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->offchan) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); TRACE_EVENT(cfg80211_cac_event, @@ -3677,42 +3643,6 @@ TRACE_EVENT(cfg80211_bss_color_notify, __entry->color_bitmap) ); -TRACE_EVENT(rdev_set_radar_background, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), - - TP_ARGS(wiphy, chandef), - - TP_STRUCT__entry( - WIPHY_ENTRY - CHAN_DEF_ENTRY - ), - - TP_fast_assign( - WIPHY_ASSIGN; - CHAN_DEF_ASSIGN(chandef) - ), - - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) -); - -TRACE_EVENT(cfg80211_assoc_comeback, - TP_PROTO(struct wireless_dev *wdev, const u8 *bssid, u32 timeout), - TP_ARGS(wdev, bssid, timeout), - TP_STRUCT__entry( - WDEV_ENTRY - MAC_ENTRY(bssid) - __field(u32, timeout) - ), - TP_fast_assign( - WDEV_ASSIGN; - MAC_ASSIGN(bssid, bssid); - __entry->timeout = timeout; - ), - TP_printk(WDEV_PR_FMT ", " MAC_PR_FMT ", timeout: %u TUs", - WDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->timeout) -); - #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 41ea65deb6..4ddc269164 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -80,7 +80,6 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) return 0; /* not supported */ switch (band) { case NL80211_BAND_2GHZ: - case NL80211_BAND_LC: if (chan == 14) return MHZ_TO_KHZ(2484); else if (chan < 14) @@ -210,7 +209,6 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) WARN_ON(want); break; case NL80211_BAND_2GHZ: - case NL80211_BAND_LC: want = 7; for (i = 0; i < sband->n_bitrates; i++) { switch (sband->bitrates[i].bitrate) { diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index cd09a90422..193a18a531 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -212,18 +212,18 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, wdev_lock(wdev); if (wdev->current_bss) { - const struct element *ssid_elem; + const u8 *ie; rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, - WLAN_EID_SSID); - if (ssid_elem) { + ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, + WLAN_EID_SSID); + if (ie) { data->flags = 1; - data->length = ssid_elem->datalen; + data->length = ie[1]; if (data->length > IW_ESSID_MAX_SIZE) ret = -EINVAL; else - memcpy(ssid, ssid_elem->data, data->length); + memcpy(ssid, ie + 2, data->length); } rcu_read_unlock(); } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b981a4828d..e1c4197af4 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -41,7 +41,7 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) return 0; } - if (x25->fraglen > 0) { /* End of fragment */ + if (!more && x25->fraglen > 0) 
{ /* End of fragment */ int len = x25->fraglen + skb->len; if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 28ef3f4465..d6b500dc42 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -134,6 +134,21 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, return 0; } +void xp_release(struct xdp_buff_xsk *xskb) +{ + xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; +} + +static u64 xp_get_handle(struct xdp_buff_xsk *xskb) +{ + u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; + + offset += xskb->pool->headroom; + if (!xskb->pool->unaligned) + return xskb->orig_addr + offset; + return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); +} + static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); @@ -794,7 +809,9 @@ static int xsk_release(struct socket *sock) sk_del_node_init_rcu(sk); mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); + local_bh_enable(); xsk_delete_from_maps(xs); mutex_lock(&xs->mutex); @@ -1394,7 +1411,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sk_add_node_rcu(sk, &net->xdp.list); mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, &xsk_proto, 1); + local_bh_enable(); return 0; } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index fd39bb660e..8de01aaac4 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -44,13 +44,12 @@ void xp_destroy(struct xsk_buff_pool *pool) struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem) { - bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; struct xsk_buff_pool *pool; struct xdp_buff_xsk *xskb; - u32 i, entries; + u32 i; - entries = unaligned ? 
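The x25_in.c change above only completes reassembly when a frame arrives with no more data pending (!more) while fragments are already buffered. A toy userspace model of that queue-or-flush decision (fixed-size buffer, no overflow handling):

#include <stdio.h>
#include <string.h>

static char fragbuf[256];
static size_t fraglen;

/* returns 1 when a complete message is available in out/outlen */
static int rx_frame(const char *data, size_t len, int more,
		    char *out, size_t *outlen)
{
	if (more || fraglen) {		/* part of a fragmented sequence */
		memcpy(fragbuf + fraglen, data, len);
		fraglen += len;
	}
	if (!more && fraglen) {		/* end of fragment: flush buffer */
		memcpy(out, fragbuf, fraglen);
		*outlen = fraglen;
		fraglen = 0;
		return 1;
	}
	if (!more) {			/* unfragmented frame */
		memcpy(out, data, len);
		*outlen = len;
		return 1;
	}
	return 0;			/* still accumulating */
}

int main(void)
{
	char out[256];
	size_t n;

	rx_frame("hello ", 6, 1, out, &n);	/* M bit set: queued */
	if (rx_frame("world", 5, 0, out, &n))	/* M bit clear: flushed */
		printf("%.*s\n", (int)n, out);
	return 0;
}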
umem->chunks : 0; - pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL); + pool = kvzalloc(struct_size(pool, free_heads, umem->chunks), + GFP_KERNEL); if (!pool) goto out; @@ -64,8 +63,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->free_heads_cnt = umem->chunks; pool->headroom = umem->headroom; pool->chunk_size = umem->chunk_size; - pool->chunk_shift = ffs(umem->chunk_size) - 1; - pool->unaligned = unaligned; + pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; pool->frame_len = umem->chunk_size - umem->headroom - XDP_PACKET_HEADROOM; pool->umem = umem; @@ -83,11 +81,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; - INIT_LIST_HEAD(&xskb->free_list_node); - if (pool->unaligned) - pool->free_heads[i] = xskb; - else - xp_init_xskb_addr(xskb, pool, i * pool->chunk_size); + pool->free_heads[i] = xskb; } return pool; @@ -412,12 +406,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(dma_map); - else - for (i = 0; i < pool->heads_cnt; i++) { - struct xdp_buff_xsk *xskb = &pool->heads[i]; - - xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); - } err = xp_init_dma_info(pool, dma_map); if (err) { @@ -460,9 +448,12 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) if (pool->free_heads_cnt == 0) return NULL; + xskb = pool->free_heads[--pool->free_heads_cnt]; + for (;;) { if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) { pool->fq->queue_empty_descs++; + xp_release(xskb); return NULL; } @@ -475,17 +466,17 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) } break; } - - if (pool->unaligned) { - xskb = pool->free_heads[--pool->free_heads_cnt]; - xp_init_xskb_addr(xskb, pool, addr); - if (pool->dma_pages_cnt) - xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); - } else { - xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; - } - xskq_cons_release(pool->fq); + + xskb->orig_addr = addr; + xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; + if (pool->dma_pages_cnt) { + xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] & + ~XSK_NEXT_PG_CONTIG_MASK) + + (addr & ~PAGE_MASK); + xskb->dma = xskb->frame_dma + pool->headroom + + XDP_PACKET_HEADROOM; + } return xskb; } @@ -501,7 +492,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) pool->free_list_cnt--; xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node); - list_del_init(&xskb->free_list_node); + list_del(&xskb->free_list_node); } xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; @@ -516,96 +507,6 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) } EXPORT_SYMBOL(xp_alloc); -static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) -{ - u32 i, cached_cons, nb_entries; - - if (max > pool->free_heads_cnt) - max = pool->free_heads_cnt; - max = xskq_cons_nb_entries(pool->fq, max); - - cached_cons = pool->fq->cached_cons; - nb_entries = max; - i = max; - while (i--) { - struct xdp_buff_xsk *xskb; - u64 addr; - bool ok; - - __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr); - - ok = pool->unaligned ? 
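xp_get_handle(), re-added to xsk.c above, encodes an unaligned-mode AF_XDP handle as the chunk address in the low 48 bits plus the intra-chunk offset in the upper 16. A sketch of that packing, using the XSK_UNALIGNED_BUF_* constants as assumed from the uapi <linux/if_xdp.h>:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* values as defined in the uapi <linux/if_xdp.h> (assumed here) */
#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
#define XSK_UNALIGNED_BUF_ADDR_MASK \
	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)

/* mirrors xp_get_handle()'s unaligned branch: addr + (offset << 48) */
static uint64_t pack_handle(uint64_t orig_addr, uint64_t offset)
{
	return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}

int main(void)
{
	uint64_t h = pack_handle(0x10000, 256);

	printf("addr 0x%" PRIx64 ", offset %" PRIu64 "\n",
	       h & XSK_UNALIGNED_BUF_ADDR_MASK,
	       h >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
	return 0;
}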
xp_check_unaligned(pool, &addr) : - xp_check_aligned(pool, &addr); - if (unlikely(!ok)) { - pool->fq->invalid_descs++; - nb_entries--; - continue; - } - - if (pool->unaligned) { - xskb = pool->free_heads[--pool->free_heads_cnt]; - xp_init_xskb_addr(xskb, pool, addr); - if (pool->dma_pages_cnt) - xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); - } else { - xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; - } - - *xdp = &xskb->xdp; - xdp++; - } - - xskq_cons_release_n(pool->fq, max); - return nb_entries; -} - -static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries) -{ - struct xdp_buff_xsk *xskb; - u32 i; - - nb_entries = min_t(u32, nb_entries, pool->free_list_cnt); - - i = nb_entries; - while (i--) { - xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node); - list_del_init(&xskb->free_list_node); - - *xdp = &xskb->xdp; - xdp++; - } - pool->free_list_cnt -= nb_entries; - - return nb_entries; -} - -u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) -{ - u32 nb_entries1 = 0, nb_entries2; - - if (unlikely(pool->dma_need_sync)) { - /* Slow path */ - *xdp = xp_alloc(pool); - return !!*xdp; - } - - if (unlikely(pool->free_list_cnt)) { - nb_entries1 = xp_alloc_reused(pool, xdp, max); - if (nb_entries1 == max) - return nb_entries1; - - max -= nb_entries1; - xdp += nb_entries1; - } - - nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max); - if (!nb_entries2) - pool->fq->queue_empty_descs++; - - return nb_entries1 + nb_entries2; -} -EXPORT_SYMBOL(xp_alloc_batch); - bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count) { if (pool->free_list_cnt >= count) @@ -616,9 +517,6 @@ EXPORT_SYMBOL(xp_can_alloc); void xp_free(struct xdp_buff_xsk *xskb) { - if (!list_empty(&xskb->free_list_node)) - return; - xskb->pool->free_list_cnt++; list_add(&xskb->free_list_node, &xskb->pool->free_list); } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index e9aa2c2363..9ae13cccfb 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -111,18 +111,14 @@ struct xsk_queue { /* Functions that read and validate content from consumer rings. 
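Both the single-buffer path and the removed batch allocator above treat pool->free_heads as a LIFO stack of preallocated descriptors: allocation pops with free_heads[--cnt], and xp_release() pushes with free_heads[cnt++]. A minimal model of that recycling scheme:

#include <stdio.h>

#define NCHUNKS 4

struct buf {
	int id;
};

static struct buf heads[NCHUNKS];		/* preallocated, like pool->heads */
static struct buf *free_heads[NCHUNKS];		/* stack of free descriptors */
static unsigned int free_heads_cnt;

static struct buf *buf_alloc(void)
{
	if (!free_heads_cnt)
		return NULL;			/* pool exhausted */
	return free_heads[--free_heads_cnt];	/* pop, as in __xp_alloc() */
}

static void buf_release(struct buf *b)
{
	free_heads[free_heads_cnt++] = b;	/* push, as in xp_release() */
}

int main(void)
{
	struct buf *b;
	int i;

	for (i = 0; i < NCHUNKS; i++) {
		heads[i].id = i;
		buf_release(&heads[i]);
	}
	b = buf_alloc();
	printf("got buf %d, %u still free\n", b->id, free_heads_cnt);
	buf_release(b);
	printf("%u free after release\n", free_heads_cnt);
	return 0;
}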
*/ -static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr) -{ - struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; - u32 idx = cached_cons & q->ring_mask; - - *addr = ring->desc[idx]; -} - static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) { + struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; + if (q->cached_cons != q->cached_prod) { - __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr); + u32 idx = q->cached_cons & q->ring_mask; + + *addr = ring->desc[idx]; return true; } diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 65b53fb3de..2e48d0e094 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 094734fbec..4dae3ab8d0 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -341,26 +341,6 @@ static struct xfrm_algo_desc aalg_list[] = { .pfkey_supported = 0, }, -{ - .name = "hmac(sm3)", - .compat = "sm3", - - .uinfo = { - .auth = { - .icv_truncbits = 256, - .icv_fullbits = 256, - } - }, - - .pfkey_supported = 1, - - .desc = { - .sadb_alg_id = SADB_X_AALG_SM3_256HMAC, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 256, - .sadb_alg_maxbits = 256 - } -}, }; static struct xfrm_algo_desc ealg_list[] = { @@ -572,27 +552,6 @@ static struct xfrm_algo_desc ealg_list[] = { .sadb_alg_maxbits = 288 } }, -{ - .name = "cbc(sm4)", - .compat = "sm4", - - .uinfo = { - .encr = { - .geniv = "echainiv", - .blockbits = 128, - .defkeybits = 128, - } - }, - - .pfkey_supported = 1, - - .desc = { - .sadb_alg_id = SADB_X_EALG_SM4CBC, - .sadb_alg_ivlen = 16, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256 - } -}, }; static struct xfrm_algo_desc calg_list[] = { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 3fa066419d..c255aac6b8 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -223,6 +223,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->encap || x->tfcpad) return -EINVAL; + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + return -EINVAL; + dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { @@ -259,10 +262,10 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, } xso->dev = dev; - netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); xso->real_dev = dev; xso->num_exthdrs = 1; - xso->flags = xuo->flags; + /* Don't forward bit that is not implemented */ + xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { @@ -270,7 +273,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->flags = 0; xso->dev = NULL; xso->real_dev = NULL; - dev_put_track(dev, &xso->dev_tracker); + dev_put(dev); if (err != -EOPNOTSUPP) return err; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 144238a50f..3df0861d43 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -530,7 +530,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - if (xfrm_parse_spi(skb, nexthdr, &spi, &seq)) { + if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } @@ -560,7 +560,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) } seq = 0; - if (!spi && xfrm_parse_spi(skb, nexthdr, &spi, &seq)) { + if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) 
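The xfrm_device.c hunk above restores strict validation of xuo->flags: any bit outside the supported mask fails with -EINVAL instead of being silently ignored, which keeps the flag space usable for future extensions. A tiny sketch of that reject-unknown-bits idiom (bit values are stand-ins, not the real XFRM_OFFLOAD_* definitions):

#include <stdio.h>

#define EINVAL 22

/* stand-in bit values; the real XFRM_OFFLOAD_* flags live in uapi xfrm.h */
#define OFFLOAD_IPV6	0x01
#define OFFLOAD_INBOUND	0x02

static int validate_offload_flags(unsigned int flags)
{
	/* refuse bits we do not understand instead of ignoring them */
	if (flags & ~(OFFLOAD_IPV6 | OFFLOAD_INBOUND))
		return -EINVAL;
	return 0;
}

int main(void)
{
	printf("known flags: %d\n", validate_offload_flags(OFFLOAD_INBOUND));
	printf("unknown bit: %d\n", validate_offload_flags(0x10));
	return 0;
}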
{ secpath_reset(skb); XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; @@ -669,7 +669,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) x->curlft.bytes += skb->len; x->curlft.packets++; - x->curlft.use_time = ktime_get_real_seconds(); spin_unlock(&x->lock); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index d4935b3b99..4dc4a7bbe5 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -533,7 +533,6 @@ static int xfrm_output_one(struct sk_buff *skb, int err) x->curlft.bytes += skb->len; x->curlft.packets++; - x->curlft.use_time = ktime_get_real_seconds(); spin_unlock_bh(&x->lock); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 04d1ce9b51..37b149f632 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2488,7 +2488,9 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { - memset_after(xdst, 0, u.dst); + struct dst_entry *dst = &xdst->u.dst; + + memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); } else xdst = ERR_PTR(-ENOBUFS); @@ -2682,7 +2684,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, *num_xfrms = pols[0]->xfrm_nr; #ifdef CONFIG_XFRM_SUB_POLICY - if (pols[0]->action == XFRM_POLICY_ALLOW && + if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), XFRM_POLICY_TYPE_MAIN, @@ -3394,6 +3396,7 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) case NEXTHDR_DEST: offset += ipv6_optlen(exthdr); nexthdr = exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr *)(nh + offset); break; case IPPROTO_UDP: case IPPROTO_UDPLITE: diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ca6bee1834..100b4b3723 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -14,7 +14,6 @@ * */ -#include #include #include #include @@ -2572,7 +2571,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2603,17 +2602,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(__xfrm_state_mtu); - -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) -{ - mtu = __xfrm_state_mtu(x, mtu); - - if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) - return IPV6_MIN_MTU; - - return mtu; -} +EXPORT_SYMBOL_GPL(xfrm_state_mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8cd6c81290..b10f88822c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -11,7 +11,6 @@ * */ -#include #include #include #include @@ -2984,7 +2983,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 
1 : 0; /* clear the padding bytes */ - memset_after(ue, 0, hard); + memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard)); err = xfrm_mark_put(skb, &x->mark); if (err) diff --git a/samples/Kconfig b/samples/Kconfig index 22cc921ae2..b0503ef058 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -26,20 +26,11 @@ config SAMPLE_TRACE_PRINTK config SAMPLE_FTRACE_DIRECT tristate "Build register_ftrace_direct() example" depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m - depends on HAVE_SAMPLE_FTRACE_DIRECT + depends on X86_64 # has x86_64 inlined asm help This builds an ftrace direct function example that hooks to wake_up_process and prints the parameters. -config SAMPLE_FTRACE_DIRECT_MULTI - tristate "Build register_ftrace_direct_multi() example" - depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m - depends on HAVE_SAMPLE_FTRACE_DIRECT_MULTI - help - This builds an ftrace direct function example - that hooks to wake_up_process and schedule, and prints - the function addresses. - config SAMPLE_TRACE_ARRAY tristate "Build sample module for kernel access to Ftrace instancess" depends on EVENT_TRACING && m @@ -129,15 +120,6 @@ config SAMPLE_CONNECTOR with it. See also Documentation/driver-api/connector.rst -config SAMPLE_FANOTIFY_ERROR - bool "Build fanotify error monitoring sample" - depends on FANOTIFY && CC_CAN_LINK && HEADERS_INSTALL - help - When enabled, this builds an example code that uses the - FAN_FS_ERROR fanotify mechanism to monitor filesystem - errors. - See also Documentation/admin-guide/filesystem-monitoring.rst. - config SAMPLE_HIDRAW bool "hidraw sample" depends on CC_CAN_LINK && HEADERS_INSTALL @@ -241,19 +223,4 @@ config SAMPLE_WATCH_QUEUE Build example userspace program to use the new mount_notify(), sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function. -config SAMPLE_CORESIGHT_SYSCFG - tristate "Build example loadable module for CoreSight config" - depends on CORESIGHT && m - help - Build an example loadable module that adds new CoreSight features - and configuration using the CoreSight system configuration API. - This demonstrates how a user may create their own CoreSight - configurations and easily load them into the system at runtime. 
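On the memset_after() reverts in xfrm_policy.c and xfrm_user.c above: both open-code "zero every byte after member X", padding included, with pointer arithmetic. A standalone equivalent using an offsetofend()-style macro (struct layout is hypothetical):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct expire_msg {		/* hypothetical layout */
	unsigned int id;
	unsigned char hard;
	unsigned char pad[3];
	unsigned int extra;
};

#define offsetofend(T, m) (offsetof(T, m) + sizeof(((T *)0)->m))

int main(void)
{
	struct expire_msg m = { .id = 1, .hard = 1, .extra = 0xdead };

	/* open-coded memset_after(&m, 0, hard): clear all bytes that
	 * follow the 'hard' member, including the padding */
	memset((char *)&m + offsetofend(struct expire_msg, hard), 0,
	       sizeof(m) - offsetofend(struct expire_msg, hard));
	printf("extra = %u\n", m.extra);	/* now 0 */
	return 0;
}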
- endif # SAMPLES - -config HAVE_SAMPLE_FTRACE_DIRECT - bool - -config HAVE_SAMPLE_FTRACE_DIRECT_MULTI - bool diff --git a/samples/Makefile b/samples/Makefile index 1ae4de99c9..087e0988cc 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -5,7 +5,6 @@ subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/ obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/ -obj-$(CONFIG_SAMPLE_FANOTIFY_ERROR) += fanotify/ subdir-$(CONFIG_SAMPLE_HIDRAW) += hidraw obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += hw_breakpoint/ obj-$(CONFIG_SAMPLE_KDB) += kdb/ @@ -22,7 +21,6 @@ subdir-$(CONFIG_SAMPLE_TIMER) += timers obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace_events/ obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/ -obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/ obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/ subdir-$(CONFIG_SAMPLE_UHID) += uhid obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/ @@ -32,4 +30,3 @@ obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/ subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/ -obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight/ diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0e7bfdbff8..fcba217f0a 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -57,7 +57,3 @@ testfile.img hbm_out.log iperf.* *.out -*.skel.h -/vmlinux.h -/bpftool/ -/libbpf/ diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 38638845db..c6e38e43c3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -3,8 +3,6 @@ BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src)) TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools -pound := \# - # List of programs to build tprogs-y := test_lru_dist tprogs-y += sock_example @@ -234,7 +232,6 @@ CLANG ?= clang OPT ?= opt LLVM_DIS ?= llvm-dis LLVM_OBJCOPY ?= llvm-objcopy -LLVM_READELF ?= llvm-readelf BTF_PAHOLE ?= pahole # Detect that we're cross compiling and use the cross compiler @@ -244,7 +241,7 @@ endif # Don't evaluate probes and warnings if we need to run make recursively ifneq ($(src),) -HDR_PROBE := $(shell printf "$(pound)include \n struct list_head { int a; }; int main() { return 0; }" | \ +HDR_PROBE := $(shell printf "\#include \n struct list_head { int a; }; int main() { return 0; }" | \ $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \ -o /dev/null 2>/dev/null && echo okay) @@ -258,7 +255,7 @@ BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ - $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ + readelf -S ./llvm_btf_verify.o | grep BTF; \ /bin/rm -f ./llvm_btf_verify.o) BPF_EXTRA_CFLAGS += -fno-stack-protector @@ -282,7 +279,7 @@ clean: @find $(CURDIR) -type f -name '*~' -delete @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool -$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) +$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \ diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c index f0df3dda4b..54958802c0 100644 --- 
a/samples/bpf/cookie_uid_helper_example.c +++ b/samples/bpf/cookie_uid_helper_example.c @@ -67,8 +67,8 @@ static bool test_finish; static void maps_create(void) { - map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(uint32_t), - sizeof(struct stats), 100, NULL); + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), + sizeof(struct stats), 100, 0); if (map_fd < 0) error(1, errno, "map create failed!\n"); } @@ -157,13 +157,9 @@ static void prog_load(void) offsetof(struct __sk_buff, len)), BPF_EXIT_INSN(), }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = log_buf, - .log_size = sizeof(log_buf), - ); - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - prog, ARRAY_SIZE(prog), &opts); + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + ARRAY_SIZE(prog), "GPL", 0, + log_buf, sizeof(log_buf)); if (prog_fd < 0) error(1, errno, "failed to load prog\n%s\n", log_buf); } diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 16dbf49e0f..59f45fef51 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -46,6 +46,12 @@ static void usage(void) printf(" -h Display this help.\n"); } +static int bpf_map_create(void) +{ + return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t), + sizeof(uint32_t), 1024, 0); +} + static int bpf_prog_create(const char *object) { static struct bpf_insn insns[] = { @@ -54,22 +60,16 @@ static int bpf_prog_create(const char *object) }; size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); struct bpf_object *obj; - int err; + int prog_fd; if (object) { - obj = bpf_object__open_file(object, NULL); - assert(!libbpf_get_error(obj)); - err = bpf_object__load(obj); - assert(!err); - return bpf_program__fd(bpf_object__next_program(obj, NULL)); + assert(!bpf_prog_load(object, BPF_PROG_TYPE_UNSPEC, + &obj, &prog_fd)); + return prog_fd; } else { - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = bpf_log_buf, - .log_size = BPF_LOG_BUF_SIZE, - ); - - return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - insns, insns_cnt, &opts); + return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, + insns, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); } } @@ -79,8 +79,7 @@ static int bpf_do_map(const char *file, uint32_t flags, uint32_t key, int fd, ret; if (flags & BPF_F_PIN) { - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(uint32_t), - sizeof(uint32_t), 1024, NULL); + fd = bpf_map_create(); printf("bpf: map fd:%d (%s)\n", fd, strerror(errno)); assert(fd > 0); diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 1fe5bcafb3..b0c18efe79 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -120,9 +120,6 @@ static void do_error(char *msg, bool errno_flag) static int prog_load(char *prog) { - struct bpf_program *pos; - const char *sec_name; - obj = bpf_object__open_file(prog, NULL); if (libbpf_get_error(obj)) { printf("ERROR: opening BPF object file failed\n"); @@ -135,13 +132,7 @@ static int prog_load(char *prog) goto err; } - bpf_object__for_each_program(pos, obj) { - sec_name = bpf_program__section_name(pos); - if (sec_name && !strcmp(sec_name, "cgroup_skb/egress")) { - bpf_prog = pos; - break; - } - } + bpf_prog = bpf_object__find_program_by_title(obj, "cgroup_skb/egress"); if (!bpf_prog) { printf("ERROR: finding a prog in obj file failed\n"); goto err; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 319fd31522..9db949290a 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -134,22 +134,19 @@ 
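The samples above revert from bpf_map_create()/the opts-based bpf_prog_load() to the legacy libbpf helpers. A minimal user of the old bpf_create_map() signature exactly as it appears in these hunks (assumes a pre-1.0 libbpf where the helper still exists, plus root or CAP_BPF at runtime):

#include <bpf/bpf.h>
#include <linux/bpf.h>
#include <stdio.h>

int main(void)
{
	/* legacy helper, dropped in libbpf 1.0 in favour of bpf_map_create() */
	int map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
				    sizeof(__u32),	/* key size */
				    sizeof(__u64),	/* value size */
				    256,		/* max entries */
				    0);			/* map flags */

	if (map_fd < 0) {
		perror("bpf_create_map");
		return 1;
	}
	printf("created array map, fd %d\n", map_fd);
	return 0;
}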
static void do_test_lru(enum test_type test, int cpu) */ int outer_fd = map_fd[array_of_lru_hashs_idx]; unsigned int mycpu, mynode; - LIBBPF_OPTS(bpf_map_create_opts, opts, - .map_flags = BPF_F_NUMA_NODE, - ); assert(cpu < MAX_NR_CPUS); ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); assert(!ret); - opts.numa_node = mynode; inner_lru_map_fds[cpu] = - bpf_map_create(BPF_MAP_TYPE_LRU_HASH, - test_map_names[INNER_LRU_HASH_PREALLOC], - sizeof(uint32_t), - sizeof(long), - inner_lru_hash_size, &opts); + bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + test_map_names[INNER_LRU_HASH_PREALLOC], + sizeof(uint32_t), + sizeof(long), + inner_lru_hash_size, 0, + mynode); if (inner_lru_map_fds[cpu] == -1) { printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", strerror(errno), errno); diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index eb4d94742e..4866afd054 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -113,11 +113,11 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta) /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; - char prev_comm[TASK_COMM_LEN]; + char prev_comm[16]; int prev_pid; int prev_prio; long long prev_state; - char next_comm[TASK_COMM_LEN]; + char next_comm[16]; int next_pid; int next_prio; }; diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index a88f69504c..23d1930e19 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -37,8 +37,8 @@ static int test_sock(void) int sock = -1, map_fd, prog_fd, i, key; long long value = 0, tcp_cnt, udp_cnt, icmp_cnt; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), - 256, NULL); + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), + 256, 0); if (map_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); goto cleanup; @@ -59,13 +59,9 @@ static int test_sock(void) BPF_EXIT_INSN(), }; size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = bpf_log_buf, - .log_size = BPF_LOG_BUF_SIZE, - ); - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - prog, insns_cnt, &opts); + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, insns_cnt, + "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); if (prog_fd < 0) { printf("failed to load prog '%s'\n", strerror(errno)); goto cleanup; diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 9e8d39e245..3c83722877 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -11,26 +11,17 @@ int main(int ac, char **argv) { struct bpf_object *obj; - struct bpf_program *prog; int map_fd, prog_fd; char filename[256]; - int i, sock, err; + int i, sock; FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) + if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, + &obj, &prog_fd)) return 1; - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER); - - err = bpf_object__load(obj); - if (err) - return 1; - - prog_fd = bpf_program__fd(prog); map_fd = bpf_object__find_map_fd_by_name(obj, "my_map"); sock = open_raw_sock("lo"); diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 6a3fd369d3..bafa567b84 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -16,26 +16,18 @@ struct pair { int 
main(int ac, char **argv) { - struct bpf_program *prog; struct bpf_object *obj; int map_fd, prog_fd; char filename[256]; - int i, sock, err; + int i, sock; FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) + + if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, + &obj, &prog_fd)) return 1; - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER); - - err = bpf_object__load(obj); - if (err) - return 1; - - prog_fd = bpf_program__fd(prog); map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map"); sock = open_raw_sock("lo"); diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c index 05e88aa630..6d564aa754 100644 --- a/samples/bpf/test_cgrp2_array_pin.c +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -64,9 +64,9 @@ int main(int argc, char **argv) } if (create_array) { - array_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_ARRAY, NULL, + array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY, sizeof(uint32_t), sizeof(uint32_t), - 1, NULL); + 1, 0); if (array_fd < 0) { fprintf(stderr, "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 6d90874b09..390ff38d2a 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -71,13 +71,10 @@ static int prog_load(int map_fd, int verdict) BPF_EXIT_INSN(), }; size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = bpf_log_buf, - .log_size = BPF_LOG_BUF_SIZE, - ); - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, NULL, "GPL", - prog, insns_cnt, &opts); + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); } static int usage(const char *argv0) @@ -93,9 +90,9 @@ static int attach_filter(int cg_fd, int type, int verdict) int prog_fd, map_fd, ret, key; long long pkt_cnt, byte_cnt; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(byte_cnt), - 256, NULL); + 256, 0); if (map_fd < 0) { printf("Failed to create map: '%s'\n", strerror(errno)); return EXIT_FAILURE; diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index a0811df888..b0811da5a0 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -70,10 +70,6 @@ static int prog_load(__u32 idx, __u32 mark, __u32 prio) BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)), }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = bpf_log_buf, - .log_size = BPF_LOG_BUF_SIZE, - ); struct bpf_insn *prog; size_t insns_cnt; @@ -119,8 +115,8 @@ static int prog_load(__u32 idx, __u32 mark, __u32 prio) insns_cnt /= sizeof(struct bpf_insn); - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", - prog, insns_cnt, &opts); + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, + "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); free(prog); diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index 75e8778535..c92c5c06b9 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -105,10 +105,10 @@ struct pfect_lru { static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, unsigned int nr_possible_elems) { - lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, + 
lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(unsigned long long), sizeof(struct pfect_lru_node *), - nr_possible_elems, NULL); + nr_possible_elems, 0); assert(lru->map_fd != -1); lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node)); @@ -207,13 +207,10 @@ static unsigned int read_keys(const char *dist_file, static int create_map(int map_type, int map_flags, unsigned int size) { - LIBBPF_OPTS(bpf_map_create_opts, opts, - .map_flags = map_flags, - ); int map_fd; - map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long), - sizeof(unsigned long long), size, &opts); + map_fd = bpf_create_map(map_type, sizeof(unsigned long long), + sizeof(unsigned long long), size, map_flags); if (map_fd == -1) perror("bpf_create_map"); diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c index 8fdd2c9c56..f6d593e470 100644 --- a/samples/bpf/test_overhead_kprobe_kern.c +++ b/samples/bpf/test_overhead_kprobe_kern.c @@ -6,7 +6,6 @@ */ #include #include -#include #include #include #include @@ -23,17 +22,17 @@ int prog(struct pt_regs *ctx) { struct signal_struct *signal; struct task_struct *tsk; - char oldcomm[TASK_COMM_LEN] = {}; - char newcomm[TASK_COMM_LEN] = {}; + char oldcomm[16] = {}; + char newcomm[16] = {}; u16 oom_score_adj; u32 pid; tsk = (void *)PT_REGS_PARM1(ctx); pid = _(tsk->pid); - bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm); - bpf_probe_read_kernel_str(newcomm, sizeof(newcomm), - (void *)PT_REGS_PARM2(ctx)); + bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm); + bpf_probe_read_kernel(newcomm, sizeof(newcomm), + (void *)PT_REGS_PARM2(ctx)); signal = _(tsk->signal); oom_score_adj = _(signal->oom_score_adj); return 0; diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c index 80edadacb6..eaa32693f8 100644 --- a/samples/bpf/test_overhead_tp_kern.c +++ b/samples/bpf/test_overhead_tp_kern.c @@ -4,7 +4,6 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. 
*/ -#include #include #include @@ -12,8 +11,8 @@ struct task_rename { __u64 pad; __u32 pid; - char oldcomm[TASK_COMM_LEN]; - char newcomm[TASK_COMM_LEN]; + char oldcomm[16]; + char newcomm[16]; __u16 oom_score_adj; }; SEC("tracepoint/task/task_rename") diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 371732f9cf..364b98764d 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -43,6 +43,7 @@ static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) int main(int argc, char **argv) { + struct perf_buffer_opts pb_opts = {}; struct bpf_link *link = NULL; struct bpf_program *prog; struct perf_buffer *pb; @@ -83,7 +84,8 @@ int main(int argc, char **argv) goto cleanup; } - pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); + pb_opts.sample_cb = print_bpf_output; + pb = perf_buffer__new(map_fd, 8, &pb_opts); ret = libbpf_get_error(pb); if (ret) { printf("failed to setup perf_buffer: %d\n", ret); diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 8675fa5273..116e39f6b6 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -128,7 +128,7 @@ int main(int argc, char **argv) if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - map = bpf_object__next_map(obj, NULL); + map = bpf_map__next(NULL, obj); if (!map) { printf("finding a map in obj file failed\n"); return 1; diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 4ad896782f..00061261a8 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -79,9 +79,7 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; const char *prog_name = "xdp_fwd"; - struct bpf_program *prog = NULL; - struct bpf_program *pos; - const char *sec_name; + struct bpf_program *prog; int prog_fd, map_fd = -1; char filename[PATH_MAX]; struct bpf_object *obj; @@ -136,13 +134,7 @@ int main(int argc, char **argv) return 1; } - bpf_object__for_each_program(pos, obj) { - sec_name = bpf_program__section_name(pos); - if (sec_name && !strcmp(sec_name, prog_name)) { - prog = pos; - break; - } - } + prog = bpf_object__find_program_by_title(obj, prog_name); prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { printf("program not found: %s\n", strerror(prog_fd)); diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c index 25e3a40537..f10fe3cf25 100644 --- a/samples/bpf/xdp_redirect_cpu.bpf.c +++ b/samples/bpf/xdp_redirect_cpu.bpf.c @@ -100,6 +100,7 @@ u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) void *data = (void *)(long)ctx->data; struct iphdr *iph = data + nh_off; struct udphdr *udph; + u16 dport; if (iph + 1 > data_end) return 0; @@ -110,7 +111,8 @@ u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) if (udph + 1 > data_end) return 0; - return bpf_ntohs(udph->dest); + dport = bpf_ntohs(udph->dest); + return dport; } static __always_inline diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index cfaf7e50e4..b5f03cb17a 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -155,7 +155,7 @@ static void read_route(struct nlmsghdr *nh, int nll) printf("%d\n", nh->nlmsg_type); memset(&route, 0, sizeof(route)); - printf("Destination Gateway Genmask Metric Iface\n"); + printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n"); for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { rt_msg = (struct rtmsg *)NLMSG_DATA(nh); rtm_family = rt_msg->rtm_family; @@ -207,7 
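trace_output_user.c above goes back to the three-argument perf_buffer__new() that receives its callbacks via struct perf_buffer_opts; newer libbpf passes them as direct arguments, as the removed lines show. A sketch against that legacy interface (assumes an old libbpf; map_fd is left as a placeholder, so this exits cleanly instead of polling):

#include <bpf/libbpf.h>
#include <stdio.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	printf("cpu %d: %u bytes\n", cpu, size);
}

int main(void)
{
	struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
	int map_fd = -1;	/* placeholder for a PERF_EVENT_ARRAY map fd */
	struct perf_buffer *pb;

	/* legacy three-argument form taking the callbacks through opts */
	pb = perf_buffer__new(map_fd, 8, &pb_opts);
	if (libbpf_get_error(pb)) {
		fprintf(stderr, "perf_buffer__new failed\n");
		return 1;
	}
	while (perf_buffer__poll(pb, 100) >= 0)
		;
	perf_buffer__free(pb);
	return 0;
}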
+207,6 @@ static void read_route(struct nlmsghdr *nh, int nll) int metric; __be32 gw; } *prefix_value; - struct in_addr dst_addr, gw_addr, mask_addr; prefix_key = alloca(sizeof(*prefix_key) + 3); prefix_value = alloca(sizeof(*prefix_value)); @@ -235,17 +234,14 @@ static void read_route(struct nlmsghdr *nh, int nll) for (i = 0; i < 4; i++) prefix_key->data[i] = (route.dst >> i * 8) & 0xff; - dst_addr.s_addr = route.dst; - printf("%-16s", inet_ntoa(dst_addr)); - - gw_addr.s_addr = route.gw; - printf("%-16s", inet_ntoa(gw_addr)); - - mask_addr.s_addr = htonl(~(0xffffffffU >> route.dst_len)); - printf("%-16s%-7d%s\n", inet_ntoa(mask_addr), + printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n", + (int)prefix_key->data[0], + (int)prefix_key->data[1], + (int)prefix_key->data[2], + (int)prefix_key->data[3], + route.gw, route.dst_len, route.metric, route.iface_name); - if (bpf_map_lookup_elem(lpm_map_fd, prefix_key, prefix_value) < 0) { for (i = 0; i < 4; i++) @@ -397,12 +393,8 @@ static void read_arp(struct nlmsghdr *nh, int nll) if (nh->nlmsg_type == RTM_GETNEIGH) printf("READING arp entry\n"); - printf("Address HwAddress\n"); + printf("Address\tHwAddress\n"); for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { - struct in_addr dst_addr; - char mac_str[18]; - int len = 0, i; - rt_msg = (struct ndmsg *)NLMSG_DATA(nh); rt_attr = (struct rtattr *)RTM_RTA(rt_msg); ndm_family = rt_msg->ndm_family; @@ -423,14 +415,7 @@ static void read_arp(struct nlmsghdr *nh, int nll) } arp_entry.dst = atoi(dsts); arp_entry.mac = atol(mac); - - dst_addr.s_addr = arp_entry.dst; - for (i = 0; i < 6; i++) - len += snprintf(mac_str + len, 18 - len, "%02llx%s", - ((arp_entry.mac >> i * 8) & 0xff), - i < 5 ? ":" : ""); - printf("%-16s%s\n", inet_ntoa(dst_addr), mac_str); - + printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); if (ndm_family == AF_INET) { if (bpf_map_lookup_elem(exact_match_map_fd, &arp_entry.dst, @@ -687,7 +672,7 @@ int main(int ac, char **argv) if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - printf("\n******************loading bpf file*********************\n"); + printf("\n**************loading bpf file*********************\n\n\n"); if (!prog_fd) { printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; @@ -737,9 +722,9 @@ int main(int ac, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - printf("\n*******************ROUTE TABLE*************************\n"); + printf("*******************ROUTE TABLE*************************\n\n\n"); get_route_table(AF_INET); - printf("\n*******************ARP TABLE***************************\n"); + printf("*******************ARP TABLE***************************\n\n\n"); get_arp_table(AF_INET); if (monitor_route() < 0) { printf("Error in receiving route update"); diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 587eacb491..495e09897b 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -110,9 +110,12 @@ static void usage(const char *prog) int main(int argc, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct perf_buffer_opts pb_opts = {}; const char *optstr = "FS"; int prog_fd, map_fd, opt; - struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; char filename[256]; @@ -141,21 +144,17 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; - obj = bpf_object__open_file(filename, NULL); - if 
(libbpf_get_error(obj)) + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); - - err = bpf_object__load(obj); - if (err) + if (!prog_fd) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; + } - prog_fd = bpf_program__fd(prog); - - map = bpf_object__next_map(obj, NULL); + map = bpf_map__next(NULL, obj); if (!map) { printf("finding a map in obj file failed\n"); return 1; @@ -182,7 +181,8 @@ int main(int argc, char **argv) return 1; } - pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); + pb_opts.sample_cb = print_bpf_output; + pb = perf_buffer__new(map_fd, 8, &pb_opts); err = libbpf_get_error(pb); if (err) { perror("perf_buffer setup failed"); diff --git a/samples/bpf/xdpsock_ctrl_proc.c b/samples/bpf/xdpsock_ctrl_proc.c index cc4408797a..384e62e3c6 100644 --- a/samples/bpf/xdpsock_ctrl_proc.c +++ b/samples/bpf/xdpsock_ctrl_proc.c @@ -15,9 +15,6 @@ #include #include "xdpsock.h" -/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - static const char *opt_if = ""; static struct option long_options[] = { diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index aa50864e44..49d7a6ad7e 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -31,16 +30,12 @@ #include #include #include -#include #include #include #include #include "xdpsock.h" -/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -58,27 +53,12 @@ #define DEBUG_HEXDUMP 0 -#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ -#define VLAN_PRIO_SHIFT 13 -#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ -#define VLAN_VID__DEFAULT 1 -#define VLAN_PRI__DEFAULT 0 - -#define NSEC_PER_SEC 1000000000UL -#define NSEC_PER_USEC 1000 - -#define SCHED_PRI__DEFAULT 0 - typedef __u64 u64; typedef __u32 u32; typedef __u16 u16; typedef __u8 u8; static unsigned long prev_time; -static long tx_cycle_diff_min; -static long tx_cycle_diff_max; -static double tx_cycle_diff_ave; -static long tx_cycle_cnt; enum benchmark_type { BENCH_RXDROP = 0, @@ -98,23 +78,14 @@ static u32 opt_batch_size = 64; static int opt_pkt_count; static u16 opt_pkt_size = MIN_PKT_SIZE; static u32 opt_pkt_fill_pattern = 0x12345678; -static bool opt_vlan_tag; -static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT; -static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT; -static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe, - 0x9e, 0x7f, 0x71 }}; -static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7, - 0x98, 0x3a, 0xc0 }}; static bool opt_extra_stats; static bool opt_quiet; static bool opt_app_stats; static const char *opt_irq_str = ""; static u32 irq_no; static int irqs_at_init = -1; -static u32 sequence; static int opt_poll; static int opt_interval = 1; -static int opt_retries = 3; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static u32 opt_umem_flags; static int opt_unaligned_chunks; @@ -126,27 +97,6 @@ static u32 opt_num_xsks = 1; static u32 prog_id; static bool opt_busy_poll; static bool opt_reduced_cap; -static clockid_t opt_clock = CLOCK_MONOTONIC; -static unsigned long opt_tx_cycle_ns; -static int opt_schpolicy = SCHED_OTHER; -static int opt_schprio = SCHED_PRI__DEFAULT; -static bool opt_tstamp; - -struct vlan_ethhdr { 
- unsigned char h_dest[6]; - unsigned char h_source[6]; - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - __be16 h_vlan_encapsulated_proto; -}; - -#define PKTGEN_MAGIC 0xbe9be955 -struct pktgen_hdr { - __be32 pgh_magic; - __be32 seq_num; - __be32 tv_sec; - __be32 tv_usec; -}; struct xsk_ring_stats { unsigned long rx_npkts; @@ -203,63 +153,15 @@ struct xsk_socket_info { u32 outstanding_tx; }; -static const struct clockid_map { - const char *name; - clockid_t clockid; -} clockids_map[] = { - { "REALTIME", CLOCK_REALTIME }, - { "TAI", CLOCK_TAI }, - { "BOOTTIME", CLOCK_BOOTTIME }, - { "MONOTONIC", CLOCK_MONOTONIC }, - { NULL } -}; - -static const struct sched_map { - const char *name; - int policy; -} schmap[] = { - { "OTHER", SCHED_OTHER }, - { "FIFO", SCHED_FIFO }, - { NULL } -}; - static int num_socks; struct xsk_socket_info *xsks[MAX_SOCKS]; int sock; -static int get_clockid(clockid_t *id, const char *name) -{ - const struct clockid_map *clk; - - for (clk = clockids_map; clk->name; clk++) { - if (strcasecmp(clk->name, name) == 0) { - *id = clk->clockid; - return 0; - } - } - - return -1; -} - -static int get_schpolicy(int *policy, const char *name) -{ - const struct sched_map *sch; - - for (sch = schmap; sch->name; sch++) { - if (strcasecmp(sch->name, name) == 0) { - *policy = sch->policy; - return 0; - } - } - - return -1; -} - static unsigned long get_nsecs(void) { struct timespec ts; - clock_gettime(opt_clock, &ts); + clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec * 1000000000UL + ts.tv_nsec; } @@ -352,15 +254,6 @@ static void dump_app_stats(long dt) xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos; xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls; } - - if (opt_tx_cycle_ns) { - printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n", - "", "period", "min", "ave", "max", "cycle"); - printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n", - "Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min, - (long)(tx_cycle_diff_ave / tx_cycle_cnt), - tx_cycle_diff_max, tx_cycle_cnt); - } } static bool get_interrupt_number(void) @@ -844,69 +737,29 @@ static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, #define ETH_FCS_SIZE 4 -#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \ - sizeof(struct ethhdr)) -#define PKTGEN_HDR_SIZE (opt_tstamp ? 
sizeof(struct pktgen_hdr) : 0) -#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \ - sizeof(struct udphdr) + PKTGEN_HDR_SIZE) -#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \ - sizeof(struct udphdr)) -#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \ - ETH_FCS_SIZE) +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) -#define IP_PKT_SIZE (PKT_SIZE - ETH_HDR_SIZE) +#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) -#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - \ - (sizeof(struct udphdr) + PKTGEN_HDR_SIZE)) +#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static void gen_eth_hdr_data(void) { - struct pktgen_hdr *pktgen_hdr; - struct udphdr *udp_hdr; - struct iphdr *ip_hdr; - - if (opt_vlan_tag) { - struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data; - u16 vlan_tci = 0; - - udp_hdr = (struct udphdr *)(pkt_data + - sizeof(struct vlan_ethhdr) + - sizeof(struct iphdr)); - ip_hdr = (struct iphdr *)(pkt_data + - sizeof(struct vlan_ethhdr)); - pktgen_hdr = (struct pktgen_hdr *)(pkt_data + - sizeof(struct vlan_ethhdr) + - sizeof(struct iphdr) + - sizeof(struct udphdr)); - /* ethernet & VLAN header */ - memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN); - memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN); - veth_hdr->h_vlan_proto = htons(ETH_P_8021Q); - vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK; - vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; - veth_hdr->h_vlan_TCI = htons(vlan_tci); - veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP); - } else { - struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; - - udp_hdr = (struct udphdr *)(pkt_data + - sizeof(struct ethhdr) + - sizeof(struct iphdr)); - ip_hdr = (struct iphdr *)(pkt_data + - sizeof(struct ethhdr)); - pktgen_hdr = (struct pktgen_hdr *)(pkt_data + + struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + - sizeof(struct iphdr) + - sizeof(struct udphdr)); - /* ethernet header */ - memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN); - memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN); - eth_hdr->h_proto = htons(ETH_P_IP); - } + sizeof(struct iphdr)); + struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + + sizeof(struct ethhdr)); + struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + /* ethernet header */ + memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); + memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); /* IP header */ ip_hdr->version = IPVERSION; @@ -929,9 +782,6 @@ static void gen_eth_hdr_data(void) udp_hdr->dest = htons(0x1000); udp_hdr->len = htons(UDP_PKT_SIZE); - if (opt_tstamp) - pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC); - /* UDP data */ memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, UDP_PKT_DATA_SIZE); @@ -1055,7 +905,6 @@ static struct option long_options[] = { {"xdp-skb", no_argument, 0, 'S'}, {"xdp-native", no_argument, 0, 'N'}, {"interval", required_argument, 0, 'n'}, - {"retries", required_argument, 0, 'O'}, {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, {"frame-size", required_argument, 0, 'f'}, @@ -1064,20 +913,10 @@ static struct option long_options[] = { {"shared-umem", no_argument, 0, 'M'}, {"force", no_argument, 0, 'F'}, {"duration", required_argument, 0, 'd'}, - {"clock", required_argument, 0, 'w'}, 
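The long_options[] table being trimmed here (it continues directly below) must stay in sync with the optstring handed to getopt_long() in parse_command_line() further down: each table entry names the short-option character it aliases, and that character, plus a trailing ':' when the option takes an argument, must also appear in the optstring. A stripped-down sketch of the pattern, using two hypothetical options:

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

static struct option long_opts[] = {
	{"interval", required_argument, 0, 'n'},	/* aliases -n <secs> */
	{"quiet",    no_argument,       0, 'Q'},	/* aliases -Q */
	{0, 0, 0, 0}
};

static void parse_args(int argc, char **argv)
{
	int interval = 1, quiet = 0, c;

	/* "n:Q": 'n' expects an argument (the ':'), 'Q' does not. */
	while ((c = getopt_long(argc, argv, "n:Q", long_opts, NULL)) != -1) {
		switch (c) {
		case 'n':
			interval = atoi(optarg);
			break;
		case 'Q':
			quiet = 1;
			break;
		default:
			fprintf(stderr, "usage: %s [-n secs] [-Q]\n", argv[0]);
			exit(EXIT_FAILURE);
		}
	}
	printf("interval=%d quiet=%d\n", interval, quiet);
}

That coupling is why every option dropped by this revert is deleted in two places: its table entry here and its character in the "Frtli:q:pSNn:..." optstring below.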
{"batch-size", required_argument, 0, 'b'}, {"tx-pkt-count", required_argument, 0, 'C'}, {"tx-pkt-size", required_argument, 0, 's'}, {"tx-pkt-pattern", required_argument, 0, 'P'}, - {"tx-vlan", no_argument, 0, 'V'}, - {"tx-vlan-id", required_argument, 0, 'J'}, - {"tx-vlan-pri", required_argument, 0, 'K'}, - {"tx-dmac", required_argument, 0, 'G'}, - {"tx-smac", required_argument, 0, 'H'}, - {"tx-cycle", required_argument, 0, 'T'}, - {"tstamp", no_argument, 0, 'y'}, - {"policy", required_argument, 0, 'W'}, - {"schpri", required_argument, 0, 'U'}, {"extra-stats", no_argument, 0, 'x'}, {"quiet", no_argument, 0, 'Q'}, {"app-stats", no_argument, 0, 'a'}, @@ -1101,7 +940,6 @@ static void usage(const char *prog) " -S, --xdp-skb=n Use XDP skb-mod\n" " -N, --xdp-native=n Enforce XDP native mode\n" " -n, --interval=n Specify statistics update interval (default 1 sec).\n" - " -O, --retries=n Specify time-out retries (1s interval) attempt (default 3).\n" " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" @@ -1111,7 +949,6 @@ static void usage(const char *prog) " -F, --force Force loading the XDP prog\n" " -d, --duration=n Duration in secs to run command.\n" " Default: forever.\n" - " -w, --clock=CLOCK Clock NAME (default MONOTONIC).\n" " -b, --batch-size=n Batch size for sending or receiving\n" " packets. Default: %d\n" " -C, --tx-pkt-count=n Number of packets to send.\n" @@ -1120,15 +957,6 @@ static void usage(const char *prog) " (Default: %d bytes)\n" " Min size: %d, Max size %d.\n" " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" - " -V, --tx-vlan Send VLAN tagged packets (For -t|--txonly)\n" - " -J, --tx-vlan-id=n Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n" - " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n" - " -G, --tx-dmac= Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" - " -H, --tx-smac= Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" - " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n" - " -y, --tstamp Add time-stamp to packet (For -t|--txonly).\n" - " -W, --policy=POLICY Schedule policy. Default: SCHED_OTHER\n" - " -U, --schpri=n Schedule priority. Default: %d\n" " -x, --extra-stats Display extra statistics.\n" " -Q, --quiet Do not display any stats.\n" " -a, --app-stats Display application (syscall) statistics.\n" @@ -1138,9 +966,7 @@ static void usage(const char *prog) "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, - XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern, - VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT, - SCHED_PRI__DEFAULT); + XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); exit(EXIT_FAILURE); } @@ -1152,8 +978,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, - "Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR", long_options, &option_index); if (c == -1) break; @@ -1187,17 +1012,6 @@ static void parse_command_line(int argc, char **argv) case 'n': opt_interval = atoi(optarg); break; - case 'w': - if (get_clockid(&opt_clock, optarg)) { - fprintf(stderr, - "ERROR: Invalid clock %s. 
Default to CLOCK_MONOTONIC.\n", - optarg); - opt_clock = CLOCK_MONOTONIC; - } - break; - case 'O': - opt_retries = atoi(optarg); - break; case 'z': opt_xdp_bind_flags |= XDP_ZEROCOPY; break; @@ -1245,49 +1059,6 @@ static void parse_command_line(int argc, char **argv) case 'P': opt_pkt_fill_pattern = strtol(optarg, NULL, 16); break; - case 'V': - opt_vlan_tag = true; - break; - case 'J': - opt_pkt_vlan_id = atoi(optarg); - break; - case 'K': - opt_pkt_vlan_pri = atoi(optarg); - break; - case 'G': - if (!ether_aton_r(optarg, - (struct ether_addr *)&opt_txdmac)) { - fprintf(stderr, "Invalid dmac address:%s\n", - optarg); - usage(basename(argv[0])); - } - break; - case 'H': - if (!ether_aton_r(optarg, - (struct ether_addr *)&opt_txsmac)) { - fprintf(stderr, "Invalid smac address:%s\n", - optarg); - usage(basename(argv[0])); - } - break; - case 'T': - opt_tx_cycle_ns = atoi(optarg); - opt_tx_cycle_ns *= NSEC_PER_USEC; - break; - case 'y': - opt_tstamp = 1; - break; - case 'W': - if (get_schpolicy(&opt_schpolicy, optarg)) { - fprintf(stderr, - "ERROR: Invalid policy %s. Default to SCHED_OTHER.\n", - optarg); - opt_schpolicy = SCHED_OTHER; - } - break; - case 'U': - opt_schprio = atoi(optarg); - break; case 'x': opt_extra_stats = 1; break; @@ -1493,22 +1264,16 @@ static void rx_drop_all(void) } } -static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, - int batch_size, unsigned long tx_ns) +static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) { - u32 idx, tv_sec, tv_usec; + u32 idx; unsigned int i; while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) { complete_tx_only(xsk, batch_size); if (benchmark_done) - return 0; - } - - if (opt_tstamp) { - tv_sec = (u32)(tx_ns / NSEC_PER_SEC); - tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000); + return; } for (i = 0; i < batch_size; i++) { @@ -1516,21 +1281,6 @@ static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, idx + i); tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; tx_desc->len = PKT_SIZE; - - if (opt_tstamp) { - struct pktgen_hdr *pktgen_hdr; - u64 addr = tx_desc->addr; - char *pkt; - - pkt = xsk_umem__get_data(xsk->umem->buffer, addr); - pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET); - - pktgen_hdr->seq_num = htonl(sequence++); - pktgen_hdr->tv_sec = htonl(tv_sec); - pktgen_hdr->tv_usec = htonl(tv_usec); - - hex_dump(pkt, PKT_SIZE, addr); - } } xsk_ring_prod__submit(&xsk->tx, batch_size); @@ -1539,8 +1289,6 @@ static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, *frame_nb += batch_size; *frame_nb %= NUM_FRAMES; complete_tx_only(xsk, batch_size); - - return batch_size; } static inline int get_batch_size(int pkt_cnt) @@ -1567,48 +1315,23 @@ static void complete_tx_only_all(void) pending = !!xsks[i]->outstanding_tx; } } - sleep(1); - } while (pending && opt_retries-- > 0); + } while (pending); } static void tx_only_all(void) { struct pollfd fds[MAX_SOCKS] = {}; u32 frame_nb[MAX_SOCKS] = {}; - unsigned long next_tx_ns = 0; int pkt_cnt = 0; int i, ret; - if (opt_poll && opt_tx_cycle_ns) { - fprintf(stderr, - "Error: --poll and --tx-cycles are both set\n"); - return; - } - for (i = 0; i < num_socks; i++) { fds[0].fd = xsk_socket__fd(xsks[i]->xsk); fds[0].events = POLLOUT; } - if (opt_tx_cycle_ns) { - /* Align Tx time to micro-second boundary */ - next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) * - NSEC_PER_USEC; - next_tx_ns += opt_tx_cycle_ns; - - /* Initialize periodic Tx scheduling variance */ - tx_cycle_diff_min = 1000000000; - tx_cycle_diff_max = 0; - 
tx_cycle_diff_ave = 0.0; - } - while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { int batch_size = get_batch_size(pkt_cnt); - unsigned long tx_ns = 0; - struct timespec next; - int tx_cnt = 0; - long diff; - int err; if (opt_poll) { for (i = 0; i < num_socks; i++) @@ -1621,43 +1344,13 @@ static void tx_only_all(void) continue; } - if (opt_tx_cycle_ns) { - next.tv_sec = next_tx_ns / NSEC_PER_SEC; - next.tv_nsec = next_tx_ns % NSEC_PER_SEC; - err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL); - if (err) { - if (err != EINTR) - fprintf(stderr, - "clock_nanosleep failed. Err:%d errno:%d\n", - err, errno); - break; - } - - /* Measure periodic Tx scheduling variance */ - tx_ns = get_nsecs(); - diff = tx_ns - next_tx_ns; - if (diff < tx_cycle_diff_min) - tx_cycle_diff_min = diff; - - if (diff > tx_cycle_diff_max) - tx_cycle_diff_max = diff; - - tx_cycle_diff_ave += (double)diff; - tx_cycle_cnt++; - } else if (opt_tstamp) { - tx_ns = get_nsecs(); - } - for (i = 0; i < num_socks; i++) - tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns); + tx_only(xsks[i], &frame_nb[i], batch_size); - pkt_cnt += tx_cnt; + pkt_cnt += batch_size; if (benchmark_done) break; - - if (opt_tx_cycle_ns) - next_tx_ns += opt_tx_cycle_ns; } if (opt_pkt_count) @@ -1888,7 +1581,6 @@ int main(int argc, char **argv) struct __user_cap_data_struct data[2] = { { 0 } }; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; bool rx = false, tx = false; - struct sched_param schparam; struct xsk_umem_info *umem; struct bpf_object *obj; int xsks_map_fd = 0; @@ -1951,9 +1643,6 @@ int main(int argc, char **argv) apply_setsockopt(xsks[i]); if (opt_bench == BENCH_TXONLY) { - if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN) - opt_pkt_size = PKTGEN_SIZE_MIN; - gen_eth_hdr_data(); for (i = 0; i < NUM_FRAMES; i++) @@ -1993,16 +1682,6 @@ int main(int argc, char **argv) prev_time = get_nsecs(); start_time = prev_time; - /* Configure sched priority for better wake-up accuracy */ - memset(&schparam, 0, sizeof(schparam)); - schparam.sched_priority = opt_schprio; - ret = sched_setscheduler(0, opt_schpolicy, &schparam); - if (ret) { - fprintf(stderr, "Error(%d) in setting priority(%d): %s\n", - errno, opt_schprio, strerror(errno)); - goto out; - } - if (opt_bench == BENCH_RXDROP) rx_drop_all(); else if (opt_bench == BENCH_TXONLY) @@ -2010,7 +1689,6 @@ int main(int argc, char **argv) else l2fwd_all(); -out: benchmark_done = true; if (!opt_quiet) diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c index 52e7c4ffd2..1cd97c84c3 100644 --- a/samples/bpf/xsk_fwd.c +++ b/samples/bpf/xsk_fwd.c @@ -27,9 +27,6 @@ #include #include -/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) typedef __u64 u64; diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile index faf8cdb79c..4ce896e10b 100644 --- a/samples/ftrace/Makefile +++ b/samples/ftrace/Makefile @@ -3,8 +3,6 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o -obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o -obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o CFLAGS_sample-trace-array.o := -I$(src) obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 2c7c318935..89e6bf27cd 
100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -2,7 +2,6 @@ #include #include #include -#include extern void my_direct_func1(void); extern void my_direct_func2(void); @@ -22,8 +21,6 @@ extern void my_tramp2(void *); static unsigned long my_ip = (unsigned long)schedule; -#ifdef CONFIG_X86_64 - asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" @@ -34,7 +31,7 @@ asm ( " call my_direct_func1\n" " leave\n" " .size my_tramp1, .-my_tramp1\n" - ASM_RET +" ret\n" " .type my_tramp2, @function\n" " .globl my_tramp2\n" " my_tramp2:" @@ -42,52 +39,11 @@ asm ( " movq %rsp, %rbp\n" " call my_direct_func2\n" " leave\n" - ASM_RET +" ret\n" " .size my_tramp2, .-my_tramp2\n" " .popsection\n" ); -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_S390 - -asm ( -" .pushsection .text, \"ax\", @progbits\n" -" .type my_tramp1, @function\n" -" .globl my_tramp1\n" -" my_tramp1:" -" lgr %r1,%r15\n" -" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n" -" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n" -" brasl %r14,my_direct_func1\n" -" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n" -" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" lgr %r1,%r0\n" -" br %r1\n" -" .size my_tramp1, .-my_tramp1\n" -" .type my_tramp2, @function\n" -" .globl my_tramp2\n" -" my_tramp2:" -" lgr %r1,%r15\n" -" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n" -" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n" -" brasl %r14,my_direct_func2\n" -" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n" -" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" lgr %r1,%r0\n" -" br %r1\n" -" .size my_tramp2, .-my_tramp2\n" -" .popsection\n" -); - -#endif /* CONFIG_S390 */ - static unsigned long my_tramp = (unsigned long)my_tramp1; static unsigned long tramps[2] = { (unsigned long)my_tramp1, diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index c93fb0e951..11b99325f3 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -3,7 +3,6 @@ #include /* for handle_mm_fault() */ #include -#include extern void my_direct_func(struct vm_area_struct *vma, unsigned long address, unsigned int flags); @@ -17,8 +16,6 @@ void my_direct_func(struct vm_area_struct *vma, extern void my_tramp(void *); -#ifdef CONFIG_X86_64 - asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" @@ -34,36 +31,11 @@ asm ( " popq %rsi\n" " popq %rdi\n" " leave\n" - ASM_RET +" ret\n" " .size my_tramp, .-my_tramp\n" " .popsection\n" ); -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_S390 - -asm ( -" .pushsection .text, \"ax\", @progbits\n" -" .type my_tramp, @function\n" -" .globl my_tramp\n" -" my_tramp:" -" lgr %r1,%r15\n" -" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n" -" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n" -" brasl %r14,my_direct_func\n" -" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n" -" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" lgr %r1,%r0\n" -" br %r1\n" -" .size my_tramp, .-my_tramp\n" -" .popsection\n" -); - -#endif /* CONFIG_S390 */ static int __init 
ftrace_direct_init(void) { diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 8b551e503a..642c50b5f7 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -3,7 +3,6 @@ #include /* for wake_up_process() */ #include -#include extern void my_direct_func(struct task_struct *p); @@ -14,8 +13,6 @@ void my_direct_func(struct task_struct *p) extern void my_tramp(void *); -#ifdef CONFIG_X86_64 - asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" @@ -27,36 +24,11 @@ asm ( " call my_direct_func\n" " popq %rdi\n" " leave\n" - ASM_RET +" ret\n" " .size my_tramp, .-my_tramp\n" " .popsection\n" ); -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_S390 - -asm ( -" .pushsection .text, \"ax\", @progbits\n" -" .type my_tramp, @function\n" -" .globl my_tramp\n" -" my_tramp:" -" lgr %r1,%r15\n" -" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n" -" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n" -" brasl %r14,my_direct_func\n" -" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n" -" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" -" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" -" lgr %r1,%r0\n" -" br %r1\n" -" .size my_tramp, .-my_tramp\n" -" .popsection\n" -); - -#endif /* CONFIG_S390 */ static int __init ftrace_direct_init(void) { diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index 642d0748c1..5a90aa5278 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -22,10 +22,10 @@ #define PROC_FIFO "bytestream-fifo" /* lock for procfs read access */ -static DEFINE_MUTEX(read_access); +static DEFINE_MUTEX(read_lock); /* lock for procfs write access */ -static DEFINE_MUTEX(write_access); +static DEFINE_MUTEX(write_lock); /* * define DYNAMIC in this example for a dynamically allocated fifo. @@ -116,12 +116,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, int ret; unsigned int copied; - if (mutex_lock_interruptible(&write_access)) + if (mutex_lock_interruptible(&write_lock)) return -ERESTARTSYS; ret = kfifo_from_user(&test, buf, count, &copied); - mutex_unlock(&write_access); + mutex_unlock(&write_lock); if (ret) return ret; @@ -134,12 +134,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, int ret; unsigned int copied; - if (mutex_lock_interruptible(&read_access)) + if (mutex_lock_interruptible(&read_lock)) return -ERESTARTSYS; ret = kfifo_to_user(&test, buf, count, &copied); - mutex_unlock(&read_access); + mutex_unlock(&read_lock); if (ret) return ret; diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c index c61482ba94..e5403d8c97 100644 --- a/samples/kfifo/inttype-example.c +++ b/samples/kfifo/inttype-example.c @@ -22,10 +22,10 @@ #define PROC_FIFO "int-fifo" /* lock for procfs read access */ -static DEFINE_MUTEX(read_access); +static DEFINE_MUTEX(read_lock); /* lock for procfs write access */ -static DEFINE_MUTEX(write_access); +static DEFINE_MUTEX(write_lock); /* * define DYNAMIC in this example for a dynamically allocated fifo. 
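The kfifo hunks above and below only revert a mutex rename (read_access/write_access back to read_lock/write_lock), but the locking pattern they touch deserves a note: a kfifo is safe without locking only for a single concurrent reader plus a single concurrent writer, so the procfs samples serialize readers and writers with separate mutexes. A condensed kernel-side sketch assembled from the sample code:

#include <linux/fs.h>
#include <linux/kfifo.h>
#include <linux/mutex.h>

static DEFINE_KFIFO(fifo, unsigned char, 128);
static DEFINE_MUTEX(read_lock);		/* serializes concurrent readers */
static DEFINE_MUTEX(write_lock);	/* serializes concurrent writers */

static ssize_t fifo_write(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned int copied;
	int ret;

	if (mutex_lock_interruptible(&write_lock))
		return -ERESTARTSYS;
	ret = kfifo_from_user(&fifo, buf, count, &copied);
	mutex_unlock(&write_lock);

	return ret ? ret : copied;
}

static ssize_t fifo_read(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned int copied;
	int ret;

	if (mutex_lock_interruptible(&read_lock))
		return -ERESTARTSYS;
	ret = kfifo_to_user(&fifo, buf, count, &copied);
	mutex_unlock(&read_lock);

	return ret ? ret : copied;
}

Two mutexes instead of one allow a reader and a writer to run concurrently while still preventing two readers (or two writers) from interleaving their copies.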
@@ -109,12 +109,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, int ret; unsigned int copied; - if (mutex_lock_interruptible(&write_access)) + if (mutex_lock_interruptible(&write_lock)) return -ERESTARTSYS; ret = kfifo_from_user(&test, buf, count, &copied); - mutex_unlock(&write_access); + mutex_unlock(&write_lock); if (ret) return ret; @@ -127,12 +127,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, int ret; unsigned int copied; - if (mutex_lock_interruptible(&read_access)) + if (mutex_lock_interruptible(&read_lock)) return -ERESTARTSYS; ret = kfifo_to_user(&test, buf, count, &copied); - mutex_unlock(&read_access); + mutex_unlock(&read_lock); if (ret) return ret; diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c index e4087b2d3f..f64f3d62d6 100644 --- a/samples/kfifo/record-example.c +++ b/samples/kfifo/record-example.c @@ -22,10 +22,10 @@ #define PROC_FIFO "record-fifo" /* lock for procfs read access */ -static DEFINE_MUTEX(read_access); +static DEFINE_MUTEX(read_lock); /* lock for procfs write access */ -static DEFINE_MUTEX(write_access); +static DEFINE_MUTEX(write_lock); /* * define DYNAMIC in this example for a dynamically allocated fifo. @@ -123,12 +123,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, int ret; unsigned int copied; - if (mutex_lock_interruptible(&write_access)) + if (mutex_lock_interruptible(&write_lock)) return -ERESTARTSYS; ret = kfifo_from_user(&test, buf, count, &copied); - mutex_unlock(&write_access); + mutex_unlock(&write_lock); if (ret) return ret; @@ -141,12 +141,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, int ret; unsigned int copied; - if (mutex_lock_interruptible(&read_access)) + if (mutex_lock_interruptible(&read_lock)) return -ERESTARTSYS; ret = kfifo_to_user(&test, buf, count, &copied); - mutex_unlock(&read_access); + mutex_unlock(&read_lock); if (ret) return ret; diff --git a/samples/nitro_enclaves/ne_ioctl_sample.c b/samples/nitro_enclaves/ne_ioctl_sample.c index 765b131c73..480b763142 100644 --- a/samples/nitro_enclaves/ne_ioctl_sample.c +++ b/samples/nitro_enclaves/ne_ioctl_sample.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. */ /** @@ -185,6 +185,7 @@ static int ne_create_vm(int ne_dev_fd, unsigned long *slot_uid, int *enclave_fd) return 0; } + /** * ne_poll_enclave_fd() - Thread function for polling the enclave fd. * @data: Argument provided for the polling function. @@ -559,8 +560,8 @@ static int ne_add_vcpu(int enclave_fd, unsigned int *vcpu_id) default: printf("Error in add vcpu [%m]\n"); - } + } return rc; } @@ -637,7 +638,7 @@ static int ne_start_enclave(int enclave_fd, struct ne_enclave_start_info *encla } /** - * ne_start_enclave_check_booted() - Start the enclave and wait for a heartbeat + * ne_start_enclave_check_booted() - Start the enclave and wait for a hearbeat * from it, on a newly created vsock channel, * to check it has booted. * @enclave_fd : The file descriptor associated with the enclave. diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 417e48a4c4..0cc9816fe8 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -62,9 +62,9 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #define EXPAND(...) __VA_ARGS__ /* Ensure that we load the logically correct offset. 
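The seccomp changes that follow revert bpf-helper.h from the compiler-provided __BYTE_ORDER__/__ORDER_LITTLE_ENDIAN__ macros back to the libc __BYTE_ORDER family (both exist to pick the offset of the low 32 bits of a 64-bit syscall argument within struct seccomp_data) and drop dropper.c's errno == -1 / SECCOMP_RET_KILL extension. For orientation, a self-contained filter in the dropper style that fails one syscall with a fixed errno (a sketch with minimal error handling):

#include <stddef.h>
#include <sys/prctl.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

static int install_filter(unsigned int arch, unsigned int nr, unsigned int error)
{
	struct sock_filter filter[] = {
		/* Allow everything if the running arch is not the one filtered. */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, arch)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, arch, 0, 3),
		/* Match the syscall number... */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 1),
		/* ...and fail it with the chosen errno. */
		BPF_STMT(BPF_RET | BPF_K,
			 SECCOMP_RET_ERRNO | (error & SECCOMP_RET_DATA)),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
		.filter = filter,
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return 1;
	return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) != 0;
}

The arch check comes first because syscall numbers are only meaningful relative to the AUDIT_ARCH_* value reported in seccomp_data, which is exactly what the dropper usage hint spells out.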
*/ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN #define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN #define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) #else #error "Unknown endianness" @@ -85,10 +85,10 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #elif __BITS_PER_LONG == 64 /* Ensure that we load the logically correct offset. */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN #define ENDIAN(_lo, _hi) _lo, _hi #define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN #define ENDIAN(_lo, _hi) _hi, _lo #define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) #endif diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c index 4bca4b70f6..cc0648eb38 100644 --- a/samples/seccomp/dropper.c +++ b/samples/seccomp/dropper.c @@ -25,7 +25,7 @@ #include #include -static int install_filter(int arch, int nr, int error) +static int install_filter(int nr, int arch, int error) { struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, @@ -42,10 +42,6 @@ static int install_filter(int arch, int nr, int error) .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), .filter = filter, }; - if (error == -1) { - struct sock_filter kill = BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL); - filter[4] = kill; - } if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("prctl(NO_NEW_PRIVS)"); return 1; @@ -61,10 +57,9 @@ int main(int argc, char **argv) { if (argc < 5) { fprintf(stderr, "Usage:\n" - "dropper []\n" + "dropper []\n" "Hint: AUDIT_ARCH_I386: 0x%X\n" " AUDIT_ARCH_X86_64: 0x%X\n" - " errno == -1 means SECCOMP_RET_KILL\n" "\n", AUDIT_ARCH_I386, AUDIT_ARCH_X86_64); return 1; } diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 4d34dc0b0f..1a72b7d95c 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -21,7 +21,6 @@ static const char *random_strings[] = { static void simple_thread_func(int cnt) { - unsigned long bitmask[1] = {0xdeadbeefUL}; int array[6]; int len = cnt % 5; int i; @@ -44,8 +43,6 @@ static void simple_thread_func(int cnt) trace_foo_with_template_cond("prints other times", cnt); trace_foo_with_template_print("I have to be different", cnt); - - trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask); } static int simple_thread(void *arg) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index cbbbb83bec..e61471ab7d 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -155,7 +155,7 @@ * * To assign this string, use the helper macro __assign_str_len(). * - * __assign_str_len(foo, bar, len); + * __assign_str(foo, bar, len); * * Then len + 1 is allocated to the ring buffer, and a nul terminating * byte is added. This is similar to: @@ -506,39 +506,6 @@ DEFINE_EVENT_PRINT(foo_template, foo_with_template_print, TP_ARGS(foo, bar), TP_printk("bar %s %d", __get_str(foo), __entry->bar)); -/* - * There are yet another __rel_loc dynamic data attribute. If you - * use __rel_dynamic_array() and __rel_string() etc. macros, you - * can use this attribute. 
There is no difference from the viewpoint - * of functionality with/without 'rel' but the encoding is a bit - * different. This is expected to be used with user-space event, - * there is no reason that the kernel event use this, but only for - * testing. - */ - -TRACE_EVENT(foo_rel_loc, - - TP_PROTO(const char *foo, int bar, unsigned long *mask), - - TP_ARGS(foo, bar, mask), - - TP_STRUCT__entry( - __rel_string( foo, foo ) - __field( int, bar ) - __rel_bitmask( bitmask, - BITS_PER_BYTE * sizeof(unsigned long) ) - ), - - TP_fast_assign( - __assign_rel_str(foo, foo); - __entry->bar = bar; - __assign_rel_bitmask(bitmask, mask, - BITS_PER_BYTE * sizeof(unsigned long)); - ), - - TP_printk("foo_rel_loc %s, %d, %s", __get_rel_str(foo), __entry->bar, - __get_rel_bitmask(bitmask)) -); #endif /***** NOTICE! The #if protection ends here. *****/ diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index e90c8552cc..c313ab4d1f 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -553,7 +553,7 @@ static int mbochs_probe(struct mdev_device *mdev) mbochs_create_config_space(mdev_state); mbochs_reset(mdev_state); - ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); + ret = vfio_register_group_dev(&mdev_state->vdev); if (ret) goto err_mem; dev_set_drvdata(&mdev->dev, mdev_state); @@ -1493,6 +1493,5 @@ static void __exit mbochs_dev_exit(void) mbochs_class = NULL; } -MODULE_IMPORT_NS(DMA_BUF); module_init(mbochs_dev_init) module_exit(mbochs_dev_exit) diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index fe5d43e797..8d1a80a072 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -258,7 +258,7 @@ static int mdpy_probe(struct mdev_device *mdev) mdpy_count++; - ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); + ret = vfio_register_group_dev(&mdev_state->vdev); if (ret) goto err_mem; dev_set_drvdata(&mdev->dev, mdev_state); diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index a0e1a469bd..5983cdb16e 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -741,7 +741,7 @@ static int mtty_probe(struct mdev_device *mdev) mtty_create_config_space(mdev_state); - ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); + ret = vfio_register_group_dev(&mdev_state->vdev); if (ret) goto err_vconfig; dev_set_drvdata(&mdev->dev, mdev_state); diff --git a/scripts/.gitignore b/scripts/.gitignore index eed308bef6..e83c620ef5 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /asn1_compiler /bin2c +/extract-cert /insert-sys-cert /kallsyms /module.lds diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 3514c2149e..cdec220884 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -195,6 +195,53 @@ why = \ echo-why = $(call escsq, $(strip $(why))) endif +############################################################################### +# +# When a Kconfig string contains a filename, it is suitable for +# passing to shell commands. It is surrounded by double-quotes, and +# any double-quotes or backslashes within it are escaped by +# backslashes. +# +# This is no use for dependencies or $(wildcard). We need to strip the +# surrounding quotes and the escaping from quotes and backslashes, and +# we *do* need to escape any spaces in the string. 
So, for example: +# +# Usage: $(eval $(call config_filename,FOO)) +# +# Defines FOO_FILENAME based on the contents of the CONFIG_FOO option, +# transformed as described above to be suitable for use within the +# makefile. +# +# Also, if the filename is a relative filename and exists in the source +# tree but not the build tree, define FOO_SRCPREFIX as $(srctree)/ to +# be prefixed to *both* command invocation and dependencies. +# +# Note: We also print the filenames in the quiet_cmd_foo text, and +# perhaps ought to have a version specially escaped for that purpose. +# But it's only cosmetic, and $(patsubst "%",%,$(CONFIG_FOO)) is good +# enough. It'll strip the quotes in the common case where there's no +# space and it's a simple filename, and it'll retain the quotes when +# there's a space. There are some esoteric cases in which it'll print +# the wrong thing, but we don't really care. The actual dependencies +# and commands *do* get it right, with various combinations of single +# and double quotes, backslashes and spaces in the filenames. +# +############################################################################### +# +define config_filename +ifneq ($$(CONFIG_$(1)),"") +$(1)_FILENAME := $$(subst \\,\,$$(subst \$$(quote),$$(quote),$$(subst $$(space_escape),\$$(space),$$(patsubst "%",%,$$(subst $$(space),$$(space_escape),$$(CONFIG_$(1))))))) +ifneq ($$(patsubst /%,%,$$(firstword $$($(1)_FILENAME))),$$(firstword $$($(1)_FILENAME))) +else +ifeq ($$(wildcard $$($(1)_FILENAME)),) +ifneq ($$(wildcard $$(srctree)/$$($(1)_FILENAME)),) +$(1)_SRCPREFIX := $(srctree)/ +endif +endif +endif +endif +endef +# ############################################################################### # delete partially updated (i.e. corrupted) files on error diff --git a/scripts/Makefile b/scripts/Makefile index ce5aa9030b..9adb6d2478 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -3,19 +3,25 @@ # scripts contains sources for various helper programs used throughout # the kernel for the build process. 
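The scripts/Makefile hunk below hoists the pkg-config probing of libcrypto into shared CRYPTO_LIBS/CRYPTO_CFLAGS variables because, with extract-cert re-added, two host programs now link against OpenSSL. As a hedged illustration (a hypothetical stand-in, not one of the kernel's actual host tools), here is the kind of trivial libcrypto consumer those two variables would compile and link:

/* cc $(pkg-config --cflags libcrypto) sha.c $(pkg-config --libs libcrypto) */
#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>

int main(int argc, char **argv)
{
	unsigned char md[SHA256_DIGEST_LENGTH];
	const char *msg = argc > 1 ? argv[1] : "";

	/* One-shot digest of the first argument, printed as hex. */
	SHA256((const unsigned char *)msg, strlen(msg), md);
	for (int i = 0; i < SHA256_DIGEST_LENGTH; i++)
		printf("%02x", md[i]);
	putchar('\n');
	return 0;
}

Probing pkg-config first and falling back to a bare -lcrypto matches what the hunk's $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto) does.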
+CRYPTO_LIBS = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto) +CRYPTO_CFLAGS = $(shell pkg-config --cflags libcrypto 2> /dev/null) + hostprogs-always-$(CONFIG_BUILD_BIN2C) += bin2c hostprogs-always-$(CONFIG_KALLSYMS) += kallsyms hostprogs-always-$(BUILD_C_RECORDMCOUNT) += recordmcount hostprogs-always-$(CONFIG_BUILDTIME_TABLE_SORT) += sorttable hostprogs-always-$(CONFIG_ASN1) += asn1_compiler hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file +hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include -HOSTLDLIBS_sorttable = -lpthread HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include -HOSTCFLAGS_sign-file.o = $(shell pkg-config --cflags libcrypto 2> /dev/null) -HOSTLDLIBS_sign-file = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto) +HOSTCFLAGS_sign-file.o = $(CRYPTO_CFLAGS) +HOSTLDLIBS_sign-file = $(CRYPTO_LIBS) +HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS) +HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS) ifdef CONFIG_UNWINDER_ORC ifeq ($(ARCH),x86_64) @@ -23,10 +29,7 @@ ARCH := x86 endif HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED -endif - -ifdef CONFIG_BUILDTIME_MCOUNT_SORT -HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED +HOSTLDLIBS_sorttable = -lpthread endif # The following programs are only built on demand diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a4b89b7572..3efc984d4c 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -155,7 +155,7 @@ $(obj)/%.ll: $(src)/%.c FORCE # (See cmd_cc_o_c + relevant part of rule_cc_o_c) quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ - cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< $(cmd_objtool) + cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< ifdef CONFIG_MODVERSIONS # When module versioning is enabled the following steps are executed: @@ -224,39 +224,27 @@ cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)), endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT ifdef CONFIG_STACK_VALIDATION +ifndef CONFIG_LTO_CLANG -objtool := $(objtree)/tools/objtool/objtool - -objtool_args = \ - $(if $(CONFIG_UNWINDER_ORC),orc generate,check) \ - $(if $(part-of-module), --module) \ - $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ - $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ - $(if $(CONFIG_RETPOLINE), --retpoline) \ - $(if $(CONFIG_X86_SMAP), --uaccess) \ - $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ - $(if $(CONFIG_SLS), --sls) - -cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $@) -cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) - -endif # CONFIG_STACK_VALIDATION - -ifdef CONFIG_LTO_CLANG - -# Skip objtool for LLVM bitcode -$(obj)/%.o: objtool-enabled := - -else +__objtool_obj := $(objtree)/tools/objtool/objtool # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file +cmd_objtool = $(if $(patsubst y%,, \ + $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ + $(__objtool_obj) $(objtool_args) $@) +objtool_obj = $(if $(patsubst y%,, \ + $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ + 
$(__objtool_obj)) -$(obj)/%.o: objtool-enabled = $(if $(filter-out y%, \ - $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n),y) +endif # CONFIG_LTO_CLANG +endif # CONFIG_STACK_VALIDATION -endif +# Rebuild all objects when objtool changes, or is enabled/disabled. +objtool_dep = $(objtool_obj) \ + $(wildcard include/config/ORC_UNWINDER \ + include/config/STACK_VALIDATION) ifdef CONFIG_TRIM_UNUSED_KSYMS cmd_gen_ksymdeps = \ @@ -271,7 +259,7 @@ define rule_cc_o_c $(call cmd,gen_ksymdeps) $(call cmd,checksrc) $(call cmd,checkdoc) - $(call cmd,gen_objtooldep) + $(call cmd,objtool) $(call cmd,modversions_c) $(call cmd,record_mcount) endef @@ -279,12 +267,13 @@ endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) $(call cmd,gen_ksymdeps) - $(call cmd,gen_objtooldep) + $(call cmd,objtool) $(call cmd,modversions_S) endef # Built-in and composite module parts -$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE +.SECONDEXPANSION: +$(obj)/%.o: $(src)/%.c $(recordmcount_source) $$(objtool_dep) FORCE $(call if_changed_rule,cc_o_c) $(call cmd,force_checksrc) @@ -296,13 +285,14 @@ cmd_cc_lto_link_modules = \ $(LD) $(ld_flags) -r -o $@ \ $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ echo -T $(@:.lto.o=.o.symversions)) \ - --whole-archive $(filter-out FORCE,$^) \ - $(cmd_objtool) + --whole-archive $(filter-out FORCE,$^) +ifdef CONFIG_STACK_VALIDATION # objtool was skipped for LLVM bitcode, run it now that we have compiled # modules into native code -$(obj)/%.lto.o: objtool-enabled = y -$(obj)/%.lto.o: part-of-module := y +cmd_cc_lto_link_modules += ; \ + $(objtree)/tools/objtool/objtool $(objtool_args) --module $@ +endif $(obj)/%.lto.o: $(obj)/%.o FORCE $(call if_changed,cc_lto_link_modules) @@ -366,7 +356,7 @@ $(obj)/%.s: $(src)/%.S FORCE $(call if_changed_dep,cpp_s_S) quiet_cmd_as_o_S = AS $(quiet_modtag) $@ - cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< $(cmd_objtool) + cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< ifdef CONFIG_ASM_MODVERSIONS @@ -385,7 +375,7 @@ cmd_modversions_S = \ fi endif -$(obj)/%.o: $(src)/%.S FORCE +$(obj)/%.o: $(src)/%.S $$(objtool_dep) FORCE $(call if_changed_rule,as_o_S) targets += $(filter-out $(subdir-builtin), $(real-obj-y)) diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 1d16ca1b78..4aad284800 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +gcc-plugin-$(CONFIG_GCC_PLUGIN_CYC_COMPLEXITY) += cyc_complexity_plugin.so + gcc-plugin-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) += latent_entropy_plugin.so gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) \ += -DLATENT_ENTROPY_PLUGIN diff --git a/scripts/Makefile.kcsan b/scripts/Makefile.kcsan index 19f693b68a..37cb504c77 100644 --- a/scripts/Makefile.kcsan +++ b/scripts/Makefile.kcsan @@ -9,18 +9,7 @@ endif # Keep most options here optional, to allow enabling more compilers if absence # of some options does not break KCSAN nor causes false positive reports. -kcsan-cflags := -fsanitize=thread -fno-optimize-sibling-calls \ +export CFLAGS_KCSAN := -fsanitize=thread \ + $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0) -fno-optimize-sibling-calls) \ $(call cc-option,$(call cc-param,tsan-compound-read-before-write=1),$(call cc-option,$(call cc-param,tsan-instrument-read-before-write=1))) \ $(call cc-param,tsan-distinguish-volatile=1) - -ifdef CONFIG_CC_IS_GCC -# GCC started warning about operations unsupported by the TSan runtime. But -# KCSAN != TSan, so just ignore these warnings. 
-kcsan-cflags += -Wno-tsan -endif - -ifndef CONFIG_KCSAN_WEAK_MEMORY -kcsan-cflags += $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0)) -endif - -export CFLAGS_KCSAN := $(kcsan-cflags) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 405163d4a5..7e3f156ac5 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -182,11 +182,6 @@ ifeq ($(CONFIG_KCSAN),y) _c_flags += $(if $(patsubst n%,, \ $(KCSAN_SANITIZE_$(basetarget).o)$(KCSAN_SANITIZE)y), \ $(CFLAGS_KCSAN)) -# Some uninstrumented files provide implied barriers required to avoid false -# positives: set KCSAN_INSTRUMENT_BARRIERS for barrier instrumentation only. -_c_flags += $(if $(patsubst n%,, \ - $(KCSAN_INSTRUMENT_BARRIERS_$(basetarget).o)$(KCSAN_INSTRUMENT_BARRIERS)n), \ - -D__KCSAN_INSTRUMENT_BARRIERS__) endif # $(srctree)/$(src) for including checkin headers from generated source files @@ -237,6 +232,17 @@ ifeq ($(CONFIG_LTO_CLANG),y) mod-prelink-ext := .lto endif +# Objtool arguments are also needed for modfinal with LTO, so we define +# then here to avoid duplication. +objtool_args = \ + $(if $(CONFIG_UNWINDER_ORC),orc generate,check) \ + $(if $(part-of-module), --module) \ + $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ + $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ + $(if $(CONFIG_RETPOLINE), --retpoline) \ + $(if $(CONFIG_X86_SMAP), --uaccess) \ + $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) + # Useful for describing the dependency of composite objects # Usage: # $(call multi_depend, multi_used_targets, suffix_to_remove, suffix_to_add) @@ -304,6 +310,7 @@ DTC_FLAGS += -Wno-interrupt_provider # Disable noisy checks by default ifeq ($(findstring 1,$(KBUILD_EXTRA_WARN)),) DTC_FLAGS += -Wno-unit_address_vs_reg \ + -Wno-unit_address_format \ -Wno-gpios_property \ -Wno-avoid_unnecessary_addr_size \ -Wno-alias_paths \ @@ -418,35 +425,20 @@ printf "%08x\n" $$dec_size | \ } \ ) -quiet_cmd_file_size = GEN $@ - cmd_file_size = $(size_append) > $@ - quiet_cmd_bzip2 = BZIP2 $@ - cmd_bzip2 = cat $(real-prereqs) | $(KBZIP2) -9 > $@ - -quiet_cmd_bzip2_with_size = BZIP2 $@ - cmd_bzip2_with_size = { cat $(real-prereqs) | $(KBZIP2) -9; $(size_append); } > $@ + cmd_bzip2 = { cat $(real-prereqs) | $(KBZIP2) -9; $(size_append); } > $@ # Lzma # --------------------------------------------------------------------------- quiet_cmd_lzma = LZMA $@ - cmd_lzma = cat $(real-prereqs) | $(LZMA) -9 > $@ - -quiet_cmd_lzma_with_size = LZMA $@ - cmd_lzma_with_size = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@ + cmd_lzma = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@ quiet_cmd_lzo = LZO $@ - cmd_lzo = cat $(real-prereqs) | $(KLZOP) -9 > $@ - -quiet_cmd_lzo_with_size = LZO $@ - cmd_lzo_with_size = { cat $(real-prereqs) | $(KLZOP) -9; $(size_append); } > $@ + cmd_lzo = { cat $(real-prereqs) | $(KLZOP) -9; $(size_append); } > $@ quiet_cmd_lz4 = LZ4 $@ - cmd_lz4 = cat $(real-prereqs) | $(LZ4) -l -c1 stdin stdout > $@ - -quiet_cmd_lz4_with_size = LZ4 $@ - cmd_lz4_with_size = { cat $(real-prereqs) | $(LZ4) -l -c1 stdin stdout; \ + cmd_lz4 = { cat $(real-prereqs) | $(LZ4) -l -c1 stdin stdout; \ $(size_append); } > $@ # U-Boot mkimage @@ -489,10 +481,7 @@ quiet_cmd_uimage = UIMAGE $@ # big dictionary would increase the memory usage too much in the multi-call # decompression mode. A BCJ filter isn't used either. 
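Every *_with_size command deleted in the surrounding Makefile.lib hunks differs from its plain twin only by piping through $(size_append), the printf "%08x" fragment visible above, which appends the uncompressed length to the compressed payload as a 4-byte little-endian footer so the decompressor can size its output buffer up front. The same framing expressed in C, as a minimal sketch:

#include <stdint.h>
#include <stdio.h>

/* Append the uncompressed length as a 4-byte little-endian footer,
 * mirroring what the size_append shell fragment emits byte by byte. */
static int append_size_footer(FILE *out, uint32_t uncompressed_len)
{
	uint8_t footer[4] = {
		uncompressed_len & 0xff,
		(uncompressed_len >> 8) & 0xff,
		(uncompressed_len >> 16) & 0xff,
		(uncompressed_len >> 24) & 0xff,
	};

	return fwrite(footer, 1, sizeof(footer), out) == sizeof(footer) ? 0 : -1;
}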
quiet_cmd_xzkern = XZKERN $@ - cmd_xzkern = cat $(real-prereqs) | sh $(srctree)/scripts/xz_wrap.sh > $@ - -quiet_cmd_xzkern_with_size = XZKERN $@ - cmd_xzkern_with_size = { cat $(real-prereqs) | sh $(srctree)/scripts/xz_wrap.sh; \ + cmd_xzkern = { cat $(real-prereqs) | sh $(srctree)/scripts/xz_wrap.sh; \ $(size_append); } > $@ quiet_cmd_xzmisc = XZMISC $@ @@ -515,13 +504,10 @@ quiet_cmd_xzmisc = XZMISC $@ # be used because it would require zstd to allocate a 128 MB buffer. quiet_cmd_zstd = ZSTD $@ - cmd_zstd = cat $(real-prereqs) | $(ZSTD) -19 > $@ + cmd_zstd = { cat $(real-prereqs) | $(ZSTD) -19; $(size_append); } > $@ quiet_cmd_zstd22 = ZSTD22 $@ - cmd_zstd22 = cat $(real-prereqs) | $(ZSTD) -22 --ultra > $@ - -quiet_cmd_zstd22_with_size = ZSTD22 $@ - cmd_zstd22_with_size = { cat $(real-prereqs) | $(ZSTD) -22 --ultra; $(size_append); } > $@ + cmd_zstd22 = { cat $(real-prereqs) | $(ZSTD) -22 --ultra; $(size_append); } > $@ # ASM offsets # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 7f39599e9f..ff80577743 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -40,8 +40,7 @@ quiet_cmd_ld_ko_o = LD [M] $@ quiet_cmd_btf_ko = BTF [M] $@ cmd_btf_ko = \ if [ -f vmlinux ]; then \ - LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \ - $(RESOLVE_BTFIDS) -b vmlinux $@; \ + LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \ else \ printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ fi; diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index c2c43a0ecf..ff9b09e4cf 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -66,9 +66,9 @@ endif # Don't stop modules_install even if we can't sign external modules. 
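A few files further down, gen-atomic-instrumented.sh stops emitting kcsan_mb()/kcsan_release() ahead of fully ordered and release-ordered RMW operations. What that script generates is C: each atomic_*() wrapper in include/linux/atomic/atomic-instrumented.h sandwiches an instrumentation hook (and, before this revert, a KCSAN barrier annotation) around the arch-level primitive. A sketch of the shape of one generated wrapper; the helpers are kernel-internal, so this is illustrative rather than standalone-buildable:

/* Generated shape, as in include/linux/atomic/atomic-instrumented.h. */
static __always_inline int
atomic_add_return(int i, atomic_t *v)
{
	kcsan_mb();	/* the barrier annotation this revert stops emitting */
	instrument_atomic_read_write(v, sizeof(*v));	/* KCSAN/KASAN hook */
	return arch_atomic_add_return(i, v);
}

Removing the kcsan_mb()/kcsan_release() emission in the gen_params_checks() lines below is the generator-side counterpart of dropping that first call.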
# ifeq ($(CONFIG_MODULE_SIG_ALL),y) -sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY) quiet_cmd_sign = SIGN $@ - cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) $(sig-key) certs/signing_key.x509 $@ \ +$(eval $(call config_filename,MODULE_SIG_KEY)) + cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) $(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY) certs/signing_key.x509 $@ \ $(if $(KBUILD_EXTMOD),|| true) else quiet_cmd_sign := diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 77b612183c..b74c65284f 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -103,7 +103,7 @@ snap-pkg: # tarball targets # --------------------------------------------------------------------------- -tar-pkgs := dir-pkg tar-pkg targz-pkg tarbz2-pkg tarxz-pkg tarzst-pkg +tar-pkgs := dir-pkg tar-pkg targz-pkg tarbz2-pkg tarxz-pkg PHONY += $(tar-pkgs) $(tar-pkgs): $(MAKE) -f $(srctree)/Makefile @@ -130,12 +130,10 @@ $(if $(findstring tar-src,$@),, \ $(if $(findstring bz2,$@),$(KBZIP2), \ $(if $(findstring gz,$@),$(KGZIP), \ $(if $(findstring xz,$@),$(XZ), \ -$(if $(findstring zst,$@),$(ZSTD), \ -$(error unknown target $@))))) \ +$(error unknown target $@)))) \ -f -9 $(perf-tar).tar) -perf-tar-pkgs := perf-tar-src-pkg perf-targz-src-pkg perf-tarbz2-src-pkg \ - perf-tarxz-src-pkg perf-tarzst-src-pkg +perf-tar-pkgs := perf-tar-src-pkg perf-targz-src-pkg perf-tarbz2-src-pkg perf-tarxz-src-pkg PHONY += $(perf-tar-pkgs) $(perf-tar-pkgs): $(call cmd,perf_tar) @@ -155,11 +153,9 @@ help: @echo ' targz-pkg - Build the kernel as a gzip compressed tarball' @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball' @echo ' tarxz-pkg - Build the kernel as a xz compressed tarball' - @echo ' tarzst-pkg - Build the kernel as a zstd compressed tarball' @echo ' perf-tar-src-pkg - Build $(perf-tar).tar source tarball' @echo ' perf-targz-src-pkg - Build $(perf-tar).tar.gz source tarball' @echo ' perf-tarbz2-src-pkg - Build $(perf-tar).tar.bz2 source tarball' @echo ' perf-tarxz-src-pkg - Build $(perf-tar).tar.xz source tarball' - @echo ' perf-tarzst-src-pkg - Build $(perf-tar).tar.zst source tarball' .PHONY: $(PHONY) diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 7099c603ff..9e2092fd52 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -8,6 +8,7 @@ ubsan-cflags-$(CONFIG_UBSAN_LOCAL_BOUNDS) += -fsanitize=local-bounds ubsan-cflags-$(CONFIG_UBSAN_SHIFT) += -fsanitize=shift ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO) += -fsanitize=integer-divide-by-zero ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE) += -fsanitize=unreachable +ubsan-cflags-$(CONFIG_UBSAN_OBJECT_SIZE) += -fsanitize=object-size ubsan-cflags-$(CONFIG_UBSAN_BOOL) += -fsanitize=bool ubsan-cflags-$(CONFIG_UBSAN_ENUM) += -fsanitize=enum ubsan-cflags-$(CONFIG_UBSAN_TRAP) += -fsanitize-undefined-trap-on-error diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh index 68f902731d..035ceb4ee8 100644 --- a/scripts/atomic/gen-atomic-instrumented.sh +++ b/scripts/atomic/gen-atomic-instrumented.sh @@ -34,14 +34,6 @@ gen_param_check() gen_params_checks() { local meta="$1"; shift - local order="$1"; shift - - if [ "${order}" = "_release" ]; then - printf "\tkcsan_release();\n" - elif [ -z "${order}" ] && ! 
meta_in "$meta" "slv"; then - # RMW with return value is fully ordered - printf "\tkcsan_mb();\n" - fi while [ "$#" -gt 0 ]; do gen_param_check "$meta" "$1" @@ -64,7 +56,7 @@ gen_proto_order_variant() local ret="$(gen_ret_type "${meta}" "${int}")" local params="$(gen_params "${int}" "${atomic}" "$@")" - local checks="$(gen_params_checks "${meta}" "${order}" "$@")" + local checks="$(gen_params_checks "${meta}" "$@")" local args="$(gen_args "$@")" local retstmt="$(gen_ret_stmt "${meta}")" @@ -83,44 +75,29 @@ EOF gen_xchg() { local xchg="$1"; shift - local order="$1"; shift local mult="$1"; shift - kcsan_barrier="" - if [ "${xchg%_local}" = "${xchg}" ]; then - case "$order" in - _release) kcsan_barrier="kcsan_release()" ;; - "") kcsan_barrier="kcsan_mb()" ;; - esac - fi - if [ "${xchg%${xchg#try_cmpxchg}}" = "try_cmpxchg" ] ; then cat < \%debug, 'test-only=s' => \$tst_only, 'codespell!' => \$codespell, - 'codespellfile=s' => \$user_codespellfile, + 'codespellfile=s' => \$codespellfile, 'typedefsfile=s' => \$typedefsfile, 'color=s' => \$color, 'no-color' => \$color, #keep old behaviors of -nocolor @@ -326,32 +325,9 @@ GetOptions( 'kconfig-prefix=s' => \${CONFIG_}, 'h|help' => \$help, 'version' => \$help -) or $help = 2; +) or help(1); -if ($user_codespellfile) { - # Use the user provided codespell file unconditionally - $codespellfile = $user_codespellfile; -} elsif (!(-f $codespellfile)) { - # If /usr/share/codespell/dictionary.txt is not present, try to find it - # under codespell's install directory: /data/dictionary.txt - if (($codespell || $help) && which("codespell") ne "" && which("python") ne "") { - my $python_codespell_dict = << "EOF"; - -import os.path as op -import codespell_lib -codespell_dir = op.dirname(codespell_lib.__file__) -codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt') -print(codespell_file, end='') -EOF - - my $codespell_dict = `python -c "$python_codespell_dict" 2> /dev/null`; - $codespellfile = $codespell_dict if (-f $codespell_dict); - } -} - -# $help is 1 if either -h, --help or --version is passed as option - exitcode: 0 -# $help is 2 if invalid option is passed - exitcode: 1 -help($help - 1) if ($help); +help(0) if ($help); die "$P: --git cannot be used with --file or --fix\n" if ($git && ($file || $fix)); die "$P: --verbose cannot be used with --terse\n" if ($verbose && $terse); @@ -513,8 +489,7 @@ our $Attribute = qr{ ____cacheline_aligned| ____cacheline_aligned_in_smp| ____cacheline_internodealigned_in_smp| - __weak| - __alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) + __weak }x; our $Modifier; our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__}; @@ -3172,7 +3147,7 @@ sub process { length($line) > 75 && !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ || # file delta changes - $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ || + $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ || # filename then : $line =~ /^\s*(?:Fixes:|Link:|$signature_tags)/i || # A Fixes: or Link: line or signature tag line @@ -3479,47 +3454,47 @@ sub process { # Kconfig supports named choices), so use a word boundary # (\b) rather than a whitespace character (\s) $line =~ /^\+\s*(?:config|menuconfig|choice)\b/) { - my $ln = $linenr; - my $needs_help = 0; - my $has_help = 0; - my $help_length = 0; - while (defined $lines[$ln]) { - my $f = $lines[$ln++]; + my $length = 0; + my $cnt = $realcnt; + my $ln = $linenr + 1; + my $f; + my $is_start = 0; + my $is_end = 0; + for (; $cnt > 0 && defined $lines[$ln - 1]; $ln++) { + $f = $lines[$ln - 1]; + $cnt-- if ($lines[$ln - 1] !~ 
/^-/); + $is_end = $lines[$ln - 1] =~ /^\+/; next if ($f =~ /^-/); - last if ($f !~ /^[\+ ]/); # !patch context + last if (!$file && $f =~ /^\@\@/); - if ($f =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) { - $needs_help = 1; - next; - } - if ($f =~ /^\+\s*help\s*$/) { - $has_help = 1; - next; + if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) { + $is_start = 1; + } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) { + $length = -1; } - $f =~ s/^.//; # strip patch context [+ ] - $f =~ s/#.*//; # strip # directives - $f =~ s/^\s+//; # strip leading blanks - next if ($f =~ /^$/); # skip blank lines + $f =~ s/^.//; + $f =~ s/#.*//; + $f =~ s/^\s+//; + next if ($f =~ /^$/); - # At the end of this Kconfig block: # This only checks context lines in the patch # and so hopefully shouldn't trigger false # positives, even though some of these are # common words in help texts - if ($f =~ /^(?:config|menuconfig|choice|endchoice| - if|endif|menu|endmenu|source)\b/x) { + if ($f =~ /^\s*(?:config|menuconfig|choice|endchoice| + if|endif|menu|endmenu|source)\b/x) { + $is_end = 1; last; } - $help_length++ if ($has_help); + $length++; } - if ($needs_help && - $help_length < $min_conf_desc_length) { - my $stat_real = get_stat_real($linenr, $ln - 1); + if ($is_start && $is_end && $length < $min_conf_desc_length) { WARN("CONFIG_DESCRIPTION", - "please write a help paragraph that fully describes the config symbol\n" . "$here\n$stat_real\n"); + "please write a paragraph that describes the config symbol fully\n" . $herecurr); } + #print "is_start<$is_start> is_end<$is_end> length<$length>\n"; } # check MAINTAINERS entries @@ -4473,7 +4448,6 @@ sub process { # XXX(foo); # EXPORT_SYMBOL(something_foo); my $name = $1; - $name =~ s/^\s*($Ident).*/$1/; if ($stat =~ /^(?:.\s*}\s*\n)?.([A-Z_]+)\s*\(\s*($Ident)/ && $name =~ /^${Ident}_$2/) { #print "FOO C name<$name>\n"; diff --git a/scripts/const_structs.checkpatch b/scripts/const_structs.checkpatch index 1eeb7b42c5..1aae4f4fda 100644 --- a/scripts/const_structs.checkpatch +++ b/scripts/const_structs.checkpatch @@ -12,27 +12,19 @@ driver_info drm_connector_funcs drm_encoder_funcs drm_encoder_helper_funcs -dvb_frontend_ops -dvb_tuner_ops ethtool_ops extent_io_ops -fb_ops file_lock_operations file_operations hv_ops -hwmon_ops -ib_device_ops ide_dma_ops ide_port_ops -ieee80211_ops -iio_buffer_setup_ops inode_operations intel_dvo_dev_ops irq_domain_ops item_operations iwl_cfg iwl_ops -kernel_param_ops kgdb_arch kgdb_io kset_uevent_ops @@ -40,41 +32,29 @@ lock_manager_operations machine_desc microcode_ops mlxsw_reg_info -mtd_ooblayout_ops mtrr_ops -nand_controller_ops neigh_ops net_device_ops -nft_expr_ops nlmsvc_binding nvkm_device_chip of_device_id pci_raw_ops phy_ops -pinconf_ops pinctrl_ops pinmux_ops pipe_buf_operations platform_hibernation_ops platform_suspend_ops -proc_ops proto_ops -pwm_ops regmap_access_table regulator_ops -reset_control_ops rpc_pipe_ops rtc_class_ops sd_desc -sdhci_ops seq_operations sirfsoc_padmux snd_ac97_build_ops -snd_pcm_ops -snd_rawmidi_ops snd_soc_component_driver -snd_soc_dai_ops -snd_soc_ops soc_pcmcia_socket_ops stacktrace_ops sysfs_ops @@ -83,13 +63,6 @@ uart_ops usb_mon_operations v4l2_ctrl_ops v4l2_ioctl_ops -v4l2_subdev_core_ops -v4l2_subdev_internal_ops -v4l2_subdev_ops -v4l2_subdev_pad_ops -v4l2_subdev_video_ops -vb2_ops vm_operations_struct wacom_features -watchdog_ops wd_ops diff --git a/scripts/decodecode b/scripts/decodecode index c711a19651..31d884e35f 100644 --- a/scripts/decodecode +++ b/scripts/decodecode 
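Before the decodecode hunk below, a gloss on the CONFIG_DESCRIPTION change just above: both variants scan the added Kconfig entry, note whether it carries a bool/tristate/prompt line (so help text is expected), then count non-blank help lines and warn when the count falls under $min_conf_desc_length. A rough Python restatement of that heuristic (helper name and structure are mine; checkpatch's default minimum is believed to be 4):

    def help_too_short(body_lines, min_len=4):
        """body_lines: the lines following a 'config FOO' header."""
        needs_help = has_help = False
        count = 0
        enders = {"config", "menuconfig", "choice", "endchoice",
                  "if", "endif", "menu", "endmenu", "source"}
        for raw in body_lines:
            line = raw.split("#", 1)[0].strip()  # drop Kconfig comments
            if not line:
                continue
            word = line.split()[0]
            if word in enders:
                break                 # next Kconfig block: entry is over
            if word in ("bool", "tristate", "prompt"):
                needs_help = True     # entry has a prompt, so wants help
            elif line == "help":
                has_help = True
            elif has_help:
                count += 1            # one more line of help text
        return needs_help and count < min_len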
@@ -126,7 +126,7 @@ if [ $marker -ne 0 ]; then fi echo Code starting with the faulting instruction > $T.aa echo =========================================== >> $T.aa -code=`echo $code | sed -e 's/\r//;s/ [<(]/ /;s/[>)] / /;s/ /,0x/g; s/[>)]$//'` +code=`echo $code | sed -e 's/ [<(]/ /;s/[>)] / /;s/ /,0x/g; s/[>)]$//'` echo -n " .$type 0x" > $T.s echo $code >> $T.s disas $T 0 diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check index 68083f2f11..7187ea5e51 100644 --- a/scripts/documentation-file-ref-check +++ b/scripts/documentation-file-ref-check @@ -94,9 +94,6 @@ while () { # Makefiles and scripts contain nasty expressions to parse docs next if ($f =~ m/Makefile/ || $f =~ m/\.sh$/); - # It doesn't make sense to parse hidden files - next if ($f =~ m#/\.#); - # Skip this script next if ($f eq $scriptname); @@ -147,7 +144,6 @@ while () { if ($f =~ m/tools/) { my $path = $f; $path =~ s,(.*)/.*,$1,; - $path =~ s,testing/selftests/bpf,bpf/bpftool,; next if (grep -e, glob("$path/$ref $path/../$ref $path/$fulref")); } diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c index 781ba1129a..17cb6890d4 100644 --- a/scripts/dtc/checks.c +++ b/scripts/dtc/checks.c @@ -143,14 +143,6 @@ static void check_nodes_props(struct check *c, struct dt_info *dti, struct node check_nodes_props(c, dti, child); } -static bool is_multiple_of(int multiple, int divisor) -{ - if (divisor == 0) - return multiple == 0; - else - return (multiple % divisor) == 0; -} - static bool run_check(struct check *c, struct dt_info *dti) { struct node *dt = dti->dt; @@ -305,20 +297,19 @@ ERROR(duplicate_property_names, check_duplicate_property_names, NULL); #define LOWERCASE "abcdefghijklmnopqrstuvwxyz" #define UPPERCASE "ABCDEFGHIJKLMNOPQRSTUVWXYZ" #define DIGITS "0123456789" -#define NODECHARS LOWERCASE UPPERCASE DIGITS ",._+-@" -#define PROPCHARS LOWERCASE UPPERCASE DIGITS ",._+*#?-" +#define PROPNODECHARS LOWERCASE UPPERCASE DIGITS ",._+*#?-" #define PROPNODECHARSSTRICT LOWERCASE UPPERCASE DIGITS ",-" static void check_node_name_chars(struct check *c, struct dt_info *dti, struct node *node) { - size_t n = strspn(node->name, c->data); + int n = strspn(node->name, c->data); if (n < strlen(node->name)) FAIL(c, dti, node, "Bad character '%c' in node name", node->name[n]); } -ERROR(node_name_chars, check_node_name_chars, NODECHARS); +ERROR(node_name_chars, check_node_name_chars, PROPNODECHARS "@"); static void check_node_name_chars_strict(struct check *c, struct dt_info *dti, struct node *node) @@ -339,20 +330,6 @@ static void check_node_name_format(struct check *c, struct dt_info *dti, } ERROR(node_name_format, check_node_name_format, NULL, &node_name_chars); -static void check_node_name_vs_property_name(struct check *c, - struct dt_info *dti, - struct node *node) -{ - if (!node->parent) - return; - - if (get_property(node->parent, node->name)) { - FAIL(c, dti, node, "node name and property name conflict"); - } -} -WARNING(node_name_vs_property_name, check_node_name_vs_property_name, - NULL, &node_name_chars); - static void check_unit_address_vs_reg(struct check *c, struct dt_info *dti, struct node *node) { @@ -386,14 +363,14 @@ static void check_property_name_chars(struct check *c, struct dt_info *dti, struct property *prop; for_each_property(node, prop) { - size_t n = strspn(prop->name, c->data); + int n = strspn(prop->name, c->data); if (n < strlen(prop->name)) FAIL_PROP(c, dti, node, prop, "Bad character '%c' in property name", prop->name[n]); } } -ERROR(property_name_chars, 
check_property_name_chars, PROPCHARS); +ERROR(property_name_chars, check_property_name_chars, PROPNODECHARS); static void check_property_name_chars_strict(struct check *c, struct dt_info *dti, @@ -403,7 +380,7 @@ static void check_property_name_chars_strict(struct check *c, for_each_property(node, prop) { const char *name = prop->name; - size_t n = strspn(name, c->data); + int n = strspn(name, c->data); if (n == strlen(prop->name)) continue; @@ -520,7 +497,7 @@ static cell_t check_phandle_prop(struct check *c, struct dt_info *dti, phandle = propval_cell(prop); - if (!phandle_is_valid(phandle)) { + if ((phandle == 0) || (phandle == -1)) { FAIL_PROP(c, dti, node, prop, "bad value (0x%x) in %s property", phandle, prop->name); return 0; @@ -579,7 +556,7 @@ static void check_name_properties(struct check *c, struct dt_info *dti, if (!prop) return; /* No name property, that's fine */ - if ((prop->val.len != node->basenamelen + 1U) + if ((prop->val.len != node->basenamelen+1) || (memcmp(prop->val.val, node->name, node->basenamelen) != 0)) { FAIL(c, dti, node, "\"name\" property is incorrect (\"%s\" instead" " of base node name)", prop->val.val); @@ -680,6 +657,7 @@ ERROR(omit_unused_nodes, fixup_omit_unused_nodes, NULL, &phandle_references, &pa */ WARNING_IF_NOT_CELL(address_cells_is_cell, "#address-cells"); WARNING_IF_NOT_CELL(size_cells_is_cell, "#size-cells"); +WARNING_IF_NOT_CELL(interrupt_cells_is_cell, "#interrupt-cells"); WARNING_IF_NOT_STRING(device_type_is_string, "device_type"); WARNING_IF_NOT_STRING(model_is_string, "model"); @@ -694,7 +672,8 @@ static void check_names_is_string_list(struct check *c, struct dt_info *dti, struct property *prop; for_each_property(node, prop) { - if (!strends(prop->name, "-names")) + const char *s = strrchr(prop->name, '-'); + if (!s || !streq(s, "-names")) continue; c->data = prop->name; @@ -774,7 +753,7 @@ static void check_reg_format(struct check *c, struct dt_info *dti, size_cells = node_size_cells(node->parent); entrylen = (addr_cells + size_cells) * sizeof(cell_t); - if (!is_multiple_of(prop->val.len, entrylen)) + if (!entrylen || (prop->val.len % entrylen) != 0) FAIL_PROP(c, dti, node, prop, "property has invalid length (%d bytes) " "(#address-cells == %d, #size-cells == %d)", prop->val.len, addr_cells, size_cells); @@ -815,7 +794,7 @@ static void check_ranges_format(struct check *c, struct dt_info *dti, "#size-cells (%d) differs from %s (%d)", ranges, c_size_cells, node->parent->fullpath, p_size_cells); - } else if (!is_multiple_of(prop->val.len, entrylen)) { + } else if ((prop->val.len % entrylen) != 0) { FAIL_PROP(c, dti, node, prop, "\"%s\" property has invalid length (%d bytes) " "(parent #address-cells == %d, child #address-cells == %d, " "#size-cells == %d)", ranges, prop->val.len, @@ -892,7 +871,7 @@ static void check_pci_device_bus_num(struct check *c, struct dt_info *dti, struc } else { cells = (cell_t *)prop->val.val; min_bus = fdt32_to_cpu(cells[0]); - max_bus = fdt32_to_cpu(cells[1]); + max_bus = fdt32_to_cpu(cells[0]); } if ((bus_num < min_bus) || (bus_num > max_bus)) FAIL_PROP(c, dti, node, prop, "PCI bus number %d out of range, expected (%d - %d)", @@ -1388,9 +1367,9 @@ static void check_property_phandle_args(struct check *c, const struct provider *provider) { struct node *root = dti->dt; - unsigned int cell, cellsize = 0; + int cell, cellsize = 0; - if (!is_multiple_of(prop->val.len, sizeof(cell_t))) { + if (prop->val.len % sizeof(cell_t)) { FAIL_PROP(c, dti, node, prop, "property size (%d) is invalid, expected multiple of %zu", 
prop->val.len, sizeof(cell_t)); @@ -1400,14 +1379,14 @@ static void check_property_phandle_args(struct check *c, for (cell = 0; cell < prop->val.len / sizeof(cell_t); cell += cellsize + 1) { struct node *provider_node; struct property *cellprop; - cell_t phandle; + int phandle; phandle = propval_cell_n(prop, cell); /* * Some bindings use a cell value 0 or -1 to skip over optional * entries when each index position has a specific definition. */ - if (!phandle_is_valid(phandle)) { + if (phandle == 0 || phandle == -1) { /* Give up if this is an overlay with external references */ if (dti->dtsflags & DTSF_PLUGIN) break; @@ -1473,8 +1452,7 @@ static void check_provider_cells_property(struct check *c, } #define WARNING_PROPERTY_PHANDLE_CELLS(nm, propname, cells_name, ...) \ static struct provider nm##_provider = { (propname), (cells_name), __VA_ARGS__ }; \ - WARNING_IF_NOT_CELL(nm##_is_cell, cells_name); \ - WARNING(nm##_property, check_provider_cells_property, &nm##_provider, &nm##_is_cell, &phandle_references); + WARNING(nm##_property, check_provider_cells_property, &nm##_provider, &phandle_references); WARNING_PROPERTY_PHANDLE_CELLS(clocks, "clocks", "#clock-cells"); WARNING_PROPERTY_PHANDLE_CELLS(cooling_device, "cooling-device", "#cooling-cells"); @@ -1495,17 +1473,24 @@ WARNING_PROPERTY_PHANDLE_CELLS(thermal_sensors, "thermal-sensors", "#thermal-sen static bool prop_is_gpio(struct property *prop) { + char *str; + /* * *-gpios and *-gpio can appear in property names, * so skip over any false matches (only one known ATM) */ - if (strends(prop->name, ",nr-gpios")) + if (strstr(prop->name, "nr-gpio")) return false; - return strends(prop->name, "-gpios") || - streq(prop->name, "gpios") || - strends(prop->name, "-gpio") || - streq(prop->name, "gpio"); + str = strrchr(prop->name, '-'); + if (str) + str++; + else + str = prop->name; + if (!(streq(str, "gpios") || streq(str, "gpio"))) + return false; + + return true; } static void check_gpios_property(struct check *c, @@ -1540,10 +1525,13 @@ static void check_deprecated_gpio_property(struct check *c, struct property *prop; for_each_property(node, prop) { + char *str; + if (!prop_is_gpio(prop)) continue; - if (!strends(prop->name, "gpio")) + str = strstr(prop->name, "gpio"); + if (!streq(str, "gpio")) continue; FAIL_PROP(c, dti, node, prop, @@ -1573,106 +1561,21 @@ static void check_interrupt_provider(struct check *c, struct node *node) { struct property *prop; - bool irq_provider = node_is_interrupt_provider(node); + + if (!node_is_interrupt_provider(node)) + return; prop = get_property(node, "#interrupt-cells"); - if (irq_provider && !prop) { + if (!prop) FAIL(c, dti, node, - "Missing '#interrupt-cells' in interrupt provider"); - return; - } + "Missing #interrupt-cells in interrupt provider"); - if (!irq_provider && prop) { + prop = get_property(node, "#address-cells"); + if (!prop) FAIL(c, dti, node, - "'#interrupt-cells' found, but node is not an interrupt provider"); - return; - } + "Missing #address-cells in interrupt provider"); } -WARNING(interrupt_provider, check_interrupt_provider, NULL, &interrupts_extended_is_cell); - -static void check_interrupt_map(struct check *c, - struct dt_info *dti, - struct node *node) -{ - struct node *root = dti->dt; - struct property *prop, *irq_map_prop; - size_t cellsize, cell, map_cells; - - irq_map_prop = get_property(node, "interrupt-map"); - if (!irq_map_prop) - return; - - if (node->addr_cells < 0) { - FAIL(c, dti, node, - "Missing '#address-cells' in interrupt-map provider"); - return; - } - cellsize = 
node_addr_cells(node); - cellsize += propval_cell(get_property(node, "#interrupt-cells")); - - prop = get_property(node, "interrupt-map-mask"); - if (prop && (prop->val.len != (cellsize * sizeof(cell_t)))) - FAIL_PROP(c, dti, node, prop, - "property size (%d) is invalid, expected %zu", - prop->val.len, cellsize * sizeof(cell_t)); - - if (!is_multiple_of(irq_map_prop->val.len, sizeof(cell_t))) { - FAIL_PROP(c, dti, node, irq_map_prop, - "property size (%d) is invalid, expected multiple of %zu", - irq_map_prop->val.len, sizeof(cell_t)); - return; - } - - map_cells = irq_map_prop->val.len / sizeof(cell_t); - for (cell = 0; cell < map_cells; ) { - struct node *provider_node; - struct property *cellprop; - int phandle; - size_t parent_cellsize; - - if ((cell + cellsize) >= map_cells) { - FAIL_PROP(c, dti, node, irq_map_prop, - "property size (%d) too small, expected > %zu", - irq_map_prop->val.len, (cell + cellsize) * sizeof(cell_t)); - break; - } - cell += cellsize; - - phandle = propval_cell_n(irq_map_prop, cell); - if (!phandle_is_valid(phandle)) { - /* Give up if this is an overlay with external references */ - if (!(dti->dtsflags & DTSF_PLUGIN)) - FAIL_PROP(c, dti, node, irq_map_prop, - "Cell %zu is not a phandle(%d)", - cell, phandle); - break; - } - - provider_node = get_node_by_phandle(root, phandle); - if (!provider_node) { - FAIL_PROP(c, dti, node, irq_map_prop, - "Could not get phandle(%d) node for (cell %zu)", - phandle, cell); - break; - } - - cellprop = get_property(provider_node, "#interrupt-cells"); - if (cellprop) { - parent_cellsize = propval_cell(cellprop); - } else { - FAIL(c, dti, node, "Missing property '#interrupt-cells' in node %s or bad phandle (referred from interrupt-map[%zu])", - provider_node->fullpath, cell); - break; - } - - cellprop = get_property(provider_node, "#address-cells"); - if (cellprop) - parent_cellsize += propval_cell(cellprop); - - cell += 1 + parent_cellsize; - } -} -WARNING(interrupt_map, check_interrupt_map, NULL, &phandle_references, &addr_size_cells, &interrupt_provider); +WARNING(interrupt_provider, check_interrupt_provider, NULL); static void check_interrupts_property(struct check *c, struct dt_info *dti, @@ -1681,13 +1584,13 @@ static void check_interrupts_property(struct check *c, struct node *root = dti->dt; struct node *irq_node = NULL, *parent = node; struct property *irq_prop, *prop = NULL; - cell_t irq_cells, phandle; + int irq_cells, phandle; irq_prop = get_property(node, "interrupts"); if (!irq_prop) return; - if (!is_multiple_of(irq_prop->val.len, sizeof(cell_t))) + if (irq_prop->val.len % sizeof(cell_t)) FAIL_PROP(c, dti, node, irq_prop, "size (%d) is invalid, expected multiple of %zu", irq_prop->val.len, sizeof(cell_t)); @@ -1700,7 +1603,7 @@ static void check_interrupts_property(struct check *c, prop = get_property(parent, "interrupt-parent"); if (prop) { phandle = propval_cell(prop); - if (!phandle_is_valid(phandle)) { + if ((phandle == 0) || (phandle == -1)) { /* Give up if this is an overlay with * external references */ if (dti->dtsflags & DTSF_PLUGIN) @@ -1736,7 +1639,7 @@ static void check_interrupts_property(struct check *c, } irq_cells = propval_cell(prop); - if (!is_multiple_of(irq_prop->val.len, irq_cells * sizeof(cell_t))) { + if (irq_prop->val.len % (irq_cells * sizeof(cell_t))) { FAIL_PROP(c, dti, node, prop, "size is (%d), expected multiple of %d", irq_prop->val.len, (int)(irq_cells * sizeof(cell_t))); @@ -1847,7 +1750,7 @@ WARNING(graph_port, check_graph_port, NULL, &graph_nodes); static struct node 
*get_remote_endpoint(struct check *c, struct dt_info *dti, struct node *endpoint) { - cell_t phandle; + int phandle; struct node *node; struct property *prop; @@ -1857,7 +1760,7 @@ static struct node *get_remote_endpoint(struct check *c, struct dt_info *dti, phandle = propval_cell(prop); /* Give up if this is an overlay with external references */ - if (!phandle_is_valid(phandle)) + if (phandle == 0 || phandle == -1) return NULL; node = get_node_by_phandle(dti->dt, phandle); @@ -1893,7 +1796,7 @@ WARNING(graph_endpoint, check_graph_endpoint, NULL, &graph_nodes); static struct check *check_table[] = { &duplicate_node_names, &duplicate_property_names, &node_name_chars, &node_name_format, &property_name_chars, - &name_is_string, &name_properties, &node_name_vs_property_name, + &name_is_string, &name_properties, &duplicate_label, @@ -1901,7 +1804,7 @@ static struct check *check_table[] = { &phandle_references, &path_references, &omit_unused_nodes, - &address_cells_is_cell, &size_cells_is_cell, + &address_cells_is_cell, &size_cells_is_cell, &interrupt_cells_is_cell, &device_type_is_string, &model_is_string, &status_is_string, &label_is_string, @@ -1936,43 +1839,26 @@ static struct check *check_table[] = { &chosen_node_is_root, &chosen_node_bootargs, &chosen_node_stdout_path, &clocks_property, - &clocks_is_cell, &cooling_device_property, - &cooling_device_is_cell, &dmas_property, - &dmas_is_cell, &hwlocks_property, - &hwlocks_is_cell, &interrupts_extended_property, - &interrupts_extended_is_cell, &io_channels_property, - &io_channels_is_cell, &iommus_property, - &iommus_is_cell, &mboxes_property, - &mboxes_is_cell, &msi_parent_property, - &msi_parent_is_cell, &mux_controls_property, - &mux_controls_is_cell, &phys_property, - &phys_is_cell, &power_domains_property, - &power_domains_is_cell, &pwms_property, - &pwms_is_cell, &resets_property, - &resets_is_cell, &sound_dai_property, - &sound_dai_is_cell, &thermal_sensors_property, - &thermal_sensors_is_cell, &deprecated_gpio_property, &gpios_property, &interrupts_property, &interrupt_provider, - &interrupt_map, &alias_paths, @@ -1996,7 +1882,7 @@ static void enable_warning_error(struct check *c, bool warn, bool error) static void disable_warning_error(struct check *c, bool warn, bool error) { - unsigned int i; + int i; /* Lowering level, also lower it for things this is the prereq * for */ @@ -2017,7 +1903,7 @@ static void disable_warning_error(struct check *c, bool warn, bool error) void parse_checks_option(bool warn, bool error, const char *arg) { - unsigned int i; + int i; const char *name = arg; bool enable = true; @@ -2044,7 +1930,7 @@ void parse_checks_option(bool warn, bool error, const char *arg) void process_checks(bool force, struct dt_info *dti) { - unsigned int i; + int i; int error = 0; for (i = 0; i < ARRAY_SIZE(check_table); i++) { diff --git a/scripts/dtc/dtc.c b/scripts/dtc/dtc.c index bc786c543b..838c5df96c 100644 --- a/scripts/dtc/dtc.c +++ b/scripts/dtc/dtc.c @@ -12,7 +12,7 @@ * Command line options */ int quiet; /* Level of quietness */ -unsigned int reservenum;/* Number of memory reservation slots */ +int reservenum; /* Number of memory reservation slots */ int minsize; /* Minimum blob size */ int padsize; /* Additional padding to blob */ int alignsize; /* Additional padding to blob accroding to the alignsize */ @@ -197,7 +197,7 @@ int main(int argc, char *argv[]) depname = optarg; break; case 'R': - reservenum = strtoul(optarg, NULL, 0); + reservenum = strtol(optarg, NULL, 0); break; case 'S': minsize = strtol(optarg, NULL, 
0); @@ -359,6 +359,8 @@ int main(int argc, char *argv[]) #endif } else if (streq(outform, "dtb")) { dt_to_blob(outf, dti, outversion); + } else if (streq(outform, "dtbo")) { + dt_to_blob(outf, dti, outversion); } else if (streq(outform, "asm")) { dt_to_asm(outf, dti, outversion); } else if (streq(outform, "null")) { diff --git a/scripts/dtc/dtc.h b/scripts/dtc/dtc.h index 0a1f549910..d3e82fb8e3 100644 --- a/scripts/dtc/dtc.h +++ b/scripts/dtc/dtc.h @@ -35,7 +35,7 @@ * Command line options */ extern int quiet; /* Level of quietness */ -extern unsigned int reservenum; /* Number of memory reservation slots */ +extern int reservenum; /* Number of memory reservation slots */ extern int minsize; /* Minimum blob size */ extern int padsize; /* Additional padding to blob */ extern int alignsize; /* Additional padding to blob accroding to the alignsize */ @@ -51,11 +51,6 @@ extern int annotate; /* annotate .dts with input source location */ typedef uint32_t cell_t; -static inline bool phandle_is_valid(cell_t phandle) -{ - return phandle != 0 && phandle != ~0U; -} - static inline uint16_t dtb_ld16(const void *p) { const uint8_t *bp = (const uint8_t *)p; @@ -91,16 +86,6 @@ static inline uint64_t dtb_ld64(const void *p) #define streq(a, b) (strcmp((a), (b)) == 0) #define strstarts(s, prefix) (strncmp((s), (prefix), strlen(prefix)) == 0) #define strprefixeq(a, n, b) (strlen(b) == (n) && (memcmp(a, b, n) == 0)) -static inline bool strends(const char *str, const char *suffix) -{ - unsigned int len, suffix_len; - - len = strlen(str); - suffix_len = strlen(suffix); - if (len < suffix_len) - return false; - return streq(str + len - suffix_len, suffix); -} #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) @@ -116,12 +101,6 @@ enum markertype { TYPE_UINT64, TYPE_STRING, }; - -static inline bool is_type_marker(enum markertype type) -{ - return type >= TYPE_UINT8; -} - extern const char *markername(enum markertype markertype); struct marker { @@ -146,22 +125,7 @@ struct data { for_each_marker(m) \ if ((m)->type == (t)) -static inline struct marker *next_type_marker(struct marker *m) -{ - for_each_marker(m) - if (is_type_marker(m->type)) - break; - return m; -} - -static inline size_t type_marker_length(struct marker *m) -{ - struct marker *next = next_type_marker(m->next); - - if (next) - return next->offset - m->offset; - return 0; -} +size_t type_marker_length(struct marker *m); void data_free(struct data d); diff --git a/scripts/dtc/flattree.c b/scripts/dtc/flattree.c index 95e43d32c3..4659afbfcb 100644 --- a/scripts/dtc/flattree.c +++ b/scripts/dtc/flattree.c @@ -124,8 +124,7 @@ static void asm_emit_cell(void *e, cell_t val) { FILE *f = e; - fprintf(f, "\t.byte\t0x%02x\n" "\t.byte\t0x%02x\n" - "\t.byte\t0x%02x\n" "\t.byte\t0x%02x\n", + fprintf(f, "\t.byte 0x%02x; .byte 0x%02x; .byte 0x%02x; .byte 0x%02x\n", (val >> 24) & 0xff, (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff); } @@ -135,9 +134,9 @@ static void asm_emit_string(void *e, const char *str, int len) FILE *f = e; if (len != 0) - fprintf(f, "\t.asciz\t\"%.*s\"\n", len, str); + fprintf(f, "\t.string\t\"%.*s\"\n", len, str); else - fprintf(f, "\t.asciz\t\"%s\"\n", str); + fprintf(f, "\t.string\t\"%s\"\n", str); } static void asm_emit_align(void *e, int a) @@ -296,7 +295,7 @@ static struct data flatten_reserve_list(struct reserve_info *reservelist, { struct reserve_info *re; struct data d = empty_data; - unsigned int j; + int j; for (re = reservelist; re; re = re->next) { d = data_append_re(d, re->address, re->size); @@ -439,7 +438,7 @@ static void 
dump_stringtable_asm(FILE *f, struct data strbuf) while (p < (strbuf.val + strbuf.len)) { len = strlen(p); - fprintf(f, "\t.asciz \"%s\"\n", p); + fprintf(f, "\t.string \"%s\"\n", p); p += len+1; } } diff --git a/scripts/dtc/libfdt/fdt.c b/scripts/dtc/libfdt/fdt.c index 9fe7cf4b74..3e893073da 100644 --- a/scripts/dtc/libfdt/fdt.c +++ b/scripts/dtc/libfdt/fdt.c @@ -90,10 +90,6 @@ int fdt_check_header(const void *fdt) { size_t hdrsize; - /* The device tree must be at an 8-byte aligned address */ - if ((uintptr_t)fdt & 7) - return -FDT_ERR_ALIGNMENT; - if (fdt_magic(fdt) != FDT_MAGIC) return -FDT_ERR_BADMAGIC; if (!can_assume(LATEST)) { diff --git a/scripts/dtc/libfdt/fdt_rw.c b/scripts/dtc/libfdt/fdt_rw.c index 3621d3651d..f13458d165 100644 --- a/scripts/dtc/libfdt/fdt_rw.c +++ b/scripts/dtc/libfdt/fdt_rw.c @@ -349,10 +349,7 @@ int fdt_add_subnode_namelen(void *fdt, int parentoffset, return offset; /* Try to place the new node after the parent's properties */ - tag = fdt_next_tag(fdt, parentoffset, &nextoffset); - /* the fdt_subnode_offset_namelen() should ensure this never hits */ - if (!can_assume(LIBFDT_FLAWLESS) && (tag != FDT_BEGIN_NODE)) - return -FDT_ERR_INTERNAL; + fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */ do { offset = nextoffset; tag = fdt_next_tag(fdt, offset, &nextoffset); @@ -394,9 +391,7 @@ int fdt_del_node(void *fdt, int nodeoffset) } static void fdt_packblocks_(const char *old, char *new, - int mem_rsv_size, - int struct_size, - int strings_size) + int mem_rsv_size, int struct_size) { int mem_rsv_off, struct_off, strings_off; @@ -411,7 +406,8 @@ static void fdt_packblocks_(const char *old, char *new, fdt_set_off_dt_struct(new, struct_off); fdt_set_size_dt_struct(new, struct_size); - memmove(new + strings_off, old + fdt_off_dt_strings(old), strings_size); + memmove(new + strings_off, old + fdt_off_dt_strings(old), + fdt_size_dt_strings(old)); fdt_set_off_dt_strings(new, strings_off); fdt_set_size_dt_strings(new, fdt_size_dt_strings(old)); } @@ -471,8 +467,7 @@ int fdt_open_into(const void *fdt, void *buf, int bufsize) return -FDT_ERR_NOSPACE; } - fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size, - fdt_size_dt_strings(fdt)); + fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size); memmove(buf, tmp, newsize); fdt_set_magic(buf, FDT_MAGIC); @@ -492,8 +487,7 @@ int fdt_pack(void *fdt) mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) * sizeof(struct fdt_reserve_entry); - fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt), - fdt_size_dt_strings(fdt)); + fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt)); fdt_set_totalsize(fdt, fdt_data_size_(fdt)); return 0; diff --git a/scripts/dtc/libfdt/fdt_strerror.c b/scripts/dtc/libfdt/fdt_strerror.c index d852b77e81..b4356931b0 100644 --- a/scripts/dtc/libfdt/fdt_strerror.c +++ b/scripts/dtc/libfdt/fdt_strerror.c @@ -39,7 +39,6 @@ static struct fdt_errtabent fdt_errtable[] = { FDT_ERRTABENT(FDT_ERR_BADOVERLAY), FDT_ERRTABENT(FDT_ERR_NOPHANDLES), FDT_ERRTABENT(FDT_ERR_BADFLAGS), - FDT_ERRTABENT(FDT_ERR_ALIGNMENT), }; #define FDT_ERRTABSIZE ((int)(sizeof(fdt_errtable) / sizeof(fdt_errtable[0]))) diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h index ce31e84485..c42807a766 100644 --- a/scripts/dtc/libfdt/libfdt.h +++ b/scripts/dtc/libfdt/libfdt.h @@ -131,13 +131,6 @@ uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset); * to work even with unaligned pointers on platforms (such as ARMv5) that don't * like unaligned loads and stores. 
*/ -static inline uint16_t fdt16_ld(const fdt16_t *p) -{ - const uint8_t *bp = (const uint8_t *)p; - - return ((uint16_t)bp[0] << 8) | bp[1]; -} - static inline uint32_t fdt32_ld(const fdt32_t *p) { const uint8_t *bp = (const uint8_t *)p; diff --git a/scripts/dtc/livetree.c b/scripts/dtc/livetree.c index cc612370ec..7eacd02486 100644 --- a/scripts/dtc/livetree.c +++ b/scripts/dtc/livetree.c @@ -526,7 +526,7 @@ struct node *get_node_by_path(struct node *tree, const char *path) p = strchr(path, '/'); for_each_child(tree, child) { - if (p && strprefixeq(path, (size_t)(p - path), child->name)) + if (p && strprefixeq(path, p - path, child->name)) return get_node_by_path(child, p+1); else if (!p && streq(path, child->name)) return child; @@ -559,7 +559,7 @@ struct node *get_node_by_phandle(struct node *tree, cell_t phandle) { struct node *child, *node; - if (!phandle_is_valid(phandle)) { + if ((phandle == 0) || (phandle == -1)) { assert(generate_fixups); return NULL; } @@ -594,7 +594,7 @@ cell_t get_node_phandle(struct node *root, struct node *node) static cell_t phandle = 1; /* FIXME: ick, static local */ struct data d = empty_data; - if (phandle_is_valid(node->phandle)) + if ((node->phandle != 0) && (node->phandle != -1)) return node->phandle; while (get_node_by_phandle(root, phandle)) diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c index 33fedee82d..061ba8c9c5 100644 --- a/scripts/dtc/treesource.c +++ b/scripts/dtc/treesource.c @@ -124,6 +124,27 @@ static void write_propval_int(FILE *f, const char *p, size_t len, size_t width) } } +static bool has_data_type_information(struct marker *m) +{ + return m->type >= TYPE_UINT8; +} + +static struct marker *next_type_marker(struct marker *m) +{ + while (m && !has_data_type_information(m)) + m = m->next; + return m; +} + +size_t type_marker_length(struct marker *m) +{ + struct marker *next = next_type_marker(m->next); + + if (next) + return next->offset - m->offset; + return 0; +} + static const char *delim_start[] = { [TYPE_UINT8] = "[", [TYPE_UINT16] = "/bits/ 16 <", @@ -208,39 +229,26 @@ static void write_propval(FILE *f, struct property *prop) size_t chunk_len = (m->next ? m->next->offset : len) - m->offset; size_t data_len = type_marker_length(m) ? 
: len - m->offset; const char *p = &prop->val.val[m->offset]; - struct marker *m_phandle; - if (is_type_marker(m->type)) { + if (has_data_type_information(m)) { emit_type = m->type; fprintf(f, " %s", delim_start[emit_type]); } else if (m->type == LABEL) fprintf(f, " %s:", m->ref); + else if (m->offset) + fputc(' ', f); - if (emit_type == TYPE_NONE || chunk_len == 0) + if (emit_type == TYPE_NONE) { + assert(chunk_len == 0); continue; + } switch(emit_type) { case TYPE_UINT16: write_propval_int(f, p, chunk_len, 2); break; case TYPE_UINT32: - m_phandle = prop->val.markers; - for_each_marker_of_type(m_phandle, REF_PHANDLE) - if (m->offset == m_phandle->offset) - break; - - if (m_phandle) { - if (m_phandle->ref[0] == '/') - fprintf(f, "&{%s}", m_phandle->ref); - else - fprintf(f, "&%s", m_phandle->ref); - if (chunk_len > 4) { - fputc(' ', f); - write_propval_int(f, p + 4, chunk_len - 4, 4); - } - } else { - write_propval_int(f, p, chunk_len, 4); - } + write_propval_int(f, p, chunk_len, 4); break; case TYPE_UINT64: write_propval_int(f, p, chunk_len, 8); diff --git a/scripts/dtc/util.h b/scripts/dtc/util.h index c45b2c295a..a771b4654c 100644 --- a/scripts/dtc/util.h +++ b/scripts/dtc/util.h @@ -13,10 +13,10 @@ */ #ifdef __GNUC__ -#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) -#define PRINTF(i, j) __attribute__((format (gnu_printf, i, j))) -#else +#ifdef __clang__ #define PRINTF(i, j) __attribute__((format (printf, i, j))) +#else +#define PRINTF(i, j) __attribute__((format (gnu_printf, i, j))) #endif #define NORETURN __attribute__((noreturn)) #else diff --git a/scripts/dtc/yamltree.c b/scripts/dtc/yamltree.c index 55908c829c..e63d32fe14 100644 --- a/scripts/dtc/yamltree.c +++ b/scripts/dtc/yamltree.c @@ -29,12 +29,11 @@ char *yaml_error_name[] = { (emitter)->problem, __func__, __LINE__); \ }) -static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers, - char *data, unsigned int seq_offset, unsigned int len, int width) +static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers, char *data, unsigned int len, int width) { yaml_event_t event; void *tag; - unsigned int off; + unsigned int off, start_offset = markers->offset; switch(width) { case 1: tag = "!u8"; break; @@ -67,7 +66,7 @@ static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers, m = markers; is_phandle = false; for_each_marker_of_type(m, REF_PHANDLE) { - if (m->offset == (seq_offset + off)) { + if (m->offset == (start_offset + off)) { is_phandle = true; break; } @@ -115,7 +114,6 @@ static void yaml_propval(yaml_emitter_t *emitter, struct property *prop) yaml_event_t event; unsigned int len = prop->val.len; struct marker *m = prop->val.markers; - struct marker *markers = prop->val.markers; /* Emit the property name */ yaml_scalar_event_initialize(&event, NULL, @@ -153,19 +151,19 @@ static void yaml_propval(yaml_emitter_t *emitter, struct property *prop) switch(m->type) { case TYPE_UINT16: - yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 2); + yaml_propval_int(emitter, m, data, chunk_len, 2); break; case TYPE_UINT32: - yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 4); + yaml_propval_int(emitter, m, data, chunk_len, 4); break; case TYPE_UINT64: - yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 8); + yaml_propval_int(emitter, m, data, chunk_len, 8); break; case TYPE_STRING: yaml_propval_string(emitter, data, chunk_len); break; default: - yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 1); + 
yaml_propval_int(emitter, m, data, chunk_len, 1); break; } } diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index 51d81c3f03..ab9eb4cbe3 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig @@ -19,10 +19,24 @@ menuconfig GCC_PLUGINS if GCC_PLUGINS +config GCC_PLUGIN_CYC_COMPLEXITY + bool "Compute the cyclomatic complexity of a function" if EXPERT + depends on !COMPILE_TEST # too noisy + help + The complexity M of a function's control flow graph is defined as: + M = E - N + 2P + where + + E = the number of edges + N = the number of nodes + P = the number of connected components (exit nodes). + + Enabling this plugin reports the complexity to stderr during the + build. It mainly serves as a simple example of how to create a + gcc plugin for the kernel. + config GCC_PLUGIN_SANCOV bool - # Plugin can be removed once the kernel only supports GCC 6+ - depends on !CC_HAS_SANCOV_TRACE_PC help This plugin inserts a __sanitizer_cov_trace_pc() call at the start of basic blocks. It supports all gcc versions with plugin support (from @@ -69,6 +83,8 @@ config GCC_PLUGIN_RANDSTRUCT the existing seed and will be removed by a make mrproper or make distclean. + Note that the implementation requires gcc 4.7 or newer. + This plugin was ported from grsecurity/PaX. More information at: * https://grsecurity.net/ * https://pax.grsecurity.net/ diff --git a/scripts/gcc-plugins/arm_ssp_per_task_plugin.c b/scripts/gcc-plugins/arm_ssp_per_task_plugin.c index 7328d037f9..8c1af9bdcb 100644 --- a/scripts/gcc-plugins/arm_ssp_per_task_plugin.c +++ b/scripts/gcc-plugins/arm_ssp_per_task_plugin.c @@ -4,7 +4,7 @@ __visible int plugin_is_GPL_compatible; -static unsigned int canary_offset; +static unsigned int sp_mask, canary_offset; static unsigned int arm_pertask_ssp_rtl_execute(void) { @@ -13,7 +13,7 @@ static unsigned int arm_pertask_ssp_rtl_execute(void) for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) { const char *sym; rtx body; - rtx current; + rtx mask, masked_sp; /* * Find a SET insn involving a SYMBOL_REF to __stack_chk_guard @@ -30,13 +30,19 @@ static unsigned int arm_pertask_ssp_rtl_execute(void) /* * Replace the source of the SET insn with an expression that - * produces the address of the current task's stack canary value + * produces the address of the copy of the stack canary value + * stored in struct thread_info */ - current = gen_reg_rtx(Pmode); + mask = GEN_INT(sext_hwi(sp_mask, GET_MODE_PRECISION(Pmode))); + masked_sp = gen_reg_rtx(Pmode); - emit_insn_before(gen_load_tp_hard(current), insn); + emit_insn_before(gen_rtx_set(masked_sp, + gen_rtx_AND(Pmode, + stack_pointer_rtx, + mask)), + insn); - SET_SRC(body) = gen_rtx_PLUS(Pmode, current, + SET_SRC(body) = gen_rtx_PLUS(Pmode, masked_sp, GEN_INT(canary_offset)); } return 0; @@ -66,6 +72,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, const char * const plugin_name = plugin_info->base_name; const int argc = plugin_info->argc; const struct plugin_argument *argv = plugin_info->argv; + int tso = 0; int i; if (!plugin_default_version_check(version, &gcc_version)) { @@ -84,6 +91,11 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, return 1; } + if (!strcmp(argv[i].key, "tso")) { + tso = atoi(argv[i].value); + continue; + } + if (!strcmp(argv[i].key, "offset")) { canary_offset = atoi(argv[i].value); continue; @@ -93,6 +105,9 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, return 1; } + /* create the mask that produces the base of the stack */ + sp_mask = 
~((1U << (12 + tso)) - 1); + PASS_INFO(arm_pertask_ssp_rtl, "expand", 1, PASS_POS_INSERT_AFTER); register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 9a1895747b..0c087614fc 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -27,7 +27,9 @@ #include "except.h" #include "function.h" #include "toplev.h" +#if BUILDING_GCC_VERSION >= 5000 #include "expr.h" +#endif #include "basic-block.h" #include "intl.h" #include "ggc.h" @@ -37,7 +39,11 @@ #include "params.h" #endif +#if BUILDING_GCC_VERSION <= 4009 +#include "pointer-set.h" +#else #include "hash-map.h" +#endif #if BUILDING_GCC_VERSION >= 7000 #include "memmodel.h" @@ -86,13 +92,16 @@ #include "stmt.h" #include "gimplify.h" #include "gimple.h" +#include "tree-ssa-operands.h" #include "tree-phinodes.h" #include "tree-cfg.h" #include "gimple-iterator.h" #include "gimple-ssa.h" #include "ssa-iterators.h" +#if BUILDING_GCC_VERSION >= 5000 #include "builtins.h" +#endif /* missing from basic_block.h... */ void debug_dominance_info(enum cdi_direction dir); @@ -143,6 +152,125 @@ struct register_pass_info NAME##_pass_info = { \ #define TODO_dump_func 0 #define TODO_dump_cgraph 0 +#if BUILDING_GCC_VERSION <= 4009 +#define TODO_verify_il 0 +#define AVAIL_INTERPOSABLE AVAIL_OVERWRITABLE + +#define section_name_prefix LTO_SECTION_NAME_PREFIX +#define fatal_error(loc, gmsgid, ...) fatal_error((gmsgid), __VA_ARGS__) + +rtx emit_move_insn(rtx x, rtx y); + +typedef struct rtx_def rtx_insn; + +static inline const char *get_decl_section_name(const_tree decl) +{ + if (DECL_SECTION_NAME(decl) == NULL_TREE) + return NULL; + + return TREE_STRING_POINTER(DECL_SECTION_NAME(decl)); +} + +static inline void set_decl_section_name(tree node, const char *value) +{ + if (value) + DECL_SECTION_NAME(node) = build_string(strlen(value) + 1, value); + else + DECL_SECTION_NAME(node) = NULL; +} +#endif + +#if BUILDING_GCC_VERSION == 4009 +typedef struct gimple_statement_asm gasm; +typedef struct gimple_statement_base gassign; +typedef struct gimple_statement_call gcall; +typedef struct gimple_statement_base gcond; +typedef struct gimple_statement_base gdebug; +typedef struct gimple_statement_base ggoto; +typedef struct gimple_statement_phi gphi; +typedef struct gimple_statement_base greturn; + +static inline gasm *as_a_gasm(gimple stmt) +{ + return as_a<gasm *>(stmt); +} + +static inline const gasm *as_a_const_gasm(const_gimple stmt) +{ + return as_a<const gasm *>(stmt); +} + +static inline gassign *as_a_gassign(gimple stmt) +{ + return stmt; +} + +static inline const gassign *as_a_const_gassign(const_gimple stmt) +{ + return stmt; +} + +static inline gcall *as_a_gcall(gimple stmt) +{ + return as_a<gcall *>(stmt); +} + +static inline const gcall *as_a_const_gcall(const_gimple stmt) +{ + return as_a<const gcall *>(stmt); +} + +static inline gcond *as_a_gcond(gimple stmt) +{ + return stmt; +} + +static inline const gcond *as_a_const_gcond(const_gimple stmt) +{ + return stmt; +} + +static inline gdebug *as_a_gdebug(gimple stmt) +{ + return stmt; +} + +static inline const gdebug *as_a_const_gdebug(const_gimple stmt) +{ + return stmt; +} + +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + +static inline gphi *as_a_gphi(gimple stmt) +{ + return as_a<gphi *>(stmt); +} + +static inline const gphi *as_a_const_gphi(const_gimple stmt) +{ + return as_a<const gphi *>(stmt); +} + +static inline greturn
*as_a_greturn(gimple stmt) +{ + return stmt; +} + +static inline const greturn *as_a_const_greturn(const_gimple stmt) +{ + return stmt; +} +#endif + #define TODO_ggc_collect 0 #define NODE_SYMBOL(node) (node) #define NODE_DECL(node) (node)->decl @@ -154,7 +282,7 @@ static inline opt_pass *get_pass_for_id(int id) return g->get_passes()->get_pass_for_id(id); } -#if BUILDING_GCC_VERSION < 6000 +#if BUILDING_GCC_VERSION >= 5000 && BUILDING_GCC_VERSION < 6000 /* gimple related */ template <> template <> @@ -164,6 +292,7 @@ inline bool is_a_helper::test(const_gimple gs) } #endif +#if BUILDING_GCC_VERSION >= 5000 #define TODO_verify_ssa TODO_verify_il #define TODO_verify_flow TODO_verify_il #define TODO_verify_stmts TODO_verify_il @@ -404,6 +533,7 @@ static inline void ipa_remove_stmt_references(symtab_node *referring_node, gimpl { referring_node->remove_stmt_references(stmt); } +#endif #if BUILDING_GCC_VERSION < 6000 #define get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep, keep_aligning) \ diff --git a/scripts/gcc-plugins/gcc-generate-gimple-pass.h b/scripts/gcc-plugins/gcc-generate-gimple-pass.h index 503c074963..5178082873 100644 --- a/scripts/gcc-plugins/gcc-generate-gimple-pass.h +++ b/scripts/gcc-plugins/gcc-generate-gimple-pass.h @@ -78,6 +78,17 @@ static const pass_data _PASS_NAME_PASS_DATA = { .type = GIMPLE_PASS, .name = _PASS_NAME_NAME, .optinfo_flags = OPTGROUP_NONE, +#if BUILDING_GCC_VERSION >= 5000 +#elif BUILDING_GCC_VERSION == 4009 + .has_gate = _HAS_GATE, + .has_execute = _HAS_EXECUTE, +#else + .gate = _GATE, + .execute = _EXECUTE, + .sub = NULL, + .next = NULL, + .static_pass_number = 0, +#endif .tv_id = TV_NONE, .properties_required = PROPERTIES_REQUIRED, .properties_provided = PROPERTIES_PROVIDED, @@ -91,13 +102,21 @@ class _PASS_NAME_PASS : public gimple_opt_pass { _PASS_NAME_PASS() : gimple_opt_pass(_PASS_NAME_PASS_DATA, g) {} #ifndef NO_GATE +#if BUILDING_GCC_VERSION >= 5000 virtual bool gate(function *) { return _GATE(); } +#else + virtual bool gate(void) { return _GATE(); } +#endif #endif virtual opt_pass * clone () { return new _PASS_NAME_PASS(); } #ifndef NO_EXECUTE +#if BUILDING_GCC_VERSION >= 5000 virtual unsigned int execute(function *) { return _EXECUTE(); } +#else + virtual unsigned int execute(void) { return _EXECUTE(); } +#endif }; } diff --git a/scripts/gcc-plugins/gcc-generate-ipa-pass.h b/scripts/gcc-plugins/gcc-generate-ipa-pass.h index 1e7f064e8f..c34ffec035 100644 --- a/scripts/gcc-plugins/gcc-generate-ipa-pass.h +++ b/scripts/gcc-plugins/gcc-generate-ipa-pass.h @@ -146,6 +146,17 @@ static const pass_data _PASS_NAME_PASS_DATA = { .type = IPA_PASS, .name = _PASS_NAME_NAME, .optinfo_flags = OPTGROUP_NONE, +#if BUILDING_GCC_VERSION >= 5000 +#elif BUILDING_GCC_VERSION == 4009 + .has_gate = _HAS_GATE, + .has_execute = _HAS_EXECUTE, +#else + .gate = _GATE, + .execute = _EXECUTE, + .sub = NULL, + .next = NULL, + .static_pass_number = 0, +#endif .tv_id = TV_NONE, .properties_required = PROPERTIES_REQUIRED, .properties_provided = PROPERTIES_PROVIDED, @@ -169,12 +180,20 @@ class _PASS_NAME_PASS : public ipa_opt_pass_d { _VARIABLE_TRANSFORM) {} #ifndef NO_GATE +#if BUILDING_GCC_VERSION >= 5000 virtual bool gate(function *) { return _GATE(); } +#else + virtual bool gate(void) { return _GATE(); } +#endif virtual opt_pass *clone() { return new _PASS_NAME_PASS(); } #ifndef NO_EXECUTE +#if BUILDING_GCC_VERSION >= 5000 virtual unsigned int execute(function *) { return _EXECUTE(); } +#else + virtual unsigned int execute(void) { 
return _EXECUTE(); } +#endif #endif }; } diff --git a/scripts/gcc-plugins/gcc-generate-rtl-pass.h b/scripts/gcc-plugins/gcc-generate-rtl-pass.h index 7cd46e8d50..d14614f4b1 100644 --- a/scripts/gcc-plugins/gcc-generate-rtl-pass.h +++ b/scripts/gcc-plugins/gcc-generate-rtl-pass.h @@ -78,6 +78,17 @@ static const pass_data _PASS_NAME_PASS_DATA = { .type = RTL_PASS, .name = _PASS_NAME_NAME, .optinfo_flags = OPTGROUP_NONE, +#if BUILDING_GCC_VERSION >= 5000 +#elif BUILDING_GCC_VERSION == 4009 + .has_gate = _HAS_GATE, + .has_execute = _HAS_EXECUTE, +#else + .gate = _GATE, + .execute = _EXECUTE, + .sub = NULL, + .next = NULL, + .static_pass_number = 0, +#endif .tv_id = TV_NONE, .properties_required = PROPERTIES_REQUIRED, .properties_provided = PROPERTIES_PROVIDED, @@ -91,13 +102,21 @@ class _PASS_NAME_PASS : public rtl_opt_pass { _PASS_NAME_PASS() : rtl_opt_pass(_PASS_NAME_PASS_DATA, g) {} #ifndef NO_GATE +#if BUILDING_GCC_VERSION >= 5000 virtual bool gate(function *) { return _GATE(); } +#else + virtual bool gate(void) { return _GATE(); } +#endif #endif virtual opt_pass *clone() { return new _PASS_NAME_PASS(); } #ifndef NO_EXECUTE +#if BUILDING_GCC_VERSION >= 5000 virtual unsigned int execute(function *) { return _EXECUTE(); } +#else + virtual unsigned int execute(void) { return _EXECUTE(); } +#endif #endif }; } diff --git a/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h b/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h index 33093ccc94..ef6f4c2cb6 100644 --- a/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h +++ b/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h @@ -78,6 +78,17 @@ static const pass_data _PASS_NAME_PASS_DATA = { .type = SIMPLE_IPA_PASS, .name = _PASS_NAME_NAME, .optinfo_flags = OPTGROUP_NONE, +#if BUILDING_GCC_VERSION >= 5000 +#elif BUILDING_GCC_VERSION == 4009 + .has_gate = _HAS_GATE, + .has_execute = _HAS_EXECUTE, +#else + .gate = _GATE, + .execute = _EXECUTE, + .sub = NULL, + .next = NULL, + .static_pass_number = 0, +#endif .tv_id = TV_NONE, .properties_required = PROPERTIES_REQUIRED, .properties_provided = PROPERTIES_PROVIDED, @@ -91,13 +102,21 @@ class _PASS_NAME_PASS : public simple_ipa_opt_pass { _PASS_NAME_PASS() : simple_ipa_opt_pass(_PASS_NAME_PASS_DATA, g) {} #ifndef NO_GATE +#if BUILDING_GCC_VERSION >= 5000 virtual bool gate(function *) { return _GATE(); } +#else + virtual bool gate(void) { return _GATE(); } +#endif #endif virtual opt_pass *clone() { return new _PASS_NAME_PASS(); } #ifndef NO_EXECUTE +#if BUILDING_GCC_VERSION >= 5000 virtual unsigned int execute(function *) { return _EXECUTE(); } +#else + virtual unsigned int execute(void) { return _EXECUTE(); } +#endif #endif }; } diff --git a/scripts/gcc-plugins/structleak_plugin.c b/scripts/gcc-plugins/structleak_plugin.c index 74e3192883..d7190e443a 100644 --- a/scripts/gcc-plugins/structleak_plugin.c +++ b/scripts/gcc-plugins/structleak_plugin.c @@ -103,8 +103,10 @@ static void finish_type(void *event_data, void *data) if (type == NULL_TREE || type == error_mark_node) return; +#if BUILDING_GCC_VERSION >= 5000 if (TREE_CODE(type) == ENUMERAL_TYPE) return; +#endif if (TYPE_USERSPACE(type)) return; diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index d5983cf3db..a92c55bd8d 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -44,17 +44,19 @@ class LxDmesg(gdb.Command): sz = prb_desc_ring_type.get_type().sizeof desc_ring = utils.read_memoryview(inf, addr, sz).tobytes() - # read in descriptor count, size, and address + # read in descriptor array off = 
prb_desc_ring_type.get_type()['count_bits'].bitpos // 8 desc_ring_count = 1 << utils.read_u32(desc_ring, off) desc_sz = prb_desc_type.get_type().sizeof off = prb_desc_ring_type.get_type()['descs'].bitpos // 8 - desc_addr = utils.read_ulong(desc_ring, off) + addr = utils.read_ulong(desc_ring, off) + descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes() - # read in info size and address + # read in info array info_sz = printk_info_type.get_type().sizeof off = prb_desc_ring_type.get_type()['infos'].bitpos // 8 - info_addr = utils.read_ulong(desc_ring, off) + addr = utils.read_ulong(desc_ring, off) + infos = utils.read_memoryview(inf, addr, info_sz * desc_ring_count).tobytes() # read in text data ring structure off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8 @@ -62,11 +64,12 @@ class LxDmesg(gdb.Command): sz = prb_data_ring_type.get_type().sizeof text_data_ring = utils.read_memoryview(inf, addr, sz).tobytes() - # read in text data size and address + # read in text data off = prb_data_ring_type.get_type()['size_bits'].bitpos // 8 text_data_sz = 1 << utils.read_u32(text_data_ring, off) off = prb_data_ring_type.get_type()['data'].bitpos // 8 - text_data_addr = utils.read_ulong(text_data_ring, off) + addr = utils.read_ulong(text_data_ring, off) + text_data = utils.read_memoryview(inf, addr, text_data_sz).tobytes() counter_off = atomic_long_type.get_type()['counter'].bitpos // 8 @@ -99,20 +102,17 @@ class LxDmesg(gdb.Command): desc_off = desc_sz * ind info_off = info_sz * ind - desc = utils.read_memoryview(inf, desc_addr + desc_off, desc_sz).tobytes() - # skip non-committed record - state = 3 & (utils.read_u64(desc, sv_off + counter_off) >> desc_flags_shift) + state = 3 & (utils.read_u64(descs, desc_off + sv_off + + counter_off) >> desc_flags_shift) if state != desc_committed and state != desc_finalized: if did == head_id: break did = (did + 1) & desc_id_mask continue - begin = utils.read_ulong(desc, begin_off) % text_data_sz - end = utils.read_ulong(desc, next_off) % text_data_sz - - info = utils.read_memoryview(inf, info_addr + info_off, info_sz).tobytes() + begin = utils.read_ulong(descs, desc_off + begin_off) % text_data_sz + end = utils.read_ulong(descs, desc_off + next_off) % text_data_sz # handle data-less record if begin & 1 == 1: @@ -125,17 +125,16 @@ class LxDmesg(gdb.Command): # skip over descriptor id text_start = begin + utils.get_long_type().sizeof - text_len = utils.read_u16(info, len_off) + text_len = utils.read_u16(infos, info_off + len_off) # handle truncated message if end - text_start < text_len: text_len = end - text_start - text_data = utils.read_memoryview(inf, text_data_addr + text_start, - text_len).tobytes() - text = text_data[0:text_len].decode(encoding='utf8', errors='replace') + text = text_data[text_start:text_start + text_len].decode( + encoding='utf8', errors='replace') - time_stamp = utils.read_u64(info, ts_off) + time_stamp = utils.read_u64(infos, info_off + ts_off) for line in text.splitlines(): msg = u"[{time:12.6f}] {line}\n".format( diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 46f7542db0..08d264ac32 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -148,8 +148,7 @@ lx-symbols command.""" # drop all current symbols and reload vmlinux orig_vmlinux = 'vmlinux' for obj in gdb.objfiles(): - if (obj.filename.endswith('vmlinux') or - obj.filename.endswith('vmlinux.debug')): + if obj.filename.endswith('vmlinux'): orig_vmlinux = obj.filename 
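The lx-dmesg and lx-symbols hunks above share one idiom worth spelling out: take a gdb.Type, convert a member's bit position into a byte offset, and decode that slice of a buffer read from inferior memory by hand. A condensed sketch of the pattern (only meaningful inside GDB's Python; read_field_u32 is an invented stand-in for the script's utils helpers):

    import gdb  # available only when run inside gdb

    def read_field_u32(buf, gdb_type, field):
        # gdb reports member positions in bits; the scripts divide by 8
        off = gdb_type[field].bitpos // 8
        return int.from_bytes(buf[off:off + 4], "little")

    # usage, mirroring the ring-buffer code above:
    #   count = 1 << read_field_u32(desc_ring,
    #                               prb_desc_ring_type.get_type(),
    #                               'count_bits')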
gdb.execute("symbol-file", to_string=True) gdb.execute("symbol-file {0}".format(orig_vmlinux)) diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh index 120225c541..6ed0d225c8 100644 --- a/scripts/gen_autoksyms.sh +++ b/scripts/gen_autoksyms.sh @@ -16,15 +16,20 @@ case "$KBUILD_VERBOSE" in ;; esac +# We need access to CONFIG_ symbols +. include/config/auto.conf + needed_symbols= # Special case for modversions (see modpost.c) -if grep -q "^CONFIG_MODVERSIONS=y$" include/config/auto.conf; then +if [ -n "$CONFIG_MODVERSIONS" ]; then needed_symbols="$needed_symbols module_layout" fi -ksym_wl=$(sed -n 's/^CONFIG_UNUSED_KSYMS_WHITELIST=\(.*\)$/\1/p' include/config/auto.conf) -if [ -n "$ksym_wl" ]; then +ksym_wl= +if [ -n "$CONFIG_UNUSED_KSYMS_WHITELIST" ]; then + # Use 'eval' to expand the whitelist path and check if it is relative + eval ksym_wl="$CONFIG_UNUSED_KSYMS_WHITELIST" [ "${ksym_wl}" != "${ksym_wl#/}" ] || ksym_wl="$abs_srctree/$ksym_wl" if [ ! -f "$ksym_wl" ] || [ ! -r "$ksym_wl" ]; then echo "ERROR: '$ksym_wl' whitelist file not found" >&2 diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index 6212f58b69..d7aa820942 100644 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -1,37 +1,19 @@ #!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 -BEGIN { $Pod::Usage::Formatter = 'Pod::Text::Termcap'; } - use strict; use warnings; use utf8; -use Pod::Usage qw(pod2usage); +use Pod::Usage; use Getopt::Long; use File::Find; -use IO::Handle; use Fcntl ':mode'; -use Cwd 'abs_path'; -use Data::Dumper; my $help = 0; -my $hint = 0; my $man = 0; my $debug = 0; my $enable_lineno = 0; -my $show_warnings = 1; my $prefix="Documentation/ABI"; -my $sysfs_prefix="/sys"; -my $search_string; - -# Debug options -my $dbg_what_parsing = 1; -my $dbg_what_open = 2; -my $dbg_dump_abi_structs = 4; -my $dbg_undefined = 8; - -$Data::Dumper::Indent = 1; -$Data::Dumper::Terse = 1; # # If true, assumes that the description is formatted with ReST @@ -39,27 +21,25 @@ $Data::Dumper::Terse = 1; my $description_is_rst = 1; GetOptions( - "debug=i" => \$debug, + "debug|d+" => \$debug, "enable-lineno" => \$enable_lineno, "rst-source!" => \$description_is_rst, "dir=s" => \$prefix, 'help|?' => \$help, - "show-hints" => \$hint, - "search-string=s" => \$search_string, man => \$man ) or pod2usage(2); pod2usage(1) if $help; -pod2usage(-exitstatus => 0, -noperldoc, -verbose => 2) if $man; +pod2usage(-exitstatus => 0, -verbose => 2) if $man; pod2usage(2) if (scalar @ARGV < 1 || @ARGV > 2); my ($cmd, $arg) = @ARGV; -pod2usage(2) if ($cmd ne "search" && $cmd ne "rest" && $cmd ne "validate" && $cmd ne "undefined"); +pod2usage(2) if ($cmd ne "search" && $cmd ne "rest" && $cmd ne "validate"); pod2usage(2) if ($cmd eq "search" && !$arg); -require Data::Dumper if ($debug & $dbg_dump_abi_structs); +require Data::Dumper if ($debug); my %data; my %symbols; @@ -70,8 +50,6 @@ my %symbols; sub parse_error($$$$) { my ($file, $ln, $msg, $data) = @_; - return if (!$show_warnings); - $data =~ s/\s+$/\n/; print STDERR "Warning: file $file#$ln:\n\t$msg"; @@ -119,7 +97,7 @@ sub parse_abi { my @labels; my $label = ""; - print STDERR "Opening $file\n" if ($debug & $dbg_what_open); + print STDERR "Opening $file\n" if ($debug > 1); open IN, $file; while() { $ln++; @@ -151,12 +129,12 @@ sub parse_abi { push @{$symbols{$content}->{file}}, " $file:" . ($ln - 1); if ($tag =~ m/what/) { - $what .= "\xac" . $content; + $what .= ", " . 
$content; } else { if ($what) { parse_error($file, $ln, "What '$what' doesn't have a description", "") if (!$data{$what}->{description}); - foreach my $w(split /\xac/, $what) { + foreach my $w(split /, /, $what) { $symbols{$w}->{xref} = $what; }; } @@ -186,13 +164,12 @@ sub parse_abi { $data{$what}->{file} = $name; $data{$what}->{filepath} = $file; } else { - $data{$what}->{description} .= "\n\n" if (defined($data{$what}->{description})); if ($name ne $data{$what}->{file}) { $data{$what}->{file} .= " " . $name; $data{$what}->{filepath} .= " " . $file; } } - print STDERR "\twhat: $what\n" if ($debug & $dbg_what_parsing); + print STDERR "\twhat: $what\n" if ($debug > 1); $data{$what}->{line_no} = $ln; } else { $data{$what}->{line_no} = $ln if (!defined($data{$what}->{line_no})); @@ -262,7 +239,7 @@ sub parse_abi { if ($what) { parse_error($file, $ln, "What '$what' doesn't have a description", "") if (!$data{$what}->{description}); - foreach my $w(split /\xac/,$what) { + foreach my $w(split /, /,$what) { $symbols{$w}->{xref} = $what; }; } @@ -351,7 +328,7 @@ sub output_rest { printf ".. _%s:\n\n", $data{$what}->{label}; - my @names = split /\xac/,$w; + my @names = split /, /,$w; my $len = 0; foreach my $name (@names) { @@ -515,7 +492,6 @@ sub search_symbols { my $file = $data{$what}->{filepath}; - $what =~ s/\xac/, /g; my $bar = $what; $bar =~ s/./-/g; @@ -545,420 +521,22 @@ sub search_symbols { } } -# Exclude /sys/kernel/debug and /sys/kernel/tracing from the search path -sub dont_parse_special_attributes { - if (($File::Find::dir =~ m,^/sys/kernel,)) { - return grep {!/(debug|tracing)/ } @_; - } - - if (($File::Find::dir =~ m,^/sys/fs,)) { - return grep {!/(pstore|bpf|fuse)/ } @_; - } - - return @_ -} - -my %leaf; -my %aliases; -my @files; -my %root; - -sub graph_add_file { - my $file = shift; - my $type = shift; - - my $dir = $file; - $dir =~ s,^(.*/).*,$1,; - $file =~ s,.*/,,; - - my $name; - my $file_ref = \%root; - foreach my $edge(split "/", $dir) { - $name .= "$edge/"; - if (!defined ${$file_ref}{$edge}) { - ${$file_ref}{$edge} = { }; - } - $file_ref = \%{$$file_ref{$edge}}; - ${$file_ref}{"__name"} = [ $name ]; - } - $name .= "$file"; - ${$file_ref}{$file} = { - "__name" => [ $name ] - }; - - return \%{$$file_ref{$file}}; -} - -sub graph_add_link { - my $file = shift; - my $link = shift; - - # Traverse graph to find the reference - my $file_ref = \%root; - foreach my $edge(split "/", $file) { - $file_ref = \%{$$file_ref{$edge}} || die "Missing node!"; - } - - # do a BFS - - my @queue; - my %seen; - my $start; - - push @queue, $file_ref; - $seen{$start}++; - - while (@queue) { - my $v = shift @queue; - my @child = keys(%{$v}); - - foreach my $c(@child) { - next if $seen{$$v{$c}}; - next if ($c eq "__name"); - - if (!defined($$v{$c}{"__name"})) { - printf STDERR "Error: Couldn't find a non-empty name on a child of $file/.*: "; - print STDERR Dumper(%{$v}); - exit; - } - - # Add new name - my $name = @{$$v{$c}{"__name"}}[0]; - if ($name =~ s#^$file/#$link/#) { - push @{$$v{$c}{"__name"}}, $name; - } - # Add child to the queue and mark as seen - push @queue, $$v{$c}; - $seen{$c}++; - } - } -} - my $escape_symbols = qr { ([\x01-\x08\x0e-\x1f\x21-\x29\x2b-\x2d\x3a-\x40\x7b-\xfe]) }x; sub parse_existing_sysfs { my $file = $File::Find::name; my $mode = (lstat($file))[2]; my $abs_file = abs_path($file); my @tmp; push @tmp, $file; push @tmp, $abs_file if ($abs_file ne $file); foreach my $f(@tmp) { # Ignore cgroup, as this is big and has zero docs under ABI return if
($f =~ m#^/sys/fs/cgroup/#); - - # Ignore firmware as it is documented elsewhere - # Either ACPI or under Documentation/devicetree/bindings/ - return if ($f =~ m#^/sys/firmware/#); - - # Ignore some sysfs nodes that aren't actually part of ABI - return if ($f =~ m#/sections|notes/#); - - # Would need to check at - # Documentation/admin-guide/kernel-parameters.txt, but this - # is not easily parseable. - return if ($f =~ m#/parameters/#); - } - - if (S_ISLNK($mode)) { - $aliases{$file} = $abs_file; - return; - } - - return if (S_ISDIR($mode)); - - # Trivial: file is defined exactly the same way at ABI What: - return if (defined($data{$file})); - return if (defined($data{$abs_file})); - - push @files, graph_add_file($abs_file, "file"); -} - -sub get_leave($) -{ - my $what = shift; - my $leave; - - my $l = $what; - my $stop = 1; - - $leave = $l; - $leave =~ s,/$,,; - $leave =~ s,.*/,,; - $leave =~ s/[\(\)]//g; - - # $leave is used to improve search performance at - # check_undefined_symbols, as the algorithm there can seek - # for a small number of "what". It also allows giving a - # hint about a leave with the same name somewhere else. - # However, there are a few occurrences where the leave is - # either a wildcard or a number. Just group such cases - # altogether. - if ($leave =~ m/\.\*/ || $leave eq "" || $leave =~ /\\d/) { - $leave = "others"; - } - - return $leave; -} - -my @not_found; - -sub check_file($$) -{ - my $file_ref = shift; - my $names_ref = shift; - my @names = @{$names_ref}; - my $file = $names[0]; - - my $found_string; - - my $leave = get_leave($file); - if (!defined($leaf{$leave})) { - $leave = "others"; - } - my @expr = @{$leaf{$leave}->{expr}}; - die ("\rmissing rules for $leave") if (!defined($leaf{$leave})); - - my $path = $file; - $path =~ s,(.*/).*,$1,; - - if ($search_string) { - return if (!($file =~ m#$search_string#)); - $found_string = 1; - } - - for (my $i = 0; $i < @names; $i++) { - if ($found_string && $hint) { - if (!$i) { - print STDERR "--> $names[$i]\n"; - } else { - print STDERR " $names[$i]\n"; - } - } - foreach my $re (@expr) { - print STDERR "$names[$i] =~ /^$re\$/\n" if ($debug && $dbg_undefined); - if ($names[$i] =~ $re) { - return; - } - } - } - - if ($leave ne "others") { - my @expr = @{$leaf{"others"}->{expr}}; - for (my $i = 0; $i < @names; $i++) { - foreach my $re (@expr) { - print STDERR "$names[$i] =~ /^$re\$/\n" if ($debug && $dbg_undefined); - if ($names[$i] =~ $re) { - return; - } - } - } - } - - push @not_found, $file if (!$search_string || $found_string); - - if ($hint && (!$search_string || $found_string)) { - my $what = $leaf{$leave}->{what}; - $what =~ s/\xac/\n\t/g; - if ($leave ne "others") { - print STDERR "\r more likely regexes:\n\t$what\n"; - } else { - print STDERR "\r tested regexes:\n\t$what\n"; - } - } -} - -sub check_undefined_symbols { - my $num_files = scalar @files; - my $next_i = 0; - my $start_time = times; - - @files = sort @files; - - my $last_time = $start_time; - - # When either debug or hint is enabled, there's no sense showing - # progress, as the progress will be overridden.
- if ($hint || ($debug && $dbg_undefined)) { - $next_i = $num_files; - } - - my $is_console; - $is_console = 1 if (-t STDERR); - - for (my $i = 0; $i < $num_files; $i++) { - my $file_ref = $files[$i]; - my @names = @{$$file_ref{"__name"}}; - - check_file($file_ref, \@names); - - my $cur_time = times; - - if ($i == $next_i || $cur_time > $last_time + 1) { - my $percent = $i * 100 / $num_files; - - my $tm = $cur_time - $start_time; - my $time = sprintf "%d:%02d", int($tm), 60 * ($tm - int($tm)); - - printf STDERR "\33[2K\r", if ($is_console); - printf STDERR "%s: processing sysfs files... %i%%: $names[0]", $time, $percent; - printf STDERR "\n", if (!$is_console); - STDERR->flush(); - - $next_i = int (($percent + 1) * $num_files / 100); - $last_time = $cur_time; - } - } - - my $cur_time = times; - my $tm = $cur_time - $start_time; - my $time = sprintf "%d:%02d", int($tm), 60 * ($tm - int($tm)); - - printf STDERR "\33[2K\r", if ($is_console); - printf STDERR "%s: processing sysfs files... done\n", $time; - - foreach my $file (@not_found) { - print "$file not found.\n"; - } -} - -sub undefined_symbols { - print STDERR "Reading $sysfs_prefix directory contents..."; - find({ - wanted =>\&parse_existing_sysfs, - preprocess =>\&dont_parse_special_attributes, - no_chdir => 1 - }, $sysfs_prefix); - print STDERR "done.\n"; - - $leaf{"others"}->{what} = ""; - - print STDERR "Converting ABI What fields into regexes..."; - foreach my $w (sort keys %data) { - foreach my $what (split /\xac/,$w) { - next if (!($what =~ m/^$sysfs_prefix/)); - - # Convert what into regular expressions - - # Escape dot characters - $what =~ s/\./\xf6/g; - - # Temporarily change [0-9]+ type of patterns - $what =~ s/\[0\-9\]\+/\xff/g; - - # Temporarily change [\d+-\d+] type of patterns - $what =~ s/\[0\-\d+\]/\xff/g; - $what =~ s/\[(\d+)\]/\xf4$1\xf5/g; - - # Temporarily change [0-9] type of patterns - $what =~ s/\[(\d)\-(\d)\]/\xf4$1-$2\xf5/g; - - # Handle multiple option patterns - $what =~ s/[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]/($1|$2)/g; - - # Handle wildcards - $what =~ s,\*,.*,g; - $what =~ s,/\xf6..,/.*,g; - $what =~ s/\<[^\>]+\>/.*/g; - $what =~ s/\{[^\}]+\}/.*/g; - $what =~ s/\[[^\]]+\]/.*/g; - - $what =~ s/[XYZ]/.*/g; - - # Recover [0-9] type of patterns - $what =~ s/\xf4/[/g; - $what =~ s/\xf5/]/g; - - # Remove duplicated spaces - $what =~ s/\s+/ /g; - - # Special case: this ABI has a parenthesis on it - $what =~ s/sqrt\(x^2\+y^2\+z^2\)/sqrt\(x^2\+y^2\+z^2\)/; - - # Special case: drop comparison as in: - # What: foo = - # (this happens on a few IIO definitions) - $what =~ s,\s*\=.*$,,; - - # Escape all other symbols - $what =~ s/$escape_symbols/\\$1/g; - $what =~ s/\\\\/\\/g; - $what =~ s/\\([\[\]\(\)\|])/$1/g; - $what =~ s/(\d+)\\(-\d+)/$1$2/g; - - $what =~ s/\xff/\\d+/g; - - # Special case: IIO ABI which has a parenthesis. - $what =~ s/sqrt(.*)/sqrt\(.*\)/; - - # Simplify regexes with multiple .* - $what =~ s#(?:\.\*){2,}##g; -# $what =~ s#\.\*/\.\*#.*#g; - - # Recover dot characters - $what =~ s/\xf6/\./g; - - my $leave = get_leave($what); - - my $added = 0; - foreach my $l (split /\|/, $leave) { - if (defined($leaf{$l})) { - next if ($leaf{$l}->{what} =~ m/\b$what\b/); - $leaf{$l}->{what} .= "\xac" .
$what; - $added = 1; - } else { - $leaf{$l}->{what} = $what; - $added = 1; - } - } - if ($search_string && $added) { - print STDERR "What: $what\n" if ($what =~ m#$search_string#); - } - - } - } - # Compile regexes - foreach my $l (sort keys %leaf) { - my @expr; - foreach my $w(sort split /\xac/, $leaf{$l}->{what}) { - push @expr, qr /^$w$/; - } - $leaf{$l}->{expr} = \@expr; - } - - # Take links into account - foreach my $link (sort keys %aliases) { - my $abs_file = $aliases{$link}; - graph_add_link($abs_file, $link); - } - print STDERR "done.\n"; - - check_undefined_symbols; -} - # Ensure that the prefix will always end with a slash # While this is not needed for find, it makes the patch nicer # with --enable-lineno $prefix =~ s,/?$,/,; -if ($cmd eq "undefined" || $cmd eq "search") { - $show_warnings = 0; -} # # Parses all ABI files located at $prefix dir # find({wanted =>\&parse_abi, no_chdir => 1}, $prefix); -print STDERR Data::Dumper->Dump([\%data], [qw(*data)]) if ($debug & $dbg_dump_abi_structs); +print STDERR Data::Dumper->Dump([\%data], [qw(*data)]) if ($debug); # # Handles the command # -if ($cmd eq "undefined") { - undefined_symbols; -} elsif ($cmd eq "search") { +if ($cmd eq "search") { search_symbols; } else { if ($cmd eq "rest") { @@ -984,23 +562,18 @@ abi_book.pl - parse the Linux ABI files and produce a ReST book. =head1 SYNOPSIS -B<abi_book.pl> [--debug <level>] [--enable-lineno] [--man] [--help] - [--(no-)rst-source] [--dir=<dir>] [--show-hints] - [--search-string <regex>] - <COMMAND> [<ARGUMENT>] +B<abi_book.pl> [--debug] [--enable-lineno] [--man] [--help] + [--(no-)rst-source] [--dir=<dir>] <COMMAND> [<ARGUMENT>] -Where B<COMMAND> can be: +Where <COMMAND> can be: =over 8 -B<search> I<SEARCH_REGEX> - search for I<SEARCH_REGEX> inside ABI +B<search> [SEARCH_REGEX] - search for [SEARCH_REGEX] inside ABI -B<rest> - output the ABI in ReST markup language +B<rest> - output the ABI in ReST markup language -B<validate> - validate the ABI contents - -B<undefined> - existing symbols at the system that aren't - defined at Documentation/ABI +B<validate> - validate the ABI contents =back @@ -1016,32 +589,18 @@ the Documentation/ABI directory. =item B<--rst-source> and B<--no-rst-source> The input file may be using ReST syntax or not. Those two options allow -selecting between a rst-compliant source ABI (B<--rst-source>), or a +selecting between a rst-compliant source ABI (--rst-source), or a plain text that may be violating ReST spec, so it requires some escaping -logic (B<--no-rst-source>). +logic (--no-rst-source). =item B<--enable-lineno> Enable output of #define LINENO lines. -=item B<--debug> I<level> +=item B<--debug> -Print debug information according to the level, which is given by the -following bitmask: - - - 1: Debug parsing What entries from ABI files; - - 2: Shows what files are opened from ABI files; - - 4: Dump the structs used to store the contents of the ABI files. - -=item B<--show-hints> - -Show hints about possible definitions for the missing ABI symbols. -Used only when B<undefined>. - -=item B<--search-string> I<regex> - -Show only occurrences that match a search string. -Used only when B<undefined>. +Put the script in verbose mode, useful for debugging. Can be called multiple +times, to increase verbosity. =item B<--help> @@ -1087,11 +646,11 @@ $ scripts/get_abi.pl rest --dir Documentation/ABI/obsolete =head1 BUGS -Report bugs to Mauro Carvalho Chehab +Report bugs to Mauro Carvalho Chehab =head1 COPYRIGHT -Copyright (c) 2016-2021 by Mauro Carvalho Chehab . +Copyright (c) 2016-2019 by Mauro Carvalho Chehab . License GPLv2: GNU GPL version 2 <http://gnu.org/licenses/gpl.html>.
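Between the two revisions above, get_abi.pl's --debug flips between a bitmask of independently selectable areas ($dbg_what_parsing and friends) and a plain counting flag ("debug|d+"). A small C sketch of the bitmask scheme, using the flag values from the script; the dbg() helper and the debug variable are illustrative, not part of the script:

#include <stdio.h>

/* Flag values mirror the script's $dbg_* variables. */
#define DBG_WHAT_PARSING     1	/* trace What: entries as they are parsed */
#define DBG_WHAT_OPEN        2	/* report each ABI file being opened */
#define DBG_DUMP_ABI_STRUCTS 4	/* dump the parsed data structures */
#define DBG_UNDEFINED        8	/* trace regex matching of sysfs nodes */

static unsigned int debug;	/* set from --debug <level> */

static void dbg(unsigned int area, const char *msg)
{
	/* Each area toggles independently; with a -d -d style counter,
	 * higher verbosity always drags in the lower levels too. */
	if (debug & area)
		fprintf(stderr, "%s\n", msg);
}

int main(void)
{
	debug = DBG_WHAT_OPEN | DBG_UNDEFINED;
	dbg(DBG_WHAT_OPEN, "Opening Documentation/ABI/testing/sysfs-bus-pci");
	dbg(DBG_WHAT_PARSING, "this one is suppressed");
	return 0;
}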
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 6bd5221d37..2075db0c08 100644 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -1718,7 +1718,7 @@ sub vcs_exists { %VCS_cmds = %VCS_cmds_hg; return 2 if eval $VCS_cmds{"available"}; %VCS_cmds = (); - if (!$printed_novcs && $email_git) { + if (!$printed_novcs) { warn("$P: No supported VCS found. Add --nogit to options?\n"); warn("Using a git repository produces better results.\n"); warn("Try Linus Torvalds' latest git repository using:\n"); diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index b8ef0fb4bb..5a215880b2 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -69,7 +69,7 @@ localyesconfig localmodconfig: $(obj)/conf # deprecated for external use simple-targets := oldconfig allnoconfig allyesconfig allmodconfig \ alldefconfig randconfig listnewconfig olddefconfig syncconfig \ - helpnewconfig yes2modconfig mod2yesconfig mod2noconfig + helpnewconfig yes2modconfig mod2yesconfig PHONY += $(simple-targets) @@ -134,7 +134,6 @@ help: @echo ' randconfig - New config with random answer to all options' @echo ' yes2modconfig - Change answers from yes to mod if possible' @echo ' mod2yesconfig - Change answers from mod to yes if possible' - @echo ' mod2noconfig - Change answers from mod to no if possible' @echo ' listnewconfig - List new options' @echo ' helpnewconfig - List new options and help text' @echo ' olddefconfig - Same as oldconfig but sets new symbols to their' diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 4178065ca2..5d84b44a2a 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -35,7 +35,6 @@ enum input_mode { olddefconfig, yes2modconfig, mod2yesconfig, - mod2noconfig, }; static enum input_mode input_mode = oldaskconfig; static int input_mode_opt; @@ -164,6 +163,8 @@ enum conf_def_mode { def_default, def_yes, def_mod, + def_y2m, + def_m2y, def_no, def_random }; @@ -301,10 +302,12 @@ static bool conf_set_all_new_symbols(enum conf_def_mode mode) return has_changed; } -static void conf_rewrite_tristates(tristate old_val, tristate new_val) +static void conf_rewrite_mod_or_yes(enum conf_def_mode mode) { struct symbol *sym; int i; + tristate old_val = (mode == def_y2m) ? yes : mod; + tristate new_val = (mode == def_y2m) ? 
mod : yes; for_all_symbols(i, sym) { if (sym_get_type(sym) == S_TRISTATE && @@ -643,8 +646,19 @@ static void check_conf(struct menu *menu) switch (input_mode) { case listnewconfig: - if (sym->name) - print_symbol_for_listconfig(sym); + if (sym->name) { + const char *str; + + if (sym->type == S_STRING) { + str = sym_get_string_value(sym); + str = sym_escape_string_value(str); + printf("%s%s=%s\n", CONFIG_, sym->name, str); + free((void *)str); + } else { + str = sym_get_string_value(sym); + printf("%s%s=%s\n", CONFIG_, sym->name, str); + } + } break; case helpnewconfig: printf("-----\n"); @@ -682,7 +696,6 @@ static const struct option long_opts[] = { {"olddefconfig", no_argument, &input_mode_opt, olddefconfig}, {"yes2modconfig", no_argument, &input_mode_opt, yes2modconfig}, {"mod2yesconfig", no_argument, &input_mode_opt, mod2yesconfig}, - {"mod2noconfig", no_argument, &input_mode_opt, mod2noconfig}, {NULL, 0, NULL, 0} }; @@ -711,7 +724,6 @@ static void conf_usage(const char *progname) printf(" --randconfig New config with random answer to all options\n"); printf(" --yes2modconfig Change answers from yes to mod if possible\n"); printf(" --mod2yesconfig Change answers from mod to yes if possible\n"); - printf(" --mod2noconfig Change answers from mod to no if possible\n"); printf(" (If none of the above is given, --oldaskconfig is the default)\n"); } @@ -787,7 +799,6 @@ int main(int ac, char **av) case olddefconfig: case yes2modconfig: case mod2yesconfig: - case mod2noconfig: conf_read(NULL); break; case allnoconfig: @@ -862,13 +873,10 @@ int main(int ac, char **av) case savedefconfig: break; case yes2modconfig: - conf_rewrite_tristates(yes, mod); + conf_rewrite_mod_or_yes(def_y2m); break; case mod2yesconfig: - conf_rewrite_tristates(mod, yes); - break; - case mod2noconfig: - conf_rewrite_tristates(mod, no); + conf_rewrite_mod_or_yes(def_m2y); break; case oldaskconfig: rootEntry = &rootmenu; diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index d3c3a61308..4a828bca07 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -130,22 +129,41 @@ static size_t depfile_prefix_len; /* touch depfile for symbol 'name' */ static int conf_touch_dep(const char *name) { - int fd; + int fd, ret; + char *d; /* check overflow: prefix + name + '\0' must fit in buffer. */ if (depfile_prefix_len + strlen(name) + 1 > sizeof(depfile_path)) return -1; - strcpy(depfile_path + depfile_prefix_len, name); + d = depfile_path + depfile_prefix_len; + strcpy(d, name); + /* Assume directory path already exists. */ fd = open(depfile_path, O_WRONLY | O_CREAT | O_TRUNC, 0644); - if (fd == -1) - return -1; + if (fd == -1) { + if (errno != ENOENT) + return -1; + + ret = make_parent_dir(depfile_path); + if (ret) + return ret; + + /* Try it again. */ + fd = open(depfile_path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd == -1) + return -1; + } close(fd); return 0; } +struct conf_printer { + void (*print_symbol)(FILE *, struct symbol *, const char *, void *); + void (*print_comment)(FILE *, const char *, void *); +}; + static void conf_warning(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); @@ -209,13 +227,6 @@ static const char *conf_get_autoconfig_name(void) return name ? name : "include/config/auto.conf"; } -static const char *conf_get_autoheader_name(void) -{ - char *name = getenv("KCONFIG_AUTOHEADER"); - - return name ? 
name : "include/generated/autoconf.h"; -} - static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) { char *p2; @@ -244,21 +255,19 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) p, sym->name); return 1; case S_STRING: - /* No escaping for S_DEF_AUTO (include/config/auto.conf) */ - if (def != S_DEF_AUTO) { - if (*p++ != '"') + if (*p++ != '"') + break; + for (p2 = p; (p2 = strpbrk(p2, "\"\\")); p2++) { + if (*p2 == '"') { + *p2 = 0; break; - for (p2 = p; (p2 = strpbrk(p2, "\"\\")); p2++) { - if (*p2 == '"') { - *p2 = 0; - break; - } - memmove(p2, p2 + 1, strlen(p2)); } - if (!p2) { + memmove(p2, p2 + 1, strlen(p2)); + } + if (!p2) { + if (def != S_DEF_AUTO) conf_warning("invalid string found"); - return 1; - } + return 1; } /* fall through */ case S_INT: @@ -585,171 +594,169 @@ int conf_read(const char *name) return 0; } -struct comment_style { - const char *decoration; - const char *prefix; - const char *postfix; -}; - -static const struct comment_style comment_style_pound = { - .decoration = "#", - .prefix = "#", - .postfix = "#", -}; - -static const struct comment_style comment_style_c = { - .decoration = " *", - .prefix = "/*", - .postfix = " */", -}; - -static void conf_write_heading(FILE *fp, const struct comment_style *cs) -{ - fprintf(fp, "%s\n", cs->prefix); - - fprintf(fp, "%s Automatically generated file; DO NOT EDIT.\n", - cs->decoration); - - fprintf(fp, "%s %s\n", cs->decoration, rootmenu.prompt->text); - - fprintf(fp, "%s\n", cs->postfix); -} - -/* The returned pointer must be freed on the caller side */ -static char *escape_string_value(const char *in) -{ - const char *p; - char *out; - size_t len; - - len = strlen(in) + strlen("\"\"") + 1; - - p = in; - while (1) { - p += strcspn(p, "\"\\"); - - if (p[0] == '\0') - break; - - len++; - p++; - } - - out = xmalloc(len); - out[0] = '\0'; - - strcat(out, "\""); - - p = in; - while (1) { - len = strcspn(p, "\"\\"); - strncat(out, p, len); - p += len; - - if (p[0] == '\0') - break; - - strcat(out, "\\"); - strncat(out, p++, 1); - } - - strcat(out, "\""); - - return out; -} - /* * Kconfig configuration printer * * This printer is used when generating the resulting configuration after * kconfig invocation and `defconfig' files. Unset symbol might be omitted by * passing a non-NULL argument to the printer. 
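As context for this hunk: the escape_string_value() removed here (its counterpart, sym_escape_string_value(), is added to symbol.c later in this patch) quotes a value and backslash-escapes embedded quotes and backslashes in two passes. A standalone C sketch of that approach, simplified and with an invented name:

#include <stdlib.h>
#include <string.h>

static char *escape_value(const char *in)
{
	size_t len = strlen(in) + 3;	/* two quotes plus the NUL */
	const char *p;
	char *out, *q;

	/* Pass 1: count the characters that will gain a backslash, so the
	 * result can be built with a single allocation. */
	for (p = in; (p = strpbrk(p, "\"\\")); p++)
		len++;

	out = q = malloc(len);
	if (!out)
		return NULL;

	/* Pass 2: copy, inserting the escapes. */
	*q++ = '"';
	for (p = in; *p; p++) {
		if (*p == '"' || *p == '\\')
			*q++ = '\\';
		*q++ = *p;
	}
	*q++ = '"';
	*q = '\0';
	return out;
}

Measuring first is why both versions of the real helper walk the input twice: the output needs exactly one allocation of a known size.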
+ * */ -enum output_n { OUTPUT_N, OUTPUT_N_AS_UNSET, OUTPUT_N_NONE }; - -static void __print_symbol(FILE *fp, struct symbol *sym, enum output_n output_n, - bool escape_string) +static void +kconfig_print_symbol(FILE *fp, struct symbol *sym, const char *value, void *arg) { - const char *val; - char *escaped = NULL; - - if (sym->type == S_UNKNOWN) - return; - - val = sym_get_string_value(sym); - - if ((sym->type == S_BOOLEAN || sym->type == S_TRISTATE) && - output_n != OUTPUT_N && *val == 'n') { - if (output_n == OUTPUT_N_AS_UNSET) - fprintf(fp, "# %s%s is not set\n", CONFIG_, sym->name); - return; - } - - if (sym->type == S_STRING && escape_string) { - escaped = escape_string_value(val); - val = escaped; - } - - fprintf(fp, "%s%s=%s\n", CONFIG_, sym->name, val); - - free(escaped); -} - -static void print_symbol_for_dotconfig(FILE *fp, struct symbol *sym) -{ - __print_symbol(fp, sym, OUTPUT_N_AS_UNSET, true); -} - -static void print_symbol_for_autoconf(FILE *fp, struct symbol *sym) -{ - __print_symbol(fp, sym, OUTPUT_N_NONE, false); -} - -void print_symbol_for_listconfig(struct symbol *sym) -{ - __print_symbol(stdout, sym, OUTPUT_N, true); -} - -static void print_symbol_for_c(FILE *fp, struct symbol *sym) -{ - const char *val; - const char *sym_suffix = ""; - const char *val_prefix = ""; - char *escaped = NULL; - - if (sym->type == S_UNKNOWN) - return; - - val = sym_get_string_value(sym); switch (sym->type) { case S_BOOLEAN: case S_TRISTATE: - switch (*val) { - case 'n': + if (*value == 'n') { + bool skip_unset = (arg != NULL); + + if (!skip_unset) + fprintf(fp, "# %s%s is not set\n", + CONFIG_, sym->name); return; - case 'm': - sym_suffix = "_MODULE"; - /* fall through */ - default: - val = "1"; } break; - case S_HEX: - if (val[0] != '0' || (val[1] != 'x' && val[1] != 'X')) - val_prefix = "0x"; - break; - case S_STRING: - escaped = escape_string_value(val); - val = escaped; default: break; } - fprintf(fp, "#define %s%s%s %s%s\n", CONFIG_, sym->name, sym_suffix, - val_prefix, val); + fprintf(fp, "%s%s=%s\n", CONFIG_, sym->name, value); +} - free(escaped); +static void +kconfig_print_comment(FILE *fp, const char *value, void *arg) +{ + const char *p = value; + size_t l; + + for (;;) { + l = strcspn(p, "\n"); + fprintf(fp, "#"); + if (l) { + fprintf(fp, " "); + xfwrite(p, l, 1, fp); + p += l; + } + fprintf(fp, "\n"); + if (*p++ == '\0') + break; + } +} + +static struct conf_printer kconfig_printer_cb = +{ + .print_symbol = kconfig_print_symbol, + .print_comment = kconfig_print_comment, +}; + +/* + * Header printer + * + * This printer is used when generating the `include/generated/autoconf.h' file. 
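The mapping this header printer implements is easy to state on its own: 'y' becomes #define CONFIG_FOO 1, 'm' becomes #define CONFIG_FOO_MODULE 1, and 'n' produces no define at all. A standalone C demo of just that rule (not the kernel's code; names are illustrative):

#include <stdio.h>

static void emit_bool_or_tristate(FILE *fp, const char *name, char value)
{
	const char *suffix = "";

	switch (value) {
	case 'n':
		return;			/* 'n' symbols are simply absent */
	case 'm':
		suffix = "_MODULE";	/* modular code tests CONFIG_FOO_MODULE */
		/* fall through */
	default:
		fprintf(fp, "#define CONFIG_%s%s 1\n", name, suffix);
	}
}

int main(void)
{
	emit_bool_or_tristate(stdout, "E1000", 'm');	/* CONFIG_E1000_MODULE 1 */
	emit_bool_or_tristate(stdout, "SMP", 'y');	/* CONFIG_SMP 1 */
	emit_bool_or_tristate(stdout, "KASAN", 'n');	/* nothing emitted */
	return 0;
}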
+ */ +static void +header_print_symbol(FILE *fp, struct symbol *sym, const char *value, void *arg) +{ + + switch (sym->type) { + case S_BOOLEAN: + case S_TRISTATE: { + const char *suffix = ""; + + switch (*value) { + case 'n': + break; + case 'm': + suffix = "_MODULE"; + /* fall through */ + default: + fprintf(fp, "#define %s%s%s 1\n", + CONFIG_, sym->name, suffix); + } + break; + } + case S_HEX: { + const char *prefix = ""; + + if (value[0] != '0' || (value[1] != 'x' && value[1] != 'X')) + prefix = "0x"; + fprintf(fp, "#define %s%s %s%s\n", + CONFIG_, sym->name, prefix, value); + break; + } + case S_STRING: + case S_INT: + fprintf(fp, "#define %s%s %s\n", + CONFIG_, sym->name, value); + break; + default: + break; + } + +} + +static void +header_print_comment(FILE *fp, const char *value, void *arg) +{ + const char *p = value; + size_t l; + + fprintf(fp, "/*\n"); + for (;;) { + l = strcspn(p, "\n"); + fprintf(fp, " *"); + if (l) { + fprintf(fp, " "); + xfwrite(p, l, 1, fp); + p += l; + } + fprintf(fp, "\n"); + if (*p++ == '\0') + break; + } + fprintf(fp, " */\n"); +} + +static struct conf_printer header_printer_cb = +{ + .print_symbol = header_print_symbol, + .print_comment = header_print_comment, +}; + +static void conf_write_symbol(FILE *fp, struct symbol *sym, + struct conf_printer *printer, void *printer_arg) +{ + const char *str; + + switch (sym->type) { + case S_UNKNOWN: + break; + case S_STRING: + str = sym_get_string_value(sym); + str = sym_escape_string_value(str); + printer->print_symbol(fp, sym, str, printer_arg); + free((void *)str); + break; + default: + str = sym_get_string_value(sym); + printer->print_symbol(fp, sym, str, printer_arg); + } +} + +static void +conf_write_heading(FILE *fp, struct conf_printer *printer, void *printer_arg) +{ + char buf[256]; + + snprintf(buf, sizeof(buf), + "\n" + "Automatically generated file; DO NOT EDIT.\n" + "%s\n", + rootmenu.prompt->text); + + printer->print_comment(fp, buf, printer_arg); } /* @@ -808,7 +815,7 @@ int conf_write_defconfig(const char *filename) goto next_menu; } } - print_symbol_for_dotconfig(out, sym); + conf_write_symbol(out, sym, &kconfig_printer_cb, NULL); } next_menu: if (menu->list != NULL) { @@ -868,7 +875,7 @@ int conf_write(const char *name) if (!out) return 1; - conf_write_heading(out, &comment_style_pound); + conf_write_heading(out, &kconfig_printer_cb, NULL); if (!conf_get_changed()) sym_clear_all_valid(); @@ -895,7 +902,7 @@ int conf_write(const char *name) need_newline = false; } sym->flags |= SYMBOL_WRITTEN; - print_symbol_for_dotconfig(out, sym); + conf_write_symbol(out, sym, &kconfig_printer_cb, NULL); } next: @@ -945,50 +952,32 @@ int conf_write(const char *name) } /* write a dependency file as used by kbuild to track dependencies */ -static int conf_write_autoconf_cmd(const char *autoconf_name) +static int conf_write_dep(const char *name) { - char name[PATH_MAX], tmp[PATH_MAX]; struct file *file; FILE *out; - int ret; - - ret = snprintf(name, sizeof(name), "%s.cmd", autoconf_name); - if (ret >= sizeof(name)) /* check truncation */ - return -1; - - if (make_parent_dir(name)) - return -1; - - ret = snprintf(tmp, sizeof(tmp), "%s.cmd.tmp", autoconf_name); - if (ret >= sizeof(tmp)) /* check truncation */ - return -1; - - out = fopen(tmp, "w"); - if (!out) { - perror("fopen"); - return -1; - } + out = fopen("..config.tmp", "w"); + if (!out) + return 1; fprintf(out, "deps_config := \\\n"); - for (file = file_list; file; file = file->next) - fprintf(out, "\t%s \\\n", file->name); + for (file = file_list; file; 
file = file->next) { + if (file->next) + fprintf(out, "\t%s \\\n", file->name); + else + fprintf(out, "\t%s\n", file->name); + } + fprintf(out, "\n%s: \\\n" + "\t$(deps_config)\n\n", conf_get_autoconfig_name()); - fprintf(out, "\n%s: $(deps_config)\n\n", autoconf_name); - - env_write_dep(out, autoconf_name); + env_write_dep(out, conf_get_autoconfig_name()); fprintf(out, "\n$(deps_config): ;\n"); - - ret = ferror(out); /* error check for all fprintf() calls */ fclose(out); - if (ret) - return -1; - - if (rename(tmp, name)) { - perror("rename"); - return -1; - } + if (make_parent_dir(name)) + return 1; + rename("..config.tmp", name); return 0; } @@ -1069,83 +1058,63 @@ static int conf_touch_deps(void) return 0; } -static int __conf_write_autoconf(const char *filename, - void (*print_symbol)(FILE *, struct symbol *), - const struct comment_style *comment_style) -{ - char tmp[PATH_MAX]; - FILE *file; - struct symbol *sym; - int ret, i; - - if (make_parent_dir(filename)) - return -1; - - ret = snprintf(tmp, sizeof(tmp), "%s.tmp", filename); - if (ret >= sizeof(tmp)) /* check truncation */ - return -1; - - file = fopen(tmp, "w"); - if (!file) { - perror("fopen"); - return -1; - } - - conf_write_heading(file, comment_style); - - for_all_symbols(i, sym) - if ((sym->flags & SYMBOL_WRITE) && sym->name) - print_symbol(file, sym); - - /* check possible errors in conf_write_heading() and print_symbol() */ - ret = ferror(file); - fclose(file); - if (ret) - return -1; - - if (rename(tmp, filename)) { - perror("rename"); - return -1; - } - - return 0; -} - int conf_write_autoconf(int overwrite) { struct symbol *sym; + const char *name; const char *autoconf_name = conf_get_autoconfig_name(); - int ret, i; + FILE *out, *out_h; + int i; if (!overwrite && is_present(autoconf_name)) return 0; - ret = conf_write_autoconf_cmd(autoconf_name); - if (ret) - return -1; + conf_write_dep("include/config/auto.conf.cmd"); if (conf_touch_deps()) return 1; - for_all_symbols(i, sym) + out = fopen(".tmpconfig", "w"); + if (!out) + return 1; + + out_h = fopen(".tmpconfig.h", "w"); + if (!out_h) { + fclose(out); + return 1; + } + + conf_write_heading(out, &kconfig_printer_cb, NULL); + conf_write_heading(out_h, &header_printer_cb, NULL); + + for_all_symbols(i, sym) { sym_calc_value(sym); + if (!(sym->flags & SYMBOL_WRITE) || !sym->name) + continue; - ret = __conf_write_autoconf(conf_get_autoheader_name(), - print_symbol_for_c, - &comment_style_c); - if (ret) - return ret; + /* write symbols to auto.conf and autoconf.h */ + conf_write_symbol(out, sym, &kconfig_printer_cb, (void *)1); + conf_write_symbol(out_h, sym, &header_printer_cb, NULL); + } + fclose(out); + fclose(out_h); + name = getenv("KCONFIG_AUTOHEADER"); + if (!name) + name = "include/generated/autoconf.h"; + if (make_parent_dir(name)) + return 1; + if (rename(".tmpconfig.h", name)) + return 1; + + if (make_parent_dir(autoconf_name)) + return 1; /* - * Create include/config/auto.conf. This must be the last step because - * Kbuild has a dependency on auto.conf and this marks the successful - * completion of the previous steps. + * This must be the last step, kbuild has a dependency on auto.conf + * and this marks the successful completion of the previous steps. 
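Both sides of this hunk publish their outputs the same way: write the complete contents to a temporary file, then rename() it over the final name, so a crash or a concurrent reader never sees a half-written auto.conf. A generic C sketch of the pattern, with simplified error handling and invented names:

#include <stdio.h>

/* fill() writes the whole payload; rename() within one directory then
 * replaces the target in a single step on POSIX filesystems. */
static int write_then_rename(const char *tmp, const char *final,
			     int (*fill)(FILE *))
{
	FILE *f = fopen(tmp, "w");
	int err;

	if (!f)
		return -1;
	err = fill(f);
	if (fclose(f) || err)	/* fclose() also flushes buffered data */
		return -1;
	return rename(tmp, final);
}

Writing auto.conf last applies the same idea one level up: as the comment here notes, its appearance tells kbuild that every earlier step completed.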
*/ - ret = __conf_write_autoconf(conf_get_autoconfig_name(), - print_symbol_for_autoconf, - &comment_style_pound); - if (ret) - return ret; + if (rename(".tmpconfig", autoconf_name)) + return 1; return 0; } diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l index cc386e4436..312cbad2d3 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -84,7 +84,8 @@ static void warn_ignored_character(char chr) n [A-Za-z0-9_-] %% - char open_quote = 0; + int str = 0; + int ts, i; #.* /* ignore comment */ [ \t]* /* whitespaces */ @@ -133,7 +134,7 @@ n [A-Za-z0-9_-] ":=" return T_COLON_EQUAL; "+=" return T_PLUS_EQUAL; \"|\' { - open_quote = yytext[0]; + str = yytext[0]; new_string(); BEGIN(STRING); } @@ -170,7 +171,7 @@ n [A-Za-z0-9_-] append_string(yytext + 1, yyleng - 1); } \'|\" { - if (open_quote == yytext[0]) { + if (str == yytext[0]) { BEGIN(INITIAL); yylval.string = text; return T_WORD_QUOTE; @@ -195,8 +196,6 @@ n [A-Za-z0-9_-] { [ \t]+ { - int ts, i; - ts = 0; for (i = 0; i < yyleng; i++) { if (yytext[i] == '\t') diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 3d6f7cba88..606ba8a63c 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -728,7 +728,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, get_dep_str(r, prop->visible.expr, " Visible if: "); menu = prop->menu->parent; - for (i = 0; menu && i < 8; menu = menu->parent) { + for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) { bool accessible = menu_is_visible(menu); submenu[i++] = menu; @@ -758,24 +758,21 @@ static void get_prompt_str(struct gstr *r, struct property *prop, list_add_tail(&jump->entries, head); } - str_printf(r, " Location:\n"); - for (j = 4; --i >= 0; j += 2) { - menu = submenu[i]; - if (jump && menu == location) - jump->offset = strlen(r->s); - - if (menu == &rootmenu) - /* The real rootmenu prompt is ugly */ - str_printf(r, "%*cMain menu", j, ' '); - else - str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu)); - - if (menu->sym) { - str_printf(r, " (%s [=%s])", menu->sym->name ? - menu->sym->name : "", - sym_get_string_value(menu->sym)); + if (i > 0) { + str_printf(r, " Location:\n"); + for (j = 4; --i >= 0; j += 2) { + menu = submenu[i]; + if (jump && menu == location) + jump->offset = strlen(r->s); + str_printf(r, "%*c-> %s", j, ' ', + menu_get_prompt(menu)); + if (menu->sym) { + str_printf(r, " (%s [=%s])", menu->sym->name ? 
+ menu->sym->name : "", + sym_get_string_value(menu->sym)); + } + str_append(r, "\n"); } - str_append(r, "\n"); } } diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl index 3387ad7508..1a5fea0519 100644 --- a/scripts/kconfig/streamline_config.pl +++ b/scripts/kconfig/streamline_config.pl @@ -170,7 +170,7 @@ sub read_kconfig { $source =~ s/\$\($env\)/$ENV{$env}/; } - open(my $kinfile, '<', $source) || die "Can't open $source"; + open(my $kinfile, '<', $source) || die "Can't open $kconfig"; while (<$kinfile>) { chomp; diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 0572330bf8..5844d636d3 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -871,6 +871,49 @@ struct symbol *sym_find(const char *name) return symbol; } +const char *sym_escape_string_value(const char *in) +{ + const char *p; + size_t reslen; + char *res; + size_t l; + + reslen = strlen(in) + strlen("\"\"") + 1; + + p = in; + for (;;) { + l = strcspn(p, "\"\\"); + p += l; + + if (p[0] == '\0') + break; + + reslen++; + p++; + } + + res = xmalloc(reslen); + res[0] = '\0'; + + strcat(res, "\""); + + p = in; + for (;;) { + l = strcspn(p, "\"\\"); + strncat(res, p, l); + p += l; + + if (p[0] == '\0') + break; + + strcat(res, "\\"); + strncat(res, p++, 1); + } + + strcat(res, "\""); + return res; +} + struct sym_match { struct symbol *sym; off_t so, eo; diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 3106b7536b..cfcb607379 100644 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1245,18 +1245,10 @@ sub dump_struct($$) { $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; $members =~ s/\s*____cacheline_aligned/ /gos; - # unwrap struct_group(): - # - first eat non-declaration parameters and rewrite for final match - # - then remove macro, outer parens, and trailing semicolon - $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos; - $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos; - $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos; - $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos; my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos; $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; @@ -1264,8 +1256,6 @@ sub dump_struct($$) { $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; - # replace DECLARE_FLEX_ARRAY - $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos; my $declaration = $members; # Split nested struct/union elements as newer ones @@ -1799,7 +1789,6 @@ sub dump_function($$) { $prototype =~ s/__weak +//; $prototype =~ s/__sched +//; $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; - $prototype =~ s/__alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; my $define = $prototype =~ s/^#\s*define\s+//; #ak added $prototype =~ s/__attribute_const__ +//; $prototype =~ s/__attribute__\s*\(\( diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 666f7bbc13..d74cee5c43 100644 --- 
a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -34,10 +34,6 @@ LD="$1" KBUILD_LDFLAGS="$2" LDFLAGS_vmlinux="$3" -is_enabled() { - grep -q "^$1=y" include/config/auto.conf -} - # Nice output in kbuild format # Will be suppressed by "make -s" info() @@ -84,11 +80,11 @@ modpost_link() ${KBUILD_VMLINUX_LIBS} \ --end-group" - if is_enabled CONFIG_LTO_CLANG; then + if [ -n "${CONFIG_LTO_CLANG}" ]; then gen_initcalls lds="-T .tmp_initcalls.lds" - if is_enabled CONFIG_MODVERSIONS; then + if [ -n "${CONFIG_MODVERSIONS}" ]; then gen_symversions lds="${lds} -T .tmp_symversions.lds" fi @@ -108,21 +104,21 @@ objtool_link() local objtoolcmd; local objtoolopt; - if is_enabled CONFIG_LTO_CLANG && is_enabled CONFIG_STACK_VALIDATION; then + if [ "${CONFIG_LTO_CLANG} ${CONFIG_STACK_VALIDATION}" = "y y" ]; then # Don't perform vmlinux validation unless explicitly requested, # but run objtool on vmlinux.o now that we have an object file. - if is_enabled CONFIG_UNWINDER_ORC; then + if [ -n "${CONFIG_UNWINDER_ORC}" ]; then objtoolcmd="orc generate" fi objtoolopt="${objtoolopt} --duplicate" - if is_enabled CONFIG_FTRACE_MCOUNT_USE_OBJTOOL; then + if [ -n "${CONFIG_FTRACE_MCOUNT_USE_OBJTOOL}" ]; then objtoolopt="${objtoolopt} --mcount" fi fi - if is_enabled CONFIG_VMLINUX_VALIDATION; then + if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then objtoolopt="${objtoolopt} --noinstr" fi @@ -131,21 +127,18 @@ objtool_link() objtoolcmd="check" fi objtoolopt="${objtoolopt} --vmlinux" - if ! is_enabled CONFIG_FRAME_POINTER; then + if [ -z "${CONFIG_FRAME_POINTER}" ]; then objtoolopt="${objtoolopt} --no-fp" fi - if is_enabled CONFIG_GCOV_KERNEL || is_enabled CONFIG_LTO_CLANG; then + if [ -n "${CONFIG_GCOV_KERNEL}" ] || [ -n "${CONFIG_LTO_CLANG}" ]; then objtoolopt="${objtoolopt} --no-unreachable" fi - if is_enabled CONFIG_RETPOLINE; then + if [ -n "${CONFIG_RETPOLINE}" ]; then objtoolopt="${objtoolopt} --retpoline" fi - if is_enabled CONFIG_X86_SMAP; then + if [ -n "${CONFIG_X86_SMAP}" ]; then objtoolopt="${objtoolopt} --uaccess" fi - if is_enabled CONFIG_SLS; then - objtoolopt="${objtoolopt} --sls" - fi info OBJTOOL ${1} tools/objtool/objtool ${objtoolcmd} ${objtoolopt} ${1} fi @@ -168,7 +161,7 @@ vmlinux_link() # skip output file argument shift - if is_enabled CONFIG_LTO_CLANG; then + if [ -n "${CONFIG_LTO_CLANG}" ]; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o libs= @@ -196,7 +189,7 @@ vmlinux_link() ldflags="${ldflags} ${wl}--strip-debug" fi - if is_enabled CONFIG_VMLINUX_MAP; then + if [ -n "${CONFIG_VMLINUX_MAP}" ]; then ldflags="${ldflags} ${wl}-Map=${output}.map" fi @@ -212,6 +205,7 @@ vmlinux_link() gen_btf() { local pahole_ver + local extra_paholeopt= if ! [ -x "$(command -v ${PAHOLE})" ]; then echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" @@ -226,8 +220,16 @@ gen_btf() vmlinux_link ${1} + if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then + # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars + extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" + fi + if [ "${pahole_ver}" -ge "121" ]; then + extra_paholeopt="${extra_paholeopt} --btf_gen_floats" + fi + info "BTF" ${2} - LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1} + LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1} # Create ${2} which contains just .BTF section but no symbols. Add # SHF_ALLOC because .BTF will be part of the vmlinux image.
--strip-all @@ -246,15 +248,15 @@ kallsyms() { local kallsymopt; - if is_enabled CONFIG_KALLSYMS_ALL; then + if [ -n "${CONFIG_KALLSYMS_ALL}" ]; then kallsymopt="${kallsymopt} --all-symbols" fi - if is_enabled CONFIG_KALLSYMS_ABSOLUTE_PERCPU; then + if [ -n "${CONFIG_KALLSYMS_ABSOLUTE_PERCPU}" ]; then kallsymopt="${kallsymopt} --absolute-percpu" fi - if is_enabled CONFIG_KALLSYMS_BASE_RELATIVE; then + if [ -n "${CONFIG_KALLSYMS_BASE_RELATIVE}" ]; then kallsymopt="${kallsymopt} --base-relative" fi @@ -319,6 +321,9 @@ if [ "$1" = "clean" ]; then exit 0 fi +# We need access to CONFIG_ symbols +. include/config/auto.conf + # Update version info GEN .version if [ -r .version ]; then @@ -347,7 +352,7 @@ tr '\0' '\n' < modules.builtin.modinfo | sed -n 's/^[[:alnum:]:_]*\.file=//p' | tr ' ' '\n' | uniq | sed -e 's:^:kernel/:' -e 's/$/.ko/' > modules.builtin btf_vmlinux_bin_o="" -if is_enabled CONFIG_DEBUG_INFO_BTF; then +if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then btf_vmlinux_bin_o=.btf.vmlinux.bin.o if ! gen_btf .tmp_vmlinux.btf $btf_vmlinux_bin_o ; then echo >&2 "Failed to generate BTF for vmlinux" @@ -359,19 +364,19 @@ fi kallsymso="" kallsymso_prev="" kallsyms_vmlinux="" -if is_enabled CONFIG_KALLSYMS; then +if [ -n "${CONFIG_KALLSYMS}" ]; then # kallsyms support # Generate section listing all symbols and add it into vmlinux # It's a three step process: - # 1) Link .tmp_vmlinux.kallsyms1 so it has all symbols and sections, + # 1) Link .tmp_vmlinux1 so it has all symbols and sections, # but __kallsyms is empty. # Running kallsyms on that gives us .tmp_kallsyms1.o with # the right size - # 2) Link .tmp_vmlinux.kallsyms2 so it now has a __kallsyms section of + # 2) Link .tmp_vmlinux2 so it now has a __kallsyms section of # the right size, but due to the added section, some # addresses have shifted. - # From here, we generate a correct .tmp_vmlinux.kallsyms2.o + # From here, we generate a correct .tmp_kallsyms2.o # 3) That link may have expanded the kernel image enough that # more linker branch stubs / trampolines had to be added, which # introduces new names, which further expands kallsyms. Do another @@ -399,15 +404,12 @@ fi vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o} # fill in BTF IDs -if is_enabled CONFIG_DEBUG_INFO_BTF && is_enabled CONFIG_BPF; then +if [ -n "${CONFIG_DEBUG_INFO_BTF}" -a -n "${CONFIG_BPF}" ]; then info BTFIDS vmlinux ${RESOLVE_BTFIDS} vmlinux fi -info SYSMAP System.map -mksysmap vmlinux System.map - -if is_enabled CONFIG_BUILDTIME_TABLE_SORT; then +if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then info SORTTAB vmlinux if ! sorttable vmlinux; then echo >&2 Failed to sort kernel tables @@ -415,8 +417,11 @@ if is_enabled CONFIG_BUILDTIME_TABLE_SORT; then fi fi +info SYSMAP System.map +mksysmap vmlinux System.map + # step a (see comment above) -if is_enabled CONFIG_KALLSYMS; then +if [ -n "${CONFIG_KALLSYMS}" ]; then mksysmap ${kallsyms_vmlinux} .tmp_System.map if ! 
cmp -s System.map .tmp_System.map; then diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index c0d3bcb991..cc3625617a 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -259,8 +259,5 @@ int main(void) DEVID_FIELD(dfl_device_id, type); DEVID_FIELD(dfl_device_id, feature_id); - DEVID(ishtp_device_id); - DEVID_FIELD(ishtp_device_id, guid); - return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 5258247d78..49aba86207 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -115,17 +115,6 @@ static inline void add_uuid(char *str, uuid_le uuid) uuid.b[12], uuid.b[13], uuid.b[14], uuid.b[15]); } -static inline void add_guid(char *str, guid_t guid) -{ - int len = strlen(str); - - sprintf(str + len, "%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", - guid.b[3], guid.b[2], guid.b[1], guid.b[0], - guid.b[5], guid.b[4], guid.b[7], guid.b[6], - guid.b[8], guid.b[9], guid.b[10], guid.b[11], - guid.b[12], guid.b[13], guid.b[14], guid.b[15]); -} - /** * Check that sizeof(device_id type) are consistent with size of section * in .o file. If in-consistent then userspace and kernel does not agree @@ -1391,18 +1380,6 @@ static int do_mhi_entry(const char *filename, void *symval, char *alias) return 1; } -/* Looks like: ishtp:{guid} */ -static int do_ishtp_entry(const char *filename, void *symval, char *alias) -{ - DEF_FIELD(symval, ishtp_device_id, guid); - - strcpy(alias, ISHTP_MODULE_PREFIX "{"); - add_guid(alias, guid); - strcat(alias, "}"); - - return 1; -} - static int do_auxiliary_entry(const char *filename, void *symval, char *alias) { DEF_FIELD_ADDR(symval, auxiliary_device_id, name); @@ -1522,7 +1499,6 @@ static const struct devtable devtable[] = { {"auxiliary", SIZE_auxiliary_device_id, do_auxiliary_entry}, {"ssam", SIZE_ssam_device_id, do_ssam_entry}, {"dfl", SIZE_dfl_device_id, do_dfl_entry}, - {"ishtp", SIZE_ishtp_device_id, do_ishtp_entry}, }; /* Create MODULE_ALIAS() statements. diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6bfa332179..cb8ab7d91d 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1830,14 +1830,6 @@ static int addend_mips_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) return 0; } -#ifndef EM_RISCV -#define EM_RISCV 243 -#endif - -#ifndef R_RISCV_SUB32 -#define R_RISCV_SUB32 39 -#endif - static void section_rela(const char *modname, struct elf_info *elf, Elf_Shdr *sechdr) { @@ -1874,13 +1866,6 @@ static void section_rela(const char *modname, struct elf_info *elf, r_sym = ELF_R_SYM(r.r_info); #endif r.r_addend = TO_NATIVE(rela->r_addend); - switch (elf->hdr->e_machine) { - case EM_RISCV: - if (!strcmp("__ex_table", fromsec) && - ELF_R_TYPE(r.r_info) == R_RISCV_SUB32) - continue; - break; - } sym = elf->symtab_start + r_sym; /* Skip special sections */ if (is_shndx_special(sym->st_shndx)) diff --git a/scripts/package/buildtar b/scripts/package/buildtar index cb54c7f1aa..221aa7df00 100644 --- a/scripts/package/buildtar +++ b/scripts/package/buildtar @@ -39,10 +39,6 @@ case "${1}" in opts="-I ${XZ}" tarball=${tarball}.xz ;; - tarzst-pkg) - opts="-I ${ZSTD}" - tarball=${tarball}.zst - ;; *) echo "Unknown tarball target \"${1}\" requested, please add it to ${0}." 
>&2 exit 1 diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files index 7adab46180..c3eb81c3f7 100644 --- a/scripts/remove-stale-files +++ b/scripts/remove-stale-files @@ -24,20 +24,8 @@ set -e # with O=, make sure to remove the stale files in the output tree. Otherwise, # the build system wrongly compiles the stale ones. if [ -n "${building_out_of_srctree}" ]; then - for f in fdt_rw.c fdt_ro.c fdt_wip.c fdt.c ashldi3.S bswapsdi2.S font.c lib1funcs.S hyp-stub.S + for f in fdt_rw.c fdt_ro.c fdt_wip.c fdt.c do rm -f arch/arm/boot/compressed/${f} done - - for f in uart-ath79.c ashldi3.c bswapdi.c bswapsi.c - do - rm -f arch/mips/boot/compressed/${f} - done - - for f in firmware.c real2.S - do - rm -f arch/parisc/boot/compressed/${f} - done fi - -rm -f scripts/extract-cert diff --git a/scripts/setlocalversion b/scripts/setlocalversion index af4754a35e..6b54e46a0f 100644 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -111,7 +111,9 @@ if $scm_only; then exit fi -if ! test -e include/config/auto.conf; then +if test -e include/config/auto.conf; then + . include/config/auto.conf +else echo "Error: kernelrelease not valid - run 'make prepare' to update it" >&2 exit 1 fi @@ -123,11 +125,10 @@ if test ! "$srctree" -ef .; then fi # CONFIG_LOCALVERSION and LOCALVERSION (if set) -config_localversion=$(sed -n 's/^CONFIG_LOCALVERSION=\(.*\)$/\1/p' include/config/auto.conf) -res="${res}${config_localversion}${LOCALVERSION}" +res="${res}${CONFIG_LOCALVERSION}${LOCALVERSION}" # scm version string if not at a tagged commit -if grep -q "^CONFIG_LOCALVERSION_AUTO=y$" include/config/auto.conf; then +if test "$CONFIG_LOCALVERSION_AUTO" = "y"; then # full scm version string res="$res$(scm_version)" elif [ "${LOCALVERSION+set}" != "set" ]; then diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 3a8ea5ed55..6ee4fa8829 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include @@ -233,7 +231,7 @@ static void sort_relative_table(char *extab_image, int image_size) } } -static void sort_relative_table_with_data(char *extab_image, int image_size) +static void x86_sort_relative_table(char *extab_image, int image_size) { int i = 0; @@ -242,7 +240,7 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) w(r(loc) + i, loc); w(r(loc + 1) + i + 4, loc + 1); - /* Don't touch the fixup type or data */ + w(r(loc + 2) + i + 8, loc + 2); i += sizeof(uint32_t) * 3; } @@ -255,7 +253,7 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) w(r(loc) - i, loc); w(r(loc + 1) - (i + 4), loc + 1); - /* Don't touch the fixup type or data */ + w(r(loc + 2) - (i + 8), loc + 2); i += sizeof(uint32_t) * 3; } @@ -338,14 +336,13 @@ static int do_file(char const *const fname, void *addr) switch (r2(&ehdr->e_machine)) { case EM_386: - case EM_AARCH64: - case EM_RISCV: case EM_X86_64: - custom_sort = sort_relative_table_with_data; + custom_sort = x86_sort_relative_table; break; case EM_S390: custom_sort = s390_sort_relative_table; break; + case EM_AARCH64: case EM_PARISC: case EM_PPC: case EM_PPC64: @@ -356,6 +353,7 @@ static int do_file(char const *const fname, void *addr) case EM_ARM: case EM_MICROBLAZE: case EM_MIPS: + case EM_RISCV: case EM_XTENSA: break; default: diff --git a/scripts/sorttable.h b/scripts/sorttable.h index deb7c1d3e9..a2baa2fefb 100644 --- a/scripts/sorttable.h +++ b/scripts/sorttable.h @@ -19,9 +19,6 @@ #undef extable_ent_size #undef compare_extable -#undef 
get_mcount_loc -#undef sort_mcount_loc -#undef elf_mcount_loc #undef do_sort #undef Elf_Addr #undef Elf_Ehdr @@ -44,9 +41,6 @@ #ifdef SORTTABLE_64 # define extable_ent_size 16 # define compare_extable compare_extable_64 -# define get_mcount_loc get_mcount_loc_64 -# define sort_mcount_loc sort_mcount_loc_64 -# define elf_mcount_loc elf_mcount_loc_64 # define do_sort do_sort_64 # define Elf_Addr Elf64_Addr # define Elf_Ehdr Elf64_Ehdr @@ -68,9 +62,6 @@ #else # define extable_ent_size 8 # define compare_extable compare_extable_32 -# define get_mcount_loc get_mcount_loc_32 -# define sort_mcount_loc sort_mcount_loc_32 -# define elf_mcount_loc elf_mcount_loc_32 # define do_sort do_sort_32 # define Elf_Addr Elf32_Addr # define Elf_Ehdr Elf32_Ehdr @@ -93,6 +84,8 @@ #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) /* ORC unwinder only support X86_64 */ +#include +#include #include #define ERRSTR_MAXSZ 256 @@ -198,66 +191,7 @@ static int compare_extable(const void *a, const void *b) return 1; return 0; } -#ifdef MCOUNT_SORT_ENABLED -pthread_t mcount_sort_thread; -struct elf_mcount_loc { - Elf_Ehdr *ehdr; - Elf_Shdr *init_data_sec; - uint_t start_mcount_loc; - uint_t stop_mcount_loc; -}; - -/* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */ -static void *sort_mcount_loc(void *arg) -{ - struct elf_mcount_loc *emloc = (struct elf_mcount_loc *)arg; - uint_t offset = emloc->start_mcount_loc - _r(&(emloc->init_data_sec)->sh_addr) - + _r(&(emloc->init_data_sec)->sh_offset); - uint_t count = emloc->stop_mcount_loc - emloc->start_mcount_loc; - unsigned char *start_loc = (void *)emloc->ehdr + offset; - - qsort(start_loc, count/sizeof(uint_t), sizeof(uint_t), compare_extable); - return NULL; -} - -/* Get the address of __start_mcount_loc and __stop_mcount_loc in System.map */ -static void get_mcount_loc(uint_t *_start, uint_t *_stop) -{ - FILE *file_start, *file_stop; - char start_buff[20]; - char stop_buff[20]; - int len = 0; - - file_start = popen(" grep start_mcount System.map | awk '{print $1}' ", "r"); - if (!file_start) { - fprintf(stderr, "get start_mcount_loc error!"); - return; - } - - file_stop = popen(" grep stop_mcount System.map | awk '{print $1}' ", "r"); - if (!file_stop) { - fprintf(stderr, "get stop_mcount_loc error!"); - pclose(file_start); - return; - } - - while (fgets(start_buff, sizeof(start_buff), file_start) != NULL) { - len = strlen(start_buff); - start_buff[len - 1] = '\0'; - } - *_start = strtoul(start_buff, NULL, 16); - - while (fgets(stop_buff, sizeof(stop_buff), file_stop) != NULL) { - len = strlen(stop_buff); - stop_buff[len - 1] = '\0'; - } - *_stop = strtoul(stop_buff, NULL, 16); - - pclose(file_start); - pclose(file_stop); -} -#endif static int do_sort(Elf_Ehdr *ehdr, char const *const fname, table_sort_t custom_sort) @@ -283,11 +217,6 @@ static int do_sort(Elf_Ehdr *ehdr, int idx; unsigned int shnum; unsigned int shstrndx; -#ifdef MCOUNT_SORT_ENABLED - struct elf_mcount_loc mstruct = {0}; - uint_t _start_mcount_loc = 0; - uint_t _stop_mcount_loc = 0; -#endif #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) unsigned int orc_ip_size = 0; unsigned int orc_size = 0; @@ -324,17 +253,6 @@ static int do_sort(Elf_Ehdr *ehdr, symtab_shndx = (Elf32_Word *)((const char *)ehdr + _r(&s->sh_offset)); -#ifdef MCOUNT_SORT_ENABLED - /* locate the .init.data section in vmlinux */ - if (!strcmp(secstrings + idx, ".init.data")) { - get_mcount_loc(&_start_mcount_loc, &_stop_mcount_loc); - mstruct.ehdr = ehdr; - mstruct.init_data_sec = s; 
- mstruct.start_mcount_loc = _start_mcount_loc; - mstruct.stop_mcount_loc = _stop_mcount_loc; - } -#endif - #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) /* locate the ORC unwind tables */ if (!strcmp(secstrings + idx, ".orc_unwind_ip")) { @@ -376,23 +294,6 @@ static int do_sort(Elf_Ehdr *ehdr, goto out; } #endif - -#ifdef MCOUNT_SORT_ENABLED - if (!mstruct.init_data_sec || !_start_mcount_loc || !_stop_mcount_loc) { - fprintf(stderr, - "incomplete mcount's sort in file: %s\n", - fname); - goto out; - } - - /* create thread to sort mcount_loc concurrently */ - if (pthread_create(&mcount_sort_thread, NULL, &sort_mcount_loc, &mstruct)) { - fprintf(stderr, - "pthread_create mcount_sort_thread failed '%s': %s\n", - strerror(errno), fname); - goto out; - } -#endif if (!extab_sec) { fprintf(stderr, "no __ex_table in file: %s\n", fname); goto out; @@ -463,11 +364,11 @@ static int do_sort(Elf_Ehdr *ehdr, void *retval = NULL; /* wait for ORC tables sort done */ rc = pthread_join(orc_sort_thread, &retval); - if (rc) { + if (rc) fprintf(stderr, "pthread_join failed '%s': %s\n", strerror(errno), fname); - } else if (retval) { + else if (retval) { rc = -1; fprintf(stderr, "failed to sort ORC tables '%s': %s\n", @@ -475,23 +376,5 @@ static int do_sort(Elf_Ehdr *ehdr, } } #endif - -#ifdef MCOUNT_SORT_ENABLED - if (mcount_sort_thread) { - void *retval = NULL; - /* wait for mcount sort done */ - rc = pthread_join(mcount_sort_thread, &retval); - if (rc) { - fprintf(stderr, - "pthread_join failed '%s': %s\n", - strerror(errno), fname); - } else if (retval) { - rc = -1; - fprintf(stderr, - "failed to sort mcount '%s': %s\n", - (char *)retval, fname); - } - } -#endif return rc; } diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 0c8b79cfb1..17fdc620d5 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -178,7 +178,6 @@ assum||assume assumtpion||assumption asuming||assuming asycronous||asynchronous -asychronous||asynchronous asynchnous||asynchronous asynchromous||asynchronous asymetric||asymmetric @@ -242,7 +241,6 @@ beter||better betweeen||between bianries||binaries bitmast||bitmask -bitwiedh||bitwidth boardcast||broadcast borad||board boundry||boundary @@ -267,10 +265,7 @@ calucate||calculate calulate||calculate cancelation||cancellation cancle||cancel -cant||can't -cant'||can't canot||cannot -cann't||can't capabilites||capabilities capabilties||capabilities capabilty||capability @@ -506,7 +501,6 @@ disble||disable disgest||digest disired||desired dispalying||displaying -dissable||disable diplay||display directon||direction direcly||directly @@ -601,7 +595,6 @@ exceded||exceeded exceds||exceeds exceeed||exceed excellant||excellent -exchnage||exchange execeeded||exceeded execeeds||exceeds exeed||exceed @@ -945,7 +938,6 @@ migrateable||migratable milliseonds||milliseconds minium||minimum minimam||minimum -minimun||minimum miniumum||minimum minumum||minimum misalinged||misaligned @@ -964,7 +956,6 @@ mmnemonic||mnemonic mnay||many modfiy||modify modifer||modifier -modul||module modulues||modules momery||memory memomry||memory @@ -1046,7 +1037,6 @@ oustanding||outstanding overaall||overall overhread||overhead overlaping||overlapping -oveflow||overflow overflw||overflow overlfow||overflow overide||override @@ -1164,7 +1154,6 @@ programable||programmable programers||programmers programm||program programms||programs -progres||progress progresss||progress prohibitted||prohibited prohibitting||prohibiting @@ -1339,7 +1328,6 @@ servive||service setts||sets settting||setting 
shapshot||snapshot -shoft||shift shotdown||shutdown shoud||should shouldnt||shouldn't @@ -1451,7 +1439,6 @@ syfs||sysfs symetric||symmetric synax||syntax synchonized||synchronized -sychronization||synchronization synchronuously||synchronously syncronize||synchronize syncronized||synchronized @@ -1534,7 +1521,6 @@ unexpexted||unexpected unfortunatelly||unfortunately unifiy||unify uniterrupted||uninterrupted -uninterruptable||uninterruptible unintialized||uninitialized unitialized||uninitialized unkmown||unknown @@ -1567,7 +1553,6 @@ unuseful||useless unvalid||invalid upate||update upsupported||unsupported -useable||usable usefule||useful usefull||useful usege||usage @@ -1589,7 +1574,6 @@ varient||variant vaule||value verbse||verbose veify||verify -verfication||verification veriosn||version verisons||versions verison||version @@ -1602,7 +1586,6 @@ visiters||visitors vitual||virtual vunerable||vulnerable wakeus||wakeups -was't||wasn't wathdog||watchdog wating||waiting wiat||wait diff --git a/scripts/tags.sh b/scripts/tags.sh index 16d475b3e2..db8ba41186 100644 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -140,71 +140,71 @@ dogtags() # - etags regular expressions have to match at the start of a line; # a ^[^#] is prepended by setup_regex unless an anchor is already present regex_asm=( - '/^\(ENTRY\|_GLOBAL\)([[:space:]]*\([[:alnum:]_\\]*\)).*/\2/' + '/^\(ENTRY\|_GLOBAL\)(\([[:alnum:]_\\]*\)).*/\2/' ) regex_c=( - '/^SYSCALL_DEFINE[0-9]([[:space:]]*\([[:alnum:]_]*\).*/sys_\1/' - '/^BPF_CALL_[0-9]([[:space:]]*\([[:alnum:]_]*\).*/\1/' - '/^COMPAT_SYSCALL_DEFINE[0-9]([[:space:]]*\([[:alnum:]_]*\).*/compat_sys_\1/' - '/^TRACE_EVENT([[:space:]]*\([[:alnum:]_]*\).*/trace_\1/' - '/^TRACE_EVENT([[:space:]]*\([[:alnum:]_]*\).*/trace_\1_rcuidle/' - '/^DEFINE_EVENT([^,)]*,[[:space:]]*\([[:alnum:]_]*\).*/trace_\1/' - '/^DEFINE_EVENT([^,)]*,[[:space:]]*\([[:alnum:]_]*\).*/trace_\1_rcuidle/' - '/^DEFINE_INSN_CACHE_OPS([[:space:]]*\([[:alnum:]_]*\).*/get_\1_slot/' - '/^DEFINE_INSN_CACHE_OPS([[:space:]]*\([[:alnum:]_]*\).*/free_\1_slot/' - '/^PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/Page\1/' - '/^PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/SetPage\1/' - '/^PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/ClearPage\1/' - '/^TESTSETFLAG([[:space:]]*\([[:alnum:]_]*\).*/TestSetPage\1/' - '/^TESTPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/Page\1/' - '/^SETPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/SetPage\1/' - '/\<__SETPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/__SetPage\1/' - '/\&1 | grep -iq universal; then - CTAGS_EXTRA="extras" - fi setup_regex exuberant asm c all_target_sources | xargs $1 -a \ -I __initdata,__exitdata,__initconst,__ro_after_init \ @@ -265,7 +261,7 @@ exuberant() -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL,ACPI_EXPORT_SYMBOL \ -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \ -I static,const \ - --$CTAGS_EXTRA=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \ + --extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \ "${regex[@]}" setup_regex exuberant kconfig diff --git a/security/Kconfig b/security/Kconfig index 0b847f435b..fe6c0395fa 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -163,6 +163,20 @@ config HARDENED_USERCOPY or are part of the kernel text. This kills entire classes of heap overflow exploits and similar kernel memory exposures. 
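The help text above describes the bounds checking only conceptually; the sketch below models it with simplified, hypothetical types (struct object_bounds and usercopy_check() are inventions for illustration, not the kernel's mm/usercopy.c interface). The allow_fallback flag mirrors the behaviour of the HARDENED_USERCOPY_FALLBACK option introduced below: a copy that misses the whitelist but still fits the slab object is accepted with a warning instead of being rejected.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct object_bounds {
	size_t usercopy_offset;	/* start of the whitelisted region */
	size_t usercopy_size;	/* size of the whitelisted region */
	size_t object_size;	/* full slab allocation size */
};

/* Returns true if copying @len bytes at @offset is permitted.
 * Integer-overflow checks are omitted for brevity. */
static bool usercopy_check(const struct object_bounds *b,
			   size_t offset, size_t len, bool allow_fallback)
{
	/* Preferred: the copy must sit entirely inside the whitelist. */
	if (offset >= b->usercopy_offset &&
	    offset + len <= b->usercopy_offset + b->usercopy_size)
		return true;

	/* Fallback: accept anything still inside the object itself;
	 * the kernel would WARN() here so the missing whitelist can
	 * be found and fixed. */
	if (allow_fallback && offset + len <= b->object_size)
		return true;

	return false;
}

int main(void)
{
	struct object_bounds b = { 16, 32, 128 };

	printf("in whitelist: %d\n", usercopy_check(&b, 16, 32, false));
	printf("fallback:     %d\n", usercopy_check(&b, 0, 64, true));
	return 0;
}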
+config HARDENED_USERCOPY_FALLBACK + bool "Allow usercopy whitelist violations to fallback to object size" + depends on HARDENED_USERCOPY + default y + help + This is a temporary option that allows missing usercopy whitelists + to be discovered via a WARN() to the kernel log, instead of + rejecting the copy, falling back to non-whitelisted hardened + usercopy that checks the slab allocation size instead of the + whitelist size. This option will be removed once it seems like + all missing usercopy whitelists have been identified and fixed. + Booting with "slab_common.usercopy_fallback=Y/N" can change + this setting. + config HARDENED_USERCOPY_PAGESPAN bool "Refuse to copy allocations that span multiple pages" depends on HARDENED_USERCOPY diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index d051f8ceef..90cbaff86e 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -23,16 +23,13 @@ config CC_HAS_AUTO_VAR_INIT_PATTERN def_bool $(cc-option,-ftrivial-auto-var-init=pattern) config CC_HAS_AUTO_VAR_INIT_ZERO - # GCC ignores the -enable flag, so we can test for the feature with - # a single invocation using the flag, but drop it as appropriate in - # the Makefile, depending on the presence of Clang. def_bool $(cc-option,-ftrivial-auto-var-init=zero -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang) choice prompt "Initialize kernel stack variables at function entry" default GCC_PLUGIN_STRUCTLEAK_BYREF_ALL if COMPILE_TEST && GCC_PLUGINS default INIT_STACK_ALL_PATTERN if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT_PATTERN - default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_ZERO + default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_PATTERN default INIT_STACK_NONE help This option enables initialization of stack variables at @@ -56,8 +53,7 @@ choice config GCC_PLUGIN_STRUCTLEAK_USER bool "zero-init structs marked for userspace (weak)" - # Plugin can be removed once the kernel only supports GCC 12+ - depends on GCC_PLUGINS && !CC_HAS_AUTO_VAR_INIT_ZERO + depends on GCC_PLUGINS select GCC_PLUGIN_STRUCTLEAK help Zero-initialize any structures on the stack containing @@ -68,8 +64,7 @@ choice config GCC_PLUGIN_STRUCTLEAK_BYREF bool "zero-init structs passed by reference (strong)" - # Plugin can be removed once the kernel only supports GCC 12+ - depends on GCC_PLUGINS && !CC_HAS_AUTO_VAR_INIT_ZERO + depends on GCC_PLUGINS depends on !(KASAN && KASAN_STACK) select GCC_PLUGIN_STRUCTLEAK help @@ -87,8 +82,7 @@ choice config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL bool "zero-init everything passed by reference (very strong)" - # Plugin can be removed once the kernel only supports GCC 12+ - depends on GCC_PLUGINS && !CC_HAS_AUTO_VAR_INIT_ZERO + depends on GCC_PLUGINS depends on !(KASAN && KASAN_STACK) select GCC_PLUGIN_STRUCTLEAK help diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 0797edb2fb..2ee3b3d29f 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -812,6 +812,8 @@ struct multi_transaction { }; #define MULTI_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct multi_transaction)) +/* TODO: replace with per file lock */ +static DEFINE_SPINLOCK(multi_transaction_lock); static void multi_transaction_kref(struct kref *kref) { @@ -845,10 +847,10 @@ static void multi_transaction_set(struct file *file, AA_BUG(n > MULTI_TRANSACTION_LIMIT); new->size = n; - spin_lock(&file->f_lock); + spin_lock(&multi_transaction_lock); old = (struct multi_transaction *) file->private_data; file->private_data = new; - 
spin_unlock(&file->f_lock); + spin_unlock(&multi_transaction_lock); put_multi_transaction(old); } @@ -877,10 +879,9 @@ static ssize_t multi_transaction_read(struct file *file, char __user *buf, struct multi_transaction *t; ssize_t ret; - spin_lock(&file->f_lock); + spin_lock(&multi_transaction_lock); t = get_multi_transaction(file->private_data); - spin_unlock(&file->f_lock); - + spin_unlock(&multi_transaction_lock); if (!t) return 0; @@ -1357,7 +1358,7 @@ static int rawdata_open(struct inode *inode, struct file *file) struct aa_loaddata *loaddata; struct rawdata_f_data *private; - if (!aa_current_policy_view_capable(NULL)) + if (!policy_view_capable(NULL)) return -EACCES; loaddata = __aa_get_loaddata(inode->i_private); @@ -2113,7 +2114,7 @@ static struct aa_profile *__first_profile(struct aa_ns *root, /** * __next_profile - step to the next profile in a profile tree - * @p: current profile in tree (NOT NULL) + * @profile: current profile in tree (NOT NULL) * * Perform a depth first traversal on the profile tree in a namespace * @@ -2264,7 +2265,7 @@ static const struct seq_operations aa_sfs_profiles_op = { static int profiles_open(struct inode *inode, struct file *file) { - if (!aa_current_policy_view_capable(NULL)) + if (!policy_view_capable(NULL)) return -EACCES; return seq_open(file, &aa_sfs_profiles_op); diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 7517605a18..d4f8948517 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -167,7 +167,7 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, * @perms: permission table indexed by the matched state accept entry of @dfa * @trans: transition table for indexed by named x transitions * - * File permission are determined by matching a path against @dfa and + * File permission are determined by matching a path against @dfa and then * then using the value of the accept entry for the matching state as * an index into @perms. If a named exec transition is required it is * looked up in the transition table. diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 9101c2c76d..1e90384b15 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -77,6 +77,10 @@ struct aa_labelset { #define __labelset_for_each(LS, N) \ for ((N) = rb_first(&(LS)->root); (N); (N) = rb_next(N)) +void aa_labelset_destroy(struct aa_labelset *ls); +void aa_labelset_init(struct aa_labelset *ls); + + enum label_flags { FLAG_HAT = 1, /* profile is a hat */ FLAG_UNCONFINED = 2, /* label unconfined only if all */ @@ -144,7 +148,6 @@ do { \ #define __label_make_stale(X) ((X)->flags |= FLAG_STALE) #define labels_ns(X) (vec_ns(&((X)->vec[0]), (X)->size)) #define labels_set(X) (&labels_ns(X)->labels) -#define labels_view(X) labels_ns(X) #define labels_profile(X) ((X)->vec[(X)->size - 1]) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index e2e8df0c6f..7d27db740b 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -31,17 +31,12 @@ #define AA_WARN(X) WARN((X), "APPARMOR WARN %s: %s\n", __func__, #X) -#define AA_BUG(X, args...) \ - do { \ - _Pragma("GCC diagnostic ignored \"-Wformat-zero-length\""); \ - AA_BUG_FMT((X), "" args); \ - _Pragma("GCC diagnostic warning \"-Wformat-zero-length\""); \ - } while (0) +#define AA_BUG(X, args...) AA_BUG_FMT((X), "" args) #ifdef CONFIG_SECURITY_APPARMOR_DEBUG_ASSERTS #define AA_BUG_FMT(X, fmt, args...) 
\ WARN((X), "AppArmor WARN %s: (" #X "): " fmt, __func__, ##args) #else -#define AA_BUG_FMT(X, fmt, args...) no_printk(fmt, ##args) +#define AA_BUG_FMT(X, fmt, args...) #endif #define AA_ERROR(fmt, args...) \ diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index cb5ef21991..b5b4b8190e 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -301,11 +301,9 @@ static inline int AUDIT_MODE(struct aa_profile *profile) return profile->audit; } -bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns); -bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns); +bool policy_view_capable(struct aa_ns *ns); +bool policy_admin_capable(struct aa_ns *ns); int aa_may_manage_policy(struct aa_label *label, struct aa_ns *ns, u32 mask); -bool aa_current_policy_view_capable(struct aa_ns *ns); -bool aa_current_policy_admin_capable(struct aa_ns *ns); #endif /* __AA_POLICY_H */ diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 0b0265da19..6222fdfebe 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -425,7 +425,8 @@ struct aa_label *aa_label_alloc(int size, struct aa_proxy *proxy, gfp_t gfp) AA_BUG(size < 1); /* + 1 for null terminator entry on vec */ - new = kzalloc(struct_size(new, vec, size + 1), gfp); + new = kzalloc(sizeof(*new) + sizeof(struct aa_profile *) * (size + 1), + gfp); AA_DEBUG("%s (%p)\n", __func__, new); if (!new) goto fail; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 4f0eecb67d..f72406fe1b 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -728,14 +728,7 @@ static void apparmor_bprm_committed_creds(struct linux_binprm *bprm) return; } -static void apparmor_current_getsecid_subj(u32 *secid) -{ - struct aa_label *label = aa_get_current_label(); - *secid = label->secid; - aa_put_label(label); -} - -static void apparmor_task_getsecid_obj(struct task_struct *p, u32 *secid) +static void apparmor_task_getsecid(struct task_struct *p, u32 *secid) { struct aa_label *label = aa_get_task_label(p); *secid = label->secid; @@ -1259,8 +1252,8 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(task_free, apparmor_task_free), LSM_HOOK_INIT(task_alloc, apparmor_task_alloc), - LSM_HOOK_INIT(current_getsecid_subj, apparmor_current_getsecid_subj), - LSM_HOOK_INIT(task_getsecid_obj, apparmor_task_getsecid_obj), + LSM_HOOK_INIT(task_getsecid_subj, apparmor_task_getsecid), + LSM_HOOK_INIT(task_getsecid_obj, apparmor_task_getsecid), LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit), LSM_HOOK_INIT(task_kill, apparmor_task_kill), @@ -1409,7 +1402,7 @@ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) + if (apparmor_initialized && !policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } @@ -1418,7 +1411,7 @@ static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + if (apparmor_initialized && !policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } @@ -1427,7 +1420,7 @@ static int param_set_aabool(const char *val, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) + if 
(apparmor_initialized && !policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } @@ -1436,7 +1429,7 @@ static int param_get_aabool(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + if (apparmor_initialized && !policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } @@ -1462,7 +1455,7 @@ static int param_get_aauint(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + if (apparmor_initialized && !policy_view_capable(NULL)) return -EPERM; return param_get_uint(buffer, kp); } @@ -1533,7 +1526,7 @@ static int param_get_aacompressionlevel(char *buffer, { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + if (apparmor_initialized && !policy_view_capable(NULL)) return -EPERM; return param_get_int(buffer, kp); } @@ -1542,7 +1535,7 @@ static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + if (apparmor_initialized && !policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", audit_mode_names[aa_g_audit]); } @@ -1555,7 +1548,7 @@ static int param_set_audit(const char *val, const struct kernel_param *kp) return -EINVAL; if (!val) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) + if (apparmor_initialized && !policy_admin_capable(NULL)) return -EPERM; i = match_string(audit_mode_names, AUDIT_MAX_INDEX, val); @@ -1570,7 +1563,7 @@ static int param_get_mode(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + if (apparmor_initialized && !policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]); @@ -1584,7 +1577,7 @@ static int param_set_mode(const char *val, const struct kernel_param *kp) return -EINVAL; if (!val) return -EINVAL; - if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) + if (apparmor_initialized && !policy_admin_capable(NULL)) return -EPERM; i = match_string(aa_profile_mode_names, APPARMOR_MODE_NAMES_MAX_INDEX, @@ -1720,7 +1713,7 @@ static int __init alloc_buffers(void) static int apparmor_dointvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - if (!aa_current_policy_admin_capable(NULL)) + if (!policy_admin_capable(NULL)) return -EPERM; if (!apparmor_enabled) return -EINVAL; @@ -1780,16 +1773,32 @@ static unsigned int apparmor_ip_postroute(void *priv, } +static unsigned int apparmor_ipv4_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return apparmor_ip_postroute(priv, skb, state); +} + +#if IS_ENABLED(CONFIG_IPV6) +static unsigned int apparmor_ipv6_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return apparmor_ip_postroute(priv, skb, state); +} +#endif + static const struct nf_hook_ops apparmor_nf_ops[] = { { - .hook = apparmor_ip_postroute, + .hook = apparmor_ipv4_postroute, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_SELINUX_FIRST, }, #if IS_ENABLED(CONFIG_IPV6) { - .hook = apparmor_ip_postroute, + .hook = apparmor_ipv6_postroute, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, 
.priority = NF_IP6_PRI_SELINUX_FIRST, diff --git a/security/apparmor/path.c b/security/apparmor/path.c index 45ec994b55..b02dfdbff7 100644 --- a/security/apparmor/path.c +++ b/security/apparmor/path.c @@ -83,7 +83,7 @@ static int disconnect(const struct path *path, char *buf, char **name, * * Returns: %0 else error code if path lookup fails * When no error the path name is returned in @name which points to - * a position in @buf + * to a position in @buf */ static int d_namespace_path(const struct path *path, char *buf, char **name, int flags, const char *disconnected) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index b0cbc4906c..4c010c9a6a 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -260,7 +260,8 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, struct aa_profile *profile; /* freed by free_profile - usually through aa_put_profile */ - profile = kzalloc(struct_size(profile, label.vec, 2), gfp); + profile = kzalloc(sizeof(*profile) + sizeof(struct aa_profile *) * 2, + gfp); if (!profile) return NULL; @@ -631,35 +632,18 @@ static int audit_policy(struct aa_label *label, const char *op, return error; } -/* don't call out to other LSMs in the stack for apparmor policy admin - * permissions - */ -static int policy_ns_capable(struct aa_label *label, - struct user_namespace *userns, int cap) -{ - int err; - - /* check for MAC_ADMIN cap in cred */ - err = cap_capable(current_cred(), userns, cap, CAP_OPT_NONE); - if (!err) - err = aa_capable(label, cap, CAP_OPT_NONE); - - return err; -} - /** - * aa_policy_view_capable - check if viewing policy in at @ns is allowed - * label: label that is trying to view policy in ns - * ns: namespace being viewed by @label (may be NULL if @label's ns) + * policy_view_capable - check if viewing policy in at @ns is allowed + * ns: namespace being viewed by current task (may be NULL) * Returns: true if viewing policy is allowed * * If @ns is NULL then the namespace being viewed is assumed to be the * tasks current namespace. */ -bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns) +bool policy_view_capable(struct aa_ns *ns) { struct user_namespace *user_ns = current_user_ns(); - struct aa_ns *view_ns = labels_view(label); + struct aa_ns *view_ns = aa_get_current_ns(); bool root_in_user_ns = uid_eq(current_euid(), make_kuid(user_ns, 0)) || in_egroup_p(make_kgid(user_ns, 0)); bool response = false; @@ -671,44 +655,20 @@ bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns) (unprivileged_userns_apparmor_policy != 0 && user_ns->level == view_ns->level))) response = true; + aa_put_ns(view_ns); return response; } -bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns) +bool policy_admin_capable(struct aa_ns *ns) { struct user_namespace *user_ns = current_user_ns(); - bool capable = policy_ns_capable(label, user_ns, CAP_MAC_ADMIN) == 0; + bool capable = ns_capable(user_ns, CAP_MAC_ADMIN); AA_DEBUG("cap_mac_admin? %d\n", capable); AA_DEBUG("policy locked? 
%d\n", aa_g_lock_policy); - return aa_policy_view_capable(label, ns) && capable && - !aa_g_lock_policy; -} - -bool aa_current_policy_view_capable(struct aa_ns *ns) -{ - struct aa_label *label; - bool res; - - label = __begin_current_label_crit_section(); - res = aa_policy_view_capable(label, ns); - __end_current_label_crit_section(label); - - return res; -} - -bool aa_current_policy_admin_capable(struct aa_ns *ns) -{ - struct aa_label *label; - bool res; - - label = __begin_current_label_crit_section(); - res = aa_policy_admin_capable(label, ns); - __end_current_label_crit_section(label); - - return res; + return policy_view_capable(ns) && capable && !aa_g_lock_policy; } /** @@ -734,7 +694,7 @@ int aa_may_manage_policy(struct aa_label *label, struct aa_ns *ns, u32 mask) return audit_policy(label, op, NULL, NULL, "policy_locked", -EACCES); - if (!aa_policy_admin_capable(label, ns)) + if (!policy_admin_capable(ns)) return audit_policy(label, op, NULL, NULL, "not policy admin", -EACCES); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 0acca6f2a9..4e1f96b216 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -39,7 +39,7 @@ /* * The AppArmor interface treats data as a type byte followed by the - * actual data. The interface has the notion of a named entry + * actual data. The interface has the notion of a a named entry * which has a name (AA_NAME typecode followed by name string) followed by * the entries typecode and data. Named types allow for optional * elements and extensions to be added and tested for without breaking diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c index fde332e0ea..c929bf4a3d 100644 --- a/security/apparmor/procattr.c +++ b/security/apparmor/procattr.c @@ -21,6 +21,8 @@ * @profile: the profile to print profile info about (NOT NULL) * @string: Returns - string containing the profile info (NOT NULL) * + * Returns: length of @string on success else error on failure + * * Requires: profile != NULL * * Creates a string containing the namespace_name://profile_name for diff --git a/security/commoncap.c b/security/commoncap.c index 5fc8986c3c..3f810d37b7 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -24,7 +24,6 @@ #include #include #include -#include /* * If a non-root user executes a setuid-root binary in @@ -419,7 +418,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, kroot = make_kuid(fs_ns, root); /* If this is an idmapped mount shift the kuid. */ - kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot); + kroot = kuid_into_mnt(mnt_userns, kroot); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ @@ -489,7 +488,6 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, * @size: size of @ivalue * @task_ns: user namespace of the caller * @mnt_userns: user namespace of the mount the inode was found from - * @fs_userns: user namespace of the filesystem * * If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. 
This function will then @@ -499,8 +497,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, */ static kuid_t rootid_from_xattr(const void *value, size_t size, struct user_namespace *task_ns, - struct user_namespace *mnt_userns, - struct user_namespace *fs_userns) + struct user_namespace *mnt_userns) { const struct vfs_ns_cap_data *nscap = value; kuid_t rootkid; @@ -510,7 +507,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, rootid = le32_to_cpu(nscap->rootid); rootkid = make_kuid(task_ns, rootid); - return mapped_kuid_user(mnt_userns, fs_userns, rootkid); + return kuid_from_mnt(mnt_userns, rootkid); } static bool validheader(size_t size, const struct vfs_cap_data *cap) @@ -556,12 +553,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, return -EINVAL; if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; - if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns)) + if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size; - rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns); + rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns); if (!uid_valid(rootid)) return -EINVAL; @@ -702,7 +699,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ - rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid); + rootkuid = kuid_into_mnt(mnt_userns, rootkuid); if (!rootid_owns_currentns(rootkuid)) return -ENODATA; diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 842889f3dc..04375df52f 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -5,7 +5,6 @@ * Copyright 2007 IBM Corp */ -#include #include #include #include diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 17232bbfb9..dbba51583e 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -76,7 +76,7 @@ int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode, if (!ima_appraise) return 0; - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); return ima_match_policy(mnt_userns, inode, current_cred(), secid, func, mask, IMA_APPRAISE | IMA_HASH, NULL, NULL, NULL, NULL); diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c index 13753136f0..f799cc278a 100644 --- a/security/integrity/ima/ima_kexec.c +++ b/security/integrity/ima/ima_kexec.c @@ -61,9 +61,9 @@ static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer, } memcpy(file.buf, &khdr, sizeof(khdr)); - print_hex_dump_debug("ima dump: ", DUMP_PREFIX_NONE, 16, 1, - file.buf, file.count < 100 ? file.count : 100, - true); + print_hex_dump(KERN_DEBUG, "ima dump: ", DUMP_PREFIX_NONE, + 16, 1, file.buf, + file.count < 100 ? 
file.count : 100, true); *buffer_size = file.count; *buffer = file.buf; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 8c6e4514d4..4658654121 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -408,7 +408,7 @@ int ima_file_mmap(struct file *file, unsigned long prot) u32 secid; if (file && (prot & PROT_EXEC)) { - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); return process_measurement(file, current_cred(), secid, NULL, 0, MAY_EXEC, MMAP_CHECK); } @@ -446,7 +446,7 @@ int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot) !(prot & PROT_EXEC) || (vma->vm_flags & VM_EXEC)) return 0; - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); inode = file_inode(vma->vm_file); action = ima_get_action(file_mnt_user_ns(vma->vm_file), inode, current_cred(), secid, MAY_EXEC, MMAP_CHECK, @@ -487,7 +487,7 @@ int ima_bprm_check(struct linux_binprm *bprm) int ret; u32 secid; - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); ret = process_measurement(bprm->file, current_cred(), secid, NULL, 0, MAY_EXEC, BPRM_CHECK); if (ret) @@ -512,7 +512,7 @@ int ima_file_check(struct file *file, int mask) { u32 secid; - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); return process_measurement(file, current_cred(), secid, NULL, 0, mask & (MAY_READ | MAY_WRITE | MAY_EXEC | MAY_APPEND), FILE_CHECK); @@ -709,7 +709,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id, /* Read entire file for all partial reads. */ func = read_idmap[read_id] ?: FILE_CHECK; - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); return process_measurement(file, current_cred(), secid, NULL, 0, MAY_READ, func); } @@ -752,7 +752,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size, } func = read_idmap[read_id] ?: FILE_CHECK; - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); return process_measurement(file, current_cred(), secid, buf, size, MAY_READ, func); } @@ -905,7 +905,7 @@ int process_buffer_measurement(struct user_namespace *mnt_userns, * buffer measurements. 
*/ if (func) { - security_current_getsecid_subj(&secid); + security_task_getsecid_subj(current, &secid); action = ima_get_action(mnt_userns, inode, current_cred(), secid, 0, func, &pcr, &template, func_data, NULL); diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 2a1f6418b1..fa5a93dbe5 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -36,9 +36,6 @@ #define IMA_KEYRINGS 0x0400 #define IMA_LABEL 0x0800 #define IMA_VALIDATE_ALGOS 0x1000 -#define IMA_GID 0x2000 -#define IMA_EGID 0x4000 -#define IMA_FGROUP 0x8000 #define UNKNOWN 0 #define MEASURE 0x0001 /* same as IMA_MEASURE */ @@ -81,13 +78,9 @@ struct ima_rule_entry { unsigned long fsmagic; uuid_t fsuuid; kuid_t uid; - kgid_t gid; kuid_t fowner; - kgid_t fgroup; - bool (*uid_op)(kuid_t cred_uid, kuid_t rule_uid); /* Handlers for operators */ - bool (*gid_op)(kgid_t cred_gid, kgid_t rule_gid); - bool (*fowner_op)(kuid_t cred_uid, kuid_t rule_uid); /* uid_eq(), uid_gt(), uid_lt() */ - bool (*fgroup_op)(kgid_t cred_gid, kgid_t rule_gid); /* gid_eq(), gid_gt(), gid_lt() */ + bool (*uid_op)(kuid_t, kuid_t); /* Handlers for operators */ + bool (*fowner_op)(kuid_t, kuid_t); /* uid_eq(), uid_gt(), uid_lt() */ int pcr; unsigned int allowed_algos; /* bitfield of allowed hash algorithms */ struct { @@ -111,8 +104,7 @@ static_assert( /* * Without LSM specific knowledge, the default policy can only be - * written in terms of .action, .func, .mask, .fsmagic, .uid, .gid, - * .fowner, and .fgroup + * written in terms of .action, .func, .mask, .fsmagic, .uid, and .fowner */ /* @@ -590,23 +582,10 @@ static bool ima_match_rules(struct ima_rule_entry *rule, } else if (!rule->uid_op(cred->euid, rule->uid)) return false; } - if ((rule->flags & IMA_GID) && !rule->gid_op(cred->gid, rule->gid)) - return false; - if (rule->flags & IMA_EGID) { - if (has_capability_noaudit(current, CAP_SETGID)) { - if (!rule->gid_op(cred->egid, rule->gid) - && !rule->gid_op(cred->sgid, rule->gid) - && !rule->gid_op(cred->gid, rule->gid)) - return false; - } else if (!rule->gid_op(cred->egid, rule->gid)) - return false; - } + if ((rule->flags & IMA_FOWNER) && !rule->fowner_op(i_uid_into_mnt(mnt_userns, inode), rule->fowner)) return false; - if ((rule->flags & IMA_FGROUP) && - !rule->fgroup_op(i_gid_into_mnt(mnt_userns, inode), rule->fgroup)) - return false; for (i = 0; i < MAX_LSM_RULES; i++) { int rc = 0; u32 osid; @@ -870,7 +849,7 @@ static int __init ima_init_arch_policy(void) char rule[255]; int result; - result = strscpy(rule, *rules, sizeof(rule)); + result = strlcpy(rule, *rules, sizeof(rule)); INIT_LIST_HEAD(&arch_policy_entry[i].list); result = ima_parse_rule(rule, &arch_policy_entry[i]); @@ -889,7 +868,8 @@ static int __init ima_init_arch_policy(void) /** * ima_init_policy - initialize the default measure rules. * - * ima_rules points to either the ima_default_rules or the new ima_policy_rules. + * ima_rules points to either the ima_default_rules or the + * the new ima_policy_rules. */ void __init ima_init_policy(void) { @@ -1011,19 +991,16 @@ void ima_update_policy(void) } /* Keep the enumeration in sync with the policy_tokens! 
*/ -enum policy_opt { +enum { Opt_measure, Opt_dont_measure, Opt_appraise, Opt_dont_appraise, Opt_audit, Opt_hash, Opt_dont_hash, Opt_obj_user, Opt_obj_role, Opt_obj_type, Opt_subj_user, Opt_subj_role, Opt_subj_type, - Opt_func, Opt_mask, Opt_fsmagic, Opt_fsname, Opt_fsuuid, - Opt_uid_eq, Opt_euid_eq, Opt_gid_eq, Opt_egid_eq, - Opt_fowner_eq, Opt_fgroup_eq, - Opt_uid_gt, Opt_euid_gt, Opt_gid_gt, Opt_egid_gt, - Opt_fowner_gt, Opt_fgroup_gt, - Opt_uid_lt, Opt_euid_lt, Opt_gid_lt, Opt_egid_lt, - Opt_fowner_lt, Opt_fgroup_lt, + Opt_func, Opt_mask, Opt_fsmagic, Opt_fsname, + Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_fowner_eq, + Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt, + Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt, Opt_appraise_type, Opt_appraise_flag, Opt_appraise_algos, Opt_permit_directio, Opt_pcr, Opt_template, Opt_keyrings, Opt_label, Opt_err @@ -1050,22 +1027,13 @@ static const match_table_t policy_tokens = { {Opt_fsuuid, "fsuuid=%s"}, {Opt_uid_eq, "uid=%s"}, {Opt_euid_eq, "euid=%s"}, - {Opt_gid_eq, "gid=%s"}, - {Opt_egid_eq, "egid=%s"}, {Opt_fowner_eq, "fowner=%s"}, - {Opt_fgroup_eq, "fgroup=%s"}, {Opt_uid_gt, "uid>%s"}, {Opt_euid_gt, "euid>%s"}, - {Opt_gid_gt, "gid>%s"}, - {Opt_egid_gt, "egid>%s"}, {Opt_fowner_gt, "fowner>%s"}, - {Opt_fgroup_gt, "fgroup>%s"}, {Opt_uid_lt, "uid<%s"}, {Opt_euid_lt, "euid<%s"}, - {Opt_gid_lt, "gid<%s"}, - {Opt_egid_lt, "egid<%s"}, {Opt_fowner_lt, "fowner<%s"}, - {Opt_fgroup_lt, "fgroup<%s"}, {Opt_appraise_type, "appraise_type=%s"}, {Opt_appraise_flag, "appraise_flag=%s"}, {Opt_appraise_algos, "appraise_algos=%s"}, @@ -1109,36 +1077,22 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, } static void ima_log_string_op(struct audit_buffer *ab, char *key, char *value, - enum policy_opt rule_operator) + bool (*rule_operator)(kuid_t, kuid_t)) { if (!ab) return; - switch (rule_operator) { - case Opt_uid_gt: - case Opt_euid_gt: - case Opt_gid_gt: - case Opt_egid_gt: - case Opt_fowner_gt: - case Opt_fgroup_gt: + if (rule_operator == &uid_gt) audit_log_format(ab, "%s>", key); - break; - case Opt_uid_lt: - case Opt_euid_lt: - case Opt_gid_lt: - case Opt_egid_lt: - case Opt_fowner_lt: - case Opt_fgroup_lt: + else if (rule_operator == &uid_lt) audit_log_format(ab, "%s<", key); - break; - default: + else audit_log_format(ab, "%s=", key); - } audit_log_format(ab, "%s ", value); } static void ima_log_string(struct audit_buffer *ab, char *key, char *value) { - ima_log_string_op(ab, key, value, Opt_err); + ima_log_string_op(ab, key, value, NULL); } /* @@ -1213,8 +1167,7 @@ static bool ima_validate_rule(struct ima_rule_entry *entry) if (entry->flags & ~(IMA_FUNC | IMA_MASK | IMA_FSMAGIC | IMA_UID | IMA_FOWNER | IMA_FSUUID | IMA_INMASK | IMA_EUID | IMA_PCR | - IMA_FSNAME | IMA_GID | IMA_EGID | - IMA_FGROUP | IMA_DIGSIG_REQUIRED | + IMA_FSNAME | IMA_DIGSIG_REQUIRED | IMA_PERMIT_DIRECTIO | IMA_VALIDATE_ALGOS)) return false; @@ -1225,8 +1178,7 @@ static bool ima_validate_rule(struct ima_rule_entry *entry) if (entry->flags & ~(IMA_FUNC | IMA_MASK | IMA_FSMAGIC | IMA_UID | IMA_FOWNER | IMA_FSUUID | IMA_INMASK | IMA_EUID | IMA_PCR | - IMA_FSNAME | IMA_GID | IMA_EGID | - IMA_FGROUP | IMA_DIGSIG_REQUIRED | + IMA_FSNAME | IMA_DIGSIG_REQUIRED | IMA_PERMIT_DIRECTIO | IMA_MODSIG_ALLOWED | IMA_CHECK_BLACKLIST | IMA_VALIDATE_ALGOS)) return false; @@ -1238,8 +1190,7 @@ static bool ima_validate_rule(struct ima_rule_entry *entry) if (entry->flags & ~(IMA_FUNC | IMA_FSMAGIC | IMA_UID | IMA_FOWNER | IMA_FSUUID | IMA_EUID | - IMA_PCR | IMA_FSNAME | IMA_GID | IMA_EGID | - IMA_FGROUP)) + 
IMA_PCR | IMA_FSNAME)) return false; break; @@ -1247,7 +1198,7 @@ static bool ima_validate_rule(struct ima_rule_entry *entry) if (entry->action & ~(MEASURE | DONT_MEASURE)) return false; - if (entry->flags & ~(IMA_FUNC | IMA_UID | IMA_GID | IMA_PCR | + if (entry->flags & ~(IMA_FUNC | IMA_UID | IMA_PCR | IMA_KEYRINGS)) return false; @@ -1259,7 +1210,7 @@ static bool ima_validate_rule(struct ima_rule_entry *entry) if (entry->action & ~(MEASURE | DONT_MEASURE)) return false; - if (entry->flags & ~(IMA_FUNC | IMA_UID | IMA_GID | IMA_PCR | + if (entry->flags & ~(IMA_FUNC | IMA_UID | IMA_PCR | IMA_LABEL)) return false; @@ -1329,7 +1280,7 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) struct audit_buffer *ab; char *from; char *p; - bool eid_token; /* either euid or egid */ + bool uid_token; struct ima_template_desc *template_desc; int result = 0; @@ -1337,13 +1288,9 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) AUDIT_INTEGRITY_POLICY_RULE); entry->uid = INVALID_UID; - entry->gid = INVALID_GID; entry->fowner = INVALID_UID; - entry->fgroup = INVALID_GID; entry->uid_op = &uid_eq; - entry->gid_op = &gid_eq; entry->fowner_op = &uid_eq; - entry->fgroup_op = &gid_eq; entry->action = UNKNOWN; while ((p = strsep(&rule, " \t")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -1561,12 +1508,12 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) fallthrough; case Opt_uid_eq: case Opt_euid_eq: - eid_token = (token == Opt_euid_eq) || - (token == Opt_euid_gt) || - (token == Opt_euid_lt); + uid_token = (token == Opt_uid_eq) || + (token == Opt_uid_gt) || + (token == Opt_uid_lt); - ima_log_string_op(ab, eid_token ? "euid" : "uid", - args[0].from, token); + ima_log_string_op(ab, uid_token ? "uid" : "euid", + args[0].from, entry->uid_op); if (uid_valid(entry->uid)) { result = -EINVAL; @@ -1581,43 +1528,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) (uid_t)lnum != lnum) result = -EINVAL; else - entry->flags |= eid_token - ? IMA_EUID : IMA_UID; - } - break; - case Opt_gid_gt: - case Opt_egid_gt: - entry->gid_op = &gid_gt; - fallthrough; - case Opt_gid_lt: - case Opt_egid_lt: - if ((token == Opt_gid_lt) || (token == Opt_egid_lt)) - entry->gid_op = &gid_lt; - fallthrough; - case Opt_gid_eq: - case Opt_egid_eq: - eid_token = (token == Opt_egid_eq) || - (token == Opt_egid_gt) || - (token == Opt_egid_lt); - - ima_log_string_op(ab, eid_token ? "egid" : "gid", - args[0].from, token); - - if (gid_valid(entry->gid)) { - result = -EINVAL; - break; - } - - result = kstrtoul(args[0].from, 10, &lnum); - if (!result) { - entry->gid = make_kgid(current_user_ns(), - (gid_t)lnum); - if (!gid_valid(entry->gid) || - (((gid_t)lnum) != lnum)) - result = -EINVAL; - else - entry->flags |= eid_token - ? IMA_EGID : IMA_GID; + entry->flags |= uid_token + ? 
IMA_UID : IMA_EUID; } break; case Opt_fowner_gt: @@ -1628,7 +1540,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) entry->fowner_op = &uid_lt; fallthrough; case Opt_fowner_eq: - ima_log_string_op(ab, "fowner", args[0].from, token); + ima_log_string_op(ab, "fowner", args[0].from, + entry->fowner_op); if (uid_valid(entry->fowner)) { result = -EINVAL; @@ -1637,41 +1550,13 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) result = kstrtoul(args[0].from, 10, &lnum); if (!result) { - entry->fowner = make_kuid(current_user_ns(), - (uid_t)lnum); - if (!uid_valid(entry->fowner) || - (((uid_t)lnum) != lnum)) + entry->fowner = make_kuid(current_user_ns(), (uid_t)lnum); + if (!uid_valid(entry->fowner) || (((uid_t)lnum) != lnum)) result = -EINVAL; else entry->flags |= IMA_FOWNER; } break; - case Opt_fgroup_gt: - entry->fgroup_op = &gid_gt; - fallthrough; - case Opt_fgroup_lt: - if (token == Opt_fgroup_lt) - entry->fgroup_op = &gid_lt; - fallthrough; - case Opt_fgroup_eq: - ima_log_string_op(ab, "fgroup", args[0].from, token); - - if (gid_valid(entry->fgroup)) { - result = -EINVAL; - break; - } - - result = kstrtoul(args[0].from, 10, &lnum); - if (!result) { - entry->fgroup = make_kgid(current_user_ns(), - (gid_t)lnum); - if (!gid_valid(entry->fgroup) || - (((gid_t)lnum) != lnum)) - result = -EINVAL; - else - entry->flags |= IMA_FGROUP; - } - break; case Opt_obj_user: ima_log_string(ab, "obj_user", args[0].from); result = ima_lsm_rule_init(entry, args, @@ -2066,28 +1951,6 @@ int ima_policy_show(struct seq_file *m, void *v) seq_puts(m, " "); } - if (entry->flags & IMA_GID) { - snprintf(tbuf, sizeof(tbuf), "%d", __kgid_val(entry->gid)); - if (entry->gid_op == &gid_gt) - seq_printf(m, pt(Opt_gid_gt), tbuf); - else if (entry->gid_op == &gid_lt) - seq_printf(m, pt(Opt_gid_lt), tbuf); - else - seq_printf(m, pt(Opt_gid_eq), tbuf); - seq_puts(m, " "); - } - - if (entry->flags & IMA_EGID) { - snprintf(tbuf, sizeof(tbuf), "%d", __kgid_val(entry->gid)); - if (entry->gid_op == &gid_gt) - seq_printf(m, pt(Opt_egid_gt), tbuf); - else if (entry->gid_op == &gid_lt) - seq_printf(m, pt(Opt_egid_lt), tbuf); - else - seq_printf(m, pt(Opt_egid_eq), tbuf); - seq_puts(m, " "); - } - if (entry->flags & IMA_FOWNER) { snprintf(tbuf, sizeof(tbuf), "%d", __kuid_val(entry->fowner)); if (entry->fowner_op == &uid_gt) @@ -2099,17 +1962,6 @@ int ima_policy_show(struct seq_file *m, void *v) seq_puts(m, " "); } - if (entry->flags & IMA_FGROUP) { - snprintf(tbuf, sizeof(tbuf), "%d", __kgid_val(entry->fgroup)); - if (entry->fgroup_op == &gid_gt) - seq_printf(m, pt(Opt_fgroup_gt), tbuf); - else if (entry->fgroup_op == &gid_lt) - seq_printf(m, pt(Opt_fgroup_lt), tbuf); - else - seq_printf(m, pt(Opt_fgroup_eq), tbuf); - seq_puts(m, " "); - } - if (entry->flags & IMA_VALIDATE_ALGOS) { seq_puts(m, "appraise_algos="); ima_policy_show_appraise_algos(m, entry->allowed_algos); diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 08b6d12f99..f290f78c3f 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include "../integrity.h" @@ -177,10 +176,6 @@ static int __init load_uefi_certs(void) kfree(dbx); } - /* the MOK/MOKx can not be trusted when secure boot is disabled */ - if (!arch_ima_get_secureboot()) - return 0; - mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status); if (!mokx) { if (status == 
EFI_NOT_FOUND) diff --git a/security/keys/Kconfig b/security/keys/Kconfig index 969122c7b9..64b81abd08 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -109,7 +109,7 @@ config KEY_DH_OPERATIONS bool "Diffie-Hellman operations on retained keys" depends on KEYS select CRYPTO - select CRYPTO_KDF800108_CTR + select CRYPTO_HASH select CRYPTO_DH help This option provides support for calculating Diffie-Hellman diff --git a/security/keys/dh.c b/security/keys/dh.c index 4573fc1561..1abfa70ed6 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "internal.h" @@ -80,9 +79,17 @@ static void dh_crypto_done(struct crypto_async_request *req, int err) complete(&compl->completion); } -static int kdf_alloc(struct crypto_shash **hash, char *hashname) +struct kdf_sdesc { + struct shash_desc shash; + char ctx[]; +}; + +static int kdf_alloc(struct kdf_sdesc **sdesc_ret, char *hashname) { struct crypto_shash *tfm; + struct kdf_sdesc *sdesc; + int size; + int err; /* allocate synchronous hash */ tfm = crypto_alloc_shash(hashname, 0, 0); @@ -91,30 +98,112 @@ static int kdf_alloc(struct crypto_shash **hash, char *hashname) return PTR_ERR(tfm); } - if (crypto_shash_digestsize(tfm) == 0) { - crypto_free_shash(tfm); - return -EINVAL; - } + err = -EINVAL; + if (crypto_shash_digestsize(tfm) == 0) + goto out_free_tfm; - *hash = tfm; + err = -ENOMEM; + size = sizeof(struct shash_desc) + crypto_shash_descsize(tfm); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + goto out_free_tfm; + sdesc->shash.tfm = tfm; + + *sdesc_ret = sdesc; return 0; + +out_free_tfm: + crypto_free_shash(tfm); + return err; } -static void kdf_dealloc(struct crypto_shash *hash) +static void kdf_dealloc(struct kdf_sdesc *sdesc) { - if (hash) - crypto_free_shash(hash); + if (!sdesc) + return; + + if (sdesc->shash.tfm) + crypto_free_shash(sdesc->shash.tfm); + + kfree_sensitive(sdesc); } -static int keyctl_dh_compute_kdf(struct crypto_shash *hash, +/* + * Implementation of the KDF in counter mode according to SP800-108 section 5.1 + * as well as SP800-56A section 5.8.1 (Single-step KDF). + * + * SP800-56A: + * The src pointer is defined as Z || other info where Z is the shared secret + * from DH and other info is an arbitrary string (see SP800-56A section + * 5.8.1.2). + * + * 'dlen' must be a multiple of the digest size. 
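To make the counter-mode construction described in the kdf_ctr() comment concrete, here is a hedged userspace sketch using OpenSSL's EVP interface (an assumption for illustration; the kernel code uses the crypto shash API instead). Each output block is H(be32(counter) || Z || otherinfo), concatenated until dlen bytes have been produced; the zero-padding of Z handled by the zlen parameter above is left out.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>		/* htonl() for the big-endian counter */
#include <openssl/evp.h>

static int kdf_ctr_sha256(const uint8_t *z, size_t zlen,
			  const uint8_t *info, size_t ilen,
			  uint8_t *out, size_t dlen)
{
	uint32_t i = 1;

	while (dlen) {
		EVP_MD_CTX *ctx = EVP_MD_CTX_new();
		uint32_t ctr = htonl(i++);
		uint8_t md[EVP_MAX_MD_SIZE];
		unsigned int h = 0;
		size_t take;

		if (!ctx)
			return -1;
		/* K(i) = H(be32(i) || Z || otherinfo) */
		if (EVP_DigestInit_ex(ctx, EVP_sha256(), NULL) != 1 ||
		    EVP_DigestUpdate(ctx, &ctr, sizeof(ctr)) != 1 ||
		    EVP_DigestUpdate(ctx, z, zlen) != 1 ||
		    EVP_DigestUpdate(ctx, info, ilen) != 1 ||
		    EVP_DigestFinal_ex(ctx, md, &h) != 1) {
			EVP_MD_CTX_free(ctx);
			return -1;
		}
		EVP_MD_CTX_free(ctx);

		take = dlen < h ? dlen : h;
		memcpy(out, md, take);
		out += take;
		dlen -= take;
	}
	return 0;
}

int main(void)
{
	uint8_t z[16] = { 0 }, info[4] = { 'i', 'n', 'f', 'o' }, out[40];

	if (kdf_ctr_sha256(z, sizeof(z), info, sizeof(info),
			   out, sizeof(out)) == 0)
		printf("derived %zu bytes\n", sizeof(out));
	return 0;
}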
+ */ +static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen, + u8 *dst, unsigned int dlen, unsigned int zlen) +{ + struct shash_desc *desc = &sdesc->shash; + unsigned int h = crypto_shash_digestsize(desc->tfm); + int err = 0; + u8 *dst_orig = dst; + __be32 counter = cpu_to_be32(1); + + while (dlen) { + err = crypto_shash_init(desc); + if (err) + goto err; + + err = crypto_shash_update(desc, (u8 *)&counter, sizeof(__be32)); + if (err) + goto err; + + if (zlen && h) { + u8 tmpbuffer[32]; + size_t chunk = min_t(size_t, zlen, sizeof(tmpbuffer)); + memset(tmpbuffer, 0, chunk); + + do { + err = crypto_shash_update(desc, tmpbuffer, + chunk); + if (err) + goto err; + + zlen -= chunk; + chunk = min_t(size_t, zlen, sizeof(tmpbuffer)); + } while (zlen); + } + + if (src && slen) { + err = crypto_shash_update(desc, src, slen); + if (err) + goto err; + } + + err = crypto_shash_final(desc, dst); + if (err) + goto err; + + dlen -= h; + dst += h; + counter = cpu_to_be32(be32_to_cpu(counter) + 1); + } + + return 0; + +err: + memzero_explicit(dst_orig, dlen); + return err; +} + +static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc, char __user *buffer, size_t buflen, - uint8_t *kbuf, size_t kbuflen) + uint8_t *kbuf, size_t kbuflen, size_t lzero) { - struct kvec kbuf_iov = { .iov_base = kbuf, .iov_len = kbuflen }; uint8_t *outbuf = NULL; int ret; - size_t outbuf_len = roundup(buflen, crypto_shash_digestsize(hash)); + size_t outbuf_len = roundup(buflen, + crypto_shash_digestsize(sdesc->shash.tfm)); outbuf = kmalloc(outbuf_len, GFP_KERNEL); if (!outbuf) { @@ -122,7 +211,7 @@ static int keyctl_dh_compute_kdf(struct crypto_shash *hash, goto err; } - ret = crypto_kdf108_ctr_generate(hash, &kbuf_iov, 1, outbuf, outbuf_len); + ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, outbuf_len, lzero); if (ret) goto err; @@ -151,7 +240,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, struct kpp_request *req; uint8_t *secret; uint8_t *outbuf; - struct crypto_shash *hash = NULL; + struct kdf_sdesc *sdesc = NULL; if (!params || (!buffer && buflen)) { ret = -EINVAL; @@ -184,7 +273,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, } /* allocate KDF from the kernel crypto API */ - ret = kdf_alloc(&hash, hashname); + ret = kdf_alloc(&sdesc, hashname); kfree(hashname); if (ret) goto out1; @@ -294,8 +383,9 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, goto out6; } - ret = keyctl_dh_compute_kdf(hash, buffer, buflen, outbuf, - req->dst_len + kdfcopy->otherinfolen); + ret = keyctl_dh_compute_kdf(sdesc, buffer, buflen, outbuf, + req->dst_len + kdfcopy->otherinfolen, + outlen - req->dst_len); } else if (copy_to_user(buffer, outbuf, req->dst_len) == 0) { ret = req->dst_len; } else { @@ -313,7 +403,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, out2: dh_free_data(&dh_inputs); out1: - kdf_dealloc(hash); + kdf_dealloc(sdesc); return ret; } diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 1897cbf6fc..5a5016ef43 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -224,7 +224,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, case LSM_AUDIT_DATA_NONE: return; case LSM_AUDIT_DATA_IPC: - audit_log_format(ab, " ipc_key=%d ", a->u.ipc_id); + audit_log_format(ab, " key=%d ", a->u.ipc_id); break; case LSM_AUDIT_DATA_CAP: audit_log_format(ab, " capability=%d ", a->u.cap); diff --git a/security/security.c b/security/security.c index 22261d79f3..67264cb08f 100644 --- a/security/security.c +++ 
b/security/security.c @@ -706,7 +706,7 @@ static int lsm_superblock_alloc(struct super_block *sb) #define LSM_RET_DEFAULT(NAME) (NAME##_default) #define DECLARE_LSM_RET_DEFAULT_void(DEFAULT, NAME) #define DECLARE_LSM_RET_DEFAULT_int(DEFAULT, NAME) \ - static const int __maybe_unused LSM_RET_DEFAULT(NAME) = (DEFAULT); + static const int LSM_RET_DEFAULT(NAME) = (DEFAULT); #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ DECLARE_LSM_RET_DEFAULT_##RET(DEFAULT, NAME) @@ -994,6 +994,14 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, } EXPORT_SYMBOL(security_sb_clone_mnt_opts); +int security_add_mnt_opt(const char *option, const char *val, int len, + void **mnt_opts) +{ + return call_int_hook(sb_add_mnt_opt, -EINVAL, + option, val, len, mnt_opts); +} +EXPORT_SYMBOL(security_add_mnt_opt); + int security_move_mount(const struct path *from_path, const struct path *to_path) { return call_int_hook(move_mount, 0, from_path, to_path); @@ -1044,23 +1052,11 @@ void security_inode_free(struct inode *inode) } int security_dentry_init_security(struct dentry *dentry, int mode, - const struct qstr *name, - const char **xattr_name, void **ctx, - u32 *ctxlen) + const struct qstr *name, void **ctx, + u32 *ctxlen) { - struct security_hook_list *hp; - int rc; - - /* - * Only one module will provide a security context. - */ - hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) { - rc = hp->hook.dentry_init_security(dentry, mode, name, - xattr_name, ctx, ctxlen); - if (rc != LSM_RET_DEFAULT(dentry_init_security)) - return rc; - } - return LSM_RET_DEFAULT(dentry_init_security); + return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, + name, ctx, ctxlen); } EXPORT_SYMBOL(security_dentry_init_security); @@ -1811,12 +1807,12 @@ int security_task_getsid(struct task_struct *p) return call_int_hook(task_getsid, 0, p); } -void security_current_getsecid_subj(u32 *secid) +void security_task_getsecid_subj(struct task_struct *p, u32 *secid) { *secid = 0; - call_void_hook(current_getsecid_subj, secid); + call_void_hook(task_getsecid_subj, p, secid); } -EXPORT_SYMBOL(security_current_getsecid_subj); +EXPORT_SYMBOL(security_task_getsecid_subj); void security_task_getsecid_obj(struct task_struct *p, u32 *secid) { @@ -2370,9 +2366,9 @@ int security_tun_dev_open(void *security) } EXPORT_SYMBOL(security_tun_dev_open); -int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) +int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb) { - return call_int_hook(sctp_assoc_request, 0, asoc, skb); + return call_int_hook(sctp_assoc_request, 0, ep, skb); } EXPORT_SYMBOL(security_sctp_assoc_request); @@ -2384,10 +2380,10 @@ int security_sctp_bind_connect(struct sock *sk, int optname, } EXPORT_SYMBOL(security_sctp_bind_connect); -void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, +void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, struct sock *newsk) { - call_void_hook(sctp_sk_clone, asoc, sk, newsk); + call_void_hook(sctp_sk_clone, ep, sk, newsk); } EXPORT_SYMBOL(security_sctp_sk_clone); @@ -2629,15 +2625,3 @@ int security_perf_event_write(struct perf_event *event) return call_int_hook(perf_event_write, 0, event); } #endif /* CONFIG_PERF_EVENTS */ - -#ifdef CONFIG_IO_URING -int security_uring_override_creds(const struct cred *new) -{ - return call_int_hook(uring_override_creds, 0, new); -} - -int security_uring_sqpoll(void) -{ - return call_int_hook(uring_sqpoll, 0); -} -#endif /* CONFIG_IO_URING */ 
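The dentry_init_security hunk above replaces an explicit walk over security_hook_heads with a call_int_hook() invocation. The sketch below models the underlying dispatch idea with toy userspace types (hooks[], RC_DEFAULT and call_int_hook_sketch() are inventions, not the kernel interface): each registered hook is tried in order, and the first answer that differs from the default return value wins.

#include <stddef.h>
#include <stdio.h>

#define RC_DEFAULT (-95)	/* stand-in for -EOPNOTSUPP */

typedef int (*hook_fn)(void *arg);

static int hook_declines(void *arg) { (void)arg; return RC_DEFAULT; }
static int hook_answers(void *arg)  { (void)arg; return 0; }

static hook_fn hooks[] = { hook_declines, hook_answers };

/* Try each registered hook; the first non-default answer wins. */
static int call_int_hook_sketch(void *arg)
{
	for (size_t i = 0; i < sizeof(hooks) / sizeof(hooks[0]); i++) {
		int rc = hooks[i](arg);

		if (rc != RC_DEFAULT)
			return rc;
	}
	return RC_DEFAULT;
}

int main(void)
{
	printf("rc=%d\n", call_int_hook_sketch(NULL));
	return 0;
}

This is why a sentinel like -EOPNOTSUPP works as the default: a module that does not implement the hook is indistinguishable from one that declined to answer, so only one module ends up providing the security context.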
diff --git a/security/selinux/avc.c b/security/selinux/avc.c index abcd9740d1..97f4c944a2 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -547,7 +547,6 @@ static inline struct avc_node *avc_search_node(struct selinux_avc *avc, /** * avc_lookup - Look up an AVC entry. - * @avc: the access vector cache * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -598,7 +597,6 @@ static int avc_latest_notif_update(struct selinux_avc *avc, /** * avc_insert - Insert an AVC entry. - * @avc: the access vector cache * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -827,14 +825,9 @@ int __init avc_add_callback(int (*callback)(u32 event), u32 events) /** * avc_update_node - Update an AVC entry - * @avc: the access vector cache * @event : Updating event * @perms : Permission mask bits - * @driver: xperm driver information - * @xperm: xperm permissions - * @ssid: AVC entry source sid - * @tsid: AVC entry target sid - * @tclass : AVC entry target object class + * @ssid,@tsid,@tclass : identifier of an AVC entry * @seqno : sequence number when decision was made * @xpd: extended_perms_decision to be added to the node * @flags: the AVC_* flags, e.g. AVC_EXTENDED_PERMS, or 0. @@ -935,7 +928,6 @@ static int avc_update_node(struct selinux_avc *avc, /** * avc_flush - Flush the cache - * @avc: the access vector cache */ static void avc_flush(struct selinux_avc *avc) { @@ -964,7 +956,6 @@ static void avc_flush(struct selinux_avc *avc) /** * avc_ss_reset - Flush the cache and revalidate migrated permissions. - * @avc: the access vector cache * @seqno: policy sequence number */ int avc_ss_reset(struct selinux_avc *avc, u32 seqno) @@ -1114,7 +1105,6 @@ int avc_has_extended_perms(struct selinux_state *state, /** * avc_has_perm_noaudit - Check permissions but perform no auditing. - * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -1166,7 +1156,6 @@ inline int avc_has_perm_noaudit(struct selinux_state *state, /** * avc_has_perm - Check permissions and perform any appropriate auditing. 
- * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5b6895e4fc..baa12d1007 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -229,6 +229,19 @@ static inline u32 cred_sid(const struct cred *cred) return tsec->sid; } +/* + * get the subjective security ID of a task + */ +static inline u32 task_sid_subj(const struct task_struct *task) +{ + u32 sid; + + rcu_read_lock(); + sid = cred_sid(rcu_dereference(task->cred)); + rcu_read_unlock(); + return sid; +} + /* * get the objective security ID of a task */ @@ -729,8 +742,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, !strcmp(sb->s_type->name, "tracefs") || !strcmp(sb->s_type->name, "binder") || !strcmp(sb->s_type->name, "bpf") || - !strcmp(sb->s_type->name, "pstore") || - !strcmp(sb->s_type->name, "securityfs")) + !strcmp(sb->s_type->name, "pstore")) sbsec->flags |= SE_SBGENFS; if (!strcmp(sb->s_type->name, "sysfs") || @@ -977,14 +989,14 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) struct selinux_mnt_opts *opts = *mnt_opts; bool is_alloc_opts = false; - if (token == Opt_seclabel) - /* eaten and completely ignored */ + if (token == Opt_seclabel) /* eaten and completely ignored */ return 0; + if (!s) return -ENOMEM; if (!opts) { - opts = kzalloc(sizeof(*opts), GFP_KERNEL); + opts = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL); if (!opts) return -ENOMEM; *mnt_opts = opts; @@ -994,29 +1006,27 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) switch (token) { case Opt_context: if (opts->context || opts->defcontext) - goto err; + goto Einval; opts->context = s; break; case Opt_fscontext: if (opts->fscontext) - goto err; + goto Einval; opts->fscontext = s; break; case Opt_rootcontext: if (opts->rootcontext) - goto err; + goto Einval; opts->rootcontext = s; break; case Opt_defcontext: if (opts->context || opts->defcontext) - goto err; + goto Einval; opts->defcontext = s; break; } - return 0; - -err: +Einval: if (is_alloc_opts) { kfree(opts); *mnt_opts = NULL; @@ -1025,6 +1035,44 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) return -EINVAL; } +static int selinux_add_mnt_opt(const char *option, const char *val, int len, + void **mnt_opts) +{ + int token = Opt_error; + int rc, i; + + for (i = 0; i < ARRAY_SIZE(tokens); i++) { + if (strcmp(option, tokens[i].name) == 0) { + token = tokens[i].opt; + break; + } + } + + if (token == Opt_error) + return -EINVAL; + + if (token != Opt_seclabel) { + val = kmemdup_nul(val, len, GFP_KERNEL); + if (!val) { + rc = -ENOMEM; + goto free_opt; + } + } + rc = selinux_add_opt(token, val, mnt_opts); + if (unlikely(rc)) { + kfree(val); + goto free_opt; + } + return rc; + +free_opt: + if (*mnt_opts) { + selinux_free_mnt_opts(*mnt_opts); + *mnt_opts = NULL; + } + return rc; +} + static int show_sid(struct seq_file *m, u32 sid) { char *context = NULL; @@ -2891,8 +2939,7 @@ static void selinux_inode_free_security(struct inode *inode) } static int selinux_dentry_init_security(struct dentry *dentry, int mode, - const struct qstr *name, - const char **xattr_name, void **ctx, + const struct qstr *name, void **ctx, u32 *ctxlen) { u32 newsid; @@ -2905,9 +2952,6 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode, if (rc) return rc; - if (xattr_name) - *xattr_name = XATTR_NAME_SELINUX; - return security_sid_to_context(&selinux_state, newsid, (char 
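A toy model of the SCTP labeling flow in the hunks above (invented struct names, not kernel types): the label pair is computed once when the association is requested, stored on the endpoint, and later copied onto every socket created by accept(2) or sctp_peeloff(3).

#include <stdint.h>
#include <stdio.h>

struct toy_endpoint { uint32_t secid, peer_secid; };
struct toy_sock { uint32_t sid, peer_sid; };

static void assoc_request(struct toy_endpoint *ep,
			  uint32_t conn_sid, uint32_t peer_sid)
{
	ep->secid = conn_sid;	/* MLS component for the connection */
	ep->peer_secid = peer_sid;
}

static void sk_clone(const struct toy_endpoint *ep, struct toy_sock *newsk)
{
	/* the new socket inherits the labels computed at association time */
	newsk->sid = ep->secid;
	newsk->peer_sid = ep->peer_secid;
}

int main(void)
{
	struct toy_endpoint ep;
	struct toy_sock newsk;

	assoc_request(&ep, 1001, 2002);
	sk_clone(&ep, &newsk);
	printf("sid=%u peer_sid=%u\n", newsk.sid, newsk.peer_sid);
	return 0;
}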
**)ctx, ctxlen); } @@ -4169,9 +4213,9 @@ static int selinux_task_getsid(struct task_struct *p) PROCESS__GETSESSION, NULL); } -static void selinux_current_getsecid_subj(u32 *secid) +static void selinux_task_getsecid_subj(struct task_struct *p, u32 *secid) { - *secid = current_sid(); + *secid = task_sid_subj(p); } static void selinux_task_getsecid_obj(struct task_struct *p, u32 *secid) @@ -5303,10 +5347,10 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association * already present). */ -static int selinux_sctp_assoc_request(struct sctp_association *asoc, +static int selinux_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sk_security_struct *sksec = ep->base.sk->sk_security; struct common_audit_data ad; struct lsm_network_audit net = {0,}; u8 peerlbl_active; @@ -5323,7 +5367,7 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, /* This will return peer_sid = SECSID_NULL if there are * no peer labels, see security_net_peersid_resolve(). */ - err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family, + err = selinux_skb_peerlbl_sid(skb, ep->base.sk->sk_family, &peer_sid); if (err) return err; @@ -5347,7 +5391,7 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, */ ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; - ad.u.net->sk = asoc->base.sk; + ad.u.net->sk = ep->base.sk; err = avc_has_perm(&selinux_state, sksec->peer_sid, peer_sid, sksec->sclass, SCTP_SOCKET__ASSOCIATION, &ad); @@ -5356,7 +5400,7 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, } /* Compute the MLS component for the connection and store - * the information in asoc. This will be used by SCTP TCP type + * the information in ep. This will be used by SCTP TCP type * sockets and peeled off connections as they cause a new * socket to be generated. selinux_sctp_sk_clone() will then * plug this into the new socket. @@ -5365,11 +5409,11 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, if (err) return err; - asoc->secid = conn_sid; - asoc->peer_secid = peer_sid; + ep->secid = conn_sid; + ep->peer_secid = peer_sid; /* Set any NetLabel labels including CIPSO/CALIPSO options. */ - return selinux_netlbl_sctp_assoc_request(asoc, skb); + return selinux_netlbl_sctp_assoc_request(ep, skb); } /* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting @@ -5454,7 +5498,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, } /* Called whenever a new socket is created by accept(2) or sctp_peeloff(3). 
*/ -static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, +static void selinux_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, struct sock *newsk) { struct sk_security_struct *sksec = sk->sk_security; @@ -5466,8 +5510,8 @@ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk if (!selinux_policycap_extsockclass()) return selinux_sk_clone_security(sk, newsk); - newsksec->sid = asoc->secid; - newsksec->peer_sid = asoc->peer_secid; + newsksec->sid = ep->secid; + newsksec->peer_sid = ep->peer_secid; newsksec->sclass = sksec->sclass; selinux_netlbl_sctp_sk_clone(sk, newsk); } @@ -5635,41 +5679,40 @@ static int selinux_tun_dev_open(void *security) #ifdef CONFIG_NETFILTER -static unsigned int selinux_ip_forward(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) +static unsigned int selinux_ip_forward(struct sk_buff *skb, + const struct net_device *indev, + u16 family) { - int ifindex; - u16 family; + int err; char *addrp; u32 peer_sid; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - int secmark_active, peerlbl_active; + u8 secmark_active; + u8 netlbl_active; + u8 peerlbl_active; if (!selinux_policycap_netpeer()) return NF_ACCEPT; secmark_active = selinux_secmark_enabled(); + netlbl_active = netlbl_enabled(); peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; - family = state->pf; if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) return NF_DROP; - ifindex = state->in->ifindex; ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; - ad.u.net->netif = ifindex; + ad.u.net->netif = indev->ifindex; ad.u.net->family = family; if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) return NF_DROP; if (peerlbl_active) { - int err; - - err = selinux_inet_sys_rcv_skb(state->net, ifindex, + err = selinux_inet_sys_rcv_skb(dev_net(indev), indev->ifindex, addrp, family, peer_sid, &ad); if (err) { selinux_netlbl_err(skb, family, err, 1); @@ -5683,7 +5726,7 @@ static unsigned int selinux_ip_forward(void *priv, struct sk_buff *skb, SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) return NF_DROP; - if (netlbl_enabled()) + if (netlbl_active) /* we do this in the FORWARD path and not the POST_ROUTING * path because we want to make sure we apply the necessary * labeling before IPsec is applied so we can leverage AH @@ -5694,8 +5737,24 @@ static unsigned int selinux_ip_forward(void *priv, struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ip_output(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) +static unsigned int selinux_ipv4_forward(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return selinux_ip_forward(skb, state->in, PF_INET); +} + +#if IS_ENABLED(CONFIG_IPV6) +static unsigned int selinux_ipv6_forward(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return selinux_ip_forward(skb, state->in, PF_INET6); +} +#endif /* IPV6 */ + +static unsigned int selinux_ip_output(struct sk_buff *skb, + u16 family) { struct sock *sk; u32 sid; @@ -5730,32 +5789,48 @@ static unsigned int selinux_ip_output(void *priv, struct sk_buff *skb, sid = sksec->sid; } else sid = SECINITSID_KERNEL; - if (selinux_netlbl_skbuff_setsid(skb, state->pf, sid) != 0) + if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0) return NF_DROP; return NF_ACCEPT; } - -static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, +static unsigned int selinux_ipv4_output(void *priv, + struct sk_buff *skb, 
const struct nf_hook_state *state) { - struct sock *sk; + return selinux_ip_output(skb, PF_INET); +} + +#if IS_ENABLED(CONFIG_IPV6) +static unsigned int selinux_ipv6_output(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return selinux_ip_output(skb, PF_INET6); +} +#endif /* IPV6 */ + +static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, + int ifindex, + u16 family) +{ + struct sock *sk = skb_to_full_sk(skb); struct sk_security_struct *sksec; struct common_audit_data ad; struct lsm_network_audit net = {0,}; + char *addrp; u8 proto = 0; - sk = skb_to_full_sk(skb); if (sk == NULL) return NF_ACCEPT; sksec = sk->sk_security; ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; - ad.u.net->netif = state->out->ifindex; - ad.u.net->family = state->pf; - if (selinux_parse_skb(skb, &ad, NULL, 0, &proto)) + ad.u.net->netif = ifindex; + ad.u.net->family = family; + if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) return NF_DROP; if (selinux_secmark_enabled()) @@ -5770,26 +5845,26 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ip_postroute(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) +static unsigned int selinux_ip_postroute(struct sk_buff *skb, + const struct net_device *outdev, + u16 family) { - u16 family; u32 secmark_perm; u32 peer_sid; - int ifindex; + int ifindex = outdev->ifindex; struct sock *sk; struct common_audit_data ad; struct lsm_network_audit net = {0,}; char *addrp; - int secmark_active, peerlbl_active; + u8 secmark_active; + u8 peerlbl_active; /* If any sort of compatibility mode is enabled then handoff processing * to the selinux_ip_postroute_compat() function to deal with the * special handling. We do this in an attempt to keep this function * as fast and as clean as possible. 
*/ if (!selinux_policycap_netpeer()) - return selinux_ip_postroute_compat(skb, state); + return selinux_ip_postroute_compat(skb, ifindex, family); secmark_active = selinux_secmark_enabled(); peerlbl_active = selinux_peerlbl_enabled(); @@ -5815,7 +5890,6 @@ static unsigned int selinux_ip_postroute(void *priv, return NF_ACCEPT; #endif - family = state->pf; if (sk == NULL) { /* Without an associated socket the packet is either coming * from the kernel or it is being forwarded; check the packet @@ -5876,7 +5950,6 @@ static unsigned int selinux_ip_postroute(void *priv, secmark_perm = PACKET__SEND; } - ifindex = state->out->ifindex; ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; ad.u.net->netif = ifindex; @@ -5894,7 +5967,7 @@ static unsigned int selinux_ip_postroute(void *priv, u32 if_sid; u32 node_sid; - if (sel_netif_sid(state->net, ifindex, &if_sid)) + if (sel_netif_sid(dev_net(outdev), ifindex, &if_sid)) return NF_DROP; if (avc_has_perm(&selinux_state, peer_sid, if_sid, @@ -5911,6 +5984,23 @@ static unsigned int selinux_ip_postroute(void *priv, return NF_ACCEPT; } + +static unsigned int selinux_ipv4_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return selinux_ip_postroute(skb, state->out, PF_INET); +} + +#if IS_ENABLED(CONFIG_IPV6) +static unsigned int selinux_ipv6_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return selinux_ip_postroute(skb, state->out, PF_INET6); +} +#endif /* IPV6 */ + #endif /* CONFIG_NETFILTER */ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) @@ -6914,6 +7004,34 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif +static int selinux_lockdown(enum lockdown_reason what) +{ + struct common_audit_data ad; + u32 sid = current_sid(); + int invalid_reason = (what <= LOCKDOWN_NONE) || + (what == LOCKDOWN_INTEGRITY_MAX) || + (what >= LOCKDOWN_CONFIDENTIALITY_MAX); + + if (WARN(invalid_reason, "Invalid lockdown reason")) { + audit_log(audit_context(), + GFP_ATOMIC, AUDIT_SELINUX_ERR, + "lockdown_reason=invalid"); + return -EINVAL; + } + + ad.type = LSM_AUDIT_DATA_LOCKDOWN; + ad.u.reason = what; + + if (what <= LOCKDOWN_INTEGRITY_MAX) + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_LOCKDOWN, + LOCKDOWN__INTEGRITY, &ad); + else + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_LOCKDOWN, + LOCKDOWN__CONFIDENTIALITY, &ad); +} + struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), .lbs_file = sizeof(struct file_security_struct), @@ -6984,35 +7102,6 @@ static int selinux_perf_event_write(struct perf_event *event) } #endif -#ifdef CONFIG_IO_URING -/** - * selinux_uring_override_creds - check the requested cred override - * @new: the target creds - * - * Check to see if the current task is allowed to override it's credentials - * to service an io_uring operation. - */ -static int selinux_uring_override_creds(const struct cred *new) -{ - return avc_has_perm(&selinux_state, current_sid(), cred_sid(new), - SECCLASS_IO_URING, IO_URING__OVERRIDE_CREDS, NULL); -} - -/** - * selinux_uring_sqpoll - check if a io_uring polling thread can be created - * - * Check to see if the current task is allowed to create a new io_uring - * kernel polling thread. 
- */ -static int selinux_uring_sqpoll(void) -{ - int sid = current_sid(); - - return avc_has_perm(&selinux_state, sid, sid, - SECCLASS_IO_URING, IO_URING__SQPOLL, NULL); -} -#endif /* CONFIG_IO_URING */ - /* * IMPORTANT NOTE: When adding new hooks, please be careful to keep this order: * 1. any hooks that don't belong to (2.) or (3.) below, @@ -7123,7 +7212,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(task_setpgid, selinux_task_setpgid), LSM_HOOK_INIT(task_getpgid, selinux_task_getpgid), LSM_HOOK_INIT(task_getsid, selinux_task_getsid), - LSM_HOOK_INIT(current_getsecid_subj, selinux_current_getsecid_subj), + LSM_HOOK_INIT(task_getsecid_subj, selinux_task_getsecid_subj), LSM_HOOK_INIT(task_getsecid_obj, selinux_task_getsecid_obj), LSM_HOOK_INIT(task_setnice, selinux_task_setnice), LSM_HOOK_INIT(task_setioprio, selinux_task_setioprio), @@ -7251,10 +7340,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), #endif -#ifdef CONFIG_IO_URING - LSM_HOOK_INIT(uring_override_creds, selinux_uring_override_creds), - LSM_HOOK_INIT(uring_sqpoll, selinux_uring_sqpoll), -#endif + LSM_HOOK_INIT(locked_down, selinux_lockdown), /* * PUT "CLONING" (ACCESSING + ALLOCATING) HOOKS HERE @@ -7262,6 +7348,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup), LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param), LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts), + LSM_HOOK_INIT(sb_add_mnt_opt, selinux_add_mnt_opt), #ifdef CONFIG_SECURITY_NETWORK_XFRM LSM_HOOK_INIT(xfrm_policy_clone_security, selinux_xfrm_policy_clone), #endif @@ -7374,38 +7461,38 @@ DEFINE_LSM(selinux) = { static const struct nf_hook_ops selinux_nf_ops[] = { { - .hook = selinux_ip_postroute, + .hook = selinux_ipv4_postroute, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_SELINUX_LAST, }, { - .hook = selinux_ip_forward, + .hook = selinux_ipv4_forward, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_SELINUX_FIRST, }, { - .hook = selinux_ip_output, + .hook = selinux_ipv4_output, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_SELINUX_FIRST, }, #if IS_ENABLED(CONFIG_IPV6) { - .hook = selinux_ip_postroute, + .hook = selinux_ipv6_postroute, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_SELINUX_LAST, }, { - .hook = selinux_ip_forward, + .hook = selinux_ipv6_forward, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_SELINUX_FIRST, }, { - .hook = selinux_ip_output, + .hook = selinux_ipv6_output, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_SELINUX_FIRST, diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 35aac62a66..084757ff43 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -250,10 +250,10 @@ struct security_class_mapping secclass_map[] = { { COMMON_SOCK_PERMS, NULL } }, { "perf_event", { "open", "cpu", "kernel", "tracepoint", "read", "write", NULL } }, + { "lockdown", + { "integrity", "confidentiality", NULL } }, { "anon_inode", { COMMON_FILE_PERMS, NULL } }, - { "io_uring", - { "override_creds", "sqpoll", NULL } }, { NULL } }; diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index 4d0456d3d4..0c58f62dc6 100644 --- a/security/selinux/include/netlabel.h +++ 
b/security/selinux/include/netlabel.h @@ -39,7 +39,7 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, u16 family, u32 sid); -int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, +int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb); int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family); void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family); @@ -98,7 +98,7 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, return 0; } -static inline int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, +static inline int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb) { return 0; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 1321f15799..abaab76838 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -29,7 +29,6 @@ /** * selinux_netlbl_sidlookup_cached - Cache a SID lookup * @skb: the packet - * @family: the packet's address family * @secattr: the NetLabel security attributes * @sid: the SID * @@ -129,7 +128,6 @@ void selinux_netlbl_cache_invalidate(void) /** * selinux_netlbl_err - Handle a NetLabel packet error * @skb: the packet - * @family: the packet's address family * @error: the error code * @gateway: true if host is acting as a gateway, false otherwise * @@ -162,6 +160,7 @@ void selinux_netlbl_sk_security_free(struct sk_security_struct *sksec) /** * selinux_netlbl_sk_security_reset - Reset the NetLabel fields * @sksec: the sk_security_struct + * @family: the socket family * * Description: * Called when the NetLabel state of a sk_security_struct needs to be reset. @@ -261,30 +260,30 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, /** * selinux_netlbl_sctp_assoc_request - Label an incoming sctp association. - * @asoc: incoming association. + * @ep: incoming association endpoint. * @skb: the packet. * * Description: - * A new incoming connection is represented by @asoc, ...... + * A new incoming connection is represented by @ep, ...... * Returns zero on success, negative values on failure. 
* */ -int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, +int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb) { int rc; struct netlbl_lsm_secattr secattr; - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sk_security_struct *sksec = ep->base.sk->sk_security; struct sockaddr_in addr4; struct sockaddr_in6 addr6; - if (asoc->base.sk->sk_family != PF_INET && - asoc->base.sk->sk_family != PF_INET6) + if (ep->base.sk->sk_family != PF_INET && + ep->base.sk->sk_family != PF_INET6) return 0; netlbl_secattr_init(&secattr); rc = security_netlbl_sid_to_secattr(&selinux_state, - asoc->secid, &secattr); + ep->secid, &secattr); if (rc != 0) goto assoc_request_return; @@ -294,11 +293,11 @@ int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, if (ip_hdr(skb)->version == 4) { addr4.sin_family = AF_INET; addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; - rc = netlbl_conn_setattr(asoc->base.sk, (void *)&addr4, &secattr); + rc = netlbl_conn_setattr(ep->base.sk, (void *)&addr4, &secattr); } else if (IS_ENABLED(CONFIG_IPV6) && ip_hdr(skb)->version == 6) { addr6.sin6_family = AF_INET6; addr6.sin6_addr = ipv6_hdr(skb)->saddr; - rc = netlbl_conn_setattr(asoc->base.sk, (void *)&addr6, &secattr); + rc = netlbl_conn_setattr(ep->base.sk, (void *)&addr6, &secattr); } else { rc = -EAFNOSUPPORT; } @@ -314,7 +313,6 @@ int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, /** * selinux_netlbl_inet_conn_request - Label an incoming stream connection * @req: incoming connection request socket - * @family: the request socket's address family * * Description: * A new incoming connection request is represented by @req, we need to label @@ -345,7 +343,6 @@ int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family) /** * selinux_netlbl_inet_csk_clone - Initialize the newly created sock * @sk: the new sock - * @family: the sock's address family * * Description: * A new connection has been established using @sk, we've already labeled the @@ -381,7 +378,7 @@ void selinux_netlbl_sctp_sk_clone(struct sock *sk, struct sock *newsk) /** * selinux_netlbl_socket_post_create - Label a socket using NetLabel - * @sk: the sock to label + * @sock: the socket to label * @family: protocol family * * Description: diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 0ae4e4e57a..a91fb0ed00 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -8,7 +8,6 @@ #include #include #include "hashtab.h" -#include "security.h" static struct kmem_cache *hashtab_node_cachep __ro_after_init; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 3f5fd12434..d338962fb0 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -553,7 +553,6 @@ int mls_compute_sid(struct policydb *p, #ifdef CONFIG_NETLABEL /** * mls_export_netlbl_lvl - Export the MLS sensitivity levels to NetLabel - * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * @@ -575,7 +574,6 @@ void mls_export_netlbl_lvl(struct policydb *p, /** * mls_import_netlbl_lvl - Import the NetLabel MLS sensitivity levels - * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * @@ -597,7 +595,6 @@ void mls_import_netlbl_lvl(struct policydb *p, /** * mls_export_netlbl_cat - Export the MLS categories to NetLabel - * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * @@ -625,7 +622,6 @@ int 
mls_export_netlbl_cat(struct policydb *p, /** * mls_import_netlbl_cat - Import the MLS categories from NetLabel - * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8e92af7dd2..c4931bf6f9 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1102,7 +1102,7 @@ void security_compute_xperms_decision(struct selinux_state *state, * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier - * @orig_tclass: target security class + * @tclass: target security class * @avd: access vector decisions * @xperms: extended permissions * @@ -1626,7 +1626,6 @@ int security_context_str_to_sid(struct selinux_state *state, * @scontext_len: length in bytes * @sid: security identifier, SID * @def_sid: default SID to assign on error - * @gfp_flags: the allocator get-free-page (GFP) flags * * Obtains a SID associated with the security context that * has the string representation specified by @scontext. @@ -1920,7 +1919,6 @@ static int security_compute_sid(struct selinux_state *state, * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class - * @qstr: object name * @out_sid: security identifier for new subject/object * * Compute a SID to use for labeling a new subject or object in the @@ -1949,7 +1947,6 @@ int security_transition_sid_user(struct selinux_state *state, /** * security_member_sid - Compute the SID for member selection. - * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -2276,7 +2273,6 @@ void selinux_policy_commit(struct selinux_state *state, * @state: SELinux state * @data: binary policy data * @len: length of data in bytes - * @load_state: policy load state * * Load a new set of security policy configuration data, * validate it and convert the SID table as necessary. @@ -2529,7 +2525,7 @@ int security_ib_pkey_sid(struct selinux_state *state, * security_ib_endport_sid - Obtain the SID for a subnet management interface. 
* @state: SELinux state * @dev_name: device name - * @port_num: port number + * @port: port number * @out_sid: security identifier */ int security_ib_endport_sid(struct selinux_state *state, @@ -2860,10 +2856,9 @@ int security_get_user_sids(struct selinux_state *state, /** * __security_genfs_sid - Helper to obtain a SID for a file in a filesystem - * @policy: policy * @fstype: filesystem type * @path: path from root of mount - * @orig_sclass: file security class + * @sclass: file security class * @sid: SID for path * * Obtain a SID to use for a file in a filesystem that @@ -2920,7 +2915,7 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, * @state: SELinux state * @fstype: filesystem type * @path: path from root of mount - * @orig_sclass: file security class + * @sclass: file security class * @sid: SID for path * * Acquire policy_rwlock before calling __security_genfs_sid() and release @@ -3302,7 +3297,6 @@ int security_sid_mls_copy(struct selinux_state *state, * @nlbl_sid: NetLabel SID * @nlbl_type: NetLabel labeling protocol type * @xfrm_sid: XFRM SID - * @peer_sid: network peer sid * * Description: * Compare the @nlbl_sid and @xfrm_sid values and if the two SIDs can be diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 293ec048af..656d50b09f 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -570,7 +570,7 @@ void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry, goto out_unlock; } - cache = kmalloc(struct_size(cache, str, str_len), GFP_ATOMIC); + cache = kmalloc(sizeof(struct sidtab_str_cache) + str_len, GFP_ATOMIC); if (!cache) goto out_unlock; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 9069731789..be83e5ce44 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -89,7 +89,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, if (str_len >= PAGE_SIZE) return -ENOMEM; - ctx = kmalloc(struct_size(ctx, ctx_str, str_len + 1), gfp); + ctx = kmalloc(sizeof(*ctx) + str_len + 1, gfp); if (!ctx) return -ENOMEM; @@ -360,7 +360,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, if (rc) return rc; - ctx = kmalloc(struct_size(ctx, ctx_str, str_len), GFP_ATOMIC); + ctx = kmalloc(sizeof(*ctx) + str_len, GFP_ATOMIC); if (!ctx) { rc = -ENOMEM; goto out; diff --git a/security/smack/smack.h b/security/smack/smack.h index fc837dcebf..99c3422596 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -389,6 +389,22 @@ static inline struct smack_known *smk_of_task(const struct task_smack *tsp) return tsp->smk_task; } +static inline struct smack_known *smk_of_task_struct_subj( + const struct task_struct *t) +{ + struct smack_known *skp; + const struct cred *cred; + + rcu_read_lock(); + + cred = rcu_dereference(t->cred); + skp = smk_of_task(smack_cred(cred)); + + rcu_read_unlock(); + + return skp; +} + static inline struct smack_known *smk_of_task_struct_obj( const struct task_struct *t) { diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 14b279cc75..21a0e7c3b8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -51,10 +51,8 @@ #define SMK_RECEIVING 1 #define SMK_SENDING 2 -#ifdef SMACK_IPV6_PORT_LABELING static DEFINE_MUTEX(smack_ipv6_lock); static LIST_HEAD(smk_ipv6_port_list); -#endif struct kmem_cache *smack_rule_cache; int smack_enabled __initdata; @@ -391,7 +389,7 @@ static int smk_copy_relabel(struct list_head *nhead, struct list_head *ohead, /** * smk_ptrace_mode - helper function 
for converting PTRACE_MODE_* into MAY_* - * @mode: input mode in form of PTRACE_MODE_* + * @mode - input mode in form of PTRACE_MODE_* * * Returns a converted MAY_* mode usable by smack rules */ @@ -1215,7 +1213,6 @@ static int smack_inode_getattr(const struct path *path) /** * smack_inode_setxattr - Smack check for setting xattrs - * @mnt_userns: active user namespace * @dentry: the object * @name: name of the attribute * @value: value of the attribute @@ -1342,7 +1339,6 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name) /** * smack_inode_removexattr - Smack check on removexattr - * @mnt_userns: active user namespace * @dentry: the object * @name: name of the attribute * @@ -1402,7 +1398,6 @@ static int smack_inode_removexattr(struct user_namespace *mnt_userns, /** * smack_inode_getsecurity - get smack xattrs - * @mnt_userns: active user namespace * @inode: the object * @name: attribute name * @buffer: where to put the result @@ -1624,14 +1619,13 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, } /** - * smack_mmap_file - Check permissions for a mmap operation. - * @file: contains the file structure for file to map (may be NULL). - * @reqprot: contains the protection requested by the application. - * @prot: contains the protection that will be applied by the kernel. - * @flags: contains the operational flags. - * - * The @file may be NULL, e.g. if mapping anonymous memory. - * + * smack_mmap_file : + * Check permissions for a mmap operation. The @file may be NULL, e.g. + * if mapping anonymous memory. + * @file contains the file structure for file to map (may be NULL). + * @reqprot contains the protection requested by the application. + * @prot contains the protection that will be applied by the kernel. + * @flags contains the operational flags. * Return 0 if permission is granted. */ static int smack_mmap_file(struct file *file, @@ -2067,14 +2061,15 @@ static int smack_task_getsid(struct task_struct *p) } /** - * smack_current_getsecid_subj - get the subjective secid of the current task + * smack_task_getsecid_subj - get the subjective secid of the task + * @p: the task * @secid: where to put the result * * Sets the secid to contain a u32 version of the task's subjective smack label. 
*/ -static void smack_current_getsecid_subj(u32 *secid) +static void smack_task_getsecid_subj(struct task_struct *p, u32 *secid) { - struct smack_known *skp = smk_of_current(); + struct smack_known *skp = smk_of_task_struct_subj(p); *secid = skp->smk_secid; } @@ -2608,6 +2603,7 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address) mutex_unlock(&smack_ipv6_lock); return; } +#endif /** * smk_ipv6_port_check - check Smack port access @@ -2670,7 +2666,6 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address, return smk_ipv6_check(skp, object, address, act); } -#endif /** * smack_inode_setsecurity - set smack xattrs @@ -2857,9 +2852,8 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, rc = smk_ipv6_check(ssp->smk_out, rsp, sip, SMK_CONNECTING); } -#ifdef SMACK_IPV6_PORT_LABELING - rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); -#endif + if (__is_defined(SMACK_IPV6_PORT_LABELING)) + rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); return rc; } @@ -3057,7 +3051,7 @@ static int smack_sem_associate(struct kern_ipc_perm *isp, int semflg) } /** - * smack_sem_semctl - Smack access check for sem + * smack_sem_shmctl - Smack access check for sem * @isp: the object * @cmd: what it wants to do * @@ -3203,7 +3197,7 @@ static int smack_msg_queue_msgsnd(struct kern_ipc_perm *isp, struct msg_msg *msg } /** - * smack_msg_queue_msgrcv - Smack access check for msg_queue + * smack_msg_queue_msgsnd - Smack access check for msg_queue * @isp: the object * @msg: unused * @target: unused @@ -3212,10 +3206,8 @@ static int smack_msg_queue_msgsnd(struct kern_ipc_perm *isp, struct msg_msg *msg * * Returns 0 if current has read and write access, error code otherwise */ -static int smack_msg_queue_msgrcv(struct kern_ipc_perm *isp, - struct msg_msg *msg, - struct task_struct *target, long type, - int mode) +static int smack_msg_queue_msgrcv(struct kern_ipc_perm *isp, struct msg_msg *msg, + struct task_struct *target, long type, int mode) { return smk_curacc_msq(isp, MAY_READWRITE); } @@ -4642,7 +4634,7 @@ static int smack_inode_copy_up(struct dentry *dentry, struct cred **new) /* * Get label from overlay inode and set it in create_sid */ - isp = smack_inode(d_inode(dentry)); + isp = smack_inode(d_inode(dentry->d_parent)); skp = isp->smk_inode; tsp->smk_task = skp; *new = new_creds; @@ -4699,48 +4691,6 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, return 0; } -#ifdef CONFIG_IO_URING -/** - * smack_uring_override_creds - Is io_uring cred override allowed? - * @new: the target creds - * - * Check to see if the current task is allowed to override it's credentials - * to service an io_uring operation. - */ -static int smack_uring_override_creds(const struct cred *new) -{ - struct task_smack *tsp = smack_cred(current_cred()); - struct task_smack *nsp = smack_cred(new); - - /* - * Allow the degenerate case where the new Smack value is - * the same as the current Smack value. - */ - if (tsp->smk_task == nsp->smk_task) - return 0; - - if (smack_privileged_cred(CAP_MAC_OVERRIDE, current_cred())) - return 0; - - return -EPERM; -} - -/** - * smack_uring_sqpoll - check if a io_uring polling thread can be created - * - * Check to see if the current task is allowed to create a new io_uring - * kernel polling thread. 
- */ -static int smack_uring_sqpoll(void) -{ - if (smack_privileged_cred(CAP_MAC_ADMIN, current_cred())) - return 0; - - return -EPERM; -} - -#endif /* CONFIG_IO_URING */ - struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), .lbs_file = sizeof(struct smack_known *), @@ -4806,7 +4756,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(task_setpgid, smack_task_setpgid), LSM_HOOK_INIT(task_getpgid, smack_task_getpgid), LSM_HOOK_INIT(task_getsid, smack_task_getsid), - LSM_HOOK_INIT(current_getsecid_subj, smack_current_getsecid_subj), + LSM_HOOK_INIT(task_getsecid_subj, smack_task_getsecid_subj), LSM_HOOK_INIT(task_getsecid_obj, smack_task_getsecid_obj), LSM_HOOK_INIT(task_setnice, smack_task_setnice), LSM_HOOK_INIT(task_setioprio, smack_task_setioprio), @@ -4893,10 +4843,6 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(inode_copy_up, smack_inode_copy_up), LSM_HOOK_INIT(inode_copy_up_xattr, smack_inode_copy_up_xattr), LSM_HOOK_INIT(dentry_create_files_as, smack_dentry_create_files_as), -#ifdef CONFIG_IO_URING - LSM_HOOK_INIT(uring_override_creds, smack_uring_override_creds), - LSM_HOOK_INIT(uring_sqpoll, smack_uring_sqpoll), -#endif }; diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c index b945c1d3a7..fc7399b453 100644 --- a/security/smack/smack_netfilter.c +++ b/security/smack/smack_netfilter.c @@ -18,7 +18,27 @@ #include #include "smack.h" -static unsigned int smack_ip_output(void *priv, +#if IS_ENABLED(CONFIG_IPV6) + +static unsigned int smack_ipv6_output(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct sock *sk = skb_to_full_sk(skb); + struct socket_smack *ssp; + struct smack_known *skp; + + if (sk && sk->sk_security) { + ssp = sk->sk_security; + skp = ssp->smk_out; + skb->secmark = skp->smk_secid; + } + + return NF_ACCEPT; +} +#endif /* IPV6 */ + +static unsigned int smack_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -37,14 +57,14 @@ static unsigned int smack_ip_output(void *priv, static const struct nf_hook_ops smack_nf_ops[] = { { - .hook = smack_ip_output, + .hook = smack_ipv4_output, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_SELINUX_FIRST, }, #if IS_ENABLED(CONFIG_IPV6) { - .hook = smack_ip_output, + .hook = smack_ipv6_output, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_SELINUX_FIRST, diff --git a/sound/Makefile b/sound/Makefile index d973053da6..04ef04b116 100644 --- a/sound/Makefile +++ b/sound/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for the Linux sound card driver # -# If sound is not enabled, use the dummy sound driver instead and don't route audio via the HDMI plug.
+ obj-$(CONFIG_SOUND) += soundcore.o obj-$(CONFIG_DMASOUND) += oss/dmasound/ obj-$(CONFIG_SND) += core/ i2c/ drivers/ isa/ pci/ ppc/ arm/ sh/ synth/ usb/ \ @@ -18,4 +18,3 @@ ifeq ($(CONFIG_SND),y) endif soundcore-objs := sound_core.o - diff --git a/sound/core/Makefile b/sound/core/Makefile index 350d704ced..79e1407cd0 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -19,6 +19,7 @@ snd-$(CONFIG_SND_JACK) += ctljack.o jack.o snd-pcm-y := pcm.o pcm_native.o pcm_lib.o pcm_misc.o \ pcm_memory.o memalloc.o snd-pcm-$(CONFIG_SND_PCM_TIMER) += pcm_timer.o +snd-pcm-$(CONFIG_SND_DMA_SGBUF) += sgbuf.o snd-pcm-$(CONFIG_SND_PCM_ELD) += pcm_drm_eld.o snd-pcm-$(CONFIG_SND_PCM_IEC958) += pcm_iec958.o diff --git a/sound/core/control_led.c b/sound/core/control_led.c index 207828f309..a95332b2b9 100644 --- a/sound/core/control_led.c +++ b/sound/core/control_led.c @@ -509,7 +509,7 @@ static char *parse_string(char *s, char *val, size_t val_size) return s; } -static char *parse_iface(char *s, snd_ctl_elem_iface_t *val) +static char *parse_iface(char *s, unsigned int *val) { if (!strncasecmp(s, "card", 4)) *val = SNDRV_CTL_ELEM_IFACE_CARD; diff --git a/sound/core/info_oss.c b/sound/core/info_oss.c index ebc714b2f4..1ba887c795 100644 --- a/sound/core/info_oss.c +++ b/sound/core/info_oss.c @@ -32,8 +32,10 @@ int snd_oss_info_register(int dev, int num, char *string) mutex_lock(&strings); if (string == NULL) { x = snd_sndstat_strings[num][dev]; - kfree(x); - x = NULL; + if (x) { + kfree(x); + x = NULL; + } } else { x = kstrdup(string, GFP_KERNEL); if (x == NULL) { diff --git a/sound/core/init.c b/sound/core/init.c index 31ba7024e3..ac335f5906 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -1111,14 +1111,29 @@ EXPORT_SYMBOL(snd_card_file_remove); */ int snd_power_ref_and_wait(struct snd_card *card) { + wait_queue_entry_t wait; + int result = 0; + snd_power_ref(card); + /* fastpath */ if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0) return 0; - wait_event_cmd(card->power_sleep, - card->shutdown || - snd_power_get_state(card) == SNDRV_CTL_POWER_D0, - snd_power_unref(card), snd_power_ref(card)); - return card->shutdown ? -ENODEV : 0; + init_waitqueue_entry(&wait, current); + add_wait_queue(&card->power_sleep, &wait); + while (1) { + if (card->shutdown) { + result = -ENODEV; + break; + } + if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0) + break; + snd_power_unref(card); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(30 * HZ); + snd_power_ref(card); + } + remove_wait_queue(&card->power_sleep, &wait); + return result; } EXPORT_SYMBOL_GPL(snd_power_ref_and_wait); diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 6fd763d4d1..2d84298257 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #ifdef CONFIG_X86 #include @@ -40,11 +39,9 @@ static void *__snd_dma_alloc_pages(struct snd_dma_buffer *dmab, size_t size) } /** - * snd_dma_alloc_dir_pages - allocate the buffer area according to the given - * type and direction + * snd_dma_alloc_pages - allocate the buffer area according to the given type * @type: the DMA buffer type * @device: the device pointer - * @dir: DMA direction * @size: the buffer size to allocate * @dmab: buffer allocation record to store the allocated data * @@ -54,9 +51,8 @@ static void *__snd_dma_alloc_pages(struct snd_dma_buffer *dmab, size_t size) * Return: Zero if the buffer with the given size is allocated successfully, * otherwise a negative value on error. 
*/ -int snd_dma_alloc_dir_pages(int type, struct device *device, - enum dma_data_direction dir, size_t size, - struct snd_dma_buffer *dmab) +int snd_dma_alloc_pages(int type, struct device *device, size_t size, + struct snd_dma_buffer *dmab) { if (WARN_ON(!size)) return -ENXIO; @@ -66,7 +62,6 @@ int snd_dma_alloc_dir_pages(int type, struct device *device, size = PAGE_ALIGN(size); dmab->dev.type = type; dmab->dev.dev = device; - dmab->dev.dir = dir; dmab->bytes = 0; dmab->addr = 0; dmab->private_data = NULL; @@ -76,7 +71,7 @@ int snd_dma_alloc_dir_pages(int type, struct device *device, dmab->bytes = size; return 0; } -EXPORT_SYMBOL(snd_dma_alloc_dir_pages); +EXPORT_SYMBOL(snd_dma_alloc_pages); /** * snd_dma_alloc_pages_fallback - allocate the buffer area according to the given type with fallback @@ -134,10 +129,9 @@ static void __snd_release_pages(struct device *dev, void *res) } /** - * snd_devm_alloc_dir_pages - allocate the buffer and manage with devres + * snd_devm_alloc_pages - allocate the buffer and manage with devres * @dev: the device pointer * @type: the DMA buffer type - * @dir: DMA direction * @size: the buffer size to allocate * * Allocate buffer pages depending on the given type and manage using devres. @@ -150,8 +144,7 @@ static void __snd_release_pages(struct device *dev, void *res) * The function returns the snd_dma_buffer object at success, or NULL if failed. */ struct snd_dma_buffer * -snd_devm_alloc_dir_pages(struct device *dev, int type, - enum dma_data_direction dir, size_t size) +snd_devm_alloc_pages(struct device *dev, int type, size_t size) { struct snd_dma_buffer *dmab; int err; @@ -164,7 +157,7 @@ snd_devm_alloc_dir_pages(struct device *dev, int type, if (!dmab) return NULL; - err = snd_dma_alloc_dir_pages(type, dev, dir, size, dmab); + err = snd_dma_alloc_pages(type, dev, size, dmab); if (err < 0) { devres_free(dmab); return NULL; @@ -173,7 +166,7 @@ snd_devm_alloc_dir_pages(struct device *dev, int type, devres_add(dev, dmab); return dmab; } -EXPORT_SYMBOL_GPL(snd_devm_alloc_dir_pages); +EXPORT_SYMBOL_GPL(snd_devm_alloc_pages); /** * snd_dma_buffer_mmap - perform mmap of the given DMA buffer @@ -195,26 +188,6 @@ int snd_dma_buffer_mmap(struct snd_dma_buffer *dmab, } EXPORT_SYMBOL(snd_dma_buffer_mmap); -#ifdef CONFIG_HAS_DMA -/** - * snd_dma_buffer_sync - sync DMA buffer between CPU and device - * @dmab: buffer allocation information - * @mode: sync mode - */ -void snd_dma_buffer_sync(struct snd_dma_buffer *dmab, - enum snd_dma_sync_mode mode) -{ - const struct snd_malloc_ops *ops; - - if (!dmab || !dmab->dev.need_sync) - return; - ops = snd_dma_get_ops(dmab); - if (ops && ops->sync) - ops->sync(dmab, mode); -} -EXPORT_SYMBOL_GPL(snd_dma_buffer_sync); -#endif /* CONFIG_HAS_DMA */ - /** * snd_sgbuf_get_addr - return the physical address at the corresponding offset * @dmab: buffer allocation information @@ -498,225 +471,6 @@ static const struct snd_malloc_ops snd_dma_wc_ops = { .mmap = snd_dma_wc_mmap, }; #endif /* CONFIG_X86 */ - -/* - * Non-contiguous pages allocator - */ -static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) -{ - struct sg_table *sgt; - void *p; - - sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, - DEFAULT_GFP, 0); - if (!sgt) - return NULL; - dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, - sg_dma_address(sgt->sgl)); - p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); - if (p) - dmab->private_data = sgt; - else - dma_free_noncontiguous(dmab->dev.dev, size, sgt, dmab->dev.dir); - return p; -} - 
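
/*
 * Aside: a minimal caller-side sketch (not part of the patch) of the
 * directional allocation and explicit-sync API that this memalloc.c
 * diff removes. 'dev' is an assumed, already-bound struct device; the
 * buffer type and size are illustrative only.
 */
#include <linux/dma-mapping.h>
#include <sound/memalloc.h>

static int example_capture_buffer(struct device *dev)
{
    struct snd_dma_buffer dmab;
    int err;

    /* non-contiguous pages for a device-to-memory (capture) stream */
    err = snd_dma_alloc_dir_pages(SNDRV_DMA_TYPE_NONCONTIG, dev,
                                  DMA_FROM_DEVICE, 64 * 1024, &dmab);
    if (err < 0)
        return err;

    /* give the CPU ownership before reading dmab.area ... */
    snd_dma_buffer_sync(&dmab, SNDRV_DMA_SYNC_CPU);
    /* ... and hand it back to the device before the next transfer */
    snd_dma_buffer_sync(&dmab, SNDRV_DMA_SYNC_DEVICE);

    snd_dma_free_pages(&dmab);
    return 0;
}
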
-static void snd_dma_noncontig_free(struct snd_dma_buffer *dmab) -{ - dma_vunmap_noncontiguous(dmab->dev.dev, dmab->area); - dma_free_noncontiguous(dmab->dev.dev, dmab->bytes, dmab->private_data, - dmab->dev.dir); -} - -static int snd_dma_noncontig_mmap(struct snd_dma_buffer *dmab, - struct vm_area_struct *area) -{ - return dma_mmap_noncontiguous(dmab->dev.dev, area, - dmab->bytes, dmab->private_data); -} - -static void snd_dma_noncontig_sync(struct snd_dma_buffer *dmab, - enum snd_dma_sync_mode mode) -{ - if (mode == SNDRV_DMA_SYNC_CPU) { - if (dmab->dev.dir == DMA_TO_DEVICE) - return; - invalidate_kernel_vmap_range(dmab->area, dmab->bytes); - dma_sync_sgtable_for_cpu(dmab->dev.dev, dmab->private_data, - dmab->dev.dir); - } else { - if (dmab->dev.dir == DMA_FROM_DEVICE) - return; - flush_kernel_vmap_range(dmab->area, dmab->bytes); - dma_sync_sgtable_for_device(dmab->dev.dev, dmab->private_data, - dmab->dev.dir); - } -} - -static inline void snd_dma_noncontig_iter_set(struct snd_dma_buffer *dmab, - struct sg_page_iter *piter, - size_t offset) -{ - struct sg_table *sgt = dmab->private_data; - - __sg_page_iter_start(piter, sgt->sgl, sgt->orig_nents, - offset >> PAGE_SHIFT); -} - -static dma_addr_t snd_dma_noncontig_get_addr(struct snd_dma_buffer *dmab, - size_t offset) -{ - struct sg_dma_page_iter iter; - - snd_dma_noncontig_iter_set(dmab, &iter.base, offset); - __sg_page_iter_dma_next(&iter); - return sg_page_iter_dma_address(&iter) + offset % PAGE_SIZE; -} - -static struct page *snd_dma_noncontig_get_page(struct snd_dma_buffer *dmab, - size_t offset) -{ - struct sg_page_iter iter; - - snd_dma_noncontig_iter_set(dmab, &iter, offset); - __sg_page_iter_next(&iter); - return sg_page_iter_page(&iter); -} - -static unsigned int -snd_dma_noncontig_get_chunk_size(struct snd_dma_buffer *dmab, - unsigned int ofs, unsigned int size) -{ - struct sg_dma_page_iter iter; - unsigned int start, end; - unsigned long addr; - - start = ALIGN_DOWN(ofs, PAGE_SIZE); - end = ofs + size - 1; /* the last byte address */ - snd_dma_noncontig_iter_set(dmab, &iter.base, start); - if (!__sg_page_iter_dma_next(&iter)) - return 0; - /* check page continuity */ - addr = sg_page_iter_dma_address(&iter); - for (;;) { - start += PAGE_SIZE; - if (start > end) - break; - addr += PAGE_SIZE; - if (!__sg_page_iter_dma_next(&iter) || - sg_page_iter_dma_address(&iter) != addr) - return start - ofs; - } - /* ok, all on continuous pages */ - return size; -} - -static const struct snd_malloc_ops snd_dma_noncontig_ops = { - .alloc = snd_dma_noncontig_alloc, - .free = snd_dma_noncontig_free, - .mmap = snd_dma_noncontig_mmap, - .sync = snd_dma_noncontig_sync, - .get_addr = snd_dma_noncontig_get_addr, - .get_page = snd_dma_noncontig_get_page, - .get_chunk_size = snd_dma_noncontig_get_chunk_size, -}; - -/* x86-specific SG-buffer with WC pages */ -#ifdef CONFIG_SND_DMA_SGBUF -#define sg_wc_address(it) ((unsigned long)page_address(sg_page_iter_page(it))) - -static void *snd_dma_sg_wc_alloc(struct snd_dma_buffer *dmab, size_t size) -{ - void *p = snd_dma_noncontig_alloc(dmab, size); - struct sg_table *sgt = dmab->private_data; - struct sg_page_iter iter; - - if (!p) - return NULL; - for_each_sgtable_page(sgt, &iter, 0) - set_memory_wc(sg_wc_address(&iter), 1); - return p; -} - -static void snd_dma_sg_wc_free(struct snd_dma_buffer *dmab) -{ - struct sg_table *sgt = dmab->private_data; - struct sg_page_iter iter; - - for_each_sgtable_page(sgt, &iter, 0) - set_memory_wb(sg_wc_address(&iter), 1); - snd_dma_noncontig_free(dmab); -} - -static int 
snd_dma_sg_wc_mmap(struct snd_dma_buffer *dmab, - struct vm_area_struct *area) -{ - area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); - return dma_mmap_noncontiguous(dmab->dev.dev, area, - dmab->bytes, dmab->private_data); -} - -static const struct snd_malloc_ops snd_dma_sg_wc_ops = { - .alloc = snd_dma_sg_wc_alloc, - .free = snd_dma_sg_wc_free, - .mmap = snd_dma_sg_wc_mmap, - .sync = snd_dma_noncontig_sync, - .get_addr = snd_dma_noncontig_get_addr, - .get_page = snd_dma_noncontig_get_page, - .get_chunk_size = snd_dma_noncontig_get_chunk_size, -}; -#endif /* CONFIG_SND_DMA_SGBUF */ - -/* - * Non-coherent pages allocator - */ -static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size) -{ - void *p; - - p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr, - dmab->dev.dir, DEFAULT_GFP); - if (p) - dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr); - return p; -} - -static void snd_dma_noncoherent_free(struct snd_dma_buffer *dmab) -{ - dma_free_noncoherent(dmab->dev.dev, dmab->bytes, dmab->area, - dmab->addr, dmab->dev.dir); -} - -static int snd_dma_noncoherent_mmap(struct snd_dma_buffer *dmab, - struct vm_area_struct *area) -{ - area->vm_page_prot = vm_get_page_prot(area->vm_flags); - return dma_mmap_pages(dmab->dev.dev, area, - area->vm_end - area->vm_start, - virt_to_page(dmab->area)); -} - -static void snd_dma_noncoherent_sync(struct snd_dma_buffer *dmab, - enum snd_dma_sync_mode mode) -{ - if (mode == SNDRV_DMA_SYNC_CPU) { - if (dmab->dev.dir != DMA_TO_DEVICE) - dma_sync_single_for_cpu(dmab->dev.dev, dmab->addr, - dmab->bytes, dmab->dev.dir); - } else { - if (dmab->dev.dir != DMA_FROM_DEVICE) - dma_sync_single_for_device(dmab->dev.dev, dmab->addr, - dmab->bytes, dmab->dev.dir); - } -} - -static const struct snd_malloc_ops snd_dma_noncoherent_ops = { - .alloc = snd_dma_noncoherent_alloc, - .free = snd_dma_noncoherent_free, - .mmap = snd_dma_noncoherent_mmap, - .sync = snd_dma_noncoherent_sync, -}; - #endif /* CONFIG_HAS_DMA */ /* @@ -728,15 +482,14 @@ static const struct snd_malloc_ops *dma_ops[] = { #ifdef CONFIG_HAS_DMA [SNDRV_DMA_TYPE_DEV] = &snd_dma_dev_ops, [SNDRV_DMA_TYPE_DEV_WC] = &snd_dma_wc_ops, - [SNDRV_DMA_TYPE_NONCONTIG] = &snd_dma_noncontig_ops, - [SNDRV_DMA_TYPE_NONCOHERENT] = &snd_dma_noncoherent_ops, -#ifdef CONFIG_SND_DMA_SGBUF - [SNDRV_DMA_TYPE_DEV_WC_SG] = &snd_dma_sg_wc_ops, -#endif #ifdef CONFIG_GENERIC_ALLOCATOR [SNDRV_DMA_TYPE_DEV_IRAM] = &snd_dma_iram_ops, #endif /* CONFIG_GENERIC_ALLOCATOR */ #endif /* CONFIG_HAS_DMA */ +#ifdef CONFIG_SND_DMA_SGBUF + [SNDRV_DMA_TYPE_DEV_SG] = &snd_dma_sg_ops, + [SNDRV_DMA_TYPE_DEV_WC_SG] = &snd_dma_sg_ops, +#endif }; static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab) diff --git a/sound/core/memalloc_local.h b/sound/core/memalloc_local.h index a6f3a87194..9f2e0a608b 100644 --- a/sound/core/memalloc_local.h +++ b/sound/core/memalloc_local.h @@ -10,7 +10,6 @@ struct snd_malloc_ops { unsigned int (*get_chunk_size)(struct snd_dma_buffer *dmab, unsigned int ofs, unsigned int size); int (*mmap)(struct snd_dma_buffer *dmab, struct vm_area_struct *area); - void (*sync)(struct snd_dma_buffer *dmab, enum snd_dma_sync_mode mode); }; #ifdef CONFIG_SND_DMA_SGBUF diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index e4e176854c..dfe5a64e19 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -453,8 +453,6 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, sstatus.suspended_state = status->suspended_state; 
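
/*
 * Aside: an illustrative sketch (not part of the patch) of the compat
 * copy-out idiom used just below, where each field of a 32-bit user
 * struct is written with put_user() and any fault collapses into a
 * single -EFAULT. The struct and function names here are made up.
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct example_status32 {
    s32 state;
    u32 hw_ptr;
};

static int example_copy_status(struct example_status32 __user *dst,
                               s32 state, u32 hw_ptr)
{
    if (put_user(state, &dst->state) ||
        put_user(hw_ptr, &dst->hw_ptr))
        return -EFAULT;
    return 0;
}
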
sstatus.audio_tstamp = status->audio_tstamp; snd_pcm_stream_unlock_irq(substream); - if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (put_user(sstatus.state, &src->s.status.state) || put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) || @@ -535,8 +533,6 @@ static int snd_pcm_ioctl_sync_ptr_buggy(struct snd_pcm_substream *substream, sync_ptr.s.status.suspended_state = status->suspended_state; sync_ptr.s.status.audio_tstamp = status->audio_tstamp; snd_pcm_stream_unlock_irq(substream); - if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_APPL)) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (copy_to_user(_sync_ptr, &sync_ptr, sizeof(sync_ptr))) return -EFAULT; return 0; diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index af6f717e1e..1fc2fa0775 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -91,8 +91,8 @@ EXPORT_SYMBOL_GPL(snd_hwparams_to_dma_slave_config); * @dma_data: DAI DMA data * @slave_config: DMA slave configuration * - * Initializes the {dst,src}_addr, {dst,src}_maxburst, {dst,src}_addr_width - * fields of the DMA slave config from the same fields of the DAI DMA + * Initializes the {dst,src}_addr, {dst,src}_maxburst, {dst,src}_addr_width and + * slave_id fields of the DMA slave config from the same fields of the DAI DMA * data struct. The src and dst fields will be initialized depending on the * direction of the substream. If the substream is a playback stream the dst * fields will be initialized, if it is a capture stream the src fields will be @@ -124,6 +124,7 @@ void snd_dmaengine_pcm_set_config_from_dai_data( slave_config->src_addr_width = dma_data->addr_width; } + slave_config->slave_id = dma_data->slave_id; slave_config->peripheral_config = dma_data->peripheral_config; slave_config->peripheral_size = dma_data->peripheral_size; } diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index f209002523..a144a3f68e 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -106,7 +106,6 @@ void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_ufram frames -= transfer; ofs = 0; } - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); } #ifdef CONFIG_SND_DEBUG @@ -2128,28 +2127,11 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream, { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t old_appl_ptr = runtime->control->appl_ptr; - snd_pcm_sframes_t diff; int ret; if (old_appl_ptr == appl_ptr) return 0; - if (appl_ptr >= runtime->boundary) - return -EINVAL; - /* - * check if a rewind is requested by the application - */ - if (substream->runtime->info & SNDRV_PCM_INFO_NO_REWINDS) { - diff = appl_ptr - old_appl_ptr; - if (diff >= 0) { - if (diff > runtime->buffer_size) - return -EINVAL; - } else { - if (runtime->boundary + diff > runtime->buffer_size) - return -EINVAL; - } - } - runtime->control->appl_ptr = appl_ptr; if (substream->ops->ack) { ret = substream->ops->ack(substream); @@ -2274,12 +2256,8 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, goto _end_unlock; } snd_pcm_stream_unlock_irq(substream); - if (!is_playback) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); err = writer(substream, appl_ofs, data, offset, frames, transfer); - if (is_playback) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); snd_pcm_stream_lock_irq(substream); if (err < 0) goto _end_unlock; diff --git 
a/sound/core/pcm_local.h b/sound/core/pcm_local.h index ecb21697ae..fe9689b8a6 100644 --- a/sound/core/pcm_local.h +++ b/sound/core/pcm_local.h @@ -73,11 +73,4 @@ void snd_pcm_sync_stop(struct snd_pcm_substream *substream, bool sync_irq); for ((subs) = (pcm)->streams[str].substream; (subs); \ (subs) = (subs)->next) -static inline void snd_pcm_dma_buffer_sync(struct snd_pcm_substream *substream, - enum snd_dma_sync_mode mode) -{ - if (substream->runtime->info & SNDRV_PCM_INFO_EXPLICIT_SYNC) - snd_dma_buffer_sync(snd_pcm_get_dma_buf(substream), mode); -} - #endif /* __SOUND_CORE_PCM_LOCAL_H */ diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index b70ce3b69a..7fbd1ccbb5 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -32,20 +32,15 @@ module_param(max_alloc_per_card, ulong, 0644); MODULE_PARM_DESC(max_alloc_per_card, "Max total allocation bytes per card."); static int do_alloc_pages(struct snd_card *card, int type, struct device *dev, - int str, size_t size, struct snd_dma_buffer *dmab) + size_t size, struct snd_dma_buffer *dmab) { - enum dma_data_direction dir; int err; if (max_alloc_per_card && card->total_pcm_alloc_bytes + size > max_alloc_per_card) return -ENOMEM; - if (str == SNDRV_PCM_STREAM_PLAYBACK) - dir = DMA_TO_DEVICE; - else - dir = DMA_FROM_DEVICE; - err = snd_dma_alloc_dir_pages(type, dev, dir, size, dmab); + err = snd_dma_alloc_pages(type, dev, size, dmab); if (!err) { mutex_lock(&card->memory_mutex); card->total_pcm_alloc_bytes += dmab->bytes; @@ -82,7 +77,7 @@ static int preallocate_pcm_pages(struct snd_pcm_substream *substream, do { err = do_alloc_pages(card, dmab->dev.type, dmab->dev.dev, - substream->stream, size, dmab); + size, dmab); if (err != -ENOMEM) return err; if (no_fallback) @@ -182,7 +177,6 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, if (do_alloc_pages(card, substream->dma_buffer.dev.type, substream->dma_buffer.dev.dev, - substream->stream, size, &new_dmab) < 0) { buffer->error = -ENOMEM; pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %zu\n", @@ -424,7 +418,6 @@ int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size) if (do_alloc_pages(card, substream->dma_buffer.dev.type, substream->dma_buffer.dev.dev, - substream->stream, size, dmab) < 0) { kfree(dmab); pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %zu\n", diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a056b3ef3c..d233cb3b41 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -172,19 +172,6 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) } EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave); -unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream) -{ - unsigned long flags = 0; - if (substream->pcm->nonatomic) - mutex_lock_nested(&substream->self_group.mutex, - SINGLE_DEPTH_NESTING); - else - spin_lock_irqsave_nested(&substream->self_group.lock, flags, - SINGLE_DEPTH_NESTING); - return flags; -} -EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave_nested); - /** * snd_pcm_stream_unlock_irqrestore - Unlock the PCM stream * @substream: PCM substream @@ -2698,13 +2685,6 @@ int snd_pcm_open_substream(struct snd_pcm *pcm, int stream, goto error; } - /* automatically set EXPLICIT_SYNC flag in the managed mode whenever - * the DMA buffer requires it - */ - if (substream->managed_buffer_alloc && - substream->dma_buffer.dev.need_sync) - substream->runtime->hw.info |= SNDRV_PCM_INFO_EXPLICIT_SYNC; - 
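
/*
 * Aside: a generic sketch (not part of the patch) of the lockdep idiom
 * behind the _snd_pcm_stream_lock_irqsave_nested() helper removed from
 * this file above. When two locks of the same lock class are held at
 * once (e.g. a pair of linked substreams), the inner acquisition is
 * annotated with SINGLE_DEPTH_NESTING so lockdep knows the nesting is
 * intentional. The locks here are assumed, illustrative ones.
 */
#include <linux/spinlock.h>

static void example_lock_pair(spinlock_t *outer, spinlock_t *inner)
{
    unsigned long flags;

    spin_lock_irqsave(outer, flags);
    /* same lock class as 'outer': annotate instead of tripping lockdep */
    spin_lock_nested(inner, SINGLE_DEPTH_NESTING);
    /* ... critical section spanning both ... */
    spin_unlock(inner);
    spin_unlock_irqrestore(outer, flags);
}
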
*rsubstream = substream; return 0; @@ -2932,8 +2912,6 @@ static snd_pcm_sframes_t snd_pcm_rewind(struct snd_pcm_substream *substream, ret = rewind_appl_ptr(substream, frames, snd_pcm_hw_avail(substream)); snd_pcm_stream_unlock_irq(substream); - if (ret >= 0) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); return ret; } @@ -2951,31 +2929,35 @@ static snd_pcm_sframes_t snd_pcm_forward(struct snd_pcm_substream *substream, ret = forward_appl_ptr(substream, frames, snd_pcm_avail(substream)); snd_pcm_stream_unlock_irq(substream); - if (ret >= 0) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); return ret; } -static int snd_pcm_delay(struct snd_pcm_substream *substream, - snd_pcm_sframes_t *delay) +static int snd_pcm_hwsync(struct snd_pcm_substream *substream) { int err; snd_pcm_stream_lock_irq(substream); err = do_pcm_hwsync(substream); - if (delay && !err) - *delay = snd_pcm_calc_delay(substream); snd_pcm_stream_unlock_irq(substream); - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); - return err; } -static inline int snd_pcm_hwsync(struct snd_pcm_substream *substream) +static int snd_pcm_delay(struct snd_pcm_substream *substream, + snd_pcm_sframes_t *delay) { - return snd_pcm_delay(substream, NULL); -} + int err; + snd_pcm_sframes_t n = 0; + snd_pcm_stream_lock_irq(substream); + err = do_pcm_hwsync(substream); + if (!err) + n = snd_pcm_calc_delay(substream); + snd_pcm_stream_unlock_irq(substream); + if (!err) + *delay = n; + return err; +} + static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream, struct snd_pcm_sync_ptr __user *_sync_ptr) { @@ -3018,8 +3000,6 @@ static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream, sync_ptr.s.status.suspended_state = status->suspended_state; sync_ptr.s.status.audio_tstamp = status->audio_tstamp; snd_pcm_stream_unlock_irq(substream); - if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_APPL)) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (copy_to_user(_sync_ptr, &sync_ptr, sizeof(sync_ptr))) return -EFAULT; return 0; @@ -3116,8 +3096,6 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, sstatus.suspended_state = status->suspended_state; sstatus.audio_tstamp = status->audio_tstamp; snd_pcm_stream_unlock_irq(substream); - if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) - snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (put_user(sstatus.state, &src->s.status.state) || put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) || @@ -3240,9 +3218,6 @@ static int snd_pcm_common_ioctl(struct file *file, if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; - if (substream->runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) - return -EBADFD; - res = snd_power_wait(substream->pcm->card); if (res < 0) return res; @@ -3297,7 +3272,7 @@ static int snd_pcm_common_ioctl(struct file *file, return snd_pcm_hwsync(substream); case SNDRV_PCM_IOCTL_DELAY: { - snd_pcm_sframes_t delay = 0; + snd_pcm_sframes_t delay; snd_pcm_sframes_t __user *res = arg; int err; @@ -3369,9 +3344,6 @@ int snd_pcm_kernel_ioctl(struct snd_pcm_substream *substream, snd_pcm_uframes_t *frames = arg; snd_pcm_sframes_t result; - if (substream->runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) - return -EBADFD; - switch (cmd) { case SNDRV_PCM_IOCTL_FORWARD: { @@ -3414,8 +3386,7 @@ static ssize_t snd_pcm_read(struct file *file, char __user *buf, size_t count, if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; - if 
(runtime->status->state == SNDRV_PCM_STATE_OPEN || - runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) + if (runtime->status->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (!frame_aligned(runtime, count)) return -EINVAL; @@ -3439,8 +3410,7 @@ static ssize_t snd_pcm_write(struct file *file, const char __user *buf, if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_OPEN || - runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) + if (runtime->status->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (!frame_aligned(runtime, count)) return -EINVAL; @@ -3466,8 +3436,7 @@ static ssize_t snd_pcm_readv(struct kiocb *iocb, struct iov_iter *to) if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_OPEN || - runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) + if (runtime->status->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (!iter_is_iovec(to)) return -EINVAL; @@ -3503,8 +3472,7 @@ static ssize_t snd_pcm_writev(struct kiocb *iocb, struct iov_iter *from) if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_OPEN || - runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) + if (runtime->status->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (!iter_is_iovec(from)) return -EINVAL; @@ -3543,9 +3511,6 @@ static __poll_t snd_pcm_poll(struct file *file, poll_table *wait) return ok | EPOLLERR; runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) - return ok | EPOLLERR; - poll_wait(file, &runtime->sleep, wait); mask = 0; @@ -3855,8 +3820,6 @@ static int snd_pcm_mmap(struct file *file, struct vm_area_struct *area) substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; - if (substream->runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) - return -EBADFD; offset = area->vm_pgoff << PAGE_SHIFT; switch (offset) { @@ -3893,8 +3856,6 @@ static int snd_pcm_fasync(int fd, struct file * file, int on) if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) - return -EBADFD; return fasync_helper(fd, file, on, &runtime->fasync); } diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index f5cae49500..4abc38c70c 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -262,16 +262,6 @@ static int snd_virmidi_output_close(struct snd_rawmidi_substream *substream) return 0; } -/* - * drain output work queue - */ -static void snd_virmidi_output_drain(struct snd_rawmidi_substream *substream) -{ - struct snd_virmidi *vmidi = substream->runtime->private_data; - - flush_work(&vmidi->output_work); -} - /* * subscribe callback - allow output to rawmidi device */ @@ -346,7 +336,6 @@ static const struct snd_rawmidi_ops snd_virmidi_output_ops = { .open = snd_virmidi_output_open, .close = snd_virmidi_output_close, .trigger = snd_virmidi_output_trigger, - .drain = snd_virmidi_output_drain, }; /* diff --git a/sound/drivers/virmidi.c b/sound/drivers/virmidi.c index 58012de90c..7f7eed6faa 100644 --- a/sound/drivers/virmidi.c +++ b/sound/drivers/virmidi.c @@ -90,12 +90,15 @@ static int snd_virmidi_probe(struct platform_device *devptr) } for (idx = 0; idx < midi_devs[dev]; idx++) { struct snd_rawmidi *rmidi; + struct snd_virmidi_dev *rdev; err = snd_virmidi_new(card, idx, &rmidi); if (err < 0) return 
err; + rdev = rmidi->private_data; vmidi->midi[idx] = rmidi; strcpy(rmidi->name, "Virtual Raw MIDI"); + rdev->seq_mode = SNDRV_VIRMIDI_SEQ_DISPATCH; } strcpy(card->driver, "VirMIDI"); diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig index 22b6c77968..fd109bea4c 100644 --- a/sound/firewire/Kconfig +++ b/sound/firewire/Kconfig @@ -169,7 +169,6 @@ config SND_FIREWIRE_MOTU * 828 * 896 * 828mk2 - * 896hd * Traveler * Ultralite * 8pre @@ -177,9 +176,7 @@ config SND_FIREWIRE_MOTU * 828mk3 (Hybrid) * Ultralite mk3 (FireWire only) * Ultralite mk3 (Hybrid) - * Traveler mk3 * Audio Express - * Track 16 * 4pre To compile this driver as a module, choose M here: the module diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c index 53dbd4d4b0..ac66f08acd 100644 --- a/sound/firewire/fireworks/fireworks_stream.c +++ b/sound/firewire/fireworks/fireworks_stream.c @@ -50,9 +50,8 @@ static int init_stream(struct snd_efw *efw, struct amdtp_stream *stream) efw->firmware_version == 0x5070300 || efw->firmware_version == 0x5080000)) efw->tx_stream.flags |= CIP_UNALIGHED_DBC; - // AudioFire9 always reports wrong dbs. Onyx 1200F with the latest firmware (v4.6.0) - // also report wrong dbs at 88.2 kHz or greater. - if (efw->is_af9 || efw->firmware_version == 0x4060000) + // AudioFire9 always reports wrong dbs. + if (efw->is_af9) efw->tx_stream.flags |= CIP_WRONG_DBS; // Firmware version 5.5 reports fixed interval for dbc. if (efw->firmware_version == 0x5050000) diff --git a/sound/firewire/motu/Makefile b/sound/firewire/motu/Makefile index 3bef2a0b1e..acdf66564f 100644 --- a/sound/firewire/motu/Makefile +++ b/sound/firewire/motu/Makefile @@ -4,6 +4,5 @@ CFLAGS_amdtp-motu.o := -I$(src) snd-firewire-motu-objs := motu.o amdtp-motu.o motu-transaction.o motu-stream.o \ motu-proc.o motu-pcm.o motu-midi.o motu-hwdep.o \ motu-protocol-v2.o motu-protocol-v3.o \ - motu-protocol-v1.o motu-register-dsp-message-parser.o \ - motu-command-dsp-message-parser.o + motu-protocol-v1.o obj-$(CONFIG_SND_FIREWIRE_MOTU) += snd-firewire-motu.o diff --git a/sound/firewire/motu/amdtp-motu.c b/sound/firewire/motu/amdtp-motu.c index 2fb52f481d..a18c2c033e 100644 --- a/sound/firewire/motu/amdtp-motu.c +++ b/sound/firewire/motu/amdtp-motu.c @@ -333,7 +333,6 @@ static unsigned int process_ir_ctx_payloads(struct amdtp_stream *s, unsigned int packets, struct snd_pcm_substream *pcm) { - struct snd_motu *motu = container_of(s, struct snd_motu, tx_stream); struct amdtp_motu *p = s->protocol; unsigned int pcm_frames = 0; int i; @@ -358,14 +357,6 @@ static unsigned int process_ir_ctx_payloads(struct amdtp_stream *s, read_midi_messages(s, buf, data_blocks); } - if (motu->spec->flags & SND_MOTU_SPEC_REGISTER_DSP) { - snd_motu_register_dsp_message_parser_parse(motu, descs, packets, - s->data_block_quadlets); - } else if (motu->spec->flags & SND_MOTU_SPEC_COMMAND_DSP) { - snd_motu_command_dsp_message_parser_parse(motu, descs, packets, - s->data_block_quadlets); - } - // For tracepoints. if (trace_data_block_sph_enabled() || trace_data_block_message_enabled()) @@ -424,6 +415,8 @@ static unsigned int process_it_ctx_payloads(struct amdtp_stream *s, if (p->midi_ports) write_midi_messages(s, buf, data_blocks); + // TODO: how to exchange control messages with userspace?
+ write_sph(p->cache, buf, data_blocks, s->data_block_quadlets); } diff --git a/sound/firewire/motu/motu-hwdep.c b/sound/firewire/motu/motu-hwdep.c index a900fc0e76..b5ced5d277 100644 --- a/sound/firewire/motu/motu-hwdep.c +++ b/sound/firewire/motu/motu-hwdep.c @@ -16,14 +16,6 @@ #include "motu.h" -static bool has_dsp_event(struct snd_motu *motu) -{ - if (motu->spec->flags & SND_MOTU_SPEC_REGISTER_DSP) - return (snd_motu_register_dsp_message_parser_count_event(motu) > 0); - else - return false; -} - static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, loff_t *offset) { @@ -33,7 +25,7 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, spin_lock_irq(&motu->lock); - while (!motu->dev_lock_changed && motu->msg == 0 && !has_dsp_event(motu)) { + while (!motu->dev_lock_changed && motu->msg == 0) { prepare_to_wait(&motu->hwdep_wait, &wait, TASK_INTERRUPTIBLE); spin_unlock_irq(&motu->lock); schedule(); @@ -48,47 +40,21 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS; event.lock_status.status = (motu->dev_lock_count > 0); motu->dev_lock_changed = false; - spin_unlock_irq(&motu->lock); - count = min_t(long, count, sizeof(event)); - if (copy_to_user(buf, &event, count)) - return -EFAULT; - } else if (motu->msg > 0) { + count = min_t(long, count, sizeof(event.lock_status)); + } else { event.motu_notification.type = SNDRV_FIREWIRE_EVENT_MOTU_NOTIFICATION; event.motu_notification.message = motu->msg; motu->msg = 0; - spin_unlock_irq(&motu->lock); - count = min_t(long, count, sizeof(event)); - if (copy_to_user(buf, &event, count)) - return -EFAULT; - } else if (has_dsp_event(motu)) { - size_t consumed = 0; - u32 __user *ptr; - u32 ev; - - spin_unlock_irq(&motu->lock); - - // Header is filled later. 
- consumed += sizeof(event.motu_register_dsp_change); - - while (consumed < count && - snd_motu_register_dsp_message_parser_copy_event(motu, &ev)) { - ptr = (u32 __user *)(buf + consumed); - if (put_user(ev, ptr)) - return -EFAULT; - consumed += sizeof(ev); - } - - event.motu_register_dsp_change.type = SNDRV_FIREWIRE_EVENT_MOTU_REGISTER_DSP_CHANGE; - event.motu_register_dsp_change.count = - (consumed - sizeof(event.motu_register_dsp_change)) / 4; - if (copy_to_user(buf, &event, sizeof(event.motu_register_dsp_change))) - return -EFAULT; - - count = consumed; + count = min_t(long, count, sizeof(event.motu_notification)); } + spin_unlock_irq(&motu->lock); + + if (copy_to_user(buf, &event, count)) + return -EFAULT; + return count; } @@ -101,7 +67,7 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file, poll_wait(file, &motu->hwdep_wait, wait); spin_lock_irq(&motu->lock); - if (motu->dev_lock_changed || motu->msg || has_dsp_event(motu)) + if (motu->dev_lock_changed || motu->msg) events = EPOLLIN | EPOLLRDNORM; else events = 0; @@ -189,71 +155,6 @@ static int hwdep_ioctl(struct snd_hwdep *hwdep, struct file *file, return hwdep_lock(motu); case SNDRV_FIREWIRE_IOCTL_UNLOCK: return hwdep_unlock(motu); - case SNDRV_FIREWIRE_IOCTL_MOTU_REGISTER_DSP_METER: - { - struct snd_firewire_motu_register_dsp_meter *meter; - int err; - - if (!(motu->spec->flags & SND_MOTU_SPEC_REGISTER_DSP)) - return -ENXIO; - - meter = kzalloc(sizeof(*meter), GFP_KERNEL); - if (!meter) - return -ENOMEM; - - snd_motu_register_dsp_message_parser_copy_meter(motu, meter); - - err = copy_to_user((void __user *)arg, meter, sizeof(*meter)); - kfree(meter); - - if (err) - return -EFAULT; - - return 0; - } - case SNDRV_FIREWIRE_IOCTL_MOTU_COMMAND_DSP_METER: - { - struct snd_firewire_motu_command_dsp_meter *meter; - int err; - - if (!(motu->spec->flags & SND_MOTU_SPEC_COMMAND_DSP)) - return -ENXIO; - - meter = kzalloc(sizeof(*meter), GFP_KERNEL); - if (!meter) - return -ENOMEM; - - snd_motu_command_dsp_message_parser_copy_meter(motu, meter); - - err = copy_to_user((void __user *)arg, meter, sizeof(*meter)); - kfree(meter); - - if (err) - return -EFAULT; - - return 0; - } - case SNDRV_FIREWIRE_IOCTL_MOTU_REGISTER_DSP_PARAMETER: - { - struct snd_firewire_motu_register_dsp_parameter *param; - int err; - - if (!(motu->spec->flags & SND_MOTU_SPEC_REGISTER_DSP)) - return -ENXIO; - - param = kzalloc(sizeof(*param), GFP_KERNEL); - if (!param) - return -ENOMEM; - - snd_motu_register_dsp_message_parser_copy_parameter(motu, param); - - err = copy_to_user((void __user *)arg, param, sizeof(*param)); - kfree(param); - if (err) - return -EFAULT; - - return 0; - } default: return -ENOIOCTLCMD; } @@ -292,7 +193,5 @@ int snd_motu_create_hwdep_device(struct snd_motu *motu) hwdep->private_data = motu; hwdep->exclusive = true; - motu->hwdep = hwdep; - return 0; } diff --git a/sound/firewire/motu/motu-protocol-v2.c b/sound/firewire/motu/motu-protocol-v2.c index a5f70efa2e..2bd4485e4b 100644 --- a/sound/firewire/motu/motu-protocol-v2.c +++ b/sound/firewire/motu/motu-protocol-v2.c @@ -275,8 +275,7 @@ const struct snd_motu_spec snd_motu_spec_828mk2 = { .name = "828mk2", .protocol_version = SND_MOTU_PROTOCOL_V2, .flags = SND_MOTU_SPEC_RX_MIDI_2ND_Q | - SND_MOTU_SPEC_TX_MIDI_2ND_Q | - SND_MOTU_SPEC_REGISTER_DSP, + SND_MOTU_SPEC_TX_MIDI_2ND_Q, .tx_fixed_pcm_chunks = {14, 14, 0}, .rx_fixed_pcm_chunks = {14, 14, 0}, }; @@ -284,7 +283,7 @@ const struct snd_motu_spec snd_motu_spec_828mk2 = { const struct snd_motu_spec snd_motu_spec_896hd = { .name = 
"896HD", .protocol_version = SND_MOTU_PROTOCOL_V2, - .flags = SND_MOTU_SPEC_REGISTER_DSP, + // No support for MIDI. .tx_fixed_pcm_chunks = {14, 14, 8}, .rx_fixed_pcm_chunks = {14, 14, 8}, }; @@ -293,8 +292,7 @@ const struct snd_motu_spec snd_motu_spec_traveler = { .name = "Traveler", .protocol_version = SND_MOTU_PROTOCOL_V2, .flags = SND_MOTU_SPEC_RX_MIDI_2ND_Q | - SND_MOTU_SPEC_TX_MIDI_2ND_Q | - SND_MOTU_SPEC_REGISTER_DSP, + SND_MOTU_SPEC_TX_MIDI_2ND_Q, .tx_fixed_pcm_chunks = {14, 14, 8}, .rx_fixed_pcm_chunks = {14, 14, 8}, }; @@ -303,8 +301,7 @@ const struct snd_motu_spec snd_motu_spec_ultralite = { .name = "UltraLite", .protocol_version = SND_MOTU_PROTOCOL_V2, .flags = SND_MOTU_SPEC_RX_MIDI_2ND_Q | - SND_MOTU_SPEC_TX_MIDI_2ND_Q | - SND_MOTU_SPEC_REGISTER_DSP, + SND_MOTU_SPEC_TX_MIDI_2ND_Q, .tx_fixed_pcm_chunks = {14, 14, 0}, .rx_fixed_pcm_chunks = {14, 14, 0}, }; @@ -313,8 +310,7 @@ const struct snd_motu_spec snd_motu_spec_8pre = { .name = "8pre", .protocol_version = SND_MOTU_PROTOCOL_V2, .flags = SND_MOTU_SPEC_RX_MIDI_2ND_Q | - SND_MOTU_SPEC_TX_MIDI_2ND_Q | - SND_MOTU_SPEC_REGISTER_DSP, + SND_MOTU_SPEC_TX_MIDI_2ND_Q, // Two dummy chunks always in the end of data block. .tx_fixed_pcm_chunks = {10, 10, 0}, .rx_fixed_pcm_chunks = {6, 6, 0}, diff --git a/sound/firewire/motu/motu-protocol-v3.c b/sound/firewire/motu/motu-protocol-v3.c index 8a0426920a..56e4504e7e 100644 --- a/sound/firewire/motu/motu-protocol-v3.c +++ b/sound/firewire/motu/motu-protocol-v3.c @@ -16,7 +16,6 @@ #define V3_CLOCK_SRC_INTERNAL 0x00 #define V3_CLOCK_SRC_WORD_ON_BNC 0x01 #define V3_CLOCK_SRC_SPH 0x02 -#define V3_CLOCK_SRC_AESEBU_ON_XLR 0x08 #define V3_CLOCK_SRC_SPDIF_ON_COAX 0x10 #define V3_CLOCK_SRC_OPT_IFACE_A 0x18 #define V3_CLOCK_SRC_OPT_IFACE_B 0x19 @@ -127,9 +126,6 @@ int snd_motu_protocol_v3_get_clock_source(struct snd_motu *motu, case V3_CLOCK_SRC_SPH: *src = SND_MOTU_CLOCK_SOURCE_SPH; break; - case V3_CLOCK_SRC_AESEBU_ON_XLR: - *src = SND_MOTU_CLOCK_SOURCE_AESEBU_ON_XLR; - break; case V3_CLOCK_SRC_SPDIF_ON_COAX: *src = SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX; break; @@ -189,7 +185,7 @@ int snd_motu_protocol_v3_switch_fetching_mode(struct snd_motu *motu, sizeof(reg)); } -static int detect_packet_formats_with_opt_ifaces(struct snd_motu *motu, u32 data) +static int detect_packet_formats_828mk3(struct snd_motu *motu, u32 data) { if (data & V3_ENABLE_OPT_IN_IFACE_A) { if (data & V3_NO_ADAT_OPT_IN_IFACE_A) { @@ -259,21 +255,18 @@ int snd_motu_protocol_v3_cache_packet_formats(struct snd_motu *motu) motu->spec->rx_fixed_pcm_chunks, sizeof(motu->rx_packet_formats.pcm_chunks)); - if (motu->spec == &snd_motu_spec_828mk3_fw || - motu->spec == &snd_motu_spec_828mk3_hybrid || - motu->spec == &snd_motu_spec_traveler_mk3 || - motu->spec == &snd_motu_spec_track16) - return detect_packet_formats_with_opt_ifaces(motu, data); + if (motu->spec == &snd_motu_spec_828mk3_fw || motu->spec == &snd_motu_spec_828mk3_hybrid) + return detect_packet_formats_828mk3(motu, data); else return 0; } + const struct snd_motu_spec snd_motu_spec_828mk3_fw = { .name = "828mk3", .protocol_version = SND_MOTU_PROTOCOL_V3, .flags = SND_MOTU_SPEC_RX_MIDI_3RD_Q | - SND_MOTU_SPEC_TX_MIDI_3RD_Q | - SND_MOTU_SPEC_COMMAND_DSP, + SND_MOTU_SPEC_TX_MIDI_3RD_Q, .tx_fixed_pcm_chunks = {18, 18, 14}, .rx_fixed_pcm_chunks = {14, 14, 10}, }; @@ -282,28 +275,16 @@ const struct snd_motu_spec snd_motu_spec_828mk3_hybrid = { .name = "828mk3", .protocol_version = SND_MOTU_PROTOCOL_V3, .flags = SND_MOTU_SPEC_RX_MIDI_3RD_Q | - SND_MOTU_SPEC_TX_MIDI_3RD_Q | - 
SND_MOTU_SPEC_COMMAND_DSP, + SND_MOTU_SPEC_TX_MIDI_3RD_Q, .tx_fixed_pcm_chunks = {18, 18, 14}, .rx_fixed_pcm_chunks = {14, 14, 14}, // Additional 4 dummy chunks at higher rate. }; -const struct snd_motu_spec snd_motu_spec_traveler_mk3 = { - .name = "TravelerMk3", - .protocol_version = SND_MOTU_PROTOCOL_V3, - .flags = SND_MOTU_SPEC_RX_MIDI_3RD_Q | - SND_MOTU_SPEC_TX_MIDI_3RD_Q | - SND_MOTU_SPEC_COMMAND_DSP, - .tx_fixed_pcm_chunks = {18, 14, 10}, - .rx_fixed_pcm_chunks = {14, 14, 10}, -}; - const struct snd_motu_spec snd_motu_spec_ultralite_mk3 = { .name = "UltraLiteMk3", .protocol_version = SND_MOTU_PROTOCOL_V3, .flags = SND_MOTU_SPEC_RX_MIDI_3RD_Q | - SND_MOTU_SPEC_TX_MIDI_3RD_Q | - SND_MOTU_SPEC_COMMAND_DSP, + SND_MOTU_SPEC_TX_MIDI_3RD_Q, .tx_fixed_pcm_chunks = {18, 14, 10}, .rx_fixed_pcm_chunks = {14, 14, 14}, }; @@ -312,26 +293,14 @@ const struct snd_motu_spec snd_motu_spec_audio_express = { .name = "AudioExpress", .protocol_version = SND_MOTU_PROTOCOL_V3, .flags = SND_MOTU_SPEC_RX_MIDI_2ND_Q | - SND_MOTU_SPEC_TX_MIDI_3RD_Q | - SND_MOTU_SPEC_REGISTER_DSP, + SND_MOTU_SPEC_TX_MIDI_3RD_Q, .tx_fixed_pcm_chunks = {10, 10, 0}, .rx_fixed_pcm_chunks = {10, 10, 0}, }; -const struct snd_motu_spec snd_motu_spec_track16 = { - .name = "Track16", - .protocol_version = SND_MOTU_PROTOCOL_V3, - .flags = SND_MOTU_SPEC_RX_MIDI_3RD_Q | - SND_MOTU_SPEC_TX_MIDI_3RD_Q | - SND_MOTU_SPEC_COMMAND_DSP, - .tx_fixed_pcm_chunks = {14, 14, 14}, - .rx_fixed_pcm_chunks = {6, 6, 6}, -}; - const struct snd_motu_spec snd_motu_spec_4pre = { .name = "4pre", .protocol_version = SND_MOTU_PROTOCOL_V3, - .flags = SND_MOTU_SPEC_REGISTER_DSP, .tx_fixed_pcm_chunks = {10, 10, 0}, .rx_fixed_pcm_chunks = {10, 10, 0}, }; diff --git a/sound/firewire/motu/motu-stream.c b/sound/firewire/motu/motu-stream.c index 64aec9c3ee..9e6ca39ebd 100644 --- a/sound/firewire/motu/motu-stream.c +++ b/sound/firewire/motu/motu-stream.c @@ -255,16 +255,6 @@ int snd_motu_stream_start_duplex(struct snd_motu *motu) if (err < 0) return err; - if (motu->spec->flags & SND_MOTU_SPEC_REGISTER_DSP) { - err = snd_motu_register_dsp_message_parser_init(motu); - if (err < 0) - return err; - } else if (motu->spec->flags & SND_MOTU_SPEC_COMMAND_DSP) { - err = snd_motu_command_dsp_message_parser_init(motu, motu->tx_stream.sfc); - if (err < 0) - return err; - } - err = begin_session(motu); if (err < 0) { dev_err(&motu->unit->device, diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c index f8b7fe3875..f65426238d 100644 --- a/sound/firewire/motu/motu.c +++ b/sound/firewire/motu/motu.c @@ -112,16 +112,6 @@ static int motu_probe(struct fw_unit *unit, const struct ieee1394_device_id *ent if (err < 0) goto error; - if (motu->spec->flags & SND_MOTU_SPEC_REGISTER_DSP) { - err = snd_motu_register_dsp_message_parser_new(motu); - if (err < 0) - goto error; - } else if (motu->spec->flags & SND_MOTU_SPEC_COMMAND_DSP) { - err = snd_motu_command_dsp_message_parser_new(motu); - if (err < 0) - goto error; - } - err = snd_card_register(card); if (err < 0) goto error; @@ -169,11 +159,9 @@ static const struct ieee1394_device_id motu_id_table[] = { SND_MOTU_DEV_ENTRY(0x00000f, &snd_motu_spec_8pre), SND_MOTU_DEV_ENTRY(0x000015, &snd_motu_spec_828mk3_fw), // FireWire only. SND_MOTU_DEV_ENTRY(0x000019, &snd_motu_spec_ultralite_mk3), // FireWire only. - SND_MOTU_DEV_ENTRY(0x00001b, &snd_motu_spec_traveler_mk3), SND_MOTU_DEV_ENTRY(0x000030, &snd_motu_spec_ultralite_mk3), // Hybrid. SND_MOTU_DEV_ENTRY(0x000035, &snd_motu_spec_828mk3_hybrid), // Hybrid. 
SND_MOTU_DEV_ENTRY(0x000033, &snd_motu_spec_audio_express), - SND_MOTU_DEV_ENTRY(0x000039, &snd_motu_spec_track16), SND_MOTU_DEV_ENTRY(0x000045, &snd_motu_spec_4pre), { } }; diff --git a/sound/firewire/motu/motu.h b/sound/firewire/motu/motu.h index 4189f21922..f1a830b358 100644 --- a/sound/firewire/motu/motu.h +++ b/sound/firewire/motu/motu.h @@ -74,13 +74,10 @@ struct snd_motu { int dev_lock_count; bool dev_lock_changed; wait_queue_head_t hwdep_wait; - struct snd_hwdep *hwdep; struct amdtp_domain domain; struct amdtp_motu_cache cache; - - void *message_parser; }; enum snd_motu_spec_flags { @@ -88,8 +85,6 @@ enum snd_motu_spec_flags { SND_MOTU_SPEC_RX_MIDI_3RD_Q = 0x0002, SND_MOTU_SPEC_TX_MIDI_2ND_Q = 0x0004, SND_MOTU_SPEC_TX_MIDI_3RD_Q = 0x0008, - SND_MOTU_SPEC_REGISTER_DSP = 0x0010, - SND_MOTU_SPEC_COMMAND_DSP = 0x0020, }; #define SND_MOTU_CLOCK_RATE_COUNT 6 @@ -138,10 +133,8 @@ extern const struct snd_motu_spec snd_motu_spec_8pre; extern const struct snd_motu_spec snd_motu_spec_828mk3_fw; extern const struct snd_motu_spec snd_motu_spec_828mk3_hybrid; -extern const struct snd_motu_spec snd_motu_spec_traveler_mk3; extern const struct snd_motu_spec snd_motu_spec_ultralite_mk3; extern const struct snd_motu_spec snd_motu_spec_audio_express; -extern const struct snd_motu_spec snd_motu_spec_track16; extern const struct snd_motu_spec snd_motu_spec_4pre; int amdtp_motu_init(struct amdtp_stream *s, struct fw_unit *unit, @@ -277,22 +270,4 @@ static inline int snd_motu_protocol_cache_packet_formats(struct snd_motu *motu) return -ENXIO; } -int snd_motu_register_dsp_message_parser_new(struct snd_motu *motu); -int snd_motu_register_dsp_message_parser_init(struct snd_motu *motu); -void snd_motu_register_dsp_message_parser_parse(struct snd_motu *motu, const struct pkt_desc *descs, - unsigned int desc_count, unsigned int data_block_quadlets); -void snd_motu_register_dsp_message_parser_copy_meter(struct snd_motu *motu, - struct snd_firewire_motu_register_dsp_meter *meter); -void snd_motu_register_dsp_message_parser_copy_parameter(struct snd_motu *motu, - struct snd_firewire_motu_register_dsp_parameter *params); -unsigned int snd_motu_register_dsp_message_parser_count_event(struct snd_motu *motu); -bool snd_motu_register_dsp_message_parser_copy_event(struct snd_motu *motu, u32 *event); - -int snd_motu_command_dsp_message_parser_new(struct snd_motu *motu); -int snd_motu_command_dsp_message_parser_init(struct snd_motu *motu, enum cip_sfc sfc); -void snd_motu_command_dsp_message_parser_parse(struct snd_motu *motu, const struct pkt_desc *descs, - unsigned int desc_count, unsigned int data_block_quadlets); -void snd_motu_command_dsp_message_parser_copy_meter(struct snd_motu *motu, - struct snd_firewire_motu_command_dsp_meter *meter); - #endif diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c index d2b5724b46..37154ed43b 100644 --- a/sound/hda/ext/hdac_ext_stream.c +++ b/sound/hda/ext/hdac_ext_stream.c @@ -18,7 +18,7 @@ /** * snd_hdac_ext_stream_init - initialize each stream (aka device) * @bus: HD-audio core bus - * @hext_stream: HD-audio ext core stream object to initialize + * @stream: HD-audio ext core stream object to initialize * @idx: stream index number * @direction: stream direction (SNDRV_PCM_STREAM_PLAYBACK or SNDRV_PCM_STREAM_CAPTURE) * @tag: the tag id to assign @@ -27,34 +27,34 @@ * invoke hdac stream initialization routine */ void snd_hdac_ext_stream_init(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, - int idx, int direction, int tag) + struct 
hdac_ext_stream *stream, + int idx, int direction, int tag) { if (bus->ppcap) { - hext_stream->pphc_addr = bus->ppcap + AZX_PPHC_BASE + + stream->pphc_addr = bus->ppcap + AZX_PPHC_BASE + AZX_PPHC_INTERVAL * idx; - hext_stream->pplc_addr = bus->ppcap + AZX_PPLC_BASE + + stream->pplc_addr = bus->ppcap + AZX_PPLC_BASE + AZX_PPLC_MULTI * bus->num_streams + AZX_PPLC_INTERVAL * idx; } if (bus->spbcap) { - hext_stream->spib_addr = bus->spbcap + AZX_SPB_BASE + + stream->spib_addr = bus->spbcap + AZX_SPB_BASE + AZX_SPB_INTERVAL * idx + AZX_SPB_SPIB; - hext_stream->fifo_addr = bus->spbcap + AZX_SPB_BASE + + stream->fifo_addr = bus->spbcap + AZX_SPB_BASE + AZX_SPB_INTERVAL * idx + AZX_SPB_MAXFIFO; } if (bus->drsmcap) - hext_stream->dpibr_addr = bus->drsmcap + AZX_DRSM_BASE + + stream->dpibr_addr = bus->drsmcap + AZX_DRSM_BASE + AZX_DRSM_INTERVAL * idx; - hext_stream->decoupled = false; - snd_hdac_stream_init(bus, &hext_stream->hstream, idx, direction, tag); + stream->decoupled = false; + snd_hdac_stream_init(bus, &stream->hstream, idx, direction, tag); } EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_init); @@ -67,18 +67,18 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_init); * @dir: direction of streams */ int snd_hdac_ext_stream_init_all(struct hdac_bus *bus, int start_idx, - int num_stream, int dir) + int num_stream, int dir) { int stream_tag = 0; int i, tag, idx = start_idx; for (i = 0; i < num_stream; i++) { - struct hdac_ext_stream *hext_stream = - kzalloc(sizeof(*hext_stream), GFP_KERNEL); - if (!hext_stream) + struct hdac_ext_stream *stream = + kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) return -ENOMEM; tag = ++stream_tag; - snd_hdac_ext_stream_init(bus, hext_stream, idx, dir, tag); + snd_hdac_ext_stream_init(bus, stream, idx, dir, tag); idx++; } @@ -95,22 +95,22 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_init_all); void snd_hdac_stream_free_all(struct hdac_bus *bus) { struct hdac_stream *s, *_s; - struct hdac_ext_stream *hext_stream; + struct hdac_ext_stream *stream; list_for_each_entry_safe(s, _s, &bus->stream_list, list) { - hext_stream = stream_to_hdac_ext_stream(s); - snd_hdac_ext_stream_decouple(bus, hext_stream, false); + stream = stream_to_hdac_ext_stream(s); + snd_hdac_ext_stream_decouple(bus, stream, false); list_del(&s->list); - kfree(hext_stream); + kfree(stream); } } EXPORT_SYMBOL_GPL(snd_hdac_stream_free_all); void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, + struct hdac_ext_stream *stream, bool decouple) { - struct hdac_stream *hstream = &hext_stream->hstream; + struct hdac_stream *hstream = &stream->hstream; u32 val; int mask = AZX_PPCTL_PROCEN(hstream->index); @@ -121,76 +121,76 @@ void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, else if (!decouple && val) snd_hdac_updatel(bus->ppcap, AZX_REG_PP_PPCTL, mask, 0); - hext_stream->decoupled = decouple; + stream->decoupled = decouple; } EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple_locked); /** * snd_hdac_ext_stream_decouple - decouple the hdac stream * @bus: HD-audio core bus - * @hext_stream: HD-audio ext core stream object to initialize + * @stream: HD-audio ext core stream object to initialize * @decouple: flag to decouple */ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, bool decouple) + struct hdac_ext_stream *stream, bool decouple) { spin_lock_irq(&bus->reg_lock); - snd_hdac_ext_stream_decouple_locked(bus, hext_stream, decouple); + snd_hdac_ext_stream_decouple_locked(bus, stream, decouple); 
spin_unlock_irq(&bus->reg_lock); } EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple); /** * snd_hdac_ext_link_stream_start - start a stream - * @hext_stream: HD-audio ext core stream to start + * @stream: HD-audio ext core stream to start */ -void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *hext_stream) +void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *stream) { - snd_hdac_updatel(hext_stream->pplc_addr, AZX_REG_PPLCCTL, + snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, AZX_PPLCCTL_RUN, AZX_PPLCCTL_RUN); } EXPORT_SYMBOL_GPL(snd_hdac_ext_link_stream_start); /** * snd_hdac_ext_link_stream_clear - stop a stream DMA - * @hext_stream: HD-audio ext core stream to stop + * @stream: HD-audio ext core stream to stop */ -void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *hext_stream) +void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *stream) { - snd_hdac_updatel(hext_stream->pplc_addr, AZX_REG_PPLCCTL, AZX_PPLCCTL_RUN, 0); + snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, AZX_PPLCCTL_RUN, 0); } EXPORT_SYMBOL_GPL(snd_hdac_ext_link_stream_clear); /** * snd_hdac_ext_link_stream_reset - reset a stream - * @hext_stream: HD-audio ext core stream to reset + * @stream: HD-audio ext core stream to reset */ -void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *hext_stream) +void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *stream) { unsigned char val; int timeout; - snd_hdac_ext_link_stream_clear(hext_stream); + snd_hdac_ext_link_stream_clear(stream); - snd_hdac_updatel(hext_stream->pplc_addr, AZX_REG_PPLCCTL, + snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, AZX_PPLCCTL_STRST, AZX_PPLCCTL_STRST); udelay(3); timeout = 50; do { - val = readl(hext_stream->pplc_addr + AZX_REG_PPLCCTL) & + val = readl(stream->pplc_addr + AZX_REG_PPLCCTL) & AZX_PPLCCTL_STRST; if (val) break; udelay(3); } while (--timeout); val &= ~AZX_PPLCCTL_STRST; - writel(val, hext_stream->pplc_addr + AZX_REG_PPLCCTL); + writel(val, stream->pplc_addr + AZX_REG_PPLCCTL); udelay(3); timeout = 50; /* waiting for hardware to report that the stream is out of reset */ do { - val = readl(hext_stream->pplc_addr + AZX_REG_PPLCCTL) & AZX_PPLCCTL_STRST; + val = readl(stream->pplc_addr + AZX_REG_PPLCCTL) & AZX_PPLCCTL_STRST; if (!val) break; udelay(3); @@ -201,24 +201,24 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_link_stream_reset); /** * snd_hdac_ext_link_stream_setup - set up the SD for streaming - * @hext_stream: HD-audio ext core stream to set up + * @stream: HD-audio ext core stream to set up * @fmt: stream format */ -int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *hext_stream, int fmt) +int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *stream, int fmt) { - struct hdac_stream *hstream = &hext_stream->hstream; + struct hdac_stream *hstream = &stream->hstream; unsigned int val; /* make sure the run bit is zero for SD */ - snd_hdac_ext_link_stream_clear(hext_stream); + snd_hdac_ext_link_stream_clear(stream); /* program the stream_tag */ - val = readl(hext_stream->pplc_addr + AZX_REG_PPLCCTL); + val = readl(stream->pplc_addr + AZX_REG_PPLCCTL); val = (val & ~AZX_PPLCCTL_STRM_MASK) | (hstream->stream_tag << AZX_PPLCCTL_STRM_SHIFT); - writel(val, hext_stream->pplc_addr + AZX_REG_PPLCCTL); + writel(val, stream->pplc_addr + AZX_REG_PPLCCTL); /* program the stream format */ - writew(fmt, hext_stream->pplc_addr + AZX_REG_PPLCFMT); + writew(fmt, stream->pplc_addr + AZX_REG_PPLCFMT); return 0; } @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_link_stream_setup); * @stream: stream 
id */ void snd_hdac_ext_link_set_stream_id(struct hdac_ext_link *link, - int stream) + int stream) { snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV, (1 << stream), 1 << stream); } @@ -250,10 +250,10 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_link_clear_stream_id); static struct hdac_ext_stream * hdac_ext_link_stream_assign(struct hdac_bus *bus, - struct snd_pcm_substream *substream) + struct snd_pcm_substream *substream) { struct hdac_ext_stream *res = NULL; - struct hdac_stream *hstream = NULL; + struct hdac_stream *stream = NULL; if (!bus->ppcap) { dev_err(bus->dev, "stream type not supported\n"); @@ -261,22 +261,22 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus, } spin_lock_irq(&bus->reg_lock); - list_for_each_entry(hstream, &bus->stream_list, list) { - struct hdac_ext_stream *hext_stream = container_of(hstream, - struct hdac_ext_stream, - hstream); - if (hstream->direction != substream->stream) + list_for_each_entry(stream, &bus->stream_list, list) { + struct hdac_ext_stream *hstream = container_of(stream, + struct hdac_ext_stream, + hstream); + if (stream->direction != substream->stream) continue; /* check if decoupled stream and not in use is available */ - if (hext_stream->decoupled && !hext_stream->link_locked) { - res = hext_stream; + if (hstream->decoupled && !hstream->link_locked) { + res = hstream; break; } - if (!hext_stream->link_locked) { - snd_hdac_ext_stream_decouple_locked(bus, hext_stream, true); - res = hext_stream; + if (!hstream->link_locked) { + snd_hdac_ext_stream_decouple_locked(bus, hstream, true); + res = hstream; break; } } @@ -290,10 +290,10 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus, static struct hdac_ext_stream * hdac_ext_host_stream_assign(struct hdac_bus *bus, - struct snd_pcm_substream *substream) + struct snd_pcm_substream *substream) { struct hdac_ext_stream *res = NULL; - struct hdac_stream *hstream = NULL; + struct hdac_stream *stream = NULL; if (!bus->ppcap) { dev_err(bus->dev, "stream type not supported\n"); @@ -301,17 +301,17 @@ hdac_ext_host_stream_assign(struct hdac_bus *bus, } spin_lock_irq(&bus->reg_lock); - list_for_each_entry(hstream, &bus->stream_list, list) { - struct hdac_ext_stream *hext_stream = container_of(hstream, - struct hdac_ext_stream, - hstream); - if (hstream->direction != substream->stream) + list_for_each_entry(stream, &bus->stream_list, list) { + struct hdac_ext_stream *hstream = container_of(stream, + struct hdac_ext_stream, + hstream); + if (stream->direction != substream->stream) continue; - if (!hstream->opened) { - if (!hext_stream->decoupled) - snd_hdac_ext_stream_decouple_locked(bus, hext_stream, true); - res = hext_stream; + if (!stream->opened) { + if (!hstream->decoupled) + snd_hdac_ext_stream_decouple_locked(bus, hstream, true); + res = hstream; break; } } @@ -346,17 +346,16 @@ struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, struct snd_pcm_substream *substream, int type) { - struct hdac_ext_stream *hext_stream = NULL; - struct hdac_stream *hstream = NULL; + struct hdac_ext_stream *hstream = NULL; + struct hdac_stream *stream = NULL; switch (type) { case HDAC_EXT_STREAM_TYPE_COUPLED: - hstream = snd_hdac_stream_assign(bus, substream); - if (hstream) - hext_stream = container_of(hstream, - struct hdac_ext_stream, - hstream); - return hext_stream; + stream = snd_hdac_stream_assign(bus, substream); + if (stream) + hstream = container_of(stream, + struct hdac_ext_stream, hstream); + return hstream; case HDAC_EXT_STREAM_TYPE_HOST: return hdac_ext_host_stream_assign(bus, substream); @@ 
-372,34 +371,34 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_assign); /** * snd_hdac_ext_stream_release - release the assigned stream - * @hext_stream: HD-audio ext core stream to release + * @stream: HD-audio ext core stream to release * @type: type of stream (coupled, host or link stream) * * Release the stream that has been assigned by snd_hdac_ext_stream_assign(). */ -void snd_hdac_ext_stream_release(struct hdac_ext_stream *hext_stream, int type) +void snd_hdac_ext_stream_release(struct hdac_ext_stream *stream, int type) { - struct hdac_bus *bus = hext_stream->hstream.bus; + struct hdac_bus *bus = stream->hstream.bus; switch (type) { case HDAC_EXT_STREAM_TYPE_COUPLED: - snd_hdac_stream_release(&hext_stream->hstream); + snd_hdac_stream_release(&stream->hstream); break; case HDAC_EXT_STREAM_TYPE_HOST: spin_lock_irq(&bus->reg_lock); - if (hext_stream->decoupled && !hext_stream->link_locked) - snd_hdac_ext_stream_decouple_locked(bus, hext_stream, false); + if (stream->decoupled && !stream->link_locked) + snd_hdac_ext_stream_decouple_locked(bus, stream, false); spin_unlock_irq(&bus->reg_lock); - snd_hdac_stream_release(&hext_stream->hstream); + snd_hdac_stream_release(&stream->hstream); break; case HDAC_EXT_STREAM_TYPE_LINK: spin_lock_irq(&bus->reg_lock); - if (hext_stream->decoupled && !hext_stream->hstream.opened) - snd_hdac_ext_stream_decouple_locked(bus, hext_stream, false); - hext_stream->link_locked = 0; - hext_stream->link_substream = NULL; + if (stream->decoupled && !stream->hstream.opened) + snd_hdac_ext_stream_decouple_locked(bus, stream, false); + stream->link_locked = 0; + stream->link_substream = NULL; spin_unlock_irq(&bus->reg_lock); break; @@ -438,11 +437,11 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_spbcap_enable); /** * snd_hdac_ext_stream_set_spib - sets the spib value of a stream * @bus: HD-audio core bus - * @hext_stream: hdac_ext_stream + * @stream: hdac_ext_stream * @value: spib value to set */ int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, u32 value) + struct hdac_ext_stream *stream, u32 value) { if (!bus->spbcap) { @@ -450,7 +449,7 @@ int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus, return -EINVAL; } - writel(value, hext_stream->spib_addr); + writel(value, stream->spib_addr); return 0; } @@ -459,12 +458,12 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_set_spib); /** * snd_hdac_ext_stream_get_spbmaxfifo - gets the spib value of a stream * @bus: HD-audio core bus - * @hext_stream: hdac_ext_stream + * @stream: hdac_ext_stream * * Return maxfifo for the stream */ int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream) + struct hdac_ext_stream *stream) { if (!bus->spbcap) { @@ -472,10 +471,27 @@ int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus, return -EINVAL; } - return readl(hext_stream->fifo_addr); + return readl(stream->fifo_addr); } EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_get_spbmaxfifo); + +/** + * snd_hdac_ext_stop_streams - stop all streams if running + * @bus: HD-audio core bus + */ +void snd_hdac_ext_stop_streams(struct hdac_bus *bus) +{ + struct hdac_stream *stream; + + if (bus->chip_init) { + list_for_each_entry(stream, &bus->stream_list, list) + snd_hdac_stream_stop(stream); + snd_hdac_bus_stop_chip(bus); + } +} +EXPORT_SYMBOL_GPL(snd_hdac_ext_stop_streams); + /** * snd_hdac_ext_stream_drsm_enable - enable DMA resume for a stream * @bus: HD-audio core bus @@ -504,11 +520,11 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_drsm_enable); /** * snd_hdac_ext_stream_set_dpibr - 
sets the dpibr value of a stream * @bus: HD-audio core bus - * @hext_stream: hdac_ext_stream + * @stream: hdac_ext_stream * @value: dpib value to set */ int snd_hdac_ext_stream_set_dpibr(struct hdac_bus *bus, - struct hdac_ext_stream *hext_stream, u32 value) + struct hdac_ext_stream *stream, u32 value) { if (!bus->drsmcap) { @@ -516,7 +532,7 @@ int snd_hdac_ext_stream_set_dpibr(struct hdac_bus *bus, return -EINVAL; } - writel(value, hext_stream->dpibr_addr); + writel(value, stream->dpibr_addr); return 0; } @@ -524,12 +540,12 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_set_dpibr); /** * snd_hdac_ext_stream_set_lpib - sets the lpib value of a stream - * @hext_stream: hdac_ext_stream + * @stream: hdac_ext_stream * @value: lpib value to set */ -int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *hext_stream, u32 value) +int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *stream, u32 value) { - snd_hdac_stream_writel(&hext_stream->hstream, SD_LPIB, value); + snd_hdac_stream_writel(&stream->hstream, SD_LPIB, value); return 0; } diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index f3582012d2..aa7955fdf6 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -142,22 +142,6 @@ void snd_hdac_stream_stop(struct hdac_stream *azx_dev) } EXPORT_SYMBOL_GPL(snd_hdac_stream_stop); -/** - * snd_hdac_stop_streams_and_chip - stop all streams and chip if running - * @bus: HD-audio core bus - */ -void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus) -{ - struct hdac_stream *stream; - - if (bus->chip_init) { - list_for_each_entry(stream, &bus->stream_list, list) - snd_hdac_stream_stop(stream); - snd_hdac_bus_stop_chip(bus); - } -} -EXPORT_SYMBOL_GPL(snd_hdac_stop_streams_and_chip); - /** * snd_hdac_stream_reset - reset a stream * @azx_dev: HD-audio core stream to reset diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 4fb90ceb40..4208fa8a4d 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -248,15 +248,15 @@ static const struct config_entry config_table[] = { {} } }, + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x02c8, + }, { .flags = FLAG_SOF, .device = 0x02c8, .codec_hid = "ESSX8336", }, - { - .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, - .device = 0x02c8, - }, /* Cometlake-H */ { .flags = FLAG_SOF, @@ -278,14 +278,14 @@ static const struct config_entry config_table[] = { } }, { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x06c8, + }, + { .flags = FLAG_SOF, .device = 0x06c8, .codec_hid = "ESSX8336", }, - { - .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, - .device = 0x06c8, - }, #endif /* Icelake */ @@ -309,30 +309,13 @@ static const struct config_entry config_table[] = { }, #endif -/* Jasper Lake */ +/* JasperLake */ #if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE) - { - .flags = FLAG_SOF, - .device = 0x4dc8, - .dmi_table = (const struct dmi_system_id []) { - { - .ident = "Google Chromebooks", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Google"), - } - }, - {} - } - }, { .flags = FLAG_SOF, .device = 0x4dc8, .codec_hid = "ESSX8336", }, - { - .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, - .device = 0x4dc8, - }, #endif /* Tigerlake */ @@ -350,11 +333,6 @@ static const struct config_entry config_table[] = { {} } }, - { - .flags = FLAG_SOF, - .device = 0xa0c8, - .codec_hid = "ESSX8336", - }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0xa0c8, @@ -363,6 +341,11 @@ static const struct config_entry config_table[] = { .flags 
= FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x43c8, }, + { + .flags = FLAG_SOF, + .device = 0xa0c8, + .codec_hid = "ESSX8336", + }, #endif /* Elkhart Lake */ @@ -391,14 +374,6 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x51cc, }, - { - .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, - .device = 0x51cd, - }, - { - .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, - .device = 0x54c8, - }, #endif }; @@ -428,7 +403,7 @@ static int snd_intel_dsp_check_dmic(struct pci_dev *pci) nhlt = intel_nhlt_init(&pci->dev); if (nhlt) { - if (intel_nhlt_has_endpoint_type(nhlt, NHLT_LINK_DMIC)) + if (intel_nhlt_get_dmic_geo(&pci->dev, nhlt)) ret = 1; intel_nhlt_free(nhlt); } diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 128476aa7c..e2237239d9 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -110,105 +110,3 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) return dmic_geo; } EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo); - -bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type) -{ - struct nhlt_endpoint *epnt; - int i; - - if (!nhlt) - return false; - - epnt = (struct nhlt_endpoint *)nhlt->desc; - for (i = 0; i < nhlt->endpoint_count; i++) { - if (epnt->linktype == link_type) - return true; - - epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); - } - return false; -} -EXPORT_SYMBOL(intel_nhlt_has_endpoint_type); - -static struct nhlt_specific_cfg * -nhlt_get_specific_cfg(struct device *dev, struct nhlt_fmt *fmt, u8 num_ch, - u32 rate, u8 vbps, u8 bps) -{ - struct nhlt_fmt_cfg *cfg = fmt->fmt_config; - struct wav_fmt *wfmt; - u16 _bps, _vbps; - int i; - - dev_dbg(dev, "Endpoint format count=%d\n", fmt->fmt_count); - - for (i = 0; i < fmt->fmt_count; i++) { - wfmt = &cfg->fmt_ext.fmt; - _bps = wfmt->bits_per_sample; - _vbps = cfg->fmt_ext.sample.valid_bits_per_sample; - - dev_dbg(dev, "Endpoint format: ch=%d fmt=%d/%d rate=%d\n", - wfmt->channels, _vbps, _bps, wfmt->samples_per_sec); - - if (wfmt->channels == num_ch && wfmt->samples_per_sec == rate && - vbps == _vbps && bps == _bps) - return &cfg->config; - - cfg = (struct nhlt_fmt_cfg *)(cfg->config.caps + cfg->config.size); - } - - return NULL; -} - -static bool nhlt_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt, - u32 bus_id, u8 link_type, u8 dir, u8 dev_type) -{ - dev_dbg(dev, "Endpoint: vbus_id=%d link_type=%d dir=%d dev_type = %d\n", - epnt->virtual_bus_id, epnt->linktype, - epnt->direction, epnt->device_type); - - if ((epnt->virtual_bus_id != bus_id) || - (epnt->linktype != link_type) || - (epnt->direction != dir)) - return false; - - /* link of type DMIC bypasses device_type check */ - return epnt->linktype == NHLT_LINK_DMIC || - epnt->device_type == dev_type; -} - -struct nhlt_specific_cfg * -intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, - u32 bus_id, u8 link_type, u8 vbps, u8 bps, - u8 num_ch, u32 rate, u8 dir, u8 dev_type) -{ - struct nhlt_specific_cfg *cfg; - struct nhlt_endpoint *epnt; - struct nhlt_fmt *fmt; - int i; - - if (!nhlt) - return NULL; - - dev_dbg(dev, "Looking for configuration:\n"); - dev_dbg(dev, " vbus_id=%d link_type=%d dir=%d, dev_type=%d\n", - bus_id, link_type, dir, dev_type); - dev_dbg(dev, " ch=%d fmt=%d/%d rate=%d\n", num_ch, vbps, bps, rate); - dev_dbg(dev, "Endpoint count=%d\n", nhlt->endpoint_count); - - epnt = (struct nhlt_endpoint *)nhlt->desc; - - for (i = 0; i < 
nhlt->endpoint_count; i++) { - if (nhlt_check_ep_match(dev, epnt, bus_id, link_type, dir, dev_type)) { - fmt = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size); - - cfg = nhlt_get_specific_cfg(dev, fmt, num_ch, rate, vbps, bps); - if (cfg) - return cfg; - } - - epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); - } - - return NULL; -} -EXPORT_SYMBOL(intel_nhlt_get_endpoint_blob); diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 5cb92f7ccb..b7758dbe23 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -50,11 +50,11 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, int i) static int sdw_intel_scan_controller(struct sdw_intel_acpi_info *info) { - struct acpi_device *adev = acpi_fetch_acpi_dev(info->handle); + struct acpi_device *adev; int ret, i; u8 count; - if (!adev) + if (acpi_bus_get_device(info->handle, &adev)) return -EINVAL; /* Found controller, find links supported */ @@ -119,6 +119,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, void *cdata, void **return_value) { struct sdw_intel_acpi_info *info = cdata; + struct acpi_device *adev; acpi_status status; u64 adr; @@ -126,7 +127,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, if (ACPI_FAILURE(status)) return AE_OK; /* keep going */ - if (!acpi_fetch_acpi_dev(handle)) { + if (acpi_bus_get_device(handle, &adev)) { pr_err("%s: Couldn't find ACPI handle\n", __func__); return AE_NOT_FOUND; } diff --git a/sound/isa/gus/gus_mem.c b/sound/isa/gus/gus_mem.c index 3e56c01c45..ff9480f249 100644 --- a/sound/isa/gus/gus_mem.c +++ b/sound/isa/gus/gus_mem.c @@ -24,9 +24,8 @@ void snd_gf1_mem_lock(struct snd_gf1_mem * alloc, int xup) } } -static struct snd_gf1_mem_block * -snd_gf1_mem_xalloc(struct snd_gf1_mem *alloc, struct snd_gf1_mem_block *block, - const char *name) +static struct snd_gf1_mem_block *snd_gf1_mem_xalloc(struct snd_gf1_mem * alloc, + struct snd_gf1_mem_block * block) { struct snd_gf1_mem_block *pblock, *nblock; @@ -34,12 +33,6 @@ snd_gf1_mem_xalloc(struct snd_gf1_mem *alloc, struct snd_gf1_mem_block *block, if (nblock == NULL) return NULL; *nblock = *block; - nblock->name = kstrdup(name, GFP_KERNEL); - if (!nblock->name) { - kfree(nblock); - return NULL; - } - pblock = alloc->first; while (pblock) { if (pblock->ptr > nblock->ptr) { @@ -51,7 +44,7 @@ snd_gf1_mem_xalloc(struct snd_gf1_mem *alloc, struct snd_gf1_mem_block *block, else nblock->prev->next = nblock; mutex_unlock(&alloc->memory_mutex); - return nblock; + return NULL; } pblock = pblock->next; } @@ -205,7 +198,8 @@ struct snd_gf1_mem_block *snd_gf1_mem_alloc(struct snd_gf1_mem * alloc, int owne if (share_id != NULL) memcpy(&block.share_id, share_id, sizeof(block.share_id)); block.owner = owner; - nblock = snd_gf1_mem_xalloc(alloc, &block, name); + block.name = kstrdup(name, GFP_KERNEL); + nblock = snd_gf1_mem_xalloc(alloc, &block); snd_gf1_mem_lock(alloc, 1); return nblock; } @@ -242,12 +236,14 @@ int snd_gf1_mem_init(struct snd_gus_card * gus) if (gus->gf1.enh_mode) { block.ptr = 0; block.size = 1024; - if (!snd_gf1_mem_xalloc(alloc, &block, "InterWave LFOs")) + block.name = kstrdup("InterWave LFOs", GFP_KERNEL); + if (snd_gf1_mem_xalloc(alloc, &block) == NULL) return -ENOMEM; } block.ptr = gus->gf1.default_voice_address; block.size = 4; - if (!snd_gf1_mem_xalloc(alloc, &block, "Voice default (NULL's)")) + block.name = kstrdup("Voice default (NULL's)", GFP_KERNEL); + if (snd_gf1_mem_xalloc(alloc, &block) == NULL) return -ENOMEM; #ifdef 
CONFIG_SND_DEBUG snd_card_ro_proc_new(gus->card, "gusmem", gus, snd_gf1_mem_info_read); diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index 9a678b5cf2..ea20236f35 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -3218,6 +3218,7 @@ static int snd_cmipci_probe(struct pci_dev *pci, { static int dev; struct snd_card *card; + struct cmipci *cm; int err; if (dev >= SNDRV_CARDS) @@ -3228,9 +3229,10 @@ static int snd_cmipci_probe(struct pci_dev *pci, } err = snd_devm_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE, - sizeof(struct cmipci), &card); + sizeof(*cm), &card); if (err < 0) return err; + cm = card->private_data; switch (pci->device) { case PCI_DEVICE_ID_CMEDIA_CM8738: diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index febe1c2b7d..ab9d2746e8 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -91,39 +91,6 @@ config SND_HDA_PATCH_LOADER start up. The "patch" file can be specified via patch module option, such as patch=hda-init. -config SND_HDA_SCODEC_CS35L41 - tristate - -config SND_HDA_SCODEC_CS35L41_I2C - tristate "Build CS35L41 HD-audio side codec support for I2C Bus" - depends on I2C - depends on ACPI - depends on SND_SOC - select SND_HDA_GENERIC - select SND_SOC_CS35L41_LIB - select SND_HDA_SCODEC_CS35L41 - help - Say Y or M here to include CS35L41 I2C HD-audio side codec support - in snd-hda-intel driver, such as ALC287. - -comment "Set to Y if you want auto-loading the side codec driver" - depends on SND_HDA=y && SND_HDA_SCODEC_CS35L41_I2C=m - -config SND_HDA_SCODEC_CS35L41_SPI - tristate "Build CS35L41 HD-audio codec support for SPI Bus" - depends on SPI_MASTER - depends on ACPI - depends on SND_SOC - select SND_HDA_GENERIC - select SND_SOC_CS35L41_LIB - select SND_HDA_SCODEC_CS35L41 - help - Say Y or M here to include CS35L41 SPI HD-audio side codec support - in snd-hda-intel driver, such as ALC287. 
- -comment "Set to Y if you want auto-loading the side codec driver" - depends on SND_HDA=y && SND_HDA_SCODEC_CS35L41_SPI=m - config SND_HDA_CODEC_REALTEK tristate "Build Realtek HD-audio codec support" select SND_HDA_GENERIC diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile index 3e7bc608d4..b8fa682ce6 100644 --- a/sound/pci/hda/Makefile +++ b/sound/pci/hda/Makefile @@ -27,11 +27,6 @@ snd-hda-codec-conexant-objs := patch_conexant.o snd-hda-codec-via-objs := patch_via.o snd-hda-codec-hdmi-objs := patch_hdmi.o hda_eld.o -# side codecs -snd-hda-scodec-cs35l41-objs := cs35l41_hda.o -snd-hda-scodec-cs35l41-i2c-objs := cs35l41_hda_i2c.o -snd-hda-scodec-cs35l41-spi-objs := cs35l41_hda_spi.o - # common driver obj-$(CONFIG_SND_HDA) := snd-hda-codec.o @@ -50,11 +45,6 @@ obj-$(CONFIG_SND_HDA_CODEC_CONEXANT) += snd-hda-codec-conexant.o obj-$(CONFIG_SND_HDA_CODEC_VIA) += snd-hda-codec-via.o obj-$(CONFIG_SND_HDA_CODEC_HDMI) += snd-hda-codec-hdmi.o -# side codecs -obj-$(CONFIG_SND_HDA_SCODEC_CS35L41) += snd-hda-scodec-cs35l41.o -obj-$(CONFIG_SND_HDA_SCODEC_CS35L41_I2C) += snd-hda-scodec-cs35l41-i2c.o -obj-$(CONFIG_SND_HDA_SCODEC_CS35L41_SPI) += snd-hda-scodec-cs35l41-spi.o - # this must be the last entry after codec drivers; # otherwise the codec patches won't be hooked before the PCI probe # when built in kernel diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index cd1db943b7..500d0d474d 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -92,10 +92,14 @@ static int compare_input_type(const void *ap, const void *bp) */ static void reorder_outputs(unsigned int nums, hda_nid_t *pins) { + hda_nid_t nid; + switch (nums) { case 3: case 4: - swap(pins[1], pins[2]); + nid = pins[1]; + pins[1] = pins[2]; + pins[2] = nid; break; } } diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index c572fb5886..7153bd53e1 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -14,7 +14,6 @@ #include #include #include "hda_local.h" -#include "hda_jack.h" /* * find a matching codec id @@ -159,7 +158,6 @@ static int hda_codec_driver_remove(struct device *dev) refcount_dec(&codec->pcm_ref); snd_hda_codec_disconnect_pcms(codec); - snd_hda_jack_tbl_disconnect(codec); wait_event(codec->remove_sleep, !refcount_read(&codec->pcm_ref)); snd_power_sync_ref(codec->bus->card); diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 34eba40cc6..362ddcaea1 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -183,7 +183,7 @@ struct hda_gen_spec { struct automic_entry am_entry[MAX_AUTO_MIC_PINS]; /* for pin sensing */ - /* current status; set in hda_generic.c */ + /* current status; set in hda_geneic.c */ unsigned int hp_jack_present:1; unsigned int line_jack_present:1; unsigned int speaker_muted:1; /* current status of speaker mute */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 572ff0d1fa..9e36f99260 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1350,12 +1350,8 @@ static void azx_free(struct azx *chip) if (hda->freed) return; - if (azx_has_pm_runtime(chip) && chip->running) { + if (azx_has_pm_runtime(chip) && chip->running) pm_runtime_get_noresume(&pci->dev); - pm_runtime_forbid(&pci->dev); - pm_runtime_dont_use_autosuspend(&pci->dev); - } - chip->running = 0; azx_del_card_list(chip); @@ -1941,7 +1937,6 @@ static int azx_first_init(struct azx *chip) dma_bits = 32; if (dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(dma_bits))) 
dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)); - dma_set_max_seg_size(&pci->dev, UINT_MAX); /* read number of streams from GCAP register instead of using * hardcoded value @@ -2374,7 +2369,6 @@ static void azx_remove(struct pci_dev *pci) cancel_delayed_work_sync(&hda->probe_work); device_lock(&pci->dev); - pci_set_drvdata(pci, NULL); snd_card_free(card); } } @@ -2495,14 +2489,9 @@ static const struct pci_device_id azx_ids[] = { /* Alderlake-P */ { PCI_DEVICE(0x8086, 0x51c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, - { PCI_DEVICE(0x8086, 0x51cd), - .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Alderlake-M */ { PCI_DEVICE(0x8086, 0x51cc), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, - /* Alderlake-N */ - { PCI_DEVICE(0x8086, 0x54c8), - .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Elkhart Lake */ { PCI_DEVICE(0x8086, 0x4b55), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c index 7d7786df60..f29975e3e9 100644 --- a/sound/pci/hda/hda_jack.c +++ b/sound/pci/hda/hda_jack.c @@ -158,17 +158,6 @@ snd_hda_jack_tbl_new(struct hda_codec *codec, hda_nid_t nid, int dev_id) return jack; } -void snd_hda_jack_tbl_disconnect(struct hda_codec *codec) -{ - struct hda_jack_tbl *jack = codec->jacktbl.list; - int i; - - for (i = 0; i < codec->jacktbl.used; i++, jack++) { - if (!codec->bus->shutdown && jack->jack) - snd_device_disconnect(codec->card, jack->jack); - } -} - void snd_hda_jack_tbl_clear(struct hda_codec *codec) { struct hda_jack_tbl *jack = codec->jacktbl.list; diff --git a/sound/pci/hda/hda_jack.h b/sound/pci/hda/hda_jack.h index ff7d289c03..2abf7aac24 100644 --- a/sound/pci/hda/hda_jack.h +++ b/sound/pci/hda/hda_jack.h @@ -69,7 +69,6 @@ struct hda_jack_tbl * snd_hda_jack_tbl_get_from_tag(struct hda_codec *codec, unsigned char tag, int dev_id); -void snd_hda_jack_tbl_disconnect(struct hda_codec *codec); void snd_hda_jack_tbl_clear(struct hda_codec *codec); void snd_hda_jack_set_dirty_all(struct hda_codec *codec); diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c index 2d1fa70632..df0b4522ba 100644 --- a/sound/pci/hda/patch_cs8409-tables.c +++ b/sound/pci/hda/patch_cs8409-tables.c @@ -490,8 +490,6 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0ADC, "Warlock", CS8409_WARLOCK), SND_PCI_QUIRK(0x1028, 0x0AF4, "Warlock", CS8409_WARLOCK), SND_PCI_QUIRK(0x1028, 0x0AF5, "Warlock", CS8409_WARLOCK), - SND_PCI_QUIRK(0x1028, 0x0BB5, "Warlock N3 15 TGL-U Nuvoton EC", CS8409_WARLOCK), - SND_PCI_QUIRK(0x1028, 0x0BB6, "Warlock V3 15 TGL-U Nuvoton EC", CS8409_WARLOCK), SND_PCI_QUIRK(0x1028, 0x0A77, "Cyborg", CS8409_CYBORG), SND_PCI_QUIRK(0x1028, 0x0A78, "Cyborg", CS8409_CYBORG), SND_PCI_QUIRK(0x1028, 0x0A79, "Cyborg", CS8409_CYBORG), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 92df4f243e..ffcde7409d 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1535,7 +1535,7 @@ static void update_eld(struct hda_codec *codec, } } - if (!eld->eld_valid || eld->eld_size <= 0 || eld->info.sad_count <= 0) { + if (!eld->eld_valid || eld->eld_size <= 0) { eld->eld_valid = false; eld->eld_size = 0; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3a42457984..83b56c1ba3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -25,7 +25,6 @@ #include "hda_auto_parser.h" #include "hda_jack.h" #include 
"hda_generic.h" -#include "hda_component.h" /* keep halting ALC5505 DSP, for power saving */ #define HALT_REALTEK_ALC5505 @@ -128,10 +127,6 @@ struct alc_spec { unsigned int coef0; struct input_dev *kb_dev; u8 alc_mute_keycode_map[1]; - - /* component binding */ - struct component_match *match; - struct hda_component comps[HDA_MAX_COMPONENTS]; }; /* @@ -6581,133 +6576,6 @@ static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec, } } -static int comp_match_dev_name(struct device *dev, void *data) -{ - return strcmp(dev_name(dev), data) == 0; -} - -static int find_comp_by_dev_name(struct alc_spec *spec, const char *name) -{ - int i; - - for (i = 0; i < HDA_MAX_COMPONENTS; i++) { - if (strcmp(spec->comps[i].name, name) == 0) - return i; - } - - return -ENODEV; -} - -static int comp_bind(struct device *dev) -{ - struct hda_codec *cdc = dev_to_hda_codec(dev); - struct alc_spec *spec = cdc->spec; - - return component_bind_all(dev, spec->comps); -} - -static void comp_unbind(struct device *dev) -{ - struct hda_codec *cdc = dev_to_hda_codec(dev); - struct alc_spec *spec = cdc->spec; - - component_unbind_all(dev, spec->comps); -} - -static const struct component_master_ops comp_master_ops = { - .bind = comp_bind, - .unbind = comp_unbind, -}; - -static void comp_generic_playback_hook(struct hda_pcm_stream *hinfo, struct hda_codec *cdc, - struct snd_pcm_substream *sub, int action) -{ - struct alc_spec *spec = cdc->spec; - int i; - - for (i = 0; i < HDA_MAX_COMPONENTS; i++) { - if (spec->comps[i].dev) - spec->comps[i].playback_hook(spec->comps[i].dev, action); - } -} - -static void cs35l41_generic_fixup(struct hda_codec *cdc, int action, const char *bus, - const char *hid, int count) -{ - struct device *dev = hda_codec_dev(cdc); - struct alc_spec *spec = cdc->spec; - char *name; - int ret, i; - - switch (action) { - case HDA_FIXUP_ACT_PRE_PROBE: - for (i = 0; i < count; i++) { - name = devm_kasprintf(dev, GFP_KERNEL, - "%s-%s:00-cs35l41-hda.%d", bus, hid, i); - if (!name) - return; - component_match_add(dev, &spec->match, comp_match_dev_name, name); - } - ret = component_master_add_with_match(dev, &comp_master_ops, spec->match); - if (ret) - codec_err(cdc, "Fail to register component aggregator %d\n", ret); - else - spec->gen.pcm_playback_hook = comp_generic_playback_hook; - break; - } -} - -static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) -{ - cs35l41_generic_fixup(cdc, action, "i2c", "CSC3551", 2); -} - -static void alc287_legion_16achg6_playback_hook(struct hda_pcm_stream *hinfo, struct hda_codec *cdc, - struct snd_pcm_substream *sub, int action) -{ - struct alc_spec *spec = cdc->spec; - unsigned int rx_slot; - int i; - - switch (action) { - case HDA_GEN_PCM_ACT_PREPARE: - rx_slot = 0; - i = find_comp_by_dev_name(spec, "i2c-CLSA0100:00-cs35l41-hda.0"); - if (i >= 0) - spec->comps[i].set_channel_map(spec->comps[i].dev, 0, NULL, 1, &rx_slot); - - rx_slot = 1; - i = find_comp_by_dev_name(spec, "i2c-CLSA0100:00-cs35l41-hda.1"); - if (i >= 0) - spec->comps[i].set_channel_map(spec->comps[i].dev, 0, NULL, 1, &rx_slot); - break; - } - - comp_generic_playback_hook(hinfo, cdc, sub, action); -} - -static void alc287_fixup_legion_16achg6_speakers(struct hda_codec *cdc, const struct hda_fixup *fix, - int action) -{ - struct device *dev = hda_codec_dev(cdc); - struct alc_spec *spec = cdc->spec; - int ret; - - switch (action) { - case HDA_FIXUP_ACT_PRE_PROBE: - component_match_add(dev, &spec->match, comp_match_dev_name, - 
"i2c-CLSA0100:00-cs35l41-hda.0"); - component_match_add(dev, &spec->match, comp_match_dev_name, - "i2c-CLSA0100:00-cs35l41-hda.1"); - ret = component_master_add_with_match(dev, &comp_master_ops, spec->match); - if (ret) - codec_err(cdc, "Fail to register component aggregator %d\n", ret); - else - spec->gen.pcm_playback_hook = alc287_legion_16achg6_playback_hook; - break; - } -} - /* for alc295_fixup_hp_top_speakers */ #include "hp_x360_helper.c" @@ -6997,9 +6865,6 @@ enum { ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, - ALC287_FIXUP_LEGION_16ACHG6, - ALC287_FIXUP_CS35L41_I2C_2, - ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED, }; static const struct hda_fixup alc269_fixups[] = { @@ -8742,24 +8607,6 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, - [ALC287_FIXUP_LEGION_16ACHG6] = { - .type = HDA_FIXUP_FUNC, - .v.func = alc287_fixup_legion_16achg6_speakers, - }, - [ALC287_FIXUP_CS35L41_I2C_2] = { - .type = HDA_FIXUP_FUNC, - .v.func = cs35l41_fixup_i2c_two, - }, - [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = { - .type = HDA_FIXUP_VERBS, - .v.verbs = (const struct hda_verb[]) { - { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 }, - { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 }, - { } - }, - .chained = true, - .chain_id = ALC285_FIXUP_HP_MUTE_LED, - }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8973,7 +8820,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED), SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), @@ -9159,9 +9005,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), - SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), @@ -9181,7 +9024,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), - SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", ALC287_FIXUP_LEGION_16ACHG6), 
SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c index a047ed0f84..fb8895af03 100644 --- a/sound/pci/mixart/mixart_core.c +++ b/sound/pci/mixart/mixart_core.c @@ -23,6 +23,8 @@ #define MSG_DESCRIPTOR_SIZE 0x24 #define MSG_HEADER_SIZE (MSG_DESCRIPTOR_SIZE + 4) +#define MSG_DEFAULT_SIZE 512 + #define MSG_TYPE_MASK 0x00000003 /* mask for following types */ #define MSG_TYPE_NOTIFY 0 /* embedded -> driver (only notification, do not get_msg() !) */ #define MSG_TYPE_COMMAND 1 /* driver <-> embedded (a command has no answer) */ @@ -442,9 +444,6 @@ irqreturn_t snd_mixart_threaded_irq(int irq, void *dev_id) struct mixart_timer_notify *notify; notify = (struct mixart_timer_notify *)mixart_msg_data; - BUILD_BUG_ON(sizeof(notify) > sizeof(mixart_msg_data)); - if (snd_BUG_ON(notify->stream_count > ARRAY_SIZE(notify->streams))) - break; for(i=0; istream_count; i++) { u32 buffer_id = notify->streams[i].buffer_id; diff --git a/sound/pci/mixart/mixart_core.h b/sound/pci/mixart/mixart_core.h index 2f0e29ed5d..fbf4731a27 100644 --- a/sound/pci/mixart/mixart_core.h +++ b/sound/pci/mixart/mixart_core.h @@ -49,7 +49,6 @@ enum mixart_message_id { MSG_CLOCK_SET_PROPERTIES = 0x200002, }; -#define MSG_DEFAULT_SIZE 512 struct mixart_msg { @@ -252,17 +251,10 @@ struct mixart_sample_pos u32 sample_pos_low_part; } __attribute__((packed)); -/* - * This structure is limited by the size of MSG_DEFAULT_SIZE. Instead of - * having MIXART_MAX_STREAM_PER_CARD * MIXART_MAX_CARDS many streams, - * this is capped to have a total size below MSG_DEFAULT_SIZE. - */ -#define MIXART_MAX_TIMER_NOTIFY_STREAMS \ - ((MSG_DEFAULT_SIZE - sizeof(u32)) / sizeof(struct mixart_sample_pos)) struct mixart_timer_notify { u32 stream_count; - struct mixart_sample_pos streams[MIXART_MAX_TIMER_NOTIFY_STREAMS]; + struct mixart_sample_pos streams[MIXART_MAX_STREAM_PER_CARD * MIXART_MAX_CARDS]; } __attribute__((packed)); diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c index bf289783ea..0f4bce1c0d 100644 --- a/sound/ppc/beep.c +++ b/sound/ppc/beep.c @@ -99,7 +99,7 @@ static int snd_pmac_beep_event(struct input_dev *dev, unsigned int type, return -1; switch (code) { - case SND_BELL: if (hz) hz = 1000; break; + case SND_BELL: if (hz) hz = 1000; case SND_TONE: break; default: return -1; } diff --git a/sound/soc/amd/Kconfig b/sound/soc/amd/Kconfig index 7a9e45094f..49ff5e73e9 100644 --- a/sound/soc/amd/Kconfig +++ b/sound/soc/amd/Kconfig @@ -6,7 +6,6 @@ config SND_SOC_AMD_ACP config SND_SOC_AMD_CZ_DA7219MX98357_MACH tristate "AMD CZ support for DA7219, RT5682 and MAX9835" - select CLK_FIXED_FCH select SND_SOC_DA7219 select SND_SOC_RT5682_I2C select SND_SOC_MAX98357A @@ -31,14 +30,13 @@ config SND_SOC_AMD_ACP3x config SND_SOC_AMD_RV_RT5682_MACH tristate "AMD RV support for RT5682" - select CLK_FIXED_FCH select SND_SOC_RT5682_I2C select SND_SOC_MAX98357A select SND_SOC_CROS_EC_CODEC select I2C_CROS_EC_TUNNEL select SND_SOC_RT1015 select SND_SOC_RT1015P - depends on SND_SOC_AMD_ACP3x && I2C && CROS_EC && GPIOLIB + depends on SND_SOC_AMD_ACP3x && I2C && CROS_EC help This option enables machine driver for RT5682 and MAX9835. 
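The mixart hunks above drop two guards from the timer-notify path (the BUILD_BUG_ON() and snd_BUG_ON() in snd_mixart_threaded_irq) and replace the MSG_DEFAULT_SIZE-derived cap on mixart_timer_notify.streams[] with the full MIXART_MAX_STREAM_PER_CARD * MIXART_MAX_CARDS array. The standalone sketch below restates the invariant those guards encoded; the mixart_sample_pos field layout is abbreviated here for illustration (the real definition lives in mixart_core.h).

#include <stdint.h>

typedef uint32_t u32;

#define MSG_DEFAULT_SIZE 512

/* Field layout abbreviated for illustration; see mixart_core.h. */
struct mixart_sample_pos {
    u32 buffer_id;
    u32 sample_pos_high_part;
    u32 sample_pos_low_part;
} __attribute__((packed));

/* Cap the flexible part so the whole notification fits in one
 * default-sized message buffer. */
#define MIXART_MAX_TIMER_NOTIFY_STREAMS \
    ((MSG_DEFAULT_SIZE - sizeof(u32)) / sizeof(struct mixart_sample_pos))

struct mixart_timer_notify {
    u32 stream_count;
    struct mixart_sample_pos streams[MIXART_MAX_TIMER_NOTIFY_STREAMS];
} __attribute__((packed));

/* Compile-time proof: the notification can never overrun the buffer. */
_Static_assert(sizeof(struct mixart_timer_notify) <= MSG_DEFAULT_SIZE,
               "timer notify must fit in a default-sized message");

/* Runtime guard mirroring the dropped snd_BUG_ON(): never trust a
 * stream_count that arrives from the embedded firmware. */
static inline int timer_notify_count_ok(const struct mixart_timer_notify *n)
{
    return n->stream_count <= MIXART_MAX_TIMER_NOTIFY_STREAMS;
}

With the cap removed, streams[] is sized by card count rather than by message size, so both the compile-time and the runtime check above stop holding by construction.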
@@ -51,7 +49,7 @@ config SND_SOC_AMD_RENOIR config SND_SOC_AMD_RENOIR_MACH tristate "AMD Renoir support for DMIC" select SND_SOC_DMIC - depends on SND_SOC_AMD_RENOIR && GPIOLIB + depends on SND_SOC_AMD_RENOIR help This option enables machine driver for DMIC @@ -63,44 +61,3 @@ config SND_SOC_AMD_ACP5x By enabling this flag build will trigger for ACP PCI driver, ACP DMA driver, CPU DAI driver. - -config SND_SOC_AMD_VANGOGH_MACH - tristate "AMD Vangogh support for NAU8821 CS35L41" - select SND_SOC_NAU8821 - select SND_SOC_CS35L41_SPI - depends on SND_SOC_AMD_ACP5x && I2C && SPI_MASTER - help - This option enables machine driver for Vangogh platform - using NAU8821 and CS35L41 codecs. - Say m if you have such a device. - If unsure select "N". - -config SND_SOC_AMD_ACP6x - tristate "AMD Audio Coprocessor-v6.x Yellow Carp support" - depends on X86 && PCI - help - This option enables Audio Coprocessor i.e ACP v6.x support on - AMD Yellow Carp platform. By enabling this flag build will be - triggered for ACP PCI driver, ACP PDM DMA driver. - Say m if you have such a device. - If unsure select "N". - -config SND_SOC_AMD_YC_MACH - tristate "AMD YC support for DMIC" - select SND_SOC_DMIC - depends on SND_SOC_AMD_ACP6x - help - This option enables machine driver for Yellow Carp platform - using dmic. ACP IP has PDM Decoder block with DMA controller. - DMIC can be connected directly to ACP IP. - Say m if you have such a device. - If unsure select "N". - -config SND_AMD_ACP_CONFIG - tristate "AMD ACP configuration selection" - select SND_SOC_ACPI if ACPI - help - This option adds an auto detection to determine which ACP - driver modules to use - -source "sound/soc/amd/acp/Kconfig" diff --git a/sound/soc/amd/Makefile b/sound/soc/amd/Makefile index 4b1f77930a..07150d26f3 100644 --- a/sound/soc/amd/Makefile +++ b/sound/soc/amd/Makefile @@ -3,7 +3,6 @@ acp_audio_dma-objs := acp-pcm-dma.o snd-soc-acp-da7219mx98357-mach-objs := acp-da7219-max98357a.o snd-soc-acp-rt5645-mach-objs := acp-rt5645.o snd-soc-acp-rt5682-mach-objs := acp3x-rt5682-max9836.o -snd-acp-config-objs := acp-config.o obj-$(CONFIG_SND_SOC_AMD_ACP) += acp_audio_dma.o obj-$(CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH) += snd-soc-acp-da7219mx98357-mach.o @@ -12,6 +11,3 @@ obj-$(CONFIG_SND_SOC_AMD_ACP3x) += raven/ obj-$(CONFIG_SND_SOC_AMD_RV_RT5682_MACH) += snd-soc-acp-rt5682-mach.o obj-$(CONFIG_SND_SOC_AMD_RENOIR) += renoir/ obj-$(CONFIG_SND_SOC_AMD_ACP5x) += vangogh/ -obj-$(CONFIG_SND_SOC_AMD_ACP6x) += yc/ -obj-$(CONFIG_SND_SOC_AMD_ACP_COMMON) += acp/ -obj-$(CONFIG_SND_AMD_ACP_CONFIG) += snd-acp-config.o diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index 3bf86c2424..b3df98a9f9 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -33,7 +33,7 @@ static struct clk *da7219_dai_wclk; static struct clk *da7219_dai_bclk; static struct clk *rt5682_dai_wclk; static struct clk *rt5682_dai_bclk; - +extern bool bt_uart_enable; void *acp_soc_is_rltk_max(struct device *dev); static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd) @@ -522,7 +522,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .name = "amd-da7219-play", .stream_name = "Playback", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .init = cz_da7219_init, .dpcm_playback = 1, .stop_dma_first = 1, @@ -533,7 +533,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .name = "amd-da7219-cap", .stream_name = "Capture", .dai_fmt = 
SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, .stop_dma_first = 1, .ops = &cz_da7219_cap_ops, @@ -543,7 +543,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .name = "amd-max98357-play", .stream_name = "HiFi Playback", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_playback = 1, .stop_dma_first = 1, .ops = &cz_max_play_ops, @@ -554,7 +554,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .name = "dmic0", .stream_name = "DMIC0 Capture", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, .stop_dma_first = 1, .ops = &cz_dmic0_cap_ops, @@ -565,7 +565,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .name = "dmic1", .stream_name = "DMIC1 Capture", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, .stop_dma_first = 1, .ops = &cz_dmic1_cap_ops, @@ -578,7 +578,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .name = "amd-rt5682-play", .stream_name = "Playback", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .init = cz_rt5682_init, .dpcm_playback = 1, .stop_dma_first = 1, @@ -589,7 +589,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .name = "amd-rt5682-cap", .stream_name = "Capture", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, .stop_dma_first = 1, .ops = &cz_rt5682_cap_ops, @@ -599,7 +599,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .name = "amd-max98357-play", .stream_name = "HiFi Playback", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_playback = 1, .stop_dma_first = 1, .ops = &cz_rt5682_max_play_ops, @@ -610,7 +610,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .name = "dmic0", .stream_name = "DMIC0 Capture", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, .stop_dma_first = 1, .ops = &cz_rt5682_dmic0_cap_ops, @@ -621,7 +621,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .name = "dmic1", .stream_name = "DMIC1 Capture", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, .stop_dma_first = 1, .ops = &cz_rt5682_dmic1_cap_ops, @@ -760,8 +760,8 @@ static int cz_probe(struct platform_device *pdev) "devm_snd_soc_register_card(%s) failed\n", card->name); } - acp_bt_uart_enable = !device_property_read_bool(&pdev->dev, - "bt-pad-enable"); + bt_uart_enable = !device_property_read_bool(&pdev->dev, + "bt-pad-enable"); return 0; } diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 8fa2e2fde4..11b3c4f39e 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -36,8 +36,8 @@ #define ST_MIN_BUFFER ST_MAX_BUFFER #define DRV_NAME "acp_audio_dma" -bool acp_bt_uart_enable = true; -EXPORT_SYMBOL(acp_bt_uart_enable); +bool bt_uart_enable = true; +EXPORT_SYMBOL(bt_uart_enable); static const struct snd_pcm_hardware acp_pcm_hardware_playback = { .info = SNDRV_PCM_INFO_INTERLEAVED | @@ -596,7 +596,7 @@ static int acp_init(void __iomem *acp_mmio, u32 asic_type) acp_reg_write(val, acp_mmio, mmACP_SOFT_RESET); /* For BT instance 
change pins from UART to BT */ - if (!acp_bt_uart_enable) { + if (!bt_uart_enable) { val = acp_reg_read(acp_mmio, mmACP_BT_UART_PAD_SEL); val |= ACP_BT_UART_PAD_SELECT_MASK; acp_reg_write(val, acp_mmio, mmACP_BT_UART_PAD_SEL); @@ -1003,7 +1003,6 @@ static snd_pcm_uframes_t acp_dma_pointer(struct snd_soc_component *component, struct snd_pcm_runtime *runtime = substream->runtime; struct audio_substream_data *rtd = runtime->private_data; - struct audio_drv_data *adata = dev_get_drvdata(component->dev); if (!rtd) return -EINVAL; @@ -1024,7 +1023,7 @@ static snd_pcm_uframes_t acp_dma_pointer(struct snd_soc_component *component, } if (bytescount > 0) { delay = do_div(bytescount, period_bytes); - adata->delay += bytes_to_frames(runtime, delay); + runtime->delay = bytes_to_frames(runtime, delay); } } else { buffersize = frames_to_bytes(runtime, runtime->buffer_size); @@ -1036,17 +1035,6 @@ static snd_pcm_uframes_t acp_dma_pointer(struct snd_soc_component *component, return bytes_to_frames(runtime, pos); } -static snd_pcm_sframes_t acp_dma_delay(struct snd_soc_component *component, - struct snd_pcm_substream *substream) -{ - struct audio_drv_data *adata = dev_get_drvdata(component->dev); - snd_pcm_sframes_t delay = adata->delay; - - adata->delay = 0; - - return delay; -} - static int acp_dma_prepare(struct snd_soc_component *component, struct snd_pcm_substream *substream) { @@ -1210,7 +1198,6 @@ static const struct snd_soc_component_driver acp_asoc_platform = { .hw_params = acp_dma_hw_params, .trigger = acp_dma_trigger, .pointer = acp_dma_pointer, - .delay = acp_dma_delay, .prepare = acp_dma_prepare, .pcm_construct = acp_dma_new, }; diff --git a/sound/soc/amd/acp-rt5645.c b/sound/soc/amd/acp-rt5645.c index a79a46646d..d6ba94677a 100644 --- a/sound/soc/amd/acp-rt5645.c +++ b/sound/soc/amd/acp-rt5645.c @@ -91,7 +91,7 @@ static int cz_init(struct snd_soc_pcm_runtime *rtd) return 0; } -static const struct snd_soc_ops cz_aif1_ops = { +static struct snd_soc_ops cz_aif1_ops = { .hw_params = cz_aif1_hw_params, }; @@ -111,7 +111,7 @@ static struct snd_soc_dai_link cz_dai_rt5650[] = { .name = "amd-rt5645-play", .stream_name = "RT5645_AIF1", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .init = cz_init, .ops = &cz_aif1_ops, SND_SOC_DAILINK_REG(designware1, codec, platform), @@ -120,7 +120,7 @@ static struct snd_soc_dai_link cz_dai_rt5650[] = { .name = "amd-rt5645-cap", .stream_name = "RT5645_AIF1", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFP, + | SND_SOC_DAIFMT_CBM_CFM, .ops = &cz_aif1_ops, SND_SOC_DAILINK_REG(designware2, codec, platform), }, diff --git a/sound/soc/amd/acp.h b/sound/soc/amd/acp.h index db80a73aa5..e5ab6c6040 100644 --- a/sound/soc/amd/acp.h +++ b/sound/soc/amd/acp.h @@ -151,7 +151,6 @@ struct audio_drv_data { struct snd_pcm_substream *capture_i2sbt_stream; void __iomem *acp_mmio; u32 asic_type; - snd_pcm_sframes_t delay; }; /* @@ -205,6 +204,4 @@ typedef struct acp_dma_dscr_transfer { u32 reserved; } acp_dma_dscr_transfer_t; -extern bool acp_bt_uart_enable; - #endif /*__ACP_HW_H */ diff --git a/sound/soc/amd/vangogh/Makefile b/sound/soc/amd/vangogh/Makefile index c9e53e04e2..3353f93dc6 100644 --- a/sound/soc/amd/vangogh/Makefile +++ b/sound/soc/amd/vangogh/Makefile @@ -3,9 +3,7 @@ snd-pci-acp5x-objs := pci-acp5x.o snd-acp5x-i2s-objs := acp5x-i2s.o snd-acp5x-pcm-dma-objs := acp5x-pcm-dma.o -snd-soc-acp5x-mach-objs := acp5x-mach.o obj-$(CONFIG_SND_SOC_AMD_ACP5x) += snd-pci-acp5x.o 
obj-$(CONFIG_SND_SOC_AMD_ACP5x) += snd-acp5x-i2s.o obj-$(CONFIG_SND_SOC_AMD_ACP5x) += snd-acp5x-pcm-dma.o -obj-$(CONFIG_SND_SOC_AMD_VANGOGH_MACH) += snd-soc-acp5x-mach.o diff --git a/sound/soc/amd/vangogh/acp5x-i2s.c b/sound/soc/amd/vangogh/acp5x-i2s.c index 002db3971c..2705e57eb7 100644 --- a/sound/soc/amd/vangogh/acp5x-i2s.c +++ b/sound/soc/amd/vangogh/acp5x-i2s.c @@ -348,7 +348,7 @@ static int acp5x_i2s_trigger(struct snd_pcm_substream *substream, return ret; } -static const struct snd_soc_dai_ops acp5x_i2s_dai_ops = { +static struct snd_soc_dai_ops acp5x_i2s_dai_ops = { .hw_params = acp5x_i2s_hwparams, .trigger = acp5x_i2s_trigger, .set_fmt = acp5x_i2s_set_fmt, diff --git a/sound/soc/amd/vangogh/pci-acp5x.c b/sound/soc/amd/vangogh/pci-acp5x.c index 2b6b9edc36..a57b762d9f 100644 --- a/sound/soc/amd/vangogh/pci-acp5x.c +++ b/sound/soc/amd/vangogh/pci-acp5x.c @@ -213,9 +213,6 @@ static int snd_acp5x_probe(struct pci_dev *pci, pdevinfo[2].num_res = 1; pdevinfo[2].res = &adata->res[2]; - pdevinfo[3].name = "acp5x_mach"; - pdevinfo[3].id = 0; - pdevinfo[3].parent = &pci->dev; for (i = 0; i < ACP5x_DEVS; i++) { adata->pdev[i] = platform_device_register_full(&pdevinfo[i]); diff --git a/sound/soc/atmel/atmel-i2s.c b/sound/soc/atmel/atmel-i2s.c index 1934767690..6b3d9c05ea 100644 --- a/sound/soc/atmel/atmel-i2s.c +++ b/sound/soc/atmel/atmel-i2s.c @@ -342,8 +342,8 @@ static int atmel_i2s_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - switch (dev->fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + switch (dev->fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: /* codec is slave, so cpu is master */ mr |= ATMEL_I2SC_MR_MODE_MASTER; ret = atmel_i2s_get_gck_param(dev, params_rate(params)); @@ -351,7 +351,7 @@ static int atmel_i2s_hw_params(struct snd_pcm_substream *substream, return ret; break; - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFM: /* codec is master, so cpu is slave */ mr |= ATMEL_I2SC_MR_MODE_SLAVE; dev->gck_param = NULL; diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 26e2bc690d..6a63e8797a 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -209,8 +209,8 @@ static int atmel_ssc_hw_rule_rate(struct snd_pcm_hw_params *params, if (frame_size < 0) return frame_size; - switch (ssc_p->daifmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFC: + switch (ssc_p->daifmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFS: if ((ssc_p->dir_mask & SSC_DIR_MASK_CAPTURE) && ssc->clk_from_rk_pin) /* Receiver Frame Synchro (i.e. capture) @@ -220,7 +220,7 @@ static int atmel_ssc_hw_rule_rate(struct snd_pcm_hw_params *params, mck_div = 3; break; - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFM: if ((ssc_p->dir_mask & SSC_DIR_MASK_PLAYBACK) && !ssc->clk_from_rk_pin) /* Transmit Frame Synchro (i.e. 
playback) @@ -232,8 +232,8 @@ static int atmel_ssc_hw_rule_rate(struct snd_pcm_hw_params *params, break; } - switch (ssc_p->daifmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + switch (ssc_p->daifmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: r.num = ssc_p->mck_rate / mck_div / frame_size; ret = snd_interval_ratnum(i, 1, &r, &num, &den); @@ -243,8 +243,8 @@ static int atmel_ssc_hw_rule_rate(struct snd_pcm_hw_params *params, } break; - case SND_SOC_DAIFMT_CBP_CFC: - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFS: + case SND_SOC_DAIFMT_CBM_CFM: t.min = 8000; t.max = ssc_p->mck_rate / mck_div / frame_size; t.openmin = t.openmax = 0; @@ -429,9 +429,9 @@ static int atmel_ssc_set_dai_clkdiv(struct snd_soc_dai *cpu_dai, /* Is the cpu-dai master of the frame clock? */ static int atmel_ssc_cfs(struct atmel_ssc_info *ssc_p) { - switch (ssc_p->daifmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFC: - case SND_SOC_DAIFMT_CBC_CFC: + switch (ssc_p->daifmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFS: + case SND_SOC_DAIFMT_CBS_CFS: return 1; } return 0; @@ -440,9 +440,9 @@ static int atmel_ssc_cfs(struct atmel_ssc_info *ssc_p) /* Is the cpu-dai master of the bit clock? */ static int atmel_ssc_cbs(struct atmel_ssc_info *ssc_p) { - switch (ssc_p->daifmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFP: - case SND_SOC_DAIFMT_CBC_CFC: + switch (ssc_p->daifmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFM: + case SND_SOC_DAIFMT_CBS_CFS: return 1; } return 0; diff --git a/sound/soc/atmel/mchp-i2s-mcc.c b/sound/soc/atmel/mchp-i2s-mcc.c index 6d1227a1d6..8988f024a7 100644 --- a/sound/soc/atmel/mchp-i2s-mcc.c +++ b/sound/soc/atmel/mchp-i2s-mcc.c @@ -350,7 +350,7 @@ static int mchp_i2s_mcc_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; /* We can't generate only FSYNC */ - if ((fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) == SND_SOC_DAIFMT_CBP_CFC) + if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) == SND_SOC_DAIFMT_CBM_CFS) return -EINVAL; /* We can only reconfigure the IP when it's stopped */ @@ -546,20 +546,20 @@ static int mchp_i2s_mcc_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - switch (dev->fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + switch (dev->fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: /* cpu is BCLK and LRC master */ mra |= MCHP_I2SMCC_MRA_MODE_MASTER; if (dev->sysclk) mra |= MCHP_I2SMCC_MRA_IMCKMODE_GEN; set_divs = 1; break; - case SND_SOC_DAIFMT_CBC_CFP: + case SND_SOC_DAIFMT_CBS_CFM: /* cpu is BCLK master */ mrb |= MCHP_I2SMCC_MRB_CLKSEL_INT; set_divs = 1; fallthrough; - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFM: /* cpu is slave */ mra |= MCHP_I2SMCC_MRA_MODE_SLAVE; if (dev->sysclk) diff --git a/sound/soc/atmel/mikroe-proto.c b/sound/soc/atmel/mikroe-proto.c index 627564c18c..0be7b4221c 100644 --- a/sound/soc/atmel/mikroe-proto.c +++ b/sound/soc/atmel/mikroe-proto.c @@ -129,9 +129,9 @@ static int snd_proto_probe(struct platform_device *pdev) } if (bitclkmaster) { if (codec_np == bitclkmaster) - dai_fmt |= SND_SOC_DAIFMT_CBP_CFP; + dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; else - dai_fmt |= SND_SOC_DAIFMT_CBC_CFC; + dai_fmt |= SND_SOC_DAIFMT_CBS_CFS; } else { dai_fmt |= snd_soc_daifmt_parse_clock_provider_as_flag(np, NULL); } @@ -144,9 +144,9 @@ static int snd_proto_probe(struct platform_device *pdev) of_node_put(cpu_np); ret = snd_soc_register_card(&snd_proto); - if (ret) - 
dev_err_probe(&pdev->dev, ret, - "snd_soc_register_card() failed\n"); + if (ret && ret != -EPROBE_DEFER) + dev_err(&pdev->dev, + "snd_soc_register_card() failed: %d\n", ret); return ret; } diff --git a/sound/soc/atmel/tse850-pcm5142.c b/sound/soc/atmel/tse850-pcm5142.c index ef537de771..50c3dc6936 100644 --- a/sound/soc/atmel/tse850-pcm5142.c +++ b/sound/soc/atmel/tse850-pcm5142.c @@ -304,7 +304,7 @@ static struct snd_soc_dai_link tse850_dailink = { .stream_name = "TSE-850-PCM", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBP_CFC, + | SND_SOC_DAIFMT_CBM_CFS, SND_SOC_DAILINK_REG(pcm), }; @@ -371,27 +371,35 @@ static int tse850_probe(struct platform_device *pdev) } tse850->add = devm_gpiod_get(dev, "axentia,add", GPIOD_OUT_HIGH); - if (IS_ERR(tse850->add)) - return dev_err_probe(dev, PTR_ERR(tse850->add), - "failed to get 'add' gpio\n"); + if (IS_ERR(tse850->add)) { + if (PTR_ERR(tse850->add) != -EPROBE_DEFER) + dev_err(dev, "failed to get 'add' gpio\n"); + return PTR_ERR(tse850->add); + } tse850->add_cache = 1; tse850->loop1 = devm_gpiod_get(dev, "axentia,loop1", GPIOD_OUT_HIGH); - if (IS_ERR(tse850->loop1)) - return dev_err_probe(dev, PTR_ERR(tse850->loop1), - "failed to get 'loop1' gpio\n"); + if (IS_ERR(tse850->loop1)) { + if (PTR_ERR(tse850->loop1) != -EPROBE_DEFER) + dev_err(dev, "failed to get 'loop1' gpio\n"); + return PTR_ERR(tse850->loop1); + } tse850->loop1_cache = 1; tse850->loop2 = devm_gpiod_get(dev, "axentia,loop2", GPIOD_OUT_HIGH); - if (IS_ERR(tse850->loop2)) - return dev_err_probe(dev, PTR_ERR(tse850->loop2), - "failed to get 'loop2' gpio\n"); + if (IS_ERR(tse850->loop2)) { + if (PTR_ERR(tse850->loop2) != -EPROBE_DEFER) + dev_err(dev, "failed to get 'loop2' gpio\n"); + return PTR_ERR(tse850->loop2); + } tse850->loop2_cache = 1; tse850->ana = devm_regulator_get(dev, "axentia,ana"); - if (IS_ERR(tse850->ana)) - return dev_err_probe(dev, PTR_ERR(tse850->ana), - "failed to get 'ana' regulator\n"); + if (IS_ERR(tse850->ana)) { + if (PTR_ERR(tse850->ana) != -EPROBE_DEFER) + dev_err(dev, "failed to get 'ana' regulator\n"); + return PTR_ERR(tse850->ana); + } ret = regulator_enable(tse850->ana); if (ret < 0) { diff --git a/sound/soc/au1x/i2sc.c b/sound/soc/au1x/i2sc.c index 740d4e052e..65bd39f503 100644 --- a/sound/soc/au1x/i2sc.c +++ b/sound/soc/au1x/i2sc.c @@ -119,9 +119,9 @@ static int au1xi2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) goto out; } - /* I2S controller only supports provider */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: /* CODEC consumer */ + /* I2S controller only supports master */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: /* CODEC slave */ break; default: goto out; diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c index b2b8896bb5..767ce950d0 100644 --- a/sound/soc/au1x/psc-i2s.c +++ b/sound/soc/au1x/psc-i2s.c @@ -90,12 +90,12 @@ static int au1xpsc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, goto out; } - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: /* CODEC provider */ - ct |= PSC_I2SCFG_MS; /* PSC I2S consumer mode */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: /* CODEC master */ + ct |= PSC_I2SCFG_MS; /* PSC I2S slave mode */ break; - case SND_SOC_DAIFMT_CBC_CFC: /* CODEC consumer */ - ct &= ~PSC_I2SCFG_MS; /* PSC I2S provider mode */ + case SND_SOC_DAIFMT_CBS_CFS: /* CODEC slave */ + ct &= ~PSC_I2SCFG_MS; /* PSC I2S Master mode */ break; default: goto out; diff 
--git a/sound/soc/bcm/bcm2835-i2s.c b/sound/soc/bcm/bcm2835-i2s.c index e3fc4bee8c..3d668f449b 100644 --- a/sound/soc/bcm/bcm2835-i2s.c +++ b/sound/soc/bcm/bcm2835-i2s.c @@ -127,14 +127,14 @@ struct bcm2835_i2s_dev { static void bcm2835_i2s_start_clock(struct bcm2835_i2s_dev *dev) { - unsigned int provider = dev->fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK; + unsigned int master = dev->fmt & SND_SOC_DAIFMT_MASTER_MASK; if (dev->clk_prepared) return; - switch (provider) { - case SND_SOC_DAIFMT_CBC_CFC: - case SND_SOC_DAIFMT_CBC_CFP: + switch (master) { + case SND_SOC_DAIFMT_CBS_CFS: + case SND_SOC_DAIFMT_CBS_CFM: clk_prepare_enable(dev->clk); dev->clk_prepared = true; break; @@ -337,8 +337,8 @@ static int bcm2835_i2s_hw_params(struct snd_pcm_substream *substream, unsigned int rx_mask, tx_mask; unsigned int rx_ch1_pos, rx_ch2_pos, tx_ch1_pos, tx_ch2_pos; unsigned int mode, format; - bool bit_clock_provider = false; - bool frame_sync_provider = false; + bool bit_clock_master = false; + bool frame_sync_master = false; bool frame_start_falling_edge = false; uint32_t csreg; int ret = 0; @@ -383,36 +383,36 @@ static int bcm2835_i2s_hw_params(struct snd_pcm_substream *substream, if (data_length > slot_width) return -EINVAL; - /* Check if CPU is bit clock provider */ - switch (dev->fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: - case SND_SOC_DAIFMT_CBC_CFP: - bit_clock_provider = true; + /* Check if CPU is bit clock master */ + switch (dev->fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + case SND_SOC_DAIFMT_CBS_CFM: + bit_clock_master = true; break; - case SND_SOC_DAIFMT_CBP_CFC: - case SND_SOC_DAIFMT_CBP_CFP: - bit_clock_provider = false; + case SND_SOC_DAIFMT_CBM_CFS: + case SND_SOC_DAIFMT_CBM_CFM: + bit_clock_master = false; break; default: return -EINVAL; } - /* Check if CPU is frame sync provider */ - switch (dev->fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: - case SND_SOC_DAIFMT_CBP_CFC: - frame_sync_provider = true; + /* Check if CPU is frame sync master */ + switch (dev->fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + case SND_SOC_DAIFMT_CBM_CFS: + frame_sync_master = true; break; - case SND_SOC_DAIFMT_CBC_CFP: - case SND_SOC_DAIFMT_CBP_CFP: - frame_sync_provider = false; + case SND_SOC_DAIFMT_CBS_CFM: + case SND_SOC_DAIFMT_CBM_CFM: + frame_sync_master = false; break; default: return -EINVAL; } /* Clock should only be set up here if CPU is clock master */ - if (bit_clock_provider && + if (bit_clock_master && (!dev->clk_prepared || dev->clk_rate != bclk_rate)) { if (dev->clk_prepared) bcm2835_i2s_stop_clock(dev); @@ -501,11 +501,11 @@ static int bcm2835_i2s_hw_params(struct snd_pcm_substream *substream, /* * Transmitting data immediately after frame start, eg * in left-justified or DSP mode A, only works stable - * if bcm2835 is the frame clock provider. + * if bcm2835 is the frame clock master. */ - if ((!rx_ch1_pos || !tx_ch1_pos) && !frame_sync_provider) + if ((!rx_ch1_pos || !tx_ch1_pos) && !frame_sync_master) dev_warn(dev->dev, - "Unstable consumer config detected, L/R may be swapped"); + "Unstable slave config detected, L/R may be swapped"); /* * Set format for both streams. 
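The bcm2835_i2s_hw_params() hunks above decode the DAIFMT master/provider field twice, once to learn who drives the bit clock and once for the frame sync. The sketch below condenses that logic into one pass; the enum values are illustrative stand-ins, not the kernel's actual bit encodings from soc-dai.h.

#include <stdbool.h>

/* Stand-ins for the DAIFMT master/provider field (values illustrative). */
enum dai_clock_field {
    DAIFMT_CBS_CFS, /* codec is bit clock + frame slave: CPU drives both */
    DAIFMT_CBS_CFM, /* CPU drives bit clock, codec drives frame sync     */
    DAIFMT_CBM_CFS, /* codec drives bit clock, CPU drives frame sync     */
    DAIFMT_CBM_CFM, /* codec drives both: CPU is the slave/consumer      */
};

/* Condensed form of the two switches in bcm2835_i2s_hw_params():
 * returns false for anything else, matching the -EINVAL paths. */
static bool decode_clock_roles(enum dai_clock_field fmt,
                               bool *cpu_is_bclk_master,
                               bool *cpu_is_frame_master)
{
    switch (fmt) {
    case DAIFMT_CBS_CFS:
        *cpu_is_bclk_master = true;
        *cpu_is_frame_master = true;
        return true;
    case DAIFMT_CBS_CFM:
        *cpu_is_bclk_master = true;
        *cpu_is_frame_master = false;
        return true;
    case DAIFMT_CBM_CFS:
        *cpu_is_bclk_master = false;
        *cpu_is_frame_master = true;
        return true;
    case DAIFMT_CBM_CFM:
        *cpu_is_bclk_master = false;
        *cpu_is_frame_master = false;
        return true;
    }
    return false;
}

This also explains bcm2835_i2s_start_clock() above: it only ungates the clock in the two CBS_* cases, i.e. exactly when the CPU side actually drives BCLK.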
@@ -538,11 +538,11 @@ static int bcm2835_i2s_hw_params(struct snd_pcm_substream *substream, mode |= BCM2835_I2S_FSLEN(framesync_length); /* CLKM selects bcm2835 clock slave mode */ - if (!bit_clock_provider) + if (!bit_clock_master) mode |= BCM2835_I2S_CLKM; /* FSM selects bcm2835 frame sync slave mode */ - if (!frame_sync_provider) + if (!frame_sync_master) mode |= BCM2835_I2S_FSM; /* CLKI selects normal clocking mode, sampling on rising edge */ diff --git a/sound/soc/bcm/bcm63xx-i2s.h b/sound/soc/bcm/bcm63xx-i2s.h index f30556bec8..edc328ba53 100644 --- a/sound/soc/bcm/bcm63xx-i2s.h +++ b/sound/soc/bcm/bcm63xx-i2s.h @@ -74,6 +74,7 @@ struct bcm_i2s_priv { struct device *dev; + struct resource *r_irq; struct regmap *regmap_i2s; struct clk *i2s_clk; struct snd_pcm_substream *play_substream; diff --git a/sound/soc/bcm/bcm63xx-pcm-whistler.c b/sound/soc/bcm/bcm63xx-pcm-whistler.c index 2c600b0175..b5096f64c5 100644 --- a/sound/soc/bcm/bcm63xx-pcm-whistler.c +++ b/sound/soc/bcm/bcm63xx-pcm-whistler.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -388,12 +387,14 @@ int bcm63xx_soc_platform_probe(struct platform_device *pdev, { int ret; - ret = platform_get_irq(pdev, 0); - if (ret < 0) - return ret; + i2s_priv->r_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!i2s_priv->r_irq) { + dev_err(&pdev->dev, "Unable to get register irq resource.\n"); + return -ENODEV; + } - ret = devm_request_irq(&pdev->dev, ret, i2s_dma_isr, - irq_get_trigger_type(ret), "i2s_dma", (void *)i2s_priv); + ret = devm_request_irq(&pdev->dev, i2s_priv->r_irq->start, i2s_dma_isr, + i2s_priv->r_irq->flags, "i2s_dma", (void *)i2s_priv); if (ret) { dev_err(&pdev->dev, "i2s_init: failed to request interrupt.ret=%d\n", ret); diff --git a/sound/soc/bcm/cygnus-ssp.c b/sound/soc/bcm/cygnus-ssp.c index 9698f4531c..fca5a3f2ee 100644 --- a/sound/soc/bcm/cygnus-ssp.c +++ b/sound/soc/bcm/cygnus-ssp.c @@ -848,12 +848,12 @@ static int cygnus_ssp_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) ssp_newcfg = 0; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: ssp_newcfg |= BIT(I2S_OUT_CFGX_SLAVE_MODE); aio->is_slave = 1; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: ssp_newcfg &= ~BIT(I2S_OUT_CFGX_SLAVE_MODE); aio->is_slave = 0; break; diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c index 2c8cd843d0..0d26550d0d 100644 --- a/sound/soc/cirrus/ep93xx-i2s.c +++ b/sound/soc/cirrus/ep93xx-i2s.c @@ -111,9 +111,9 @@ static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream) if ((ep93xx_i2s_read_reg(info, EP93XX_I2S_TX0EN) & 0x1) == 0 && (ep93xx_i2s_read_reg(info, EP93XX_I2S_RX0EN) & 0x1) == 0) { /* Enable clocks */ - clk_prepare_enable(info->mclk); - clk_prepare_enable(info->sclk); - clk_prepare_enable(info->lrclk); + clk_enable(info->mclk); + clk_enable(info->sclk); + clk_enable(info->lrclk); /* Enable i2s */ ep93xx_i2s_write_reg(info, EP93XX_I2S_GLCTRL, 1); @@ -156,9 +156,9 @@ static void ep93xx_i2s_disable(struct ep93xx_i2s_info *info, int stream) ep93xx_i2s_write_reg(info, EP93XX_I2S_GLCTRL, 0); /* Disable clocks */ - clk_disable_unprepare(info->lrclk); - clk_disable_unprepare(info->sclk); - clk_disable_unprepare(info->mclk); + clk_disable(info->lrclk); + clk_disable(info->sclk); + clk_disable(info->mclk); } } @@ -245,14 +245,14 @@ static int ep93xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, return -EINVAL; } - switch (fmt & 
SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: - /* CPU is provider */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + /* CPU is master */ clk_cfg |= EP93XX_I2S_CLKCFG_MASTER; break; - case SND_SOC_DAIFMT_CBP_CFP: - /* Codec is provider */ + case SND_SOC_DAIFMT_CBM_CFM: + /* Codec is master */ clk_cfg &= ~EP93XX_I2S_CLKCFG_MASTER; break; diff --git a/sound/soc/codecs/88pm860x-codec.c b/sound/soc/codecs/88pm860x-codec.c index c6043fa58c..cac7e557ed 100644 --- a/sound/soc/codecs/88pm860x-codec.c +++ b/sound/soc/codecs/88pm860x-codec.c @@ -968,16 +968,16 @@ static int pm860x_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai, mask |= PCM_INF2_BCLK | PCM_INF2_FS | PCM_INF2_MASTER; - /* set audio interface clocking */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: - case SND_SOC_DAIFMT_CBP_CFC: + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + case SND_SOC_DAIFMT_CBM_CFS: if (pm860x->dir == PM860X_CLK_DIR_OUT) { inf |= PCM_INF2_MASTER; ret = 0; } break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: if (pm860x->dir == PM860X_CLK_DIR_IN) { inf &= ~PCM_INF2_MASTER; ret = 0; @@ -1072,15 +1072,15 @@ static int pm860x_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai, mask |= PCM_INF2_BCLK | PCM_INF2_FS | PCM_INF2_MASTER; - /* set audio interface clocking */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: if (pm860x->dir == PM860X_CLK_DIR_OUT) inf |= PCM_INF2_MASTER; else return -EINVAL; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: if (pm860x->dir == PM860X_CLK_DIR_IN) inf &= ~PCM_INF2_MASTER; else diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 500f452f4d..8dedbc825b 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -41,7 +41,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_ADS117X imply SND_SOC_AK4104 imply SND_SOC_AK4118 - imply SND_SOC_AK4375 imply SND_SOC_AK4458 imply SND_SOC_AK4535 imply SND_SOC_AK4554 @@ -62,8 +61,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_CS35L34 imply SND_SOC_CS35L35 imply SND_SOC_CS35L36 - imply SND_SOC_CS35L41_SPI - imply SND_SOC_CS35L41_I2C imply SND_SOC_CS42L42 imply SND_SOC_CS42L51_I2C imply SND_SOC_CS42L52 @@ -120,7 +117,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_MAX98357A imply SND_SOC_MAX98371 imply SND_SOC_MAX98504 - imply SND_SOC_MAX98520 imply SND_SOC_MAX9867 imply SND_SOC_MAX98925 imply SND_SOC_MAX98926 @@ -142,7 +138,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_NAU8315 imply SND_SOC_NAU8540 imply SND_SOC_NAU8810 - imply SND_SOC_NAU8821 imply SND_SOC_NAU8822 imply SND_SOC_NAU8824 imply SND_SOC_NAU8825 @@ -188,7 +183,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_RT5677 imply SND_SOC_RT5682_I2C imply SND_SOC_RT5682_SDW - imply SND_SOC_RT5682S imply SND_SOC_RT700_SDW imply SND_SOC_RT711_SDW imply SND_SOC_RT711_SDCA_SDW @@ -196,7 +190,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_RT715_SDCA_SDW imply SND_SOC_RT1308_SDW imply SND_SOC_RT1316_SDW - imply SND_SOC_RT9120 imply SND_SOC_SDW_MOCKUP imply SND_SOC_SGTL5000 imply SND_SOC_SI476X @@ -224,7 +217,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_TDA7419 imply SND_SOC_TFA9879 imply SND_SOC_TFA989X - imply SND_SOC_TLV320ADC3XXX imply SND_SOC_TLV320ADCX140 imply SND_SOC_TLV320AIC23_I2C imply SND_SOC_TLV320AIC23_SPI @@ -341,22 +333,17 @@ 
config SND_SOC_WM_HUBS config SND_SOC_WM_ADSP tristate - select CS_DSP select SND_SOC_COMPRESS default y if SND_SOC_MADERA=y default y if SND_SOC_CS47L24=y default y if SND_SOC_WM5102=y default y if SND_SOC_WM5110=y default y if SND_SOC_WM2200=y - default y if SND_SOC_CS35L41_SPI=y - default y if SND_SOC_CS35L41_I2C=y default m if SND_SOC_MADERA=m default m if SND_SOC_CS47L24=m default m if SND_SOC_WM5102=m default m if SND_SOC_WM5110=m default m if SND_SOC_WM2200=m - default m if SND_SOC_CS35L41_SPI=m - default m if SND_SOC_CS35L41_I2C=m config SND_SOC_AB8500_CODEC tristate @@ -528,16 +515,6 @@ config SND_SOC_AK4118 depends on I2C select REGMAP_I2C -config SND_SOC_AK4375 - tristate "AKM AK4375 CODEC" - depends on I2C - select REGMAP_I2C - help - Enable support for the Asahi-Kasei AK4375 codec. - - To compile this driver as a module, choose M here: the module - will be called snd-soc-ak4375. - config SND_SOC_AK4458 tristate "AKM AK4458 CODEC" depends on I2C @@ -628,26 +605,6 @@ config SND_SOC_CS35L36 tristate "Cirrus Logic CS35L36 CODEC" depends on I2C -config SND_SOC_CS35L41_LIB - tristate - -config SND_SOC_CS35L41 - tristate - -config SND_SOC_CS35L41_SPI - tristate "Cirrus Logic CS35L41 CODEC (SPI)" - depends on SPI_MASTER - select SND_SOC_CS35L41_LIB - select SND_SOC_CS35L41 - select REGMAP_SPI - -config SND_SOC_CS35L41_I2C - tristate "Cirrus Logic CS35L41 CODEC (I2C)" - depends on I2C - select SND_SOC_CS35L41_LIB - select SND_SOC_CS35L41 - select REGMAP_I2C - config SND_SOC_CS42L42 tristate "Cirrus Logic CS42L42 CODEC" depends on I2C @@ -975,17 +932,6 @@ config SND_SOC_MAX98927 tristate "Maxim Integrated MAX98927 Speaker Amplifier" depends on I2C -config SND_SOC_MAX98520 - tristate "Maxim Integrated MAX98520 Speaker Amplifier" - depends on I2C - help - Enable support for Maxim Integrated MAX98520 audio - amplifier, which implements a tripler charge pump - based boost converter and supports sample rates of - 8KHz to 192KHz. - - To compile this driver as a module, choose M here. - config SND_SOC_MAX98373 tristate @@ -1317,10 +1263,6 @@ config SND_SOC_RT5682_SDW select SND_SOC_RT5682 select REGMAP_SOUNDWIRE -config SND_SOC_RT5682S - tristate - depends on I2C - config SND_SOC_RT700 tristate @@ -1360,15 +1302,6 @@ config SND_SOC_RT715_SDCA_SDW select REGMAP_SOUNDWIRE select REGMAP_SOUNDWIRE_MBQ -config SND_SOC_RT9120 - tristate "Richtek RT9120 Stereo Class-D Amplifier" - depends on I2C - select REGMAP_I2C - select GPIOLIB - help - Enable support for Richtek RT9120 20W, stereo, inductor-less, - high-efficiency Class-D audio amplifier. - config SND_SOC_SDW_MOCKUP tristate "SoundWire mockup codec" depends on EXPERT @@ -1529,13 +1462,6 @@ config SND_SOC_TFA989X Note that the driver currently bypasses the built-in "CoolFlux DSP" and does not support (hardware) volume control. -config SND_SOC_TLV320ADC3XXX - tristate "Texas Instruments TLV320ADC3001/3101 audio ADC" - depends on I2C - help - Enable support for Texas Instruments TLV320ADC3001 and TLV320ADC3101 - ADCs. 
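The ep93xx-i2s.c hunk a little further up trades clk_prepare_enable()/clk_disable_unprepare() for bare clk_enable()/clk_disable(). The difference is which half of the clk API is allowed to sleep; a sketch of the combined helper, paraphrasing the inline helper from <linux/clk.h> and assuming the standard common-clock API:

#include <linux/clk.h>

/* prepare may sleep (PLL lock, I2C/SPI register access); enable must
 * be callable from atomic context (typically a single gate-bit flip). */
static inline int clk_prepare_enable_sketch(struct clk *clk)
{
    int ret;

    ret = clk_prepare(clk);        /* may sleep */
    if (ret)
        return ret;

    ret = clk_enable(clk);         /* atomic-safe */
    if (ret)
        clk_unprepare(clk);        /* keep prepare/enable balanced */

    return ret;
}

The clk_enable()-only form the hunk restores is safe only where the clock is already prepared, or on platforms whose clk implementation has no prepare step; the prepare/enable pair is the conservative choice in code that may sleep.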
- config SND_SOC_TLV320AIC23 tristate @@ -1996,10 +1922,6 @@ config SND_SOC_NAU8810 tristate "Nuvoton Technology Corporation NAU88C10 CODEC" depends on I2C -config SND_SOC_NAU8821 - tristate "Nuvoton Technology Corporation NAU88L21 CODEC" - depends on I2C - config SND_SOC_NAU8822 tristate "Nuvoton Technology Corporation NAU88C22 CODEC" depends on I2C diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 8a93e13e9a..58493a2b0a 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -34,7 +34,6 @@ snd-soc-adav803-objs := adav803.o snd-soc-ads117x-objs := ads117x.o snd-soc-ak4104-objs := ak4104.o snd-soc-ak4118-objs := ak4118.o -snd-soc-ak4375-objs := ak4375.o snd-soc-ak4458-objs := ak4458.o snd-soc-ak4535-objs := ak4535.o snd-soc-ak4554-objs := ak4554.o @@ -55,10 +54,6 @@ snd-soc-cs35l33-objs := cs35l33.o snd-soc-cs35l34-objs := cs35l34.o snd-soc-cs35l35-objs := cs35l35.o snd-soc-cs35l36-objs := cs35l36.o -snd-soc-cs35l41-lib-objs := cs35l41-lib.o -snd-soc-cs35l41-objs := cs35l41.o -snd-soc-cs35l41-spi-objs := cs35l41-spi.o -snd-soc-cs35l41-i2c-objs := cs35l41-i2c.o snd-soc-cs42l42-objs := cs42l42.o snd-soc-cs42l51-objs := cs42l51.o snd-soc-cs42l51-i2c-objs := cs42l51-i2c.o @@ -130,7 +125,6 @@ snd-soc-max9867-objs := max9867.o snd-soc-max98925-objs := max98925.o snd-soc-max98926-objs := max98926.o snd-soc-max98927-objs := max98927.o -snd-soc-max98520-objs := max98520.o snd-soc-max98373-objs := max98373.o snd-soc-max98373-i2c-objs := max98373-i2c.o snd-soc-max98373-sdw-objs := max98373-sdw.o @@ -149,7 +143,6 @@ snd-soc-mt6660-objs := mt6660.o snd-soc-nau8315-objs := nau8315.o snd-soc-nau8540-objs := nau8540.o snd-soc-nau8810-objs := nau8810.o -snd-soc-nau8821-objs := nau8821.o snd-soc-nau8822-objs := nau8822.o snd-soc-nau8824-objs := nau8824.o snd-soc-nau8825-objs := nau8825.o @@ -208,13 +201,11 @@ snd-soc-rt5677-spi-objs := rt5677-spi.o snd-soc-rt5682-objs := rt5682.o snd-soc-rt5682-sdw-objs := rt5682-sdw.o snd-soc-rt5682-i2c-objs := rt5682-i2c.o -snd-soc-rt5682s-objs := rt5682s.o snd-soc-rt700-objs := rt700.o rt700-sdw.o snd-soc-rt711-objs := rt711.o rt711-sdw.o snd-soc-rt711-sdca-objs := rt711-sdca.o rt711-sdca-sdw.o snd-soc-rt715-objs := rt715.o rt715-sdw.o snd-soc-rt715-sdca-objs := rt715-sdca.o rt715-sdca-sdw.o -snd-soc-rt9120-objs := rt9120.o snd-soc-sdw-mockup-objs := sdw-mockup.o snd-soc-sgtl5000-objs := sgtl5000.o snd-soc-alc5623-objs := alc5623.o @@ -245,7 +236,6 @@ snd-soc-tas2770-objs := tas2770.o snd-soc-tfa9879-objs := tfa9879.o snd-soc-tas5713-objs := tas5713.o snd-soc-tfa989x-objs := tfa989x.o -snd-soc-tlv320adc3xxx-objs := tlv320adc3xxx.o snd-soc-tlv320aic23-objs := tlv320aic23.o snd-soc-tlv320aic23-i2c-objs := tlv320aic23-i2c.o snd-soc-tlv320aic23-spi-objs := tlv320aic23-spi.o @@ -377,7 +367,6 @@ obj-$(CONFIG_SND_SOC_ADAV803) += snd-soc-adav803.o obj-$(CONFIG_SND_SOC_ADS117X) += snd-soc-ads117x.o obj-$(CONFIG_SND_SOC_AK4104) += snd-soc-ak4104.o obj-$(CONFIG_SND_SOC_AK4118) += snd-soc-ak4118.o -obj-$(CONFIG_SND_SOC_AK4375) += snd-soc-ak4375.o obj-$(CONFIG_SND_SOC_AK4458) += snd-soc-ak4458.o obj-$(CONFIG_SND_SOC_AK4535) += snd-soc-ak4535.o obj-$(CONFIG_SND_SOC_AK4554) += snd-soc-ak4554.o @@ -400,10 +389,6 @@ obj-$(CONFIG_SND_SOC_CS35L33) += snd-soc-cs35l33.o obj-$(CONFIG_SND_SOC_CS35L34) += snd-soc-cs35l34.o obj-$(CONFIG_SND_SOC_CS35L35) += snd-soc-cs35l35.o obj-$(CONFIG_SND_SOC_CS35L36) += snd-soc-cs35l36.o -obj-$(CONFIG_SND_SOC_CS35L41) += snd-soc-cs35l41.o -obj-$(CONFIG_SND_SOC_CS35L41_LIB) += snd-soc-cs35l41-lib.o 
-obj-$(CONFIG_SND_SOC_CS35L41_SPI) += snd-soc-cs35l41-spi.o -obj-$(CONFIG_SND_SOC_CS35L41_I2C) += snd-soc-cs35l41-i2c.o obj-$(CONFIG_SND_SOC_CS42L42) += snd-soc-cs42l42.o obj-$(CONFIG_SND_SOC_CS42L51) += snd-soc-cs42l51.o obj-$(CONFIG_SND_SOC_CS42L51_I2C) += snd-soc-cs42l51-i2c.o @@ -471,7 +456,6 @@ obj-$(CONFIG_SND_SOC_MAX9867) += snd-soc-max9867.o obj-$(CONFIG_SND_SOC_MAX98925) += snd-soc-max98925.o obj-$(CONFIG_SND_SOC_MAX98926) += snd-soc-max98926.o obj-$(CONFIG_SND_SOC_MAX98927) += snd-soc-max98927.o -obj-$(CONFIG_SND_SOC_MAX98520) += snd-soc-max98520.o obj-$(CONFIG_SND_SOC_MAX98373) += snd-soc-max98373.o obj-$(CONFIG_SND_SOC_MAX98373_I2C) += snd-soc-max98373-i2c.o obj-$(CONFIG_SND_SOC_MAX98373_SDW) += snd-soc-max98373-sdw.o @@ -490,7 +474,6 @@ obj-$(CONFIG_SND_SOC_MT6660) += snd-soc-mt6660.o obj-$(CONFIG_SND_SOC_NAU8315) += snd-soc-nau8315.o obj-$(CONFIG_SND_SOC_NAU8540) += snd-soc-nau8540.o obj-$(CONFIG_SND_SOC_NAU8810) += snd-soc-nau8810.o -obj-$(CONFIG_SND_SOC_NAU8821) += snd-soc-nau8821.o obj-$(CONFIG_SND_SOC_NAU8822) += snd-soc-nau8822.o obj-$(CONFIG_SND_SOC_NAU8824) += snd-soc-nau8824.o obj-$(CONFIG_SND_SOC_NAU8825) += snd-soc-nau8825.o @@ -550,13 +533,11 @@ obj-$(CONFIG_SND_SOC_RT5677_SPI) += snd-soc-rt5677-spi.o obj-$(CONFIG_SND_SOC_RT5682) += snd-soc-rt5682.o obj-$(CONFIG_SND_SOC_RT5682_I2C) += snd-soc-rt5682-i2c.o obj-$(CONFIG_SND_SOC_RT5682_SDW) += snd-soc-rt5682-sdw.o -obj-$(CONFIG_SND_SOC_RT5682S) += snd-soc-rt5682s.o obj-$(CONFIG_SND_SOC_RT700) += snd-soc-rt700.o obj-$(CONFIG_SND_SOC_RT711) += snd-soc-rt711.o obj-$(CONFIG_SND_SOC_RT711_SDCA_SDW) += snd-soc-rt711-sdca.o obj-$(CONFIG_SND_SOC_RT715) += snd-soc-rt715.o obj-$(CONFIG_SND_SOC_RT715_SDCA_SDW) += snd-soc-rt715-sdca.o -obj-$(CONFIG_SND_SOC_RT9120) += snd-soc-rt9120.o obj-$(CONFIG_SND_SOC_SDW_MOCKUP) += snd-soc-sdw-mockup.o obj-$(CONFIG_SND_SOC_SGTL5000) += snd-soc-sgtl5000.o obj-$(CONFIG_SND_SOC_SIGMADSP) += snd-soc-sigmadsp.o @@ -587,7 +568,6 @@ obj-$(CONFIG_SND_SOC_TAS2770) += snd-soc-tas2770.o obj-$(CONFIG_SND_SOC_TAS5713) += snd-soc-tas5713.o obj-$(CONFIG_SND_SOC_TFA9879) += snd-soc-tfa9879.o obj-$(CONFIG_SND_SOC_TFA989X) += snd-soc-tfa989x.o -obj-$(CONFIG_SND_SOC_TLV320ADC3XXX) += snd-soc-tlv320adc3xxx.o obj-$(CONFIG_SND_SOC_TLV320AIC23) += snd-soc-tlv320aic23.o obj-$(CONFIG_SND_SOC_TLV320AIC23_I2C) += snd-soc-tlv320aic23-i2c.o obj-$(CONFIG_SND_SOC_TLV320AIC23_SPI) += snd-soc-tlv320aic23-spi.o diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index aefafb0b7b..5525e1ccab 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c @@ -2104,26 +2104,26 @@ static int ab8500_codec_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) BIT(AB8500_DIGIFCONF3_IF0MASTER); val = 0; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: /* codec clk & FRM master */ dev_dbg(dai->component->dev, - "%s: IF0 Master-mode: AB8500 provider.\n", __func__); + "%s: IF0 Master-mode: AB8500 master.\n", __func__); val |= BIT(AB8500_DIGIFCONF3_IF0MASTER); break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: /* codec clk & FRM slave */ dev_dbg(dai->component->dev, - "%s: IF0 Master-mode: AB8500 consumer.\n", __func__); + "%s: IF0 Master-mode: AB8500 slave.\n", __func__); break; - case SND_SOC_DAIFMT_CBC_CFP: - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBS_CFM: /* codec clk slave & FRM master */ + case SND_SOC_DAIFMT_CBM_CFS: /* codec clk master & frame 
slave */ dev_err(dai->component->dev, - "%s: ERROR: The device is either a provider or a consumer.\n", + "%s: ERROR: The device is either a master or a slave.\n", __func__); fallthrough; default: dev_err(dai->component->dev, - "%s: ERROR: Unsupporter clocking mask 0x%x\n", - __func__, fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK); + "%s: ERROR: Unsupporter master mask 0x%x\n", + __func__, fmt & SND_SOC_DAIFMT_MASTER_MASK); return -EINVAL; } diff --git a/sound/soc/codecs/ad1836.c b/sound/soc/codecs/ad1836.c index 29e1689da6..08a5651bed 100644 --- a/sound/soc/codecs/ad1836.c +++ b/sound/soc/codecs/ad1836.c @@ -148,9 +148,9 @@ static int ad1836_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - /* ALCLK,ABCLK are both output, AD1836 can only be provider */ - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + /* ALCLK,ABCLK are both output, AD1836 can only be master */ + case SND_SOC_DAIFMT_CBM_CFM: break; default: return -EINVAL; diff --git a/sound/soc/codecs/ad193x.c b/sound/soc/codecs/ad193x.c index 30b98b4267..278a55af15 100644 --- a/sound/soc/codecs/ad193x.c +++ b/sound/soc/codecs/ad193x.c @@ -243,22 +243,22 @@ static int ad193x_set_dai_fmt(struct snd_soc_dai *codec_dai, if (fmt & SND_SOC_DAIFMT_DSP_A) dac_fmt ^= AD193X_DAC_LEFT_HIGH; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: /* codec clk & frm master */ adc_fmt |= AD193X_ADC_LCR_MASTER; adc_fmt |= AD193X_ADC_BCLK_MASTER; dac_fmt |= AD193X_DAC_LCR_MASTER; dac_fmt |= AD193X_DAC_BCLK_MASTER; break; - case SND_SOC_DAIFMT_CBC_CFP: + case SND_SOC_DAIFMT_CBS_CFM: /* codec clk slave & frm master */ adc_fmt |= AD193X_ADC_LCR_MASTER; dac_fmt |= AD193X_DAC_LCR_MASTER; break; - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFS: /* codec clk master & frame slave */ adc_fmt |= AD193X_ADC_BCLK_MASTER; dac_fmt |= AD193X_DAC_BCLK_MASTER; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: /* codec clk & frm slave */ break; default: return -EINVAL; diff --git a/sound/soc/codecs/adau1372.c b/sound/soc/codecs/adau1372.c index 1faa4c4263..6811a8b386 100644 --- a/sound/soc/codecs/adau1372.c +++ b/sound/soc/codecs/adau1372.c @@ -30,7 +30,7 @@ struct adau1372 { void (*switch_mode)(struct device *dev); bool use_pll; bool enabled; - bool clock_provider; + bool master; struct snd_pcm_hw_constraint_list rate_constraints; unsigned int slot_width; @@ -578,13 +578,13 @@ static int adau1372_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) unsigned int sai0 = 0, sai1 = 0; bool invert_lrclk = false; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: - adau1372->clock_provider = true; + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + adau1372->master = true; sai1 |= ADAU1372_SAI1_MS; break; - case SND_SOC_DAIFMT_CBC_CFC: - adau1372->clock_provider = false; + case SND_SOC_DAIFMT_CBS_CFS: + adau1372->master = false; break; default: return -EINVAL; @@ -714,7 +714,7 @@ static int adau1372_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, break; case 4: sai0 = ADAU1372_SAI0_SAI_TDM4; - if (adau1372->clock_provider) + if (adau1372->master) adau1372->rate_constraints.mask = ADAU1372_RATE_MASK_TDM4_MASTER; else adau1372->rate_constraints.mask = ADAU1372_RATE_MASK_TDM4; diff --git a/sound/soc/codecs/adau1373.c b/sound/soc/codecs/adau1373.c index 46128aacea..9887aa6f0b 
100644 --- a/sound/soc/codecs/adau1373.c +++ b/sound/soc/codecs/adau1373.c @@ -28,7 +28,7 @@ struct adau1373_dai { unsigned int clk_src; unsigned int sysclk; bool enable_src; - bool clock_provider; + bool master; }; struct adau1373 { @@ -827,7 +827,7 @@ static int adau1373_check_aif_clk(struct snd_soc_dapm_widget *source, dai = sink->name[3] - '1'; - if (!adau1373->dais[dai].clock_provider) + if (!adau1373->dais[dai].master) return 0; if (adau1373->dais[dai].clk_src == ADAU1373_CLK_SRC_PLL1) @@ -1102,14 +1102,14 @@ static int adau1373_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) struct adau1373_dai *adau1373_dai = &adau1373->dais[dai->id]; unsigned int ctrl; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: ctrl = ADAU1373_DAI_MASTER; - adau1373_dai->clock_provider = true; + adau1373_dai->master = true; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: ctrl = 0; - adau1373_dai->clock_provider = false; + adau1373_dai->master = false; break; default: return -EINVAL; diff --git a/sound/soc/codecs/adau1701.c b/sound/soc/codecs/adau1701.c index dba9af7531..5ce7469756 100644 --- a/sound/soc/codecs/adau1701.c +++ b/sound/soc/codecs/adau1701.c @@ -13,8 +13,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -106,8 +106,8 @@ static const char * const supply_names[] = { }; struct adau1701 { - struct gpio_desc *gpio_nreset; - struct gpio_descs *gpio_pll_mode; + int gpio_nreset; + int gpio_pll_mode[2]; unsigned int dai_fmt; unsigned int pll_clkdiv; unsigned int sysclk; @@ -303,41 +303,39 @@ static int adau1701_reset(struct snd_soc_component *component, unsigned int clkd struct adau1701 *adau1701 = snd_soc_component_get_drvdata(component); int ret; - DECLARE_BITMAP(values, 2); sigmadsp_reset(adau1701->sigmadsp); - if (clkdiv != ADAU1707_CLKDIV_UNSET && adau1701->gpio_pll_mode) { + if (clkdiv != ADAU1707_CLKDIV_UNSET && + gpio_is_valid(adau1701->gpio_pll_mode[0]) && + gpio_is_valid(adau1701->gpio_pll_mode[1])) { switch (clkdiv) { case 64: - __assign_bit(0, values, 0); - __assign_bit(1, values, 0); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[0], 0); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[1], 0); break; case 256: - __assign_bit(0, values, 0); - __assign_bit(1, values, 1); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[0], 0); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[1], 1); break; case 384: - __assign_bit(0, values, 1); - __assign_bit(1, values, 0); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[0], 1); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[1], 0); break; - case 0: /* fallback */ + case 0: /* fallback */ case 512: - __assign_bit(0, values, 1); - __assign_bit(1, values, 1); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[0], 1); + gpio_set_value_cansleep(adau1701->gpio_pll_mode[1], 1); break; } - gpiod_set_array_value_cansleep(adau1701->gpio_pll_mode->ndescs, - adau1701->gpio_pll_mode->desc, adau1701->gpio_pll_mode->info, - values); } adau1701->pll_clkdiv = clkdiv; - if (adau1701->gpio_nreset) { - gpiod_set_value_cansleep(adau1701->gpio_nreset, 0); + if (gpio_is_valid(adau1701->gpio_nreset)) { + gpio_set_value_cansleep(adau1701->gpio_nreset, 0); /* minimum reset time is 20ns */ udelay(1); - gpiod_set_value_cansleep(adau1701->gpio_nreset, 1); + gpio_set_value_cansleep(adau1701->gpio_nreset, 1); /* power-up time may be as long as 85ms */ mdelay(85); } @@ -484,13 +482,13 @@ static int 
adau1701_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int serictl = 0x00, seroctl = 0x00; bool invert_lrclk; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: /* master, 64-bits per sample, 1 frame per sample */ seroctl |= ADAU1701_SEROCTL_MASTER | ADAU1701_SEROCTL_OBF16 | ADAU1701_SEROCTL_OLF1024; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: break; default: return -EINVAL; @@ -721,8 +719,8 @@ static void adau1701_remove(struct snd_soc_component *component) { struct adau1701 *adau1701 = snd_soc_component_get_drvdata(component); - if (adau1701->gpio_nreset) - gpiod_set_value_cansleep(adau1701->gpio_nreset, 0); + if (gpio_is_valid(adau1701->gpio_nreset)) + gpio_set_value_cansleep(adau1701->gpio_nreset, 0); regulator_bulk_disable(ARRAY_SIZE(adau1701->supplies), adau1701->supplies); } @@ -790,6 +788,8 @@ static int adau1701_i2c_probe(struct i2c_client *client, { struct adau1701 *adau1701; struct device *dev = &client->dev; + int gpio_nreset = -EINVAL; + int gpio_pll_mode[2] = { -EINVAL, -EINVAL }; int ret, i; adau1701 = devm_kzalloc(dev, sizeof(*adau1701), GFP_KERNEL); @@ -823,6 +823,26 @@ static int adau1701_i2c_probe(struct i2c_client *client, if (dev->of_node) { + gpio_nreset = of_get_named_gpio(dev->of_node, "reset-gpio", 0); + if (gpio_nreset < 0 && gpio_nreset != -ENOENT) { + ret = gpio_nreset; + goto exit_regulators_disable; + } + + gpio_pll_mode[0] = of_get_named_gpio(dev->of_node, + "adi,pll-mode-gpios", 0); + if (gpio_pll_mode[0] < 0 && gpio_pll_mode[0] != -ENOENT) { + ret = gpio_pll_mode[0]; + goto exit_regulators_disable; + } + + gpio_pll_mode[1] = of_get_named_gpio(dev->of_node, + "adi,pll-mode-gpios", 1); + if (gpio_pll_mode[1] < 0 && gpio_pll_mode[1] != -ENOENT) { + ret = gpio_pll_mode[1]; + goto exit_regulators_disable; + } + of_property_read_u32(dev->of_node, "adi,pll-clkdiv", &adau1701->pll_clkdiv); @@ -831,20 +851,32 @@ static int adau1701_i2c_probe(struct i2c_client *client, ARRAY_SIZE(adau1701->pin_config)); } - adau1701->gpio_nreset = devm_gpiod_get_optional(dev, "reset", GPIOD_IN); - - if (IS_ERR(adau1701->gpio_nreset)) { - ret = PTR_ERR(adau1701->gpio_nreset); - goto exit_regulators_disable; + if (gpio_is_valid(gpio_nreset)) { + ret = devm_gpio_request_one(dev, gpio_nreset, GPIOF_OUT_INIT_LOW, + "ADAU1701 Reset"); + if (ret < 0) + goto exit_regulators_disable; } - adau1701->gpio_pll_mode = devm_gpiod_get_array_optional(dev, "adi,pll-mode", GPIOD_OUT_LOW); + if (gpio_is_valid(gpio_pll_mode[0]) && + gpio_is_valid(gpio_pll_mode[1])) { + ret = devm_gpio_request_one(dev, gpio_pll_mode[0], + GPIOF_OUT_INIT_LOW, + "ADAU1701 PLL mode 0"); + if (ret < 0) + goto exit_regulators_disable; - if (IS_ERR(adau1701->gpio_pll_mode)) { - ret = PTR_ERR(adau1701->gpio_pll_mode); - goto exit_regulators_disable; + ret = devm_gpio_request_one(dev, gpio_pll_mode[1], + GPIOF_OUT_INIT_LOW, + "ADAU1701 PLL mode 1"); + if (ret < 0) + goto exit_regulators_disable; } + adau1701->gpio_nreset = gpio_nreset; + adau1701->gpio_pll_mode[0] = gpio_pll_mode[0]; + adau1701->gpio_pll_mode[1] = gpio_pll_mode[1]; + i2c_set_clientdata(client, adau1701); adau1701->sigmadsp = devm_sigmadsp_init_i2c(client, diff --git a/sound/soc/codecs/adau17x1.c b/sound/soc/codecs/adau17x1.c index af05463af4..8aae7ab740 100644 --- a/sound/soc/codecs/adau17x1.c +++ b/sound/soc/codecs/adau17x1.c @@ -556,12 +556,12 @@ static int adau17x1_set_dai_fmt(struct snd_soc_dai *dai, unsigned int 
ctrl0_mask; int lrclk_pol; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: ctrl0 = ADAU17X1_SERIAL_PORT0_MASTER; adau->master = true; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: ctrl0 = 0; adau->master = false; break; diff --git a/sound/soc/codecs/adau1977.c b/sound/soc/codecs/adau1977.c index 5fcbdf2ec3..e347a48131 100644 --- a/sound/soc/codecs/adau1977.c +++ b/sound/soc/codecs/adau1977.c @@ -124,10 +124,10 @@ struct adau1977 { struct device *dev; void (*switch_mode)(struct device *dev); - unsigned int max_clock_provider_fs; + unsigned int max_master_fs; unsigned int slot_width; bool enabled; - bool clock_provider; + bool master; }; static const struct reg_default adau1977_reg_defaults[] = { @@ -330,7 +330,7 @@ static int adau1977_hw_params(struct snd_pcm_substream *substream, ctrl0_mask |= ADAU1977_SAI_CTRL0_FMT_MASK; } - if (adau1977->clock_provider) { + if (adau1977->master) { switch (params_width(params)) { case 16: ctrl1 = ADAU1977_SAI_CTRL1_DATA_WIDTH_16BIT; @@ -504,7 +504,7 @@ static int adau1977_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, if (slots == 0) { /* 0 = No fixed slot width */ adau1977->slot_width = 0; - adau1977->max_clock_provider_fs = 192000; + adau1977->max_master_fs = 192000; return regmap_update_bits(adau1977->regmap, ADAU1977_REG_SAI_CTRL0, ADAU1977_SAI_CTRL0_SAI_MASK, ADAU1977_SAI_CTRL0_SAI_I2S); @@ -533,7 +533,7 @@ static int adau1977_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, break; case 24: /* We can only generate 16 bit or 32 bit wide slots */ - if (adau1977->clock_provider) + if (adau1977->master) return -EINVAL; ctrl1 = ADAU1977_SAI_CTRL1_SLOT_WIDTH_24; break; @@ -593,8 +593,8 @@ static int adau1977_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, adau1977->slot_width = width; - /* In clock provider mode the maximum bitclock is 24.576 MHz */ - adau1977->max_clock_provider_fs = min(192000, 24576000 / width / slots); + /* In master mode the maximum bitclock is 24.576 MHz */ + adau1977->max_master_fs = min(192000, 24576000 / width / slots); return 0; } @@ -620,13 +620,13 @@ static int adau1977_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) bool invert_lrclk; int ret; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: - adau1977->clock_provider = false; + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + adau1977->master = false; break; - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFM: ctrl1 |= ADAU1977_SAI_CTRL1_MASTER; - adau1977->clock_provider = true; + adau1977->master = true; break; default: return -EINVAL; @@ -714,10 +714,9 @@ static int adau1977_startup(struct snd_pcm_substream *substream, snd_pcm_hw_constraint_list(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &adau1977->constraints); - if (adau1977->clock_provider) + if (adau1977->master) snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_RATE, 8000, - adau1977->max_clock_provider_fs); + SNDRV_PCM_HW_PARAM_RATE, 8000, adau1977->max_master_fs); if (formats != 0) snd_pcm_hw_constraint_mask64(substream->runtime, @@ -914,7 +913,7 @@ int adau1977_probe(struct device *dev, struct regmap *regmap, adau1977->type = type; adau1977->regmap = regmap; adau1977->switch_mode = switch_mode; - adau1977->max_clock_provider_fs = 192000; + adau1977->max_master_fs = 192000; adau1977->constraints.list = adau1977_rates; 
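Aside: every set_dai_fmt hunk above follows the same shape once it is back on the SND_SOC_DAIFMT_MASTER_MASK terminology. A minimal sketch of that shape, with a hypothetical register (MY_IFACE_REG) and bit (MY_IFACE_MASTER) standing in for each driver's own definitions:

#include <linux/bits.h>
#include <sound/soc.h>

#define MY_IFACE_REG	0x02		/* hypothetical interface register */
#define MY_IFACE_MASTER	BIT(0)		/* hypothetical master-mode bit */

struct my_codec_priv {
	bool master;
};

static int my_codec_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
{
	struct my_codec_priv *priv =
		snd_soc_component_get_drvdata(dai->component);
	unsigned int ctrl = 0;

	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
	case SND_SOC_DAIFMT_CBM_CFM:	/* codec drives BCLK and LRCLK */
		ctrl = MY_IFACE_MASTER;
		priv->master = true;
		break;
	case SND_SOC_DAIFMT_CBS_CFS:	/* codec is clocked externally */
		priv->master = false;
		break;
	default:			/* mixed modes not handled here */
		return -EINVAL;
	}

	return snd_soc_component_update_bits(dai->component, MY_IFACE_REG,
					     MY_IFACE_MASTER, ctrl);
}

Drivers that also accept the mixed CBM_CFS/CBS_CFM modes (ak4671 and ak5558 above) simply add the extra cases to the same switch.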
adau1977->constraints.count = ARRAY_SIZE(adau1977_rates); diff --git a/sound/soc/codecs/adav80x.c b/sound/soc/codecs/adav80x.c index 90f3a5e9e3..75a6491081 100644 --- a/sound/soc/codecs/adav80x.c +++ b/sound/soc/codecs/adav80x.c @@ -369,12 +369,12 @@ static int adav80x_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) unsigned int capture = 0x00; unsigned int playback = 0x00; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: capture |= ADAV80X_CAPTURE_MODE_MASTER; playback |= ADAV80X_PLAYBACK_MODE_MASTER; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: break; default: return -EINVAL; diff --git a/sound/soc/codecs/ak4104.c b/sound/soc/codecs/ak4104.c index dc4747c77a..979cfb165e 100644 --- a/sound/soc/codecs/ak4104.c +++ b/sound/soc/codecs/ak4104.c @@ -81,8 +81,8 @@ static int ak4104_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } - /* This device can only be consumer */ - if ((format & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) != SND_SOC_DAIFMT_CBC_CFC) + /* This device can only be slave */ + if ((format & SND_SOC_DAIFMT_MASTER_MASK) != SND_SOC_DAIFMT_CBS_CFS) return -EINVAL; ret = regmap_update_bits(ak4104->regmap, AK4104_REG_CONTROL1, diff --git a/sound/soc/codecs/ak4118.c b/sound/soc/codecs/ak4118.c index 2e6bafd2a8..5d46ae8556 100644 --- a/sound/soc/codecs/ak4118.c +++ b/sound/soc/codecs/ak4118.c @@ -151,8 +151,8 @@ static const struct snd_soc_dapm_route ak4118_dapm_routes[] = { }; -static int ak4118_set_dai_fmt_provider(struct ak4118_priv *ak4118, - unsigned int format) +static int ak4118_set_dai_fmt_master(struct ak4118_priv *ak4118, + unsigned int format) { int dif; @@ -173,8 +173,8 @@ static int ak4118_set_dai_fmt_provider(struct ak4118_priv *ak4118, return dif; } -static int ak4118_set_dai_fmt_consumer(struct ak4118_priv *ak4118, - unsigned int format) +static int ak4118_set_dai_fmt_slave(struct ak4118_priv *ak4118, + unsigned int format) { int dif; @@ -201,12 +201,14 @@ static int ak4118_set_dai_fmt(struct snd_soc_dai *dai, int dif; int ret = 0; - switch (format & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: - dif = ak4118_set_dai_fmt_provider(ak4118, format); + switch (format & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + /* component is master */ + dif = ak4118_set_dai_fmt_master(ak4118, format); break; - case SND_SOC_DAIFMT_CBC_CFC: - dif = ak4118_set_dai_fmt_consumer(ak4118, format); + case SND_SOC_DAIFMT_CBS_CFS: + /*component is slave */ + dif = ak4118_set_dai_fmt_slave(ak4118, format); break; default: ret = -ENOTSUPP; @@ -374,14 +376,20 @@ static int ak4118_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, ak4118); ak4118->reset = devm_gpiod_get(&i2c->dev, "reset", GPIOD_OUT_HIGH); - if (IS_ERR(ak4118->reset)) - return dev_err_probe(&i2c->dev, PTR_ERR(ak4118->reset), - "Failed to get reset\n"); + if (IS_ERR(ak4118->reset)) { + ret = PTR_ERR(ak4118->reset); + if (ret != -EPROBE_DEFER) + dev_err(&i2c->dev, "Failed to get reset: %d\n", ret); + return ret; + } ak4118->irq = devm_gpiod_get(&i2c->dev, "irq", GPIOD_IN); - if (IS_ERR(ak4118->irq)) - return dev_err_probe(&i2c->dev, PTR_ERR(ak4118->irq), - "Failed to get IRQ\n"); + if (IS_ERR(ak4118->irq)) { + ret = PTR_ERR(ak4118->irq); + if (ret != -EPROBE_DEFER) + dev_err(&i2c->dev, "Failed to get IRQ: %d\n", ret); + return ret; + } ret = devm_request_threaded_irq(&i2c->dev, gpiod_to_irq(ak4118->irq), NULL, ak4118_irq_handler, diff --git 
a/sound/soc/codecs/ak4458.c b/sound/soc/codecs/ak4458.c index baa9ff5d0c..29eb78702b 100644 --- a/sound/soc/codecs/ak4458.c +++ b/sound/soc/codecs/ak4458.c @@ -464,14 +464,14 @@ static int ak4458_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) struct ak4458_priv *ak4458 = snd_soc_component_get_drvdata(component); int ret; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: /* Consumer Mode */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: /* Slave Mode */ break; - case SND_SOC_DAIFMT_CBP_CFP: /* Provider Mode is not supported */ - case SND_SOC_DAIFMT_CBC_CFP: - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFM: /* Master Mode is not supported */ + case SND_SOC_DAIFMT_CBS_CFM: + case SND_SOC_DAIFMT_CBM_CFS: default: - dev_err(component->dev, "Clock provider mode unsupported\n"); + dev_err(component->dev, "Master mode unsupported\n"); return -EINVAL; } diff --git a/sound/soc/codecs/ak4642.c b/sound/soc/codecs/ak4642.c index c284dcc5af..c49c58eeb4 100644 --- a/sound/soc/codecs/ak4642.c +++ b/sound/soc/codecs/ak4642.c @@ -392,13 +392,13 @@ static int ak4642_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) data = MCKO | PMPLL; /* use MCKO */ bcko = 0; - /* set clocking for audio interface */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: data |= MS; bcko = BCKO_64; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: break; default: return -EINVAL; diff --git a/sound/soc/codecs/ak4671.c b/sound/soc/codecs/ak4671.c index e9d1251c42..eb435235b5 100644 --- a/sound/soc/codecs/ak4671.c +++ b/sound/soc/codecs/ak4671.c @@ -520,11 +520,11 @@ static int ak4671_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) /* set master/slave audio interface */ mode = snd_soc_component_read(component, AK4671_PLL_MODE_SELECT1); - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: mode |= AK4671_M_S; break; - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFS: mode &= ~(AK4671_M_S); break; default: diff --git a/sound/soc/codecs/ak5558.c b/sound/soc/codecs/ak5558.c index c94cfde3e4..37d4600b6f 100644 --- a/sound/soc/codecs/ak5558.c +++ b/sound/soc/codecs/ak5558.c @@ -198,13 +198,13 @@ static int ak5558_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) struct snd_soc_component *component = dai->component; u8 format; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: break; - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFM: break; - case SND_SOC_DAIFMT_CBC_CFP: - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBS_CFM: + case SND_SOC_DAIFMT_CBM_CFS: default: dev_err(dai->dev, "Clock mode unsupported"); return -EINVAL; diff --git a/sound/soc/codecs/alc5623.c b/sound/soc/codecs/alc5623.c index b10357a6d6..54f4898371 100644 --- a/sound/soc/codecs/alc5623.c +++ b/sound/soc/codecs/alc5623.c @@ -641,12 +641,12 @@ static int alc5623_set_dai_fmt(struct snd_soc_dai *codec_dai, struct snd_soc_component *component = codec_dai->component; u16 iface = 0; - /* set audio interface clocking */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + /* set master/slave audio interface */ + switch (fmt & 
SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: iface = ALC5623_DAI_SDP_MASTER_MODE; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: iface = ALC5623_DAI_SDP_SLAVE_MODE; break; default: diff --git a/sound/soc/codecs/alc5632.c b/sound/soc/codecs/alc5632.c index 6d7af3736a..79813882a9 100644 --- a/sound/soc/codecs/alc5632.c +++ b/sound/soc/codecs/alc5632.c @@ -815,12 +815,12 @@ static int alc5632_set_dai_fmt(struct snd_soc_dai *codec_dai, struct snd_soc_component *component = codec_dai->component; u16 iface = 0; - /* set audio interface clocking */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: iface = ALC5632_DAI_SDP_MASTER_MODE; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: iface = ALC5632_DAI_SDP_SLAVE_MODE; break; default: diff --git a/sound/soc/codecs/cpcap.c b/sound/soc/codecs/cpcap.c index ffdf8b615e..f1c13f42e1 100644 --- a/sound/soc/codecs/cpcap.c +++ b/sound/soc/codecs/cpcap.c @@ -1168,15 +1168,15 @@ static int cpcap_hifi_set_dai_fmt(struct snd_soc_dai *codec_dai, /* * "HiFi Playback" should always be configured as - * SND_SOC_DAIFMT_CBP_CFP - codec clk & frm provider + * SND_SOC_DAIFMT_CBM_CFM - codec clk & frm master * SND_SOC_DAIFMT_I2S - I2S mode */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: val &= ~BIT(CPCAP_BIT_SMB_ST_DAC); break; default: - dev_err(dev, "HiFi dai fmt failed: CPCAP should be provider"); + dev_err(dev, "HiFi dai fmt failed: CPCAP should be master"); return -EINVAL; } @@ -1318,15 +1318,15 @@ static int cpcap_voice_set_dai_fmt(struct snd_soc_dai *codec_dai, /* * "Voice Playback" and "Voice Capture" should always be - * configured as SND_SOC_DAIFMT_CBP_CFP - codec clk & frm - * provider + * configured as SND_SOC_DAIFMT_CBM_CFM - codec clk & frm + * master */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: val &= ~BIT(CPCAP_BIT_SMB_CDC); break; default: - dev_err(component->dev, "Voice dai fmt failed: CPCAP should be the provider"); + dev_err(component->dev, "Voice dai fmt failed: CPCAP should be the master"); val &= ~BIT(CPCAP_BIT_SMB_CDC); break; } diff --git a/sound/soc/codecs/cros_ec_codec.c b/sound/soc/codecs/cros_ec_codec.c index 9b92e1a0d1..a201d652ac 100644 --- a/sound/soc/codecs/cros_ec_codec.c +++ b/sound/soc/codecs/cros_ec_codec.c @@ -283,8 +283,8 @@ static int i2s_rx_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) struct ec_param_ec_codec_i2s_rx p; enum ec_codec_i2s_rx_daifmt daifmt; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: break; default: return -EINVAL; diff --git a/sound/soc/codecs/cs35l35.c b/sound/soc/codecs/cs35l35.c index 961a3e07e7..7a5588f1df 100644 --- a/sound/soc/codecs/cs35l35.c +++ b/sound/soc/codecs/cs35l35.c @@ -1311,7 +1311,7 @@ static int cs35l35_handle_of_data(struct i2c_client *i2c_client, pdata->gain_zc = of_property_read_bool(np, "cirrus,amp-gain-zc"); classh = of_get_child_by_name(np, "cirrus,classh-internal-algo"); - classh_config->classh_algo_enable = (classh != NULL); + classh_config->classh_algo_enable = classh ? 
true : false; if (classh_config->classh_algo_enable) { classh_config->classh_bst_override = diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c index 4aaee1873a..b49cb92d7b 100644 --- a/sound/soc/codecs/cs4265.c +++ b/sound/soc/codecs/cs4265.c @@ -150,7 +150,6 @@ static const struct snd_kcontrol_new cs4265_snd_controls[] = { SOC_SINGLE("E to F Buffer Disable Switch", CS4265_SPDIF_CTL1, 6, 1, 0), SOC_ENUM("C Data Access", cam_mode_enum), - SOC_SINGLE("SPDIF Switch", CS4265_SPDIF_CTL2, 5, 1, 1), SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2, 3, 1, 0), SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum), @@ -186,7 +185,7 @@ static const struct snd_soc_dapm_widget cs4265_dapm_widgets[] = { SND_SOC_DAPM_SWITCH("Loopback", SND_SOC_NOPM, 0, 0, &loopback_ctl), - SND_SOC_DAPM_SWITCH("SPDIF", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SWITCH("SPDIF", CS4265_SPDIF_CTL2, 5, 1, &spdif_switch), SND_SOC_DAPM_SWITCH("DAC", CS4265_PWRCTL, 1, 1, &dac_switch), @@ -611,8 +610,8 @@ static int cs4265_i2c_probe(struct i2c_client *i2c_client, if (devid != CS4265_CHIP_ID_VAL) { ret = -ENODEV; dev_err(&i2c_client->dev, - "CS4265 Part Number ID: 0x%x Expected: 0x%x\n", - devid >> 4, CS4265_CHIP_ID_VAL >> 4); + "CS4265 Device ID (%X). Expected %X\n", + devid, CS4265_CHIP_ID); return ret; } dev_info(&i2c_client->dev, @@ -626,16 +625,6 @@ static int cs4265_i2c_probe(struct i2c_client *i2c_client, ARRAY_SIZE(cs4265_dai)); } -static int cs4265_i2c_remove(struct i2c_client *i2c) -{ - struct cs4265_private *cs4265 = i2c_get_clientdata(i2c); - - if (cs4265->reset_gpio) - gpiod_set_value_cansleep(cs4265->reset_gpio, 0); - - return 0; -} - static const struct of_device_id cs4265_of_match[] = { { .compatible = "cirrus,cs4265", }, { } @@ -655,7 +644,6 @@ static struct i2c_driver cs4265_i2c_driver = { }, .id_table = cs4265_id, .probe = cs4265_i2c_probe, - .remove = cs4265_i2c_remove, }; module_i2c_driver(cs4265_i2c_driver); diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 43d98bdb5b..762d9de73d 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -40,9 +41,9 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_FRZ_CTL, 0x00 }, { CS42L42_SRC_CTL, 0x10 }, + { CS42L42_MCLK_STATUS, 0x02 }, { CS42L42_MCLK_CTL, 0x02 }, { CS42L42_SFTRAMP_RATE, 0xA4 }, - { CS42L42_SLOW_START_ENABLE, 0x70 }, { CS42L42_I2C_DEBOUNCE, 0x88 }, { CS42L42_I2C_STRETCH, 0x03 }, { CS42L42_I2C_TIMEOUT, 0xB7 }, @@ -52,12 +53,15 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_RSENSE_CTL1, 0x40 }, { CS42L42_RSENSE_CTL2, 0x00 }, { CS42L42_OSC_SWITCH, 0x00 }, + { CS42L42_OSC_SWITCH_STATUS, 0x05 }, { CS42L42_RSENSE_CTL3, 0x1B }, { CS42L42_TSENSE_CTL, 0x1B }, { CS42L42_TSRS_INT_DISABLE, 0x00 }, + { CS42L42_TRSENSE_STATUS, 0x00 }, { CS42L42_HSDET_CTL1, 0x77 }, { CS42L42_HSDET_CTL2, 0x00 }, { CS42L42_HS_SWITCH_CTL, 0xF3 }, + { CS42L42_HS_DET_STATUS, 0x00 }, { CS42L42_HS_CLAMP_DISABLE, 0x00 }, { CS42L42_MCLK_SRC_SEL, 0x00 }, { CS42L42_SPDIF_CLK_CFG, 0x00 }, @@ -71,6 +75,18 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_IN_ASRC_CLK, 0x00 }, { CS42L42_OUT_ASRC_CLK, 0x00 }, { CS42L42_PLL_DIV_CFG1, 0x00 }, + { CS42L42_ADC_OVFL_STATUS, 0x00 }, + { CS42L42_MIXER_STATUS, 0x00 }, + { CS42L42_SRC_STATUS, 0x00 }, + { CS42L42_ASP_RX_STATUS, 0x00 }, + { CS42L42_ASP_TX_STATUS, 0x00 }, + { CS42L42_CODEC_STATUS, 0x00 }, + { CS42L42_DET_INT_STATUS1, 0x00 }, + { 
CS42L42_DET_INT_STATUS2, 0x00 }, + { CS42L42_SRCPL_INT_STATUS, 0x00 }, + { CS42L42_VPMON_STATUS, 0x00 }, + { CS42L42_PLL_LOCK_STATUS, 0x00 }, + { CS42L42_TSRS_PLUG_STATUS, 0x00 }, { CS42L42_ADC_OVFL_INT_MASK, 0x01 }, { CS42L42_MIXER_INT_MASK, 0x0F }, { CS42L42_SRC_INT_MASK, 0x0F }, @@ -89,6 +105,8 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_PLL_CTL3, 0x10 }, { CS42L42_PLL_CAL_RATIO, 0x80 }, { CS42L42_PLL_CTL4, 0x03 }, + { CS42L42_LOAD_DET_RCSTAT, 0x00 }, + { CS42L42_LOAD_DET_DONE, 0x00 }, { CS42L42_LOAD_DET_EN, 0x00 }, { CS42L42_HSBIAS_SC_AUTOCTL, 0x03 }, { CS42L42_WAKE_CTL, 0xC0 }, @@ -97,6 +115,8 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_MISC_DET_CTL, 0x03 }, { CS42L42_MIC_DET_CTL1, 0x1F }, { CS42L42_MIC_DET_CTL2, 0x2F }, + { CS42L42_DET_STATUS1, 0x00 }, + { CS42L42_DET_STATUS2, 0x00 }, { CS42L42_DET_INT1_MASK, 0xE0 }, { CS42L42_DET_INT2_MASK, 0xFF }, { CS42L42_HS_BIAS_CTL, 0xC2 }, @@ -162,6 +182,7 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_ASP_RX_DAI1_CH2_AP_RES, 0x03 }, { CS42L42_ASP_RX_DAI1_CH2_BIT_MSB, 0x00 }, { CS42L42_ASP_RX_DAI1_CH2_BIT_LSB, 0x00 }, + { CS42L42_SUB_REVID, 0x03 }, }; static bool cs42l42_readable_register(struct device *dev, unsigned int reg) @@ -178,7 +199,6 @@ static bool cs42l42_readable_register(struct device *dev, unsigned int reg) case CS42L42_MCLK_STATUS: case CS42L42_MCLK_CTL: case CS42L42_SFTRAMP_RATE: - case CS42L42_SLOW_START_ENABLE: case CS42L42_I2C_DEBOUNCE: case CS42L42_I2C_STRETCH: case CS42L42_I2C_TIMEOUT: @@ -331,7 +351,6 @@ static bool cs42l42_volatile_register(struct device *dev, unsigned int reg) case CS42L42_DEVID_CD: case CS42L42_DEVID_E: case CS42L42_MCLK_STATUS: - case CS42L42_OSC_SWITCH_STATUS: case CS42L42_TRSENSE_STATUS: case CS42L42_HS_DET_STATUS: case CS42L42_ADC_OVFL_STATUS: @@ -389,28 +408,6 @@ static const struct regmap_config cs42l42_regmap = { static DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 100, true); static DECLARE_TLV_DB_SCALE(mixer_tlv, -6300, 100, true); -static int cs42l42_slow_start_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); - u8 val; - - /* all bits of SLOW_START_EN much change together */ - switch (ucontrol->value.integer.value[0]) { - case 0: - val = 0; - break; - case 1: - val = CS42L42_SLOW_START_EN_MASK; - break; - default: - return -EINVAL; - } - - return snd_soc_component_update_bits(component, CS42L42_SLOW_START_ENABLE, - CS42L42_SLOW_START_EN_MASK, val); -} - static const char * const cs42l42_hpf_freq_text[] = { "1.86Hz", "120Hz", "235Hz", "466Hz" }; @@ -455,43 +452,13 @@ static const struct snd_kcontrol_new cs42l42_snd_controls[] = { CS42L42_DAC_HPF_EN_SHIFT, true, false), SOC_DOUBLE_R_TLV("Mixer Volume", CS42L42_MIXER_CHA_VOL, CS42L42_MIXER_CHB_VOL, CS42L42_MIXER_CH_VOL_SHIFT, - 0x3f, 1, mixer_tlv), - - SOC_SINGLE_EXT("Slow Start Switch", CS42L42_SLOW_START_ENABLE, - CS42L42_SLOW_START_EN_SHIFT, true, false, - snd_soc_get_volsw, cs42l42_slow_start_put), + 0x3f, 1, mixer_tlv) }; -static int cs42l42_hp_adc_ev(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, int event) -{ - struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); - - switch (event) { - case SND_SOC_DAPM_PRE_PMU: - cs42l42->hp_adc_up_pending = true; - break; - case SND_SOC_DAPM_POST_PMU: - /* Only need one delay if HP and ADC are both powering-up */ - 
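(The removal hunk continues below.) For context, the general shape of a DAPM event callback like the cs42l42_hp_adc_ev being deleted here: the widget flags select which events are delivered, and the handler dispatches on SND_SOC_DAPM_PRE_PMU/POST_PMU. The register index and delay values in this sketch are illustrative, not the codec's real ones:

#include <linux/delay.h>
#include <sound/soc.h>
#include <sound/soc-dapm.h>

#define MY_PWR_REG	0x01	/* illustrative power-control register */

static int my_dac_event(struct snd_soc_dapm_widget *w,
			struct snd_kcontrol *kcontrol, int event)
{
	switch (event) {
	case SND_SOC_DAPM_PRE_PMU:
		/* runs before the widget's power bit is written */
		break;
	case SND_SOC_DAPM_POST_PMU:
		/* runs after power-up; allow the output to settle */
		usleep_range(1000, 2000);
		break;
	default:
		break;
	}

	return 0;
}

static const struct snd_soc_dapm_widget my_widgets[] = {
	SND_SOC_DAPM_DAC_E("DAC", NULL, MY_PWR_REG, 0, 1, my_dac_event,
			   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
};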
if (cs42l42->hp_adc_up_pending) { - usleep_range(CS42L42_HP_ADC_EN_TIME_US, - CS42L42_HP_ADC_EN_TIME_US + 1000); - cs42l42->hp_adc_up_pending = false; - } - break; - default: - break; - } - - return 0; -} - static const struct snd_soc_dapm_widget cs42l42_dapm_widgets[] = { /* Playback Path */ SND_SOC_DAPM_OUTPUT("HP"), - SND_SOC_DAPM_DAC_E("DAC", NULL, CS42L42_PWR_CTL1, CS42L42_HP_PDN_SHIFT, 1, - cs42l42_hp_adc_ev, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_DAC("DAC", NULL, CS42L42_PWR_CTL1, CS42L42_HP_PDN_SHIFT, 1), SND_SOC_DAPM_MIXER("MIXER", CS42L42_PWR_CTL1, CS42L42_MIXER_PDN_SHIFT, 1, NULL, 0), SND_SOC_DAPM_AIF_IN("SDIN1", NULL, 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SDIN2", NULL, 1, SND_SOC_NOPM, 0, 0), @@ -501,8 +468,7 @@ static const struct snd_soc_dapm_widget cs42l42_dapm_widgets[] = { /* Capture Path */ SND_SOC_DAPM_INPUT("HS"), - SND_SOC_DAPM_ADC_E("ADC", NULL, CS42L42_PWR_CTL1, CS42L42_ADC_PDN_SHIFT, 1, - cs42l42_hp_adc_ev, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_ADC("ADC", NULL, CS42L42_PWR_CTL1, CS42L42_ADC_PDN_SHIFT, 1), SND_SOC_DAPM_AIF_OUT("SDOUT1", NULL, 0, CS42L42_ASP_TX_CH_EN, CS42L42_ASP_TX0_CH1_SHIFT, 0), SND_SOC_DAPM_AIF_OUT("SDOUT2", NULL, 1, CS42L42_ASP_TX_CH_EN, CS42L42_ASP_TX0_CH2_SHIFT, 0), @@ -549,29 +515,28 @@ static int cs42l42_set_jack(struct snd_soc_component *component, struct snd_soc_ { struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); - /* Prevent race with interrupt handler */ - mutex_lock(&cs42l42->jack_detect_mutex); cs42l42->jack = jk; - if (jk) { - switch (cs42l42->hs_type) { - case CS42L42_PLUG_CTIA: - case CS42L42_PLUG_OMTP: - snd_soc_jack_report(jk, SND_JACK_HEADSET, SND_JACK_HEADSET); - break; - case CS42L42_PLUG_HEADPHONE: - snd_soc_jack_report(jk, SND_JACK_HEADPHONE, SND_JACK_HEADPHONE); - break; - default: - break; - } - } - mutex_unlock(&cs42l42->jack_detect_mutex); + regmap_update_bits(cs42l42->regmap, CS42L42_TSRS_PLUG_INT_MASK, + CS42L42_RS_PLUG_MASK | CS42L42_RS_UNPLUG_MASK | + CS42L42_TS_PLUG_MASK | CS42L42_TS_UNPLUG_MASK, + (1 << CS42L42_RS_PLUG_SHIFT) | (1 << CS42L42_RS_UNPLUG_SHIFT) | + (0 << CS42L42_TS_PLUG_SHIFT) | (0 << CS42L42_TS_UNPLUG_SHIFT)); + + return 0; +} + +static int cs42l42_component_probe(struct snd_soc_component *component) +{ + struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); + + cs42l42->component = component; return 0; } static const struct snd_soc_component_driver soc_component_dev_cs42l42 = { + .probe = cs42l42_component_probe, .set_jack = cs42l42_set_jack, .dapm_widgets = cs42l42_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(cs42l42_dapm_widgets), @@ -604,6 +569,7 @@ static const struct reg_sequence cs42l42_to_osc_seq[] = { struct cs42l42_pll_params { u32 sclk; + u8 mclk_div; u8 mclk_src_sel; u8 sclk_prediv; u8 pll_div_int; @@ -620,24 +586,24 @@ struct cs42l42_pll_params { * Table 4-5 from the Datasheet */ static const struct cs42l42_pll_params pll_ratio_table[] = { - { 1411200, 1, 0x00, 0x80, 0x000000, 0x03, 0x10, 11289600, 128, 2}, - { 1536000, 1, 0x00, 0x7D, 0x000000, 0x03, 0x10, 12000000, 125, 2}, - { 2304000, 1, 0x00, 0x55, 0xC00000, 0x02, 0x10, 12288000, 85, 2}, - { 2400000, 1, 0x00, 0x50, 0x000000, 0x03, 0x10, 12000000, 80, 2}, - { 2822400, 1, 0x00, 0x40, 0x000000, 0x03, 0x10, 11289600, 128, 1}, - { 3000000, 1, 0x00, 0x40, 0x000000, 0x03, 0x10, 12000000, 128, 1}, - { 3072000, 1, 0x00, 0x3E, 0x800000, 0x03, 0x10, 12000000, 125, 1}, - { 4000000, 1, 0x00, 0x30, 0x800000, 0x03, 0x10, 12000000, 96, 1}, - { 
4096000, 1, 0x00, 0x2E, 0xE00000, 0x03, 0x10, 12000000, 94, 1}, - { 5644800, 1, 0x01, 0x40, 0x000000, 0x03, 0x10, 11289600, 128, 1}, - { 6000000, 1, 0x01, 0x40, 0x000000, 0x03, 0x10, 12000000, 128, 1}, - { 6144000, 1, 0x01, 0x3E, 0x800000, 0x03, 0x10, 12000000, 125, 1}, - { 11289600, 0, 0, 0, 0, 0, 0, 11289600, 0, 1}, - { 12000000, 0, 0, 0, 0, 0, 0, 12000000, 0, 1}, - { 12288000, 0, 0, 0, 0, 0, 0, 12288000, 0, 1}, - { 22579200, 1, 0x03, 0x40, 0x000000, 0x03, 0x10, 11289600, 128, 1}, - { 24000000, 1, 0x03, 0x40, 0x000000, 0x03, 0x10, 12000000, 128, 1}, - { 24576000, 1, 0x03, 0x40, 0x000000, 0x03, 0x10, 12288000, 128, 1} + { 1411200, 0, 1, 0x00, 0x80, 0x000000, 0x03, 0x10, 11289600, 128, 2}, + { 1536000, 0, 1, 0x00, 0x7D, 0x000000, 0x03, 0x10, 12000000, 125, 2}, + { 2304000, 0, 1, 0x00, 0x55, 0xC00000, 0x02, 0x10, 12288000, 85, 2}, + { 2400000, 0, 1, 0x00, 0x50, 0x000000, 0x03, 0x10, 12000000, 80, 2}, + { 2822400, 0, 1, 0x00, 0x40, 0x000000, 0x03, 0x10, 11289600, 128, 1}, + { 3000000, 0, 1, 0x00, 0x40, 0x000000, 0x03, 0x10, 12000000, 128, 1}, + { 3072000, 0, 1, 0x00, 0x3E, 0x800000, 0x03, 0x10, 12000000, 125, 1}, + { 4000000, 0, 1, 0x00, 0x30, 0x800000, 0x03, 0x10, 12000000, 96, 1}, + { 4096000, 0, 1, 0x00, 0x2E, 0xE00000, 0x03, 0x10, 12000000, 94, 1}, + { 5644800, 0, 1, 0x01, 0x40, 0x000000, 0x03, 0x10, 11289600, 128, 1}, + { 6000000, 0, 1, 0x01, 0x40, 0x000000, 0x03, 0x10, 12000000, 128, 1}, + { 6144000, 0, 1, 0x01, 0x3E, 0x800000, 0x03, 0x10, 12000000, 125, 1}, + { 11289600, 0, 0, 0, 0, 0, 0, 0, 11289600, 0, 1}, + { 12000000, 0, 0, 0, 0, 0, 0, 0, 12000000, 0, 1}, + { 12288000, 0, 0, 0, 0, 0, 0, 0, 12288000, 0, 1}, + { 22579200, 1, 0, 0, 0, 0, 0, 0, 22579200, 0, 1}, + { 24000000, 1, 0, 0, 0, 0, 0, 0, 24000000, 0, 1}, + { 24576000, 1, 0, 0, 0, 0, 0, 0, 24576000, 0, 1} }; static int cs42l42_pll_config(struct snd_soc_component *component) @@ -652,14 +618,6 @@ static int cs42l42_pll_config(struct snd_soc_component *component) else clk = cs42l42->sclk; - /* Don't reconfigure if there is an audio stream running */ - if (cs42l42->stream_use) { - if (pll_ratio_table[cs42l42->pll_config].sclk == clk) - return 0; - else - return -EBUSY; - } - for (i = 0; i < ARRAY_SIZE(pll_ratio_table); i++) { if (pll_ratio_table[i].sclk == clk) { cs42l42->pll_config = i; @@ -673,6 +631,10 @@ static int cs42l42_pll_config(struct snd_soc_component *component) 24000000)) << CS42L42_INTERNAL_FS_SHIFT); + snd_soc_component_update_bits(component, CS42L42_MCLK_SRC_SEL, + CS42L42_MCLKDIV_MASK, + (pll_ratio_table[i].mclk_div << + CS42L42_MCLKDIV_SHIFT)); /* Set up the LRCLK */ fsync = clk / cs42l42->srate; if (((fsync * cs42l42->srate) != clk) @@ -706,6 +668,22 @@ static int cs42l42_pll_config(struct snd_soc_component *component) CS42L42_FSYNC_PULSE_WIDTH_MASK, CS42L42_FRAC1_VAL(fsync - 1) << CS42L42_FSYNC_PULSE_WIDTH_SHIFT); + /* Set the sample rates (96k or lower) */ + snd_soc_component_update_bits(component, CS42L42_FS_RATE_EN, + CS42L42_FS_EN_MASK, + (CS42L42_FS_EN_IASRC_96K | + CS42L42_FS_EN_OASRC_96K) << + CS42L42_FS_EN_SHIFT); + /* Set the input/output internal MCLK clock ~12 MHz */ + snd_soc_component_update_bits(component, CS42L42_IN_ASRC_CLK, + CS42L42_CLK_IASRC_SEL_MASK, + CS42L42_CLK_IASRC_SEL_12 << + CS42L42_CLK_IASRC_SEL_SHIFT); + snd_soc_component_update_bits(component, + CS42L42_OUT_ASRC_CLK, + CS42L42_CLK_OASRC_SEL_MASK, + CS42L42_CLK_OASRC_SEL_12 << + CS42L42_CLK_OASRC_SEL_SHIFT); if (pll_ratio_table[i].mclk_src_sel == 0) { /* Pass the clock straight through */ snd_soc_component_update_bits(component, @@ 
-751,6 +729,10 @@ static int cs42l42_pll_config(struct snd_soc_component *component) CS42L42_PLL_DIVOUT_MASK, (pll_ratio_table[i].pll_divout * pll_ratio_table[i].n) << CS42L42_PLL_DIVOUT_SHIFT); + if (pll_ratio_table[i].n != 1) + cs42l42->pll_divout = pll_ratio_table[i].pll_divout; + else + cs42l42->pll_divout = 0; snd_soc_component_update_bits(component, CS42L42_PLL_CAL_RATIO, CS42L42_PLL_CAL_RATIO_MASK, @@ -764,39 +746,6 @@ static int cs42l42_pll_config(struct snd_soc_component *component) return -EINVAL; } -static void cs42l42_src_config(struct snd_soc_component *component, unsigned int sample_rate) -{ - struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); - unsigned int fs; - - /* Don't reconfigure if there is an audio stream running */ - if (cs42l42->stream_use) - return; - - /* SRC MCLK must be as close as possible to 125 * sample rate */ - if (sample_rate <= 48000) - fs = CS42L42_CLK_IASRC_SEL_6; - else - fs = CS42L42_CLK_IASRC_SEL_12; - - /* Set the sample rates (96k or lower) */ - snd_soc_component_update_bits(component, - CS42L42_FS_RATE_EN, - CS42L42_FS_EN_MASK, - (CS42L42_FS_EN_IASRC_96K | - CS42L42_FS_EN_OASRC_96K) << - CS42L42_FS_EN_SHIFT); - - snd_soc_component_update_bits(component, - CS42L42_IN_ASRC_CLK, - CS42L42_CLK_IASRC_SEL_MASK, - fs << CS42L42_CLK_IASRC_SEL_SHIFT); - snd_soc_component_update_bits(component, - CS42L42_OUT_ASRC_CLK, - CS42L42_CLK_OASRC_SEL_MASK, - fs << CS42L42_CLK_OASRC_SEL_SHIFT); -} - static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct snd_soc_component *component = codec_dai->component; @@ -875,7 +824,7 @@ static int cs42l42_dai_startup(struct snd_pcm_substream *substream, struct snd_s /* Machine driver has not set a SCLK, limit bottom end to 44.1 kHz */ return snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_RATE, - 44100, 96000); + 44100, 192000); } static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, @@ -887,7 +836,6 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, unsigned int channels = params_channels(params); unsigned int width = (params_width(params) / 8) - 1; unsigned int val = 0; - int ret; cs42l42->srate = params_rate(params); cs42l42->bclk = snd_soc_params_to_bclk(params); @@ -903,7 +851,7 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, if (params_width(params) == 24) cs42l42->bclk = (cs42l42->bclk / 3) * 4; - switch (substream->stream) { + switch(substream->stream) { case SNDRV_PCM_STREAM_CAPTURE: /* channel 2 on high LRCLK */ val = CS42L42_ASP_TX_CH2_AP_MASK | @@ -941,13 +889,7 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, break; } - ret = cs42l42_pll_config(component); - if (ret) - return ret; - - cs42l42_src_config(component, params_rate(params)); - - return 0; + return cs42l42_pll_config(component); } static int cs42l42_set_sysclk(struct snd_soc_dai *dai, @@ -991,7 +933,7 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) CS42L42_HP_ANA_BMUTE_MASK); cs42l42->stream_use &= ~(1 << stream); - if (!cs42l42->stream_use) { + if(!cs42l42->stream_use) { /* * Switch to the internal oscillator. * SCLK must remain running until after this clock switch. 
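The hunk that follows restores the deferred PLL divider write in the unmute path, using the pll_divout value cached by cs42l42_pll_config(). Condensed to just the start/divider sequence (a sketch assuming the driver's cs42l42.h is in scope, not a drop-in replacement for the full mute_stream handler):

#include <sound/soc.h>
#include "cs42l42.h"

static void my_pll_start(struct snd_soc_component *component)
{
	struct cs42l42_private *cs42l42 =
		snd_soc_component_get_drvdata(component);

	snd_soc_component_update_bits(component, CS42L42_PLL_CTL1,
				      CS42L42_PLL_START_MASK, 1);

	if (cs42l42->pll_divout) {
		/* PLL_DIVOUT must only be set once the PLL is running */
		usleep_range(CS42L42_PLL_DIVOUT_TIME_US,
			     CS42L42_PLL_DIVOUT_TIME_US * 2);
		snd_soc_component_update_bits(component, CS42L42_PLL_CTL3,
					      CS42L42_PLL_DIVOUT_MASK,
					      cs42l42->pll_divout <<
					      CS42L42_PLL_DIVOUT_SHIFT);
	}
}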
@@ -1017,13 +959,12 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) snd_soc_component_update_bits(component, CS42L42_PLL_CTL1, CS42L42_PLL_START_MASK, 1); - if (pll_ratio_table[cs42l42->pll_config].n > 1) { + if (cs42l42->pll_divout) { usleep_range(CS42L42_PLL_DIVOUT_TIME_US, CS42L42_PLL_DIVOUT_TIME_US * 2); - regval = pll_ratio_table[cs42l42->pll_config].pll_divout; snd_soc_component_update_bits(component, CS42L42_PLL_CTL3, CS42L42_PLL_DIVOUT_MASK, - regval << + cs42l42->pll_divout << CS42L42_PLL_DIVOUT_SHIFT); } @@ -1063,7 +1004,7 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) #define CS42L42_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ SNDRV_PCM_FMTBIT_S24_LE |\ - SNDRV_PCM_FMTBIT_S32_LE) + SNDRV_PCM_FMTBIT_S32_LE ) static const struct snd_soc_dai_ops cs42l42_ops = { .startup = cs42l42_dai_startup, @@ -1079,14 +1020,14 @@ static struct snd_soc_dai_driver cs42l42_dai = { .stream_name = "Playback", .channels_min = 1, .channels_max = 2, - .rates = SNDRV_PCM_RATE_8000_96000, + .rates = SNDRV_PCM_RATE_8000_192000, .formats = CS42L42_FORMATS, }, .capture = { .stream_name = "Capture", .channels_min = 1, .channels_max = 2, - .rates = SNDRV_PCM_RATE_8000_96000, + .rates = SNDRV_PCM_RATE_8000_192000, .formats = CS42L42_FORMATS, }, .symmetric_rate = 1, @@ -1094,121 +1035,11 @@ static struct snd_soc_dai_driver cs42l42_dai = { .ops = &cs42l42_ops, }; -static void cs42l42_manual_hs_type_detect(struct cs42l42_private *cs42l42) -{ - unsigned int hs_det_status; - unsigned int hs_det_comp1; - unsigned int hs_det_comp2; - unsigned int hs_det_sw; - - /* Set hs detect to manual, active mode */ - regmap_update_bits(cs42l42->regmap, - CS42L42_HSDET_CTL2, - CS42L42_HSDET_CTRL_MASK | - CS42L42_HSDET_SET_MASK | - CS42L42_HSBIAS_REF_MASK | - CS42L42_HSDET_AUTO_TIME_MASK, - (1 << CS42L42_HSDET_CTRL_SHIFT) | - (0 << CS42L42_HSDET_SET_SHIFT) | - (0 << CS42L42_HSBIAS_REF_SHIFT) | - (0 << CS42L42_HSDET_AUTO_TIME_SHIFT)); - - /* Configure HS DET comparator reference levels. */ - regmap_update_bits(cs42l42->regmap, - CS42L42_HSDET_CTL1, - CS42L42_HSDET_COMP1_LVL_MASK | - CS42L42_HSDET_COMP2_LVL_MASK, - (CS42L42_HSDET_COMP1_LVL_VAL << CS42L42_HSDET_COMP1_LVL_SHIFT) | - (CS42L42_HSDET_COMP2_LVL_VAL << CS42L42_HSDET_COMP2_LVL_SHIFT)); - - /* Open the SW_HSB_HS3 switch and close SW_HSB_HS4 for a Type 1 headset. */ - regmap_write(cs42l42->regmap, CS42L42_HS_SWITCH_CTL, CS42L42_HSDET_SW_COMP1); - - msleep(100); - - regmap_read(cs42l42->regmap, CS42L42_HS_DET_STATUS, &hs_det_status); - - hs_det_comp1 = (hs_det_status & CS42L42_HSDET_COMP1_OUT_MASK) >> - CS42L42_HSDET_COMP1_OUT_SHIFT; - hs_det_comp2 = (hs_det_status & CS42L42_HSDET_COMP2_OUT_MASK) >> - CS42L42_HSDET_COMP2_OUT_SHIFT; - - /* Close the SW_HSB_HS3 switch for a Type 2 headset. */ - regmap_write(cs42l42->regmap, CS42L42_HS_SWITCH_CTL, CS42L42_HSDET_SW_COMP2); - - msleep(100); - - regmap_read(cs42l42->regmap, CS42L42_HS_DET_STATUS, &hs_det_status); - - hs_det_comp1 |= ((hs_det_status & CS42L42_HSDET_COMP1_OUT_MASK) >> - CS42L42_HSDET_COMP1_OUT_SHIFT) << 1; - hs_det_comp2 |= ((hs_det_status & CS42L42_HSDET_COMP2_OUT_MASK) >> - CS42L42_HSDET_COMP2_OUT_SHIFT) << 1; - - /* Use Comparator 1 with 1.25V Threshold. 
*/ - switch (hs_det_comp1) { - case CS42L42_HSDET_COMP_TYPE1: - cs42l42->hs_type = CS42L42_PLUG_CTIA; - hs_det_sw = CS42L42_HSDET_SW_TYPE1; - break; - case CS42L42_HSDET_COMP_TYPE2: - cs42l42->hs_type = CS42L42_PLUG_OMTP; - hs_det_sw = CS42L42_HSDET_SW_TYPE2; - break; - default: - /* Fallback to Comparator 2 with 1.75V Threshold. */ - switch (hs_det_comp2) { - case CS42L42_HSDET_COMP_TYPE1: - cs42l42->hs_type = CS42L42_PLUG_CTIA; - hs_det_sw = CS42L42_HSDET_SW_TYPE1; - break; - case CS42L42_HSDET_COMP_TYPE2: - cs42l42->hs_type = CS42L42_PLUG_OMTP; - hs_det_sw = CS42L42_HSDET_SW_TYPE2; - break; - case CS42L42_HSDET_COMP_TYPE3: - cs42l42->hs_type = CS42L42_PLUG_HEADPHONE; - hs_det_sw = CS42L42_HSDET_SW_TYPE3; - break; - default: - cs42l42->hs_type = CS42L42_PLUG_INVALID; - hs_det_sw = CS42L42_HSDET_SW_TYPE4; - break; - } - } - - /* Set Switches */ - regmap_write(cs42l42->regmap, CS42L42_HS_SWITCH_CTL, hs_det_sw); - - /* Set HSDET mode to Manual—Disabled */ - regmap_update_bits(cs42l42->regmap, - CS42L42_HSDET_CTL2, - CS42L42_HSDET_CTRL_MASK | - CS42L42_HSDET_SET_MASK | - CS42L42_HSBIAS_REF_MASK | - CS42L42_HSDET_AUTO_TIME_MASK, - (0 << CS42L42_HSDET_CTRL_SHIFT) | - (0 << CS42L42_HSDET_SET_SHIFT) | - (0 << CS42L42_HSBIAS_REF_SHIFT) | - (0 << CS42L42_HSDET_AUTO_TIME_SHIFT)); - - /* Configure HS DET comparator reference levels. */ - regmap_update_bits(cs42l42->regmap, - CS42L42_HSDET_CTL1, - CS42L42_HSDET_COMP1_LVL_MASK | - CS42L42_HSDET_COMP2_LVL_MASK, - (CS42L42_HSDET_COMP1_LVL_DEFAULT << CS42L42_HSDET_COMP1_LVL_SHIFT) | - (CS42L42_HSDET_COMP2_LVL_DEFAULT << CS42L42_HSDET_COMP2_LVL_SHIFT)); -} - static void cs42l42_process_hs_type_detect(struct cs42l42_private *cs42l42) { unsigned int hs_det_status; unsigned int int_status; - /* Read and save the hs detection result */ - regmap_read(cs42l42->regmap, CS42L42_HS_DET_STATUS, &hs_det_status); - /* Mask the auto detect interrupt */ regmap_update_bits(cs42l42->regmap, CS42L42_CODEC_INT_MASK, @@ -1217,10 +1048,6 @@ static void cs42l42_process_hs_type_detect(struct cs42l42_private *cs42l42) (1 << CS42L42_PDN_DONE_SHIFT) | (1 << CS42L42_HSDET_AUTO_DONE_SHIFT)); - - cs42l42->hs_type = (hs_det_status & CS42L42_HSDET_TYPE_MASK) >> - CS42L42_HSDET_TYPE_SHIFT; - /* Set hs detect to automatic, disabled mode */ regmap_update_bits(cs42l42->regmap, CS42L42_HSDET_CTL2, @@ -1233,15 +1060,11 @@ static void cs42l42_process_hs_type_detect(struct cs42l42_private *cs42l42) (0 << CS42L42_HSBIAS_REF_SHIFT) | (3 << CS42L42_HSDET_AUTO_TIME_SHIFT)); - /* Run Manual detection if auto detect has not found a headset. - * We Re-Run with Manual Detection if the original detection was invalid or headphones, - * to ensure that a headset mic is detected in all cases. 
- */ - if (cs42l42->hs_type == CS42L42_PLUG_INVALID || - cs42l42->hs_type == CS42L42_PLUG_HEADPHONE) { - dev_dbg(cs42l42->dev, "Running Manual Detection Fallback\n"); - cs42l42_manual_hs_type_detect(cs42l42); - } + /* Read and save the hs detection result */ + regmap_read(cs42l42->regmap, CS42L42_HS_DET_STATUS, &hs_det_status); + + cs42l42->hs_type = (hs_det_status & CS42L42_HSDET_TYPE_MASK) >> + CS42L42_HSDET_TYPE_SHIFT; /* Set up button detection */ if ((cs42l42->hs_type == CS42L42_PLUG_CTIA) || @@ -1284,8 +1107,10 @@ static void cs42l42_process_hs_type_detect(struct cs42l42_private *cs42l42) /* Turn on level detect circuitry */ regmap_update_bits(cs42l42->regmap, CS42L42_MISC_DET_CTL, + CS42L42_DETECT_MODE_MASK | CS42L42_HSBIAS_CTL_MASK | CS42L42_PDN_MIC_LVL_DET_MASK, + (0 << CS42L42_DETECT_MODE_SHIFT) | (3 << CS42L42_HSBIAS_CTL_SHIFT) | (0 << CS42L42_PDN_MIC_LVL_DET_SHIFT)); @@ -1312,8 +1137,10 @@ static void cs42l42_process_hs_type_detect(struct cs42l42_private *cs42l42) /* Make sure button detect and HS bias circuits are off */ regmap_update_bits(cs42l42->regmap, CS42L42_MISC_DET_CTL, + CS42L42_DETECT_MODE_MASK | CS42L42_HSBIAS_CTL_MASK | CS42L42_PDN_MIC_LVL_DET_MASK, + (0 << CS42L42_DETECT_MODE_SHIFT) | (1 << CS42L42_HSBIAS_CTL_SHIFT) | (1 << CS42L42_PDN_MIC_LVL_DET_SHIFT)); } @@ -1334,8 +1161,12 @@ static void cs42l42_process_hs_type_detect(struct cs42l42_private *cs42l42) /* Unmask tip sense interrupts */ regmap_update_bits(cs42l42->regmap, CS42L42_TSRS_PLUG_INT_MASK, + CS42L42_RS_PLUG_MASK | + CS42L42_RS_UNPLUG_MASK | CS42L42_TS_PLUG_MASK | CS42L42_TS_UNPLUG_MASK, + (1 << CS42L42_RS_PLUG_SHIFT) | + (1 << CS42L42_RS_UNPLUG_SHIFT) | (0 << CS42L42_TS_PLUG_SHIFT) | (0 << CS42L42_TS_UNPLUG_SHIFT)); } @@ -1345,16 +1176,22 @@ static void cs42l42_init_hs_type_detect(struct cs42l42_private *cs42l42) /* Mask tip sense interrupts */ regmap_update_bits(cs42l42->regmap, CS42L42_TSRS_PLUG_INT_MASK, + CS42L42_RS_PLUG_MASK | + CS42L42_RS_UNPLUG_MASK | CS42L42_TS_PLUG_MASK | CS42L42_TS_UNPLUG_MASK, + (1 << CS42L42_RS_PLUG_SHIFT) | + (1 << CS42L42_RS_UNPLUG_SHIFT) | (1 << CS42L42_TS_PLUG_SHIFT) | (1 << CS42L42_TS_UNPLUG_SHIFT)); /* Make sure button detect and HS bias circuits are off */ regmap_update_bits(cs42l42->regmap, CS42L42_MISC_DET_CTL, + CS42L42_DETECT_MODE_MASK | CS42L42_HSBIAS_CTL_MASK | CS42L42_PDN_MIC_LVL_DET_MASK, + (0 << CS42L42_DETECT_MODE_SHIFT) | (1 << CS42L42_HSBIAS_CTL_SHIFT) | (1 << CS42L42_PDN_MIC_LVL_DET_SHIFT)); @@ -1398,8 +1235,10 @@ static void cs42l42_init_hs_type_detect(struct cs42l42_private *cs42l42) /* Power up HS bias to 2.7V */ regmap_update_bits(cs42l42->regmap, CS42L42_MISC_DET_CTL, + CS42L42_DETECT_MODE_MASK | CS42L42_HSBIAS_CTL_MASK | CS42L42_PDN_MIC_LVL_DET_MASK, + (0 << CS42L42_DETECT_MODE_SHIFT) | (3 << CS42L42_HSBIAS_CTL_SHIFT) | (1 << CS42L42_PDN_MIC_LVL_DET_SHIFT)); @@ -1446,8 +1285,10 @@ static void cs42l42_cancel_hs_type_detect(struct cs42l42_private *cs42l42) /* Ground HS bias */ regmap_update_bits(cs42l42->regmap, CS42L42_MISC_DET_CTL, + CS42L42_DETECT_MODE_MASK | CS42L42_HSBIAS_CTL_MASK | CS42L42_PDN_MIC_LVL_DET_MASK, + (0 << CS42L42_DETECT_MODE_SHIFT) | (1 << CS42L42_HSBIAS_CTL_SHIFT) | (1 << CS42L42_PDN_MIC_LVL_DET_SHIFT)); @@ -1520,19 +1361,19 @@ static int cs42l42_handle_button_press(struct cs42l42_private *cs42l42) switch (bias_level) { case 1: /* Function C button press */ bias_level = SND_JACK_BTN_2; - dev_dbg(cs42l42->dev, "Function C button press\n"); + dev_dbg(cs42l42->component->dev, "Function C button press\n"); break; case 2: /* Function 
B button press */ bias_level = SND_JACK_BTN_1; - dev_dbg(cs42l42->dev, "Function B button press\n"); + dev_dbg(cs42l42->component->dev, "Function B button press\n"); break; case 3: /* Function D button press */ bias_level = SND_JACK_BTN_3; - dev_dbg(cs42l42->dev, "Function D button press\n"); + dev_dbg(cs42l42->component->dev, "Function D button press\n"); break; case 4: /* Function A button press */ bias_level = SND_JACK_BTN_0; - dev_dbg(cs42l42->dev, "Function A button press\n"); + dev_dbg(cs42l42->component->dev, "Function A button press\n"); break; default: bias_level = 0; @@ -1606,6 +1447,7 @@ static const struct cs42l42_irq_params irq_params_table[] = { static irqreturn_t cs42l42_irq_thread(int irq, void *data) { struct cs42l42_private *cs42l42 = (struct cs42l42_private *)data; + struct snd_soc_component *component = cs42l42->component; unsigned int stickies[12]; unsigned int masks[12]; unsigned int current_plug_status; @@ -1635,13 +1477,11 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) CS42L42_M_DETECT_FT_MASK | CS42L42_M_HSBIAS_HIZ_MASK); - mutex_lock(&cs42l42->jack_detect_mutex); - /* Check auto-detect status */ if ((~masks[5]) & irq_params_table[5].mask) { if (stickies[5] & CS42L42_HSDET_AUTO_DONE_MASK) { cs42l42_process_hs_type_detect(cs42l42); - switch (cs42l42->hs_type) { + switch(cs42l42->hs_type){ case CS42L42_PLUG_CTIA: case CS42L42_PLUG_OMTP: snd_soc_jack_report(cs42l42->jack, SND_JACK_HEADSET, @@ -1654,7 +1494,7 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) default: break; } - dev_dbg(cs42l42->dev, "Auto detect done (%d)\n", cs42l42->hs_type); + dev_dbg(component->dev, "Auto detect done (%d)\n", cs42l42->hs_type); } } @@ -1673,12 +1513,22 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) cs42l42->plug_state = CS42L42_TS_UNPLUG; cs42l42_cancel_hs_type_detect(cs42l42); + switch(cs42l42->hs_type){ + case CS42L42_PLUG_CTIA: + case CS42L42_PLUG_OMTP: + snd_soc_jack_report(cs42l42->jack, 0, SND_JACK_HEADSET); + break; + case CS42L42_PLUG_HEADPHONE: + snd_soc_jack_report(cs42l42->jack, 0, SND_JACK_HEADPHONE); + break; + default: + break; + } snd_soc_jack_report(cs42l42->jack, 0, - SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | SND_JACK_BTN_3); - dev_dbg(cs42l42->dev, "Unplug event\n"); + dev_dbg(component->dev, "Unplug event\n"); } break; @@ -1694,7 +1544,7 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) CS42L42_M_HSBIAS_HIZ_MASK)) { if (current_button_status & CS42L42_M_DETECT_TF_MASK) { - dev_dbg(cs42l42->dev, "Button released\n"); + dev_dbg(component->dev, "Button released\n"); report = 0; } else if (current_button_status & CS42L42_M_DETECT_FT_MASK) { report = cs42l42_handle_button_press(cs42l42); @@ -1705,8 +1555,6 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) } } - mutex_unlock(&cs42l42->jack_detect_mutex); - return IRQ_HANDLED; } @@ -1809,8 +1657,8 @@ static void cs42l42_set_interrupt_masks(struct cs42l42_private *cs42l42) CS42L42_TS_UNPLUG_MASK, (1 << CS42L42_RS_PLUG_SHIFT) | (1 << CS42L42_RS_UNPLUG_SHIFT) | - (0 << CS42L42_TS_PLUG_SHIFT) | - (0 << CS42L42_TS_UNPLUG_SHIFT)); + (1 << CS42L42_TS_PLUG_SHIFT) | + (1 << CS42L42_TS_UNPLUG_SHIFT)); } static void cs42l42_setup_hs_type_detect(struct cs42l42_private *cs42l42) @@ -1819,9 +1667,6 @@ static void cs42l42_setup_hs_type_detect(struct cs42l42_private *cs42l42) cs42l42->hs_type = CS42L42_PLUG_INVALID; - regmap_update_bits(cs42l42->regmap, CS42L42_MISC_DET_CTL, - CS42L42_DETECT_MODE_MASK, 0); - /* Latch analog controls to VP 
power domain */ regmap_update_bits(cs42l42->regmap, CS42L42_MIC_DET_CTL1, CS42L42_LATCH_TO_VP_MASK | @@ -2052,9 +1897,7 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, if (!cs42l42) return -ENOMEM; - cs42l42->dev = &i2c_client->dev; i2c_set_clientdata(i2c_client, cs42l42); - mutex_init(&cs42l42->jack_detect_mutex); cs42l42->regmap = devm_regmap_init_i2c(i2c_client, &cs42l42_regmap); if (IS_ERR(cs42l42->regmap)) { @@ -2088,7 +1931,7 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, "reset", GPIOD_OUT_LOW); if (IS_ERR(cs42l42->reset_gpio)) { ret = PTR_ERR(cs42l42->reset_gpio); - goto err_disable_noreset; + goto err_disable; } if (cs42l42->reset_gpio) { @@ -2097,20 +1940,17 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, } usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); - /* Request IRQ if one was specified */ - if (i2c_client->irq) { - ret = request_threaded_irq(i2c_client->irq, - NULL, cs42l42_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, - "cs42l42", cs42l42); - if (ret == -EPROBE_DEFER) { - goto err_disable_noirq; - } else if (ret != 0) { - dev_err(&i2c_client->dev, - "Failed to request IRQ: %d\n", ret); - goto err_disable_noirq; - } - } + /* Request IRQ */ + ret = devm_request_threaded_irq(&i2c_client->dev, + i2c_client->irq, + NULL, cs42l42_irq_thread, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, + "cs42l42", cs42l42); + if (ret == -EPROBE_DEFER) + goto err_disable; + else if (ret != 0) + dev_err(&i2c_client->dev, + "Failed to request IRQ: %d\n", ret); /* initialize codec */ devid = cirrus_read_device_id(cs42l42->regmap, CS42L42_DEVID_AB); @@ -2131,7 +1971,7 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, ret = regmap_read(cs42l42->regmap, CS42L42_REVID, ®); if (ret < 0) { dev_err(&i2c_client->dev, "Get Revision ID failed\n"); - goto err_shutdown; + goto err_disable; } dev_info(&i2c_client->dev, @@ -2156,7 +1996,7 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, ret = cs42l42_handle_device_data(&i2c_client->dev, cs42l42); if (ret != 0) - goto err_shutdown; + goto err_disable; /* Setup headset detection */ cs42l42_setup_hs_type_detect(cs42l42); @@ -2168,22 +2008,10 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, ret = devm_snd_soc_register_component(&i2c_client->dev, &soc_component_dev_cs42l42, &cs42l42_dai, 1); if (ret < 0) - goto err_shutdown; - + goto err_disable; return 0; -err_shutdown: - regmap_write(cs42l42->regmap, CS42L42_CODEC_INT_MASK, 0xff); - regmap_write(cs42l42->regmap, CS42L42_TSRS_PLUG_INT_MASK, 0xff); - regmap_write(cs42l42->regmap, CS42L42_PWR_CTL1, 0xff); - err_disable: - if (i2c_client->irq) - free_irq(i2c_client->irq, cs42l42); - -err_disable_noirq: - gpiod_set_value_cansleep(cs42l42->reset_gpio, 0); -err_disable_noreset: regulator_bulk_disable(ARRAY_SIZE(cs42l42->supplies), cs42l42->supplies); return ret; @@ -2193,23 +2021,60 @@ static int cs42l42_i2c_remove(struct i2c_client *i2c_client) { struct cs42l42_private *cs42l42 = i2c_get_clientdata(i2c_client); - if (i2c_client->irq) - free_irq(i2c_client->irq, cs42l42); - - /* - * The driver might not have control of reset and power supplies, - * so ensure that the chip internals are powered down. 
- */ - regmap_write(cs42l42->regmap, CS42L42_CODEC_INT_MASK, 0xff); - regmap_write(cs42l42->regmap, CS42L42_TSRS_PLUG_INT_MASK, 0xff); - regmap_write(cs42l42->regmap, CS42L42_PWR_CTL1, 0xff); - - gpiod_set_value_cansleep(cs42l42->reset_gpio, 0); - regulator_bulk_disable(ARRAY_SIZE(cs42l42->supplies), cs42l42->supplies); + devm_free_irq(&i2c_client->dev, i2c_client->irq, cs42l42); + pm_runtime_suspend(&i2c_client->dev); + pm_runtime_disable(&i2c_client->dev); return 0; } +#ifdef CONFIG_PM +static int cs42l42_runtime_suspend(struct device *dev) +{ + struct cs42l42_private *cs42l42 = dev_get_drvdata(dev); + + regcache_cache_only(cs42l42->regmap, true); + regcache_mark_dirty(cs42l42->regmap); + + /* Hold down reset */ + gpiod_set_value_cansleep(cs42l42->reset_gpio, 0); + + /* remove power */ + regulator_bulk_disable(ARRAY_SIZE(cs42l42->supplies), + cs42l42->supplies); + + return 0; +} + +static int cs42l42_runtime_resume(struct device *dev) +{ + struct cs42l42_private *cs42l42 = dev_get_drvdata(dev); + int ret; + + /* Enable power */ + ret = regulator_bulk_enable(ARRAY_SIZE(cs42l42->supplies), + cs42l42->supplies); + if (ret != 0) { + dev_err(dev, "Failed to enable supplies: %d\n", + ret); + return ret; + } + + gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); + usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); + + regcache_cache_only(cs42l42->regmap, false); + regcache_sync(cs42l42->regmap); + + return 0; +} +#endif + +static const struct dev_pm_ops cs42l42_runtime_pm = { + SET_RUNTIME_PM_OPS(cs42l42_runtime_suspend, cs42l42_runtime_resume, + NULL) +}; + #ifdef CONFIG_OF static const struct of_device_id cs42l42_of_match[] = { { .compatible = "cirrus,cs42l42", }, @@ -2236,6 +2101,7 @@ MODULE_DEVICE_TABLE(i2c, cs42l42_id); static struct i2c_driver cs42l42_i2c_driver = { .driver = { .name = "cs42l42", + .pm = &cs42l42_runtime_pm, .of_match_table = of_match_ptr(cs42l42_of_match), .acpi_match_table = ACPI_PTR(cs42l42_acpi_match), }, diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 9fff183dce..8734f6828f 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -12,7 +12,6 @@ #ifndef __CS42L42_H__ #define __CS42L42_H__ -#include #include #define CS42L42_PAGE_REGISTER 0x00 /* Page Select Register */ @@ -63,9 +62,6 @@ #define CS42L42_INTERNAL_FS_MASK (1 << CS42L42_INTERNAL_FS_SHIFT) #define CS42L42_SFTRAMP_RATE (CS42L42_PAGE_10 + 0x0A) -#define CS42L42_SLOW_START_ENABLE (CS42L42_PAGE_10 + 0x0B) -#define CS42L42_SLOW_START_EN_MASK GENMASK(6, 4) -#define CS42L42_SLOW_START_EN_SHIFT 4 #define CS42L42_I2C_DEBOUNCE (CS42L42_PAGE_10 + 0x0E) #define CS42L42_I2C_STRETCH (CS42L42_PAGE_10 + 0x0F) #define CS42L42_I2C_TIMEOUT (CS42L42_PAGE_10 + 0x10) @@ -192,11 +188,6 @@ #define CS42L42_HSDET_COMP2_LVL_SHIFT 4 #define CS42L42_HSDET_COMP2_LVL_MASK (15 << CS42L42_HSDET_COMP2_LVL_SHIFT) -#define CS42L42_HSDET_COMP1_LVL_VAL 12 /* 1.25V Comparator */ -#define CS42L42_HSDET_COMP2_LVL_VAL 2 /* 1.75V Comparator */ -#define CS42L42_HSDET_COMP1_LVL_DEFAULT 7 /* 1V Comparator */ -#define CS42L42_HSDET_COMP2_LVL_DEFAULT 7 /* 2V Comparator */ - #define CS42L42_HSDET_CTL2 (CS42L42_PAGE_11 + 0x20) #define CS42L42_HSDET_AUTO_TIME_SHIFT 0 #define CS42L42_HSDET_AUTO_TIME_MASK (3 << CS42L42_HSDET_AUTO_TIME_SHIFT) @@ -237,60 +228,6 @@ #define CS42L42_PLUG_HEADPHONE 2 #define CS42L42_PLUG_INVALID 3 -#define CS42L42_HSDET_SW_COMP1 ((0 << CS42L42_SW_GNDHS_HS4_SHIFT) | \ - (1 << CS42L42_SW_GNDHS_HS3_SHIFT) | \ - (1 << CS42L42_SW_HSB_HS4_SHIFT) | \ - (0 << CS42L42_SW_HSB_HS3_SHIFT) 
| \ - (0 << CS42L42_SW_HSB_FILT_HS4_SHIFT) | \ - (1 << CS42L42_SW_HSB_FILT_HS3_SHIFT) | \ - (0 << CS42L42_SW_REF_HS4_SHIFT) | \ - (1 << CS42L42_SW_REF_HS3_SHIFT)) -#define CS42L42_HSDET_SW_COMP2 ((1 << CS42L42_SW_GNDHS_HS4_SHIFT) | \ - (0 << CS42L42_SW_GNDHS_HS3_SHIFT) | \ - (0 << CS42L42_SW_HSB_HS4_SHIFT) | \ - (1 << CS42L42_SW_HSB_HS3_SHIFT) | \ - (1 << CS42L42_SW_HSB_FILT_HS4_SHIFT) | \ - (0 << CS42L42_SW_HSB_FILT_HS3_SHIFT) | \ - (1 << CS42L42_SW_REF_HS4_SHIFT) | \ - (0 << CS42L42_SW_REF_HS3_SHIFT)) -#define CS42L42_HSDET_SW_TYPE1 ((0 << CS42L42_SW_GNDHS_HS4_SHIFT) | \ - (1 << CS42L42_SW_GNDHS_HS3_SHIFT) | \ - (1 << CS42L42_SW_HSB_HS4_SHIFT) | \ - (0 << CS42L42_SW_HSB_HS3_SHIFT) | \ - (0 << CS42L42_SW_HSB_FILT_HS4_SHIFT) | \ - (1 << CS42L42_SW_HSB_FILT_HS3_SHIFT) | \ - (0 << CS42L42_SW_REF_HS4_SHIFT) | \ - (1 << CS42L42_SW_REF_HS3_SHIFT)) -#define CS42L42_HSDET_SW_TYPE2 ((1 << CS42L42_SW_GNDHS_HS4_SHIFT) | \ - (0 << CS42L42_SW_GNDHS_HS3_SHIFT) | \ - (0 << CS42L42_SW_HSB_HS4_SHIFT) | \ - (1 << CS42L42_SW_HSB_HS3_SHIFT) | \ - (1 << CS42L42_SW_HSB_FILT_HS4_SHIFT) | \ - (0 << CS42L42_SW_HSB_FILT_HS3_SHIFT) | \ - (1 << CS42L42_SW_REF_HS4_SHIFT) | \ - (0 << CS42L42_SW_REF_HS3_SHIFT)) -#define CS42L42_HSDET_SW_TYPE3 ((1 << CS42L42_SW_GNDHS_HS4_SHIFT) | \ - (1 << CS42L42_SW_GNDHS_HS3_SHIFT) | \ - (0 << CS42L42_SW_HSB_HS4_SHIFT) | \ - (0 << CS42L42_SW_HSB_HS3_SHIFT) | \ - (1 << CS42L42_SW_HSB_FILT_HS4_SHIFT) | \ - (1 << CS42L42_SW_HSB_FILT_HS3_SHIFT) | \ - (1 << CS42L42_SW_REF_HS4_SHIFT) | \ - (1 << CS42L42_SW_REF_HS3_SHIFT)) -#define CS42L42_HSDET_SW_TYPE4 ((0 << CS42L42_SW_GNDHS_HS4_SHIFT) | \ - (1 << CS42L42_SW_GNDHS_HS3_SHIFT) | \ - (1 << CS42L42_SW_HSB_HS4_SHIFT) | \ - (0 << CS42L42_SW_HSB_HS3_SHIFT) | \ - (0 << CS42L42_SW_HSB_FILT_HS4_SHIFT) | \ - (1 << CS42L42_SW_HSB_FILT_HS3_SHIFT) | \ - (0 << CS42L42_SW_REF_HS4_SHIFT) | \ - (1 << CS42L42_SW_REF_HS3_SHIFT)) - -#define CS42L42_HSDET_COMP_TYPE1 1 -#define CS42L42_HSDET_COMP_TYPE2 2 -#define CS42L42_HSDET_COMP_TYPE3 0 -#define CS42L42_HSDET_COMP_TYPE4 3 - #define CS42L42_HS_CLAMP_DISABLE (CS42L42_PAGE_11 + 0x29) #define CS42L42_HS_CLAMP_DISABLE_SHIFT 0 #define CS42L42_HS_CLAMP_DISABLE_MASK (1 << CS42L42_HS_CLAMP_DISABLE_SHIFT) @@ -351,7 +288,6 @@ #define CS42L42_IN_ASRC_CLK (CS42L42_PAGE_12 + 0x0A) #define CS42L42_CLK_IASRC_SEL_SHIFT 0 #define CS42L42_CLK_IASRC_SEL_MASK (1 << CS42L42_CLK_IASRC_SEL_SHIFT) -#define CS42L42_CLK_IASRC_SEL_6 0 #define CS42L42_CLK_IASRC_SEL_12 1 #define CS42L42_OUT_ASRC_CLK (CS42L42_PAGE_12 + 0x0B) @@ -825,7 +761,6 @@ #define CS42L42_CLOCK_SWITCH_DELAY_US 150 #define CS42L42_PLL_LOCK_POLL_US 250 #define CS42L42_PLL_LOCK_TIMEOUT_US 1250 -#define CS42L42_HP_ADC_EN_TIME_US 20000 static const char *const cs42l42_supply_names[CS42L42_NUM_SUPPLIES] = { "VA", @@ -837,16 +772,16 @@ static const char *const cs42l42_supply_names[CS42L42_NUM_SUPPLIES] = { struct cs42l42_private { struct regmap *regmap; - struct device *dev; + struct snd_soc_component *component; struct regulator_bulk_data supplies[CS42L42_NUM_SUPPLIES]; struct gpio_desc *reset_gpio; struct completion pdn_done; struct snd_soc_jack *jack; - struct mutex jack_detect_mutex; int pll_config; int bclk; u32 sclk; u32 srate; + u8 pll_divout; u8 plug_state; u8 hs_type; u8 ts_inv; @@ -859,7 +794,6 @@ struct cs42l42_private { u8 hs_bias_ramp_time; u8 hs_bias_sense_en; u8 stream_use; - bool hp_adc_up_pending; }; #endif /* __CS42L42_H__ */ diff --git a/sound/soc/codecs/cs47l15.c b/sound/soc/codecs/cs47l15.c index 391fd7da33..1ee83160b8 100644 --- 
a/sound/soc/codecs/cs47l15.c +++ b/sound/soc/codecs/cs47l15.c @@ -45,7 +45,7 @@ struct cs47l15 { bool in1_lp_mode; }; -static const struct cs_dsp_region cs47l15_dsp1_regions[] = { +static const struct wm_adsp_region cs47l15_dsp1_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x080000 }, { .type = WMFW_ADSP2_ZM, .base = 0x0e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x0a0000 }, @@ -1402,18 +1402,18 @@ static int cs47l15_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "Failed to set DSP IRQ wake: %d\n", ret); cs47l15->core.adsp[0].part = "cs47l15"; - cs47l15->core.adsp[0].cs_dsp.num = 1; - cs47l15->core.adsp[0].cs_dsp.type = WMFW_ADSP2; - cs47l15->core.adsp[0].cs_dsp.rev = 2; - cs47l15->core.adsp[0].cs_dsp.dev = madera->dev; - cs47l15->core.adsp[0].cs_dsp.regmap = madera->regmap_32bit; + cs47l15->core.adsp[0].num = 1; + cs47l15->core.adsp[0].type = WMFW_ADSP2; + cs47l15->core.adsp[0].rev = 2; + cs47l15->core.adsp[0].dev = madera->dev; + cs47l15->core.adsp[0].regmap = madera->regmap_32bit; - cs47l15->core.adsp[0].cs_dsp.base = MADERA_DSP1_CONFIG_1; - cs47l15->core.adsp[0].cs_dsp.mem = cs47l15_dsp1_regions; - cs47l15->core.adsp[0].cs_dsp.num_mems = ARRAY_SIZE(cs47l15_dsp1_regions); + cs47l15->core.adsp[0].base = MADERA_DSP1_CONFIG_1; + cs47l15->core.adsp[0].mem = cs47l15_dsp1_regions; + cs47l15->core.adsp[0].num_mems = ARRAY_SIZE(cs47l15_dsp1_regions); - cs47l15->core.adsp[0].cs_dsp.lock_regions = - CS_ADSP2_REGION_1 | CS_ADSP2_REGION_2 | CS_ADSP2_REGION_3; + cs47l15->core.adsp[0].lock_regions = + WM_ADSP2_REGION_1 | WM_ADSP2_REGION_2 | WM_ADSP2_REGION_3; ret = wm_adsp2_init(&cs47l15->core.adsp[0]); if (ret != 0) diff --git a/sound/soc/codecs/cs47l24.c b/sound/soc/codecs/cs47l24.c index 6356f81aaf..6b6d088160 100644 --- a/sound/soc/codecs/cs47l24.c +++ b/sound/soc/codecs/cs47l24.c @@ -37,21 +37,21 @@ struct cs47l24_priv { struct arizona_fll fll[2]; }; -static const struct cs_dsp_region cs47l24_dsp2_regions[] = { +static const struct wm_adsp_region cs47l24_dsp2_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x200000 }, { .type = WMFW_ADSP2_ZM, .base = 0x280000 }, { .type = WMFW_ADSP2_XM, .base = 0x290000 }, { .type = WMFW_ADSP2_YM, .base = 0x2a8000 }, }; -static const struct cs_dsp_region cs47l24_dsp3_regions[] = { +static const struct wm_adsp_region cs47l24_dsp3_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x300000 }, { .type = WMFW_ADSP2_ZM, .base = 0x380000 }, { .type = WMFW_ADSP2_XM, .base = 0x390000 }, { .type = WMFW_ADSP2_YM, .base = 0x3a8000 }, }; -static const struct cs_dsp_region *cs47l24_dsp_regions[] = { +static const struct wm_adsp_region *cs47l24_dsp_regions[] = { cs47l24_dsp2_regions, cs47l24_dsp3_regions, }; @@ -1234,15 +1234,15 @@ static int cs47l24_probe(struct platform_device *pdev) for (i = 1; i <= 2; i++) { cs47l24->core.adsp[i].part = "cs47l24"; - cs47l24->core.adsp[i].cs_dsp.num = i + 1; - cs47l24->core.adsp[i].cs_dsp.type = WMFW_ADSP2; - cs47l24->core.adsp[i].cs_dsp.dev = arizona->dev; - cs47l24->core.adsp[i].cs_dsp.regmap = arizona->regmap; + cs47l24->core.adsp[i].num = i + 1; + cs47l24->core.adsp[i].type = WMFW_ADSP2; + cs47l24->core.adsp[i].dev = arizona->dev; + cs47l24->core.adsp[i].regmap = arizona->regmap; - cs47l24->core.adsp[i].cs_dsp.base = ARIZONA_DSP1_CONTROL_1 + + cs47l24->core.adsp[i].base = ARIZONA_DSP1_CONTROL_1 + (0x100 * i); - cs47l24->core.adsp[i].cs_dsp.mem = cs47l24_dsp_regions[i - 1]; - cs47l24->core.adsp[i].cs_dsp.num_mems = + cs47l24->core.adsp[i].mem = cs47l24_dsp_regions[i - 1]; + cs47l24->core.adsp[i].num_mems = 
ARRAY_SIZE(cs47l24_dsp2_regions); ret = wm_adsp2_init(&cs47l24->core.adsp[i]); diff --git a/sound/soc/codecs/cs47l35.c b/sound/soc/codecs/cs47l35.c index db2f844b8b..3f04a2a745 100644 --- a/sound/soc/codecs/cs47l35.c +++ b/sound/soc/codecs/cs47l35.c @@ -37,28 +37,28 @@ struct cs47l35 { struct madera_fll fll; }; -static const struct cs_dsp_region cs47l35_dsp1_regions[] = { +static const struct wm_adsp_region cs47l35_dsp1_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x080000 }, { .type = WMFW_ADSP2_ZM, .base = 0x0e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x0a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x0c0000 }, }; -static const struct cs_dsp_region cs47l35_dsp2_regions[] = { +static const struct wm_adsp_region cs47l35_dsp2_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x100000 }, { .type = WMFW_ADSP2_ZM, .base = 0x160000 }, { .type = WMFW_ADSP2_XM, .base = 0x120000 }, { .type = WMFW_ADSP2_YM, .base = 0x140000 }, }; -static const struct cs_dsp_region cs47l35_dsp3_regions[] = { +static const struct wm_adsp_region cs47l35_dsp3_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x180000 }, { .type = WMFW_ADSP2_ZM, .base = 0x1e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x1a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x1c0000 }, }; -static const struct cs_dsp_region *cs47l35_dsp_regions[] = { +static const struct wm_adsp_region *cs47l35_dsp_regions[] = { cs47l35_dsp1_regions, cs47l35_dsp2_regions, cs47l35_dsp3_regions, @@ -1686,15 +1686,15 @@ static int cs47l35_probe(struct platform_device *pdev) for (i = 0; i < CS47L35_NUM_ADSP; i++) { cs47l35->core.adsp[i].part = "cs47l35"; - cs47l35->core.adsp[i].cs_dsp.num = i + 1; - cs47l35->core.adsp[i].cs_dsp.type = WMFW_ADSP2; - cs47l35->core.adsp[i].cs_dsp.rev = 1; - cs47l35->core.adsp[i].cs_dsp.dev = madera->dev; - cs47l35->core.adsp[i].cs_dsp.regmap = madera->regmap_32bit; + cs47l35->core.adsp[i].num = i + 1; + cs47l35->core.adsp[i].type = WMFW_ADSP2; + cs47l35->core.adsp[i].rev = 1; + cs47l35->core.adsp[i].dev = madera->dev; + cs47l35->core.adsp[i].regmap = madera->regmap_32bit; - cs47l35->core.adsp[i].cs_dsp.base = wm_adsp2_control_bases[i]; - cs47l35->core.adsp[i].cs_dsp.mem = cs47l35_dsp_regions[i]; - cs47l35->core.adsp[i].cs_dsp.num_mems = + cs47l35->core.adsp[i].base = wm_adsp2_control_bases[i]; + cs47l35->core.adsp[i].mem = cs47l35_dsp_regions[i]; + cs47l35->core.adsp[i].num_mems = ARRAY_SIZE(cs47l35_dsp1_regions); ret = wm_adsp2_init(&cs47l35->core.adsp[i]); diff --git a/sound/soc/codecs/cs47l85.c b/sound/soc/codecs/cs47l85.c index d4fedc5ad5..748a180870 100644 --- a/sound/soc/codecs/cs47l85.c +++ b/sound/soc/codecs/cs47l85.c @@ -37,56 +37,56 @@ struct cs47l85 { struct madera_fll fll[3]; }; -static const struct cs_dsp_region cs47l85_dsp1_regions[] = { +static const struct wm_adsp_region cs47l85_dsp1_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x080000 }, { .type = WMFW_ADSP2_ZM, .base = 0x0e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x0a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x0c0000 }, }; -static const struct cs_dsp_region cs47l85_dsp2_regions[] = { +static const struct wm_adsp_region cs47l85_dsp2_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x100000 }, { .type = WMFW_ADSP2_ZM, .base = 0x160000 }, { .type = WMFW_ADSP2_XM, .base = 0x120000 }, { .type = WMFW_ADSP2_YM, .base = 0x140000 }, }; -static const struct cs_dsp_region cs47l85_dsp3_regions[] = { +static const struct wm_adsp_region cs47l85_dsp3_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x180000 }, { .type = WMFW_ADSP2_ZM, .base = 0x1e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x1a0000 }, { 
.type = WMFW_ADSP2_YM, .base = 0x1c0000 }, }; -static const struct cs_dsp_region cs47l85_dsp4_regions[] = { +static const struct wm_adsp_region cs47l85_dsp4_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x200000 }, { .type = WMFW_ADSP2_ZM, .base = 0x260000 }, { .type = WMFW_ADSP2_XM, .base = 0x220000 }, { .type = WMFW_ADSP2_YM, .base = 0x240000 }, }; -static const struct cs_dsp_region cs47l85_dsp5_regions[] = { +static const struct wm_adsp_region cs47l85_dsp5_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x280000 }, { .type = WMFW_ADSP2_ZM, .base = 0x2e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x2a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x2c0000 }, }; -static const struct cs_dsp_region cs47l85_dsp6_regions[] = { +static const struct wm_adsp_region cs47l85_dsp6_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x300000 }, { .type = WMFW_ADSP2_ZM, .base = 0x360000 }, { .type = WMFW_ADSP2_XM, .base = 0x320000 }, { .type = WMFW_ADSP2_YM, .base = 0x340000 }, }; -static const struct cs_dsp_region cs47l85_dsp7_regions[] = { +static const struct wm_adsp_region cs47l85_dsp7_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x380000 }, { .type = WMFW_ADSP2_ZM, .base = 0x3e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x3a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x3c0000 }, }; -static const struct cs_dsp_region *cs47l85_dsp_regions[] = { +static const struct wm_adsp_region *cs47l85_dsp_regions[] = { cs47l85_dsp1_regions, cs47l85_dsp2_regions, cs47l85_dsp3_regions, @@ -2632,15 +2632,15 @@ static int cs47l85_probe(struct platform_device *pdev) for (i = 0; i < CS47L85_NUM_ADSP; i++) { cs47l85->core.adsp[i].part = "cs47l85"; - cs47l85->core.adsp[i].cs_dsp.num = i + 1; - cs47l85->core.adsp[i].cs_dsp.type = WMFW_ADSP2; - cs47l85->core.adsp[i].cs_dsp.rev = 1; - cs47l85->core.adsp[i].cs_dsp.dev = madera->dev; - cs47l85->core.adsp[i].cs_dsp.regmap = madera->regmap_32bit; + cs47l85->core.adsp[i].num = i + 1; + cs47l85->core.adsp[i].type = WMFW_ADSP2; + cs47l85->core.adsp[i].rev = 1; + cs47l85->core.adsp[i].dev = madera->dev; + cs47l85->core.adsp[i].regmap = madera->regmap_32bit; - cs47l85->core.adsp[i].cs_dsp.base = wm_adsp2_control_bases[i]; - cs47l85->core.adsp[i].cs_dsp.mem = cs47l85_dsp_regions[i]; - cs47l85->core.adsp[i].cs_dsp.num_mems = + cs47l85->core.adsp[i].base = wm_adsp2_control_bases[i]; + cs47l85->core.adsp[i].mem = cs47l85_dsp_regions[i]; + cs47l85->core.adsp[i].num_mems = ARRAY_SIZE(cs47l85_dsp1_regions); ret = wm_adsp2_init(&cs47l85->core.adsp[i]); diff --git a/sound/soc/codecs/cs47l90.c b/sound/soc/codecs/cs47l90.c index 5aec937a24..d2911c014b 100644 --- a/sound/soc/codecs/cs47l90.c +++ b/sound/soc/codecs/cs47l90.c @@ -37,56 +37,56 @@ struct cs47l90 { struct madera_fll fll[3]; }; -static const struct cs_dsp_region cs47l90_dsp1_regions[] = { +static const struct wm_adsp_region cs47l90_dsp1_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x080000 }, { .type = WMFW_ADSP2_ZM, .base = 0x0e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x0a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x0c0000 }, }; -static const struct cs_dsp_region cs47l90_dsp2_regions[] = { +static const struct wm_adsp_region cs47l90_dsp2_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x100000 }, { .type = WMFW_ADSP2_ZM, .base = 0x160000 }, { .type = WMFW_ADSP2_XM, .base = 0x120000 }, { .type = WMFW_ADSP2_YM, .base = 0x140000 }, }; -static const struct cs_dsp_region cs47l90_dsp3_regions[] = { +static const struct wm_adsp_region cs47l90_dsp3_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x180000 }, { .type = WMFW_ADSP2_ZM, .base = 0x1e0000 }, { 
.type = WMFW_ADSP2_XM, .base = 0x1a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x1c0000 }, }; -static const struct cs_dsp_region cs47l90_dsp4_regions[] = { +static const struct wm_adsp_region cs47l90_dsp4_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x200000 }, { .type = WMFW_ADSP2_ZM, .base = 0x260000 }, { .type = WMFW_ADSP2_XM, .base = 0x220000 }, { .type = WMFW_ADSP2_YM, .base = 0x240000 }, }; -static const struct cs_dsp_region cs47l90_dsp5_regions[] = { +static const struct wm_adsp_region cs47l90_dsp5_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x280000 }, { .type = WMFW_ADSP2_ZM, .base = 0x2e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x2a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x2c0000 }, }; -static const struct cs_dsp_region cs47l90_dsp6_regions[] = { +static const struct wm_adsp_region cs47l90_dsp6_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x300000 }, { .type = WMFW_ADSP2_ZM, .base = 0x360000 }, { .type = WMFW_ADSP2_XM, .base = 0x320000 }, { .type = WMFW_ADSP2_YM, .base = 0x340000 }, }; -static const struct cs_dsp_region cs47l90_dsp7_regions[] = { +static const struct wm_adsp_region cs47l90_dsp7_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x380000 }, { .type = WMFW_ADSP2_ZM, .base = 0x3e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x3a0000 }, { .type = WMFW_ADSP2_YM, .base = 0x3c0000 }, }; -static const struct cs_dsp_region *cs47l90_dsp_regions[] = { +static const struct wm_adsp_region *cs47l90_dsp_regions[] = { cs47l90_dsp1_regions, cs47l90_dsp2_regions, cs47l90_dsp3_regions, @@ -2543,18 +2543,18 @@ static int cs47l90_probe(struct platform_device *pdev) for (i = 0; i < CS47L90_NUM_ADSP; i++) { cs47l90->core.adsp[i].part = "cs47l90"; - cs47l90->core.adsp[i].cs_dsp.num = i + 1; - cs47l90->core.adsp[i].cs_dsp.type = WMFW_ADSP2; - cs47l90->core.adsp[i].cs_dsp.rev = 2; - cs47l90->core.adsp[i].cs_dsp.dev = madera->dev; - cs47l90->core.adsp[i].cs_dsp.regmap = madera->regmap_32bit; + cs47l90->core.adsp[i].num = i + 1; + cs47l90->core.adsp[i].type = WMFW_ADSP2; + cs47l90->core.adsp[i].rev = 2; + cs47l90->core.adsp[i].dev = madera->dev; + cs47l90->core.adsp[i].regmap = madera->regmap_32bit; - cs47l90->core.adsp[i].cs_dsp.base = cs47l90_dsp_control_bases[i]; - cs47l90->core.adsp[i].cs_dsp.mem = cs47l90_dsp_regions[i]; - cs47l90->core.adsp[i].cs_dsp.num_mems = + cs47l90->core.adsp[i].base = cs47l90_dsp_control_bases[i]; + cs47l90->core.adsp[i].mem = cs47l90_dsp_regions[i]; + cs47l90->core.adsp[i].num_mems = ARRAY_SIZE(cs47l90_dsp1_regions); - cs47l90->core.adsp[i].cs_dsp.lock_regions = CS_ADSP2_REGION_1_9; + cs47l90->core.adsp[i].lock_regions = WM_ADSP2_REGION_1_9; ret = wm_adsp2_init(&cs47l90->core.adsp[i]); diff --git a/sound/soc/codecs/cs47l92.c b/sound/soc/codecs/cs47l92.c index a1b8dcdb9f..1a0280416d 100644 --- a/sound/soc/codecs/cs47l92.c +++ b/sound/soc/codecs/cs47l92.c @@ -37,7 +37,7 @@ struct cs47l92 { struct madera_fll fll[2]; }; -static const struct cs_dsp_region cs47l92_dsp1_regions[] = { +static const struct wm_adsp_region cs47l92_dsp1_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x080000 }, { .type = WMFW_ADSP2_ZM, .base = 0x0e0000 }, { .type = WMFW_ADSP2_XM, .base = 0x0a0000 }, @@ -2002,17 +2002,17 @@ static int cs47l92_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "Failed to set DSP IRQ wake: %d\n", ret); cs47l92->core.adsp[0].part = "cs47l92"; - cs47l92->core.adsp[0].cs_dsp.num = 1; - cs47l92->core.adsp[0].cs_dsp.type = WMFW_ADSP2; - cs47l92->core.adsp[0].cs_dsp.rev = 2; - cs47l92->core.adsp[0].cs_dsp.dev = madera->dev; - cs47l92->core.adsp[0].cs_dsp.regmap = 
madera->regmap_32bit; + cs47l92->core.adsp[0].num = 1; + cs47l92->core.adsp[0].type = WMFW_ADSP2; + cs47l92->core.adsp[0].rev = 2; + cs47l92->core.adsp[0].dev = madera->dev; + cs47l92->core.adsp[0].regmap = madera->regmap_32bit; - cs47l92->core.adsp[0].cs_dsp.base = MADERA_DSP1_CONFIG_1; - cs47l92->core.adsp[0].cs_dsp.mem = cs47l92_dsp1_regions; - cs47l92->core.adsp[0].cs_dsp.num_mems = ARRAY_SIZE(cs47l92_dsp1_regions); + cs47l92->core.adsp[0].base = MADERA_DSP1_CONFIG_1; + cs47l92->core.adsp[0].mem = cs47l92_dsp1_regions; + cs47l92->core.adsp[0].num_mems = ARRAY_SIZE(cs47l92_dsp1_regions); - cs47l92->core.adsp[0].cs_dsp.lock_regions = CS_ADSP2_REGION_1_9; + cs47l92->core.adsp[0].lock_regions = WM_ADSP2_REGION_1_9; ret = wm_adsp2_init(&cs47l92->core.adsp[0]); if (ret != 0) diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index 1af0bf5f1e..13258f3ca9 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -252,9 +252,10 @@ static void v253_close(struct tty_struct *tty) } /* Line discipline .hangup() */ -static void v253_hangup(struct tty_struct *tty) +static int v253_hangup(struct tty_struct *tty) { v253_close(tty); + return 0; } /* Line discipline .receive_buf() */ diff --git a/sound/soc/codecs/es7241.c b/sound/soc/codecs/es7241.c index 9f20bfb855..2344a0b035 100644 --- a/sound/soc/codecs/es7241.c +++ b/sound/soc/codecs/es7241.c @@ -255,6 +255,7 @@ static int es7241_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct es7241_data *priv; + int err; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -270,19 +271,28 @@ static int es7241_probe(struct platform_device *pdev) es7241_parse_fmt(dev, priv); priv->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); - if (IS_ERR(priv->reset)) - return dev_err_probe(dev, PTR_ERR(priv->reset), - "Failed to get 'reset' gpio"); + if (IS_ERR(priv->reset)) { + err = PTR_ERR(priv->reset); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'reset' gpio: %d", err); + return err; + } priv->m0 = devm_gpiod_get_optional(dev, "m0", GPIOD_OUT_LOW); - if (IS_ERR(priv->m0)) - return dev_err_probe(dev, PTR_ERR(priv->m0), - "Failed to get 'm0' gpio"); + if (IS_ERR(priv->m0)) { + err = PTR_ERR(priv->m0); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'm0' gpio: %d", err); + return err; + } priv->m1 = devm_gpiod_get_optional(dev, "m1", GPIOD_OUT_LOW); - if (IS_ERR(priv->m1)) - return dev_err_probe(dev, PTR_ERR(priv->m1), - "Failed to get 'm1' gpio"); + if (IS_ERR(priv->m1)) { + err = PTR_ERR(priv->m1); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'm1' gpio: %d", err); + return err; + } return devm_snd_soc_register_component(&pdev->dev, &es7241_component_driver, diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c index 8f30a3ea8b..5fb02635c1 100644 --- a/sound/soc/codecs/es8316.c +++ b/sound/soc/codecs/es8316.c @@ -840,7 +840,6 @@ MODULE_DEVICE_TABLE(of, es8316_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id es8316_acpi_match[] = { {"ESSX8316", 0}, - {"ESSX8336", 0}, {}, }; MODULE_DEVICE_TABLE(acpi, es8316_acpi_match); diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c index de5955db0a..390dd6c7f6 100644 --- a/sound/soc/codecs/hdac_hda.c +++ b/sound/soc/codecs/hdac_hda.c @@ -46,8 +46,9 @@ static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); -static int 
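Aside: the es7241 hunk above (and the later max9759, max9860 and pcm3168a hunks) open-codes what dev_err_probe() bundled into one call: log the failure only when the error is not -EPROBE_DEFER, since a deferred probe will be retried and logging it would only add noise. The recurring shape:

    gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
    if (IS_ERR(gpiod)) {
            err = PTR_ERR(gpiod);
            if (err != -EPROBE_DEFER)
                    dev_err(dev, "Failed to get 'reset' gpio: %d", err);
            return err;
    }
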
hdac_hda_dai_set_stream(struct snd_soc_dai *dai, void *stream, - int direction); +static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, + unsigned int tx_mask, unsigned int rx_mask, + int slots, int slot_width); static struct hda_pcm *snd_soc_find_pcm_from_dai(struct hdac_hda_priv *hda_pvt, struct snd_soc_dai *dai); @@ -57,7 +58,7 @@ static const struct snd_soc_dai_ops hdac_hda_dai_ops = { .prepare = hdac_hda_dai_prepare, .hw_params = hdac_hda_dai_hw_params, .hw_free = hdac_hda_dai_hw_free, - .set_stream = hdac_hda_dai_set_stream, + .set_tdm_slot = hdac_hda_dai_set_tdm_slot, }; static struct snd_soc_dai_driver hdac_hda_dais[] = { @@ -179,22 +180,21 @@ static struct snd_soc_dai_driver hdac_hda_dais[] = { }; -static int hdac_hda_dai_set_stream(struct snd_soc_dai *dai, - void *stream, int direction) +static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, + unsigned int tx_mask, unsigned int rx_mask, + int slots, int slot_width) { struct snd_soc_component *component = dai->component; struct hdac_hda_priv *hda_pvt; struct hdac_hda_pcm *pcm; - struct hdac_stream *hstream; - - if (!stream) - return -EINVAL; hda_pvt = snd_soc_component_get_drvdata(component); pcm = &hda_pvt->pcm[dai->id]; - hstream = (struct hdac_stream *)stream; - pcm->stream_tag[direction] = hstream->stream_tag; + if (tx_mask) + pcm->stream_tag[SNDRV_PCM_STREAM_PLAYBACK] = tx_mask; + else + pcm->stream_tag[SNDRV_PCM_STREAM_CAPTURE] = rx_mask; return 0; } diff --git a/sound/soc/codecs/jz4770.c b/sound/soc/codecs/jz4770.c index 1d0c467ab5..6b60120f59 100644 --- a/sound/soc/codecs/jz4770.c +++ b/sound/soc/codecs/jz4770.c @@ -307,7 +307,6 @@ static const DECLARE_TLV_DB_MINMAX_MUTE(dac_tlv, -3100, 0); static const DECLARE_TLV_DB_SCALE(adc_tlv, 0, 100, 0); static const DECLARE_TLV_DB_MINMAX(out_tlv, -2500, 600); static const DECLARE_TLV_DB_SCALE(linein_tlv, -2500, 100, 0); -static const DECLARE_TLV_DB_MINMAX(mixer_tlv, -3100, 0); /* Unconditional controls. 
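Aside: in the hdac_hda hunk above, .set_tdm_slot is repurposed as a transport for HDA stream tags rather than for real TDM slots: a non-zero tx_mask carries the playback stream tag, otherwise rx_mask carries the capture tag. A machine driver would hand the tag over through the standard helper, roughly:

    /* 'tag' is the HDA stream tag obtained on the controller side. */
    ret = snd_soc_dai_set_tdm_slot(codec_dai, tag, 0, 0, 0); /* playback */
    ret = snd_soc_dai_set_tdm_slot(codec_dai, 0, tag, 0, 0); /* capture */
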
*/ static const struct snd_kcontrol_new jz4770_codec_snd_controls[] = { @@ -320,14 +319,6 @@ static const struct snd_kcontrol_new jz4770_codec_snd_controls[] = { SOC_DOUBLE_R_TLV("Line In Bypass Playback Volume", JZ4770_CODEC_REG_GCR_LIBYL, JZ4770_CODEC_REG_GCR_LIBYR, REG_GCR_GAIN_OFFSET, REG_GCR_GAIN_MAX, 1, linein_tlv), - - SOC_SINGLE_TLV("Mixer Capture Volume", - JZ4770_CODEC_REG_GCR_MIXADC, - REG_GCR_GAIN_OFFSET, REG_GCR_GAIN_MAX, 1, mixer_tlv), - - SOC_SINGLE_TLV("Mixer Playback Volume", - JZ4770_CODEC_REG_GCR_MIXDAC, - REG_GCR_GAIN_OFFSET, REG_GCR_GAIN_MAX, 1, mixer_tlv), }; static const struct snd_kcontrol_new jz4770_codec_pcm_playback_controls[] = { diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 6ffe88345d..1c0409350e 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -3531,7 +3531,7 @@ static int rx_macro_probe(struct platform_device *pdev) rx->clks[3].id = "npl"; rx->clks[4].id = "fsgen"; - ret = devm_clk_bulk_get_optional(dev, RX_NUM_CLKS_MAX, rx->clks); + ret = devm_clk_bulk_get(dev, RX_NUM_CLKS_MAX, rx->clks); if (ret) { dev_err(dev, "Error getting RX Clocks (%d)\n", ret); return ret; @@ -3577,7 +3577,6 @@ static int rx_macro_remove(struct platform_device *pdev) } static const struct of_device_id rx_macro_dt_match[] = { - { .compatible = "qcom,sc7280-lpass-rx-macro" }, { .compatible = "qcom,sm8250-lpass-rx-macro" }, { } }; diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index a4c0a155af..27a0d5defd 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -272,7 +272,7 @@ struct tx_macro { static const DECLARE_TLV_DB_SCALE(digital_gain, -8400, 100, -8400); -static struct reg_default tx_defaults[] = { +static const struct reg_default tx_defaults[] = { /* TX Macro */ { CDC_TX_CLK_RST_CTRL_MCLK_CONTROL, 0x00 }, { CDC_TX_CLK_RST_CTRL_FS_CNT_CONTROL, 0x00 }, @@ -1674,9 +1674,6 @@ static int tx_macro_component_probe(struct snd_soc_component *comp) snd_soc_component_update_bits(comp, CDC_TX0_TX_PATH_SEC7, 0x3F, 0x0A); - /* Enable swr mic0 and mic1 clock */ - snd_soc_component_update_bits(comp, CDC_TX_TOP_CSR_SWR_AMIC0_CTL, 0xFF, 0x00); - snd_soc_component_update_bits(comp, CDC_TX_TOP_CSR_SWR_AMIC1_CTL, 0xFF, 0x00); return 0; } @@ -1781,10 +1778,9 @@ static const struct snd_soc_component_driver tx_macro_component_drv = { static int tx_macro_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; struct tx_macro *tx; void __iomem *base; - int ret, reg; + int ret; tx = devm_kzalloc(dev, sizeof(*tx), GFP_KERNEL); if (!tx) @@ -1796,7 +1792,7 @@ static int tx_macro_probe(struct platform_device *pdev) tx->clks[3].id = "npl"; tx->clks[4].id = "fsgen"; - ret = devm_clk_bulk_get_optional(dev, TX_NUM_CLKS_MAX, tx->clks); + ret = devm_clk_bulk_get(dev, TX_NUM_CLKS_MAX, tx->clks); if (ret) { dev_err(dev, "Error getting RX Clocks (%d)\n", ret); return ret; @@ -1806,20 +1802,6 @@ static int tx_macro_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - /* Update defaults for lpass sc7280 */ - if (of_device_is_compatible(np, "qcom,sc7280-lpass-tx-macro")) { - for (reg = 0; reg < ARRAY_SIZE(tx_defaults); reg++) { - switch (tx_defaults[reg].reg) { - case CDC_TX_TOP_CSR_SWR_AMIC0_CTL: - case CDC_TX_TOP_CSR_SWR_AMIC1_CTL: - tx_defaults[reg].def = 0x0E; - break; - default: - break; - } - } - } - tx->regmap = devm_regmap_init_mmio(dev, base, &tx_regmap_config); dev_set_drvdata(dev, 
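Aside: the lpass-macro hunks swap devm_clk_bulk_get_optional() back to devm_clk_bulk_get(): the non-optional variant fails the probe if any named clock is absent, while the optional one leaves missing clocks as NULL handles. The call pattern as used in these drivers:

    struct clk_bulk_data clks[2];
    int ret;

    clks[0].id = "mclk";
    clks[1].id = "npl";
    ret = devm_clk_bulk_get(dev, ARRAY_SIZE(clks), clks);
    if (ret) {
            dev_err(dev, "Error getting clocks (%d)\n", ret);
            return ret;
    }
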
tx); @@ -1861,7 +1843,6 @@ static int tx_macro_remove(struct platform_device *pdev) } static const struct of_device_id tx_macro_dt_match[] = { - { .compatible = "qcom,sc7280-lpass-tx-macro" }, { .compatible = "qcom,sm8250-lpass-tx-macro" }, { } }; diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index 11147e3568..56c93f4465 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1408,7 +1408,7 @@ static int va_macro_probe(struct platform_device *pdev) va->clks[1].id = "dcodec"; va->clks[2].id = "mclk"; - ret = devm_clk_bulk_get_optional(dev, VA_NUM_CLKS_MAX, va->clks); + ret = devm_clk_bulk_get(dev, VA_NUM_CLKS_MAX, va->clks); if (ret) { dev_err(dev, "Error getting VA Clocks (%d)\n", ret); return ret; @@ -1472,7 +1472,6 @@ static int va_macro_remove(struct platform_device *pdev) } static const struct of_device_id va_macro_dt_match[] = { - { .compatible = "qcom,sc7280-lpass-va-macro" }, { .compatible = "qcom,sm8250-lpass-va-macro" }, {} }; diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 75baf8eb70..d3ac318fd6 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -2445,7 +2445,6 @@ static int wsa_macro_remove(struct platform_device *pdev) } static const struct of_device_id wsa_macro_dt_match[] = { - {.compatible = "qcom,sc7280-lpass-wsa-macro"}, {.compatible = "qcom,sm8250-lpass-wsa-macro"}, {} }; diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index 272041c623..f4ed7e0467 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -905,7 +905,7 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, */ mutex_lock(&priv->rate_lock); - if (!madera_can_change_grp_rate(priv, priv->adsp[adsp_num].cs_dsp.base)) { + if (!madera_can_change_grp_rate(priv, priv->adsp[adsp_num].base)) { dev_warn(priv->madera->dev, "Cannot change '%s' while in use by active audio paths\n", kcontrol->id.name); @@ -964,7 +964,7 @@ static int madera_write_adsp_clk_setting(struct madera_priv *priv, unsigned int mask = MADERA_DSP_RATE_MASK; int ret; - val = priv->adsp_rate_cache[dsp->cs_dsp.num - 1] << MADERA_DSP_RATE_SHIFT; + val = priv->adsp_rate_cache[dsp->num - 1] << MADERA_DSP_RATE_SHIFT; switch (priv->madera->type) { case CS47L35: @@ -978,15 +978,15 @@ static int madera_write_adsp_clk_setting(struct madera_priv *priv, /* Configure exact dsp frequency */ dev_dbg(priv->madera->dev, "Set DSP frequency to 0x%x\n", freq); - ret = regmap_write(dsp->cs_dsp.regmap, - dsp->cs_dsp.base + MADERA_DSP_CONFIG_2_OFFS, freq); + ret = regmap_write(dsp->regmap, + dsp->base + MADERA_DSP_CONFIG_2_OFFS, freq); if (ret) goto err; break; } - ret = regmap_update_bits(dsp->cs_dsp.regmap, - dsp->cs_dsp.base + MADERA_DSP_CONFIG_1_OFFS, + ret = regmap_update_bits(dsp->regmap, + dsp->base + MADERA_DSP_CONFIG_1_OFFS, mask, val); if (ret) goto err; @@ -996,7 +996,7 @@ static int madera_write_adsp_clk_setting(struct madera_priv *priv, return 0; err: - dev_err(dsp->cs_dsp.dev, "Failed to set DSP%d clock: %d\n", dsp->cs_dsp.num, ret); + dev_err(dsp->dev, "Failed to set DSP%d clock: %d\n", dsp->num, ret); return ret; } @@ -1018,7 +1018,7 @@ int madera_set_adsp_clk(struct madera_priv *priv, int dsp_num, * changes are locked out by the domain_group_ref reference count. 
*/ - ret = regmap_read(dsp->cs_dsp.regmap, dsp->cs_dsp.base, &cur); + ret = regmap_read(dsp->regmap, dsp->base, &cur); if (ret) { dev_err(madera->dev, "Failed to read current DSP rate: %d\n", ret); @@ -1027,7 +1027,7 @@ int madera_set_adsp_clk(struct madera_priv *priv, int dsp_num, cur &= MADERA_DSP_RATE_MASK; - new = priv->adsp_rate_cache[dsp->cs_dsp.num - 1] << MADERA_DSP_RATE_SHIFT; + new = priv->adsp_rate_cache[dsp->num - 1] << MADERA_DSP_RATE_SHIFT; if (new == cur) { dev_dbg(madera->dev, "DSP rate not changed\n"); diff --git a/sound/soc/codecs/max9759.c b/sound/soc/codecs/max9759.c index bc57d7687f..0c261335c8 100644 --- a/sound/soc/codecs/max9759.c +++ b/sound/soc/codecs/max9759.c @@ -141,6 +141,7 @@ static int max9759_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct max9759 *priv; + int err; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -149,20 +150,29 @@ static int max9759_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); priv->gpiod_shutdown = devm_gpiod_get(dev, "shutdown", GPIOD_OUT_HIGH); - if (IS_ERR(priv->gpiod_shutdown)) - return dev_err_probe(dev, PTR_ERR(priv->gpiod_shutdown), - "Failed to get 'shutdown' gpio"); + if (IS_ERR(priv->gpiod_shutdown)) { + err = PTR_ERR(priv->gpiod_shutdown); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'shutdown' gpio: %d", err); + return err; + } priv->gpiod_mute = devm_gpiod_get(dev, "mute", GPIOD_OUT_HIGH); - if (IS_ERR(priv->gpiod_mute)) - return dev_err_probe(dev, PTR_ERR(priv->gpiod_mute), - "Failed to get 'mute' gpio"); + if (IS_ERR(priv->gpiod_mute)) { + err = PTR_ERR(priv->gpiod_mute); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'mute' gpio: %d", err); + return err; + } priv->is_mute = true; priv->gpiod_gain = devm_gpiod_get_array(dev, "gain", GPIOD_OUT_HIGH); - if (IS_ERR(priv->gpiod_gain)) - return dev_err_probe(dev, PTR_ERR(priv->gpiod_gain), - "Failed to get 'gain' gpios"); + if (IS_ERR(priv->gpiod_gain)) { + err = PTR_ERR(priv->gpiod_gain); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'gain' gpios: %d", err); + return err; + } priv->gain = 0; if (priv->gpiod_gain->ndescs != 2) { diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c index f47e956d4f..dc520effc6 100644 --- a/sound/soc/codecs/max98373-sdw.c +++ b/sound/soc/codecs/max98373-sdw.c @@ -741,7 +741,7 @@ static int max98373_sdw_set_tdm_slot(struct snd_soc_dai *dai, static const struct snd_soc_dai_ops max98373_dai_sdw_ops = { .hw_params = max98373_sdw_dai_hw_params, .hw_free = max98373_pcm_hw_free, - .set_stream = max98373_set_sdw_stream, + .set_sdw_stream = max98373_set_sdw_stream, .shutdown = max98373_shutdown, .set_tdm_slot = max98373_sdw_set_tdm_slot, }; diff --git a/sound/soc/codecs/max98390.c b/sound/soc/codecs/max98390.c index d1882cbc93..b392567c2b 100644 --- a/sound/soc/codecs/max98390.c +++ b/sound/soc/codecs/max98390.c @@ -1021,7 +1021,7 @@ static int max98390_i2c_probe(struct i2c_client *i2c, int reg = 0; struct max98390_priv *max98390 = NULL; - struct i2c_adapter *adapter = i2c->adapter; + struct i2c_adapter *adapter = to_i2c_adapter(i2c->dev.parent); ret = i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE diff --git a/sound/soc/codecs/max9860.c b/sound/soc/codecs/max9860.c index 7c9686be59..dd29b183ec 100644 --- a/sound/soc/codecs/max9860.c +++ b/sound/soc/codecs/max9860.c @@ -606,9 +606,12 @@ static int max9860_probe(struct i2c_client *i2c) return -ENOMEM; max9860->dvddio = devm_regulator_get(dev, "DVDDIO"); - if 
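Aside: the max98390 change above is an equivalence, not a behavior change: the i2c core sets an i2c_client's parent device to its adapter, so to_i2c_adapter(i2c->dev.parent) and i2c->adapter resolve to the same object. Sketch:

    struct i2c_adapter *adap = to_i2c_adapter(client->dev.parent);

    if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_BYTE))
            return -EOPNOTSUPP;
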
(IS_ERR(max9860->dvddio)) - return dev_err_probe(dev, PTR_ERR(max9860->dvddio), - "Failed to get DVDDIO supply\n"); + if (IS_ERR(max9860->dvddio)) { + ret = PTR_ERR(max9860->dvddio); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get DVDDIO supply: %d\n", ret); + return ret; + } max9860->dvddio_nb.notifier_call = max9860_dvddio_event; @@ -640,7 +643,8 @@ static int max9860_probe(struct i2c_client *i2c) if (IS_ERR(mclk)) { ret = PTR_ERR(mclk); - dev_err_probe(dev, ret, "Failed to get MCLK\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get MCLK: %d\n", ret); goto err_regulator; } diff --git a/sound/soc/codecs/max98927.c b/sound/soc/codecs/max98927.c index 5ba5f876ea..8b206ee777 100644 --- a/sound/soc/codecs/max98927.c +++ b/sound/soc/codecs/max98927.c @@ -897,19 +897,6 @@ static int max98927_i2c_probe(struct i2c_client *i2c, "Failed to allocate regmap: %d\n", ret); return ret; } - - max98927->reset_gpio - = devm_gpiod_get_optional(&i2c->dev, "reset", GPIOD_OUT_HIGH); - if (IS_ERR(max98927->reset_gpio)) { - ret = PTR_ERR(max98927->reset_gpio); - return dev_err_probe(&i2c->dev, ret, "failed to request GPIO reset pin"); - } - - if (max98927->reset_gpio) { - gpiod_set_value_cansleep(max98927->reset_gpio, 0); - /* Wait for i2c port to be ready */ - usleep_range(5000, 6000); - } /* Check Revision ID */ ret = regmap_read(max98927->regmap, @@ -934,17 +921,6 @@ static int max98927_i2c_probe(struct i2c_client *i2c, return ret; } -static int max98927_i2c_remove(struct i2c_client *i2c) -{ - struct max98927_priv *max98927 = i2c_get_clientdata(i2c); - - if (max98927->reset_gpio) { - gpiod_set_value_cansleep(max98927->reset_gpio, 1); - } - - return 0; -} - static const struct i2c_device_id max98927_i2c_id[] = { { "max98927", 0}, { }, @@ -976,7 +952,6 @@ static struct i2c_driver max98927_i2c_driver = { .pm = &max98927_pm, }, .probe = max98927_i2c_probe, - .remove = max98927_i2c_remove, .id_table = max98927_i2c_id, }; diff --git a/sound/soc/codecs/max98927.h b/sound/soc/codecs/max98927.h index 13f5066d74..05f495db91 100644 --- a/sound/soc/codecs/max98927.h +++ b/sound/soc/codecs/max98927.h @@ -255,7 +255,6 @@ struct max98927_priv { struct regmap *regmap; struct snd_soc_component *component; struct max98927_pdata *pdata; - struct gpio_desc *reset_gpio; unsigned int spk_gain; unsigned int sysclk; unsigned int v_l_slot; diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 485cda46db..3ddd822240 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -822,8 +822,8 @@ static const struct snd_soc_dapm_route pm8916_wcd_analog_audio_map[] = { {"EAR PA", NULL, "EAR CP"}, /* Headset (RX MIX1 and RX MIX2) */ - {"HPH_L", NULL, "HPHL PA"}, - {"HPH_R", NULL, "HPHR PA"}, + {"HEADPHONE", NULL, "HPHL PA"}, + {"HEADPHONE", NULL, "HPHR PA"}, {"HPHL DAC", NULL, "EAR_HPHL_CLK"}, {"HPHR DAC", NULL, "EAR_HPHR_CLK"}, @@ -870,8 +870,7 @@ static const struct snd_soc_dapm_widget pm8916_wcd_analog_dapm_widgets[] = { SND_SOC_DAPM_INPUT("AMIC3"), SND_SOC_DAPM_INPUT("AMIC2"), SND_SOC_DAPM_OUTPUT("EAR"), - SND_SOC_DAPM_OUTPUT("HPH_L"), - SND_SOC_DAPM_OUTPUT("HPH_R"), + SND_SOC_DAPM_OUTPUT("HEADPHONE"), /* RX stuff */ SND_SOC_DAPM_SUPPLY("INT_LDO_H", SND_SOC_NOPM, 1, 0, NULL, 0), diff --git a/sound/soc/codecs/mt6359.c b/sound/soc/codecs/mt6359.c index f8532aa7e4..2d6a4a29b8 100644 --- a/sound/soc/codecs/mt6359.c +++ b/sound/soc/codecs/mt6359.c @@ -2697,7 +2697,7 @@ static int mt6359_codec_probe(struct snd_soc_component *cmpnt) static 
void mt6359_codec_remove(struct snd_soc_component *cmpnt) { - cmpnt->regmap = NULL; + snd_soc_component_exit_regmap(cmpnt); } static const DECLARE_TLV_DB_SCALE(hp_playback_tlv, -2200, 100, 0); diff --git a/sound/soc/codecs/mt6660.c b/sound/soc/codecs/mt6660.c index 3a881523c3..358c500377 100644 --- a/sound/soc/codecs/mt6660.c +++ b/sound/soc/codecs/mt6660.c @@ -47,12 +47,13 @@ static int mt6660_reg_write(void *context, unsigned int reg, unsigned int val) struct mt6660_chip *chip = context; int size = mt6660_get_reg_size(reg); u8 reg_data[4]; - int i; + int i, ret; for (i = 0; i < size; i++) reg_data[size - i - 1] = (val >> (8 * i)) & 0xff; - return i2c_smbus_write_i2c_block_data(chip->i2c, reg, size, reg_data); + ret = i2c_smbus_write_i2c_block_data(chip->i2c, reg, size, reg_data); + return ret; } static int mt6660_reg_read(void *context, unsigned int reg, unsigned int *val) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index d0dd1542f7..f7018f2dd2 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -29,7 +29,6 @@ #include "nau8824.h" #define NAU8824_JD_ACTIVE_HIGH BIT(0) -#define NAU8824_MONO_SPEAKER BIT(1) static int nau8824_quirk; static int quirk_override = -1; @@ -1862,25 +1861,7 @@ static const struct dmi_system_id nau8824_quirk_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "20170531"), }, - .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH | - NAU8824_MONO_SPEAKER), - }, - { - /* CUBE iwork8 Air */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "cube"), - DMI_MATCH(DMI_PRODUCT_NAME, "i1-TF"), - DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), - }, - .driver_data = (void *)(NAU8824_MONO_SPEAKER), - }, - { - /* Pipo W2S */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "PIPO"), - DMI_MATCH(DMI_PRODUCT_NAME, "W2S"), - }, - .driver_data = (void *)(NAU8824_MONO_SPEAKER), + .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH), }, {} }; @@ -1899,17 +1880,6 @@ static void nau8824_check_quirks(void) nau8824_quirk = (unsigned long)dmi_id->driver_data; } -const char *nau8824_components(void) -{ - nau8824_check_quirks(); - - if (nau8824_quirk & NAU8824_MONO_SPEAKER) - return "cfg-spk:1"; - else - return "cfg-spk:2"; -} -EXPORT_SYMBOL_GPL(nau8824_components); - static int nau8824_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { diff --git a/sound/soc/codecs/nau8824.h b/sound/soc/codecs/nau8824.h index de4bae8281..1d7bdd8e05 100644 --- a/sound/soc/codecs/nau8824.h +++ b/sound/soc/codecs/nau8824.h @@ -197,7 +197,7 @@ /* JACK_DET_CTRL (0x0D) */ #define NAU8824_JACK_EJECT_DT_SFT 2 #define NAU8824_JACK_EJECT_DT_MASK (0x3 << NAU8824_JACK_EJECT_DT_SFT) -#define NAU8824_JACK_LOGIC (0x1 << 1) +#define NAU8824_JACK_LOGIC 0x1 /* INTERRUPT_SETTING_1 (0x0F) */ @@ -470,7 +470,6 @@ struct nau8824_osr_attr { int nau8824_enable_jack_detect(struct snd_soc_component *component, struct snd_soc_jack *jack); -const char *nau8824_components(void); #endif /* _NAU8824_H */ diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 7734bc35ab..67de0e49cc 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -47,7 +47,6 @@ static int nau8825_configure_sysclk(struct nau8825 *nau8825, int clk_id, unsigned int freq); -static bool nau8825_is_jack_inserted(struct regmap *regmap); struct nau8825_fll { int mclk_src; @@ -982,31 +981,6 @@ static int nau8825_output_dac_event(struct snd_soc_dapm_widget *w, return 0; } -static int system_clock_control(struct snd_soc_dapm_widget *w, - struct 
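Aside: the mt6660 write path above serializes a 1- to 4-byte register value MSB-first before handing it to the SMBus block-write helper. The packing loop, isolated:

    u8 buf[4];
    int i;

    /* Most significant byte first: buf[0] ends up holding the MSB. */
    for (i = 0; i < size; i++)
            buf[size - i - 1] = (val >> (8 * i)) & 0xff;

    ret = i2c_smbus_write_i2c_block_data(client, reg, size, buf);
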
snd_kcontrol *k, int event) -{ - struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct nau8825 *nau8825 = snd_soc_component_get_drvdata(component); - struct regmap *regmap = nau8825->regmap; - - if (SND_SOC_DAPM_EVENT_OFF(event)) { - dev_dbg(nau8825->dev, "system clock control : POWER OFF\n"); - /* Set clock source to disable or internal clock before the - * playback or capture end. Codec needs clock for Jack - * detection and button press if jack inserted; otherwise, - * the clock should be closed. - */ - if (nau8825_is_jack_inserted(regmap)) { - nau8825_configure_sysclk(nau8825, - NAU8825_CLK_INTERNAL, 0); - } else { - nau8825_configure_sysclk(nau8825, NAU8825_CLK_DIS, 0); - } - } - - return 0; -} - static int nau8825_biq_coeff_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -1120,9 +1094,6 @@ static const struct snd_kcontrol_new nau8825_dacr_mux = static const struct snd_soc_dapm_widget nau8825_dapm_widgets[] = { SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, NAU8825_REG_I2S_PCM_CTRL2, 15, 1), - SND_SOC_DAPM_AIF_IN("AIFRX", "Playback", 0, SND_SOC_NOPM, 0, 0), - SND_SOC_DAPM_SUPPLY("System Clock", SND_SOC_NOPM, 0, 0, - system_clock_control, SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_INPUT("MIC"), SND_SOC_DAPM_MICBIAS("MICBIAS", NAU8825_REG_MIC_BIAS, 8, 0), @@ -1211,11 +1182,9 @@ static const struct snd_soc_dapm_route nau8825_dapm_routes[] = { {"ADC", NULL, "ADC Clock"}, {"ADC", NULL, "ADC Power"}, {"AIFTX", NULL, "ADC"}, - {"AIFTX", NULL, "System Clock"}, - {"AIFRX", NULL, "System Clock"}, - {"DDACL", NULL, "AIFRX"}, - {"DDACR", NULL, "AIFRX"}, + {"DDACL", NULL, "Playback"}, + {"DDACR", NULL, "Playback"}, {"DDACL", NULL, "DDAC Clock"}, {"DDACR", NULL, "DDAC Clock"}, {"DACL Mux", "DACL", "DDACL"}, @@ -1465,12 +1434,6 @@ int nau8825_enable_jack_detect(struct snd_soc_component *component, nau8825->jack = jack; - if (!nau8825->jack) { - regmap_update_bits(regmap, NAU8825_REG_HSD_CTRL, - NAU8825_HSD_AUTO_MODE | NAU8825_SPKR_DWN1R | - NAU8825_SPKR_DWN1L, 0); - return 0; - } /* Ground HP Outputs[1:0], needed for headset auto detection * Enable Automatic Mic/Gnd switching reading on insert interrupt[6] */ @@ -2453,12 +2416,6 @@ static int __maybe_unused nau8825_resume(struct snd_soc_component *component) return 0; } -static int nau8825_set_jack(struct snd_soc_component *component, - struct snd_soc_jack *jack, void *data) -{ - return nau8825_enable_jack_detect(component, jack); -} - static const struct snd_soc_component_driver nau8825_component_driver = { .probe = nau8825_component_probe, .remove = nau8825_component_remove, @@ -2473,7 +2430,6 @@ static const struct snd_soc_component_driver nau8825_component_driver = { .num_dapm_widgets = ARRAY_SIZE(nau8825_dapm_widgets), .dapm_routes = nau8825_dapm_routes, .num_dapm_routes = ARRAY_SIZE(nau8825_dapm_routes), - .set_jack = nau8825_set_jack, .suspend_bias_off = 1, .idle_bias_on = 1, .use_pmdown_time = 1, diff --git a/sound/soc/codecs/pcm3168a.c b/sound/soc/codecs/pcm3168a.c index fdf92c8b28..b6fd412441 100644 --- a/sound/soc/codecs/pcm3168a.c +++ b/sound/soc/codecs/pcm3168a.c @@ -751,14 +751,21 @@ int pcm3168a_probe(struct device *dev, struct regmap *regmap) pcm3168a->gpio_rst = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE); - if (IS_ERR(pcm3168a->gpio_rst)) - return dev_err_probe(dev, PTR_ERR(pcm3168a->gpio_rst), - "failed to acquire RST gpio\n"); + if (IS_ERR(pcm3168a->gpio_rst)) { + ret = PTR_ERR(pcm3168a->gpio_rst); + if (ret != -EPROBE_DEFER ) + dev_err(dev, 
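Aside: in the nau8825 hunk, removing the AIFRX AIF_IN widget means the DACs are routed straight from the DAI's "Playback" stream widget, which DAPM creates from the stream name in the DAI driver. Fragments of both styles, for contrast (widget/route table context omitted):

    /* Route through an explicit AIF widget ... */
    SND_SOC_DAPM_AIF_IN("AIFRX", "Playback", 0, SND_SOC_NOPM, 0, 0),
    { "DDACL", NULL, "AIFRX" },

    /* ... or straight from the stream widget, as restored above. */
    { "DDACL", NULL, "Playback" },
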
"failed to acquire RST gpio: %d\n", ret); + + return ret; + } pcm3168a->scki = devm_clk_get(dev, "scki"); - if (IS_ERR(pcm3168a->scki)) - return dev_err_probe(dev, PTR_ERR(pcm3168a->scki), - "failed to acquire clock 'scki'\n"); + if (IS_ERR(pcm3168a->scki)) { + ret = PTR_ERR(pcm3168a->scki); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to acquire clock 'scki': %d\n", ret); + return ret; + } ret = clk_prepare_enable(pcm3168a->scki); if (ret) { @@ -774,7 +781,8 @@ int pcm3168a_probe(struct device *dev, struct regmap *regmap) ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(pcm3168a->supplies), pcm3168a->supplies); if (ret) { - dev_err_probe(dev, ret, "failed to request supplies\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to request supplies: %d\n", ret); goto err_clk; } diff --git a/sound/soc/codecs/rt1011.c b/sound/soc/codecs/rt1011.c index b62301a628..faff2b5586 100644 --- a/sound/soc/codecs/rt1011.c +++ b/sound/soc/codecs/rt1011.c @@ -1311,55 +1311,6 @@ static int rt1011_r0_load_info(struct snd_kcontrol *kcontrol, .put = rt1011_r0_load_mode_put \ } -static const char * const rt1011_i2s_ref[] = { - "None", "Left Channel", "Right Channel" -}; - -static SOC_ENUM_SINGLE_DECL(rt1011_i2s_ref_enum, 0, 0, - rt1011_i2s_ref); - -static int rt1011_i2s_ref_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = - snd_soc_kcontrol_component(kcontrol); - struct rt1011_priv *rt1011 = - snd_soc_component_get_drvdata(component); - - rt1011->i2s_ref = ucontrol->value.enumerated.item[0]; - switch (rt1011->i2s_ref) { - case RT1011_I2S_REF_LEFT_CH: - regmap_write(rt1011->regmap, RT1011_TDM_TOTAL_SET, 0x0240); - regmap_write(rt1011->regmap, RT1011_TDM1_SET_2, 0x8); - regmap_write(rt1011->regmap, RT1011_TDM1_SET_1, 0x1022); - regmap_write(rt1011->regmap, RT1011_ADCDAT_OUT_SOURCE, 0x4); - break; - case RT1011_I2S_REF_RIGHT_CH: - regmap_write(rt1011->regmap, RT1011_TDM_TOTAL_SET, 0x0240); - regmap_write(rt1011->regmap, RT1011_TDM1_SET_2, 0x8); - regmap_write(rt1011->regmap, RT1011_TDM1_SET_1, 0x10a2); - regmap_write(rt1011->regmap, RT1011_ADCDAT_OUT_SOURCE, 0x4); - break; - default: - dev_info(component->dev, "I2S Reference: Do nothing\n"); - } - - return 0; -} - -static int rt1011_i2s_ref_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = - snd_soc_kcontrol_component(kcontrol); - struct rt1011_priv *rt1011 = - snd_soc_component_get_drvdata(component); - - ucontrol->value.enumerated.item[0] = rt1011->i2s_ref; - - return 0; -} - static const struct snd_kcontrol_new rt1011_snd_controls[] = { /* I2S Data In Selection */ SOC_ENUM("DIN Source", rt1011_din_source_enum), @@ -1398,9 +1349,6 @@ static const struct snd_kcontrol_new rt1011_snd_controls[] = { /* R0 temperature */ SOC_SINGLE("R0 Temperature", RT1011_STP_INITIAL_RESISTANCE_TEMP, 2, 255, 0), - /* I2S Reference */ - SOC_ENUM_EXT("I2S Reference", rt1011_i2s_ref_enum, - rt1011_i2s_ref_get, rt1011_i2s_ref_put), }; static int rt1011_is_sys_clk_from_pll(struct snd_soc_dapm_widget *source, @@ -2059,7 +2007,6 @@ static int rt1011_probe(struct snd_soc_component *component) schedule_work(&rt1011->cali_work); - rt1011->i2s_ref = 0; rt1011->bq_drc_params = devm_kcalloc(component->dev, RT1011_ADVMODE_NUM, sizeof(struct rt1011_bq_drc_params *), GFP_KERNEL); diff --git a/sound/soc/codecs/rt1011.h b/sound/soc/codecs/rt1011.h index 4d6e7492d9..68fadc15fa 100644 --- a/sound/soc/codecs/rt1011.h +++ b/sound/soc/codecs/rt1011.h @@ -654,12 
+654,6 @@ enum { RT1011_AIFS }; -enum { - RT1011_I2S_REF_NONE, - RT1011_I2S_REF_LEFT_CH, - RT1011_I2S_REF_RIGHT_CH, -}; - /* BiQual & DRC related settings */ #define RT1011_BQ_DRC_NUM 128 struct rt1011_bq_drc_params { @@ -698,7 +692,6 @@ struct rt1011_priv { unsigned int r0_reg, cali_done; unsigned int r0_calib, temperature_calib; int recv_spk_mode; - int i2s_ref; }; #endif /* end of _RT1011_H_ */ diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 6a27dfacd8..c0c5952cdf 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -864,7 +864,7 @@ static int rt1015_set_component_pll(struct snd_soc_component *component, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt1016.c b/sound/soc/codecs/rt1016.c index 9845cdddcb..7561d20227 100644 --- a/sound/soc/codecs/rt1016.c +++ b/sound/soc/codecs/rt1016.c @@ -490,7 +490,7 @@ static int rt1016_set_component_pll(struct snd_soc_component *component, ret = rl6231_pll_calc(freq_in, freq_out * 4, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt1019.c b/sound/soc/codecs/rt1019.c index 80b7ca0e4e..8c0b00242b 100644 --- a/sound/soc/codecs/rt1019.c +++ b/sound/soc/codecs/rt1019.c @@ -359,7 +359,7 @@ static int rt1019_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt1305.c b/sound/soc/codecs/rt1305.c index a9c473537a..7a0094578e 100644 --- a/sound/soc/codecs/rt1305.c +++ b/sound/soc/codecs/rt1305.c @@ -841,7 +841,7 @@ static int rt1305_set_component_pll(struct snd_soc_component *component, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c index 149a76075c..f716668de6 100644 --- a/sound/soc/codecs/rt1308-sdw.c +++ b/sound/soc/codecs/rt1308-sdw.c @@ -613,7 +613,7 @@ static const struct snd_soc_component_driver soc_component_sdw_rt1308 = { static const struct snd_soc_dai_ops rt1308_aif_dai_ops = { .hw_params = rt1308_sdw_hw_params, .hw_free = rt1308_sdw_pcm_hw_free, - .set_stream = rt1308_set_sdw_stream, + .set_sdw_stream = rt1308_set_sdw_stream, .shutdown = rt1308_sdw_shutdown, .set_tdm_slot = rt1308_sdw_set_tdm_slot, }; diff --git a/sound/soc/codecs/rt1308.c b/sound/soc/codecs/rt1308.c index c555b77b3c..b4e5546e2e 100644 --- a/sound/soc/codecs/rt1308.c +++ b/sound/soc/codecs/rt1308.c @@ -664,7 +664,7 @@ static int rt1308_set_component_pll(struct snd_soc_component *component, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c index c66d7b20cb..09b4914bba 100644 --- a/sound/soc/codecs/rt1316-sdw.c +++ b/sound/soc/codecs/rt1316-sdw.c @@ -602,7 
+602,7 @@ static const struct snd_soc_component_driver soc_component_sdw_rt1316 = { static const struct snd_soc_dai_ops rt1316_aif_dai_ops = { .hw_params = rt1316_sdw_hw_params, .hw_free = rt1316_sdw_pcm_hw_free, - .set_stream = rt1316_set_sdw_stream, + .set_sdw_stream = rt1316_set_sdw_stream, .shutdown = rt1316_sdw_shutdown, }; diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 577680df70..4b1ad5054e 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -936,7 +936,7 @@ static int rt5514_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5616.c b/sound/soc/codecs/rt5616.c index 8e6414468a..fd0d3a08e9 100644 --- a/sound/soc/codecs/rt5616.c +++ b/sound/soc/codecs/rt5616.c @@ -1133,7 +1133,7 @@ static int rt5616_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index e7a82565b9..cd1db5caab 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -195,7 +195,6 @@ static bool rt5640_volatile_register(struct device *dev, unsigned int reg) case RT5640_PRIV_DATA: case RT5640_PGM_REG_ARR1: case RT5640_PGM_REG_ARR3: - case RT5640_DUMMY2: case RT5640_VENDOR_ID: case RT5640_VENDOR_ID1: case RT5640_VENDOR_ID2: @@ -1910,7 +1909,7 @@ static int rt5640_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } @@ -1973,7 +1972,7 @@ static int rt5640_set_bias_level(struct snd_soc_component *component, RT5640_PWR_FV1 | RT5640_PWR_FV2, RT5640_PWR_FV1 | RT5640_PWR_FV2); snd_soc_component_update_bits(component, RT5640_DUMMY1, - 0x1, 0x1); + 0x0301, 0x0301); snd_soc_component_update_bits(component, RT5640_MICBIAS, 0x0030, 0x0030); } @@ -2160,11 +2159,7 @@ static bool rt5640_jack_inserted(struct snd_soc_component *component) struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component); int val; - if (rt5640->jd_gpio) - val = gpiod_get_value(rt5640->jd_gpio) ? 
RT5640_JD_STATUS : 0; - else - val = snd_soc_component_read(component, RT5640_INT_IRQ_ST); - + val = snd_soc_component_read(component, RT5640_INT_IRQ_ST); dev_dbg(component->dev, "irq status %#04x\n", val); if (rt5640->jd_inverted) @@ -2302,42 +2297,10 @@ EXPORT_SYMBOL_GPL(rt5640_detect_headset); static void rt5640_jack_work(struct work_struct *work) { struct rt5640_priv *rt5640 = - container_of(work, struct rt5640_priv, jack_work.work); + container_of(work, struct rt5640_priv, jack_work); struct snd_soc_component *component = rt5640->component; int status; - if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { - int val, jack_type = 0, hda_mic_plugged, hda_hp_plugged; - - /* mic jack */ - val = snd_soc_component_read(component, RT5640_INT_IRQ_ST); - hda_mic_plugged = !(val & RT5640_JD_STATUS); - dev_dbg(component->dev, "mic jack status %d\n", - hda_mic_plugged); - - snd_soc_component_update_bits(component, RT5640_IRQ_CTRL1, - RT5640_JD_P_MASK, !hda_mic_plugged << RT5640_JD_P_SFT); - - if (hda_mic_plugged) - jack_type |= SND_JACK_MICROPHONE; - - /* headphone jack */ - val = snd_soc_component_read(component, RT5640_DUMMY2); - hda_hp_plugged = !(val & (0x1 << 11)); - dev_dbg(component->dev, "headphone jack status %d\n", - hda_hp_plugged); - - snd_soc_component_update_bits(component, RT5640_DUMMY2, - (0x1 << 10), !hda_hp_plugged << 10); - - if (hda_hp_plugged) - jack_type |= SND_JACK_HEADPHONE; - - snd_soc_jack_report(rt5640->jack, jack_type, SND_JACK_HEADSET); - - return; - } - if (!rt5640_jack_inserted(component)) { /* Jack removed, or spurious IRQ? */ if (rt5640->jack->status & SND_JACK_HEADPHONE) { @@ -2385,7 +2348,7 @@ static void rt5640_jack_work(struct work_struct *work) * disabled the OVCD IRQ, the IRQ pin will stay high and as * we react to edges, we miss the unplug event -> recheck. 
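Aside: the rt5640 hunks convert jack_work from a delayed_work back to a plain work_struct, so the re-queue sites lose their debounce-delay argument and container_of() resolves against the work_struct directly. The two idioms, side by side (a struct would hold one or the other; 'jack_dwork' is a hypothetical field name for the delayed case):

    /* Delayed variant: runs after a debounce interval. */
    INIT_DELAYED_WORK(&priv->jack_dwork, rt5640_jack_work);
    queue_delayed_work(system_long_wq, &priv->jack_dwork,
                       msecs_to_jiffies(100));

    /* Plain variant, as restored above: runs as soon as a worker is free. */
    INIT_WORK(&priv->jack_work, rt5640_jack_work);
    queue_work(system_long_wq, &priv->jack_work);
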
*/ - queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); + queue_work(system_long_wq, &rt5640->jack_work); } } @@ -2394,17 +2357,7 @@ static irqreturn_t rt5640_irq(int irq, void *data) struct rt5640_priv *rt5640 = data; if (rt5640->jack) - queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); - - return IRQ_HANDLED; -} - -static irqreturn_t rt5640_jd_gpio_irq(int irq, void *data) -{ - struct rt5640_priv *rt5640 = data; - - queue_delayed_work(system_long_wq, &rt5640->jack_work, - msecs_to_jiffies(JACK_SETTLE_TIME)); + queue_work(system_long_wq, &rt5640->jack_work); return IRQ_HANDLED; } @@ -2413,7 +2366,7 @@ static void rt5640_cancel_work(void *data) { struct rt5640_priv *rt5640 = data; - cancel_delayed_work_sync(&rt5640->jack_work); + cancel_work_sync(&rt5640->jack_work); cancel_delayed_work_sync(&rt5640->bp_work); } @@ -2453,12 +2406,7 @@ static void rt5640_disable_jack_detect(struct snd_soc_component *component) if (!rt5640->jack) return; - if (rt5640->jd_gpio_irq_requested) - free_irq(rt5640->jd_gpio_irq, rt5640); - - if (rt5640->irq_requested) - free_irq(rt5640->irq, rt5640); - + free_irq(rt5640->irq, rt5640); rt5640_cancel_work(rt5640); if (rt5640->jack->status & SND_JACK_MICROPHONE) { @@ -2467,15 +2415,11 @@ static void rt5640_disable_jack_detect(struct snd_soc_component *component) snd_soc_jack_report(rt5640->jack, 0, SND_JACK_BTN_0); } - rt5640->jd_gpio_irq_requested = false; - rt5640->irq_requested = false; - rt5640->jd_gpio = NULL; rt5640->jack = NULL; } static void rt5640_enable_jack_detect(struct snd_soc_component *component, - struct snd_soc_jack *jack, - struct rt5640_set_jack_data *jack_data) + struct snd_soc_jack *jack) { struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component); int ret; @@ -2519,90 +2463,28 @@ static void rt5640_enable_jack_detect(struct snd_soc_component *component, rt5640_enable_micbias1_ovcd_irq(component); } - if (jack_data && jack_data->codec_irq_override) - rt5640->irq = jack_data->codec_irq_override; - - if (jack_data && jack_data->jd_gpio) { - rt5640->jd_gpio = jack_data->jd_gpio; - rt5640->jd_gpio_irq = gpiod_to_irq(rt5640->jd_gpio); - - ret = request_irq(rt5640->jd_gpio_irq, rt5640_jd_gpio_irq, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - "rt5640-jd-gpio", rt5640); - if (ret) { - dev_warn(component->dev, "Failed to request jd GPIO IRQ %d: %d\n", - rt5640->jd_gpio_irq, ret); - rt5640_disable_jack_detect(component); - return; - } - rt5640->jd_gpio_irq_requested = true; - } - ret = request_irq(rt5640->irq, rt5640_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "rt5640", rt5640); if (ret) { dev_warn(component->dev, "Failed to reguest IRQ %d: %d\n", rt5640->irq, ret); + rt5640->irq = -ENXIO; + /* Undo above settings */ rt5640_disable_jack_detect(component); return; } - rt5640->irq_requested = true; /* sync initial jack state */ - queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); -} - -static void rt5640_enable_hda_jack_detect( - struct snd_soc_component *component, struct snd_soc_jack *jack) -{ - struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component); - int ret; - - /* Select JD1 for Mic */ - snd_soc_component_update_bits(component, RT5640_JD_CTRL, - RT5640_JD_MASK, RT5640_JD_JD1_IN4P); - snd_soc_component_write(component, RT5640_IRQ_CTRL1, RT5640_IRQ_JD_NOR); - - /* Select JD2 for Headphone */ - snd_soc_component_update_bits(component, RT5640_DUMMY2, 0x1100, 0x1100); - - /* Selecting GPIO01 as an interrupt */ - snd_soc_component_update_bits(component, RT5640_GPIO_CTRL1, - 
RT5640_GP1_PIN_MASK, RT5640_GP1_PIN_IRQ); - - /* Set GPIO1 output */ - snd_soc_component_update_bits(component, RT5640_GPIO_CTRL3, - RT5640_GP1_PF_MASK, RT5640_GP1_PF_OUT); - - snd_soc_component_update_bits(component, RT5640_DUMMY1, 0x400, 0x0); - - rt5640->jack = jack; - - ret = request_irq(rt5640->irq, rt5640_irq, - IRQF_TRIGGER_RISING | IRQF_ONESHOT, "rt5640", rt5640); - if (ret) { - dev_warn(component->dev, "Failed to reguest IRQ %d: %d\n", rt5640->irq, ret); - rt5640->irq = -ENXIO; - return; - } - - /* sync initial jack state */ - queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); + queue_work(system_long_wq, &rt5640->jack_work); } static int rt5640_set_jack(struct snd_soc_component *component, struct snd_soc_jack *jack, void *data) { - struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component); - - if (jack) { - if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) - rt5640_enable_hda_jack_detect(component, jack); - else - rt5640_enable_jack_detect(component, jack, data); - } else { + if (jack) + rt5640_enable_jack_detect(component, jack); + else rt5640_disable_jack_detect(component); - } return 0; } @@ -2692,16 +2574,11 @@ static int rt5640_probe(struct snd_soc_component *component) if (device_property_read_u32(component->dev, "realtek,jack-detect-source", &val) == 0) { - if (val <= RT5640_JD_SRC_GPIO4) { + if (val <= RT5640_JD_SRC_GPIO4) rt5640->jd_src = val << RT5640_JD_SFT; - } else if (val == RT5640_JD_SRC_HDA_HEADER) { - rt5640->jd_src = RT5640_JD_SRC_HDA_HEADER; - snd_soc_component_update_bits(component, RT5640_DUMMY1, - 0x0300, 0x0); - } else { + else dev_warn(component->dev, "Warning: Invalid jack-detect-source value: %d, leaving jack-detect disabled\n", val); - } } if (!device_property_read_bool(component->dev, "realtek,jack-detect-not-inverted")) @@ -2755,7 +2632,6 @@ static int rt5640_suspend(struct snd_soc_component *component) { struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component); - rt5640_cancel_work(rt5640); snd_soc_component_force_bias_level(component, SND_SOC_BIAS_OFF); rt5640_reset(component); regcache_cache_only(rt5640->regmap, true); @@ -2778,17 +2654,6 @@ static int rt5640_resume(struct snd_soc_component *component) regcache_cache_only(rt5640->regmap, false); regcache_sync(rt5640->regmap); - if (rt5640->jack) { - if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) - snd_soc_component_update_bits(component, - RT5640_DUMMY2, 0x1100, 0x1100); - else - snd_soc_component_write(component, RT5640_DUMMY2, - 0x4001); - - queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); - } - return 0; } #else @@ -2991,7 +2856,7 @@ static int rt5640_i2c_probe(struct i2c_client *i2c, rt5640->hp_mute = true; rt5640->irq = i2c->irq; INIT_DELAYED_WORK(&rt5640->bp_work, rt5640_button_press_work); - INIT_DELAYED_WORK(&rt5640->jack_work, rt5640_jack_work); + INIT_WORK(&rt5640->jack_work, rt5640_jack_work); /* Make sure work is stopped on probe-error / remove */ ret = devm_add_action_or_reset(&i2c->dev, rt5640_cancel_work, rt5640); diff --git a/sound/soc/codecs/rt5640.h b/sound/soc/codecs/rt5640.h index 9e49b9a0cc..2c28f83e33 100644 --- a/sound/soc/codecs/rt5640.h +++ b/sound/soc/codecs/rt5640.h @@ -2124,7 +2124,6 @@ struct rt5640_priv { int ldo1_en; /* GPIO for LDO1_EN */ int irq; - int jd_gpio_irq; int sysclk; int sysclk_src; int lrck[RT5640_AIFS]; @@ -2137,8 +2136,6 @@ struct rt5640_priv { bool hp_mute; bool asrc_en; - bool irq_requested; - bool jd_gpio_irq_requested; /* Jack and button detect data */ bool ovcd_irq_enabled; @@ -2148,20 +2145,14 @@ struct 
rt5640_priv { int release_count; int poll_count; struct delayed_work bp_work; - struct delayed_work jack_work; + struct work_struct jack_work; struct snd_soc_jack *jack; - struct gpio_desc *jd_gpio; unsigned int jd_src; bool jd_inverted; unsigned int ovcd_th; unsigned int ovcd_sf; }; -struct rt5640_set_jack_data { - int codec_irq_override; - struct gpio_desc *jd_gpio; -}; - int rt5640_dmic_enable(struct snd_soc_component *component, bool dmic1_data_pin, bool dmic2_data_pin); int rt5640_sel_asrc_clk_src(struct snd_soc_component *component, diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 197c560479..9408ee63cb 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2969,7 +2969,7 @@ static int rt5645_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c index f302c25688..93820561b9 100644 --- a/sound/soc/codecs/rt5651.c +++ b/sound/soc/codecs/rt5651.c @@ -1487,7 +1487,7 @@ static int rt5651_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5659.c b/sound/soc/codecs/rt5659.c index e1503c2eee..4a50b169fe 100644 --- a/sound/soc/codecs/rt5659.c +++ b/sound/soc/codecs/rt5659.c @@ -3509,7 +3509,7 @@ static int rt5659_set_component_pll(struct snd_soc_component *component, int pll ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5660.c b/sound/soc/codecs/rt5660.c index 3b50fb2986..33ff915635 100644 --- a/sound/soc/codecs/rt5660.c +++ b/sound/soc/codecs/rt5660.c @@ -1046,7 +1046,7 @@ static int rt5660_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index 2138f62e6a..ee09ccd448 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -2941,7 +2941,7 @@ static int rt5663_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 33e889802f..e59323fd5b 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -4374,7 +4374,7 @@ static int rt5665_set_component_pll(struct snd_soc_component *component, int pll ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c index 5b12cbf2ba..1186ceb5a9 
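Aside: every "Unsupport input clock" string above is the original wording these hunks restore, and the surrounding logic is the shared Realtek PLL helper: rl6231_pll_calc() derives M/N/K divider coefficients for a freq_in-to-freq_out ratio and returns a negative error for unreachable ratios. The typical call shape:

    struct rl6231_pll_code pll_code;
    int ret;

    ret = rl6231_pll_calc(freq_in, freq_out, &pll_code);
    if (ret < 0) {
            dev_err(component->dev, "Unsupport input clock %d\n", freq_in);
            return ret;
    }
    /* pll_code.m_code / n_code / k_code are then written to the PLL regs. */
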
100644 --- a/sound/soc/codecs/rt5668.c +++ b/sound/soc/codecs/rt5668.c @@ -2173,7 +2173,7 @@ static int rt5668_set_component_pll(struct snd_soc_component *component, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index ce7684752b..ecbaf129a6 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -2577,7 +2577,7 @@ static int rt5670_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 4a8c267d4f..f655228c8c 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -4557,7 +4557,7 @@ static int rt5677_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int source, ret = rt5677_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", freq_in); + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index 20fc0f3766..b9d5d7a097 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ b/sound/soc/codecs/rt5682-i2c.c @@ -59,12 +59,18 @@ static void rt5682_jd_check_handler(struct work_struct *work) struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv, jd_check_work.work); - if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) & RT5682_JDH_RS_MASK) + if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) + & RT5682_JDH_RS_MASK) { /* jack out */ - mod_delayed_work(system_power_efficient_wq, - &rt5682->jack_detect_work, 0); - else + rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0); + + snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type, + SND_JACK_HEADSET | + SND_JACK_BTN_0 | SND_JACK_BTN_1 | + SND_JACK_BTN_2 | SND_JACK_BTN_3); + } else { schedule_delayed_work(&rt5682->jd_check_work, 500); + } } static irqreturn_t rt5682_irq(int irq, void *data) @@ -133,8 +139,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, rt5682); - rt5682->i2c_dev = &i2c->dev; - rt5682->pdata = i2s_default_platform_data; if (pdata) @@ -272,21 +276,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c, dev_err(&i2c->dev, "Failed to reguest IRQ: %d\n", ret); } -#ifdef CONFIG_COMMON_CLK - /* Check if MCLK provided */ - rt5682->mclk = devm_clk_get_optional(&i2c->dev, "mclk"); - if (IS_ERR(rt5682->mclk)) - return PTR_ERR(rt5682->mclk); - - /* Register CCF DAI clock control */ - ret = rt5682_register_dai_clks(rt5682); - if (ret) - return ret; - - /* Initial setup for CCF */ - rt5682->lrck[RT5682_AIF1] = 48000; -#endif - return devm_snd_soc_register_component(&i2c->dev, &rt5682_soc_component_dev, rt5682_dai, ARRAY_SIZE(rt5682_dai)); diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c index 248257a2e4..31a4f28604 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c @@ -272,7 +272,7 @@ static int rt5682_sdw_hw_free(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops rt5682_sdw_ops = { .hw_params = rt5682_sdw_hw_params, .hw_free = rt5682_sdw_hw_free, - .set_stream = rt5682_set_sdw_stream, 
+ .set_sdw_stream = rt5682_set_sdw_stream, .shutdown = rt5682_sdw_shutdown, }; diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index be68d573a4..6ad3159ece 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -48,8 +48,6 @@ static const struct reg_sequence patch_list[] = { {RT5682_SAR_IL_CMD_6, 0x0110}, {RT5682_CHARGE_PUMP_1, 0x0210}, {RT5682_HP_LOGIC_CTRL_2, 0x0007}, - {RT5682_SAR_IL_CMD_2, 0xac00}, - {RT5682_CBJ_CTRL_7, 0x0104}, }; void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev) @@ -922,13 +920,15 @@ static void rt5682_enable_push_button_irq(struct snd_soc_component *component, * * Returns detect status. */ -static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) +int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) { struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); struct snd_soc_dapm_context *dapm = &component->dapm; unsigned int val, count; if (jack_insert) { + snd_soc_dapm_mutex_lock(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2 | RT5682_PWR_MB, RT5682_PWR_VREF2 | RT5682_PWR_MB); @@ -942,10 +942,6 @@ static int rt5682_headset_detect(struct snd_soc_component *component, int jack_i snd_soc_component_update_bits(component, RT5682_HP_CHARGE_PUMP_1, RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0); - rt5682_enable_push_button_irq(component, false); - snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, - RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); - usleep_range(55000, 60000); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH); @@ -979,6 +975,8 @@ static int rt5682_headset_detect(struct snd_soc_component *component, int jack_i snd_soc_component_update_bits(component, RT5682_MICBIAS_2, RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK, RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU); + + snd_soc_dapm_mutex_unlock(dapm); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, @@ -1007,6 +1005,7 @@ static int rt5682_headset_detect(struct snd_soc_component *component, int jack_i dev_dbg(component->dev, "jack_type = %d\n", rt5682->jack_type); return rt5682->jack_type; } +EXPORT_SYMBOL_GPL(rt5682_headset_detect); static int rt5682_set_jack_detect(struct snd_soc_component *component, struct snd_soc_jack *hs_jack, void *data) @@ -1089,7 +1088,6 @@ void rt5682_jack_detect_handler(struct work_struct *work) { struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv, jack_detect_work.work); - struct snd_soc_dapm_context *dapm; int val, btn_type; if (!rt5682->component || !rt5682->component->card || @@ -1100,9 +1098,6 @@ void rt5682_jack_detect_handler(struct work_struct *work) return; } - dapm = snd_soc_component_get_dapm(rt5682->component); - - snd_soc_dapm_mutex_lock(dapm); mutex_lock(&rt5682->calibrate_mutex); val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) @@ -1162,9 +1157,6 @@ void rt5682_jack_detect_handler(struct work_struct *work) rt5682->irq_work_delay_time = 50; } - mutex_unlock(&rt5682->calibrate_mutex); - snd_soc_dapm_mutex_unlock(dapm); - snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type, SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | @@ -1177,6 +1169,8 @@ void rt5682_jack_detect_handler(struct work_struct *work) else cancel_delayed_work_sync(&rt5682->jd_check_work); } + + mutex_unlock(&rt5682->calibrate_mutex); } EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler); 
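/*
 * Illustrative sketch, not part of the patch: both versions of the rt5682
 * jack-detect code above rely on the same ASoC convention -- hold the DAPM
 * mutex while toggling pins with the *_unlocked helpers, then sync before
 * releasing. The calls below are the ones this driver itself uses (see
 * rt5682_wclk_prepare() further down in this diff); only the wrapper name
 * example_force_micbias() is hypothetical.
 */
#include <sound/soc.h>
#include <sound/soc-dapm.h>

static void example_force_micbias(struct snd_soc_component *component)
{
	struct snd_soc_dapm_context *dapm =
		snd_soc_component_get_dapm(component);

	snd_soc_dapm_mutex_lock(dapm);
	/* pins may only be changed via the *_unlocked helpers while held */
	snd_soc_dapm_force_enable_pin_unlocked(dapm, "MICBIAS");
	snd_soc_dapm_sync_unlocked(dapm);
	snd_soc_dapm_mutex_unlock(dapm);
}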
@@ -2379,7 +2373,7 @@ static int rt5682_set_component_pll(struct snd_soc_component *component, pll2_fout1 = 3840000; ret = rl6231_pll_calc(freq_in, pll2_fout1, &pll2f_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } @@ -2391,7 +2385,7 @@ static int rt5682_set_component_pll(struct snd_soc_component *component, ret = rl6231_pll_calc(pll2_fout1, freq_out, &pll2b_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", + dev_err(component->dev, "Unsupport input clock %d\n", pll2_fout1); return ret; } @@ -2442,7 +2436,7 @@ static int rt5682_set_component_pll(struct snd_soc_component *component, ret = rl6231_pll_calc(freq_in, freq_out, &pll_code); if (ret < 0) { - dev_err(component->dev, "Unsupported input clock %d\n", + dev_err(component->dev, "Unsupport input clock %d\n", freq_in); return ret; } @@ -2562,7 +2556,7 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, static bool rt5682_clk_check(struct rt5682_priv *rt5682) { if (!rt5682->master[RT5682_AIF1]) { - dev_dbg(rt5682->i2c_dev, "sysclk/dai not set correctly\n"); + dev_dbg(rt5682->component->dev, "sysclk/dai not set correctly\n"); return false; } return true; @@ -2573,15 +2567,13 @@ static int rt5682_wclk_prepare(struct clk_hw *hw) struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component; - struct snd_soc_dapm_context *dapm; + struct snd_soc_component *component = rt5682->component; + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); if (!rt5682_clk_check(rt5682)) return -EINVAL; - component = rt5682->component; - dapm = snd_soc_component_get_dapm(component); - snd_soc_dapm_mutex_lock(dapm); snd_soc_dapm_force_enable_pin_unlocked(dapm, "MICBIAS"); @@ -2611,15 +2603,13 @@ static void rt5682_wclk_unprepare(struct clk_hw *hw) struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component; - struct snd_soc_dapm_context *dapm; + struct snd_soc_component *component = rt5682->component; + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); if (!rt5682_clk_check(rt5682)) return; - component = rt5682->component; - dapm = snd_soc_component_get_dapm(component); - snd_soc_dapm_mutex_lock(dapm); snd_soc_dapm_disable_pin_unlocked(dapm, "MICBIAS"); @@ -2643,6 +2633,7 @@ static unsigned long rt5682_wclk_recalc_rate(struct clk_hw *hw, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); + struct snd_soc_component *component = rt5682->component; const char * const clk_name = clk_hw_get_name(hw); if (!rt5682_clk_check(rt5682)) @@ -2652,7 +2643,7 @@ static unsigned long rt5682_wclk_recalc_rate(struct clk_hw *hw, */ if (rt5682->lrck[RT5682_AIF1] != CLK_48 && rt5682->lrck[RT5682_AIF1] != CLK_44) { - dev_warn(rt5682->i2c_dev, "%s: clk %s only support %d or %d Hz output\n", + dev_warn(component->dev, "%s: clk %s only support %d or %d Hz output\n", __func__, clk_name, CLK_44, CLK_48); return 0; } @@ -2666,6 +2657,7 @@ static long rt5682_wclk_round_rate(struct clk_hw *hw, unsigned long rate, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); + struct snd_soc_component *component = rt5682->component; const char * const clk_name = clk_hw_get_name(hw); if (!rt5682_clk_check(rt5682)) @@ -2675,7 +2667,7 @@ static 
long rt5682_wclk_round_rate(struct clk_hw *hw, unsigned long rate, * It will force to 48kHz if not both. */ if (rate != CLK_48 && rate != CLK_44) { - dev_warn(rt5682->i2c_dev, "%s: clk %s only support %d or %d Hz output\n", + dev_warn(component->dev, "%s: clk %s only support %d or %d Hz output\n", __func__, clk_name, CLK_44, CLK_48); rate = CLK_48; } @@ -2689,7 +2681,7 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component; + struct snd_soc_component *component = rt5682->component; struct clk_hw *parent_hw; const char * const clk_name = clk_hw_get_name(hw); int pre_div; @@ -2698,8 +2690,6 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate, if (!rt5682_clk_check(rt5682)) return -EINVAL; - component = rt5682->component; - /* * Whether the wclk's parent clk (mclk) exists or not, please ensure * it is fixed or set to 48MHz before setting wclk rate. It's a @@ -2709,12 +2699,12 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate, */ parent_hw = clk_hw_get_parent(hw); if (!parent_hw) - dev_warn(rt5682->i2c_dev, + dev_warn(component->dev, "Parent mclk of wclk not acquired in driver. Please ensure mclk was provided as %d Hz.\n", CLK_PLL2_FIN); if (parent_rate != CLK_PLL2_FIN) - dev_warn(rt5682->i2c_dev, "clk %s only support %d Hz input\n", + dev_warn(component->dev, "clk %s only support %d Hz input\n", clk_name, CLK_PLL2_FIN); /* @@ -2746,9 +2736,10 @@ static unsigned long rt5682_bclk_recalc_rate(struct clk_hw *hw, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_BCLK_IDX]); + struct snd_soc_component *component = rt5682->component; unsigned int bclks_per_wclk; - regmap_read(rt5682->regmap, RT5682_TDM_TCON_CTRL, &bclks_per_wclk); + bclks_per_wclk = snd_soc_component_read(component, RT5682_TDM_TCON_CTRL); switch (bclks_per_wclk & RT5682_TDM_BCLK_MS1_MASK) { case RT5682_TDM_BCLK_MS1_256: @@ -2809,22 +2800,20 @@ static int rt5682_bclk_set_rate(struct clk_hw *hw, unsigned long rate, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_BCLK_IDX]); - struct snd_soc_component *component; + struct snd_soc_component *component = rt5682->component; struct snd_soc_dai *dai; unsigned long factor; if (!rt5682_clk_check(rt5682)) return -EINVAL; - component = rt5682->component; - factor = rt5682_bclk_get_factor(rate, parent_rate); for_each_component_dais(component, dai) if (dai->id == RT5682_AIF1) break; if (!dai) { - dev_err(rt5682->i2c_dev, "dai %d not found in component\n", + dev_err(component->dev, "dai %d not found in component\n", RT5682_AIF1); return -ENODEV; } @@ -2847,15 +2836,17 @@ static const struct clk_ops rt5682_dai_clk_ops[RT5682_DAI_NUM_CLKS] = { }, }; -int rt5682_register_dai_clks(struct rt5682_priv *rt5682) +static int rt5682_register_dai_clks(struct snd_soc_component *component) { - struct device *dev = rt5682->i2c_dev; + struct device *dev = component->dev; + struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); struct rt5682_platform_data *pdata = &rt5682->pdata; struct clk_hw *dai_clk_hw; int i, ret; for (i = 0; i < RT5682_DAI_NUM_CLKS; ++i) { struct clk_init_data init = { }; + struct clk_parent_data parent_data; const struct clk_hw *parent; dai_clk_hw = &rt5682->dai_clks_hw[i]; @@ -2864,8 +2855,10 @@ int rt5682_register_dai_clks(struct rt5682_priv *rt5682) case RT5682_DAI_WCLK_IDX: /* Make MCLK the 
parent of WCLK */ if (rt5682->mclk) { - parent = __clk_get_hw(rt5682->mclk); - init.parent_hws = &parent; + parent_data = (struct clk_parent_data){ + .fw_name = "mclk", + }; + init.parent_data = &parent_data; init.num_parents = 1; } break; @@ -2906,7 +2899,6 @@ int rt5682_register_dai_clks(struct rt5682_priv *rt5682) return 0; } -EXPORT_SYMBOL_GPL(rt5682_register_dai_clks); #endif /* CONFIG_COMMON_CLK */ static int rt5682_probe(struct snd_soc_component *component) @@ -2916,6 +2908,9 @@ static int rt5682_probe(struct snd_soc_component *component) unsigned long time; struct snd_soc_dapm_context *dapm = &component->dapm; +#ifdef CONFIG_COMMON_CLK + int ret; +#endif rt5682->component = component; if (rt5682->is_sdw) { @@ -2927,6 +2922,26 @@ static int rt5682_probe(struct snd_soc_component *component) dev_err(&slave->dev, "Initialization not complete, timed out\n"); return -ETIMEDOUT; } + } else { +#ifdef CONFIG_COMMON_CLK + /* Check if MCLK provided */ + rt5682->mclk = devm_clk_get(component->dev, "mclk"); + if (IS_ERR(rt5682->mclk)) { + if (PTR_ERR(rt5682->mclk) != -ENOENT) { + ret = PTR_ERR(rt5682->mclk); + return ret; + } + rt5682->mclk = NULL; + } + + /* Register CCF DAI clock control */ + ret = rt5682_register_dai_clks(component); + if (ret) + return ret; + + /* Initial setup for CCF */ + rt5682->lrck[RT5682_AIF1] = CLK_48; +#endif } snd_soc_dapm_disable_pin(dapm, "MICBIAS"); @@ -2953,7 +2968,10 @@ static int rt5682_suspend(struct snd_soc_component *component) cancel_delayed_work_sync(&rt5682->jack_detect_work); cancel_delayed_work_sync(&rt5682->jd_check_work); - if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) { + if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) { + snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, + RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK, + RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG); val = snd_soc_component_read(component, RT5682_CBJ_CTRL_2) & RT5682_JACK_TYPE_MASK; @@ -2975,17 +2993,10 @@ static int rt5682_suspend(struct snd_soc_component *component) /* enter SAR ADC power saving mode */ snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1, RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK | - RT5682_SAR_SEL_MB1_MB2_MASK, 0); - usleep_range(5000, 6000); - snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, - RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK, - RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG); - usleep_range(10000, 12000); + RT5682_SAR_BUTDET_RST_MASK | RT5682_SAR_SEL_MB1_MB2_MASK, 0); snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1, - RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK, - RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV); - snd_soc_component_update_bits(component, RT5682_HP_CHARGE_PUMP_1, - RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0); + RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_BUTDET_RST_MASK, + RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV | RT5682_SAR_BUTDET_RST_NORMAL); } regcache_cache_only(rt5682->regmap, true); @@ -3003,11 +3014,10 @@ static int rt5682_resume(struct snd_soc_component *component) regcache_cache_only(rt5682->regmap, false); regcache_sync(rt5682->regmap); - if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) { + if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) { snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1, RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_SEL_MB1_MB2_MASK, RT5682_SAR_BUTDET_POW_NORM | RT5682_SAR_SEL_MB1_MB2_AUTO); - 
usleep_range(5000, 6000); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK, RT5682_CTRL_MB1_FSM | RT5682_CTRL_MB2_FSM); @@ -3015,9 +3025,8 @@ static int rt5682_resume(struct snd_soc_component *component) RT5682_PWR_CBJ, RT5682_PWR_CBJ); } - rt5682->jack_type = 0; mod_delayed_work(system_power_efficient_wq, - &rt5682->jack_detect_work, msecs_to_jiffies(0)); + &rt5682->jack_detect_work, msecs_to_jiffies(250)); return 0; } diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h index 52ff0d9c36..8e3244a62c 100644 --- a/sound/soc/codecs/rt5682.h +++ b/sound/soc/codecs/rt5682.h @@ -1428,7 +1428,6 @@ enum { struct rt5682_priv { struct snd_soc_component *component; - struct device *i2c_dev; struct rt5682_platform_data pdata; struct regmap *regmap; struct regmap *sdw_regmap; @@ -1472,6 +1471,7 @@ int rt5682_sel_asrc_clk_src(struct snd_soc_component *component, void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev); +int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert); void rt5682_jack_detect_handler(struct work_struct *work); bool rt5682_volatile_register(struct device *dev, unsigned int reg); @@ -1482,8 +1482,6 @@ void rt5682_calibrate(struct rt5682_priv *rt5682); void rt5682_reset(struct rt5682_priv *rt5682); int rt5682_parse_dt(struct rt5682_priv *rt5682, struct device *dev); -int rt5682_register_dai_clks(struct rt5682_priv *rt5682); - #define RT5682_REG_NUM 318 extern const struct reg_default rt5682_reg[RT5682_REG_NUM]; diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c index e61a8257bf..921382724f 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -1005,7 +1005,7 @@ static int rt700_pcm_hw_free(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops rt700_ops = { .hw_params = rt700_pcm_hw_params, .hw_free = rt700_pcm_hw_free, - .set_stream = rt700_set_sdw_stream, + .set_sdw_stream = rt700_set_sdw_stream, .shutdown = rt700_shutdown, }; diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index bdb1375f03..2e992589f1 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -1358,7 +1358,7 @@ static int rt711_sdca_pcm_hw_free(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops rt711_sdca_ops = { .hw_params = rt711_sdca_pcm_hw_params, .hw_free = rt711_sdca_pcm_hw_free, - .set_stream = rt711_sdca_set_sdw_stream, + .set_sdw_stream = rt711_sdca_set_sdw_stream, .shutdown = rt711_sdca_shutdown, }; diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 6770825d03..a7c5608a0e 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -1089,7 +1089,7 @@ static int rt711_pcm_hw_free(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops rt711_ops = { .hw_params = rt711_pcm_hw_params, .hw_free = rt711_pcm_hw_free, - .set_stream = rt711_set_sdw_stream, + .set_sdw_stream = rt711_set_sdw_stream, .shutdown = rt711_shutdown, }; diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c index bfa536bd71..66e166568c 100644 --- a/sound/soc/codecs/rt715-sdca.c +++ b/sound/soc/codecs/rt715-sdca.c @@ -938,7 +938,7 @@ static int rt715_sdca_pcm_hw_free(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops rt715_sdca_ops = { .hw_params = rt715_sdca_pcm_hw_params, .hw_free = rt715_sdca_pcm_hw_free, - .set_stream = rt715_sdca_set_sdw_stream, + .set_sdw_stream = rt715_sdca_set_sdw_stream, .shutdown = 
rt715_sdca_shutdown, }; diff --git a/sound/soc/codecs/rt715.c b/sound/soc/codecs/rt715.c index a64d11a747..1352869cc0 100644 --- a/sound/soc/codecs/rt715.c +++ b/sound/soc/codecs/rt715.c @@ -909,7 +909,7 @@ static int rt715_pcm_hw_free(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops rt715_ops = { .hw_params = rt715_pcm_hw_params, .hw_free = rt715_pcm_hw_free, - .set_stream = rt715_set_sdw_stream, + .set_sdw_stream = rt715_set_sdw_stream, .shutdown = rt715_shutdown, }; diff --git a/sound/soc/codecs/sdw-mockup.c b/sound/soc/codecs/sdw-mockup.c index 7c612aaf31..8ea13cfa9f 100644 --- a/sound/soc/codecs/sdw-mockup.c +++ b/sound/soc/codecs/sdw-mockup.c @@ -138,7 +138,7 @@ static int sdw_mockup_pcm_hw_free(struct snd_pcm_substream *substream, static const struct snd_soc_dai_ops sdw_mockup_ops = { .hw_params = sdw_mockup_pcm_hw_params, .hw_free = sdw_mockup_pcm_hw_free, - .set_stream = sdw_mockup_set_sdw_stream, + .set_sdw_stream = sdw_mockup_set_sdw_stream, .shutdown = sdw_mockup_shutdown, }; diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 8eebf27d0e..97bf1f2228 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1612,8 +1612,9 @@ static int sgtl5000_i2c_probe(struct i2c_client *client, if (ret == -ENOENT) ret = -EPROBE_DEFER; - dev_err_probe(&client->dev, ret, "Failed to get mclock\n"); - + if (ret != -EPROBE_DEFER) + dev_err(&client->dev, "Failed to get mclock: %d\n", + ret); goto disable_regs; } diff --git a/sound/soc/codecs/simple-amplifier.c b/sound/soc/codecs/simple-amplifier.c index d306c585b5..b30fc1f894 100644 --- a/sound/soc/codecs/simple-amplifier.c +++ b/sound/soc/codecs/simple-amplifier.c @@ -69,6 +69,7 @@ static int simple_amp_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct simple_amp *priv; + int err; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (priv == NULL) @@ -77,9 +78,12 @@ static int simple_amp_probe(struct platform_device *pdev) priv->gpiod_enable = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR(priv->gpiod_enable)) - return dev_err_probe(dev, PTR_ERR(priv->gpiod_enable), - "Failed to get 'enable' gpio"); + if (IS_ERR(priv->gpiod_enable)) { + err = PTR_ERR(priv->gpiod_enable); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'enable' gpio: %d", err); + return err; + } return devm_snd_soc_register_component(dev, &simple_amp_component_driver, diff --git a/sound/soc/codecs/simple-mux.c b/sound/soc/codecs/simple-mux.c index d30c0d24d9..e0a09dadfa 100644 --- a/sound/soc/codecs/simple-mux.c +++ b/sound/soc/codecs/simple-mux.c @@ -82,6 +82,7 @@ static int simple_mux_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct simple_mux *priv; + int err; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -90,9 +91,12 @@ static int simple_mux_probe(struct platform_device *pdev) dev_set_drvdata(dev, priv); priv->gpiod_mux = devm_gpiod_get(dev, "mux", GPIOD_OUT_LOW); - if (IS_ERR(priv->gpiod_mux)) - return dev_err_probe(dev, PTR_ERR(priv->gpiod_mux), - "Failed to get 'mux' gpio"); + if (IS_ERR(priv->gpiod_mux)) { + err = PTR_ERR(priv->gpiod_mux); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'mux' gpio: %d", err); + return err; + } return devm_snd_soc_register_component(dev, &simple_mux_component_driver, NULL, 0); } diff --git a/sound/soc/codecs/ssm2305.c b/sound/soc/codecs/ssm2305.c index 1d022643c3..2968959c4b 100644 --- a/sound/soc/codecs/ssm2305.c +++ b/sound/soc/codecs/ssm2305.c @@ 
-57,6 +57,7 @@ static int ssm2305_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ssm2305 *priv; + int err; /* Allocate the private data */ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -68,9 +69,13 @@ /* Get shutdown gpio */ priv->gpiod_shutdown = devm_gpiod_get(dev, "shutdown", GPIOD_OUT_LOW); - if (IS_ERR(priv->gpiod_shutdown)) - return dev_err_probe(dev, PTR_ERR(priv->gpiod_shutdown), - "Failed to get 'shutdown' gpio\n"); + if (IS_ERR(priv->gpiod_shutdown)) { + err = PTR_ERR(priv->gpiod_shutdown); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'shutdown' gpio: %d\n", + err); + return err; + } return devm_snd_soc_register_component(dev, &ssm2305_component_driver, NULL, 0); diff --git a/sound/soc/codecs/sta350.h b/sound/soc/codecs/sta350.h index 80bf56093d..f16900e00a 100644 --- a/sound/soc/codecs/sta350.h +++ b/sound/soc/codecs/sta350.h @@ -14,7 +14,7 @@ #ifndef _ASOC_STA_350_H #define _ASOC_STA_350_H -/* STA350 register addresses */ +/* STA50 register addresses */ #define STA350_REGISTER_COUNT 0x4D #define STA350_COEF_COUNT 62 diff --git a/sound/soc/codecs/tfa989x.c b/sound/soc/codecs/tfa989x.c index dc86852752..643b45188b 100644 --- a/sound/soc/codecs/tfa989x.c +++ b/sound/soc/codecs/tfa989x.c @@ -7,7 +7,6 @@ * Copyright (C) 2013 Sony Mobile Communications Inc. */ -#include <linux/gpio/consumer.h> #include <linux/i2c.h> #include <linux/module.h> #include <linux/regmap.h> @@ -20,7 +19,6 @@ #define TFA989X_REVISIONNUMBER 0x03 #define TFA989X_REVISIONNUMBER_REV_MSK GENMASK(7, 0) /* device revision */ #define TFA989X_I2SREG 0x04 -#define TFA989X_I2SREG_RCV 2 /* receiver mode */ #define TFA989X_I2SREG_CHSA 6 /* amplifier input select */ #define TFA989X_I2SREG_CHSA_MSK GENMASK(7, 6) #define TFA989X_I2SREG_I2SSR 12 /* sample rate */ @@ -55,9 +53,7 @@ struct tfa989x_rev { }; struct tfa989x { - const struct tfa989x_rev *rev; struct regulator *vddd_supply; - struct gpio_desc *rcv_gpiod; }; static bool tfa989x_writeable_reg(struct device *dev, unsigned int reg) @@ -101,35 +97,7 @@ static const struct snd_soc_dapm_route tfa989x_dapm_routes[] = { {"Amp Input", "Right", "AIFINR"}, }; -static int tfa989x_put_mode(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); - struct tfa989x *tfa989x = snd_soc_component_get_drvdata(component); - - gpiod_set_value_cansleep(tfa989x->rcv_gpiod, ucontrol->value.enumerated.item[0]); - - return snd_soc_put_enum_double(kcontrol, ucontrol); -} - -static const char * const mode_text[] = { "Speaker", "Receiver" }; -static SOC_ENUM_SINGLE_DECL(mode_enum, TFA989X_I2SREG, TFA989X_I2SREG_RCV, mode_text); -static const struct snd_kcontrol_new tfa989x_mode_controls[] = { - SOC_ENUM_EXT("Mode", mode_enum, snd_soc_get_enum_double, tfa989x_put_mode), -}; - -static int tfa989x_probe(struct snd_soc_component *component) -{ - struct tfa989x *tfa989x = snd_soc_component_get_drvdata(component); - - if (tfa989x->rev->rev == TFA9897_REVISION) - return snd_soc_add_component_controls(component, tfa989x_mode_controls, - ARRAY_SIZE(tfa989x_mode_controls)); - - return 0; -} - static const struct snd_soc_component_driver tfa989x_component = { - .probe = tfa989x_probe, .dapm_widgets = tfa989x_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(tfa989x_dapm_widgets), .dapm_routes = tfa989x_dapm_routes, @@ -305,7 +273,6 @@ static int tfa989x_i2c_probe(struct i2c_client *i2c) if (!tfa989x) return -ENOMEM; - tfa989x->rev = rev; i2c_set_clientdata(i2c, tfa989x);
tfa989x->vddd_supply = devm_regulator_get(dev, "vddd"); @@ -313,12 +280,6 @@ static int tfa989x_i2c_probe(struct i2c_client *i2c) return dev_err_probe(dev, PTR_ERR(tfa989x->vddd_supply), "Failed to get vddd regulator\n"); - if (tfa989x->rev->rev == TFA9897_REVISION) { - tfa989x->rcv_gpiod = devm_gpiod_get_optional(dev, "rcv", GPIOD_OUT_LOW); - if (IS_ERR(tfa989x->rcv_gpiod)) - return PTR_ERR(tfa989x->rcv_gpiod); - } - regmap = devm_regmap_init_i2c(i2c, &tfa989x_regmap); if (IS_ERR(regmap)) return PTR_ERR(regmap); diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index e77342aff4..52d2c968b5 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -170,7 +169,6 @@ struct aic31xx_priv { struct regulator_bulk_data supplies[AIC31XX_NUM_SUPPLIES]; struct aic31xx_disable_nb disable_nb[AIC31XX_NUM_SUPPLIES]; struct snd_soc_jack *jack; - u32 sysclk_id; unsigned int sysclk; u8 p_div; int rate_div_line; @@ -182,7 +180,6 @@ struct aic31xx_priv { struct aic31xx_rate_divs { u32 mclk_p; u32 rate; - u8 pll_r; u8 pll_j; u16 pll_d; u16 dosr; @@ -195,71 +192,51 @@ struct aic31xx_rate_divs { /* ADC dividers can be disabled by configuring them to 0 */ static const struct aic31xx_rate_divs aic31xx_divs[] = { - /* mclk/p rate pll: r j d dosr ndac mdac aors nadc madc */ + /* mclk/p rate pll: j d dosr ndac mdac aors nadc madc */ /* 8k rate */ - { 512000, 8000, 4, 48, 0, 128, 48, 2, 128, 48, 2}, - {12000000, 8000, 1, 8, 1920, 128, 48, 2, 128, 48, 2}, - {12000000, 8000, 1, 8, 1920, 128, 32, 3, 128, 32, 3}, - {12500000, 8000, 1, 7, 8643, 128, 48, 2, 128, 48, 2}, + {12000000, 8000, 8, 1920, 128, 48, 2, 128, 48, 2}, + {12000000, 8000, 8, 1920, 128, 32, 3, 128, 32, 3}, + {12500000, 8000, 7, 8643, 128, 48, 2, 128, 48, 2}, /* 11.025k rate */ - { 705600, 11025, 3, 48, 0, 128, 24, 3, 128, 24, 3}, - {12000000, 11025, 1, 7, 5264, 128, 32, 2, 128, 32, 2}, - {12000000, 11025, 1, 8, 4672, 128, 24, 3, 128, 24, 3}, - {12500000, 11025, 1, 7, 2253, 128, 32, 2, 128, 32, 2}, + {12000000, 11025, 7, 5264, 128, 32, 2, 128, 32, 2}, + {12000000, 11025, 8, 4672, 128, 24, 3, 128, 24, 3}, + {12500000, 11025, 7, 2253, 128, 32, 2, 128, 32, 2}, /* 16k rate */ - { 512000, 16000, 4, 48, 0, 128, 16, 3, 128, 16, 3}, - { 1024000, 16000, 2, 48, 0, 128, 16, 3, 128, 16, 3}, - {12000000, 16000, 1, 8, 1920, 128, 24, 2, 128, 24, 2}, - {12000000, 16000, 1, 8, 1920, 128, 16, 3, 128, 16, 3}, - {12500000, 16000, 1, 7, 8643, 128, 24, 2, 128, 24, 2}, + {12000000, 16000, 8, 1920, 128, 24, 2, 128, 24, 2}, + {12000000, 16000, 8, 1920, 128, 16, 3, 128, 16, 3}, + {12500000, 16000, 7, 8643, 128, 24, 2, 128, 24, 2}, /* 22.05k rate */ - { 705600, 22050, 4, 36, 0, 128, 12, 3, 128, 12, 3}, - { 1411200, 22050, 2, 36, 0, 128, 12, 3, 128, 12, 3}, - {12000000, 22050, 1, 7, 5264, 128, 16, 2, 128, 16, 2}, - {12000000, 22050, 1, 8, 4672, 128, 12, 3, 128, 12, 3}, - {12500000, 22050, 1, 7, 2253, 128, 16, 2, 128, 16, 2}, + {12000000, 22050, 7, 5264, 128, 16, 2, 128, 16, 2}, + {12000000, 22050, 8, 4672, 128, 12, 3, 128, 12, 3}, + {12500000, 22050, 7, 2253, 128, 16, 2, 128, 16, 2}, /* 32k rate */ - { 1024000, 32000, 2, 48, 0, 128, 12, 2, 128, 12, 2}, - { 2048000, 32000, 1, 48, 0, 128, 12, 2, 128, 12, 2}, - {12000000, 32000, 1, 8, 1920, 128, 12, 2, 128, 12, 2}, - {12000000, 32000, 1, 8, 1920, 128, 8, 3, 128, 8, 3}, - {12500000, 32000, 1, 7, 8643, 128, 12, 2, 128, 12, 2}, + {12000000, 32000, 8, 1920, 128, 12, 2, 128, 12, 2}, + {12000000, 
32000, 8, 1920, 128, 8, 3, 128, 8, 3}, + {12500000, 32000, 7, 8643, 128, 12, 2, 128, 12, 2}, /* 44.1k rate */ - { 1411200, 44100, 2, 32, 0, 128, 8, 2, 128, 8, 2}, - { 2822400, 44100, 1, 32, 0, 128, 8, 2, 128, 8, 2}, - {12000000, 44100, 1, 7, 5264, 128, 8, 2, 128, 8, 2}, - {12000000, 44100, 1, 8, 4672, 128, 6, 3, 128, 6, 3}, - {12500000, 44100, 1, 7, 2253, 128, 8, 2, 128, 8, 2}, + {12000000, 44100, 7, 5264, 128, 8, 2, 128, 8, 2}, + {12000000, 44100, 8, 4672, 128, 6, 3, 128, 6, 3}, + {12500000, 44100, 7, 2253, 128, 8, 2, 128, 8, 2}, /* 48k rate */ - { 1536000, 48000, 2, 32, 0, 128, 8, 2, 128, 8, 2}, - { 3072000, 48000, 1, 32, 0, 128, 8, 2, 128, 8, 2}, - {12000000, 48000, 1, 8, 1920, 128, 8, 2, 128, 8, 2}, - {12000000, 48000, 1, 7, 6800, 96, 5, 4, 96, 5, 4}, - {12500000, 48000, 1, 7, 8643, 128, 8, 2, 128, 8, 2}, + {12000000, 48000, 8, 1920, 128, 8, 2, 128, 8, 2}, + {12000000, 48000, 7, 6800, 96, 5, 4, 96, 5, 4}, + {12500000, 48000, 7, 8643, 128, 8, 2, 128, 8, 2}, /* 88.2k rate */ - { 2822400, 88200, 2, 16, 0, 64, 8, 2, 64, 8, 2}, - { 5644800, 88200, 1, 16, 0, 64, 8, 2, 64, 8, 2}, - {12000000, 88200, 1, 7, 5264, 64, 8, 2, 64, 8, 2}, - {12000000, 88200, 1, 8, 4672, 64, 6, 3, 64, 6, 3}, - {12500000, 88200, 1, 7, 2253, 64, 8, 2, 64, 8, 2}, + {12000000, 88200, 7, 5264, 64, 8, 2, 64, 8, 2}, + {12000000, 88200, 8, 4672, 64, 6, 3, 64, 6, 3}, + {12500000, 88200, 7, 2253, 64, 8, 2, 64, 8, 2}, /* 96k rate */ - { 3072000, 96000, 2, 16, 0, 64, 8, 2, 64, 8, 2}, - { 6144000, 96000, 1, 16, 0, 64, 8, 2, 64, 8, 2}, - {12000000, 96000, 1, 8, 1920, 64, 8, 2, 64, 8, 2}, - {12000000, 96000, 1, 7, 6800, 48, 5, 4, 48, 5, 4}, - {12500000, 96000, 1, 7, 8643, 64, 8, 2, 64, 8, 2}, + {12000000, 96000, 8, 1920, 64, 8, 2, 64, 8, 2}, + {12000000, 96000, 7, 6800, 48, 5, 4, 48, 5, 4}, + {12500000, 96000, 7, 8643, 64, 8, 2, 64, 8, 2}, /* 176.4k rate */ - { 5644800, 176400, 2, 8, 0, 32, 8, 2, 32, 8, 2}, - {11289600, 176400, 1, 8, 0, 32, 8, 2, 32, 8, 2}, - {12000000, 176400, 1, 7, 5264, 32, 8, 2, 32, 8, 2}, - {12000000, 176400, 1, 8, 4672, 32, 6, 3, 32, 6, 3}, - {12500000, 176400, 1, 7, 2253, 32, 8, 2, 32, 8, 2}, + {12000000, 176400, 7, 5264, 32, 8, 2, 32, 8, 2}, + {12000000, 176400, 8, 4672, 32, 6, 3, 32, 6, 3}, + {12500000, 176400, 7, 2253, 32, 8, 2, 32, 8, 2}, /* 192k rate */ - { 6144000, 192000, 2, 8, 0, 32, 8, 2, 32, 8, 2}, - {12288000, 192000, 1, 8, 0, 32, 8, 2, 32, 8, 2}, - {12000000, 192000, 1, 8, 1920, 32, 8, 2, 32, 8, 2}, - {12000000, 192000, 1, 7, 6800, 24, 5, 4, 24, 5, 4}, - {12500000, 192000, 1, 7, 8643, 32, 8, 2, 32, 8, 2}, + {12000000, 192000, 8, 1920, 32, 8, 2, 32, 8, 2}, + {12000000, 192000, 7, 6800, 24, 5, 4, 24, 5, 4}, + {12500000, 192000, 7, 8643, 32, 8, 2, 32, 8, 2}, }; static const char * const ldac_in_text[] = { @@ -911,7 +888,7 @@ static int aic31xx_setup_pll(struct snd_soc_component *component, /* PLL configuration */ snd_soc_component_update_bits(component, AIC31XX_PLLPR, AIC31XX_PLL_MASK, - (aic31xx->p_div << 4) | aic31xx_divs[i].pll_r); + (aic31xx->p_div << 4) | 0x01); snd_soc_component_write(component, AIC31XX_PLLJ, aic31xx_divs[i].pll_j); snd_soc_component_write(component, AIC31XX_PLLDMSB, @@ -964,7 +941,6 @@ static int aic31xx_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_component *component = dai->component; - struct aic31xx_priv *aic31xx = snd_soc_component_get_drvdata(component); u8 data = 0; dev_dbg(component->dev, "## %s: width %d rate %d\n", @@ -996,16 +972,6 @@ static int aic31xx_hw_params(struct snd_pcm_substream *substream, 
AIC31XX_IFACE1_DATALEN_MASK, data); - /* - * If BCLK is used as PLL input, the sysclk is determined by the hw - * params. So it must be updated here to match the input frequency. - */ - if (aic31xx->sysclk_id == AIC31XX_PLL_CLKIN_BCLK) { - aic31xx->sysclk = params_rate(params) * params_width(params) * - params_channels(params); - aic31xx->p_div = 1; - } - return aic31xx_setup_pll(component, params); } @@ -1190,7 +1156,6 @@ static int aic31xx_set_dai_sysclk(struct snd_soc_dai *codec_dai, snd_soc_component_update_bits(component, AIC31XX_CLKMUX, AIC31XX_PLL_CLKIN_MASK, clk_id << AIC31XX_PLL_CLKIN_SHIFT); - aic31xx->sysclk_id = clk_id; aic31xx->sysclk = freq; return 0; @@ -1680,9 +1645,11 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, aic31xx->gpio_reset = devm_gpiod_get_optional(aic31xx->dev, "reset", GPIOD_OUT_LOW); - if (IS_ERR(aic31xx->gpio_reset)) - return dev_err_probe(aic31xx->dev, PTR_ERR(aic31xx->gpio_reset), - "not able to acquire gpio\n"); + if (IS_ERR(aic31xx->gpio_reset)) { + if (PTR_ERR(aic31xx->gpio_reset) != -EPROBE_DEFER) + dev_err(aic31xx->dev, "not able to acquire gpio\n"); + return PTR_ERR(aic31xx->gpio_reset); + } for (i = 0; i < ARRAY_SIZE(aic31xx->supplies); i++) aic31xx->supplies[i].supply = aic31xx_supply_names[i]; @@ -1690,8 +1657,12 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, ret = devm_regulator_bulk_get(aic31xx->dev, ARRAY_SIZE(aic31xx->supplies), aic31xx->supplies); - if (ret) - return dev_err_probe(aic31xx->dev, ret, "Failed to request supplies\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(aic31xx->dev, + "Failed to request supplies: %d\n", ret); + return ret; + } aic31xx_configure_ocmv(aic31xx); diff --git a/sound/soc/codecs/tlv320aic32x4-i2c.c b/sound/soc/codecs/tlv320aic32x4-i2c.c index ed70e3d9ba..04ad383113 100644 --- a/sound/soc/codecs/tlv320aic32x4-i2c.c +++ b/sound/soc/codecs/tlv320aic32x4-i2c.c @@ -44,9 +44,7 @@ static int aic32x4_i2c_probe(struct i2c_client *i2c, static int aic32x4_i2c_remove(struct i2c_client *i2c) { - aic32x4_remove(&i2c->dev); - - return 0; + return aic32x4_remove(&i2c->dev); } static const struct i2c_device_id aic32x4_i2c_id[] = { diff --git a/sound/soc/codecs/tlv320aic32x4-spi.c b/sound/soc/codecs/tlv320aic32x4-spi.c index a8958cd1c6..e81c72958a 100644 --- a/sound/soc/codecs/tlv320aic32x4-spi.c +++ b/sound/soc/codecs/tlv320aic32x4-spi.c @@ -48,9 +48,7 @@ static int aic32x4_spi_probe(struct spi_device *spi) static int aic32x4_spi_remove(struct spi_device *spi) { - aic32x4_remove(&spi->dev); - - return 0; + return aic32x4_remove(&spi->dev); } static const struct spi_device_id aic32x4_spi_id[] = { diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 8f42fd7bc0..d39c7d52ec 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -1418,11 +1418,13 @@ int aic32x4_probe(struct device *dev, struct regmap *regmap) } EXPORT_SYMBOL(aic32x4_probe); -void aic32x4_remove(struct device *dev) +int aic32x4_remove(struct device *dev) { struct aic32x4_priv *aic32x4 = dev_get_drvdata(dev); aic32x4_disable_regulators(aic32x4); + + return 0; } EXPORT_SYMBOL(aic32x4_remove); diff --git a/sound/soc/codecs/tlv320aic32x4.h b/sound/soc/codecs/tlv320aic32x4.h index 4de5bd9e8c..e9fd2e55d6 100644 --- a/sound/soc/codecs/tlv320aic32x4.h +++ b/sound/soc/codecs/tlv320aic32x4.h @@ -18,7 +18,7 @@ enum aic32x4_type { extern const struct regmap_config aic32x4_regmap_config; int aic32x4_probe(struct device *dev, struct regmap *regmap); -void aic32x4_remove(struct device 
*dev); +int aic32x4_remove(struct device *dev); int aic32x4_register_clocks(struct device *dev, const char *mclk_name); /* tlv320aic32x4 register space (in decimal to match datasheet) */ diff --git a/sound/soc/codecs/tlv320aic3x-i2c.c b/sound/soc/codecs/tlv320aic3x-i2c.c index 2f272bc3f5..cd0558ed4d 100644 --- a/sound/soc/codecs/tlv320aic3x-i2c.c +++ b/sound/soc/codecs/tlv320aic3x-i2c.c @@ -32,9 +32,7 @@ static int aic3x_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *i static int aic3x_i2c_remove(struct i2c_client *i2c) { - aic3x_remove(&i2c->dev); - - return 0; + return aic3x_remove(&i2c->dev); } static const struct i2c_device_id aic3x_i2c_id[] = { diff --git a/sound/soc/codecs/tlv320aic3x-spi.c b/sound/soc/codecs/tlv320aic3x-spi.c index 494e844022..8c7b6bb922 100644 --- a/sound/soc/codecs/tlv320aic3x-spi.c +++ b/sound/soc/codecs/tlv320aic3x-spi.c @@ -37,9 +37,7 @@ static int aic3x_spi_probe(struct spi_device *spi) static int aic3x_spi_remove(struct spi_device *spi) { - aic3x_remove(&spi->dev); - - return 0; + return aic3x_remove(&spi->dev); } static const struct spi_device_id aic3x_spi_id[] = { diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index d53037b150..7731593a55 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1870,7 +1870,7 @@ int aic3x_probe(struct device *dev, struct regmap *regmap, kernel_ulong_t driver } EXPORT_SYMBOL(aic3x_probe); -void aic3x_remove(struct device *dev) +int aic3x_remove(struct device *dev) { struct aic3x_priv *aic3x = dev_get_drvdata(dev); @@ -1881,6 +1881,7 @@ void aic3x_remove(struct device *dev) gpio_set_value(aic3x->gpio_reset, 0); gpio_free(aic3x->gpio_reset); } + return 0; } EXPORT_SYMBOL(aic3x_remove); diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h index 14298f9e6d..7e00639130 100644 --- a/sound/soc/codecs/tlv320aic3x.h +++ b/sound/soc/codecs/tlv320aic3x.h @@ -14,7 +14,7 @@ struct regmap_config; extern const struct regmap_config aic3x_regmap; int aic3x_probe(struct device *dev, struct regmap *regmap, kernel_ulong_t driver_data); -void aic3x_remove(struct device *dev); +int aic3x_remove(struct device *dev); #define AIC3X_MODEL_3X 0 #define AIC3X_MODEL_33 1 diff --git a/sound/soc/codecs/wcd-mbhc-v2.c b/sound/soc/codecs/wcd-mbhc-v2.c index 7488a150a1..405128ccb4 100644 --- a/sound/soc/codecs/wcd-mbhc-v2.c +++ b/sound/soc/codecs/wcd-mbhc-v2.c @@ -1022,52 +1022,6 @@ static int wcd_mbhc_get_plug_from_adc(struct wcd_mbhc *mbhc, int adc_result) return plug_type; } -static int wcd_mbhc_get_spl_hs_thres(struct wcd_mbhc *mbhc) -{ - int hs_threshold, micbias_mv; - - micbias_mv = wcd_mbhc_get_micbias(mbhc); - if (mbhc->cfg->hs_thr && mbhc->cfg->micb_mv != WCD_MBHC_ADC_MICBIAS_MV) { - if (mbhc->cfg->micb_mv == micbias_mv) - hs_threshold = mbhc->cfg->hs_thr; - else - hs_threshold = (mbhc->cfg->hs_thr * micbias_mv) / mbhc->cfg->micb_mv; - } else { - hs_threshold = ((WCD_MBHC_ADC_HS_THRESHOLD_MV * micbias_mv) / - WCD_MBHC_ADC_MICBIAS_MV); - } - return hs_threshold; -} - -static bool wcd_mbhc_check_for_spl_headset(struct wcd_mbhc *mbhc) -{ - bool is_spl_hs = false; - int output_mv, hs_threshold, hph_threshold; - - if (!mbhc->mbhc_cb->mbhc_micb_ctrl_thr_mic) - return false; - - /* Bump up MIC_BIAS2 to 2.7V */ - mbhc->mbhc_cb->mbhc_micb_ctrl_thr_mic(mbhc->component, MIC_BIAS_2, true); - usleep_range(10000, 10100); - - output_mv = wcd_measure_adc_once(mbhc, MUX_CTL_IN2P); - hs_threshold = wcd_mbhc_get_spl_hs_thres(mbhc); - hph_threshold = 
wcd_mbhc_adc_get_hph_thres(mbhc); - - if (!(output_mv > hs_threshold || output_mv < hph_threshold)) - is_spl_hs = true; - - /* Back MIC_BIAS2 to 1.8v if the type is not special headset */ - if (!is_spl_hs) { - mbhc->mbhc_cb->mbhc_micb_ctrl_thr_mic(mbhc->component, MIC_BIAS_2, false); - /* Add 10ms delay for micbias to settle */ - usleep_range(10000, 10100); - } - - return is_spl_hs; -} - static void wcd_correct_swch_plug(struct work_struct *work) { struct wcd_mbhc *mbhc; @@ -1075,14 +1029,12 @@ static void wcd_correct_swch_plug(struct work_struct *work) enum wcd_mbhc_plug_type plug_type = MBHC_PLUG_TYPE_INVALID; unsigned long timeout; int pt_gnd_mic_swap_cnt = 0; - int output_mv, cross_conn, hs_threshold, try = 0, micbias_mv; - bool is_spl_hs = false; + int output_mv, cross_conn, hs_threshold, try = 0; bool is_pa_on; mbhc = container_of(work, struct wcd_mbhc, correct_plug_swch); component = mbhc->component; - micbias_mv = wcd_mbhc_get_micbias(mbhc); hs_threshold = wcd_mbhc_adc_get_hs_thres(mbhc); /* Mask ADC COMPLETE interrupt */ @@ -1145,16 +1097,6 @@ static void wcd_correct_swch_plug(struct work_struct *work) plug_type = wcd_mbhc_get_plug_from_adc(mbhc, output_mv); is_pa_on = wcd_mbhc_read_field(mbhc, WCD_MBHC_HPH_PA_EN); - if (output_mv > hs_threshold && !is_spl_hs) { - is_spl_hs = wcd_mbhc_check_for_spl_headset(mbhc); - output_mv = wcd_measure_adc_once(mbhc, MUX_CTL_IN2P); - - if (is_spl_hs) { - hs_threshold *= wcd_mbhc_get_micbias(mbhc); - hs_threshold /= micbias_mv; - } - } - if ((output_mv <= hs_threshold) && !is_pa_on) { /* Check for cross connection*/ cross_conn = wcd_check_cross_conn(mbhc); @@ -1180,19 +1122,14 @@ static void wcd_correct_swch_plug(struct work_struct *work) } } - /* cable is extension cable */ - if (output_mv > hs_threshold || mbhc->force_linein) + if (output_mv > hs_threshold) /* cable is extension cable */ plug_type = MBHC_PLUG_TYPE_HIGH_HPH; } wcd_mbhc_bcs_enable(mbhc, plug_type, true); - if (plug_type == MBHC_PLUG_TYPE_HIGH_HPH) { - if (is_spl_hs) - plug_type = MBHC_PLUG_TYPE_HEADSET; - else - wcd_mbhc_write_field(mbhc, WCD_MBHC_ELECT_ISRC_EN, 1); - } + if (plug_type == MBHC_PLUG_TYPE_HIGH_HPH) + wcd_mbhc_write_field(mbhc, WCD_MBHC_ELECT_ISRC_EN, 1); wcd_mbhc_write_field(mbhc, WCD_MBHC_ADC_MODE, 0); wcd_mbhc_write_field(mbhc, WCD_MBHC_ADC_EN, 0); @@ -1239,6 +1176,7 @@ static irqreturn_t wcd_mbhc_adc_hs_rem_irq(int irq, void *data) struct wcd_mbhc *mbhc = data; unsigned long timeout; int adc_threshold, output_mv, retry = 0; + bool hphpa_on = false; mutex_lock(&mbhc->lock); timeout = jiffies + msecs_to_jiffies(WCD_FAKE_REMOVAL_MIN_PERIOD_MS); @@ -1272,6 +1210,10 @@ static irqreturn_t wcd_mbhc_adc_hs_rem_irq(int irq, void *data) wcd_mbhc_elec_hs_report_unplug(mbhc); wcd_mbhc_write_field(mbhc, WCD_MBHC_BTN_ISRC_CTL, 0); + if (hphpa_on) { + hphpa_on = false; + wcd_mbhc_write_field(mbhc, WCD_MBHC_HPH_PA_EN, 3); + } exit: mutex_unlock(&mbhc->lock); return IRQ_HANDLED; diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 1e60db4056..bc5d68c53e 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -341,7 +341,7 @@ struct wcd9335_codec { int reset_gpio; struct regulator_bulk_data supplies[WCD9335_MAX_SUPPLY]; - unsigned int rx_port_value[WCD9335_RX_MAX]; + unsigned int rx_port_value; unsigned int tx_port_value; int hph_l_gain; int hph_r_gain; @@ -1269,11 +1269,10 @@ static const struct snd_kcontrol_new sb_tx8_mux = static int slim_rx_mux_get(struct snd_kcontrol *kc, struct snd_ctl_elem_value *ucontrol) { - struct 
snd_soc_dapm_widget *w = snd_soc_dapm_kcontrol_widget(kc); - struct wcd9335_codec *wcd = dev_get_drvdata(w->dapm->dev); - u32 port_id = w->shift; + struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kc); + struct wcd9335_codec *wcd = dev_get_drvdata(dapm->dev); - ucontrol->value.enumerated.item[0] = wcd->rx_port_value[port_id]; + ucontrol->value.enumerated.item[0] = wcd->rx_port_value; return 0; } @@ -1287,9 +1286,9 @@ static int slim_rx_mux_put(struct snd_kcontrol *kc, struct snd_soc_dapm_update *update = NULL; u32 port_id = w->shift; - wcd->rx_port_value[port_id] = ucontrol->value.enumerated.item[0]; + wcd->rx_port_value = ucontrol->value.enumerated.item[0]; - switch (wcd->rx_port_value[port_id]) { + switch (wcd->rx_port_value) { case 0: list_del_init(&wcd->rx_chs[port_id].list); break; @@ -1310,11 +1309,11 @@ static int slim_rx_mux_put(struct snd_kcontrol *kc, &wcd->dai[AIF4_PB].slim_ch_list); break; default: - dev_err(wcd->dev, "Unknown AIF %d\n", wcd->rx_port_value[port_id]); + dev_err(wcd->dev, "Unknown AIF %d\n", wcd->rx_port_value); goto err; } - snd_soc_dapm_mux_update_power(w->dapm, kc, wcd->rx_port_value[port_id], + snd_soc_dapm_mux_update_power(w->dapm, kc, wcd->rx_port_value, e, update); return 0; diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 6c468527fe..e63c6b723d 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -3423,7 +3423,7 @@ static int wcd934x_int_dem_inp_mux_put(struct snd_kcontrol *kc, { struct soc_enum *e = (struct soc_enum *)kc->private_value; struct snd_soc_component *component; - int reg, val; + int reg, val, ret; component = snd_soc_dapm_kcontrol_component(kc); val = ucontrol->value.enumerated.item[0]; @@ -3446,7 +3446,9 @@ static int wcd934x_int_dem_inp_mux_put(struct snd_kcontrol *kc, WCD934X_RX_DLY_ZN_EN_MASK, WCD934X_RX_DLY_ZN_DISABLE); - return snd_soc_dapm_put_enum_double(kc, ucontrol); + ret = snd_soc_dapm_put_enum_double(kc, ucontrol); + + return ret; } static int wcd934x_dec_enum_put(struct snd_kcontrol *kcontrol, diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 36cbc66914..bbc261ab20 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3089,7 +3089,7 @@ static int wcd938x_mbhc_micb_ctrl_threshold_mic(struct snd_soc_component *compon int micb_num, bool req_en) { struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); - int micb_mv; + int rc, micb_mv; if (micb_num != MIC_BIAS_2) return -EINVAL; @@ -3103,7 +3103,9 @@ static int wcd938x_mbhc_micb_ctrl_threshold_mic(struct snd_soc_component *compon micb_mv = req_en ? 
WCD_MBHC_THR_HS_MICB_MV : wcd938x->micb2_mv; - return wcd938x_mbhc_micb_adjust_voltage(component, micb_mv, MIC_BIAS_2); + rc = wcd938x_mbhc_micb_adjust_voltage(component, micb_mv, MIC_BIAS_2); + + return rc; } static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x, @@ -4288,7 +4290,7 @@ static int wcd938x_codec_set_sdw_stream(struct snd_soc_dai *dai, static const struct snd_soc_dai_ops wcd938x_sdw_dai_ops = { .hw_params = wcd938x_codec_hw_params, .hw_free = wcd938x_codec_free, - .set_stream = wcd938x_codec_set_sdw_stream, + .set_sdw_stream = wcd938x_codec_set_sdw_stream, }; static struct snd_soc_dai_driver wcd938x_dais[] = { diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c index 8863b533f9..c35673e7f4 100644 --- a/sound/soc/codecs/wm2200.c +++ b/sound/soc/codecs/wm2200.c @@ -145,13 +145,13 @@ static const struct regmap_range_cfg wm2200_ranges[] = { .window_start = WM2200_DSP2_ZM_0, .window_len = 1024, }, }; -static const struct cs_dsp_region wm2200_dsp1_regions[] = { +static const struct wm_adsp_region wm2200_dsp1_regions[] = { { .type = WMFW_ADSP1_PM, .base = WM2200_DSP1_PM_BASE }, { .type = WMFW_ADSP1_DM, .base = WM2200_DSP1_DM_BASE }, { .type = WMFW_ADSP1_ZM, .base = WM2200_DSP1_ZM_BASE }, }; -static const struct cs_dsp_region wm2200_dsp2_regions[] = { +static const struct wm_adsp_region wm2200_dsp2_regions[] = { { .type = WMFW_ADSP1_PM, .base = WM2200_DSP2_PM_BASE }, { .type = WMFW_ADSP1_DM, .base = WM2200_DSP2_DM_BASE }, { .type = WMFW_ADSP1_ZM, .base = WM2200_DSP2_ZM_BASE }, @@ -2202,23 +2202,23 @@ static int wm2200_i2c_probe(struct i2c_client *i2c, } for (i = 0; i < 2; i++) { - wm2200->dsp[i].cs_dsp.type = WMFW_ADSP1; + wm2200->dsp[i].type = WMFW_ADSP1; wm2200->dsp[i].part = "wm2200"; - wm2200->dsp[i].cs_dsp.num = i + 1; - wm2200->dsp[i].cs_dsp.dev = &i2c->dev; - wm2200->dsp[i].cs_dsp.regmap = wm2200->regmap; - wm2200->dsp[i].cs_dsp.sysclk_reg = WM2200_CLOCKING_3; - wm2200->dsp[i].cs_dsp.sysclk_mask = WM2200_SYSCLK_FREQ_MASK; - wm2200->dsp[i].cs_dsp.sysclk_shift = WM2200_SYSCLK_FREQ_SHIFT; + wm2200->dsp[i].num = i + 1; + wm2200->dsp[i].dev = &i2c->dev; + wm2200->dsp[i].regmap = wm2200->regmap; + wm2200->dsp[i].sysclk_reg = WM2200_CLOCKING_3; + wm2200->dsp[i].sysclk_mask = WM2200_SYSCLK_FREQ_MASK; + wm2200->dsp[i].sysclk_shift = WM2200_SYSCLK_FREQ_SHIFT; } - wm2200->dsp[0].cs_dsp.base = WM2200_DSP1_CONTROL_1; - wm2200->dsp[0].cs_dsp.mem = wm2200_dsp1_regions; - wm2200->dsp[0].cs_dsp.num_mems = ARRAY_SIZE(wm2200_dsp1_regions); + wm2200->dsp[0].base = WM2200_DSP1_CONTROL_1; + wm2200->dsp[0].mem = wm2200_dsp1_regions; + wm2200->dsp[0].num_mems = ARRAY_SIZE(wm2200_dsp1_regions); - wm2200->dsp[1].cs_dsp.base = WM2200_DSP2_CONTROL_1; - wm2200->dsp[1].cs_dsp.mem = wm2200_dsp2_regions; - wm2200->dsp[1].cs_dsp.num_mems = ARRAY_SIZE(wm2200_dsp2_regions); + wm2200->dsp[1].base = WM2200_DSP2_CONTROL_1; + wm2200->dsp[1].mem = wm2200_dsp2_regions; + wm2200->dsp[1].num_mems = ARRAY_SIZE(wm2200_dsp2_regions); for (i = 0; i < ARRAY_SIZE(wm2200->dsp); i++) wm_adsp1_init(&wm2200->dsp[i]); diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index da2f8998df..621598608b 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -44,7 +44,7 @@ static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); static DECLARE_TLV_DB_SCALE(noise_tlv, -13200, 600, 0); static DECLARE_TLV_DB_SCALE(ng_tlv, -10200, 600, 0); -static const struct cs_dsp_region wm5102_dsp1_regions[] = { +static const struct wm_adsp_region wm5102_dsp1_regions[] = { { .type 
= WMFW_ADSP2_PM, .base = 0x100000 }, { .type = WMFW_ADSP2_ZM, .base = 0x180000 }, { .type = WMFW_ADSP2_XM, .base = 0x190000 }, @@ -2046,13 +2046,13 @@ static int wm5102_probe(struct platform_device *pdev) arizona_init_dvfs(&wm5102->core); wm5102->core.adsp[0].part = "wm5102"; - wm5102->core.adsp[0].cs_dsp.num = 1; - wm5102->core.adsp[0].cs_dsp.type = WMFW_ADSP2; - wm5102->core.adsp[0].cs_dsp.base = ARIZONA_DSP1_CONTROL_1; - wm5102->core.adsp[0].cs_dsp.dev = arizona->dev; - wm5102->core.adsp[0].cs_dsp.regmap = arizona->regmap; - wm5102->core.adsp[0].cs_dsp.mem = wm5102_dsp1_regions; - wm5102->core.adsp[0].cs_dsp.num_mems = ARRAY_SIZE(wm5102_dsp1_regions); + wm5102->core.adsp[0].num = 1; + wm5102->core.adsp[0].type = WMFW_ADSP2; + wm5102->core.adsp[0].base = ARIZONA_DSP1_CONTROL_1; + wm5102->core.adsp[0].dev = arizona->dev; + wm5102->core.adsp[0].regmap = arizona->regmap; + wm5102->core.adsp[0].mem = wm5102_dsp1_regions; + wm5102->core.adsp[0].num_mems = ARRAY_SIZE(wm5102_dsp1_regions); ret = wm_adsp2_init(&wm5102->core.adsp[0]); if (ret != 0) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 4973ba1ed7..5c2d45d05c 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -45,35 +45,35 @@ struct wm5110_priv { unsigned int in_pga_cache[6]; }; -static const struct cs_dsp_region wm5110_dsp1_regions[] = { +static const struct wm_adsp_region wm5110_dsp1_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x100000 }, { .type = WMFW_ADSP2_ZM, .base = 0x180000 }, { .type = WMFW_ADSP2_XM, .base = 0x190000 }, { .type = WMFW_ADSP2_YM, .base = 0x1a8000 }, }; -static const struct cs_dsp_region wm5110_dsp2_regions[] = { +static const struct wm_adsp_region wm5110_dsp2_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x200000 }, { .type = WMFW_ADSP2_ZM, .base = 0x280000 }, { .type = WMFW_ADSP2_XM, .base = 0x290000 }, { .type = WMFW_ADSP2_YM, .base = 0x2a8000 }, }; -static const struct cs_dsp_region wm5110_dsp3_regions[] = { +static const struct wm_adsp_region wm5110_dsp3_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x300000 }, { .type = WMFW_ADSP2_ZM, .base = 0x380000 }, { .type = WMFW_ADSP2_XM, .base = 0x390000 }, { .type = WMFW_ADSP2_YM, .base = 0x3a8000 }, }; -static const struct cs_dsp_region wm5110_dsp4_regions[] = { +static const struct wm_adsp_region wm5110_dsp4_regions[] = { { .type = WMFW_ADSP2_PM, .base = 0x400000 }, { .type = WMFW_ADSP2_ZM, .base = 0x480000 }, { .type = WMFW_ADSP2_XM, .base = 0x490000 }, { .type = WMFW_ADSP2_YM, .base = 0x4a8000 }, }; -static const struct cs_dsp_region *wm5110_dsp_regions[] = { +static const struct wm_adsp_region *wm5110_dsp_regions[] = { wm5110_dsp1_regions, wm5110_dsp2_regions, wm5110_dsp3_regions, @@ -2409,15 +2409,15 @@ static int wm5110_probe(struct platform_device *pdev) for (i = 0; i < WM5110_NUM_ADSP; i++) { wm5110->core.adsp[i].part = "wm5110"; - wm5110->core.adsp[i].cs_dsp.num = i + 1; - wm5110->core.adsp[i].cs_dsp.type = WMFW_ADSP2; - wm5110->core.adsp[i].cs_dsp.dev = arizona->dev; - wm5110->core.adsp[i].cs_dsp.regmap = arizona->regmap; + wm5110->core.adsp[i].num = i + 1; + wm5110->core.adsp[i].type = WMFW_ADSP2; + wm5110->core.adsp[i].dev = arizona->dev; + wm5110->core.adsp[i].regmap = arizona->regmap; - wm5110->core.adsp[i].cs_dsp.base = ARIZONA_DSP1_CONTROL_1 + wm5110->core.adsp[i].base = ARIZONA_DSP1_CONTROL_1 + (0x100 * i); - wm5110->core.adsp[i].cs_dsp.mem = wm5110_dsp_regions[i]; - wm5110->core.adsp[i].cs_dsp.num_mems + wm5110->core.adsp[i].mem = wm5110_dsp_regions[i]; + wm5110->core.adsp[i].num_mems = 
ARRAY_SIZE(wm5110_dsp1_regions); ret = wm_adsp2_init(&wm5110->core.adsp[i]); diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 86b1f6eaa5..dcee7b2bd3 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -713,12 +713,18 @@ static int wm8731_spi_probe(struct spi_device *spi) return 0; } +static int wm8731_spi_remove(struct spi_device *spi) +{ + return 0; +} + static struct spi_driver wm8731_spi_driver = { .driver = { .name = "wm8731", .of_match_table = wm8731_of_match, }, .probe = wm8731_spi_probe, + .remove = wm8731_spi_remove, }; #endif /* CONFIG_SPI_MASTER */ diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c index bf3a4415a8..a9a6d766a1 100644 --- a/sound/soc/codecs/wm8900.c +++ b/sound/soc/codecs/wm8900.c @@ -1252,11 +1252,17 @@ static int wm8900_spi_probe(struct spi_device *spi) return ret; } +static int wm8900_spi_remove(struct spi_device *spi) +{ + return 0; +} + static struct spi_driver wm8900_spi_driver = { .driver = { .name = "wm8900", }, .probe = wm8900_spi_probe, + .remove = wm8900_spi_remove, }; #endif /* CONFIG_SPI_MASTER */ diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index a5584ba962..ba16bdf9e4 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3538,8 +3538,9 @@ static int wm8962_set_pdata_from_of(struct i2c_client *i2c, pdata->gpio_init[i] = 0x0; } - pdata->mclk = devm_clk_get_optional(&i2c->dev, NULL); - return PTR_ERR_OR_ZERO(pdata->mclk); + pdata->mclk = devm_clk_get(&i2c->dev, NULL); + + return 0; } static int wm8962_i2c_probe(struct i2c_client *i2c, @@ -3571,6 +3572,14 @@ static int wm8962_i2c_probe(struct i2c_client *i2c, return ret; } + /* Mark the mclk pointer to NULL if no mclk assigned */ + if (IS_ERR(wm8962->pdata.mclk)) { + /* But do not ignore the request for probe defer */ + if (PTR_ERR(wm8962->pdata.mclk) == -EPROBE_DEFER) + return -EPROBE_DEFER; + wm8962->pdata.mclk = NULL; + } + for (i = 0; i < ARRAY_SIZE(wm8962->supplies); i++) wm8962->supplies[i].supply = wm8962_supply_names[i]; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 0582585236..f7c800927c 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -32,15 +33,15 @@ #include "wm_adsp.h" #define adsp_crit(_dsp, fmt, ...) \ - dev_crit(_dsp->cs_dsp.dev, "%s: " fmt, _dsp->cs_dsp.name, ##__VA_ARGS__) + dev_crit(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__) #define adsp_err(_dsp, fmt, ...) \ - dev_err(_dsp->cs_dsp.dev, "%s: " fmt, _dsp->cs_dsp.name, ##__VA_ARGS__) + dev_err(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__) #define adsp_warn(_dsp, fmt, ...) \ - dev_warn(_dsp->cs_dsp.dev, "%s: " fmt, _dsp->cs_dsp.name, ##__VA_ARGS__) + dev_warn(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__) #define adsp_info(_dsp, fmt, ...) \ - dev_info(_dsp->cs_dsp.dev, "%s: " fmt, _dsp->cs_dsp.name, ##__VA_ARGS__) + dev_info(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__) #define adsp_dbg(_dsp, fmt, ...) \ - dev_dbg(_dsp->cs_dsp.dev, "%s: " fmt, _dsp->cs_dsp.name, ##__VA_ARGS__) + dev_dbg(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__) #define compr_err(_obj, fmt, ...) \ adsp_err(_obj->dsp, "%s: " fmt, _obj->name ? _obj->name : "legacy", \ @@ -49,10 +50,301 @@ adsp_dbg(_obj->dsp, "%s: " fmt, _obj->name ? 
_obj->name : "legacy", \ ##__VA_ARGS__) +#define ADSP1_CONTROL_1 0x00 +#define ADSP1_CONTROL_2 0x02 +#define ADSP1_CONTROL_3 0x03 +#define ADSP1_CONTROL_4 0x04 +#define ADSP1_CONTROL_5 0x06 +#define ADSP1_CONTROL_6 0x07 +#define ADSP1_CONTROL_7 0x08 +#define ADSP1_CONTROL_8 0x09 +#define ADSP1_CONTROL_9 0x0A +#define ADSP1_CONTROL_10 0x0B +#define ADSP1_CONTROL_11 0x0C +#define ADSP1_CONTROL_12 0x0D +#define ADSP1_CONTROL_13 0x0F +#define ADSP1_CONTROL_14 0x10 +#define ADSP1_CONTROL_15 0x11 +#define ADSP1_CONTROL_16 0x12 +#define ADSP1_CONTROL_17 0x13 +#define ADSP1_CONTROL_18 0x14 +#define ADSP1_CONTROL_19 0x16 +#define ADSP1_CONTROL_20 0x17 +#define ADSP1_CONTROL_21 0x18 +#define ADSP1_CONTROL_22 0x1A +#define ADSP1_CONTROL_23 0x1B +#define ADSP1_CONTROL_24 0x1C +#define ADSP1_CONTROL_25 0x1E +#define ADSP1_CONTROL_26 0x20 +#define ADSP1_CONTROL_27 0x21 +#define ADSP1_CONTROL_28 0x22 +#define ADSP1_CONTROL_29 0x23 +#define ADSP1_CONTROL_30 0x24 +#define ADSP1_CONTROL_31 0x26 + +/* + * ADSP1 Control 19 + */ +#define ADSP1_WDMA_BUFFER_LENGTH_MASK 0x00FF /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */ +#define ADSP1_WDMA_BUFFER_LENGTH_SHIFT 0 /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */ +#define ADSP1_WDMA_BUFFER_LENGTH_WIDTH 8 /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */ + + +/* + * ADSP1 Control 30 + */ +#define ADSP1_DBG_CLK_ENA 0x0008 /* DSP1_DBG_CLK_ENA */ +#define ADSP1_DBG_CLK_ENA_MASK 0x0008 /* DSP1_DBG_CLK_ENA */ +#define ADSP1_DBG_CLK_ENA_SHIFT 3 /* DSP1_DBG_CLK_ENA */ +#define ADSP1_DBG_CLK_ENA_WIDTH 1 /* DSP1_DBG_CLK_ENA */ +#define ADSP1_SYS_ENA 0x0004 /* DSP1_SYS_ENA */ +#define ADSP1_SYS_ENA_MASK 0x0004 /* DSP1_SYS_ENA */ +#define ADSP1_SYS_ENA_SHIFT 2 /* DSP1_SYS_ENA */ +#define ADSP1_SYS_ENA_WIDTH 1 /* DSP1_SYS_ENA */ +#define ADSP1_CORE_ENA 0x0002 /* DSP1_CORE_ENA */ +#define ADSP1_CORE_ENA_MASK 0x0002 /* DSP1_CORE_ENA */ +#define ADSP1_CORE_ENA_SHIFT 1 /* DSP1_CORE_ENA */ +#define ADSP1_CORE_ENA_WIDTH 1 /* DSP1_CORE_ENA */ +#define ADSP1_START 0x0001 /* DSP1_START */ +#define ADSP1_START_MASK 0x0001 /* DSP1_START */ +#define ADSP1_START_SHIFT 0 /* DSP1_START */ +#define ADSP1_START_WIDTH 1 /* DSP1_START */ + +/* + * ADSP1 Control 31 + */ +#define ADSP1_CLK_SEL_MASK 0x0007 /* CLK_SEL_ENA */ +#define ADSP1_CLK_SEL_SHIFT 0 /* CLK_SEL_ENA */ +#define ADSP1_CLK_SEL_WIDTH 3 /* CLK_SEL_ENA */ + +#define ADSP2_CONTROL 0x0 +#define ADSP2_CLOCKING 0x1 +#define ADSP2V2_CLOCKING 0x2 +#define ADSP2_STATUS1 0x4 +#define ADSP2_WDMA_CONFIG_1 0x30 +#define ADSP2_WDMA_CONFIG_2 0x31 +#define ADSP2V2_WDMA_CONFIG_2 0x32 +#define ADSP2_RDMA_CONFIG_1 0x34 + +#define ADSP2_SCRATCH0 0x40 +#define ADSP2_SCRATCH1 0x41 +#define ADSP2_SCRATCH2 0x42 +#define ADSP2_SCRATCH3 0x43 + +#define ADSP2V2_SCRATCH0_1 0x40 +#define ADSP2V2_SCRATCH2_3 0x42 + +/* + * ADSP2 Control + */ + +#define ADSP2_MEM_ENA 0x0010 /* DSP1_MEM_ENA */ +#define ADSP2_MEM_ENA_MASK 0x0010 /* DSP1_MEM_ENA */ +#define ADSP2_MEM_ENA_SHIFT 4 /* DSP1_MEM_ENA */ +#define ADSP2_MEM_ENA_WIDTH 1 /* DSP1_MEM_ENA */ +#define ADSP2_SYS_ENA 0x0004 /* DSP1_SYS_ENA */ +#define ADSP2_SYS_ENA_MASK 0x0004 /* DSP1_SYS_ENA */ +#define ADSP2_SYS_ENA_SHIFT 2 /* DSP1_SYS_ENA */ +#define ADSP2_SYS_ENA_WIDTH 1 /* DSP1_SYS_ENA */ +#define ADSP2_CORE_ENA 0x0002 /* DSP1_CORE_ENA */ +#define ADSP2_CORE_ENA_MASK 0x0002 /* DSP1_CORE_ENA */ +#define ADSP2_CORE_ENA_SHIFT 1 /* DSP1_CORE_ENA */ +#define ADSP2_CORE_ENA_WIDTH 1 /* DSP1_CORE_ENA */ +#define ADSP2_START 0x0001 /* DSP1_START */ +#define ADSP2_START_MASK 0x0001 /* DSP1_START */ +#define ADSP2_START_SHIFT 0 /* DSP1_START */ 
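+/*
+ * The register fields above and below follow the usual MASK/SHIFT/WIDTH
+ * triple and are driven through regmap. A minimal sketch of kicking off
+ * an ADSP2 core with these fields (assuming an already-initialised
+ * struct wm_adsp *dsp):
+ *
+ *	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
+ *			   ADSP2_CORE_ENA | ADSP2_START,
+ *			   ADSP2_CORE_ENA | ADSP2_START);
+ */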
+#define ADSP2_START_WIDTH 1 /* DSP1_START */ + +/* + * ADSP2 clocking + */ +#define ADSP2_CLK_SEL_MASK 0x0007 /* CLK_SEL_ENA */ +#define ADSP2_CLK_SEL_SHIFT 0 /* CLK_SEL_ENA */ +#define ADSP2_CLK_SEL_WIDTH 3 /* CLK_SEL_ENA */ + +/* + * ADSP2V2 clocking + */ +#define ADSP2V2_CLK_SEL_MASK 0x70000 /* CLK_SEL_ENA */ +#define ADSP2V2_CLK_SEL_SHIFT 16 /* CLK_SEL_ENA */ +#define ADSP2V2_CLK_SEL_WIDTH 3 /* CLK_SEL_ENA */ + +#define ADSP2V2_RATE_MASK 0x7800 /* DSP_RATE */ +#define ADSP2V2_RATE_SHIFT 11 /* DSP_RATE */ +#define ADSP2V2_RATE_WIDTH 4 /* DSP_RATE */ + +/* + * ADSP2 Status 1 + */ +#define ADSP2_RAM_RDY 0x0001 +#define ADSP2_RAM_RDY_MASK 0x0001 +#define ADSP2_RAM_RDY_SHIFT 0 +#define ADSP2_RAM_RDY_WIDTH 1 + +/* + * ADSP2 Lock support + */ +#define ADSP2_LOCK_CODE_0 0x5555 +#define ADSP2_LOCK_CODE_1 0xAAAA + +#define ADSP2_WATCHDOG 0x0A +#define ADSP2_BUS_ERR_ADDR 0x52 +#define ADSP2_REGION_LOCK_STATUS 0x64 +#define ADSP2_LOCK_REGION_1_LOCK_REGION_0 0x66 +#define ADSP2_LOCK_REGION_3_LOCK_REGION_2 0x68 +#define ADSP2_LOCK_REGION_5_LOCK_REGION_4 0x6A +#define ADSP2_LOCK_REGION_7_LOCK_REGION_6 0x6C +#define ADSP2_LOCK_REGION_9_LOCK_REGION_8 0x6E +#define ADSP2_LOCK_REGION_CTRL 0x7A +#define ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR 0x7C + +#define ADSP2_REGION_LOCK_ERR_MASK 0x8000 +#define ADSP2_ADDR_ERR_MASK 0x4000 +#define ADSP2_WDT_TIMEOUT_STS_MASK 0x2000 +#define ADSP2_CTRL_ERR_PAUSE_ENA 0x0002 +#define ADSP2_CTRL_ERR_EINT 0x0001 + +#define ADSP2_BUS_ERR_ADDR_MASK 0x00FFFFFF +#define ADSP2_XMEM_ERR_ADDR_MASK 0x0000FFFF +#define ADSP2_PMEM_ERR_ADDR_MASK 0x7FFF0000 +#define ADSP2_PMEM_ERR_ADDR_SHIFT 16 +#define ADSP2_WDT_ENA_MASK 0xFFFFFFFD + +#define ADSP2_LOCK_REGION_SHIFT 16 + #define ADSP_MAX_STD_CTRL_SIZE 512 -static const struct cs_dsp_client_ops wm_adsp1_client_ops; -static const struct cs_dsp_client_ops wm_adsp2_client_ops; +#define WM_ADSP_ACKED_CTL_TIMEOUT_MS 100 +#define WM_ADSP_ACKED_CTL_N_QUICKPOLLS 10 +#define WM_ADSP_ACKED_CTL_MIN_VALUE 0 +#define WM_ADSP_ACKED_CTL_MAX_VALUE 0xFFFFFF + +/* + * Event control messages + */ +#define WM_ADSP_FW_EVENT_SHUTDOWN 0x000001 + +/* + * HALO system info + */ +#define HALO_AHBM_WINDOW_DEBUG_0 0x02040 +#define HALO_AHBM_WINDOW_DEBUG_1 0x02044 + +/* + * HALO core + */ +#define HALO_SCRATCH1 0x005c0 +#define HALO_SCRATCH2 0x005c8 +#define HALO_SCRATCH3 0x005d0 +#define HALO_SCRATCH4 0x005d8 +#define HALO_CCM_CORE_CONTROL 0x41000 +#define HALO_CORE_SOFT_RESET 0x00010 +#define HALO_WDT_CONTROL 0x47000 + +/* + * HALO MPU banks + */ +#define HALO_MPU_XMEM_ACCESS_0 0x43000 +#define HALO_MPU_YMEM_ACCESS_0 0x43004 +#define HALO_MPU_WINDOW_ACCESS_0 0x43008 +#define HALO_MPU_XREG_ACCESS_0 0x4300C +#define HALO_MPU_YREG_ACCESS_0 0x43014 +#define HALO_MPU_XMEM_ACCESS_1 0x43018 +#define HALO_MPU_YMEM_ACCESS_1 0x4301C +#define HALO_MPU_WINDOW_ACCESS_1 0x43020 +#define HALO_MPU_XREG_ACCESS_1 0x43024 +#define HALO_MPU_YREG_ACCESS_1 0x4302C +#define HALO_MPU_XMEM_ACCESS_2 0x43030 +#define HALO_MPU_YMEM_ACCESS_2 0x43034 +#define HALO_MPU_WINDOW_ACCESS_2 0x43038 +#define HALO_MPU_XREG_ACCESS_2 0x4303C +#define HALO_MPU_YREG_ACCESS_2 0x43044 +#define HALO_MPU_XMEM_ACCESS_3 0x43048 +#define HALO_MPU_YMEM_ACCESS_3 0x4304C +#define HALO_MPU_WINDOW_ACCESS_3 0x43050 +#define HALO_MPU_XREG_ACCESS_3 0x43054 +#define HALO_MPU_YREG_ACCESS_3 0x4305C +#define HALO_MPU_XM_VIO_ADDR 0x43100 +#define HALO_MPU_XM_VIO_STATUS 0x43104 +#define HALO_MPU_YM_VIO_ADDR 0x43108 +#define HALO_MPU_YM_VIO_STATUS 0x4310C +#define HALO_MPU_PM_VIO_ADDR 0x43110 +#define HALO_MPU_PM_VIO_STATUS 
0x43114 +#define HALO_MPU_LOCK_CONFIG 0x43140 + +/* + * HALO_AHBM_WINDOW_DEBUG_1 + */ +#define HALO_AHBM_CORE_ERR_ADDR_MASK 0x0fffff00 +#define HALO_AHBM_CORE_ERR_ADDR_SHIFT 8 +#define HALO_AHBM_FLAGS_ERR_MASK 0x000000ff + +/* + * HALO_CCM_CORE_CONTROL + */ +#define HALO_CORE_RESET 0x00000200 +#define HALO_CORE_EN 0x00000001 + +/* + * HALO_CORE_SOFT_RESET + */ +#define HALO_CORE_SOFT_RESET_MASK 0x00000001 + +/* + * HALO_WDT_CONTROL + */ +#define HALO_WDT_EN_MASK 0x00000001 + +/* + * HALO_MPU_?M_VIO_STATUS + */ +#define HALO_MPU_VIO_STS_MASK 0x007e0000 +#define HALO_MPU_VIO_STS_SHIFT 17 +#define HALO_MPU_VIO_ERR_WR_MASK 0x00008000 +#define HALO_MPU_VIO_ERR_SRC_MASK 0x00007fff +#define HALO_MPU_VIO_ERR_SRC_SHIFT 0 + +static const struct wm_adsp_ops wm_adsp1_ops; +static const struct wm_adsp_ops wm_adsp2_ops[]; +static const struct wm_adsp_ops wm_halo_ops; + +struct wm_adsp_buf { + struct list_head list; + void *buf; +}; + +static struct wm_adsp_buf *wm_adsp_buf_alloc(const void *src, size_t len, + struct list_head *list) +{ + struct wm_adsp_buf *buf = kzalloc(sizeof(*buf), GFP_KERNEL); + + if (buf == NULL) + return NULL; + + buf->buf = vmalloc(len); + if (!buf->buf) { + kfree(buf); + return NULL; + } + memcpy(buf->buf, src, len); + + if (list) + list_add_tail(&buf->list, list); + + return buf; +} + +static void wm_adsp_buf_free(struct list_head *list) +{ + while (!list_empty(list)) { + struct wm_adsp_buf *buf = list_first_entry(list, + struct wm_adsp_buf, + list); + list_del(&buf->list); + vfree(buf->buf); + kfree(buf); + } +} #define WM_ADSP_FW_MBC_VSS 0 #define WM_ADSP_FW_HIFI 1 @@ -178,10 +470,12 @@ struct wm_adsp_compr { const char *name; }; +#define WM_ADSP_DATA_WORD_SIZE 3 + #define WM_ADSP_MIN_FRAGMENTS 1 #define WM_ADSP_MAX_FRAGMENTS 256 -#define WM_ADSP_MIN_FRAGMENT_SIZE (64 * CS_DSP_DATA_WORD_SIZE) -#define WM_ADSP_MAX_FRAGMENT_SIZE (4096 * CS_DSP_DATA_WORD_SIZE) +#define WM_ADSP_MIN_FRAGMENT_SIZE (64 * WM_ADSP_DATA_WORD_SIZE) +#define WM_ADSP_MAX_FRAGMENT_SIZE (4096 * WM_ADSP_DATA_WORD_SIZE) #define WM_ADSP_ALG_XM_STRUCT_MAGIC 0x49aec7 @@ -304,11 +598,183 @@ static const struct { struct wm_coeff_ctl { const char *name; - struct cs_dsp_coeff_ctl *cs_ctl; + const char *fw_name; + /* Subname is needed to match with firmware */ + const char *subname; + unsigned int subname_len; + struct wm_adsp_alg_region alg_region; + struct wm_adsp *dsp; + unsigned int enabled:1; + struct list_head list; + void *cache; + unsigned int offset; + size_t len; + unsigned int set:1; struct soc_bytes_ext bytes_ext; - struct work_struct work; + unsigned int flags; + snd_ctl_elem_type_t type; }; +static const char *wm_adsp_mem_region_name(unsigned int type) +{ + switch (type) { + case WMFW_ADSP1_PM: + return "PM"; + case WMFW_HALO_PM_PACKED: + return "PM_PACKED"; + case WMFW_ADSP1_DM: + return "DM"; + case WMFW_ADSP2_XM: + return "XM"; + case WMFW_HALO_XM_PACKED: + return "XM_PACKED"; + case WMFW_ADSP2_YM: + return "YM"; + case WMFW_HALO_YM_PACKED: + return "YM_PACKED"; + case WMFW_ADSP1_ZM: + return "ZM"; + default: + return NULL; + } +} + +#ifdef CONFIG_DEBUG_FS +static void wm_adsp_debugfs_save_wmfwname(struct wm_adsp *dsp, const char *s) +{ + char *tmp = kasprintf(GFP_KERNEL, "%s\n", s); + + kfree(dsp->wmfw_file_name); + dsp->wmfw_file_name = tmp; +} + +static void wm_adsp_debugfs_save_binname(struct wm_adsp *dsp, const char *s) +{ + char *tmp = kasprintf(GFP_KERNEL, "%s\n", s); + + kfree(dsp->bin_file_name); + dsp->bin_file_name = tmp; +} + +static void wm_adsp_debugfs_clear(struct wm_adsp *dsp) +{ + 
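+	/*
+	 * Dropping both cached names is enough to "empty" the debugfs
+	 * entries: the read handlers below return a zero-length read
+	 * whenever the corresponding name pointer is NULL.
+	 */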
kfree(dsp->wmfw_file_name); + kfree(dsp->bin_file_name); + dsp->wmfw_file_name = NULL; + dsp->bin_file_name = NULL; +} + +static ssize_t wm_adsp_debugfs_wmfw_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct wm_adsp *dsp = file->private_data; + ssize_t ret; + + mutex_lock(&dsp->pwr_lock); + + if (!dsp->wmfw_file_name || !dsp->booted) + ret = 0; + else + ret = simple_read_from_buffer(user_buf, count, ppos, + dsp->wmfw_file_name, + strlen(dsp->wmfw_file_name)); + + mutex_unlock(&dsp->pwr_lock); + return ret; +} + +static ssize_t wm_adsp_debugfs_bin_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct wm_adsp *dsp = file->private_data; + ssize_t ret; + + mutex_lock(&dsp->pwr_lock); + + if (!dsp->bin_file_name || !dsp->booted) + ret = 0; + else + ret = simple_read_from_buffer(user_buf, count, ppos, + dsp->bin_file_name, + strlen(dsp->bin_file_name)); + + mutex_unlock(&dsp->pwr_lock); + return ret; +} + +static const struct { + const char *name; + const struct file_operations fops; +} wm_adsp_debugfs_fops[] = { + { + .name = "wmfw_file_name", + .fops = { + .open = simple_open, + .read = wm_adsp_debugfs_wmfw_read, + }, + }, + { + .name = "bin_file_name", + .fops = { + .open = simple_open, + .read = wm_adsp_debugfs_bin_read, + }, + }, +}; + +static void wm_adsp2_init_debugfs(struct wm_adsp *dsp, + struct snd_soc_component *component) +{ + struct dentry *root = NULL; + int i; + + root = debugfs_create_dir(dsp->name, component->debugfs_root); + + debugfs_create_bool("booted", 0444, root, &dsp->booted); + debugfs_create_bool("running", 0444, root, &dsp->running); + debugfs_create_x32("fw_id", 0444, root, &dsp->fw_id); + debugfs_create_x32("fw_version", 0444, root, &dsp->fw_id_version); + + for (i = 0; i < ARRAY_SIZE(wm_adsp_debugfs_fops); ++i) + debugfs_create_file(wm_adsp_debugfs_fops[i].name, 0444, root, + dsp, &wm_adsp_debugfs_fops[i].fops); + + dsp->debugfs_root = root; +} + +static void wm_adsp2_cleanup_debugfs(struct wm_adsp *dsp) +{ + wm_adsp_debugfs_clear(dsp); + debugfs_remove_recursive(dsp->debugfs_root); + dsp->debugfs_root = NULL; +} +#else +static inline void wm_adsp2_init_debugfs(struct wm_adsp *dsp, + struct snd_soc_component *component) +{ +} + +static inline void wm_adsp2_cleanup_debugfs(struct wm_adsp *dsp) +{ +} + +static inline void wm_adsp_debugfs_save_wmfwname(struct wm_adsp *dsp, + const char *s) +{ +} + +static inline void wm_adsp_debugfs_save_binname(struct wm_adsp *dsp, + const char *s) +{ +} + +static inline void wm_adsp_debugfs_clear(struct wm_adsp *dsp) +{ +} +#endif + int wm_adsp_fw_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -336,14 +802,14 @@ int wm_adsp_fw_put(struct snd_kcontrol *kcontrol, if (ucontrol->value.enumerated.item[0] >= WM_ADSP_NUM_FW) return -EINVAL; - mutex_lock(&dsp[e->shift_l].cs_dsp.pwr_lock); + mutex_lock(&dsp[e->shift_l].pwr_lock); - if (dsp[e->shift_l].cs_dsp.booted || !list_empty(&dsp[e->shift_l].compr_list)) + if (dsp[e->shift_l].booted || !list_empty(&dsp[e->shift_l].compr_list)) ret = -EBUSY; else dsp[e->shift_l].fw = ucontrol->value.enumerated.item[0]; - mutex_unlock(&dsp[e->shift_l].cs_dsp.pwr_lock); + mutex_unlock(&dsp[e->shift_l].pwr_lock); return ret; } @@ -360,49 +826,270 @@ const struct soc_enum wm_adsp_fw_enum[] = { }; EXPORT_SYMBOL_GPL(wm_adsp_fw_enum); +static const struct wm_adsp_region *wm_adsp_find_region(struct wm_adsp *dsp, + int type) +{ + int i; + + for (i = 0; i < dsp->num_mems; i++) + if (dsp->mem[i].type == type) 
+ return &dsp->mem[i]; + + return NULL; +} + +static unsigned int wm_adsp_region_to_reg(struct wm_adsp_region const *mem, + unsigned int offset) +{ + switch (mem->type) { + case WMFW_ADSP1_PM: + return mem->base + (offset * 3); + case WMFW_ADSP1_DM: + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + case WMFW_ADSP1_ZM: + return mem->base + (offset * 2); + default: + WARN(1, "Unknown memory region type"); + return offset; + } +} + +static unsigned int wm_halo_region_to_reg(struct wm_adsp_region const *mem, + unsigned int offset) +{ + switch (mem->type) { + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + return mem->base + (offset * 4); + case WMFW_HALO_XM_PACKED: + case WMFW_HALO_YM_PACKED: + return (mem->base + (offset * 3)) & ~0x3; + case WMFW_HALO_PM_PACKED: + return mem->base + (offset * 5); + default: + WARN(1, "Unknown memory region type"); + return offset; + } +} + +static void wm_adsp_read_fw_status(struct wm_adsp *dsp, + int noffs, unsigned int *offs) +{ + unsigned int i; + int ret; + + for (i = 0; i < noffs; ++i) { + ret = regmap_read(dsp->regmap, dsp->base + offs[i], &offs[i]); + if (ret) { + adsp_err(dsp, "Failed to read SCRATCH%u: %d\n", i, ret); + return; + } + } +} + +static void wm_adsp2_show_fw_status(struct wm_adsp *dsp) +{ + unsigned int offs[] = { + ADSP2_SCRATCH0, ADSP2_SCRATCH1, ADSP2_SCRATCH2, ADSP2_SCRATCH3, + }; + + wm_adsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs); + + adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", + offs[0], offs[1], offs[2], offs[3]); +} + +static void wm_adsp2v2_show_fw_status(struct wm_adsp *dsp) +{ + unsigned int offs[] = { ADSP2V2_SCRATCH0_1, ADSP2V2_SCRATCH2_3 }; + + wm_adsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs); + + adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", + offs[0] & 0xFFFF, offs[0] >> 16, + offs[1] & 0xFFFF, offs[1] >> 16); +} + +static void wm_halo_show_fw_status(struct wm_adsp *dsp) +{ + unsigned int offs[] = { + HALO_SCRATCH1, HALO_SCRATCH2, HALO_SCRATCH3, HALO_SCRATCH4, + }; + + wm_adsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs); + + adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", + offs[0], offs[1], offs[2], offs[3]); +} + static inline struct wm_coeff_ctl *bytes_ext_to_ctl(struct soc_bytes_ext *ext) { return container_of(ext, struct wm_coeff_ctl, bytes_ext); } +static int wm_coeff_base_reg(struct wm_coeff_ctl *ctl, unsigned int *reg) +{ + const struct wm_adsp_alg_region *alg_region = &ctl->alg_region; + struct wm_adsp *dsp = ctl->dsp; + const struct wm_adsp_region *mem; + + mem = wm_adsp_find_region(dsp, alg_region->type); + if (!mem) { + adsp_err(dsp, "No base for region %x\n", + alg_region->type); + return -EINVAL; + } + + *reg = dsp->ops->region_to_reg(mem, ctl->alg_region.base + ctl->offset); + + return 0; +} + static int wm_coeff_info(struct snd_kcontrol *kctl, struct snd_ctl_elem_info *uinfo) { struct soc_bytes_ext *bytes_ext = (struct soc_bytes_ext *)kctl->private_value; struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); - struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl; - switch (cs_ctl->type) { + switch (ctl->type) { case WMFW_CTL_TYPE_ACKED: uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->value.integer.min = CS_DSP_ACKED_CTL_MIN_VALUE; - uinfo->value.integer.max = CS_DSP_ACKED_CTL_MAX_VALUE; + uinfo->value.integer.min = WM_ADSP_ACKED_CTL_MIN_VALUE; + uinfo->value.integer.max = WM_ADSP_ACKED_CTL_MAX_VALUE; uinfo->value.integer.step = 1; uinfo->count = 1; break; default: uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES; - uinfo->count = cs_ctl->len; + uinfo->count = ctl->len; break; } 
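+	/*
+	 * Acked controls are presented to userspace as a single integer
+	 * (the event ID to send); every other control type is a raw byte
+	 * blob covering the control's full length. The ack handshake
+	 * itself lives in wm_coeff_write_acked_control(): write the event
+	 * ID, then poll for the firmware writing zero back, with ~1ms
+	 * quick polls before backing off to ~10ms, up to
+	 * WM_ADSP_ACKED_CTL_TIMEOUT_MS in total.
+	 */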
return 0; } +static int wm_coeff_write_acked_control(struct wm_coeff_ctl *ctl, + unsigned int event_id) +{ + struct wm_adsp *dsp = ctl->dsp; + __be32 val = cpu_to_be32(event_id); + unsigned int reg; + int i, ret; + + ret = wm_coeff_base_reg(ctl, ®); + if (ret) + return ret; + + adsp_dbg(dsp, "Sending 0x%x to acked control alg 0x%x %s:0x%x\n", + event_id, ctl->alg_region.alg, + wm_adsp_mem_region_name(ctl->alg_region.type), ctl->offset); + + ret = regmap_raw_write(dsp->regmap, reg, &val, sizeof(val)); + if (ret) { + adsp_err(dsp, "Failed to write %x: %d\n", reg, ret); + return ret; + } + + /* + * Poll for ack, we initially poll at ~1ms intervals for firmwares + * that respond quickly, then go to ~10ms polls. A firmware is unlikely + * to ack instantly so we do the first 1ms delay before reading the + * control to avoid a pointless bus transaction + */ + for (i = 0; i < WM_ADSP_ACKED_CTL_TIMEOUT_MS;) { + switch (i) { + case 0 ... WM_ADSP_ACKED_CTL_N_QUICKPOLLS - 1: + usleep_range(1000, 2000); + i++; + break; + default: + usleep_range(10000, 20000); + i += 10; + break; + } + + ret = regmap_raw_read(dsp->regmap, reg, &val, sizeof(val)); + if (ret) { + adsp_err(dsp, "Failed to read %x: %d\n", reg, ret); + return ret; + } + + if (val == 0) { + adsp_dbg(dsp, "Acked control ACKED at poll %u\n", i); + return 0; + } + } + + adsp_warn(dsp, "Acked control @0x%x alg:0x%x %s:0x%x timed out\n", + reg, ctl->alg_region.alg, + wm_adsp_mem_region_name(ctl->alg_region.type), + ctl->offset); + + return -ETIMEDOUT; +} + +static int wm_coeff_write_ctrl_raw(struct wm_coeff_ctl *ctl, + const void *buf, size_t len) +{ + struct wm_adsp *dsp = ctl->dsp; + void *scratch; + int ret; + unsigned int reg; + + ret = wm_coeff_base_reg(ctl, ®); + if (ret) + return ret; + + scratch = kmemdup(buf, len, GFP_KERNEL | GFP_DMA); + if (!scratch) + return -ENOMEM; + + ret = regmap_raw_write(dsp->regmap, reg, scratch, + len); + if (ret) { + adsp_err(dsp, "Failed to write %zu bytes to %x: %d\n", + len, reg, ret); + kfree(scratch); + return ret; + } + adsp_dbg(dsp, "Wrote %zu bytes to %x\n", len, reg); + + kfree(scratch); + + return 0; +} + +static int wm_coeff_write_ctrl(struct wm_coeff_ctl *ctl, + const void *buf, size_t len) +{ + int ret = 0; + + if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; + else if (buf != ctl->cache) + memcpy(ctl->cache, buf, len); + + ctl->set = 1; + if (ctl->enabled && ctl->dsp->running) + ret = wm_coeff_write_ctrl_raw(ctl, buf, len); + + return ret; +} + static int wm_coeff_put(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *ucontrol) { struct soc_bytes_ext *bytes_ext = (struct soc_bytes_ext *)kctl->private_value; struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); - struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl; char *p = ucontrol->value.bytes.data; int ret = 0; - mutex_lock(&cs_ctl->dsp->pwr_lock); - ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, p, cs_ctl->len); - mutex_unlock(&cs_ctl->dsp->pwr_lock); + mutex_lock(&ctl->dsp->pwr_lock); + ret = wm_coeff_write_ctrl(ctl, p, ctl->len); + mutex_unlock(&ctl->dsp->pwr_lock); return ret; } @@ -413,17 +1100,16 @@ static int wm_coeff_tlv_put(struct snd_kcontrol *kctl, struct soc_bytes_ext *bytes_ext = (struct soc_bytes_ext *)kctl->private_value; struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); - struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl; int ret = 0; - mutex_lock(&cs_ctl->dsp->pwr_lock); + mutex_lock(&ctl->dsp->pwr_lock); - if (copy_from_user(cs_ctl->cache, bytes, size)) + if (copy_from_user(ctl->cache, bytes, size)) ret = -EFAULT; else - 
ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, cs_ctl->cache, size); + ret = wm_coeff_write_ctrl(ctl, ctl->cache, size); - mutex_unlock(&cs_ctl->dsp->pwr_lock); + mutex_unlock(&ctl->dsp->pwr_lock); return ret; } @@ -434,21 +1120,71 @@ static int wm_coeff_put_acked(struct snd_kcontrol *kctl, struct soc_bytes_ext *bytes_ext = (struct soc_bytes_ext *)kctl->private_value; struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); - struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl; unsigned int val = ucontrol->value.integer.value[0]; int ret; if (val == 0) return 0; /* 0 means no event */ - mutex_lock(&cs_ctl->dsp->pwr_lock); + mutex_lock(&ctl->dsp->pwr_lock); - if (cs_ctl->enabled) - ret = cs_dsp_coeff_write_acked_control(cs_ctl, val); + if (ctl->enabled && ctl->dsp->running) + ret = wm_coeff_write_acked_control(ctl, val); else ret = -EPERM; - mutex_unlock(&cs_ctl->dsp->pwr_lock); + mutex_unlock(&ctl->dsp->pwr_lock); + + return ret; +} + +static int wm_coeff_read_ctrl_raw(struct wm_coeff_ctl *ctl, + void *buf, size_t len) +{ + struct wm_adsp *dsp = ctl->dsp; + void *scratch; + int ret; + unsigned int reg; + + ret = wm_coeff_base_reg(ctl, ®); + if (ret) + return ret; + + scratch = kmalloc(len, GFP_KERNEL | GFP_DMA); + if (!scratch) + return -ENOMEM; + + ret = regmap_raw_read(dsp->regmap, reg, scratch, len); + if (ret) { + adsp_err(dsp, "Failed to read %zu bytes from %x: %d\n", + len, reg, ret); + kfree(scratch); + return ret; + } + adsp_dbg(dsp, "Read %zu bytes from %x\n", len, reg); + + memcpy(buf, scratch, len); + kfree(scratch); + + return 0; +} + +static int wm_coeff_read_ctrl(struct wm_coeff_ctl *ctl, void *buf, size_t len) +{ + int ret = 0; + + if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) { + if (ctl->enabled && ctl->dsp->running) + return wm_coeff_read_ctrl_raw(ctl, buf, len); + else + return -EPERM; + } else { + if (!ctl->flags && ctl->enabled && ctl->dsp->running) + ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len); + + if (buf != ctl->cache) + memcpy(buf, ctl->cache, len); + } return ret; } @@ -459,13 +1195,12 @@ static int wm_coeff_get(struct snd_kcontrol *kctl, struct soc_bytes_ext *bytes_ext = (struct soc_bytes_ext *)kctl->private_value; struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); - struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl; char *p = ucontrol->value.bytes.data; int ret; - mutex_lock(&cs_ctl->dsp->pwr_lock); - ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, p, cs_ctl->len); - mutex_unlock(&cs_ctl->dsp->pwr_lock); + mutex_lock(&ctl->dsp->pwr_lock); + ret = wm_coeff_read_ctrl(ctl, p, ctl->len); + mutex_unlock(&ctl->dsp->pwr_lock); return ret; } @@ -476,17 +1211,16 @@ static int wm_coeff_tlv_get(struct snd_kcontrol *kctl, struct soc_bytes_ext *bytes_ext = (struct soc_bytes_ext *)kctl->private_value; struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); - struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl; int ret = 0; - mutex_lock(&cs_ctl->dsp->pwr_lock); + mutex_lock(&ctl->dsp->pwr_lock); - ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, cs_ctl->cache, size); + ret = wm_coeff_read_ctrl(ctl, ctl->cache, size); - if (!ret && copy_to_user(bytes, cs_ctl->cache, size)) + if (!ret && copy_to_user(bytes, ctl->cache, size)) ret = -EFAULT; - mutex_unlock(&cs_ctl->dsp->pwr_lock); + mutex_unlock(&ctl->dsp->pwr_lock); return ret; } @@ -506,6 +1240,12 @@ static int wm_coeff_get_acked(struct snd_kcontrol *kcontrol, return 0; } +struct wmfw_ctl_work { + struct wm_adsp *dsp; + struct wm_coeff_ctl *ctl; + struct work_struct work; +}; + static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len) { 
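+	/*
+	 * Map wmfw control flags onto ALSA element access bits. Controls
+	 * larger than ADSP_MAX_STD_CTRL_SIZE are exposed through the TLV
+	 * byte interface (see wm_coeff_tlv_get/put above) rather than as
+	 * plain element reads and writes.
+	 */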
unsigned int out, rd, wr, vol; @@ -537,36 +1277,33 @@ static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len) return out; } -static void wm_adsp_ctl_work(struct work_struct *work) +static int wmfw_add_ctl(struct wm_adsp *dsp, struct wm_coeff_ctl *ctl) { - struct wm_coeff_ctl *ctl = container_of(work, - struct wm_coeff_ctl, - work); - struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl; - struct wm_adsp *dsp = container_of(cs_ctl->dsp, - struct wm_adsp, - cs_dsp); struct snd_kcontrol_new *kcontrol; + int ret; + + if (!ctl || !ctl->name) + return -EINVAL; kcontrol = kzalloc(sizeof(*kcontrol), GFP_KERNEL); if (!kcontrol) - return; + return -ENOMEM; kcontrol->name = ctl->name; kcontrol->info = wm_coeff_info; kcontrol->iface = SNDRV_CTL_ELEM_IFACE_MIXER; kcontrol->tlv.c = snd_soc_bytes_tlv_callback; kcontrol->private_value = (unsigned long)&ctl->bytes_ext; - kcontrol->access = wmfw_convert_flags(cs_ctl->flags, cs_ctl->len); + kcontrol->access = wmfw_convert_flags(ctl->flags, ctl->len); - switch (cs_ctl->type) { + switch (ctl->type) { case WMFW_CTL_TYPE_ACKED: kcontrol->get = wm_coeff_get_acked; kcontrol->put = wm_coeff_put_acked; break; default: if (kcontrol->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { - ctl->bytes_ext.max = cs_ctl->len; + ctl->bytes_ext.max = ctl->len; ctl->bytes_ext.get = wm_coeff_tlv_get; ctl->bytes_ext.put = wm_coeff_tlv_put; } else { @@ -576,49 +1313,141 @@ static void wm_adsp_ctl_work(struct work_struct *work) break; } - snd_soc_add_component_controls(dsp->component, kcontrol, 1); + ret = snd_soc_add_component_controls(dsp->component, kcontrol, 1); + if (ret < 0) + goto err_kcontrol; kfree(kcontrol); + + return 0; + +err_kcontrol: + kfree(kcontrol); + return ret; } -static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) +static int wm_coeff_init_control_caches(struct wm_adsp *dsp) { - struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); - struct cs_dsp *cs_dsp = &dsp->cs_dsp; struct wm_coeff_ctl *ctl; + int ret; + + list_for_each_entry(ctl, &dsp->ctl_list, list) { + if (!ctl->enabled || ctl->set) + continue; + if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + continue; + + /* + * For readable controls populate the cache from the DSP memory. + * For non-readable controls the cache was zero-filled when + * created so we don't need to do anything. 
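+		 * Either way the cache is the image that
+		 * wm_coeff_sync_controls() writes back to the DSP for any
+		 * control that has been set.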
+ */ + if (!ctl->flags || (ctl->flags & WMFW_CTL_FLAG_READABLE)) { + ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len); + if (ret < 0) + return ret; + } + } + + return 0; +} + +static int wm_coeff_sync_controls(struct wm_adsp *dsp) +{ + struct wm_coeff_ctl *ctl; + int ret; + + list_for_each_entry(ctl, &dsp->ctl_list, list) { + if (!ctl->enabled) + continue; + if (ctl->set && !(ctl->flags & WMFW_CTL_FLAG_VOLATILE)) { + ret = wm_coeff_write_ctrl_raw(ctl, ctl->cache, + ctl->len); + if (ret < 0) + return ret; + } + } + + return 0; +} + +static void wm_adsp_signal_event_controls(struct wm_adsp *dsp, + unsigned int event) +{ + struct wm_coeff_ctl *ctl; + int ret; + + list_for_each_entry(ctl, &dsp->ctl_list, list) { + if (ctl->type != WMFW_CTL_TYPE_HOSTEVENT) + continue; + + if (!ctl->enabled) + continue; + + ret = wm_coeff_write_acked_control(ctl, event); + if (ret) + adsp_warn(dsp, + "Failed to send 0x%x event to alg 0x%x (%d)\n", + event, ctl->alg_region.alg, ret); + } +} + +static void wm_adsp_ctl_work(struct work_struct *work) +{ + struct wmfw_ctl_work *ctl_work = container_of(work, + struct wmfw_ctl_work, + work); + + wmfw_add_ctl(ctl_work->dsp, ctl_work->ctl); + kfree(ctl_work); +} + +static void wm_adsp_free_ctl_blk(struct wm_coeff_ctl *ctl) +{ + kfree(ctl->cache); + kfree(ctl->name); + kfree(ctl->subname); + kfree(ctl); +} + +static int wm_adsp_create_control(struct wm_adsp *dsp, + const struct wm_adsp_alg_region *alg_region, + unsigned int offset, unsigned int len, + const char *subname, unsigned int subname_len, + unsigned int flags, snd_ctl_elem_type_t type) +{ + struct wm_coeff_ctl *ctl; + struct wmfw_ctl_work *ctl_work; char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; const char *region_name; int ret; - if (cs_ctl->flags & WMFW_CTL_FLAG_SYS) - return 0; - - region_name = cs_dsp_mem_region_name(cs_ctl->alg_region.type); + region_name = wm_adsp_mem_region_name(alg_region->type); if (!region_name) { - adsp_err(dsp, "Unknown region type: %d\n", cs_ctl->alg_region.type); + adsp_err(dsp, "Unknown region type: %d\n", alg_region->type); return -EINVAL; } - switch (cs_dsp->fw_ver) { + switch (dsp->fw_ver) { case 0: case 1: - ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, - "%s %s %x", cs_dsp->name, region_name, - cs_ctl->alg_region.alg); + snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s %x", + dsp->name, region_name, alg_region->alg); + subname = NULL; /* don't append subname */ break; case 2: ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, - "%s%c %.12s %x", cs_dsp->name, *region_name, - wm_adsp_fw_text[dsp->fw], cs_ctl->alg_region.alg); + "%s%c %.12s %x", dsp->name, *region_name, + wm_adsp_fw_text[dsp->fw], alg_region->alg); break; default: ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, - "%s %.12s %x", cs_dsp->name, - wm_adsp_fw_text[dsp->fw], cs_ctl->alg_region.alg); + "%s %.12s %x", dsp->name, + wm_adsp_fw_text[dsp->fw], alg_region->alg); break; } - if (cs_ctl->subname) { + if (subname) { int avail = SNDRV_CTL_ELEM_ID_NAME_MAXLEN - ret - 2; int skip = 0; @@ -626,70 +1455,613 @@ static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) avail -= strlen(dsp->component->name_prefix) + 1; /* Truncate the subname from the start if it is too long */ - if (cs_ctl->subname_len > avail) - skip = cs_ctl->subname_len - avail; + if (subname_len > avail) + skip = subname_len - avail; snprintf(name + ret, SNDRV_CTL_ELEM_ID_NAME_MAXLEN - ret, - " %.*s", cs_ctl->subname_len - skip, cs_ctl->subname + skip); + " %.*s", subname_len - skip, subname + skip); + } + + 
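+	/*
+	 * Controls persist across firmware reloads: if a control with
+	 * this name already exists, just re-enable it rather than
+	 * registering a duplicate ALSA control.
+	 */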
list_for_each_entry(ctl, &dsp->ctl_list, list) { + if (!strcmp(ctl->name, name)) { + if (!ctl->enabled) + ctl->enabled = 1; + return 0; + } } ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); if (!ctl) return -ENOMEM; - ctl->cs_ctl = cs_ctl; - + ctl->fw_name = wm_adsp_fw_text[dsp->fw]; + ctl->alg_region = *alg_region; ctl->name = kmemdup(name, strlen(name) + 1, GFP_KERNEL); if (!ctl->name) { ret = -ENOMEM; goto err_ctl; } + if (subname) { + ctl->subname_len = subname_len; + ctl->subname = kmemdup(subname, + strlen(subname) + 1, GFP_KERNEL); + if (!ctl->subname) { + ret = -ENOMEM; + goto err_ctl_name; + } + } + ctl->enabled = 1; + ctl->set = 0; + ctl->dsp = dsp; - cs_ctl->priv = ctl; + ctl->flags = flags; + ctl->type = type; + ctl->offset = offset; + ctl->len = len; + ctl->cache = kzalloc(ctl->len, GFP_KERNEL); + if (!ctl->cache) { + ret = -ENOMEM; + goto err_ctl_subname; + } - INIT_WORK(&ctl->work, wm_adsp_ctl_work); - schedule_work(&ctl->work); + list_add(&ctl->list, &dsp->ctl_list); + + if (flags & WMFW_CTL_FLAG_SYS) + return 0; + + ctl_work = kzalloc(sizeof(*ctl_work), GFP_KERNEL); + if (!ctl_work) { + ret = -ENOMEM; + goto err_list_del; + } + + ctl_work->dsp = dsp; + ctl_work->ctl = ctl; + INIT_WORK(&ctl_work->work, wm_adsp_ctl_work); + schedule_work(&ctl_work->work); return 0; +err_list_del: + list_del(&ctl->list); + kfree(ctl->cache); +err_ctl_subname: + kfree(ctl->subname); +err_ctl_name: + kfree(ctl->name); err_ctl: kfree(ctl); return ret; } -static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) +struct wm_coeff_parsed_alg { + int id; + const u8 *name; + int name_len; + int ncoeff; +}; + +struct wm_coeff_parsed_coeff { + int offset; + int mem_type; + const u8 *name; + int name_len; + snd_ctl_elem_type_t ctl_type; + int flags; + int len; +}; + +static int wm_coeff_parse_string(int bytes, const u8 **pos, const u8 **str) { - struct wm_coeff_ctl *ctl = cs_ctl->priv; + int length; - cancel_work_sync(&ctl->work); + switch (bytes) { + case 1: + length = **pos; + break; + case 2: + length = le16_to_cpu(*((__le16 *)*pos)); + break; + default: + return 0; + } - kfree(ctl->name); - kfree(ctl); + if (str) + *str = *pos + bytes; + + *pos += ((length + bytes) + 3) & ~0x03; + + return length; +} + +static int wm_coeff_parse_int(int bytes, const u8 **pos) +{ + int val = 0; + + switch (bytes) { + case 2: + val = le16_to_cpu(*((__le16 *)*pos)); + break; + case 4: + val = le32_to_cpu(*((__le32 *)*pos)); + break; + default: + break; + } + + *pos += bytes; + + return val; +} + +static inline void wm_coeff_parse_alg(struct wm_adsp *dsp, const u8 **data, + struct wm_coeff_parsed_alg *blk) +{ + const struct wmfw_adsp_alg_data *raw; + + switch (dsp->fw_ver) { + case 0: + case 1: + raw = (const struct wmfw_adsp_alg_data *)*data; + *data = raw->data; + + blk->id = le32_to_cpu(raw->id); + blk->name = raw->name; + blk->name_len = strlen(raw->name); + blk->ncoeff = le32_to_cpu(raw->ncoeff); + break; + default: + blk->id = wm_coeff_parse_int(sizeof(raw->id), data); + blk->name_len = wm_coeff_parse_string(sizeof(u8), data, + &blk->name); + wm_coeff_parse_string(sizeof(u16), data, NULL); + blk->ncoeff = wm_coeff_parse_int(sizeof(raw->ncoeff), data); + break; + } + + adsp_dbg(dsp, "Algorithm ID: %#x\n", blk->id); + adsp_dbg(dsp, "Algorithm name: %.*s\n", blk->name_len, blk->name); + adsp_dbg(dsp, "# of coefficient descriptors: %#x\n", blk->ncoeff); +} + +static inline void wm_coeff_parse_coeff(struct wm_adsp *dsp, const u8 **data, + struct wm_coeff_parsed_coeff *blk) +{ + const struct wmfw_adsp_coeff_data 
*raw; + const u8 *tmp; + int length; + + switch (dsp->fw_ver) { + case 0: + case 1: + raw = (const struct wmfw_adsp_coeff_data *)*data; + *data = *data + sizeof(raw->hdr) + le32_to_cpu(raw->hdr.size); + + blk->offset = le16_to_cpu(raw->hdr.offset); + blk->mem_type = le16_to_cpu(raw->hdr.type); + blk->name = raw->name; + blk->name_len = strlen(raw->name); + blk->ctl_type = (__force snd_ctl_elem_type_t)le16_to_cpu(raw->ctl_type); + blk->flags = le16_to_cpu(raw->flags); + blk->len = le32_to_cpu(raw->len); + break; + default: + tmp = *data; + blk->offset = wm_coeff_parse_int(sizeof(raw->hdr.offset), &tmp); + blk->mem_type = wm_coeff_parse_int(sizeof(raw->hdr.type), &tmp); + length = wm_coeff_parse_int(sizeof(raw->hdr.size), &tmp); + blk->name_len = wm_coeff_parse_string(sizeof(u8), &tmp, + &blk->name); + wm_coeff_parse_string(sizeof(u8), &tmp, NULL); + wm_coeff_parse_string(sizeof(u16), &tmp, NULL); + blk->ctl_type = + (__force snd_ctl_elem_type_t)wm_coeff_parse_int(sizeof(raw->ctl_type), + &tmp); + blk->flags = wm_coeff_parse_int(sizeof(raw->flags), &tmp); + blk->len = wm_coeff_parse_int(sizeof(raw->len), &tmp); + + *data = *data + sizeof(raw->hdr) + length; + break; + } + + adsp_dbg(dsp, "\tCoefficient type: %#x\n", blk->mem_type); + adsp_dbg(dsp, "\tCoefficient offset: %#x\n", blk->offset); + adsp_dbg(dsp, "\tCoefficient name: %.*s\n", blk->name_len, blk->name); + adsp_dbg(dsp, "\tCoefficient flags: %#x\n", blk->flags); + adsp_dbg(dsp, "\tALSA control type: %#x\n", blk->ctl_type); + adsp_dbg(dsp, "\tALSA control len: %#x\n", blk->len); +} + +static int wm_adsp_check_coeff_flags(struct wm_adsp *dsp, + const struct wm_coeff_parsed_coeff *coeff_blk, + unsigned int f_required, + unsigned int f_illegal) +{ + if ((coeff_blk->flags & f_illegal) || + ((coeff_blk->flags & f_required) != f_required)) { + adsp_err(dsp, "Illegal flags 0x%x for control type 0x%x\n", + coeff_blk->flags, coeff_blk->ctl_type); + return -EINVAL; + } + + return 0; +} + +static int wm_adsp_parse_coeff(struct wm_adsp *dsp, + const struct wmfw_region *region) +{ + struct wm_adsp_alg_region alg_region = {}; + struct wm_coeff_parsed_alg alg_blk; + struct wm_coeff_parsed_coeff coeff_blk; + const u8 *data = region->data; + int i, ret; + + wm_coeff_parse_alg(dsp, &data, &alg_blk); + for (i = 0; i < alg_blk.ncoeff; i++) { + wm_coeff_parse_coeff(dsp, &data, &coeff_blk); + + switch (coeff_blk.ctl_type) { + case SNDRV_CTL_ELEM_TYPE_BYTES: + break; + case WMFW_CTL_TYPE_ACKED: + if (coeff_blk.flags & WMFW_CTL_FLAG_SYS) + continue; /* ignore */ + + ret = wm_adsp_check_coeff_flags(dsp, &coeff_blk, + WMFW_CTL_FLAG_VOLATILE | + WMFW_CTL_FLAG_WRITEABLE | + WMFW_CTL_FLAG_READABLE, + 0); + if (ret) + return -EINVAL; + break; + case WMFW_CTL_TYPE_HOSTEVENT: + ret = wm_adsp_check_coeff_flags(dsp, &coeff_blk, + WMFW_CTL_FLAG_SYS | + WMFW_CTL_FLAG_VOLATILE | + WMFW_CTL_FLAG_WRITEABLE | + WMFW_CTL_FLAG_READABLE, + 0); + if (ret) + return -EINVAL; + break; + case WMFW_CTL_TYPE_HOST_BUFFER: + ret = wm_adsp_check_coeff_flags(dsp, &coeff_blk, + WMFW_CTL_FLAG_SYS | + WMFW_CTL_FLAG_VOLATILE | + WMFW_CTL_FLAG_READABLE, + 0); + if (ret) + return -EINVAL; + break; + default: + adsp_err(dsp, "Unknown control type: %d\n", + coeff_blk.ctl_type); + return -EINVAL; + } + + alg_region.type = coeff_blk.mem_type; + alg_region.alg = alg_blk.id; + + ret = wm_adsp_create_control(dsp, &alg_region, + coeff_blk.offset, + coeff_blk.len, + coeff_blk.name, + coeff_blk.name_len, + coeff_blk.flags, + coeff_blk.ctl_type); + if (ret < 0) + adsp_err(dsp, "Failed to create control: 
%.*s, %d\n", + coeff_blk.name_len, coeff_blk.name, ret); + } + + return 0; +} + +static unsigned int wm_adsp1_parse_sizes(struct wm_adsp *dsp, + const char * const file, + unsigned int pos, + const struct firmware *firmware) +{ + const struct wmfw_adsp1_sizes *adsp1_sizes; + + adsp1_sizes = (void *)&firmware->data[pos]; + + adsp_dbg(dsp, "%s: %d DM, %d PM, %d ZM\n", file, + le32_to_cpu(adsp1_sizes->dm), le32_to_cpu(adsp1_sizes->pm), + le32_to_cpu(adsp1_sizes->zm)); + + return pos + sizeof(*adsp1_sizes); +} + +static unsigned int wm_adsp2_parse_sizes(struct wm_adsp *dsp, + const char * const file, + unsigned int pos, + const struct firmware *firmware) +{ + const struct wmfw_adsp2_sizes *adsp2_sizes; + + adsp2_sizes = (void *)&firmware->data[pos]; + + adsp_dbg(dsp, "%s: %d XM, %d YM %d PM, %d ZM\n", file, + le32_to_cpu(adsp2_sizes->xm), le32_to_cpu(adsp2_sizes->ym), + le32_to_cpu(adsp2_sizes->pm), le32_to_cpu(adsp2_sizes->zm)); + + return pos + sizeof(*adsp2_sizes); +} + +static bool wm_adsp_validate_version(struct wm_adsp *dsp, unsigned int version) +{ + switch (version) { + case 0: + adsp_warn(dsp, "Deprecated file format %d\n", version); + return true; + case 1: + case 2: + return true; + default: + return false; + } +} + +static bool wm_halo_validate_version(struct wm_adsp *dsp, unsigned int version) +{ + switch (version) { + case 3: + return true; + default: + return false; + } +} + +static int wm_adsp_load(struct wm_adsp *dsp) +{ + LIST_HEAD(buf_list); + const struct firmware *firmware; + struct regmap *regmap = dsp->regmap; + unsigned int pos = 0; + const struct wmfw_header *header; + const struct wmfw_adsp1_sizes *adsp1_sizes; + const struct wmfw_footer *footer; + const struct wmfw_region *region; + const struct wm_adsp_region *mem; + const char *region_name; + char *file, *text = NULL; + struct wm_adsp_buf *buf; + unsigned int reg; + int regions = 0; + int ret, offset, type; + + file = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (file == NULL) + return -ENOMEM; + + snprintf(file, PAGE_SIZE, "%s-%s-%s.wmfw", dsp->part, dsp->fwf_name, + wm_adsp_fw[dsp->fw].file); + file[PAGE_SIZE - 1] = '\0'; + + ret = request_firmware(&firmware, file, dsp->dev); + if (ret != 0) { + adsp_err(dsp, "Failed to request '%s'\n", file); + goto out; + } + ret = -EINVAL; + + pos = sizeof(*header) + sizeof(*adsp1_sizes) + sizeof(*footer); + if (pos >= firmware->size) { + adsp_err(dsp, "%s: file too short, %zu bytes\n", + file, firmware->size); + goto out_fw; + } + + header = (void *)&firmware->data[0]; + + if (memcmp(&header->magic[0], "WMFW", 4) != 0) { + adsp_err(dsp, "%s: invalid magic\n", file); + goto out_fw; + } + + if (!dsp->ops->validate_version(dsp, header->ver)) { + adsp_err(dsp, "%s: unknown file format %d\n", + file, header->ver); + goto out_fw; + } + + adsp_info(dsp, "Firmware version: %d\n", header->ver); + dsp->fw_ver = header->ver; + + if (header->core != dsp->type) { + adsp_err(dsp, "%s: invalid core %d != %d\n", + file, header->core, dsp->type); + goto out_fw; + } + + pos = sizeof(*header); + pos = dsp->ops->parse_sizes(dsp, file, pos, firmware); + + footer = (void *)&firmware->data[pos]; + pos += sizeof(*footer); + + if (le32_to_cpu(header->len) != pos) { + adsp_err(dsp, "%s: unexpected header length %d\n", + file, le32_to_cpu(header->len)); + goto out_fw; + } + + adsp_dbg(dsp, "%s: timestamp %llu\n", file, + le64_to_cpu(footer->timestamp)); + + while (pos < firmware->size && + sizeof(*region) < firmware->size - pos) { + region = (void *)&(firmware->data[pos]); + region_name = "Unknown"; + reg = 0; 
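+		/*
+		 * Each region header carries a 24-bit offset and an 8-bit
+		 * type. Text regions are only logged; data regions resolve
+		 * to a register address through the core-specific
+		 * region_to_reg() callback and are then streamed out below
+		 * with async regmap writes.
+		 */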
+ text = NULL; + offset = le32_to_cpu(region->offset) & 0xffffff; + type = be32_to_cpu(region->type) & 0xff; + + switch (type) { + case WMFW_NAME_TEXT: + region_name = "Firmware name"; + text = kzalloc(le32_to_cpu(region->len) + 1, + GFP_KERNEL); + break; + case WMFW_ALGORITHM_DATA: + region_name = "Algorithm"; + ret = wm_adsp_parse_coeff(dsp, region); + if (ret != 0) + goto out_fw; + break; + case WMFW_INFO_TEXT: + region_name = "Information"; + text = kzalloc(le32_to_cpu(region->len) + 1, + GFP_KERNEL); + break; + case WMFW_ABSOLUTE: + region_name = "Absolute"; + reg = offset; + break; + case WMFW_ADSP1_PM: + case WMFW_ADSP1_DM: + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + case WMFW_ADSP1_ZM: + case WMFW_HALO_PM_PACKED: + case WMFW_HALO_XM_PACKED: + case WMFW_HALO_YM_PACKED: + mem = wm_adsp_find_region(dsp, type); + if (!mem) { + adsp_err(dsp, "No region of type: %x\n", type); + ret = -EINVAL; + goto out_fw; + } + + region_name = wm_adsp_mem_region_name(type); + reg = dsp->ops->region_to_reg(mem, offset); + break; + default: + adsp_warn(dsp, + "%s.%d: Unknown region type %x at %d(%x)\n", + file, regions, type, pos, pos); + break; + } + + adsp_dbg(dsp, "%s.%d: %d bytes at %d in %s\n", file, + regions, le32_to_cpu(region->len), offset, + region_name); + + if (le32_to_cpu(region->len) > + firmware->size - pos - sizeof(*region)) { + adsp_err(dsp, + "%s.%d: %s region len %d bytes exceeds file length %zu\n", + file, regions, region_name, + le32_to_cpu(region->len), firmware->size); + ret = -EINVAL; + goto out_fw; + } + + if (text) { + memcpy(text, region->data, le32_to_cpu(region->len)); + adsp_info(dsp, "%s: %s\n", file, text); + kfree(text); + text = NULL; + } + + if (reg) { + buf = wm_adsp_buf_alloc(region->data, + le32_to_cpu(region->len), + &buf_list); + if (!buf) { + adsp_err(dsp, "Out of memory\n"); + ret = -ENOMEM; + goto out_fw; + } + + ret = regmap_raw_write_async(regmap, reg, buf->buf, + le32_to_cpu(region->len)); + if (ret != 0) { + adsp_err(dsp, + "%s.%d: Failed to write %d bytes at %d in %s: %d\n", + file, regions, + le32_to_cpu(region->len), offset, + region_name, ret); + goto out_fw; + } + } + + pos += le32_to_cpu(region->len) + sizeof(*region); + regions++; + } + + ret = regmap_async_complete(regmap); + if (ret != 0) { + adsp_err(dsp, "Failed to complete async write: %d\n", ret); + goto out_fw; + } + + if (pos > firmware->size) + adsp_warn(dsp, "%s.%d: %zu bytes at end of file\n", + file, regions, pos - firmware->size); + + wm_adsp_debugfs_save_wmfwname(dsp, file); + +out_fw: + regmap_async_complete(regmap); + wm_adsp_buf_free(&buf_list); + release_firmware(firmware); + kfree(text); +out: + kfree(file); + + return ret; +} + +/* + * Find wm_coeff_ctl with input name as its subname + * If not found, return NULL + */ +static struct wm_coeff_ctl *wm_adsp_get_ctl(struct wm_adsp *dsp, + const char *name, int type, + unsigned int alg) +{ + struct wm_coeff_ctl *pos, *rslt = NULL; + const char *fw_txt = wm_adsp_fw_text[dsp->fw]; + + list_for_each_entry(pos, &dsp->ctl_list, list) { + if (!pos->subname) + continue; + if (strncmp(pos->subname, name, pos->subname_len) == 0 && + pos->fw_name == fw_txt && + pos->alg_region.alg == alg && + pos->alg_region.type == type) { + rslt = pos; + break; + } + } + + return rslt; } int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len) { - struct cs_dsp_coeff_ctl *cs_ctl; struct wm_coeff_ctl *ctl; struct snd_kcontrol *kcontrol; char ctl_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; int ret; - cs_ctl = 
cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); - if (!cs_ctl) + ctl = wm_adsp_get_ctl(dsp, name, type, alg); + if (!ctl) return -EINVAL; - ctl = cs_ctl->priv; - - if (len > cs_ctl->len) + if (len > ctl->len) return -EINVAL; - ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len); + ret = wm_coeff_write_ctrl(ctl, buf, len); if (ret) return ret; - if (cs_ctl->flags & WMFW_CTL_FLAG_SYS) + if (ctl->flags & WMFW_CTL_FLAG_SYS) return 0; if (dsp->component->name_prefix) @@ -715,83 +2087,683 @@ EXPORT_SYMBOL_GPL(wm_adsp_write_ctl); int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len) { - struct cs_dsp_coeff_ctl *cs_ctl; + struct wm_coeff_ctl *ctl; - cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); - if (!cs_ctl) + ctl = wm_adsp_get_ctl(dsp, name, type, alg); + if (!ctl) return -EINVAL; - if (len > cs_ctl->len) + if (len > ctl->len) return -EINVAL; - return cs_dsp_coeff_read_ctrl(cs_ctl, 0, buf, len); + return wm_coeff_read_ctrl(ctl, buf, len); } EXPORT_SYMBOL_GPL(wm_adsp_read_ctl); -static void wm_adsp_release_firmware_files(struct wm_adsp *dsp, - const struct firmware *wmfw_firmware, - char *wmfw_filename, - const struct firmware *coeff_firmware, - char *coeff_filename) +static void wm_adsp_ctl_fixup_base(struct wm_adsp *dsp, + const struct wm_adsp_alg_region *alg_region) { - if (wmfw_firmware) - release_firmware(wmfw_firmware); - kfree(wmfw_filename); + struct wm_coeff_ctl *ctl; - if (coeff_firmware) - release_firmware(coeff_firmware); - kfree(coeff_filename); + list_for_each_entry(ctl, &dsp->ctl_list, list) { + if (ctl->fw_name == wm_adsp_fw_text[dsp->fw] && + alg_region->alg == ctl->alg_region.alg && + alg_region->type == ctl->alg_region.type) { + ctl->alg_region.base = alg_region->base; + } + } } -static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, - const struct firmware **firmware, - char **filename, - char *suffix) +static void *wm_adsp_read_algs(struct wm_adsp *dsp, size_t n_algs, + const struct wm_adsp_region *mem, + unsigned int pos, unsigned int len) { - struct cs_dsp *cs_dsp = &dsp->cs_dsp; - int ret = 0; + void *alg; + unsigned int reg; + int ret; + __be32 val; - *filename = kasprintf(GFP_KERNEL, "%s-%s-%s.%s", dsp->part, dsp->fwf_name, - wm_adsp_fw[dsp->fw].file, suffix); - if (*filename == NULL) - return -ENOMEM; - - ret = request_firmware(firmware, *filename, cs_dsp->dev); - if (ret != 0) { - adsp_err(dsp, "Failed to request '%s'\n", *filename); - kfree(*filename); - *filename = NULL; + if (n_algs == 0) { + adsp_err(dsp, "No algorithms\n"); + return ERR_PTR(-EINVAL); } - return ret; + if (n_algs > 1024) { + adsp_err(dsp, "Algorithm count %zx excessive\n", n_algs); + return ERR_PTR(-EINVAL); + } + + /* Read the terminator first to validate the length */ + reg = dsp->ops->region_to_reg(mem, pos + len); + + ret = regmap_raw_read(dsp->regmap, reg, &val, sizeof(val)); + if (ret != 0) { + adsp_err(dsp, "Failed to read algorithm list end: %d\n", + ret); + return ERR_PTR(ret); + } + + if (be32_to_cpu(val) != 0xbedead) + adsp_warn(dsp, "Algorithm list end %x 0x%x != 0xbedead\n", + reg, be32_to_cpu(val)); + + /* Convert length from DSP words to bytes */ + len *= sizeof(u32); + + alg = kzalloc(len, GFP_KERNEL | GFP_DMA); + if (!alg) + return ERR_PTR(-ENOMEM); + + reg = dsp->ops->region_to_reg(mem, pos); + + ret = regmap_raw_read(dsp->regmap, reg, alg, len); + if (ret != 0) { + adsp_err(dsp, "Failed to read algorithm list: %d\n", ret); + kfree(alg); + return ERR_PTR(ret); + } + + return alg; } -static int 
wm_adsp_request_firmware_files(struct wm_adsp *dsp, - const struct firmware **wmfw_firmware, - char **wmfw_filename, - const struct firmware **coeff_firmware, - char **coeff_filename) +static struct wm_adsp_alg_region * + wm_adsp_find_alg_region(struct wm_adsp *dsp, int type, unsigned int id) { - int ret = 0; + struct wm_adsp_alg_region *alg_region; - ret = wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, "wmfw"); - if (ret != 0) - return ret; + list_for_each_entry(alg_region, &dsp->alg_regions, list) { + if (id == alg_region->alg && type == alg_region->type) + return alg_region; + } - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, "bin"); + return NULL; +} + +static struct wm_adsp_alg_region *wm_adsp_create_region(struct wm_adsp *dsp, + int type, __be32 id, + __be32 base) +{ + struct wm_adsp_alg_region *alg_region; + + alg_region = kzalloc(sizeof(*alg_region), GFP_KERNEL); + if (!alg_region) + return ERR_PTR(-ENOMEM); + + alg_region->type = type; + alg_region->alg = be32_to_cpu(id); + alg_region->base = be32_to_cpu(base); + + list_add_tail(&alg_region->list, &dsp->alg_regions); + + if (dsp->fw_ver > 0) + wm_adsp_ctl_fixup_base(dsp, alg_region); + + return alg_region; +} + +static void wm_adsp_free_alg_regions(struct wm_adsp *dsp) +{ + struct wm_adsp_alg_region *alg_region; + + while (!list_empty(&dsp->alg_regions)) { + alg_region = list_first_entry(&dsp->alg_regions, + struct wm_adsp_alg_region, + list); + list_del(&alg_region->list); + kfree(alg_region); + } +} + +static void wmfw_parse_id_header(struct wm_adsp *dsp, + struct wmfw_id_hdr *fw, int nalgs) +{ + dsp->fw_id = be32_to_cpu(fw->id); + dsp->fw_id_version = be32_to_cpu(fw->ver); + + adsp_info(dsp, "Firmware: %x v%d.%d.%d, %d algorithms\n", + dsp->fw_id, (dsp->fw_id_version & 0xff0000) >> 16, + (dsp->fw_id_version & 0xff00) >> 8, dsp->fw_id_version & 0xff, + nalgs); +} + +static void wmfw_v3_parse_id_header(struct wm_adsp *dsp, + struct wmfw_v3_id_hdr *fw, int nalgs) +{ + dsp->fw_id = be32_to_cpu(fw->id); + dsp->fw_id_version = be32_to_cpu(fw->ver); + dsp->fw_vendor_id = be32_to_cpu(fw->vendor_id); + + adsp_info(dsp, "Firmware: %x vendor: 0x%x v%d.%d.%d, %d algorithms\n", + dsp->fw_id, dsp->fw_vendor_id, + (dsp->fw_id_version & 0xff0000) >> 16, + (dsp->fw_id_version & 0xff00) >> 8, dsp->fw_id_version & 0xff, + nalgs); +} + +static int wm_adsp_create_regions(struct wm_adsp *dsp, __be32 id, int nregions, + const int *type, __be32 *base) +{ + struct wm_adsp_alg_region *alg_region; + int i; + + for (i = 0; i < nregions; i++) { + alg_region = wm_adsp_create_region(dsp, type[i], id, base[i]); + if (IS_ERR(alg_region)) + return PTR_ERR(alg_region); + } return 0; } -static int wm_adsp_common_init(struct wm_adsp *dsp) +static int wm_adsp1_setup_algs(struct wm_adsp *dsp) +{ + struct wmfw_adsp1_id_hdr adsp1_id; + struct wmfw_adsp1_alg_hdr *adsp1_alg; + struct wm_adsp_alg_region *alg_region; + const struct wm_adsp_region *mem; + unsigned int pos, len; + size_t n_algs; + int i, ret; + + mem = wm_adsp_find_region(dsp, WMFW_ADSP1_DM); + if (WARN_ON(!mem)) + return -EINVAL; + + ret = regmap_raw_read(dsp->regmap, mem->base, &adsp1_id, + sizeof(adsp1_id)); + if (ret != 0) { + adsp_err(dsp, "Failed to read algorithm info: %d\n", + ret); + return ret; + } + + n_algs = be32_to_cpu(adsp1_id.n_algs); + + wmfw_parse_id_header(dsp, &adsp1_id.fw, n_algs); + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP1_ZM, + adsp1_id.fw.id, adsp1_id.zm); + if (IS_ERR(alg_region)) + return PTR_ERR(alg_region); + + alg_region = 
wm_adsp_create_region(dsp, WMFW_ADSP1_DM, + adsp1_id.fw.id, adsp1_id.dm); + if (IS_ERR(alg_region)) + return PTR_ERR(alg_region); + + /* Calculate offset and length in DSP words */ + pos = sizeof(adsp1_id) / sizeof(u32); + len = (sizeof(*adsp1_alg) * n_algs) / sizeof(u32); + + adsp1_alg = wm_adsp_read_algs(dsp, n_algs, mem, pos, len); + if (IS_ERR(adsp1_alg)) + return PTR_ERR(adsp1_alg); + + for (i = 0; i < n_algs; i++) { + adsp_info(dsp, "%d: ID %x v%d.%d.%d DM@%x ZM@%x\n", + i, be32_to_cpu(adsp1_alg[i].alg.id), + (be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff0000) >> 16, + (be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff00) >> 8, + be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff, + be32_to_cpu(adsp1_alg[i].dm), + be32_to_cpu(adsp1_alg[i].zm)); + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP1_DM, + adsp1_alg[i].alg.id, + adsp1_alg[i].dm); + if (IS_ERR(alg_region)) { + ret = PTR_ERR(alg_region); + goto out; + } + if (dsp->fw_ver == 0) { + if (i + 1 < n_algs) { + len = be32_to_cpu(adsp1_alg[i + 1].dm); + len -= be32_to_cpu(adsp1_alg[i].dm); + len *= 4; + wm_adsp_create_control(dsp, alg_region, 0, + len, NULL, 0, 0, + SNDRV_CTL_ELEM_TYPE_BYTES); + } else { + adsp_warn(dsp, "Missing length info for region DM with ID %x\n", + be32_to_cpu(adsp1_alg[i].alg.id)); + } + } + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP1_ZM, + adsp1_alg[i].alg.id, + adsp1_alg[i].zm); + if (IS_ERR(alg_region)) { + ret = PTR_ERR(alg_region); + goto out; + } + if (dsp->fw_ver == 0) { + if (i + 1 < n_algs) { + len = be32_to_cpu(adsp1_alg[i + 1].zm); + len -= be32_to_cpu(adsp1_alg[i].zm); + len *= 4; + wm_adsp_create_control(dsp, alg_region, 0, + len, NULL, 0, 0, + SNDRV_CTL_ELEM_TYPE_BYTES); + } else { + adsp_warn(dsp, "Missing length info for region ZM with ID %x\n", + be32_to_cpu(adsp1_alg[i].alg.id)); + } + } + } + +out: + kfree(adsp1_alg); + return ret; +} + +static int wm_adsp2_setup_algs(struct wm_adsp *dsp) +{ + struct wmfw_adsp2_id_hdr adsp2_id; + struct wmfw_adsp2_alg_hdr *adsp2_alg; + struct wm_adsp_alg_region *alg_region; + const struct wm_adsp_region *mem; + unsigned int pos, len; + size_t n_algs; + int i, ret; + + mem = wm_adsp_find_region(dsp, WMFW_ADSP2_XM); + if (WARN_ON(!mem)) + return -EINVAL; + + ret = regmap_raw_read(dsp->regmap, mem->base, &adsp2_id, + sizeof(adsp2_id)); + if (ret != 0) { + adsp_err(dsp, "Failed to read algorithm info: %d\n", + ret); + return ret; + } + + n_algs = be32_to_cpu(adsp2_id.n_algs); + + wmfw_parse_id_header(dsp, &adsp2_id.fw, n_algs); + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP2_XM, + adsp2_id.fw.id, adsp2_id.xm); + if (IS_ERR(alg_region)) + return PTR_ERR(alg_region); + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP2_YM, + adsp2_id.fw.id, adsp2_id.ym); + if (IS_ERR(alg_region)) + return PTR_ERR(alg_region); + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP2_ZM, + adsp2_id.fw.id, adsp2_id.zm); + if (IS_ERR(alg_region)) + return PTR_ERR(alg_region); + + /* Calculate offset and length in DSP words */ + pos = sizeof(adsp2_id) / sizeof(u32); + len = (sizeof(*adsp2_alg) * n_algs) / sizeof(u32); + + adsp2_alg = wm_adsp_read_algs(dsp, n_algs, mem, pos, len); + if (IS_ERR(adsp2_alg)) + return PTR_ERR(adsp2_alg); + + for (i = 0; i < n_algs; i++) { + adsp_info(dsp, + "%d: ID %x v%d.%d.%d XM@%x YM@%x ZM@%x\n", + i, be32_to_cpu(adsp2_alg[i].alg.id), + (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff0000) >> 16, + (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff00) >> 8, + be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff, + be32_to_cpu(adsp2_alg[i].xm), + be32_to_cpu(adsp2_alg[i].ym), + 
be32_to_cpu(adsp2_alg[i].zm)); + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP2_XM, + adsp2_alg[i].alg.id, + adsp2_alg[i].xm); + if (IS_ERR(alg_region)) { + ret = PTR_ERR(alg_region); + goto out; + } + if (dsp->fw_ver == 0) { + if (i + 1 < n_algs) { + len = be32_to_cpu(adsp2_alg[i + 1].xm); + len -= be32_to_cpu(adsp2_alg[i].xm); + len *= 4; + wm_adsp_create_control(dsp, alg_region, 0, + len, NULL, 0, 0, + SNDRV_CTL_ELEM_TYPE_BYTES); + } else { + adsp_warn(dsp, "Missing length info for region XM with ID %x\n", + be32_to_cpu(adsp2_alg[i].alg.id)); + } + } + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP2_YM, + adsp2_alg[i].alg.id, + adsp2_alg[i].ym); + if (IS_ERR(alg_region)) { + ret = PTR_ERR(alg_region); + goto out; + } + if (dsp->fw_ver == 0) { + if (i + 1 < n_algs) { + len = be32_to_cpu(adsp2_alg[i + 1].ym); + len -= be32_to_cpu(adsp2_alg[i].ym); + len *= 4; + wm_adsp_create_control(dsp, alg_region, 0, + len, NULL, 0, 0, + SNDRV_CTL_ELEM_TYPE_BYTES); + } else { + adsp_warn(dsp, "Missing length info for region YM with ID %x\n", + be32_to_cpu(adsp2_alg[i].alg.id)); + } + } + + alg_region = wm_adsp_create_region(dsp, WMFW_ADSP2_ZM, + adsp2_alg[i].alg.id, + adsp2_alg[i].zm); + if (IS_ERR(alg_region)) { + ret = PTR_ERR(alg_region); + goto out; + } + if (dsp->fw_ver == 0) { + if (i + 1 < n_algs) { + len = be32_to_cpu(adsp2_alg[i + 1].zm); + len -= be32_to_cpu(adsp2_alg[i].zm); + len *= 4; + wm_adsp_create_control(dsp, alg_region, 0, + len, NULL, 0, 0, + SNDRV_CTL_ELEM_TYPE_BYTES); + } else { + adsp_warn(dsp, "Missing length info for region ZM with ID %x\n", + be32_to_cpu(adsp2_alg[i].alg.id)); + } + } + } + +out: + kfree(adsp2_alg); + return ret; +} + +static int wm_halo_create_regions(struct wm_adsp *dsp, __be32 id, + __be32 xm_base, __be32 ym_base) +{ + static const int types[] = { + WMFW_ADSP2_XM, WMFW_HALO_XM_PACKED, + WMFW_ADSP2_YM, WMFW_HALO_YM_PACKED + }; + __be32 bases[] = { xm_base, xm_base, ym_base, ym_base }; + + return wm_adsp_create_regions(dsp, id, ARRAY_SIZE(types), types, bases); +} + +static int wm_halo_setup_algs(struct wm_adsp *dsp) +{ + struct wmfw_halo_id_hdr halo_id; + struct wmfw_halo_alg_hdr *halo_alg; + const struct wm_adsp_region *mem; + unsigned int pos, len; + size_t n_algs; + int i, ret; + + mem = wm_adsp_find_region(dsp, WMFW_ADSP2_XM); + if (WARN_ON(!mem)) + return -EINVAL; + + ret = regmap_raw_read(dsp->regmap, mem->base, &halo_id, + sizeof(halo_id)); + if (ret != 0) { + adsp_err(dsp, "Failed to read algorithm info: %d\n", + ret); + return ret; + } + + n_algs = be32_to_cpu(halo_id.n_algs); + + wmfw_v3_parse_id_header(dsp, &halo_id.fw, n_algs); + + ret = wm_halo_create_regions(dsp, halo_id.fw.id, + halo_id.xm_base, halo_id.ym_base); + if (ret) + return ret; + + /* Calculate offset and length in DSP words */ + pos = sizeof(halo_id) / sizeof(u32); + len = (sizeof(*halo_alg) * n_algs) / sizeof(u32); + + halo_alg = wm_adsp_read_algs(dsp, n_algs, mem, pos, len); + if (IS_ERR(halo_alg)) + return PTR_ERR(halo_alg); + + for (i = 0; i < n_algs; i++) { + adsp_info(dsp, + "%d: ID %x v%d.%d.%d XM@%x YM@%x\n", + i, be32_to_cpu(halo_alg[i].alg.id), + (be32_to_cpu(halo_alg[i].alg.ver) & 0xff0000) >> 16, + (be32_to_cpu(halo_alg[i].alg.ver) & 0xff00) >> 8, + be32_to_cpu(halo_alg[i].alg.ver) & 0xff, + be32_to_cpu(halo_alg[i].xm_base), + be32_to_cpu(halo_alg[i].ym_base)); + + ret = wm_halo_create_regions(dsp, halo_alg[i].alg.id, + halo_alg[i].xm_base, + halo_alg[i].ym_base); + if (ret) + goto out; + } + +out: + kfree(halo_alg); + return ret; +} + +static int 
wm_adsp_load_coeff(struct wm_adsp *dsp) +{ + LIST_HEAD(buf_list); + struct regmap *regmap = dsp->regmap; + struct wmfw_coeff_hdr *hdr; + struct wmfw_coeff_item *blk; + const struct firmware *firmware; + const struct wm_adsp_region *mem; + struct wm_adsp_alg_region *alg_region; + const char *region_name; + int ret, pos, blocks, type, offset, reg; + char *file; + struct wm_adsp_buf *buf; + + file = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (file == NULL) + return -ENOMEM; + + snprintf(file, PAGE_SIZE, "%s-%s-%s.bin", dsp->part, dsp->fwf_name, + wm_adsp_fw[dsp->fw].file); + file[PAGE_SIZE - 1] = '\0'; + + ret = request_firmware(&firmware, file, dsp->dev); + if (ret != 0) { + adsp_warn(dsp, "Failed to request '%s'\n", file); + ret = 0; + goto out; + } + ret = -EINVAL; + + if (sizeof(*hdr) >= firmware->size) { + adsp_err(dsp, "%s: file too short, %zu bytes\n", + file, firmware->size); + goto out_fw; + } + + hdr = (void *)&firmware->data[0]; + if (memcmp(hdr->magic, "WMDR", 4) != 0) { + adsp_err(dsp, "%s: invalid magic\n", file); + goto out_fw; + } + + switch (be32_to_cpu(hdr->rev) & 0xff) { + case 1: + break; + default: + adsp_err(dsp, "%s: Unsupported coefficient file format %d\n", + file, be32_to_cpu(hdr->rev) & 0xff); + ret = -EINVAL; + goto out_fw; + } + + adsp_dbg(dsp, "%s: v%d.%d.%d\n", file, + (le32_to_cpu(hdr->ver) >> 16) & 0xff, + (le32_to_cpu(hdr->ver) >> 8) & 0xff, + le32_to_cpu(hdr->ver) & 0xff); + + pos = le32_to_cpu(hdr->len); + + blocks = 0; + while (pos < firmware->size && + sizeof(*blk) < firmware->size - pos) { + blk = (void *)(&firmware->data[pos]); + + type = le16_to_cpu(blk->type); + offset = le16_to_cpu(blk->offset); + + adsp_dbg(dsp, "%s.%d: %x v%d.%d.%d\n", + file, blocks, le32_to_cpu(blk->id), + (le32_to_cpu(blk->ver) >> 16) & 0xff, + (le32_to_cpu(blk->ver) >> 8) & 0xff, + le32_to_cpu(blk->ver) & 0xff); + adsp_dbg(dsp, "%s.%d: %d bytes at 0x%x in %x\n", + file, blocks, le32_to_cpu(blk->len), offset, type); + + reg = 0; + region_name = "Unknown"; + switch (type) { + case (WMFW_NAME_TEXT << 8): + case (WMFW_INFO_TEXT << 8): + case (WMFW_METADATA << 8): + break; + case (WMFW_ABSOLUTE << 8): + /* + * Old files may use this for global + * coefficients. 
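+ * A block whose ID matches the loaded firmware ID at offset 0 is + * treated as the global coefficient block; any other ID/offset is + * written to its absolute register address.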
+ */ + if (le32_to_cpu(blk->id) == dsp->fw_id && + offset == 0) { + region_name = "global coefficients"; + mem = wm_adsp_find_region(dsp, type); + if (!mem) { + adsp_err(dsp, "No ZM\n"); + break; + } + reg = dsp->ops->region_to_reg(mem, 0); + + } else { + region_name = "register"; + reg = offset; + } + break; + + case WMFW_ADSP1_DM: + case WMFW_ADSP1_ZM: + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + case WMFW_HALO_XM_PACKED: + case WMFW_HALO_YM_PACKED: + case WMFW_HALO_PM_PACKED: + adsp_dbg(dsp, "%s.%d: %d bytes in %x for %x\n", + file, blocks, le32_to_cpu(blk->len), + type, le32_to_cpu(blk->id)); + + mem = wm_adsp_find_region(dsp, type); + if (!mem) { + adsp_err(dsp, "No base for region %x\n", type); + break; + } + + alg_region = wm_adsp_find_alg_region(dsp, type, + le32_to_cpu(blk->id)); + if (alg_region) { + reg = alg_region->base; + reg = dsp->ops->region_to_reg(mem, reg); + reg += offset; + } else { + adsp_err(dsp, "No %x for algorithm %x\n", + type, le32_to_cpu(blk->id)); + } + break; + + default: + adsp_err(dsp, "%s.%d: Unknown region type %x at %d\n", + file, blocks, type, pos); + break; + } + + if (reg) { + if (le32_to_cpu(blk->len) > + firmware->size - pos - sizeof(*blk)) { + adsp_err(dsp, + "%s.%d: %s region len %d bytes exceeds file length %zu\n", + file, blocks, region_name, + le32_to_cpu(blk->len), + firmware->size); + ret = -EINVAL; + goto out_fw; + } + + buf = wm_adsp_buf_alloc(blk->data, + le32_to_cpu(blk->len), + &buf_list); + if (!buf) { + adsp_err(dsp, "Out of memory\n"); + ret = -ENOMEM; + goto out_fw; + } + + adsp_dbg(dsp, "%s.%d: Writing %d bytes at %x\n", + file, blocks, le32_to_cpu(blk->len), + reg); + ret = regmap_raw_write_async(regmap, reg, buf->buf, + le32_to_cpu(blk->len)); + if (ret != 0) { + adsp_err(dsp, + "%s.%d: Failed to write to %x in %s: %d\n", + file, blocks, reg, region_name, ret); + } + } + + pos += (le32_to_cpu(blk->len) + sizeof(*blk) + 3) & ~0x03; + blocks++; + } + + ret = regmap_async_complete(regmap); + if (ret != 0) + adsp_err(dsp, "Failed to complete async write: %d\n", ret); + + if (pos > firmware->size) + adsp_warn(dsp, "%s.%d: %zu bytes at end of file\n", + file, blocks, pos - firmware->size); + + wm_adsp_debugfs_save_binname(dsp, file); + +out_fw: + regmap_async_complete(regmap); + release_firmware(firmware); + wm_adsp_buf_free(&buf_list); +out: + kfree(file); + return ret; +} + +static int wm_adsp_create_name(struct wm_adsp *dsp) { char *p; - INIT_LIST_HEAD(&dsp->compr_list); - INIT_LIST_HEAD(&dsp->buffer_list); + if (!dsp->name) { + dsp->name = devm_kasprintf(dsp->dev, GFP_KERNEL, "DSP%d", + dsp->num); + if (!dsp->name) + return -ENOMEM; + } if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); + p = devm_kstrdup(dsp->dev, dsp->name, GFP_KERNEL); if (!p) return -ENOMEM; @@ -803,16 +2775,28 @@ static int wm_adsp_common_init(struct wm_adsp *dsp) return 0; } -int wm_adsp1_init(struct wm_adsp *dsp) +static int wm_adsp_common_init(struct wm_adsp *dsp) { int ret; - dsp->cs_dsp.client_ops = &wm_adsp1_client_ops; - - ret = cs_dsp_adsp1_init(&dsp->cs_dsp); + ret = wm_adsp_create_name(dsp); if (ret) return ret; + INIT_LIST_HEAD(&dsp->alg_regions); + INIT_LIST_HEAD(&dsp->ctl_list); + INIT_LIST_HEAD(&dsp->compr_list); + INIT_LIST_HEAD(&dsp->buffer_list); + + mutex_init(&dsp->pwr_lock); + + return 0; +} + +int wm_adsp1_init(struct wm_adsp *dsp) +{ + dsp->ops = &wm_adsp1_ops; + return wm_adsp_common_init(dsp); } EXPORT_SYMBOL_GPL(wm_adsp1_init); @@ -824,49 +2808,314 @@ int wm_adsp1_event(struct snd_soc_dapm_widget 
*w, struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct wm_adsp *dsps = snd_soc_component_get_drvdata(component); struct wm_adsp *dsp = &dsps[w->shift]; - int ret = 0; - char *wmfw_filename = NULL; - const struct firmware *wmfw_firmware = NULL; - char *coeff_filename = NULL; - const struct firmware *coeff_firmware = NULL; + struct wm_coeff_ctl *ctl; + int ret; + unsigned int val; dsp->component = component; + mutex_lock(&dsp->pwr_lock); + switch (event) { case SND_SOC_DAPM_POST_PMU: - ret = wm_adsp_request_firmware_files(dsp, - &wmfw_firmware, &wmfw_filename, - &coeff_firmware, &coeff_filename); - if (ret) - break; + regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30, + ADSP1_SYS_ENA, ADSP1_SYS_ENA); - ret = cs_dsp_adsp1_power_up(&dsp->cs_dsp, - wmfw_firmware, wmfw_filename, - coeff_firmware, coeff_filename, - wm_adsp_fw_text[dsp->fw]); + /* + * For simplicity set the DSP clock rate to be the + * SYSCLK rate rather than making it configurable. + */ + if (dsp->sysclk_reg) { + ret = regmap_read(dsp->regmap, dsp->sysclk_reg, &val); + if (ret != 0) { + adsp_err(dsp, "Failed to read SYSCLK state: %d\n", + ret); + goto err_mutex; + } - wm_adsp_release_firmware_files(dsp, - wmfw_firmware, wmfw_filename, - coeff_firmware, coeff_filename); + val = (val & dsp->sysclk_mask) >> dsp->sysclk_shift; + + ret = regmap_update_bits(dsp->regmap, + dsp->base + ADSP1_CONTROL_31, + ADSP1_CLK_SEL_MASK, val); + if (ret != 0) { + adsp_err(dsp, "Failed to set clock rate: %d\n", + ret); + goto err_mutex; + } + } + + ret = wm_adsp_load(dsp); + if (ret != 0) + goto err_ena; + + ret = wm_adsp1_setup_algs(dsp); + if (ret != 0) + goto err_ena; + + ret = wm_adsp_load_coeff(dsp); + if (ret != 0) + goto err_ena; + + /* Initialize caches for enabled and unset controls */ + ret = wm_coeff_init_control_caches(dsp); + if (ret != 0) + goto err_ena; + + /* Sync set controls */ + ret = wm_coeff_sync_controls(dsp); + if (ret != 0) + goto err_ena; + + dsp->booted = true; + + /* Start the core running */ + regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30, + ADSP1_CORE_ENA | ADSP1_START, + ADSP1_CORE_ENA | ADSP1_START); + + dsp->running = true; break; + case SND_SOC_DAPM_PRE_PMD: - cs_dsp_adsp1_power_down(&dsp->cs_dsp); + dsp->running = false; + dsp->booted = false; + + /* Halt the core */ + regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30, + ADSP1_CORE_ENA | ADSP1_START, 0); + + regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_19, + ADSP1_WDMA_BUFFER_LENGTH_MASK, 0); + + regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30, + ADSP1_SYS_ENA, 0); + + list_for_each_entry(ctl, &dsp->ctl_list, list) + ctl->enabled = 0; + + + wm_adsp_free_alg_regions(dsp); break; + default: break; } + mutex_unlock(&dsp->pwr_lock); + + return 0; + +err_ena: + regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30, + ADSP1_SYS_ENA, 0); +err_mutex: + mutex_unlock(&dsp->pwr_lock); + return ret; } EXPORT_SYMBOL_GPL(wm_adsp1_event); +static int wm_adsp2v2_enable_core(struct wm_adsp *dsp) +{ + unsigned int val; + int ret, count; + + /* Wait for the RAM to start, should be near instantaneous */ + for (count = 0; count < 10; ++count) { + ret = regmap_read(dsp->regmap, dsp->base + ADSP2_STATUS1, &val); + if (ret != 0) + return ret; + + if (val & ADSP2_RAM_RDY) + break; + + usleep_range(250, 500); + } + + if (!(val & ADSP2_RAM_RDY)) { + adsp_err(dsp, "Failed to start DSP RAM\n"); + return -EBUSY; + } + + adsp_dbg(dsp, "RAM ready after %d polls\n", count); + + return 0; +} + +static 
int wm_adsp2_enable_core(struct wm_adsp *dsp) +{ + int ret; + + ret = regmap_update_bits_async(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_SYS_ENA, ADSP2_SYS_ENA); + if (ret != 0) + return ret; + + return wm_adsp2v2_enable_core(dsp); +} + +static int wm_adsp2_lock(struct wm_adsp *dsp, unsigned int lock_regions) +{ + struct regmap *regmap = dsp->regmap; + unsigned int code0, code1, lock_reg; + + if (!(lock_regions & WM_ADSP2_REGION_ALL)) + return 0; + + lock_regions &= WM_ADSP2_REGION_ALL; + lock_reg = dsp->base + ADSP2_LOCK_REGION_1_LOCK_REGION_0; + + while (lock_regions) { + code0 = code1 = 0; + if (lock_regions & BIT(0)) { + code0 = ADSP2_LOCK_CODE_0; + code1 = ADSP2_LOCK_CODE_1; + } + if (lock_regions & BIT(1)) { + code0 |= ADSP2_LOCK_CODE_0 << ADSP2_LOCK_REGION_SHIFT; + code1 |= ADSP2_LOCK_CODE_1 << ADSP2_LOCK_REGION_SHIFT; + } + regmap_write(regmap, lock_reg, code0); + regmap_write(regmap, lock_reg, code1); + lock_regions >>= 2; + lock_reg += 2; + } + + return 0; +} + +static int wm_adsp2_enable_memory(struct wm_adsp *dsp) +{ + return regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_MEM_ENA, ADSP2_MEM_ENA); +} + +static void wm_adsp2_disable_memory(struct wm_adsp *dsp) +{ + regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_MEM_ENA, 0); +} + +static void wm_adsp2_disable_core(struct wm_adsp *dsp) +{ + regmap_write(dsp->regmap, dsp->base + ADSP2_RDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_2, 0); + + regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_SYS_ENA, 0); +} + +static void wm_adsp2v2_disable_core(struct wm_adsp *dsp) +{ + regmap_write(dsp->regmap, dsp->base + ADSP2_RDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, dsp->base + ADSP2V2_WDMA_CONFIG_2, 0); +} + +static void wm_adsp_boot_work(struct work_struct *work) +{ + struct wm_adsp *dsp = container_of(work, + struct wm_adsp, + boot_work); + int ret; + + mutex_lock(&dsp->pwr_lock); + + if (dsp->ops->enable_memory) { + ret = dsp->ops->enable_memory(dsp); + if (ret != 0) + goto err_mutex; + } + + if (dsp->ops->enable_core) { + ret = dsp->ops->enable_core(dsp); + if (ret != 0) + goto err_mem; + } + + ret = wm_adsp_load(dsp); + if (ret != 0) + goto err_ena; + + ret = dsp->ops->setup_algs(dsp); + if (ret != 0) + goto err_ena; + + ret = wm_adsp_load_coeff(dsp); + if (ret != 0) + goto err_ena; + + /* Initialize caches for enabled and unset controls */ + ret = wm_coeff_init_control_caches(dsp); + if (ret != 0) + goto err_ena; + + if (dsp->ops->disable_core) + dsp->ops->disable_core(dsp); + + dsp->booted = true; + + mutex_unlock(&dsp->pwr_lock); + + return; + +err_ena: + if (dsp->ops->disable_core) + dsp->ops->disable_core(dsp); +err_mem: + if (dsp->ops->disable_memory) + dsp->ops->disable_memory(dsp); +err_mutex: + mutex_unlock(&dsp->pwr_lock); +} + +static int wm_halo_configure_mpu(struct wm_adsp *dsp, unsigned int lock_regions) +{ + struct reg_sequence config[] = { + { dsp->base + HALO_MPU_LOCK_CONFIG, 0x5555 }, + { dsp->base + HALO_MPU_LOCK_CONFIG, 0xAAAA }, + { dsp->base + HALO_MPU_XMEM_ACCESS_0, 0xFFFFFFFF }, + { dsp->base + HALO_MPU_YMEM_ACCESS_0, 0xFFFFFFFF }, + { dsp->base + HALO_MPU_WINDOW_ACCESS_0, lock_regions }, + { dsp->base + HALO_MPU_XREG_ACCESS_0, lock_regions }, + { dsp->base + HALO_MPU_YREG_ACCESS_0, lock_regions }, + { dsp->base + HALO_MPU_XMEM_ACCESS_1, 0xFFFFFFFF }, + { dsp->base + HALO_MPU_YMEM_ACCESS_1, 
0xFFFFFFFF }, + { dsp->base + HALO_MPU_WINDOW_ACCESS_1, lock_regions }, + { dsp->base + HALO_MPU_XREG_ACCESS_1, lock_regions }, + { dsp->base + HALO_MPU_YREG_ACCESS_1, lock_regions }, + { dsp->base + HALO_MPU_XMEM_ACCESS_2, 0xFFFFFFFF }, + { dsp->base + HALO_MPU_YMEM_ACCESS_2, 0xFFFFFFFF }, + { dsp->base + HALO_MPU_WINDOW_ACCESS_2, lock_regions }, + { dsp->base + HALO_MPU_XREG_ACCESS_2, lock_regions }, + { dsp->base + HALO_MPU_YREG_ACCESS_2, lock_regions }, + { dsp->base + HALO_MPU_XMEM_ACCESS_3, 0xFFFFFFFF }, + { dsp->base + HALO_MPU_YMEM_ACCESS_3, 0xFFFFFFFF }, + { dsp->base + HALO_MPU_WINDOW_ACCESS_3, lock_regions }, + { dsp->base + HALO_MPU_XREG_ACCESS_3, lock_regions }, + { dsp->base + HALO_MPU_YREG_ACCESS_3, lock_regions }, + { dsp->base + HALO_MPU_LOCK_CONFIG, 0 }, + }; + + return regmap_multi_reg_write(dsp->regmap, config, ARRAY_SIZE(config)); +} + int wm_adsp2_set_dspclk(struct snd_soc_dapm_widget *w, unsigned int freq) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct wm_adsp *dsps = snd_soc_component_get_drvdata(component); struct wm_adsp *dsp = &dsps[w->shift]; + int ret; - return cs_dsp_set_dspclk(&dsp->cs_dsp, freq); + ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CLOCKING, + ADSP2_CLK_SEL_MASK, + freq << ADSP2_CLK_SEL_SHIFT); + if (ret) + adsp_err(dsp, "Failed to set clock rate: %d\n", ret); + + return ret; } EXPORT_SYMBOL_GPL(wm_adsp2_set_dspclk); @@ -896,12 +3145,11 @@ int wm_adsp2_preloader_put(struct snd_kcontrol *kcontrol, struct wm_adsp *dsp = &dsps[mc->shift - 1]; char preload[32]; - if (dsp->preloaded == ucontrol->value.integer.value[0]) - return 0; + snprintf(preload, ARRAY_SIZE(preload), "%s Preload", dsp->name); - snprintf(preload, ARRAY_SIZE(preload), "%s Preload", dsp->cs_dsp.name); + dsp->preloaded = ucontrol->value.integer.value[0]; - if (ucontrol->value.integer.value[0] || dsp->toggle_preload) + if (ucontrol->value.integer.value[0]) snd_soc_component_force_enable_pin(component, preload); else snd_soc_component_disable_pin(component, preload); @@ -910,42 +3158,20 @@ int wm_adsp2_preloader_put(struct snd_kcontrol *kcontrol, flush_work(&dsp->boot_work); - dsp->preloaded = ucontrol->value.integer.value[0]; - - if (dsp->toggle_preload) { - snd_soc_component_disable_pin(component, preload); - snd_soc_dapm_sync(dapm); - } - return 0; } EXPORT_SYMBOL_GPL(wm_adsp2_preloader_put); -static void wm_adsp_boot_work(struct work_struct *work) +static void wm_adsp_stop_watchdog(struct wm_adsp *dsp) { - struct wm_adsp *dsp = container_of(work, - struct wm_adsp, - boot_work); - int ret = 0; - char *wmfw_filename = NULL; - const struct firmware *wmfw_firmware = NULL; - char *coeff_filename = NULL; - const struct firmware *coeff_firmware = NULL; + regmap_update_bits(dsp->regmap, dsp->base + ADSP2_WATCHDOG, + ADSP2_WDT_ENA_MASK, 0); +} - ret = wm_adsp_request_firmware_files(dsp, - &wmfw_firmware, &wmfw_filename, - &coeff_firmware, &coeff_filename); - if (ret) - return; - - cs_dsp_power_up(&dsp->cs_dsp, - wmfw_firmware, wmfw_filename, - coeff_firmware, coeff_filename, - wm_adsp_fw_text[dsp->fw]); - - wm_adsp_release_firmware_files(dsp, - wmfw_firmware, wmfw_filename, - coeff_firmware, coeff_filename); +static void wm_halo_stop_watchdog(struct wm_adsp *dsp) +{ + regmap_update_bits(dsp->regmap, dsp->base + HALO_WDT_CONTROL, + HALO_WDT_EN_MASK, 0); } int wm_adsp_early_event(struct snd_soc_dapm_widget *w, @@ -954,13 +3180,33 @@ int wm_adsp_early_event(struct snd_soc_dapm_widget *w, struct snd_soc_component *component = 
snd_soc_dapm_to_component(w->dapm); struct wm_adsp *dsps = snd_soc_component_get_drvdata(component); struct wm_adsp *dsp = &dsps[w->shift]; + struct wm_coeff_ctl *ctl; switch (event) { case SND_SOC_DAPM_PRE_PMU: queue_work(system_unbound_wq, &dsp->boot_work); break; case SND_SOC_DAPM_PRE_PMD: - cs_dsp_power_down(&dsp->cs_dsp); + mutex_lock(&dsp->pwr_lock); + + wm_adsp_debugfs_clear(dsp); + + dsp->fw_id = 0; + dsp->fw_id_version = 0; + + dsp->booted = false; + + if (dsp->ops->disable_memory) + dsp->ops->disable_memory(dsp); + + list_for_each_entry(ctl, &dsp->ctl_list, list) + ctl->enabled = 0; + + wm_adsp_free_alg_regions(dsp); + + mutex_unlock(&dsp->pwr_lock); + + adsp_dbg(dsp, "Shutdown complete\n"); break; default: break; @@ -970,24 +3216,17 @@ int wm_adsp_early_event(struct snd_soc_dapm_widget *w, } EXPORT_SYMBOL_GPL(wm_adsp_early_event); -static int wm_adsp_event_post_run(struct cs_dsp *cs_dsp) +static int wm_adsp2_start_core(struct wm_adsp *dsp) { - struct wm_adsp *dsp = container_of(cs_dsp, struct wm_adsp, cs_dsp); - - if (wm_adsp_fw[dsp->fw].num_caps != 0) - return wm_adsp_buffer_init(dsp); - - return 0; + return regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_CORE_ENA | ADSP2_START, + ADSP2_CORE_ENA | ADSP2_START); } -static void wm_adsp_event_post_stop(struct cs_dsp *cs_dsp) +static void wm_adsp2_stop_core(struct wm_adsp *dsp) { - struct wm_adsp *dsp = container_of(cs_dsp, struct wm_adsp, cs_dsp); - - if (wm_adsp_fw[dsp->fw].num_caps != 0) - wm_adsp_buffer_free(dsp); - - dsp->fatal_error = false; + regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_CORE_ENA | ADSP2_START, 0); } int wm_adsp_event(struct snd_soc_dapm_widget *w, @@ -996,32 +3235,127 @@ int wm_adsp_event(struct snd_soc_dapm_widget *w, struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct wm_adsp *dsps = snd_soc_component_get_drvdata(component); struct wm_adsp *dsp = &dsps[w->shift]; - int ret = 0; + int ret; switch (event) { case SND_SOC_DAPM_POST_PMU: flush_work(&dsp->boot_work); - ret = cs_dsp_run(&dsp->cs_dsp); + + mutex_lock(&dsp->pwr_lock); + + if (!dsp->booted) { + ret = -EIO; + goto err; + } + + if (dsp->ops->enable_core) { + ret = dsp->ops->enable_core(dsp); + if (ret != 0) + goto err; + } + + /* Sync set controls */ + ret = wm_coeff_sync_controls(dsp); + if (ret != 0) + goto err; + + if (dsp->ops->lock_memory) { + ret = dsp->ops->lock_memory(dsp, dsp->lock_regions); + if (ret != 0) { + adsp_err(dsp, "Error configuring MPU: %d\n", + ret); + goto err; + } + } + + if (dsp->ops->start_core) { + ret = dsp->ops->start_core(dsp); + if (ret != 0) + goto err; + } + + if (wm_adsp_fw[dsp->fw].num_caps != 0) { + ret = wm_adsp_buffer_init(dsp); + if (ret < 0) + goto err; + } + + dsp->running = true; + + mutex_unlock(&dsp->pwr_lock); break; + case SND_SOC_DAPM_PRE_PMD: - cs_dsp_stop(&dsp->cs_dsp); + /* Tell the firmware to cleanup */ + wm_adsp_signal_event_controls(dsp, WM_ADSP_FW_EVENT_SHUTDOWN); + + if (dsp->ops->stop_watchdog) + dsp->ops->stop_watchdog(dsp); + + /* Log firmware state, it can be useful for analysis */ + if (dsp->ops->show_fw_status) + dsp->ops->show_fw_status(dsp); + + mutex_lock(&dsp->pwr_lock); + + dsp->running = false; + + if (dsp->ops->stop_core) + dsp->ops->stop_core(dsp); + if (dsp->ops->disable_core) + dsp->ops->disable_core(dsp); + + if (wm_adsp_fw[dsp->fw].num_caps != 0) + wm_adsp_buffer_free(dsp); + + dsp->fatal_error = false; + + mutex_unlock(&dsp->pwr_lock); + + adsp_dbg(dsp, "Execution stopped\n"); break; + default: break; } + 
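+ /* Only the POST_PMU case can fail; its errors unwind through the err label below. */ +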
return 0; +err: + if (dsp->ops->stop_core) + dsp->ops->stop_core(dsp); + if (dsp->ops->disable_core) + dsp->ops->disable_core(dsp); + mutex_unlock(&dsp->pwr_lock); return ret; } EXPORT_SYMBOL_GPL(wm_adsp_event); +static int wm_halo_start_core(struct wm_adsp *dsp) +{ + return regmap_update_bits(dsp->regmap, + dsp->base + HALO_CCM_CORE_CONTROL, + HALO_CORE_RESET | HALO_CORE_EN, + HALO_CORE_RESET | HALO_CORE_EN); +} + +static void wm_halo_stop_core(struct wm_adsp *dsp) +{ + regmap_update_bits(dsp->regmap, dsp->base + HALO_CCM_CORE_CONTROL, + HALO_CORE_EN, 0); + + /* reset halo core with CORE_SOFT_RESET */ + regmap_update_bits(dsp->regmap, dsp->base + HALO_CORE_SOFT_RESET, + HALO_CORE_SOFT_RESET_MASK, 1); +} + int wm_adsp2_component_probe(struct wm_adsp *dsp, struct snd_soc_component *component) { char preload[32]; - snprintf(preload, ARRAY_SIZE(preload), "%s Preload", dsp->cs_dsp.name); + snprintf(preload, ARRAY_SIZE(preload), "%s Preload", dsp->name); snd_soc_component_disable_pin(component, preload); - cs_dsp_init_debugfs(&dsp->cs_dsp, component->debugfs_root); + wm_adsp2_init_debugfs(dsp, component); dsp->component = component; @@ -1031,7 +3365,7 @@ EXPORT_SYMBOL_GPL(wm_adsp2_component_probe); int wm_adsp2_component_remove(struct wm_adsp *dsp, struct snd_soc_component *component) { - cs_dsp_cleanup_debugfs(&dsp->cs_dsp); + wm_adsp2_cleanup_debugfs(dsp); return 0; } @@ -1041,16 +3375,37 @@ int wm_adsp2_init(struct wm_adsp *dsp) { int ret; - INIT_WORK(&dsp->boot_work, wm_adsp_boot_work); - - dsp->sys_config_size = sizeof(struct wm_adsp_system_config_xm_hdr); - dsp->cs_dsp.client_ops = &wm_adsp2_client_ops; - - ret = cs_dsp_adsp2_init(&dsp->cs_dsp); + ret = wm_adsp_common_init(dsp); if (ret) return ret; - return wm_adsp_common_init(dsp); + switch (dsp->rev) { + case 0: + /* + * Disable the DSP memory by default when in reset for a small + * power saving. 
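+ * The boot work re-enables it through the enable_memory op before + * any firmware is downloaded.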
+ */ + ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_MEM_ENA, 0); + if (ret) { + adsp_err(dsp, + "Failed to clear memory retention: %d\n", ret); + return ret; + } + + dsp->ops = &wm_adsp2_ops[0]; + break; + case 1: + dsp->ops = &wm_adsp2_ops[1]; + break; + default: + dsp->ops = &wm_adsp2_ops[2]; + break; + } + + INIT_WORK(&dsp->boot_work, wm_adsp_boot_work); + + return 0; } EXPORT_SYMBOL_GPL(wm_adsp2_init); @@ -1058,22 +3413,28 @@ int wm_halo_init(struct wm_adsp *dsp) { int ret; - INIT_WORK(&dsp->boot_work, wm_adsp_boot_work); - - dsp->sys_config_size = sizeof(struct wm_halo_system_config_xm_hdr); - dsp->cs_dsp.client_ops = &wm_adsp2_client_ops; - - ret = cs_dsp_halo_init(&dsp->cs_dsp); + ret = wm_adsp_common_init(dsp); if (ret) return ret; - return wm_adsp_common_init(dsp); + dsp->ops = &wm_halo_ops; + + INIT_WORK(&dsp->boot_work, wm_adsp_boot_work); + + return 0; } EXPORT_SYMBOL_GPL(wm_halo_init); void wm_adsp2_remove(struct wm_adsp *dsp) { - cs_dsp_remove(&dsp->cs_dsp); + struct wm_coeff_ctl *ctl; + + while (!list_empty(&dsp->ctl_list)) { + ctl = list_first_entry(&dsp->ctl_list, struct wm_coeff_ctl, + list); + list_del(&ctl->list); + wm_adsp_free_ctl_blk(ctl); + } } EXPORT_SYMBOL_GPL(wm_adsp2_remove); @@ -1126,7 +3487,7 @@ int wm_adsp_compr_open(struct wm_adsp *dsp, struct snd_compr_stream *stream) struct snd_soc_pcm_runtime *rtd = stream->private_data; int ret = 0; - mutex_lock(&dsp->cs_dsp.pwr_lock); + mutex_lock(&dsp->pwr_lock); if (wm_adsp_fw[dsp->fw].num_caps == 0) { adsp_err(dsp, "%s: Firmware does not support compressed API\n", @@ -1166,7 +3527,7 @@ int wm_adsp_compr_open(struct wm_adsp *dsp, struct snd_compr_stream *stream) stream->runtime->private_data = compr; out: - mutex_unlock(&dsp->cs_dsp.pwr_lock); + mutex_unlock(&dsp->pwr_lock); return ret; } @@ -1178,7 +3539,7 @@ int wm_adsp_compr_free(struct snd_soc_component *component, struct wm_adsp_compr *compr = stream->runtime->private_data; struct wm_adsp *dsp = compr->dsp; - mutex_lock(&dsp->cs_dsp.pwr_lock); + mutex_lock(&dsp->pwr_lock); wm_adsp_compr_detach(compr); list_del(&compr->list); @@ -1186,7 +3547,7 @@ int wm_adsp_compr_free(struct snd_soc_component *component, kfree(compr->raw_buf); kfree(compr); - mutex_unlock(&dsp->cs_dsp.pwr_lock); + mutex_unlock(&dsp->pwr_lock); return 0; } @@ -1205,7 +3566,7 @@ static int wm_adsp_compr_check_params(struct snd_compr_stream *stream, params->buffer.fragment_size > WM_ADSP_MAX_FRAGMENT_SIZE || params->buffer.fragments < WM_ADSP_MIN_FRAGMENTS || params->buffer.fragments > WM_ADSP_MAX_FRAGMENTS || - params->buffer.fragment_size % CS_DSP_DATA_WORD_SIZE) { + params->buffer.fragment_size % WM_ADSP_DATA_WORD_SIZE) { compr_err(compr, "Invalid buffer fragsize=%d fragments=%d\n", params->buffer.fragment_size, params->buffer.fragments); @@ -1244,7 +3605,7 @@ static int wm_adsp_compr_check_params(struct snd_compr_stream *stream, static inline unsigned int wm_adsp_compr_frag_words(struct wm_adsp_compr *compr) { - return compr->size.fragment_size / CS_DSP_DATA_WORD_SIZE; + return compr->size.fragment_size / WM_ADSP_DATA_WORD_SIZE; } int wm_adsp_compr_set_params(struct snd_soc_component *component, @@ -1300,19 +3661,88 @@ int wm_adsp_compr_get_caps(struct snd_soc_component *component, } EXPORT_SYMBOL_GPL(wm_adsp_compr_get_caps); +static int wm_adsp_read_raw_data_block(struct wm_adsp *dsp, int mem_type, + unsigned int mem_addr, + unsigned int num_words, __be32 *data) +{ + struct wm_adsp_region const *mem = wm_adsp_find_region(dsp, mem_type); + unsigned int reg; + int 
ret; + + if (!mem) + return -EINVAL; + + reg = dsp->ops->region_to_reg(mem, mem_addr); + + ret = regmap_raw_read(dsp->regmap, reg, data, + sizeof(*data) * num_words); + if (ret < 0) + return ret; + + return 0; +} + +static inline int wm_adsp_read_data_word(struct wm_adsp *dsp, int mem_type, + unsigned int mem_addr, u32 *data) +{ + __be32 raw; + int ret; + + ret = wm_adsp_read_raw_data_block(dsp, mem_type, mem_addr, 1, &raw); + if (ret < 0) + return ret; + + *data = be32_to_cpu(raw) & 0x00ffffffu; + + return 0; +} + +static int wm_adsp_write_data_word(struct wm_adsp *dsp, int mem_type, + unsigned int mem_addr, u32 data) +{ + struct wm_adsp_region const *mem = wm_adsp_find_region(dsp, mem_type); + __be32 val = cpu_to_be32(data & 0x00ffffffu); + unsigned int reg; + + if (!mem) + return -EINVAL; + + reg = dsp->ops->region_to_reg(mem, mem_addr); + + return regmap_raw_write(dsp->regmap, reg, &val, sizeof(val)); +} + static inline int wm_adsp_buffer_read(struct wm_adsp_compr_buf *buf, unsigned int field_offset, u32 *data) { - return cs_dsp_read_data_word(&buf->dsp->cs_dsp, buf->host_buf_mem_type, - buf->host_buf_ptr + field_offset, data); + return wm_adsp_read_data_word(buf->dsp, buf->host_buf_mem_type, + buf->host_buf_ptr + field_offset, data); } static inline int wm_adsp_buffer_write(struct wm_adsp_compr_buf *buf, unsigned int field_offset, u32 data) { - return cs_dsp_write_data_word(&buf->dsp->cs_dsp, buf->host_buf_mem_type, - buf->host_buf_ptr + field_offset, - data); + return wm_adsp_write_data_word(buf->dsp, buf->host_buf_mem_type, + buf->host_buf_ptr + field_offset, data); +} + +static void wm_adsp_remove_padding(u32 *buf, int nwords) +{ + const __be32 *pack_in = (__be32 *)buf; + u8 *pack_out = (u8 *)buf; + int i; + + /* + * DSP words from the register map have pad bytes and the data bytes + * are in swapped order. This swaps back to the original little-endian + * order and strips the pad bytes. 
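+ * Each 32-bit register word contributes three payload bytes, and the + * output pointer trails the input, so the repacking is safe in place.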
+ */ + for (i = 0; i < nwords; i++) { + u32 word = be32_to_cpu(*pack_in++); + *pack_out++ = (u8)word; + *pack_out++ = (u8)(word >> 8); + *pack_out++ = (u8)(word >> 16); + } } static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf) @@ -1380,12 +3810,12 @@ static struct wm_adsp_compr_buf *wm_adsp_buffer_alloc(struct wm_adsp *dsp) static int wm_adsp_buffer_parse_legacy(struct wm_adsp *dsp) { - struct cs_dsp_alg_region *alg_region; + struct wm_adsp_alg_region *alg_region; struct wm_adsp_compr_buf *buf; u32 xmalg, addr, magic; int i, ret; - alg_region = cs_dsp_find_alg_region(&dsp->cs_dsp, WMFW_ADSP2_XM, dsp->cs_dsp.fw_id); + alg_region = wm_adsp_find_alg_region(dsp, WMFW_ADSP2_XM, dsp->fw_id); if (!alg_region) { adsp_err(dsp, "No algorithm region found\n"); return -EINVAL; @@ -1395,10 +3825,10 @@ static int wm_adsp_buffer_parse_legacy(struct wm_adsp *dsp) if (!buf) return -ENOMEM; - xmalg = dsp->sys_config_size / sizeof(__be32); + xmalg = dsp->ops->sys_config_size / sizeof(__be32); addr = alg_region->base + xmalg + ALG_XM_FIELD(magic); - ret = cs_dsp_read_data_word(&dsp->cs_dsp, WMFW_ADSP2_XM, addr, &magic); + ret = wm_adsp_read_data_word(dsp, WMFW_ADSP2_XM, addr, &magic); if (ret < 0) return ret; @@ -1407,8 +3837,8 @@ static int wm_adsp_buffer_parse_legacy(struct wm_adsp *dsp) addr = alg_region->base + xmalg + ALG_XM_FIELD(host_buf_ptr); for (i = 0; i < 5; ++i) { - ret = cs_dsp_read_data_word(&dsp->cs_dsp, WMFW_ADSP2_XM, addr, - &buf->host_buf_ptr); + ret = wm_adsp_read_data_word(dsp, WMFW_ADSP2_XM, addr, + &buf->host_buf_ptr); if (ret < 0) return ret; @@ -1432,37 +3862,40 @@ static int wm_adsp_buffer_parse_legacy(struct wm_adsp *dsp) return 0; } -static int wm_adsp_buffer_parse_coeff(struct cs_dsp_coeff_ctl *cs_ctl) +static int wm_adsp_buffer_parse_coeff(struct wm_coeff_ctl *ctl) { struct wm_adsp_host_buf_coeff_v1 coeff_v1; struct wm_adsp_compr_buf *buf; - struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); - unsigned int version; + unsigned int reg, version; + __be32 bufp; int ret, i; + ret = wm_coeff_base_reg(ctl, ®); + if (ret) + return ret; + for (i = 0; i < 5; ++i) { - ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, &coeff_v1, - min(cs_ctl->len, sizeof(coeff_v1))); + ret = regmap_raw_read(ctl->dsp->regmap, reg, &bufp, sizeof(bufp)); if (ret < 0) return ret; - if (coeff_v1.host_buf_ptr) + if (bufp) break; usleep_range(1000, 2000); } - if (!coeff_v1.host_buf_ptr) { - adsp_err(dsp, "Failed to acquire host buffer\n"); + if (!bufp) { + adsp_err(ctl->dsp, "Failed to acquire host buffer\n"); return -EIO; } - buf = wm_adsp_buffer_alloc(dsp); + buf = wm_adsp_buffer_alloc(ctl->dsp); if (!buf) return -ENOMEM; - buf->host_buf_mem_type = cs_ctl->alg_region.type; - buf->host_buf_ptr = be32_to_cpu(coeff_v1.host_buf_ptr); + buf->host_buf_mem_type = ctl->alg_region.type; + buf->host_buf_ptr = be32_to_cpu(bufp); ret = wm_adsp_buffer_populate(buf); if (ret < 0) @@ -1472,24 +3905,29 @@ static int wm_adsp_buffer_parse_coeff(struct cs_dsp_coeff_ctl *cs_ctl) * v0 host_buffer coefficients didn't have versioning, so if the * control is one word, assume version 0. 
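+ * Longer controls carry a versions field, which is checked against + * HOST_BUF_COEFF_SUPPORTED_COMPAT_VER below.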
*/ - if (cs_ctl->len == 4) { + if (ctl->len == 4) { compr_dbg(buf, "host_buf_ptr=%x\n", buf->host_buf_ptr); return 0; } + ret = regmap_raw_read(ctl->dsp->regmap, reg, &coeff_v1, + sizeof(coeff_v1)); + if (ret < 0) + return ret; + version = be32_to_cpu(coeff_v1.versions) & HOST_BUF_COEFF_COMPAT_VER_MASK; version >>= HOST_BUF_COEFF_COMPAT_VER_SHIFT; if (version > HOST_BUF_COEFF_SUPPORTED_COMPAT_VER) { - adsp_err(dsp, + adsp_err(ctl->dsp, "Host buffer coeff ver %u > supported version %u\n", version, HOST_BUF_COEFF_SUPPORTED_COMPAT_VER); return -EINVAL; } - cs_dsp_remove_padding((u32 *)&coeff_v1.name, ARRAY_SIZE(coeff_v1.name)); + wm_adsp_remove_padding((u32 *)&coeff_v1.name, ARRAY_SIZE(coeff_v1.name)); - buf->name = kasprintf(GFP_KERNEL, "%s-dsp-%s", dsp->part, + buf->name = kasprintf(GFP_KERNEL, "%s-dsp-%s", ctl->dsp->part, (char *)&coeff_v1.name); compr_dbg(buf, "host_buf_ptr=%x coeff version %u\n", @@ -1500,17 +3938,17 @@ static int wm_adsp_buffer_parse_coeff(struct cs_dsp_coeff_ctl *cs_ctl) static int wm_adsp_buffer_init(struct wm_adsp *dsp) { - struct cs_dsp_coeff_ctl *cs_ctl; + struct wm_coeff_ctl *ctl; int ret; - list_for_each_entry(cs_ctl, &dsp->cs_dsp.ctl_list, list) { - if (cs_ctl->type != WMFW_CTL_TYPE_HOST_BUFFER) + list_for_each_entry(ctl, &dsp->ctl_list, list) { + if (ctl->type != WMFW_CTL_TYPE_HOST_BUFFER) continue; - if (!cs_ctl->enabled) + if (!ctl->enabled) continue; - ret = wm_adsp_buffer_parse_coeff(cs_ctl); + ret = wm_adsp_buffer_parse_coeff(ctl); if (ret < 0) { adsp_err(dsp, "Failed to parse coeff: %d\n", ret); goto error; @@ -1578,7 +4016,7 @@ int wm_adsp_compr_trigger(struct snd_soc_component *component, compr_dbg(compr, "Trigger: %d\n", cmd); - mutex_lock(&dsp->cs_dsp.pwr_lock); + mutex_lock(&dsp->pwr_lock); switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -1614,7 +4052,7 @@ int wm_adsp_compr_trigger(struct snd_soc_component *component, break; } - mutex_unlock(&dsp->cs_dsp.pwr_lock); + mutex_unlock(&dsp->pwr_lock); return ret; } @@ -1663,7 +4101,7 @@ static int wm_adsp_buffer_update_avail(struct wm_adsp_compr_buf *buf) avail += wm_adsp_buffer_size(buf); compr_dbg(buf, "readindex=0x%x, writeindex=0x%x, avail=%d\n", - buf->read_index, write_index, avail * CS_DSP_DATA_WORD_SIZE); + buf->read_index, write_index, avail * WM_ADSP_DATA_WORD_SIZE); buf->avail = avail; @@ -1676,7 +4114,7 @@ int wm_adsp_compr_handle_irq(struct wm_adsp *dsp) struct wm_adsp_compr *compr; int ret = 0; - mutex_lock(&dsp->cs_dsp.pwr_lock); + mutex_lock(&dsp->pwr_lock); if (list_empty(&dsp->buffer_list)) { ret = -ENODEV; @@ -1714,7 +4152,7 @@ int wm_adsp_compr_handle_irq(struct wm_adsp *dsp) } out: - mutex_unlock(&dsp->cs_dsp.pwr_lock); + mutex_unlock(&dsp->pwr_lock); return ret; } @@ -1744,7 +4182,7 @@ int wm_adsp_compr_pointer(struct snd_soc_component *component, compr_dbg(compr, "Pointer request\n"); - mutex_lock(&dsp->cs_dsp.pwr_lock); + mutex_lock(&dsp->pwr_lock); buf = compr->buf; @@ -1784,11 +4222,11 @@ int wm_adsp_compr_pointer(struct snd_soc_component *component, } tstamp->copied_total = compr->copied_total; - tstamp->copied_total += buf->avail * CS_DSP_DATA_WORD_SIZE; + tstamp->copied_total += buf->avail * WM_ADSP_DATA_WORD_SIZE; tstamp->sampling_rate = compr->sample_rate; out: - mutex_unlock(&dsp->cs_dsp.pwr_lock); + mutex_unlock(&dsp->pwr_lock); return ret; } @@ -1826,12 +4264,12 @@ static int wm_adsp_buffer_capture_block(struct wm_adsp_compr *compr, int target) return 0; /* Read data from DSP */ - ret = cs_dsp_read_raw_data_block(&buf->dsp->cs_dsp, mem_type, adsp_addr, - nwords, (__be32 
*)compr->raw_buf); + ret = wm_adsp_read_raw_data_block(buf->dsp, mem_type, adsp_addr, + nwords, (__be32 *)compr->raw_buf); if (ret < 0) return ret; - cs_dsp_remove_padding(compr->raw_buf, nwords); + wm_adsp_remove_padding(compr->raw_buf, nwords); /* update read index to account for words read */ buf->read_index += nwords; @@ -1863,7 +4301,7 @@ static int wm_adsp_compr_read(struct wm_adsp_compr *compr, return -EIO; } - count /= CS_DSP_DATA_WORD_SIZE; + count /= WM_ADSP_DATA_WORD_SIZE; do { nwords = wm_adsp_buffer_capture_block(compr, count); @@ -1873,7 +4311,7 @@ static int wm_adsp_compr_read(struct wm_adsp_compr *compr, return nwords; } - nbytes = nwords * CS_DSP_DATA_WORD_SIZE; + nbytes = nwords * WM_ADSP_DATA_WORD_SIZE; compr_dbg(compr, "Read %d bytes\n", nbytes); @@ -1900,22 +4338,21 @@ int wm_adsp_compr_copy(struct snd_soc_component *component, struct wm_adsp *dsp = compr->dsp; int ret; - mutex_lock(&dsp->cs_dsp.pwr_lock); + mutex_lock(&dsp->pwr_lock); if (stream->direction == SND_COMPRESS_CAPTURE) ret = wm_adsp_compr_read(compr, buf, count); else ret = -ENOTSUPP; - mutex_unlock(&dsp->cs_dsp.pwr_lock); + mutex_unlock(&dsp->pwr_lock); return ret; } EXPORT_SYMBOL_GPL(wm_adsp_compr_copy); -static void wm_adsp_fatal_error(struct cs_dsp *cs_dsp) +static void wm_adsp_fatal_error(struct wm_adsp *dsp) { - struct wm_adsp *dsp = container_of(cs_dsp, struct wm_adsp, cs_dsp); struct wm_adsp_compr *compr; dsp->fatal_error = true; @@ -1929,8 +4366,64 @@ static void wm_adsp_fatal_error(struct cs_dsp *cs_dsp) irqreturn_t wm_adsp2_bus_error(int irq, void *data) { struct wm_adsp *dsp = (struct wm_adsp *)data; + unsigned int val; + struct regmap *regmap = dsp->regmap; + int ret = 0; - cs_dsp_adsp2_bus_error(&dsp->cs_dsp); + mutex_lock(&dsp->pwr_lock); + + ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val); + if (ret) { + adsp_err(dsp, + "Failed to read Region Lock Ctrl register: %d\n", ret); + goto error; + } + + if (val & ADSP2_WDT_TIMEOUT_STS_MASK) { + adsp_err(dsp, "watchdog timeout error\n"); + dsp->ops->stop_watchdog(dsp); + wm_adsp_fatal_error(dsp); + } + + if (val & (ADSP2_ADDR_ERR_MASK | ADSP2_REGION_LOCK_ERR_MASK)) { + if (val & ADSP2_ADDR_ERR_MASK) + adsp_err(dsp, "bus error: address error\n"); + else + adsp_err(dsp, "bus error: region lock error\n"); + + ret = regmap_read(regmap, dsp->base + ADSP2_BUS_ERR_ADDR, &val); + if (ret) { + adsp_err(dsp, + "Failed to read Bus Err Addr register: %d\n", + ret); + goto error; + } + + adsp_err(dsp, "bus error address = 0x%x\n", + val & ADSP2_BUS_ERR_ADDR_MASK); + + ret = regmap_read(regmap, + dsp->base + ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR, + &val); + if (ret) { + adsp_err(dsp, + "Failed to read Pmem Xmem Err Addr register: %d\n", + ret); + goto error; + } + + adsp_err(dsp, "xmem error address = 0x%x\n", + val & ADSP2_XMEM_ERR_ADDR_MASK); + adsp_err(dsp, "pmem error address = 0x%x\n", + (val & ADSP2_PMEM_ERR_ADDR_MASK) >> + ADSP2_PMEM_ERR_ADDR_SHIFT); + } + + regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, + ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT); + +error: + mutex_unlock(&dsp->pwr_lock); return IRQ_HANDLED; } @@ -1939,8 +4432,55 @@ EXPORT_SYMBOL_GPL(wm_adsp2_bus_error); irqreturn_t wm_halo_bus_error(int irq, void *data) { struct wm_adsp *dsp = (struct wm_adsp *)data; + struct regmap *regmap = dsp->regmap; + unsigned int fault[6]; + struct reg_sequence clear[] = { + { dsp->base + HALO_MPU_XM_VIO_STATUS, 0x0 }, + { dsp->base + HALO_MPU_YM_VIO_STATUS, 0x0 }, + { dsp->base + HALO_MPU_PM_VIO_STATUS, 0x0 }, + }; + int ret; - 
cs_dsp_halo_bus_error(&dsp->cs_dsp); + mutex_lock(&dsp->pwr_lock); + + ret = regmap_read(regmap, dsp->base_sysinfo + HALO_AHBM_WINDOW_DEBUG_1, + fault); + if (ret) { + adsp_warn(dsp, "Failed to read AHB DEBUG_1: %d\n", ret); + goto exit_unlock; + } + + adsp_warn(dsp, "AHB: STATUS: 0x%x ADDR: 0x%x\n", + *fault & HALO_AHBM_FLAGS_ERR_MASK, + (*fault & HALO_AHBM_CORE_ERR_ADDR_MASK) >> + HALO_AHBM_CORE_ERR_ADDR_SHIFT); + + ret = regmap_read(regmap, dsp->base_sysinfo + HALO_AHBM_WINDOW_DEBUG_0, + fault); + if (ret) { + adsp_warn(dsp, "Failed to read AHB DEBUG_0: %d\n", ret); + goto exit_unlock; + } + + adsp_warn(dsp, "AHB: SYS_ADDR: 0x%x\n", *fault); + + ret = regmap_bulk_read(regmap, dsp->base + HALO_MPU_XM_VIO_ADDR, + fault, ARRAY_SIZE(fault)); + if (ret) { + adsp_warn(dsp, "Failed to read MPU fault info: %d\n", ret); + goto exit_unlock; + } + + adsp_warn(dsp, "XM: STATUS:0x%x ADDR:0x%x\n", fault[1], fault[0]); + adsp_warn(dsp, "YM: STATUS:0x%x ADDR:0x%x\n", fault[3], fault[2]); + adsp_warn(dsp, "PM: STATUS:0x%x ADDR:0x%x\n", fault[5], fault[4]); + + ret = regmap_multi_reg_write(dsp->regmap, clear, ARRAY_SIZE(clear)); + if (ret) + adsp_warn(dsp, "Failed to clear MPU status: %d\n", ret); + +exit_unlock: + mutex_unlock(&dsp->pwr_lock); return IRQ_HANDLED; } @@ -1950,23 +4490,99 @@ irqreturn_t wm_halo_wdt_expire(int irq, void *data) { struct wm_adsp *dsp = data; - cs_dsp_halo_wdt_expire(&dsp->cs_dsp); + mutex_lock(&dsp->pwr_lock); + + adsp_warn(dsp, "WDT Expiry Fault\n"); + dsp->ops->stop_watchdog(dsp); + wm_adsp_fatal_error(dsp); + + mutex_unlock(&dsp->pwr_lock); return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(wm_halo_wdt_expire); -static const struct cs_dsp_client_ops wm_adsp1_client_ops = { - .control_add = wm_adsp_control_add, - .control_remove = wm_adsp_control_remove, +static const struct wm_adsp_ops wm_adsp1_ops = { + .validate_version = wm_adsp_validate_version, + .parse_sizes = wm_adsp1_parse_sizes, + .region_to_reg = wm_adsp_region_to_reg, }; -static const struct cs_dsp_client_ops wm_adsp2_client_ops = { - .control_add = wm_adsp_control_add, - .control_remove = wm_adsp_control_remove, - .post_run = wm_adsp_event_post_run, - .post_stop = wm_adsp_event_post_stop, - .watchdog_expired = wm_adsp_fatal_error, +static const struct wm_adsp_ops wm_adsp2_ops[] = { + { + .sys_config_size = sizeof(struct wm_adsp_system_config_xm_hdr), + .parse_sizes = wm_adsp2_parse_sizes, + .validate_version = wm_adsp_validate_version, + .setup_algs = wm_adsp2_setup_algs, + .region_to_reg = wm_adsp_region_to_reg, + + .show_fw_status = wm_adsp2_show_fw_status, + + .enable_memory = wm_adsp2_enable_memory, + .disable_memory = wm_adsp2_disable_memory, + + .enable_core = wm_adsp2_enable_core, + .disable_core = wm_adsp2_disable_core, + + .start_core = wm_adsp2_start_core, + .stop_core = wm_adsp2_stop_core, + + }, + { + .sys_config_size = sizeof(struct wm_adsp_system_config_xm_hdr), + .parse_sizes = wm_adsp2_parse_sizes, + .validate_version = wm_adsp_validate_version, + .setup_algs = wm_adsp2_setup_algs, + .region_to_reg = wm_adsp_region_to_reg, + + .show_fw_status = wm_adsp2v2_show_fw_status, + + .enable_memory = wm_adsp2_enable_memory, + .disable_memory = wm_adsp2_disable_memory, + .lock_memory = wm_adsp2_lock, + + .enable_core = wm_adsp2v2_enable_core, + .disable_core = wm_adsp2v2_disable_core, + + .start_core = wm_adsp2_start_core, + .stop_core = wm_adsp2_stop_core, + }, + { + .sys_config_size = sizeof(struct wm_adsp_system_config_xm_hdr), + .parse_sizes = wm_adsp2_parse_sizes, + .validate_version = wm_adsp_validate_version, 
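+ /* Used for rev 2 and later: the rev 1 flow plus watchdog stop on shutdown */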
+ .setup_algs = wm_adsp2_setup_algs, + .region_to_reg = wm_adsp_region_to_reg, + + .show_fw_status = wm_adsp2v2_show_fw_status, + .stop_watchdog = wm_adsp_stop_watchdog, + + .enable_memory = wm_adsp2_enable_memory, + .disable_memory = wm_adsp2_disable_memory, + .lock_memory = wm_adsp2_lock, + + .enable_core = wm_adsp2v2_enable_core, + .disable_core = wm_adsp2v2_disable_core, + + .start_core = wm_adsp2_start_core, + .stop_core = wm_adsp2_stop_core, + }, +}; + +static const struct wm_adsp_ops wm_halo_ops = { + .sys_config_size = sizeof(struct wm_halo_system_config_xm_hdr), + .parse_sizes = wm_adsp2_parse_sizes, + .validate_version = wm_halo_validate_version, + .setup_algs = wm_halo_setup_algs, + .region_to_reg = wm_halo_region_to_reg, + + .show_fw_status = wm_halo_show_fw_status, + .stop_watchdog = wm_halo_stop_watchdog, + + .lock_memory = wm_halo_configure_mpu, + + .start_core = wm_halo_start_core, + .stop_core = wm_halo_stop_core, }; MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index 7f4fabbc6a..f22131d9cc 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -10,45 +10,128 @@ #ifndef __WM_ADSP_H #define __WM_ADSP_H -#include -#include - #include #include #include +#include "wmfw.h" + /* Return values for wm_adsp_compr_handle_irq */ #define WM_ADSP_COMPR_OK 0 #define WM_ADSP_COMPR_VOICE_TRIGGER 1 +#define WM_ADSP2_REGION_0 BIT(0) +#define WM_ADSP2_REGION_1 BIT(1) +#define WM_ADSP2_REGION_2 BIT(2) +#define WM_ADSP2_REGION_3 BIT(3) +#define WM_ADSP2_REGION_4 BIT(4) +#define WM_ADSP2_REGION_5 BIT(5) +#define WM_ADSP2_REGION_6 BIT(6) +#define WM_ADSP2_REGION_7 BIT(7) +#define WM_ADSP2_REGION_8 BIT(8) +#define WM_ADSP2_REGION_9 BIT(9) +#define WM_ADSP2_REGION_1_9 (WM_ADSP2_REGION_1 | \ + WM_ADSP2_REGION_2 | WM_ADSP2_REGION_3 | \ + WM_ADSP2_REGION_4 | WM_ADSP2_REGION_5 | \ + WM_ADSP2_REGION_6 | WM_ADSP2_REGION_7 | \ + WM_ADSP2_REGION_8 | WM_ADSP2_REGION_9) +#define WM_ADSP2_REGION_ALL (WM_ADSP2_REGION_0 | WM_ADSP2_REGION_1_9) + +struct wm_adsp_region { + int type; + unsigned int base; +}; + +struct wm_adsp_alg_region { + struct list_head list; + unsigned int alg; + int type; + unsigned int base; +}; + struct wm_adsp_compr; struct wm_adsp_compr_buf; +struct wm_adsp_ops; struct wm_adsp { - struct cs_dsp cs_dsp; const char *part; + const char *name; const char *fwf_name; + int rev; + int num; + int type; + struct device *dev; + struct regmap *regmap; struct snd_soc_component *component; - unsigned int sys_config_size; + const struct wm_adsp_ops *ops; + + unsigned int base; + unsigned int base_sysinfo; + unsigned int sysclk_reg; + unsigned int sysclk_mask; + unsigned int sysclk_shift; + + struct list_head alg_regions; + + unsigned int fw_id; + unsigned int fw_id_version; + unsigned int fw_vendor_id; + + const struct wm_adsp_region *mem; + int num_mems; int fw; - - struct work_struct boot_work; + int fw_ver; bool preloaded; + bool booted; + bool running; bool fatal_error; + struct list_head ctl_list; + + struct work_struct boot_work; + struct list_head compr_list; struct list_head buffer_list; - /* - * Flag indicating the preloader widget only needs power toggled - * on state change rather than held on for the duration of the - * preload, useful for devices that can retain firmware memory - * across power down. 
- */ - bool toggle_preload; + struct mutex pwr_lock; + + unsigned int lock_regions; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_root; + char *wmfw_file_name; + char *bin_file_name; +#endif + +}; + +struct wm_adsp_ops { + unsigned int sys_config_size; + + bool (*validate_version)(struct wm_adsp *dsp, unsigned int version); + unsigned int (*parse_sizes)(struct wm_adsp *dsp, + const char * const file, + unsigned int pos, + const struct firmware *firmware); + int (*setup_algs)(struct wm_adsp *dsp); + unsigned int (*region_to_reg)(struct wm_adsp_region const *mem, + unsigned int offset); + + void (*show_fw_status)(struct wm_adsp *dsp); + void (*stop_watchdog)(struct wm_adsp *dsp); + + int (*enable_memory)(struct wm_adsp *dsp); + void (*disable_memory)(struct wm_adsp *dsp); + int (*lock_memory)(struct wm_adsp *dsp, unsigned int lock_regions); + + int (*enable_core)(struct wm_adsp *dsp); + void (*disable_core)(struct wm_adsp *dsp); + + int (*start_core)(struct wm_adsp *dsp); + void (*stop_core)(struct wm_adsp *dsp); }; #define WM_ADSP1(wname, num) \ diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 0222370ff9..564b78f3cd 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1026,7 +1026,7 @@ static const struct snd_soc_dai_ops wsa881x_dai_ops = { .hw_params = wsa881x_hw_params, .hw_free = wsa881x_hw_free, .mute_stream = wsa881x_digital_mute, - .set_stream = wsa881x_set_sdw_stream, + .set_sdw_stream = wsa881x_set_sdw_stream, }; static struct snd_soc_dai_driver wsa881x_dais[] = { diff --git a/sound/soc/codecs/zl38060.c b/sound/soc/codecs/zl38060.c index 6cae0fb080..d21a72314d 100644 --- a/sound/soc/codecs/zl38060.c +++ b/sound/soc/codecs/zl38060.c @@ -250,8 +250,8 @@ static int zl38_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: /* always 32 bits per frame (= 16 bits/channel, 2 channels) */ err = regmap_update_bits(priv->regmap, REG_TDMA_CFG_CLK, CFG_CLK_MASTER | CFG_CLK_PCLK_MASK, @@ -589,7 +589,9 @@ static int zl38_spi_probe(struct spi_device *spi) sizeof(template_chip), GFP_KERNEL); if (!priv->gpio_chip) return -ENOMEM; - priv->gpio_chip->parent = dev; +#ifdef CONFIG_OF_GPIO + priv->gpio_chip->of_node = dev->of_node; +#endif err = devm_gpiochip_add_data(dev, priv->gpio_chip, priv->regmap); if (err) return err; diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 5cb5892909..33ce257ae1 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -356,25 +356,25 @@ static int dw_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(cpu_dai); int ret = 0; - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: if (dev->capability & DW_I2S_SLAVE) ret = 0; else ret = -EINVAL; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: if (dev->capability & DW_I2S_MASTER) ret = 0; else ret = -EINVAL; break; - case SND_SOC_DAIFMT_CBP_CFC: - case SND_SOC_DAIFMT_CBC_CFP: + case SND_SOC_DAIFMT_CBM_CFS: + case SND_SOC_DAIFMT_CBS_CFM: ret = -EINVAL; break; default: - dev_dbg(dev->dev, "dwc : Invalid clock provider format\n"); + dev_dbg(dev->dev, "dwc : Invalid master/slave format\n"); ret = -EINVAL; break; } diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 
10fa387534..8e05d09279 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -311,7 +311,6 @@ config SND_SOC_FSL_ASOC_CARD select SND_SOC_FSL_ESAI select SND_SOC_FSL_SAI select SND_SOC_FSL_SSI - select SND_SOC_TLV320AIC31XX select SND_SOC_WM8994 select MFD_WM8994 help diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 5ee9455052..06107ae46e 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -26,7 +26,6 @@ #include "../codecs/wm8962.h" #include "../codecs/wm8960.h" #include "../codecs/wm8994.h" -#include "../codecs/tlv320aic31xx.h" #define CS427x_SYSCLK_MCLK 0 @@ -357,8 +356,8 @@ static int fsl_asoc_card_audmux_init(struct device_node *np, * If only 4 wires are needed, just set SSI into * synchronous mode and enable 4 PADs in IOMUX. */ - switch (priv->dai_fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: + switch (priv->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: int_ptcr = IMX_AUDMUX_V2_PTCR_RFSEL(8 | ext_port) | IMX_AUDMUX_V2_PTCR_RCSEL(8 | ext_port) | IMX_AUDMUX_V2_PTCR_TFSEL(ext_port) | @@ -368,7 +367,7 @@ static int fsl_asoc_card_audmux_init(struct device_node *np, IMX_AUDMUX_V2_PTCR_TFSDIR | IMX_AUDMUX_V2_PTCR_TCLKDIR; break; - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFS: int_ptcr = IMX_AUDMUX_V2_PTCR_RCSEL(8 | ext_port) | IMX_AUDMUX_V2_PTCR_TCSEL(ext_port) | IMX_AUDMUX_V2_PTCR_RCLKDIR | @@ -378,7 +377,7 @@ static int fsl_asoc_card_audmux_init(struct device_node *np, IMX_AUDMUX_V2_PTCR_RFSDIR | IMX_AUDMUX_V2_PTCR_TFSDIR; break; - case SND_SOC_DAIFMT_CBC_CFP: + case SND_SOC_DAIFMT_CBS_CFM: int_ptcr = IMX_AUDMUX_V2_PTCR_RFSEL(8 | ext_port) | IMX_AUDMUX_V2_PTCR_TFSEL(ext_port) | IMX_AUDMUX_V2_PTCR_RFSDIR | @@ -388,7 +387,7 @@ static int fsl_asoc_card_audmux_init(struct device_node *np, IMX_AUDMUX_V2_PTCR_RCLKDIR | IMX_AUDMUX_V2_PTCR_TCLKDIR; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: ext_ptcr = IMX_AUDMUX_V2_PTCR_RFSEL(8 | int_port) | IMX_AUDMUX_V2_PTCR_RCSEL(8 | int_port) | IMX_AUDMUX_V2_PTCR_TFSEL(int_port) | @@ -534,8 +533,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) struct device_node *cpu_np, *codec_np, *asrc_np; struct device_node *np = pdev->dev.of_node; struct platform_device *asrc_pdev = NULL; - struct device_node *bitclkprovider = NULL; - struct device_node *frameprovider = NULL; + struct device_node *bitclkmaster = NULL; + struct device_node *framemaster = NULL; struct platform_device *cpu_pdev; struct fsl_asoc_card_priv *priv; struct device *codec_dev = NULL; @@ -618,39 +617,29 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->cpu_priv.sysclk_dir[TX] = SND_SOC_CLOCK_OUT; priv->cpu_priv.sysclk_dir[RX] = SND_SOC_CLOCK_OUT; priv->cpu_priv.slot_width = 32; - priv->dai_fmt |= SND_SOC_DAIFMT_CBC_CFC; + priv->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS; } else if (of_device_is_compatible(np, "fsl,imx-audio-cs427x")) { codec_dai_name = "cs4271-hifi"; priv->codec_priv.mclk_id = CS427x_SYSCLK_MCLK; - priv->dai_fmt |= SND_SOC_DAIFMT_CBP_CFP; + priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-sgtl5000")) { codec_dai_name = "sgtl5000"; priv->codec_priv.mclk_id = SGTL5000_SYSCLK; - priv->dai_fmt |= SND_SOC_DAIFMT_CBP_CFP; + priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-tlv320aic32x4")) { codec_dai_name = "tlv320aic32x4-hifi"; - priv->dai_fmt |= SND_SOC_DAIFMT_CBP_CFP; - } else if 
(of_device_is_compatible(np, "fsl,imx-audio-tlv320aic31xx")) { - codec_dai_name = "tlv320dac31xx-hifi"; - priv->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS; - priv->dai_link[1].dpcm_capture = 0; - priv->dai_link[2].dpcm_capture = 0; - priv->cpu_priv.sysclk_dir[TX] = SND_SOC_CLOCK_OUT; - priv->cpu_priv.sysclk_dir[RX] = SND_SOC_CLOCK_OUT; - priv->codec_priv.mclk_id = AIC31XX_PLL_CLKIN_BCLK; - priv->card.dapm_routes = audio_map_tx; - priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_tx); + priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8962")) { codec_dai_name = "wm8962"; priv->codec_priv.mclk_id = WM8962_SYSCLK_MCLK; priv->codec_priv.fll_id = WM8962_SYSCLK_FLL; priv->codec_priv.pll_id = WM8962_FLL; - priv->dai_fmt |= SND_SOC_DAIFMT_CBP_CFP; + priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8960")) { codec_dai_name = "wm8960-hifi"; priv->codec_priv.fll_id = WM8960_SYSCLK_AUTO; priv->codec_priv.pll_id = WM8960_SYSCLK_AUTO; - priv->dai_fmt |= SND_SOC_DAIFMT_CBP_CFP; + priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-ac97")) { codec_dai_name = "ac97-hifi"; priv->dai_fmt = SND_SOC_DAIFMT_AC97; @@ -659,7 +648,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) } else if (of_device_is_compatible(np, "fsl,imx-audio-mqs")) { codec_dai_name = "fsl-mqs-dai"; priv->dai_fmt = SND_SOC_DAIFMT_LEFT_J | - SND_SOC_DAIFMT_CBC_CFC | + SND_SOC_DAIFMT_CBS_CFS | SND_SOC_DAIFMT_NB_NF; priv->dai_link[1].dpcm_capture = 0; priv->dai_link[2].dpcm_capture = 0; @@ -667,7 +656,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_tx); } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8524")) { codec_dai_name = "wm8524-hifi"; - priv->dai_fmt |= SND_SOC_DAIFMT_CBC_CFC; + priv->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS; priv->dai_link[1].dpcm_capture = 0; priv->dai_link[2].dpcm_capture = 0; priv->cpu_priv.slot_width = 32; @@ -675,12 +664,12 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_tx); } else if (of_device_is_compatible(np, "fsl,imx-audio-si476x")) { codec_dai_name = "si476x-codec"; - priv->dai_fmt |= SND_SOC_DAIFMT_CBC_CFC; + priv->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS; priv->card.dapm_routes = audio_map_rx; priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_rx); } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8958")) { codec_dai_name = "wm8994-aif1"; - priv->dai_fmt |= SND_SOC_DAIFMT_CBP_CFP; + priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; priv->codec_priv.mclk_id = WM8994_FLL_SRC_MCLK1; priv->codec_priv.fll_id = WM8994_SYSCLK_FLL1; priv->codec_priv.pll_id = WM8994_FLL1; @@ -694,29 +683,29 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) } /* Format info from DT is optional. */ - snd_soc_daifmt_parse_clock_provider_as_phandle(np, NULL, &bitclkprovider, &frameprovider); - if (bitclkprovider || frameprovider) { + snd_soc_daifmt_parse_clock_provider_as_phandle(np, NULL, &bitclkmaster, &framemaster); + if (bitclkmaster || framemaster) { unsigned int daifmt = snd_soc_daifmt_parse_format(np, NULL); - if (codec_np == bitclkprovider) - daifmt |= (codec_np == frameprovider) ? - SND_SOC_DAIFMT_CBP_CFP : SND_SOC_DAIFMT_CBP_CFC; + if (codec_np == bitclkmaster) + daifmt |= (codec_np == framemaster) ? + SND_SOC_DAIFMT_CBM_CFM : SND_SOC_DAIFMT_CBM_CFS; else - daifmt |= (codec_np == frameprovider) ? 
- SND_SOC_DAIFMT_CBC_CFP : SND_SOC_DAIFMT_CBC_CFC; + daifmt |= (codec_np == framemaster) ? + SND_SOC_DAIFMT_CBS_CFM : SND_SOC_DAIFMT_CBS_CFS; /* Override dai_fmt with value from DT */ priv->dai_fmt = daifmt; } /* Change direction according to format */ - if (priv->dai_fmt & SND_SOC_DAIFMT_CBP_CFP) { + if (priv->dai_fmt & SND_SOC_DAIFMT_CBM_CFM) { priv->cpu_priv.sysclk_dir[TX] = SND_SOC_CLOCK_IN; priv->cpu_priv.sysclk_dir[RX] = SND_SOC_CLOCK_IN; } - of_node_put(bitclkprovider); - of_node_put(frameprovider); + of_node_put(bitclkmaster); + of_node_put(framemaster); if (!fsl_asoc_card_is_ac97(priv) && !codec_dev) { dev_dbg(&pdev->dev, "failed to find codec device\n"); @@ -853,7 +842,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, &priv->card); if (ret) { - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n"); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret); goto asrc_fail; } @@ -898,7 +888,6 @@ static const struct of_device_id fsl_asoc_card_dt_ids[] = { { .compatible = "fsl,imx-audio-cs42888", }, { .compatible = "fsl,imx-audio-cs427x", }, { .compatible = "fsl,imx-audio-tlv320aic32x4", }, - { .compatible = "fsl,imx-audio-tlv320aic31xx", }, { .compatible = "fsl,imx-audio-sgtl5000", }, { .compatible = "fsl,imx-audio-wm8962", }, { .compatible = "fsl,imx-audio-wm8960", }, diff --git a/sound/soc/fsl/fsl_audmix.c b/sound/soc/fsl/fsl_audmix.c index 6dbb8c99f6..f931288e25 100644 --- a/sound/soc/fsl/fsl_audmix.c +++ b/sound/soc/fsl/fsl_audmix.c @@ -257,10 +257,10 @@ static int fsl_audmix_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - /* For playback the AUDMIX is consumer, and for record is provider */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: - case SND_SOC_DAIFMT_CBC_CFC: + /* For playback the AUDMIX is slave, and for record is master */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + case SND_SOC_DAIFMT_CBS_CFS: break; default: return -EINVAL; diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index 3a9e2df4e1..bda66b30e0 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -52,7 +52,7 @@ struct fsl_esai_soc_data { * @sck_rate: clock rate of desired SCKx clock * @hck_dir: the direction of HCKx pads * @sck_div: if using PSR/PM dividers for SCKx clock - * @consumer_mode: if fully using DAI clock consumer mode + * @slave_mode: if fully using DAI slave mode * @synchronous: if using tx/rx synchronous mode * @name: driver name */ @@ -78,7 +78,7 @@ struct fsl_esai { u32 sck_rate[2]; bool hck_dir[2]; bool sck_div[2]; - bool consumer_mode; + bool slave_mode; bool synchronous; char name[32]; }; @@ -366,8 +366,8 @@ static int fsl_esai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq) u32 sub, ratio = hck_rate / freq; int ret; - /* Don't apply for fully consumer mode or unchanged bclk */ - if (esai_priv->consumer_mode || esai_priv->sck_rate[tx] == freq) + /* Don't apply for fully slave mode or unchanged bclk */ + if (esai_priv->slave_mode || esai_priv->sck_rate[tx] == freq) return 0; if (ratio * freq > hck_rate) @@ -476,20 +476,20 @@ static int fsl_esai_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - esai_priv->consumer_mode = false; + esai_priv->slave_mode = false; - /* DAI clock provider masks */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBP_CFP: - esai_priv->consumer_mode = true; + /* DAI clock 
master masks */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + esai_priv->slave_mode = true; break; - case SND_SOC_DAIFMT_CBC_CFP: + case SND_SOC_DAIFMT_CBS_CFM: xccr |= ESAI_xCCR_xCKD; break; - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFS: xccr |= ESAI_xCCR_xFSD; break; - case SND_SOC_DAIFMT_CBC_CFC: + case SND_SOC_DAIFMT_CBS_CFS: xccr |= ESAI_xCCR_xFSD | ESAI_xCCR_xCKD; break; default: @@ -1016,8 +1016,8 @@ static int fsl_esai_probe(struct platform_device *pdev) /* Set a default slot number */ esai_priv->slots = 2; - /* Set a default clock provider state */ - esai_priv->consumer_mode = true; + /* Set a default master/slave state */ + esai_priv->slave_mode = true; /* Determine the FIFO depth */ iprop = of_get_property(np, "fsl,fifo-depth", NULL); diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c index ceaecbe3a2..0d4efbed41 100644 --- a/sound/soc/fsl/fsl_mqs.c +++ b/sound/soc/fsl/fsl_mqs.c @@ -102,8 +102,8 @@ static int fsl_mqs_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: break; default: return -EINVAL; diff --git a/sound/soc/fsl/fsl_rpmsg.c b/sound/soc/fsl/fsl_rpmsg.c index 8508bc7f23..d60f4dac6c 100644 --- a/sound/soc/fsl/fsl_rpmsg.c +++ b/sound/soc/fsl/fsl_rpmsg.c @@ -138,43 +138,11 @@ static const struct snd_soc_component_driver fsl_component = { .name = "fsl-rpmsg", }; -static const struct fsl_rpmsg_soc_data imx7ulp_data = { - .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | - SNDRV_PCM_RATE_48000, - .formats = SNDRV_PCM_FMTBIT_S16_LE, -}; - -static const struct fsl_rpmsg_soc_data imx8mm_data = { - .rates = SNDRV_PCM_RATE_KNOT, - .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_DSD_U8 | - SNDRV_PCM_FMTBIT_DSD_U16_LE | SNDRV_PCM_FMTBIT_DSD_U32_LE, -}; - -static const struct fsl_rpmsg_soc_data imx8mn_data = { - .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | - SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | - SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 | - SNDRV_PCM_RATE_192000, - .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S32_LE, -}; - -static const struct fsl_rpmsg_soc_data imx8mp_data = { - .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | - SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | - SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 | - SNDRV_PCM_RATE_192000, - .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S32_LE, -}; - static const struct of_device_id fsl_rpmsg_ids[] = { - { .compatible = "fsl,imx7ulp-rpmsg-audio", .data = &imx7ulp_data}, - { .compatible = "fsl,imx8mm-rpmsg-audio", .data = &imx8mm_data}, - { .compatible = "fsl,imx8mn-rpmsg-audio", .data = &imx8mn_data}, - { .compatible = "fsl,imx8mp-rpmsg-audio", .data = &imx8mp_data}, - { .compatible = "fsl,imx8ulp-rpmsg-audio", .data = &imx7ulp_data}, + { .compatible = "fsl,imx7ulp-rpmsg-audio"}, + { .compatible = "fsl,imx8mm-rpmsg-audio"}, + { .compatible = "fsl,imx8mn-rpmsg-audio"}, + { .compatible = "fsl,imx8mp-rpmsg-audio"}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_rpmsg_ids); @@ -189,13 +157,6 @@ static int fsl_rpmsg_probe(struct platform_device *pdev) if (!rpmsg) return -ENOMEM; - rpmsg->soc_data = of_device_get_match_data(&pdev->dev); - - fsl_rpmsg_dai.playback.rates = rpmsg->soc_data->rates; - fsl_rpmsg_dai.capture.rates 
= rpmsg->soc_data->rates; - fsl_rpmsg_dai.playback.formats = rpmsg->soc_data->formats; - fsl_rpmsg_dai.capture.formats = rpmsg->soc_data->formats; - if (of_property_read_bool(np, "fsl,enable-lpa")) { rpmsg->enable_lpa = 1; rpmsg->buffer_size = LPA_LARGE_BUFFER_SIZE; diff --git a/sound/soc/fsl/fsl_rpmsg.h b/sound/soc/fsl/fsl_rpmsg.h index b04086fbf8..4f5b49eb18 100644 --- a/sound/soc/fsl/fsl_rpmsg.h +++ b/sound/soc/fsl/fsl_rpmsg.h @@ -6,16 +6,6 @@ #ifndef __FSL_RPMSG_H #define __FSL_RPMSG_H -/* - * struct fsl_rpmsg_soc_data - * @rates: supported rates - * @formats: supported formats - */ -struct fsl_rpmsg_soc_data { - int rates; - u64 formats; -}; - /* * struct fsl_rpmsg - rpmsg private data * @@ -25,7 +15,6 @@ struct fsl_rpmsg_soc_data { * @pll8k: parent clock for multiple of 8kHz frequency * @pll11k: parent clock for multiple of 11kHz frequency * @card_pdev: Platform_device pointer to register a sound card - * @soc_data: soc specific data * @mclk_streams: Active streams that are using baudclk * @force_lpa: force enable low power audio routine if condition satisfy * @enable_lpa: enable low power audio routine according to dts setting @@ -38,7 +27,6 @@ struct fsl_rpmsg { struct clk *pll8k; struct clk *pll11k; struct platform_device *card_pdev; - const struct fsl_rpmsg_soc_data *soc_data; unsigned int mclk_streams; int force_lpa; int enable_lpa; diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 10544fa27d..38f6362099 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -297,23 +297,23 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, return -EINVAL; } - /* DAI clock provider masks */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + /* DAI clock master masks */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: val_cr2 |= FSL_SAI_CR2_BCD_MSTR; val_cr4 |= FSL_SAI_CR4_FSD_MSTR; - sai->is_consumer_mode = false; + sai->is_slave_mode = false; break; - case SND_SOC_DAIFMT_CBP_CFP: - sai->is_consumer_mode = true; + case SND_SOC_DAIFMT_CBM_CFM: + sai->is_slave_mode = true; break; - case SND_SOC_DAIFMT_CBC_CFP: + case SND_SOC_DAIFMT_CBS_CFM: val_cr2 |= FSL_SAI_CR2_BCD_MSTR; - sai->is_consumer_mode = false; + sai->is_slave_mode = false; break; - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFS: val_cr4 |= FSL_SAI_CR4_FSD_MSTR; - sai->is_consumer_mode = true; + sai->is_slave_mode = true; break; default: return -EINVAL; @@ -356,8 +356,8 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq) u32 id; int ret = 0; - /* Don't apply to consumer mode */ - if (sai->is_consumer_mode) + /* Don't apply to slave mode */ + if (sai->is_slave_mode) return 0; /* @@ -462,7 +462,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, pins = DIV_ROUND_UP(channels, slots); - if (!sai->is_consumer_mode) { + if (!sai->is_slave_mode) { if (sai->bclk_ratio) ret = fsl_sai_set_bclk(cpu_dai, tx, sai->bclk_ratio * @@ -502,12 +502,12 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr4 |= FSL_SAI_CR4_CHMOD; /* - * For SAI provider mode, when Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will + * For SAI master mode, when Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will * generate bclk and frame clock for Tx(Rx), we should set RCR4(TCR4), * RCR5(TCR5) for playback(capture), or there will be sync error. 
*/ - if (!sai->is_consumer_mode && fsl_sai_dir_is_synced(sai, adir)) { + if (!sai->is_slave_mode && fsl_sai_dir_is_synced(sai, adir)) { regmap_update_bits(sai->regmap, FSL_SAI_xCR4(!tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | FSL_SAI_CR4_CHMOD_MASK, @@ -543,7 +543,7 @@ static int fsl_sai_hw_free(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, FSL_SAI_xCR3(tx, ofs), FSL_SAI_CR3_TRCE_MASK, 0); - if (!sai->is_consumer_mode && + if (!sai->is_slave_mode && sai->mclk_streams & BIT(substream->stream)) { clk_disable_unprepare(sai->mclk_clk[sai->mclk_id[tx]]); sai->mclk_streams &= ~BIT(substream->stream); @@ -577,7 +577,7 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) * This is a hardware bug, and will be fixed in the * next sai version. */ - if (!sai->is_consumer_mode) { + if (!sai->is_slave_mode) { /* Software Reset */ regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); /* Clear SR bit to finish the reset */ diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index 9aaf231bc0..bc60030967 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -259,7 +259,7 @@ struct fsl_sai { struct clk *bus_clk; struct clk *mclk_clk[FSL_SAI_MCLK_MAX]; - bool is_consumer_mode; + bool is_slave_mode; bool is_lsb_first; bool is_dsp_mode; bool synchronous[2]; diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c index d178b479c8..1c53719bb6 100644 --- a/sound/soc/fsl/fsl_spdif.c +++ b/sound/soc/fsl/fsl_spdif.c @@ -111,7 +111,6 @@ struct spdif_mixer_control { * @dma_params_tx: DMA parameters for transmit channel * @dma_params_rx: DMA parameters for receive channel * @regcache_srpc: regcache for SRPC - * @bypass: status of bypass input to output */ struct fsl_spdif_priv { const struct fsl_spdif_soc_data *soc; @@ -134,7 +133,6 @@ struct fsl_spdif_priv { struct snd_dmaengine_dai_dma_data dma_params_rx; /* regcache for SRPC */ u32 regcache_srpc; - bool bypass; }; static struct fsl_spdif_soc_data fsl_spdif_vf610 = { @@ -188,16 +186,6 @@ static struct fsl_spdif_soc_data fsl_spdif_imx8mm = { .tx_formats = FSL_SPDIF_FORMATS_PLAYBACK, }; -static struct fsl_spdif_soc_data fsl_spdif_imx8ulp = { - .imx = true, - .shared_root_clock = true, - .raw_capture_mode = false, - .interrupts = 1, - .tx_burst = 2, /* Applied for EDMA */ - .rx_burst = 2, /* Applied for EDMA */ - .tx_formats = SNDRV_PCM_FMTBIT_S24_LE, /* Applied for EDMA */ -}; - /* Check if clk is a root clock that does not share clock source with others */ static inline bool fsl_spdif_can_set_clk_rate(struct fsl_spdif_priv *spdif, int clk) { @@ -907,69 +895,6 @@ static int fsl_spdif_rx_rcm_put(struct snd_kcontrol *kcontrol, return 0; } -static int fsl_spdif_bypass_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); - struct fsl_spdif_priv *priv = snd_soc_dai_get_drvdata(dai); - - ucontrol->value.integer.value[0] = priv->bypass ? 
1 : 0; - - return 0; -} - -static int fsl_spdif_bypass_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); - struct fsl_spdif_priv *priv = snd_soc_dai_get_drvdata(dai); - struct snd_soc_card *card = dai->component->card; - bool set = (ucontrol->value.integer.value[0] != 0); - struct regmap *regmap = priv->regmap; - struct snd_soc_pcm_runtime *rtd; - u32 scr, mask; - int stream; - - rtd = snd_soc_get_pcm_runtime(card, card->dai_link); - - if (priv->bypass == set) - return 0; /* nothing to do */ - - if (snd_soc_dai_active(dai)) { - dev_err(dai->dev, "Cannot change BYPASS mode while stream is running.\n"); - return -EBUSY; - } - - pm_runtime_get_sync(dai->dev); - - if (set) { - /* Disable interrupts */ - regmap_update_bits(regmap, REG_SPDIF_SIE, 0xffffff, 0); - - /* Configure BYPASS mode */ - scr = SCR_TXSEL_RX | SCR_RXFIFO_OFF; - mask = SCR_RXFIFO_FSEL_MASK | SCR_RXFIFO_AUTOSYNC_MASK | - SCR_RXFIFO_CTL_MASK | SCR_RXFIFO_OFF_MASK | SCR_TXSEL_MASK; - /* Power up SPDIF module */ - mask |= SCR_LOW_POWER; - } else { - /* Power down SPDIF module, disable TX */ - scr = SCR_LOW_POWER | SCR_TXSEL_OFF; - mask = SCR_LOW_POWER | SCR_TXSEL_MASK; - } - - regmap_update_bits(regmap, REG_SPDIF_SCR, mask, scr); - - /* Disable playback & capture if BYPASS mode is enabled, enable otherwise */ - for_each_pcm_streams(stream) - rtd->pcm->streams[stream].substream_count = (set ? 0 : 1); - - priv->bypass = set; - pm_runtime_put_sync(dai->dev); - - return 0; -} - /* DPLL lock information */ static int fsl_spdif_rxrate_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) @@ -1140,15 +1065,6 @@ static struct snd_kcontrol_new fsl_spdif_ctrls[] = { .info = fsl_spdif_rxrate_info, .get = fsl_spdif_rxrate_get, }, - /* RX bypass controller */ - { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, - .name = "Bypass Mode", - .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, - .info = snd_ctl_boolean_mono_info, - .get = fsl_spdif_bypass_get, - .put = fsl_spdif_bypass_put, - }, /* User bit sync mode set/get controller */ { .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1644,7 +1560,6 @@ static const struct of_device_id fsl_spdif_dt_ids[] = { { .compatible = "fsl,imx6sx-spdif", .data = &fsl_spdif_imx6sx, }, { .compatible = "fsl,imx8qm-spdif", .data = &fsl_spdif_imx8qm, }, { .compatible = "fsl,imx8mm-spdif", .data = &fsl_spdif_imx8mm, }, - { .compatible = "fsl,imx8ulp-spdif", .data = &fsl_spdif_imx8ulp, }, {} }; MODULE_DEVICE_TABLE(of, fsl_spdif_dt_ids); diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 1169d1104b..ecbc1c365d 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -350,16 +350,16 @@ static bool fsl_ssi_is_ac97(struct fsl_ssi *ssi) SND_SOC_DAIFMT_AC97; } -static bool fsl_ssi_is_i2s_clock_provider(struct fsl_ssi *ssi) +static bool fsl_ssi_is_i2s_master(struct fsl_ssi *ssi) { - return (ssi->dai_fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) == - SND_SOC_DAIFMT_CBC_CFC; + return (ssi->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) == + SND_SOC_DAIFMT_CBS_CFS; } -static bool fsl_ssi_is_i2s_cbp_cfc(struct fsl_ssi *ssi) +static bool fsl_ssi_is_i2s_cbm_cfs(struct fsl_ssi *ssi) { - return (ssi->dai_fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) == - SND_SOC_DAIFMT_CBP_CFC; + return (ssi->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) == + SND_SOC_DAIFMT_CBM_CFS; } /** @@ -808,7 +808,7 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, u32 wl = SSI_SxCCR_WL(sample_size); int ret; - if (fsl_ssi_is_i2s_clock_provider(ssi)) { + if 
(fsl_ssi_is_i2s_master(ssi)) { ret = fsl_ssi_set_bclk(substream, dai, hw_params); if (ret) return ret; @@ -841,7 +841,7 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, u8 i2s_net = ssi->i2s_net; /* Normal + Network mode to send 16-bit data in 32-bit frames */ - if (fsl_ssi_is_i2s_cbp_cfc(ssi) && sample_size == 16) + if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16) i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET; /* Use Normal mode to send mono data at 1st slot of 2 slots */ @@ -865,7 +865,7 @@ static int fsl_ssi_hw_free(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(asoc_rtd_to_cpu(rtd, 0)); - if (fsl_ssi_is_i2s_clock_provider(ssi) && + if (fsl_ssi_is_i2s_master(ssi) && ssi->baudclk_streams & BIT(substream->stream)) { clk_disable_unprepare(ssi->baudclk); ssi->baudclk_streams &= ~BIT(substream->stream); @@ -891,18 +891,18 @@ static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt) ssi->i2s_net = SSI_SCR_NET; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: if (IS_ERR(ssi->baudclk)) { dev_err(ssi->dev, "missing baudclk for master mode\n"); return -EINVAL; } fallthrough; - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFS: ssi->i2s_net |= SSI_SCR_I2S_MODE_MASTER; break; - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFM: ssi->i2s_net |= SSI_SCR_I2S_MODE_SLAVE; break; default: @@ -962,17 +962,17 @@ static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt) return -EINVAL; } - /* DAI clock provider masks */ - switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { - case SND_SOC_DAIFMT_CBC_CFC: + /* DAI clock master masks */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: /* Output bit and frame sync clocks */ strcr |= SSI_STCR_TFDIR | SSI_STCR_TXDIR; scr |= SSI_SCR_SYS_CLK_EN; break; - case SND_SOC_DAIFMT_CBP_CFP: + case SND_SOC_DAIFMT_CBM_CFM: /* Input bit or frame sync clocks */ break; - case SND_SOC_DAIFMT_CBP_CFC: + case SND_SOC_DAIFMT_CBM_CFS: /* Input bit clock but output frame sync clock */ strcr |= SSI_STCR_TFDIR; break; @@ -1341,7 +1341,7 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, } } - /* Do not error out for consumer cases that live without a baud clock */ + /* Do not error out for slave cases that live without a baud clock */ ssi->baudclk = devm_clk_get(dev, "baud"); if (IS_ERR(ssi->baudclk)) dev_dbg(dev, "failed to get baud clock: %ld\n", diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c index 502fe1b522..a364e2415d 100644 --- a/sound/soc/fsl/imx-audmix.c +++ b/sound/soc/fsl/imx-audmix.c @@ -80,8 +80,8 @@ static int imx_audmix_fe_hw_params(struct snd_pcm_substream *substream, u32 channels = params_channels(params); int ret, dir; - /* For playback the AUDMIX is consumer, and for record is provider */ - fmt |= tx ? SND_SOC_DAIFMT_CBC_CFC : SND_SOC_DAIFMT_CBP_CFP; + /* For playback the AUDMIX is slave, and for record is master */ + fmt |= tx ? SND_SOC_DAIFMT_CBS_CFS : SND_SOC_DAIFMT_CBM_CFM; dir = tx ? 
SND_SOC_CLOCK_OUT : SND_SOC_CLOCK_IN; /* set DAI configuration */ @@ -121,8 +121,8 @@ static int imx_audmix_be_hw_params(struct snd_pcm_substream *substream, if (!tx) return 0; - /* For playback the AUDMIX is consumer */ - fmt |= SND_SOC_DAIFMT_CBP_CFP; + /* For playback the AUDMIX is slave */ + fmt |= SND_SOC_DAIFMT_CBM_CFM; /* set AUDMIX DAI configuration */ ret = snd_soc_dai_set_fmt(asoc_rtd_to_cpu(rtd, 0), fmt); @@ -132,12 +132,12 @@ static int imx_audmix_be_hw_params(struct snd_pcm_substream *substream, return ret; } -static const struct snd_soc_ops imx_audmix_fe_ops = { +static struct snd_soc_ops imx_audmix_fe_ops = { .startup = imx_audmix_fe_startup, .hw_params = imx_audmix_fe_hw_params, }; -static const struct snd_soc_ops imx_audmix_be_ops = { +static struct snd_soc_ops imx_audmix_be_ops = { .hw_params = imx_audmix_be_hw_params, }; diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 6f8efd838f..db94718061 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -443,12 +443,12 @@ static int imx_aif_startup(struct snd_pcm_substream *substream) return ret; } -static const struct snd_soc_ops imx_aif_ops = { +static struct snd_soc_ops imx_aif_ops = { .hw_params = imx_aif_hw_params, .startup = imx_aif_startup, }; -static const struct snd_soc_ops imx_aif_ops_be = { +static struct snd_soc_ops imx_aif_ops_be = { .hw_params = imx_aif_hw_params, }; @@ -579,8 +579,9 @@ static int imx_card_parse_of(struct imx_card_data *data) ret = snd_soc_of_get_dai_name(cpu, &link->cpus->dai_name); if (ret) { - dev_err_probe(card->dev, ret, - "%s: error getting cpu dai name\n", link->name); + if (ret != -EPROBE_DEFER) + dev_err(card->dev, "%s: error getting cpu dai name: %d\n", + link->name, ret); goto err; } @@ -588,8 +589,9 @@ static int imx_card_parse_of(struct imx_card_data *data) if (codec) { ret = snd_soc_of_get_dai_link_codecs(dev, codec, link); if (ret < 0) { - dev_err_probe(dev, ret, "%s: codec dai not found\n", - link->name); + if (ret != -EPROBE_DEFER) + dev_err(dev, "%s: codec dai not found: %d\n", + link->name, ret); goto err; } @@ -666,7 +668,7 @@ static int imx_card_parse_of(struct imx_card_data *data) NULL, &link->dai_fmt); if (ret) link->dai_fmt = SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC | + SND_SOC_DAIFMT_CBS_CFS | SND_SOC_DAIFMT_I2S; /* Get tdm slot */ @@ -828,8 +830,11 @@ static int imx_card_probe(struct platform_device *pdev) } ret = devm_snd_soc_register_card(&pdev->dev, &data->card); - if (ret) - return dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret); + return ret; + } return 0; } diff --git a/sound/soc/fsl/imx-hdmi.c b/sound/soc/fsl/imx-hdmi.c index 929f69b758..ef8d7a65eb 100644 --- a/sound/soc/fsl/imx-hdmi.c +++ b/sound/soc/fsl/imx-hdmi.c @@ -59,7 +59,7 @@ static int imx_hdmi_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_soc_ops imx_hdmi_ops = { +static struct snd_soc_ops imx_hdmi_ops = { .hw_params = imx_hdmi_hw_params, }; @@ -173,7 +173,7 @@ static int imx_hdmi_probe(struct platform_device *pdev) data->dai.codecs->name = "hdmi-audio-codec.1"; data->dai.dai_fmt = data->dai_fmt | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC; + SND_SOC_DAIFMT_CBS_CFS; } if (hdmi_in) { @@ -183,7 +183,7 @@ static int imx_hdmi_probe(struct platform_device *pdev) data->dai.codecs->name = "hdmi-audio-codec.2"; data->dai.dai_fmt = data->dai_fmt | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBP_CFP; + 
SND_SOC_DAIFMT_CBM_CFM; } data->card.dapm_widgets = imx_hdmi_widgets; diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index 8daced42d5..f45cb4bbb6 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -153,7 +153,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) data->dai.platforms->of_node = ssi_np; data->dai.init = &imx_sgtl5000_dai_init; data->dai.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBP_CFP; + SND_SOC_DAIFMT_CBM_CFM; data->card.dev = &pdev->dev; ret = snd_soc_of_parse_card_name(&data->card, "model"); @@ -173,7 +173,9 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, &data->card); if (ret) { - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n"); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", + ret); goto fail; } diff --git a/sound/soc/fsl/imx-spdif.c b/sound/soc/fsl/imx-spdif.c index 4446fba755..6c4dadf603 100644 --- a/sound/soc/fsl/imx-spdif.c +++ b/sound/soc/fsl/imx-spdif.c @@ -70,8 +70,8 @@ static int imx_spdif_audio_probe(struct platform_device *pdev) goto end; ret = devm_snd_soc_register_card(&pdev->dev, &data->card); - if (ret) - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n"); + if (ret && ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card failed: %d\n", ret); end: of_node_put(spdif_np); diff --git a/sound/soc/generic/Kconfig b/sound/soc/generic/Kconfig index b6df4e26bc..4cafcf0e2b 100644 --- a/sound/soc/generic/Kconfig +++ b/sound/soc/generic/Kconfig @@ -17,23 +17,3 @@ config SND_AUDIO_GRAPH_CARD This option enables generic simple sound card support with OF-graph DT bindings. It also supports DPCM of multi CPU single Codec system. - -config SND_AUDIO_GRAPH_CARD2 - tristate "ASoC Audio Graph sound card2 support" - depends on OF - select SND_SIMPLE_CARD_UTILS - help - This option enables generic simple sound card2 support - with OF-graph DT bindings. - -config SND_AUDIO_GRAPH_CARD2_CUSTOM_SAMPLE - tristate "ASoC Audio Graph Card2 base custom sample support" - depends on SND_AUDIO_GRAPH_CARD2 - help - This option enables Audio Graph Card2 base custom sample - -config SND_TEST_COMPONENT - tristate "ASoC Test component sound support" - depends on OF - help - This option enables test component sound driver support. 
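The recurring probe-path change in the hunks above (imx-sgtl5000, imx-spdif, and the img-* drivers earlier) swaps dev_err_probe() for an open-coded check. dev_err_probe() already keeps quiet on -EPROBE_DEFER and records the deferral reason, so the reverted code has to filter that case by hand before calling dev_err(). A minimal sketch of the reinstated pattern, assuming a hypothetical foo_probe() and a "baud" clock name, neither of which comes from this series:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	struct clk *clk;

	/* Hypothetical clock lookup, standing in for any resource get */
	clk = devm_clk_get(&pdev->dev, "baud");
	if (IS_ERR(clk)) {
		/*
		 * Stay silent on -EPROBE_DEFER: the driver core will
		 * re-probe once the provider appears, so only persistent
		 * failures deserve a log line.
		 */
		if (PTR_ERR(clk) != -EPROBE_DEFER)
			dev_err(&pdev->dev, "Failed to acquire clock 'baud'\n");
		return PTR_ERR(clk);
	}

	return 0;
}

static struct platform_driver foo_driver = {
	.probe = foo_probe,
	.driver = {
		.name = "foo",
	},
};
module_platform_driver(foo_driver);

MODULE_LICENSE("GPL");

One side effect worth noting: unlike dev_err_probe(), the open-coded form does not store a deferral reason, so /sys/kernel/debug/devices_deferred loses that diagnostic for these drivers.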
diff --git a/sound/soc/generic/Makefile b/sound/soc/generic/Makefile index 0848621565..21c29e5e06 100644 --- a/sound/soc/generic/Makefile +++ b/sound/soc/generic/Makefile @@ -2,13 +2,7 @@ snd-soc-simple-card-utils-objs := simple-card-utils.o snd-soc-simple-card-objs := simple-card.o snd-soc-audio-graph-card-objs := audio-graph-card.o -snd-soc-audio-graph-card2-objs := audio-graph-card2.o -snd-soc-audio-graph-card2-custom-sample-objs := audio-graph-card2-custom-sample.o -snd-soc-test-component-objs := test-component.o obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) += snd-soc-simple-card-utils.o obj-$(CONFIG_SND_SIMPLE_CARD) += snd-soc-simple-card.o obj-$(CONFIG_SND_AUDIO_GRAPH_CARD) += snd-soc-audio-graph-card.o -obj-$(CONFIG_SND_AUDIO_GRAPH_CARD2) += snd-soc-audio-graph-card2.o -obj-$(CONFIG_SND_AUDIO_GRAPH_CARD2_CUSTOM_SAMPLE) += snd-soc-audio-graph-card2-custom-sample.o -obj-$(CONFIG_SND_TEST_COMPONENT) += snd-soc-test-component.o diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 2b598af8fe..546f6fd060 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -310,10 +310,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, * For example: FE <-> BE1 <-> BE2 <-> ... <-> BEn where * there are 'n' BE components in the path. */ - if (card->component_chaining && !soc_component_is_pcm(cpus)) { + if (card->component_chaining && !soc_component_is_pcm(cpus)) dai_link->no_pcm = 1; - dai_link->be_hw_params_fixup = asoc_simple_be_hw_params_fixup; - } asoc_simple_canonicalize_cpu(cpus, is_single_links); asoc_simple_canonicalize_platform(platforms, cpus); @@ -593,7 +591,10 @@ int audio_graph_parse_of(struct asoc_simple_priv *priv, struct device *dev) err: asoc_simple_clean_reference(card); - return dev_err_probe(dev, ret, "parse error\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "parse error %d\n", ret); + + return ret; } EXPORT_SYMBOL_GPL(audio_graph_parse_of); diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index a81323d169..10c63b7390 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -355,9 +355,9 @@ static int asoc_simple_init_dai_link_params(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hardware hw; int i, ret, stream; - /* Only Codecs */ + /* Only codecs should have non_legacy_dai_naming set. 
*/ for_each_rtd_components(rtd, i, component) { - if (!snd_soc_component_is_codec(component)) + if (!component->driver->non_legacy_dai_naming) return 0; } @@ -499,14 +499,57 @@ EXPORT_SYMBOL_GPL(asoc_simple_parse_widgets); int asoc_simple_parse_pin_switches(struct snd_soc_card *card, char *prefix) { + const unsigned int nb_controls_max = 16; + const char **strings, *control_name; + struct snd_kcontrol_new *controls; + struct device *dev = card->dev; + unsigned int i, nb_controls; char prop[128]; + int ret; if (!prefix) prefix = ""; snprintf(prop, sizeof(prop), "%s%s", prefix, "pin-switches"); - return snd_soc_of_parse_pin_switches(card, prop); + if (!of_property_read_bool(dev->of_node, prop)) + return 0; + + strings = devm_kcalloc(dev, nb_controls_max, + sizeof(*strings), GFP_KERNEL); + if (!strings) + return -ENOMEM; + + ret = of_property_read_string_array(dev->of_node, prop, + strings, nb_controls_max); + if (ret < 0) + return ret; + + nb_controls = (unsigned int)ret; + + controls = devm_kcalloc(dev, nb_controls, + sizeof(*controls), GFP_KERNEL); + if (!controls) + return -ENOMEM; + + for (i = 0; i < nb_controls; i++) { + control_name = devm_kasprintf(dev, GFP_KERNEL, + "%s Switch", strings[i]); + if (!control_name) + return -ENOMEM; + + controls[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; + controls[i].name = control_name; + controls[i].info = snd_soc_dapm_info_pin_switch; + controls[i].get = snd_soc_dapm_get_pin_switch; + controls[i].put = snd_soc_dapm_put_pin_switch; + controls[i].private_value = (unsigned long)strings[i]; + } + + card->controls = controls; + card->num_controls = nb_controls; + + return 0; } EXPORT_SYMBOL_GPL(asoc_simple_parse_pin_switches); @@ -576,8 +619,7 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv, struct asoc_simple_dai *dais; struct snd_soc_dai_link_component *dlcs; struct snd_soc_codec_conf *cconf = NULL; - struct snd_soc_pcm_stream *c2c_conf = NULL; - int i, dai_num = 0, dlc_num = 0, cnf_num = 0, c2c_num = 0; + int i, dai_num = 0, dlc_num = 0, cnf_num = 0; dai_props = devm_kcalloc(dev, li->link, sizeof(*dai_props), GFP_KERNEL); dai_link = devm_kcalloc(dev, li->link, sizeof(*dai_link), GFP_KERNEL); @@ -596,8 +638,6 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv, if (!li->num[i].cpus) cnf_num += li->num[i].codecs; - - c2c_num += li->num[i].c2c; } dais = devm_kcalloc(dev, dai_num, sizeof(*dais), GFP_KERNEL); @@ -611,12 +651,6 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv, return -ENOMEM; } - if (c2c_num) { - c2c_conf = devm_kcalloc(dev, c2c_num, sizeof(*c2c_conf), GFP_KERNEL); - if (!c2c_conf) - return -ENOMEM; - } - dev_dbg(dev, "link %d, dais %d, ccnf %d\n", li->link, dai_num, cnf_num); @@ -630,7 +664,6 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv, priv->dais = dais; priv->dlcs = dlcs; priv->codec_conf = cconf; - priv->c2c_conf = c2c_conf; card->dai_link = priv->dai_link; card->num_links = li->link; @@ -648,12 +681,6 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv, dlcs += li->num[i].cpus; dais += li->num[i].cpus; - - if (li->num[i].c2c) { - /* Codec2Codec */ - dai_props[i].c2c_conf = c2c_conf; - c2c_conf += li->num[i].c2c; - } } else { /* DPCM Be's CPU = dummy */ dai_props[i].cpus = @@ -732,34 +759,6 @@ int asoc_graph_card_probe(struct snd_soc_card *card) } EXPORT_SYMBOL_GPL(asoc_graph_card_probe); -int asoc_graph_is_ports0(struct device_node *np) -{ - struct device_node *port, *ports, *ports0, *top; - int ret; - - /* np is "endpoint" or "port" */ - if (of_node_name_eq(np, "endpoint")) { - port = 
of_get_parent(np); - } else { - port = np; - of_node_get(port); - } - - ports = of_get_parent(port); - top = of_get_parent(ports); - ports0 = of_get_child_by_name(top, "ports"); - - ret = ports0 == ports; - - of_node_put(port); - of_node_put(ports); - of_node_put(ports0); - of_node_put(top); - - return ret; -} -EXPORT_SYMBOL_GPL(asoc_graph_is_ports0); - /* Module information */ MODULE_AUTHOR("Kuninori Morimoto "); MODULE_DESCRIPTION("ALSA SoC Simple Card Utils"); diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 78419e1871..bc3e24c6a2 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -666,7 +666,8 @@ static int asoc_simple_probe(struct platform_device *pdev) ret = simple_parse_of(priv, li); if (ret < 0) { - dev_err_probe(dev, ret, "parse error\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "parse error %d\n", ret); goto err; } diff --git a/sound/soc/img/img-i2s-in.c b/sound/soc/img/img-i2s-in.c index f1f36f15a5..1bf5d6edbd 100644 --- a/sound/soc/img/img-i2s-in.c +++ b/sound/soc/img/img-i2s-in.c @@ -451,9 +451,11 @@ static int img_i2s_in_probe(struct platform_device *pdev) i2s->channel_base = base + (max_i2s_chan_pow_2 * 0x20); i2s->clk_sys = devm_clk_get(dev, "sys"); - if (IS_ERR(i2s->clk_sys)) - return dev_err_probe(dev, PTR_ERR(i2s->clk_sys), - "Failed to acquire clock 'sys'\n"); + if (IS_ERR(i2s->clk_sys)) { + if (PTR_ERR(i2s->clk_sys) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'sys'\n"); + return PTR_ERR(i2s->clk_sys); + } pm_runtime_enable(&pdev->dev); if (!pm_runtime_enabled(&pdev->dev)) { diff --git a/sound/soc/img/img-i2s-out.c b/sound/soc/img/img-i2s-out.c index 28f48ca150..4f90d36dc7 100644 --- a/sound/soc/img/img-i2s-out.c +++ b/sound/soc/img/img-i2s-out.c @@ -457,19 +457,25 @@ static int img_i2s_out_probe(struct platform_device *pdev) i2s->channel_base = base + (max_i2s_chan_pow_2 * 0x20); i2s->rst = devm_reset_control_get_exclusive(&pdev->dev, "rst"); - if (IS_ERR(i2s->rst)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2s->rst), - "No top level reset found\n"); + if (IS_ERR(i2s->rst)) { + if (PTR_ERR(i2s->rst) != -EPROBE_DEFER) + dev_err(&pdev->dev, "No top level reset found\n"); + return PTR_ERR(i2s->rst); + } i2s->clk_sys = devm_clk_get(&pdev->dev, "sys"); - if (IS_ERR(i2s->clk_sys)) - return dev_err_probe(dev, PTR_ERR(i2s->clk_sys), - "Failed to acquire clock 'sys'\n"); + if (IS_ERR(i2s->clk_sys)) { + if (PTR_ERR(i2s->clk_sys) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'sys'\n"); + return PTR_ERR(i2s->clk_sys); + } i2s->clk_ref = devm_clk_get(&pdev->dev, "ref"); - if (IS_ERR(i2s->clk_ref)) - return dev_err_probe(dev, PTR_ERR(i2s->clk_ref), - "Failed to acquire clock 'ref'\n"); + if (IS_ERR(i2s->clk_ref)) { + if (PTR_ERR(i2s->clk_ref) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'ref'\n"); + return PTR_ERR(i2s->clk_ref); + } i2s->suspend_ch_ctl = devm_kcalloc(dev, i2s->max_i2s_chan, sizeof(*i2s->suspend_ch_ctl), GFP_KERNEL); diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c index 800f247283..ce0f08d377 100644 --- a/sound/soc/img/img-parallel-out.c +++ b/sound/soc/img/img-parallel-out.c @@ -229,19 +229,25 @@ static int img_prl_out_probe(struct platform_device *pdev) prl->base = base; prl->rst = devm_reset_control_get_exclusive(&pdev->dev, "rst"); - if (IS_ERR(prl->rst)) - return dev_err_probe(&pdev->dev, PTR_ERR(prl->rst), - "No top level reset found\n"); + if (IS_ERR(prl->rst)) { + if (PTR_ERR(prl->rst) != -EPROBE_DEFER) + 
dev_err(&pdev->dev, "No top level reset found\n"); + return PTR_ERR(prl->rst); + } prl->clk_sys = devm_clk_get(&pdev->dev, "sys"); - if (IS_ERR(prl->clk_sys)) - return dev_err_probe(dev, PTR_ERR(prl->clk_sys), - "Failed to acquire clock 'sys'\n"); + if (IS_ERR(prl->clk_sys)) { + if (PTR_ERR(prl->clk_sys) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'sys'\n"); + return PTR_ERR(prl->clk_sys); + } prl->clk_ref = devm_clk_get(&pdev->dev, "ref"); - if (IS_ERR(prl->clk_ref)) - return dev_err_probe(dev, PTR_ERR(prl->clk_ref), - "Failed to acquire clock 'ref'\n"); + if (IS_ERR(prl->clk_ref)) { + if (PTR_ERR(prl->clk_ref) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'ref'\n"); + return PTR_ERR(prl->clk_ref); + } ret = clk_prepare_enable(prl->clk_sys); if (ret) diff --git a/sound/soc/img/img-spdif-in.c b/sound/soc/img/img-spdif-in.c index 95914d0612..6364eb742f 100644 --- a/sound/soc/img/img-spdif-in.c +++ b/sound/soc/img/img-spdif-in.c @@ -739,9 +739,11 @@ static int img_spdif_in_probe(struct platform_device *pdev) spdif->base = base; spdif->clk_sys = devm_clk_get(dev, "sys"); - if (IS_ERR(spdif->clk_sys)) - return dev_err_probe(dev, PTR_ERR(spdif->clk_sys), - "Failed to acquire clock 'sys'\n"); + if (IS_ERR(spdif->clk_sys)) { + if (PTR_ERR(spdif->clk_sys) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'sys'\n"); + return PTR_ERR(spdif->clk_sys); + } pm_runtime_enable(&pdev->dev); if (!pm_runtime_enabled(&pdev->dev)) { diff --git a/sound/soc/img/img-spdif-out.c b/sound/soc/img/img-spdif-out.c index c3189d9ff7..858e1b8538 100644 --- a/sound/soc/img/img-spdif-out.c +++ b/sound/soc/img/img-spdif-out.c @@ -342,19 +342,25 @@ static int img_spdif_out_probe(struct platform_device *pdev) spdif->base = base; spdif->rst = devm_reset_control_get_exclusive(&pdev->dev, "rst"); - if (IS_ERR(spdif->rst)) - return dev_err_probe(&pdev->dev, PTR_ERR(spdif->rst), - "No top level reset found\n"); + if (IS_ERR(spdif->rst)) { + if (PTR_ERR(spdif->rst) != -EPROBE_DEFER) + dev_err(&pdev->dev, "No top level reset found\n"); + return PTR_ERR(spdif->rst); + } spdif->clk_sys = devm_clk_get(&pdev->dev, "sys"); - if (IS_ERR(spdif->clk_sys)) - return dev_err_probe(dev, PTR_ERR(spdif->clk_sys), - "Failed to acquire clock 'sys'\n"); + if (IS_ERR(spdif->clk_sys)) { + if (PTR_ERR(spdif->clk_sys) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'sys'\n"); + return PTR_ERR(spdif->clk_sys); + } spdif->clk_ref = devm_clk_get(&pdev->dev, "ref"); - if (IS_ERR(spdif->clk_ref)) - return dev_err_probe(dev, PTR_ERR(spdif->clk_ref), - "Failed to acquire clock 'ref'\n"); + if (IS_ERR(spdif->clk_ref)) { + if (PTR_ERR(spdif->clk_ref) != -EPROBE_DEFER) + dev_err(dev, "Failed to acquire clock 'ref'\n"); + return PTR_ERR(spdif->clk_ref); + } pm_runtime_enable(&pdev->dev); if (!pm_runtime_enabled(&pdev->dev)) { diff --git a/sound/soc/img/pistachio-internal-dac.c b/sound/soc/img/pistachio-internal-dac.c index 802c0ee63a..fe181c2e51 100644 --- a/sound/soc/img/pistachio-internal-dac.c +++ b/sound/soc/img/pistachio-internal-dac.c @@ -161,9 +161,12 @@ static int pistachio_internal_dac_probe(struct platform_device *pdev) return PTR_ERR(dac->regmap); dac->supply = devm_regulator_get(dev, "VDD"); - if (IS_ERR(dac->supply)) - return dev_err_probe(dev, PTR_ERR(dac->supply), - "failed to acquire supply 'VDD-supply'\n"); + if (IS_ERR(dac->supply)) { + ret = PTR_ERR(dac->supply); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to acquire supply 'VDD-supply': %d\n", ret); + return ret; + } ret = 
regulator_enable(dac->supply); if (ret) { diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index a56dd48c04..5db2f4865b 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -653,21 +653,10 @@ static snd_pcm_uframes_t sst_soc_pointer(struct snd_soc_component *component, dev_err(rtd->dev, "sst: error code = %d\n", ret_val); return ret_val; } + substream->runtime->delay = str_info->pcm_delay; return str_info->buffer_ptr; } -static snd_pcm_sframes_t sst_soc_delay(struct snd_soc_component *component, - struct snd_pcm_substream *substream) -{ - struct sst_runtime_stream *stream = substream->runtime->private_data; - struct pcm_stream_info *str_info = &stream->stream_info; - - if (sst_get_stream_status(stream) == SST_PLATFORM_INIT) - return 0; - - return str_info->pcm_delay; -} - static int sst_soc_pcm_new(struct snd_soc_component *component, struct snd_soc_pcm_runtime *rtd) { @@ -706,7 +695,6 @@ static const struct snd_soc_component_driver sst_soc_platform_drv = { .open = sst_soc_open, .trigger = sst_soc_trigger, .pointer = sst_soc_pointer, - .delay = sst_soc_delay, .compress_ops = &sst_platform_compress_ops, .pcm_construct = sst_soc_pcm_new, }; diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 34ccefcc30..61b71d6c44 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -371,7 +371,7 @@ config SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH config SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH tristate "KBL with DA7219 and MAX98357A in I2S Mode" - depends on I2C && ACPI && GPIOLIB + depends on I2C && ACPI depends on MFD_INTEL_LPSS || COMPILE_TEST select SND_SOC_INTEL_DA7219_MAX98357A_GENERIC help @@ -427,7 +427,6 @@ config SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH depends on MFD_INTEL_LPSS || COMPILE_TEST depends on SND_HDA_CODEC_HDMI && SND_SOC_SOF_HDA_AUDIO_CODEC select SND_SOC_RT5682_I2C - select SND_SOC_RT5682S select SND_SOC_MAX98357A select SND_SOC_DMIC select SND_SOC_HDAC_HDMI @@ -467,12 +466,10 @@ config SND_SOC_INTEL_SOF_RT5682_MACH (MFD_INTEL_LPSS || COMPILE_TEST)) ||\ (SND_SOC_SOF_BAYTRAIL && (X86_INTEL_LPSS || COMPILE_TEST)) select SND_SOC_MAX98373_I2C - select SND_SOC_MAX98390 select SND_SOC_RT1011 select SND_SOC_RT1015 select SND_SOC_RT1015P select SND_SOC_RT5682_I2C - select SND_SOC_RT5682S select SND_SOC_DMIC select SND_SOC_HDAC_HDMI select SND_SOC_INTEL_HDA_DSP_COMMON @@ -514,39 +511,6 @@ config SND_SOC_INTEL_SOF_PCM512x_MACH Say Y or m if you have such a device. If unsure select "N". -config SND_SOC_INTEL_SOF_ES8336_MACH - tristate "SOF with ES8336 codec in I2S mode" - depends on I2C && ACPI && GPIOLIB - depends on MFD_INTEL_LPSS || COMPILE_TEST - depends on SND_HDA_CODEC_HDMI && SND_SOC_SOF_HDA_AUDIO_CODEC - select SND_SOC_ES8316 - select SND_SOC_DMIC - select SND_SOC_INTEL_HDA_DSP_COMMON - help - This adds support for ASoC machine driver for SOF platforms - with es8336 codec. - Say Y if you have such a device. - If unsure select "N". 
- -config SND_SOC_INTEL_SOF_NAU8825_MACH - tristate "SOF with nau8825 codec in I2S Mode" - depends on I2C && ACPI && GPIOLIB - depends on ((SND_HDA_CODEC_HDMI && SND_SOC_SOF_HDA_AUDIO_CODEC) &&\ - (MFD_INTEL_LPSS || COMPILE_TEST)) - select SND_SOC_NAU8825 - select SND_SOC_RT1015P - select SND_SOC_MAX98373_I2C - select SND_SOC_MAX98357A - select SND_SOC_DMIC - select SND_SOC_HDAC_HDMI - select SND_SOC_INTEL_HDA_DSP_COMMON - select SND_SOC_INTEL_SOF_MAXIM_COMMON - help - This adds support for ASoC machine driver for SOF platforms - with nau8825 codec. - Say Y if you have such a device. - If unsure select "N". - endif ## SND_SOC_SOF_HDA_LINK || SND_SOC_SOF_BAYTRAIL if (SND_SOC_SOF_COMETLAKE && SND_SOC_SOF_HDA_LINK) diff --git a/sound/soc/intel/boards/Makefile b/sound/soc/intel/boards/Makefile index 3ea273d271..ed21b82a4c 100644 --- a/sound/soc/intel/boards/Makefile +++ b/sound/soc/intel/boards/Makefile @@ -21,8 +21,6 @@ snd-soc-sst-byt-cht-es8316-objs := bytcht_es8316.o snd-soc-sst-byt-cht-nocodec-objs := bytcht_nocodec.o snd-soc-sof_rt5682-objs := sof_rt5682.o sof_realtek_common.o snd-soc-sof_cs42l42-objs := sof_cs42l42.o -snd-soc-sof_es8336-objs := sof_es8336.o -snd-soc-sof_nau8825-objs := sof_nau8825.o sof_realtek_common.o snd-soc-cml_rt1011_rt5682-objs := cml_rt1011_rt5682.o snd-soc-kbl_da7219_max98357a-objs := kbl_da7219_max98357a.o snd-soc-kbl_da7219_max98927-objs := kbl_da7219_max98927.o @@ -44,8 +42,6 @@ snd-soc-sof-sdw-objs += sof_sdw.o \ sof_sdw_dmic.o sof_sdw_hdmi.o obj-$(CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH) += snd-soc-sof_rt5682.o obj-$(CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH) += snd-soc-sof_cs42l42.o -obj-$(CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH) += snd-soc-sof_es8336.o -obj-$(CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH) += snd-soc-sof_nau8825.o obj-$(CONFIG_SND_SOC_INTEL_HASWELL_MACH) += snd-soc-sst-haswell.o obj-$(CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON) += snd-soc-sst-bxt-da7219_max98357a.o obj-$(CONFIG_SND_SOC_INTEL_BXT_RT298_MACH) += snd-soc-sst-bxt-rt298.o diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c index ffd497a5b5..a9e51bbf01 100644 --- a/sound/soc/intel/boards/bytcht_cx2072x.c +++ b/sound/soc/intel/boards/bytcht_cx2072x.c @@ -126,7 +126,7 @@ static int byt_cht_cx2072x_fixup(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_set_fmt(asoc_rtd_to_cpu(rtd, 0), SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC); + SND_SOC_DAIFMT_CBS_CFS); if (ret < 0) { dev_err(rtd->dev, "can't set format to I2S, err %d\n", ret); return ret; @@ -147,7 +147,7 @@ static int byt_cht_cx2072x_aif1_startup(struct snd_pcm_substream *substream) SNDRV_PCM_HW_PARAM_RATE, 48000); } -static const struct snd_soc_ops byt_cht_cx2072x_aif1_ops = { +static struct snd_soc_ops byt_cht_cx2072x_aif1_ops = { .startup = byt_cht_cx2072x_aif1_startup, }; @@ -195,7 +195,7 @@ static struct snd_soc_dai_link byt_cht_cx2072x_dais[] = { .id = 0, .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBC_CFC, + | SND_SOC_DAIFMT_CBS_CFS, .init = byt_cht_cx2072x_init, .be_hw_params_fixup = byt_cht_cx2072x_fixup, .dpcm_playback = 1, diff --git a/sound/soc/intel/boards/bytcht_nocodec.c b/sound/soc/intel/boards/bytcht_nocodec.c index 115c2bcaab..9b48fe701a 100644 --- a/sound/soc/intel/boards/bytcht_nocodec.c +++ b/sound/soc/intel/boards/bytcht_nocodec.c @@ -61,7 +61,7 @@ static int codec_fixup(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_set_fmt(asoc_rtd_to_cpu(rtd, 0), SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - 
SND_SOC_DAIFMT_CBC_CFC); + SND_SOC_DAIFMT_CBS_CFS); if (ret < 0) { dev_err(rtd->dev, "can't set format to I2S, err %d\n", ret); @@ -93,7 +93,7 @@ static int aif1_startup(struct snd_pcm_substream *substream) &constraints_48000); } -static const struct snd_soc_ops aif1_ops = { +static struct snd_soc_ops aif1_ops = { .startup = aif1_startup, }; @@ -141,7 +141,7 @@ static struct snd_soc_dai_link dais[] = { .id = 0, .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBC_CFC, + | SND_SOC_DAIFMT_CBS_CFS, .be_hw_params_fixup = codec_fixup, .ignore_suspend = 1, .dpcm_playback = 1, diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 2ace32c03e..a6e837290c 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -40,8 +40,6 @@ enum { BYT_RT5640_NO_INTERNAL_MIC_MAP, }; -#define RT5640_JD_SRC_EXT_GPIO 0x0f - enum { BYT_RT5640_JD_SRC_GPIO1 = (RT5640_JD_SRC_GPIO1 << 4), BYT_RT5640_JD_SRC_JD1_IN4P = (RT5640_JD_SRC_JD1_IN4P << 4), @@ -49,7 +47,6 @@ enum { BYT_RT5640_JD_SRC_GPIO2 = (RT5640_JD_SRC_GPIO2 << 4), BYT_RT5640_JD_SRC_GPIO3 = (RT5640_JD_SRC_GPIO3 << 4), BYT_RT5640_JD_SRC_GPIO4 = (RT5640_JD_SRC_GPIO4 << 4), - BYT_RT5640_JD_SRC_EXT_GPIO = (RT5640_JD_SRC_EXT_GPIO << 4) }; enum { @@ -82,7 +79,6 @@ enum { #define BYT_RT5640_LINEOUT_AS_HP2 BIT(26) #define BYT_RT5640_HSMIC2_ON_IN1 BIT(27) #define BYT_RT5640_JD_HP_ELITEP_1000G2 BIT(28) -#define BYT_RT5640_USE_AMCR0F28 BIT(29) #define BYTCR_INPUT_DEFAULTS \ (BYT_RT5640_IN3_MAP | \ @@ -97,7 +93,6 @@ enum { struct byt_rt5640_private { struct snd_soc_jack jack; struct snd_soc_jack jack2; - struct rt5640_set_jack_data jack_data; struct gpio_desc *hsmic_detect; struct clk *mclk; struct device *codec_dev; @@ -274,10 +269,13 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, return -EIO; if (SND_SOC_DAPM_EVENT_ON(event)) { - ret = clk_prepare_enable(priv->mclk); - if (ret < 0) { - dev_err(card->dev, "could not configure MCLK state\n"); - return ret; + if (byt_rt5640_quirk & BYT_RT5640_MCLK_EN) { + ret = clk_prepare_enable(priv->mclk); + if (ret < 0) { + dev_err(card->dev, + "could not configure MCLK state\n"); + return ret; + } } ret = byt_rt5640_prepare_and_enable_pll1(codec_dai, 48000); } else { @@ -289,8 +287,10 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_RCCLK, 48000 * 512, SND_SOC_CLOCK_IN); - if (!ret) - clk_disable_unprepare(priv->mclk); + if (!ret) { + if (byt_rt5640_quirk & BYT_RT5640_MCLK_EN) + clk_disable_unprepare(priv->mclk); + } } if (ret < 0) { @@ -602,8 +602,7 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_OVCD_TH_2000UA | BYT_RT5640_OVCD_SF_0P75 | BYT_RT5640_SSP0_AIF1 | - BYT_RT5640_MCLK_EN | - BYT_RT5640_USE_AMCR0F28), + BYT_RT5640_MCLK_EN), }, { .matches = { @@ -630,19 +629,6 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, - { - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"), - }, - .driver_data = (void *)(BYT_RT5640_IN1_MAP | - BYT_RT5640_JD_SRC_EXT_GPIO | - BYT_RT5640_OVCD_TH_2000UA | - BYT_RT5640_OVCD_SF_0P75 | - BYT_RT5640_SSP0_AIF1 | - BYT_RT5640_MCLK_EN | - BYT_RT5640_USE_AMCR0F28), - }, { /* Chuwi Vi8 (CWI506) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), @@ -1099,11 +1085,9 @@ static int byt_rt5640_add_codec_device_props(struct device *i2c_dev, } if 
(BYT_RT5640_JDSRC(byt_rt5640_quirk)) { - if (BYT_RT5640_JDSRC(byt_rt5640_quirk) != RT5640_JD_SRC_EXT_GPIO) { - props[cnt++] = PROPERTY_ENTRY_U32( - "realtek,jack-detect-source", - BYT_RT5640_JDSRC(byt_rt5640_quirk)); - } + props[cnt++] = PROPERTY_ENTRY_U32( + "realtek,jack-detect-source", + BYT_RT5640_JDSRC(byt_rt5640_quirk)); props[cnt++] = PROPERTY_ENTRY_U32( "realtek,over-current-threshold-microamp", @@ -1130,51 +1114,6 @@ static int byt_rt5640_add_codec_device_props(struct device *i2c_dev, return ret; } -/* Some Android devs specify IRQs/GPIOS in a special AMCR0F28 ACPI device */ -static const struct acpi_gpio_params amcr0f28_jd_gpio = { 1, 0, false }; - -static const struct acpi_gpio_mapping amcr0f28_gpios[] = { - { "rt5640-jd-gpios", &amcr0f28_jd_gpio, 1 }, - { } -}; - -static int byt_rt5640_get_amcr0f28_settings(struct snd_soc_card *card) -{ - struct byt_rt5640_private *priv = snd_soc_card_get_drvdata(card); - struct rt5640_set_jack_data *data = &priv->jack_data; - struct acpi_device *adev; - int ret = 0; - - adev = acpi_dev_get_first_match_dev("AMCR0F28", "1", -1); - if (!adev) { - dev_err(card->dev, "error cannot find AMCR0F28 adev\n"); - return -ENOENT; - } - - data->codec_irq_override = acpi_dev_gpio_irq_get(adev, 0); - if (data->codec_irq_override < 0) { - ret = data->codec_irq_override; - dev_err(card->dev, "error %d getting codec IRQ\n", ret); - goto put_adev; - } - - if (BYT_RT5640_JDSRC(byt_rt5640_quirk) == RT5640_JD_SRC_EXT_GPIO) { - acpi_dev_add_driver_gpios(adev, amcr0f28_gpios); - data->jd_gpio = devm_fwnode_gpiod_get(card->dev, acpi_fwnode_handle(adev), - "rt5640-jd", GPIOD_IN, "rt5640-jd"); - acpi_dev_remove_driver_gpios(adev); - - if (IS_ERR(data->jd_gpio)) { - ret = PTR_ERR(data->jd_gpio); - dev_err(card->dev, "error %d getting jd GPIO\n", ret); - } - } - -put_adev: - acpi_dev_put(adev); - return ret; -} - static int byt_rt5640_init(struct snd_soc_pcm_runtime *runtime) { struct snd_soc_card *card = runtime->card; @@ -1278,25 +1217,30 @@ static int byt_rt5640_init(struct snd_soc_pcm_runtime *runtime) return ret; } - /* - * The firmware might enable the clock at boot (this information - * may or may not be reflected in the enable clock register). - * To change the rate we must disable the clock first to cover - * these cases. Due to common clock framework restrictions that - * do not allow to disable a clock that has not been enabled, - * we need to enable the clock first. - */ - ret = clk_prepare_enable(priv->mclk); - if (!ret) - clk_disable_unprepare(priv->mclk); + if (byt_rt5640_quirk & BYT_RT5640_MCLK_EN) { + /* + * The firmware might enable the clock at + * boot (this information may or may not + * be reflected in the enable clock register). + * To change the rate we must disable the clock + * first to cover these cases. Due to common + * clock framework restrictions that do not allow + * to disable a clock that has not been enabled, + * we need to enable the clock first. 
+ */ + ret = clk_prepare_enable(priv->mclk); + if (!ret) + clk_disable_unprepare(priv->mclk); - if (byt_rt5640_quirk & BYT_RT5640_MCLK_25MHZ) - ret = clk_set_rate(priv->mclk, 25000000); - else - ret = clk_set_rate(priv->mclk, 19200000); - if (ret) { - dev_err(card->dev, "unable to set MCLK rate\n"); - return ret; + if (byt_rt5640_quirk & BYT_RT5640_MCLK_25MHZ) + ret = clk_set_rate(priv->mclk, 25000000); + else + ret = clk_set_rate(priv->mclk, 19200000); + + if (ret) { + dev_err(card->dev, "unable to set MCLK rate\n"); + return ret; + } } if (BYT_RT5640_JDSRC(byt_rt5640_quirk)) { @@ -1310,14 +1254,7 @@ static int byt_rt5640_init(struct snd_soc_pcm_runtime *runtime) } snd_jack_set_key(priv->jack.jack, SND_JACK_BTN_0, KEY_PLAYPAUSE); - - if (byt_rt5640_quirk & BYT_RT5640_USE_AMCR0F28) { - ret = byt_rt5640_get_amcr0f28_settings(card); - if (ret) - return ret; - } - - snd_soc_component_set_jack(component, &priv->jack, &priv->jack_data); + snd_soc_component_set_jack(component, &priv->jack, NULL); } if (byt_rt5640_quirk & BYT_RT5640_JD_HP_ELITEP_1000G2) { @@ -1399,7 +1336,7 @@ static int byt_rt5640_codec_fixup(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_set_fmt(asoc_rtd_to_cpu(rtd, 0), SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC); + SND_SOC_DAIFMT_CBS_CFS); if (ret < 0) { dev_err(rtd->dev, "can't set format to I2S, err %d\n", ret); return ret; @@ -1474,7 +1411,7 @@ static struct snd_soc_dai_link byt_rt5640_dais[] = { .id = 0, .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBC_CFC, + | SND_SOC_DAIFMT_CBS_CFS, .be_hw_params_fixup = byt_rt5640_codec_fixup, .dpcm_playback = 1, .dpcm_capture = 1, @@ -1521,8 +1458,7 @@ static int byt_rt5640_resume(struct snd_soc_card *card) for_each_card_components(card, component) { if (!strcmp(component->name, byt_rt5640_codec_name)) { dev_dbg(component->dev, "re-enabling jack detect after resume\n"); - snd_soc_component_set_jack(component, &priv->jack, - &priv->jack_data); + snd_soc_component_set_jack(component, &priv->jack, NULL); break; } } @@ -1559,12 +1495,12 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; static const char * const map_name[] = { "dmic1", "dmic2", "in1", "in3", "none" }; - struct snd_soc_acpi_mach *mach = dev_get_platdata(dev); __maybe_unused const char *spk_type; const struct dmi_system_id *dmi_id; const char *headset2_string = ""; const char *lineout_string = ""; struct byt_rt5640_private *priv; + struct snd_soc_acpi_mach *mach; const char *platform_name; struct acpi_device *adev; struct device *codec_dev; @@ -1575,12 +1511,13 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) int aif; is_bytcr = false; - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; /* register the soc card */ - byt_rt5640_card.dev = dev; + byt_rt5640_card.dev = &pdev->dev; + mach = byt_rt5640_card.dev->platform_data; snd_soc_card_set_drvdata(&byt_rt5640_card, priv); /* fix index of codec dai */ @@ -1600,7 +1537,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) put_device(&adev->dev); byt_rt5640_dais[dai_index].codecs->name = byt_rt5640_codec_name; } else { - dev_err(dev, "Error cannot find '%s' dev\n", mach->id); + dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id); return -ENXIO; } @@ -1643,13 +1580,13 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) &pkg_ctx); if (pkg_found) { if 
(chan_package.aif_value == 1) { - dev_info(dev, "BIOS Routing: AIF1 connected\n"); + dev_info(&pdev->dev, "BIOS Routing: AIF1 connected\n"); byt_rt5640_quirk |= BYT_RT5640_SSP0_AIF1; } else if (chan_package.aif_value == 2) { - dev_info(dev, "BIOS Routing: AIF2 connected\n"); + dev_info(&pdev->dev, "BIOS Routing: AIF2 connected\n"); byt_rt5640_quirk |= BYT_RT5640_SSP0_AIF2; } else { - dev_info(dev, "BIOS Routing isn't valid, ignored\n"); + dev_info(&pdev->dev, "BIOS Routing isn't valid, ignored\n"); pkg_found = false; } } @@ -1673,7 +1610,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) if (dmi_id) byt_rt5640_quirk = (unsigned long)dmi_id->driver_data; if (quirk_override != -1) { - dev_info(dev, "Overriding quirk 0x%lx => 0x%x\n", + dev_info(&pdev->dev, "Overriding quirk 0x%lx => 0x%x\n", byt_rt5640_quirk, quirk_override); byt_rt5640_quirk = quirk_override; } @@ -1687,12 +1624,12 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) acpi_dev_add_driver_gpios(ACPI_COMPANION(priv->codec_dev), byt_rt5640_hp_elitepad_1000g2_gpios); - priv->hsmic_detect = devm_fwnode_gpiod_get(dev, codec_dev->fwnode, + priv->hsmic_detect = devm_fwnode_gpiod_get(&pdev->dev, codec_dev->fwnode, "headset-mic-detect", GPIOD_IN, "headset-mic-detect"); if (IS_ERR(priv->hsmic_detect)) { - ret_val = dev_err_probe(dev, PTR_ERR(priv->hsmic_detect), - "getting hsmic-detect GPIO\n"); + ret_val = PTR_ERR(priv->hsmic_detect); + dev_err_probe(&pdev->dev, ret_val, "getting hsmic-detect GPIO\n"); goto err_device; } } @@ -1702,7 +1639,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) if (ret_val) goto err_remove_gpios; - log_quirks(dev); + log_quirks(&pdev->dev); if ((byt_rt5640_quirk & BYT_RT5640_SSP2_AIF2) || (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF2)) { @@ -1717,18 +1654,23 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) byt_rt5640_dais[dai_index].cpus->dai_name = "ssp0-port"; if (byt_rt5640_quirk & BYT_RT5640_MCLK_EN) { - priv->mclk = devm_clk_get_optional(dev, "pmc_plt_clk_3"); + priv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3"); if (IS_ERR(priv->mclk)) { - ret_val = dev_err_probe(dev, PTR_ERR(priv->mclk), - "Failed to get MCLK from pmc_plt_clk_3\n"); - goto err; - } - /* - * Fall back to bit clock usage when clock is not - * available likely due to missing dependencies. 
- */ - if (!priv->mclk) + ret_val = PTR_ERR(priv->mclk); + + dev_err(&pdev->dev, + "Failed to get MCLK from pmc_plt_clk_3: %d\n", + ret_val); + + /* + * Fall back to bit clock usage for -ENOENT (clock not + * available likely due to missing dependencies), bail + * for all other errors, including -EPROBE_DEFER + */ + if (ret_val != -ENOENT) + goto err; byt_rt5640_quirk &= ~BYT_RT5640_MCLK_EN; + } } if (byt_rt5640_quirk & BYT_RT5640_NO_SPEAKERS) { @@ -1772,7 +1714,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) if (ret_val) goto err; - sof_parent = snd_soc_acpi_sof_parent(dev); + sof_parent = snd_soc_acpi_sof_parent(&pdev->dev); /* set card and driver name */ if (sof_parent) { @@ -1787,9 +1729,11 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) if (sof_parent) dev->driver->pm = &snd_soc_pm_ops; - ret_val = devm_snd_soc_register_card(dev, &byt_rt5640_card); + ret_val = devm_snd_soc_register_card(&pdev->dev, &byt_rt5640_card); + if (ret_val) { - dev_err(dev, "devm_snd_soc_register_card failed %d\n", ret_val); + dev_err(&pdev->dev, "devm_snd_soc_register_card failed %d\n", + ret_val); goto err; } platform_set_drvdata(pdev, &byt_rt5640_card); diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index 5e9c53dadb..e94c9124d4 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -188,10 +188,13 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, } if (SND_SOC_DAPM_EVENT_ON(event)) { - ret = clk_prepare_enable(priv->mclk); - if (ret < 0) { - dev_err(card->dev, "could not configure MCLK state"); - return ret; + if (byt_rt5651_quirk & BYT_RT5651_MCLK_EN) { + ret = clk_prepare_enable(priv->mclk); + if (ret < 0) { + dev_err(card->dev, + "could not configure MCLK state"); + return ret; + } } ret = byt_rt5651_prepare_and_enable_pll1(codec_dai, 48000, 50); } else { @@ -204,7 +207,8 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, 48000 * 512, SND_SOC_CLOCK_IN); if (!ret) - clk_disable_unprepare(priv->mclk); + if (byt_rt5651_quirk & BYT_RT5651_MCLK_EN) + clk_disable_unprepare(priv->mclk); } if (ret < 0) { @@ -625,25 +629,29 @@ static int byt_rt5651_init(struct snd_soc_pcm_runtime *runtime) return ret; } - /* - * The firmware might enable the clock at boot (this information - * may or may not be reflected in the enable clock register). - * To change the rate we must disable the clock first to cover - * these cases. Due to common clock framework restrictions that - * do not allow to disable a clock that has not been enabled, - * we need to enable the clock first. - */ - ret = clk_prepare_enable(priv->mclk); - if (!ret) - clk_disable_unprepare(priv->mclk); + if (byt_rt5651_quirk & BYT_RT5651_MCLK_EN) { + /* + * The firmware might enable the clock at + * boot (this information may or may not + * be reflected in the enable clock register). + * To change the rate we must disable the clock + * first to cover these cases. Due to common + * clock framework restrictions that do not allow + * to disable a clock that has not been enabled, + * we need to enable the clock first. 
+ */ + ret = clk_prepare_enable(priv->mclk); + if (!ret) + clk_disable_unprepare(priv->mclk); - if (byt_rt5651_quirk & BYT_RT5651_MCLK_25MHZ) - ret = clk_set_rate(priv->mclk, 25000000); - else - ret = clk_set_rate(priv->mclk, 19200000); + if (byt_rt5651_quirk & BYT_RT5651_MCLK_25MHZ) + ret = clk_set_rate(priv->mclk, 25000000); + else + ret = clk_set_rate(priv->mclk, 19200000); - if (ret) - dev_err(card->dev, "unable to set MCLK rate\n"); + if (ret) + dev_err(card->dev, "unable to set MCLK rate\n"); + } report = 0; if (BYT_RT5651_JDSRC(byt_rt5651_quirk)) @@ -705,7 +713,7 @@ static int byt_rt5651_codec_fixup(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_set_fmt(asoc_rtd_to_cpu(rtd, 0), SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC + SND_SOC_DAIFMT_CBS_CFS ); if (ret < 0) { @@ -790,7 +798,7 @@ static struct snd_soc_dai_link byt_rt5651_dais[] = { .id = 0, .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF - | SND_SOC_DAIFMT_CBC_CFC, + | SND_SOC_DAIFMT_CBS_CFS, .be_hw_params_fixup = byt_rt5651_codec_fixup, .dpcm_playback = 1, .dpcm_capture = 1, @@ -886,10 +894,9 @@ struct acpi_chan_package { /* ACPICA seems to require 64 bit integers */ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) { - struct device *dev = &pdev->dev; static const char * const mic_name[] = { "dmic", "in1", "in2", "in12" }; - struct snd_soc_acpi_mach *mach = dev_get_platdata(dev); struct byt_rt5651_private *priv; + struct snd_soc_acpi_mach *mach; const char *platform_name; struct acpi_device *adev; struct device *codec_dev; @@ -899,12 +906,14 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) int dai_index = 0; int i; - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; /* register the soc card */ - byt_rt5651_card.dev = dev; + byt_rt5651_card.dev = &pdev->dev; + + mach = byt_rt5651_card.dev->platform_data; snd_soc_card_set_drvdata(&byt_rt5651_card, priv); /* fix index of codec dai */ @@ -924,7 +933,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) put_device(&adev->dev); byt_rt5651_dais[dai_index].codecs->name = byt_rt5651_codec_name; } else { - dev_err(dev, "Error cannot find '%s' dev\n", mach->id); + dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id); return -ENXIO; } @@ -972,13 +981,13 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) &pkg_ctx); if (pkg_found) { if (chan_package.aif_value == 1) { - dev_info(dev, "BIOS Routing: AIF1 connected\n"); + dev_info(&pdev->dev, "BIOS Routing: AIF1 connected\n"); byt_rt5651_quirk |= BYT_RT5651_SSP0_AIF1; } else if (chan_package.aif_value == 2) { - dev_info(dev, "BIOS Routing: AIF2 connected\n"); + dev_info(&pdev->dev, "BIOS Routing: AIF2 connected\n"); byt_rt5651_quirk |= BYT_RT5651_SSP0_AIF2; } else { - dev_info(dev, "BIOS Routing isn't valid, ignored\n"); + dev_info(&pdev->dev, "BIOS Routing isn't valid, ignored\n"); pkg_found = false; } } @@ -993,7 +1002,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) dmi_check_system(byt_rt5651_quirk_table); if (quirk_override != -1) { - dev_info(dev, "Overriding quirk 0x%lx => 0x%x\n", + dev_info(&pdev->dev, "Overriding quirk 0x%lx => 0x%x\n", byt_rt5651_quirk, quirk_override); byt_rt5651_quirk = quirk_override; } @@ -1009,7 +1018,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) if (byt_rt5651_gpios) { devm_acpi_dev_add_driver_gpios(codec_dev, byt_rt5651_gpios); - priv->ext_amp_gpio = 
devm_fwnode_gpiod_get(dev, codec_dev->fwnode, + priv->ext_amp_gpio = devm_fwnode_gpiod_get(&pdev->dev, + codec_dev->fwnode, "ext-amp-enable", GPIOD_OUT_LOW, "speaker-amp"); @@ -1020,13 +1030,15 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) priv->ext_amp_gpio = NULL; break; default: - dev_err(dev, "Failed to get ext-amp-enable GPIO: %d\n", ret_val); + dev_err(&pdev->dev, "Failed to get ext-amp-enable GPIO: %d\n", + ret_val); fallthrough; case -EPROBE_DEFER: goto err; } } - priv->hp_detect = devm_fwnode_gpiod_get(dev, codec_dev->fwnode, + priv->hp_detect = devm_fwnode_gpiod_get(&pdev->dev, + codec_dev->fwnode, "hp-detect", GPIOD_IN, "hp-detect"); @@ -1037,7 +1049,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) priv->hp_detect = NULL; break; default: - dev_err(dev, "Failed to get hp-detect GPIO: %d\n", ret_val); + dev_err(&pdev->dev, "Failed to get hp-detect GPIO: %d\n", + ret_val); fallthrough; case -EPROBE_DEFER: goto err; @@ -1045,7 +1058,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) } } - log_quirks(dev); + log_quirks(&pdev->dev); if ((byt_rt5651_quirk & BYT_RT5651_SSP2_AIF2) || (byt_rt5651_quirk & BYT_RT5651_SSP0_AIF2)) @@ -1056,18 +1069,21 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) byt_rt5651_dais[dai_index].cpus->dai_name = "ssp0-port"; if (byt_rt5651_quirk & BYT_RT5651_MCLK_EN) { - priv->mclk = devm_clk_get_optional(dev, "pmc_plt_clk_3"); + priv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3"); if (IS_ERR(priv->mclk)) { - ret_val = dev_err_probe(dev, PTR_ERR(priv->mclk), - "Failed to get MCLK from pmc_plt_clk_3\n"); - goto err; - } - /* - * Fall back to bit clock usage when clock is not - * available likely due to missing dependencies. - */ - if (!priv->mclk) + ret_val = PTR_ERR(priv->mclk); + dev_err(&pdev->dev, + "Failed to get MCLK from pmc_plt_clk_3: %d\n", + ret_val); + /* + * Fall back to bit clock usage for -ENOENT (clock not + * available likely due to missing dependencies), bail + * for all other errors, including -EPROBE_DEFER + */ + if (ret_val != -ENOENT) + goto err; byt_rt5651_quirk &= ~BYT_RT5651_MCLK_EN; + } } snprintf(byt_rt5651_components, sizeof(byt_rt5651_components), @@ -1096,7 +1112,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) if (ret_val) goto err; - sof_parent = snd_soc_acpi_sof_parent(dev); + sof_parent = snd_soc_acpi_sof_parent(&pdev->dev); /* set card and driver name */ if (sof_parent) { @@ -1109,11 +1125,13 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) /* set pm ops */ if (sof_parent) - dev->driver->pm = &snd_soc_pm_ops; + pdev->dev.driver->pm = &snd_soc_pm_ops; + + ret_val = devm_snd_soc_register_card(&pdev->dev, &byt_rt5651_card); - ret_val = devm_snd_soc_register_card(dev, &byt_rt5651_card); if (ret_val) { - dev_err(dev, "devm_snd_soc_register_card failed %d\n", ret_val); + dev_err(&pdev->dev, "devm_snd_soc_register_card failed %d\n", + ret_val); goto err; } platform_set_drvdata(pdev, &byt_rt5651_card); diff --git a/sound/soc/intel/boards/cht_bsw_nau8824.c b/sound/soc/intel/boards/cht_bsw_nau8824.c index bad32d2bdf..da5a5cbc87 100644 --- a/sound/soc/intel/boards/cht_bsw_nau8824.c +++ b/sound/soc/intel/boards/cht_bsw_nau8824.c @@ -214,7 +214,7 @@ static struct snd_soc_dai_link cht_dailink[] = { .id = 0, .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_DSP_B | SND_SOC_DAIFMT_IB_NF - | SND_SOC_DAIFMT_CBC_CFC, + | SND_SOC_DAIFMT_CBS_CFS, .init = cht_codec_init, .be_hw_params_fixup = cht_codec_fixup, .dpcm_playback = 1, @@ -278,8 
+278,6 @@ static int snd_cht_mc_probe(struct platform_device *pdev) snd_soc_card_cht.driver_name = DRIVER_NAME; } - snd_soc_card_cht.components = nau8824_components(); - /* set pm ops */ if (sof_parent) pdev->dev.driver->pm = &snd_soc_pm_ops; diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c index bad3829e52..71fe26a1b7 100644 --- a/sound/soc/intel/boards/glk_rt5682_max98357a.c +++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c @@ -18,18 +18,14 @@ #include #include #include "../../codecs/rt5682.h" -#include "../../codecs/rt5682s.h" #include "../../codecs/hdac_hdmi.h" #include "hda_dsp_common.h" /* The platform clock outputs 19.2Mhz clock to codec as I2S MCLK */ #define GLK_PLAT_CLK_FREQ 19200000 #define RT5682_PLL_FREQ (48000 * 512) -#define RT5682_DAI_NAME "rt5682-aif1" -#define RT5682S_DAI_NAME "rt5682s-aif1" +#define GLK_REALTEK_CODEC_DAI "rt5682-aif1" #define GLK_MAXIM_CODEC_DAI "HiFi" -#define RT5682_DEV0_NAME "i2c-10EC5682:00" -#define RT5682S_DEV0_NAME "i2c-RTL5682:00" #define MAXIM_DEV0_NAME "MX98357A:00" #define DUAL_CHANNEL 2 #define QUAD_CHANNEL 4 @@ -47,7 +43,6 @@ struct glk_card_private { struct snd_soc_jack geminilake_headset; struct list_head hdmi_pcm_list; bool common_hdmi_codec_drv; - int is_rt5682s; }; enum { @@ -144,19 +139,9 @@ static int geminilake_rt5682_codec_init(struct snd_soc_pcm_runtime *rtd) struct snd_soc_component *component = asoc_rtd_to_codec(rtd, 0)->component; struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0); struct snd_soc_jack *jack; - int pll_id, pll_source, clk_id, ret; + int ret; - if (ctx->is_rt5682s) { - pll_id = RT5682S_PLL2; - pll_source = RT5682S_PLL_S_MCLK; - clk_id = RT5682S_SCLK_S_PLL2; - } else { - pll_id = RT5682_PLL1; - pll_source = RT5682_PLL1_S_MCLK; - clk_id = RT5682_SCLK_S_PLL1; - } - - ret = snd_soc_dai_set_pll(codec_dai, pll_id, pll_source, + ret = snd_soc_dai_set_pll(codec_dai, 0, RT5682_PLL1_S_MCLK, GLK_PLAT_CLK_FREQ, RT5682_PLL_FREQ); if (ret < 0) { dev_err(rtd->dev, "can't set codec pll: %d\n", ret); @@ -164,7 +149,7 @@ static int geminilake_rt5682_codec_init(struct snd_soc_pcm_runtime *rtd) } /* Configure sysclk for codec */ - ret = snd_soc_dai_set_sysclk(codec_dai, clk_id, + ret = snd_soc_dai_set_sysclk(codec_dai, RT5682_SCLK_S_PLL1, RT5682_PLL_FREQ, SND_SOC_CLOCK_IN); if (ret < 0) dev_err(rtd->dev, "snd_soc_dai_set_sysclk err = %d\n", ret); @@ -359,12 +344,9 @@ SND_SOC_DAILINK_DEF(ssp1_codec, SND_SOC_DAILINK_DEF(ssp2_pin, DAILINK_COMP_ARRAY(COMP_CPU("SSP2 Pin"))); -SND_SOC_DAILINK_DEF(ssp2_codec_5682, - DAILINK_COMP_ARRAY(COMP_CODEC(RT5682_DEV0_NAME, - RT5682_DAI_NAME))); -SND_SOC_DAILINK_DEF(ssp2_codec_5682s, - DAILINK_COMP_ARRAY(COMP_CODEC(RT5682S_DEV0_NAME, - RT5682S_DAI_NAME))); +SND_SOC_DAILINK_DEF(ssp2_codec, + DAILINK_COMP_ARRAY(COMP_CODEC("i2c-10EC5682:00", + GLK_REALTEK_CODEC_DAI))); SND_SOC_DAILINK_DEF(dmic_pin, DAILINK_COMP_ARRAY(COMP_CPU("DMIC01 Pin"))); @@ -491,7 +473,7 @@ static struct snd_soc_dai_link geminilake_dais[] = { .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC, + SND_SOC_DAIFMT_CBS_CFS, .ignore_pmdown_time = 1, .be_hw_params_fixup = geminilake_ssp_fixup, .dpcm_playback = 1, @@ -504,13 +486,13 @@ static struct snd_soc_dai_link geminilake_dais[] = { .no_pcm = 1, .init = geminilake_rt5682_codec_init, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBC_CFC, + SND_SOC_DAIFMT_CBS_CFS, .ignore_pmdown_time = 1, .be_hw_params_fixup = geminilake_ssp_fixup, .ops = 
&geminilake_rt5682_ops, .dpcm_playback = 1, .dpcm_capture = 1, - SND_SOC_DAILINK_REG(ssp2_pin, ssp2_codec_5682, platform), + SND_SOC_DAILINK_REG(ssp2_pin, ssp2_codec, platform), }, { .name = "dmic01", @@ -610,28 +592,12 @@ static int geminilake_audio_probe(struct platform_device *pdev) struct snd_soc_acpi_mach *mach; const char *platform_name; struct snd_soc_card *card; - int ret, i; + int ret; ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - /* Detect the headset codec variant */ - if (acpi_dev_present("RTL5682", NULL, -1)) { - /* ALC5682I-VS is detected */ - ctx->is_rt5682s = 1; - - for (i = 0; i < glk_audio_card_rt5682_m98357a.num_links; i++) { - if (strcmp(geminilake_dais[i].name, "SSP2-Codec")) - continue; - - /* update the dai link to use rt5682s codec */ - geminilake_dais[i].codecs = ssp2_codec_5682s; - geminilake_dais[i].num_codecs = ARRAY_SIZE(ssp2_codec_5682s); - break; - } - } - INIT_LIST_HEAD(&ctx->hdmi_pcm_list); card = &glk_audio_card_rt5682_m98357a; diff --git a/sound/soc/intel/boards/hda_dsp_common.c b/sound/soc/intel/boards/hda_dsp_common.c index 5c31ddc088..efdc4bc4bb 100644 --- a/sound/soc/intel/boards/hda_dsp_common.c +++ b/sound/soc/intel/boards/hda_dsp_common.c @@ -68,7 +68,7 @@ int hda_dsp_hdmi_build_controls(struct snd_soc_card *card, hpcm->pcm = NULL; hpcm->device = SNDRV_PCM_INVALID_DEVICE; dev_warn(card->dev, - "%s: no PCM in topology for HDMI converter %d\n", + "%s: no PCM in topology for HDMI converter %d\n\n", __func__, i); } i++; diff --git a/sound/soc/intel/boards/sof_maxim_common.c b/sound/soc/intel/boards/sof_maxim_common.c index 112e89951d..e66dfe6669 100644 --- a/sound/soc/intel/boards/sof_maxim_common.c +++ b/sound/soc/intel/boards/sof_maxim_common.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -134,185 +133,6 @@ void max_98373_set_codec_conf(struct snd_soc_card *card) } EXPORT_SYMBOL_NS(max_98373_set_codec_conf, SND_SOC_INTEL_SOF_MAXIM_COMMON); -/* - * Maxim MAX98390 - */ -static const struct snd_soc_dapm_route max_98390_dapm_routes[] = { - /* speaker */ - { "Left Spk", NULL, "Left BE_OUT" }, - { "Right Spk", NULL, "Right BE_OUT" }, -}; - -static const struct snd_kcontrol_new max_98390_tt_kcontrols[] = { - SOC_DAPM_PIN_SWITCH("TL Spk"), - SOC_DAPM_PIN_SWITCH("TR Spk"), -}; - -static const struct snd_soc_dapm_widget max_98390_tt_dapm_widgets[] = { - SND_SOC_DAPM_SPK("TL Spk", NULL), - SND_SOC_DAPM_SPK("TR Spk", NULL), -}; - -static const struct snd_soc_dapm_route max_98390_tt_dapm_routes[] = { - /* Tweeter speaker */ - { "TL Spk", NULL, "Tweeter Left BE_OUT" }, - { "TR Spk", NULL, "Tweeter Right BE_OUT" }, -}; - -static struct snd_soc_codec_conf max_98390_codec_conf[] = { - { - .dlc = COMP_CODEC_CONF(MAX_98390_DEV0_NAME), - .name_prefix = "Right", - }, - { - .dlc = COMP_CODEC_CONF(MAX_98390_DEV1_NAME), - .name_prefix = "Left", - }, -}; - -static struct snd_soc_codec_conf max_98390_4spk_codec_conf[] = { - { - .dlc = COMP_CODEC_CONF(MAX_98390_DEV0_NAME), - .name_prefix = "Right", - }, - { - .dlc = COMP_CODEC_CONF(MAX_98390_DEV1_NAME), - .name_prefix = "Left", - }, - { - .dlc = COMP_CODEC_CONF(MAX_98390_DEV2_NAME), - .name_prefix = "Tweeter Right", - }, - { - .dlc = COMP_CODEC_CONF(MAX_98390_DEV3_NAME), - .name_prefix = "Tweeter Left", - }, -}; - -struct snd_soc_dai_link_component max_98390_components[] = { - { - .name = MAX_98390_DEV0_NAME, - .dai_name = MAX_98390_CODEC_DAI, - }, - { - .name = MAX_98390_DEV1_NAME, - .dai_name = MAX_98390_CODEC_DAI, - }, -}; 
-EXPORT_SYMBOL_NS(max_98390_components, SND_SOC_INTEL_SOF_MAXIM_COMMON); - -struct snd_soc_dai_link_component max_98390_4spk_components[] = { - { - .name = MAX_98390_DEV0_NAME, - .dai_name = MAX_98390_CODEC_DAI, - }, - { - .name = MAX_98390_DEV1_NAME, - .dai_name = MAX_98390_CODEC_DAI, - }, - { - .name = MAX_98390_DEV2_NAME, - .dai_name = MAX_98390_CODEC_DAI, - }, - { - .name = MAX_98390_DEV3_NAME, - .dai_name = MAX_98390_CODEC_DAI, - }, -}; -EXPORT_SYMBOL_NS(max_98390_4spk_components, SND_SOC_INTEL_SOF_MAXIM_COMMON); - -static int max_98390_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - struct snd_soc_dai *codec_dai; - int i; - - for_each_rtd_codec_dais(rtd, i, codec_dai) { - if (i >= ARRAY_SIZE(max_98390_4spk_components)) { - dev_err(codec_dai->dev, "invalid codec index %d\n", i); - return -ENODEV; - } - - if (!strcmp(codec_dai->component->name, MAX_98390_DEV0_NAME)) { - /* DEV0 tdm slot configuration Right */ - snd_soc_dai_set_tdm_slot(codec_dai, 0x01, 3, 4, 32); - } - if (!strcmp(codec_dai->component->name, MAX_98390_DEV1_NAME)) { - /* DEV1 tdm slot configuration Left */ - snd_soc_dai_set_tdm_slot(codec_dai, 0x02, 3, 4, 32); - } - - if (!strcmp(codec_dai->component->name, MAX_98390_DEV2_NAME)) { - /* DEV2 tdm slot configuration Tweeter Right */ - snd_soc_dai_set_tdm_slot(codec_dai, 0x04, 3, 4, 32); - } - if (!strcmp(codec_dai->component->name, MAX_98390_DEV3_NAME)) { - /* DEV3 tdm slot configuration Tweeter Left */ - snd_soc_dai_set_tdm_slot(codec_dai, 0x08, 3, 4, 32); - } - } - return 0; -} - -int max_98390_spk_codec_init(struct snd_soc_pcm_runtime *rtd) -{ - struct snd_soc_card *card = rtd->card; - int ret; - - /* add regular speakers dapm route */ - ret = snd_soc_dapm_add_routes(&card->dapm, max_98390_dapm_routes, - ARRAY_SIZE(max_98390_dapm_routes)); - if (ret) { - dev_err(rtd->dev, "unable to add Left/Right Speaker dapm, ret %d\n", ret); - return ret; - } - - /* add widgets/controls/dapm for tweeter speakers */ - if (acpi_dev_present("MX98390", "3", -1)) { - ret = snd_soc_dapm_new_controls(&card->dapm, max_98390_tt_dapm_widgets, - ARRAY_SIZE(max_98390_tt_dapm_widgets)); - - if (ret) { - dev_err(rtd->dev, "unable to add tweeter dapm controls, ret %d\n", ret); - /* Don't need to add routes if widget addition failed */ - return ret; - } - - ret = snd_soc_add_card_controls(card, max_98390_tt_kcontrols, - ARRAY_SIZE(max_98390_tt_kcontrols)); - if (ret) { - dev_err(rtd->dev, "unable to add tweeter card controls, ret %d\n", ret); - return ret; - } - - ret = snd_soc_dapm_add_routes(&card->dapm, max_98390_tt_dapm_routes, - ARRAY_SIZE(max_98390_tt_dapm_routes)); - if (ret) - dev_err(rtd->dev, - "unable to add Tweeter Left/Right Speaker dapm, ret %d\n", ret); - } - return ret; -} -EXPORT_SYMBOL_NS(max_98390_spk_codec_init, SND_SOC_INTEL_SOF_MAXIM_COMMON); - -const struct snd_soc_ops max_98390_ops = { - .hw_params = max_98390_hw_params, -}; -EXPORT_SYMBOL_NS(max_98390_ops, SND_SOC_INTEL_SOF_MAXIM_COMMON); - -void max_98390_set_codec_conf(struct snd_soc_card *card, int ch) -{ - if (ch == ARRAY_SIZE(max_98390_4spk_codec_conf)) { - card->codec_conf = max_98390_4spk_codec_conf; - card->num_configs = ARRAY_SIZE(max_98390_4spk_codec_conf); - } else { - card->codec_conf = max_98390_codec_conf; - card->num_configs = ARRAY_SIZE(max_98390_codec_conf); - } -} -EXPORT_SYMBOL_NS(max_98390_set_codec_conf, SND_SOC_INTEL_SOF_MAXIM_COMMON); - /* * Maxim MAX98357A/MAX98360A */ diff --git
a/sound/soc/intel/boards/sof_maxim_common.h b/sound/soc/intel/boards/sof_maxim_common.h index 7a8c53049e..3ff5e8fec4 100644 --- a/sound/soc/intel/boards/sof_maxim_common.h +++ b/sound/soc/intel/boards/sof_maxim_common.h @@ -24,22 +24,6 @@ int max_98373_spk_codec_init(struct snd_soc_pcm_runtime *rtd); void max_98373_set_codec_conf(struct snd_soc_card *card); int max_98373_trigger(struct snd_pcm_substream *substream, int cmd); -/* - * Maxim MAX98390 - */ -#define MAX_98390_CODEC_DAI "max98390-aif1" -#define MAX_98390_DEV0_NAME "i2c-MX98390:00" -#define MAX_98390_DEV1_NAME "i2c-MX98390:01" -#define MAX_98390_DEV2_NAME "i2c-MX98390:02" -#define MAX_98390_DEV3_NAME "i2c-MX98390:03" - -extern struct snd_soc_dai_link_component max_98390_components[2]; -extern struct snd_soc_dai_link_component max_98390_4spk_components[4]; -extern const struct snd_soc_ops max_98390_ops; - -void max_98390_set_codec_conf(struct snd_soc_card *card, int ch); -int max_98390_spk_codec_init(struct snd_soc_pcm_runtime *rtd); - /* * Maxim MAX98357A/MAX98360A */ diff --git a/sound/soc/intel/boards/sof_realtek_common.c b/sound/soc/intel/boards/sof_realtek_common.c index 4cf131310a..2ec34f8df9 100644 --- a/sound/soc/intel/boards/sof_realtek_common.c +++ b/sound/soc/intel/boards/sof_realtek_common.c @@ -12,13 +12,12 @@ #include #include #include "../../codecs/rt1011.h" -#include "../../codecs/rt1015.h" #include "sof_realtek_common.h" /* * Currently only the 2-amp configuration is supported for rt1011 */ -static const struct snd_soc_dapm_route speaker_map_lr[] = { +static const struct snd_soc_dapm_route rt1011_dapm_routes[] = { /* speaker */ { "Left Spk", NULL, "Left SPO" }, { "Right Spk", NULL, "Right SPO" }, @@ -118,8 +117,8 @@ static int rt1011_init(struct snd_soc_pcm_runtime *rtd) struct snd_soc_card *card = rtd->card; int ret; - ret = snd_soc_dapm_add_routes(&card->dapm, speaker_map_lr, - ARRAY_SIZE(speaker_map_lr)); + ret = snd_soc_dapm_add_routes(&card->dapm, rt1011_dapm_routes, + ARRAY_SIZE(rt1011_dapm_routes)); if (ret) dev_err(rtd->dev, "Speaker map addition failed: %d\n", ret); return ret; @@ -242,115 +241,3 @@ void sof_rt1015p_codec_conf(struct snd_soc_card *card) card->codec_conf = rt1015p_codec_confs; card->num_configs = ARRAY_SIZE(rt1015p_codec_confs); } - -/* - * RT1015 audio amplifier - */ - -static int rt1015_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - struct snd_soc_dai *codec_dai; - int i, fs = 64, ret; - - for_each_rtd_codec_dais(rtd, i, codec_dai) { - ret = snd_soc_dai_set_pll(codec_dai, 0, RT1015_PLL_S_BCLK, - params_rate(params) * fs, - params_rate(params) * 256); - if (ret) - return ret; - - ret = snd_soc_dai_set_sysclk(codec_dai, RT1015_SCLK_S_PLL, - params_rate(params) * 256, - SND_SOC_CLOCK_IN); - if (ret) - return ret; - } - - return 0; -} - -static int rt1015_hw_params_pll_and_tdm(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - struct snd_soc_dai *codec_dai; - int i, fs = 100, ret; - - for_each_rtd_codec_dais(rtd, i, codec_dai) { - ret = snd_soc_dai_set_pll(codec_dai, 0, RT1015_PLL_S_BCLK, - params_rate(params) * fs, - params_rate(params) * 256); - if (ret) - return ret; - - ret = snd_soc_dai_set_sysclk(codec_dai, RT1015_SCLK_S_PLL, - params_rate(params) * 256, - SND_SOC_CLOCK_IN); - if (ret) - return ret; - } - /* rx slot 1 for RT1015_DEV0_NAME */ - ret =
snd_soc_dai_set_tdm_slot(asoc_rtd_to_codec(rtd, 0), - 0x0, 0x1, 4, 24); - if (ret) - return ret; - - /* rx slot 2 for RT1015_DEV1_NAME */ - ret = snd_soc_dai_set_tdm_slot(asoc_rtd_to_codec(rtd, 1), - 0x0, 0x2, 4, 24); - if (ret) - return ret; - - return 0; -} - -static struct snd_soc_ops rt1015_ops = { - .hw_params = rt1015_hw_params, -}; - -static struct snd_soc_codec_conf rt1015_amp_conf[] = { - { - .dlc = COMP_CODEC_CONF(RT1015_DEV0_NAME), - .name_prefix = "Left", - }, - { - .dlc = COMP_CODEC_CONF(RT1015_DEV1_NAME), - .name_prefix = "Right", - }, -}; - -static struct snd_soc_dai_link_component rt1015_components[] = { - { - .name = RT1015_DEV0_NAME, - .dai_name = RT1015_CODEC_DAI, - }, - { - .name = RT1015_DEV1_NAME, - .dai_name = RT1015_CODEC_DAI, - }, -}; - -static int speaker_codec_init_lr(struct snd_soc_pcm_runtime *rtd) -{ - return snd_soc_dapm_add_routes(&rtd->card->dapm, speaker_map_lr, - ARRAY_SIZE(speaker_map_lr)); -} - -void sof_rt1015_codec_conf(struct snd_soc_card *card) -{ - card->codec_conf = rt1015_amp_conf; - card->num_configs = ARRAY_SIZE(rt1015_amp_conf); -} - -void sof_rt1015_dai_link(struct snd_soc_dai_link *link, unsigned int fs) -{ - link->codecs = rt1015_components; - link->num_codecs = ARRAY_SIZE(rt1015_components); - link->init = speaker_codec_init_lr; - link->ops = &rt1015_ops; - - if (fs == 100) - rt1015_ops.hw_params = rt1015_hw_params_pll_and_tdm; -} diff --git a/sound/soc/intel/boards/sof_realtek_common.h b/sound/soc/intel/boards/sof_realtek_common.h index 228ac9c084..cb0b49b285 100644 --- a/sound/soc/intel/boards/sof_realtek_common.h +++ b/sound/soc/intel/boards/sof_realtek_common.h @@ -28,11 +28,4 @@ void sof_rt1011_codec_conf(struct snd_soc_card *card); void sof_rt1015p_dai_link(struct snd_soc_dai_link *link); void sof_rt1015p_codec_conf(struct snd_soc_card *card); -#define RT1015_CODEC_DAI "rt1015-aif" -#define RT1015_DEV0_NAME "i2c-10EC1015:00" -#define RT1015_DEV1_NAME "i2c-10EC1015:01" - -void sof_rt1015_dai_link(struct snd_soc_dai_link *link, unsigned int fs); -void sof_rt1015_codec_conf(struct snd_soc_card *card); - #endif /* __SOF_REALTEK_COMMON_H */ diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index bd6d2e7dea..f096bd6d69 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -18,10 +18,9 @@ #include #include #include -#include #include +#include "../../codecs/rt1015.h" #include "../../codecs/rt5682.h" -#include "../../codecs/rt5682s.h" #include "../../codecs/hdac_hdmi.h" #include "../common/soc-intel-quirks.h" #include "hda_dsp_common.h" @@ -57,10 +56,6 @@ #define SOF_BT_OFFLOAD_SSP(quirk) \ (((quirk) << SOF_BT_OFFLOAD_SSP_SHIFT) & SOF_BT_OFFLOAD_SSP_MASK) #define SOF_SSP_BT_OFFLOAD_PRESENT BIT(22) -#define SOF_RT5682S_HEADPHONE_CODEC_PRESENT BIT(23) -#define SOF_MAX98390_SPEAKER_AMP_PRESENT BIT(24) -#define SOF_MAX98390_TWEETER_SPEAKER_PRESENT BIT(25) - /* Default: MCLK on, MCLK 19.2M, SSP0 */ static unsigned long sof_rt5682_quirk = SOF_RT5682_MCLK_EN | @@ -181,36 +176,6 @@ static const struct dmi_system_id sof_rt5682_quirk_table[] = { SOF_RT5682_SSP_AMP(2) | SOF_RT5682_NUM_HDMIDEV(4)), }, - { - .callback = sof_rt5682_quirk_cb, - .matches = { - DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Brya"), - DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98390_ALC5682I_I2S"), - }, - .driver_data = (void *)(SOF_RT5682_MCLK_EN | - SOF_RT5682_SSP_CODEC(0) | - SOF_SPEAKER_AMP_PRESENT | - SOF_MAX98390_SPEAKER_AMP_PRESENT | - SOF_RT5682_SSP_AMP(2) | - SOF_RT5682_NUM_HDMIDEV(4)), - }, - { - .callback 
= sof_rt5682_quirk_cb, - .matches = { - DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Brya"), - DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98390_ALC5682I_I2S_4SPK"), - }, - .driver_data = (void *)(SOF_RT5682_MCLK_EN | - SOF_RT5682_SSP_CODEC(0) | - SOF_SPEAKER_AMP_PRESENT | - SOF_MAX98390_SPEAKER_AMP_PRESENT | - SOF_MAX98390_TWEETER_SPEAKER_PRESENT | - SOF_RT5682_SSP_AMP(1) | - SOF_RT5682_NUM_HDMIDEV(4) | - SOF_BT_OFFLOAD_SSP(2) | - SOF_SSP_BT_OFFLOAD_PRESENT), - - }, {} }; @@ -243,16 +208,9 @@ static int sof_rt5682_codec_init(struct snd_soc_pcm_runtime *rtd) /* need to enable ASRC function for 24MHz mclk rate */ if ((sof_rt5682_quirk & SOF_RT5682_MCLK_EN) && (sof_rt5682_quirk & SOF_RT5682_MCLK_24MHZ)) { - if (sof_rt5682_quirk & SOF_RT5682S_HEADPHONE_CODEC_PRESENT) - rt5682s_sel_asrc_clk_src(component, - RT5682S_DA_STEREO1_FILTER | - RT5682S_AD_STEREO1_FILTER, - RT5682S_CLK_SEL_I2S1_ASRC); - else - rt5682_sel_asrc_clk_src(component, - RT5682_DA_STEREO1_FILTER | - RT5682_AD_STEREO1_FILTER, - RT5682_CLK_SEL_I2S1_ASRC); + rt5682_sel_asrc_clk_src(component, RT5682_DA_STEREO1_FILTER | + RT5682_AD_STEREO1_FILTER, + RT5682_CLK_SEL_I2S1_ASRC); } if (sof_rt5682_quirk & SOF_RT5682_MCLK_BYTCHT_EN) { @@ -319,7 +277,7 @@ static int sof_rt5682_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct sof_card_private *ctx = snd_soc_card_get_drvdata(rtd->card); struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0); - int pll_id, pll_source, pll_in, pll_out, clk_id, ret; + int clk_id, clk_freq, pll_out, ret; if (sof_rt5682_quirk & SOF_RT5682_MCLK_EN) { if (sof_rt5682_quirk & SOF_RT5682_MCLK_BYTCHT_EN) { @@ -331,52 +289,35 @@ static int sof_rt5682_hw_params(struct snd_pcm_substream *substream, } } - if (sof_rt5682_quirk & SOF_RT5682S_HEADPHONE_CODEC_PRESENT) - pll_source = RT5682S_PLL_S_MCLK; - else - pll_source = RT5682_PLL1_S_MCLK; + clk_id = RT5682_PLL1_S_MCLK; /* get the tplg configured mclk. 
*/ - pll_in = sof_dai_get_mclk(rtd); + clk_freq = sof_dai_get_mclk(rtd); /* mclk from the quirk is the first choice */ if (sof_rt5682_quirk & SOF_RT5682_MCLK_24MHZ) { - if (pll_in != 24000000) + if (clk_freq != 24000000) dev_warn(rtd->dev, "configure wrong mclk in tplg, please use 24MHz.\n"); - pll_in = 24000000; - } else if (pll_in == 0) { + clk_freq = 24000000; + } else if (clk_freq == 0) { /* use default mclk if not specified correct in topology */ - pll_in = 19200000; - } else if (pll_in < 0) { - return pll_in; + clk_freq = 19200000; + } else if (clk_freq < 0) { + return clk_freq; } } else { - if (sof_rt5682_quirk & SOF_RT5682S_HEADPHONE_CODEC_PRESENT) - pll_source = RT5682S_PLL_S_BCLK1; - else - pll_source = RT5682_PLL1_S_BCLK1; - - pll_in = params_rate(params) * 50; - } - - if (sof_rt5682_quirk & SOF_RT5682S_HEADPHONE_CODEC_PRESENT) { - pll_id = RT5682S_PLL2; - clk_id = RT5682S_SCLK_S_PLL2; - } else { - pll_id = RT5682_PLL1; - clk_id = RT5682_SCLK_S_PLL1; + clk_id = RT5682_PLL1_S_BCLK1; + clk_freq = params_rate(params) * 50; } pll_out = params_rate(params) * 512; - /* Configure pll for codec */ - ret = snd_soc_dai_set_pll(codec_dai, pll_id, pll_source, pll_in, - pll_out); + ret = snd_soc_dai_set_pll(codec_dai, 0, clk_id, clk_freq, pll_out); if (ret < 0) dev_err(rtd->dev, "snd_soc_dai_set_pll err = %d\n", ret); /* Configure sysclk for codec */ - ret = snd_soc_dai_set_sysclk(codec_dai, clk_id, + ret = snd_soc_dai_set_sysclk(codec_dai, RT5682_SCLK_S_PLL1, pll_out, SND_SOC_CLOCK_IN); if (ret < 0) dev_err(rtd->dev, "snd_soc_dai_set_sysclk err = %d\n", ret); @@ -399,6 +340,67 @@ static struct snd_soc_ops sof_rt5682_ops = { .hw_params = sof_rt5682_hw_params, }; +static int sof_rt1015_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_card *card = rtd->card; + struct snd_soc_dai *codec_dai; + int i, fs, ret; + + if (!snd_soc_card_get_codec_dai(card, "rt1015-aif")) + return 0; + + if (sof_rt5682_quirk & SOF_RT1015_SPEAKER_AMP_100FS) + fs = 100; + else + fs = 64; + + for_each_rtd_codec_dais(rtd, i, codec_dai) { + ret = snd_soc_dai_set_pll(codec_dai, 0, RT1015_PLL_S_BCLK, + params_rate(params) * fs, + params_rate(params) * 256); + if (ret < 0) { + dev_err(card->dev, "failed to set pll\n"); + return ret; + } + /* Configure sysclk for codec */ + ret = snd_soc_dai_set_sysclk(codec_dai, RT1015_SCLK_S_PLL, + params_rate(params) * 256, + SND_SOC_CLOCK_IN); + if (ret < 0) { + dev_err(card->dev, "failed to set sysclk\n"); + return ret; + } + + if (sof_rt5682_quirk & SOF_RT1015_SPEAKER_AMP_100FS) { + if (!strcmp(codec_dai->component->name, "i2c-10EC1015:00")) { + ret = snd_soc_dai_set_tdm_slot(codec_dai, + 0x0, 0x1, 4, 24); + if (ret < 0) { + dev_err(card->dev, "failed to set tdm slot\n"); + return ret; + } + } + + if (!strcmp(codec_dai->component->name, "i2c-10EC1015:01")) { + ret = snd_soc_dai_set_tdm_slot(codec_dai, + 0x0, 0x2, 4, 24); + if (ret < 0) { + dev_err(card->dev, "failed to set tdm slot\n"); + return ret; + } + } + } + } + + return 0; +} + +static struct snd_soc_ops sof_rt1015_ops = { + .hw_params = sof_rt1015_hw_params, +}; + static struct snd_soc_dai_link_component platform_component[] = { { /* name might be overridden during probe */ @@ -457,7 +459,6 @@ static int sof_card_late_probe(struct snd_soc_card *card) if (err < 0) return err; } - return hdac_hdmi_jack_port_init(component, &card->dapm); } @@ -489,11 +490,22 @@ static const struct snd_soc_dapm_route sof_map[] = 
{ { "IN1P", NULL, "Headset Mic" }, }; +static const struct snd_soc_dapm_route speaker_map_lr[] = { + { "Left Spk", NULL, "Left SPO" }, + { "Right Spk", NULL, "Right SPO" }, +}; + static const struct snd_soc_dapm_route dmic_map[] = { /* digital mics */ {"DMic", NULL, "SoC DMIC"}, }; +static int speaker_codec_init_lr(struct snd_soc_pcm_runtime *rtd) +{ + return snd_soc_dapm_add_routes(&rtd->card->dapm, speaker_map_lr, + ARRAY_SIZE(speaker_map_lr)); +} + static int dmic_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_card *card = rtd->card; @@ -516,6 +528,17 @@ static int dmic_init(struct snd_soc_pcm_runtime *rtd) return ret; } +static struct snd_soc_codec_conf rt1015_amp_conf[] = { + { + .dlc = COMP_CODEC_CONF("i2c-10EC1015:00"), + .name_prefix = "Left", + }, + { + .dlc = COMP_CODEC_CONF("i2c-10EC1015:01"), + .name_prefix = "Right", + }, +}; + /* sof audio machine driver for rt5682 codec */ static struct snd_soc_card sof_audio_card_rt5682 = { .name = "rt5682", /* the sof- prefix is added by the core */ @@ -537,13 +560,6 @@ static struct snd_soc_dai_link_component rt5682_component[] = { } }; -static struct snd_soc_dai_link_component rt5682s_component[] = { - { - .name = "i2c-RTL5682:00", - .dai_name = "rt5682s-aif1", - } -}; - static struct snd_soc_dai_link_component dmic_component[] = { { .name = "dmic-codec", @@ -551,6 +567,17 @@ static struct snd_soc_dai_link_component dmic_component[] = { } }; +static struct snd_soc_dai_link_component rt1015_components[] = { + { + .name = "i2c-10EC1015:00", + .dai_name = "rt1015-aif", + }, + { + .name = "i2c-10EC1015:01", + .dai_name = "rt1015-aif", + }, +}; + static struct snd_soc_dai_link_component dummy_component[] = { { .name = "snd-soc-dummy", @@ -583,13 +610,8 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, goto devm_err; links[id].id = id; - if (sof_rt5682_quirk & SOF_RT5682S_HEADPHONE_CODEC_PRESENT) { - links[id].codecs = rt5682s_component; - links[id].num_codecs = ARRAY_SIZE(rt5682s_component); - } else { - links[id].codecs = rt5682_component; - links[id].num_codecs = ARRAY_SIZE(rt5682_component); - } + links[id].codecs = rt5682_component; + links[id].num_codecs = ARRAY_SIZE(rt5682_component); links[id].platforms = platform_component; links[id].num_platforms = ARRAY_SIZE(platform_component); links[id].init = sof_rt5682_codec_init; @@ -703,8 +725,10 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, links[id].id = id; if (sof_rt5682_quirk & SOF_RT1015_SPEAKER_AMP_PRESENT) { - sof_rt1015_dai_link(&links[id], (sof_rt5682_quirk & - SOF_RT1015_SPEAKER_AMP_100FS) ? 
100 : 64); + links[id].codecs = rt1015_components; + links[id].num_codecs = ARRAY_SIZE(rt1015_components); + links[id].init = speaker_codec_init_lr; + links[id].ops = &sof_rt1015_ops; } else if (sof_rt5682_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) { sof_rt1015p_dai_link(&links[id]); } else if (sof_rt5682_quirk & @@ -721,20 +745,6 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, } else if (sof_rt5682_quirk & SOF_RT1011_SPEAKER_AMP_PRESENT) { sof_rt1011_dai_link(&links[id]); - } else if (sof_rt5682_quirk & - SOF_MAX98390_SPEAKER_AMP_PRESENT) { - if (sof_rt5682_quirk & - SOF_MAX98390_TWEETER_SPEAKER_PRESENT) { - links[id].codecs = max_98390_4spk_components; - links[id].num_codecs = ARRAY_SIZE(max_98390_4spk_components); - } else { - links[id].codecs = max_98390_components; - links[id].num_codecs = ARRAY_SIZE(max_98390_components); - } - links[id].init = max_98390_spk_codec_init; - links[id].ops = &max_98390_ops; - links[id].dpcm_capture = 1; - } else { max_98357a_dai_link(&links[id]); } @@ -815,14 +825,6 @@ static int sof_audio_probe(struct platform_device *pdev) if ((sof_rt5682_quirk & SOF_SPEAKER_AMP_PRESENT) && !mach->quirk_data) sof_rt5682_quirk &= ~SOF_SPEAKER_AMP_PRESENT; - /* Detect the headset codec variant */ - if (acpi_dev_present("RTL5682", NULL, -1)) - sof_rt5682_quirk |= SOF_RT5682S_HEADPHONE_CODEC_PRESENT; - - /* Detect the headset codec variant to support machines in DMI quirk */ - if (acpi_dev_present("RTL5682", NULL, -1)) - sof_rt5682_quirk |= SOF_RT5682S_HEADPHONE_CODEC_PRESENT; - if (soc_intel_is_byt() || soc_intel_is_cht()) { is_legacy_cpu = 1; dmic_be_num = 0; @@ -879,14 +881,6 @@ static int sof_audio_probe(struct platform_device *pdev) sof_rt1011_codec_conf(&sof_audio_card_rt5682); else if (sof_rt5682_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) sof_rt1015p_codec_conf(&sof_audio_card_rt5682); - else if (sof_rt5682_quirk & SOF_MAX98390_SPEAKER_AMP_PRESENT) { - if (sof_rt5682_quirk & SOF_MAX98390_TWEETER_SPEAKER_PRESENT) - max_98390_set_codec_conf(&sof_audio_card_rt5682, - ARRAY_SIZE(max_98390_4spk_components)); - else - max_98390_set_codec_conf(&sof_audio_card_rt5682, - ARRAY_SIZE(max_98390_components)); - } if (sof_rt5682_quirk & SOF_SSP_BT_OFFLOAD_PRESENT) sof_audio_card_rt5682.num_links++; @@ -898,8 +892,10 @@ static int sof_audio_probe(struct platform_device *pdev) sof_audio_card_rt5682.dai_link = dai_links; - if (sof_rt5682_quirk & SOF_RT1015_SPEAKER_AMP_PRESENT) - sof_rt1015_codec_conf(&sof_audio_card_rt5682); + if (sof_rt5682_quirk & SOF_RT1015_SPEAKER_AMP_PRESENT) { + sof_audio_card_rt5682.codec_conf = rt1015_amp_conf; + sof_audio_card_rt5682.num_configs = ARRAY_SIZE(rt1015_amp_conf); + } INIT_LIST_HEAD(&ctx->hdmi_pcm_list); @@ -924,7 +920,7 @@ static const struct platform_device_id board_ids[] = { .name = "sof_rt5682", }, { - .name = "tgl_mx98357_rt5682", + .name = "tgl_mx98357a_rt5682", .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN | SOF_RT5682_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | @@ -954,7 +950,7 @@ static const struct platform_device_id board_ids[] = { SOF_SSP_BT_OFFLOAD_PRESENT), }, { - .name = "jsl_rt5682_mx98360", + .name = "jsl_rt5682_mx98360a", .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN | SOF_RT5682_MCLK_24MHZ | SOF_RT5682_SSP_CODEC(0) | @@ -1004,35 +1000,13 @@ static const struct platform_device_id board_ids[] = { SOF_SSP_BT_OFFLOAD_PRESENT), }, { - .name = "adl_mx98357_rt5682", + .name = "adl_mx98357a_rt5682", .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN | SOF_RT5682_SSP_CODEC(0) | 
SOF_SPEAKER_AMP_PRESENT | SOF_RT5682_SSP_AMP(2) | SOF_RT5682_NUM_HDMIDEV(4)), }, - { - .name = "adl_max98390_rt5682", - .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN | - SOF_RT5682_SSP_CODEC(0) | - SOF_SPEAKER_AMP_PRESENT | - SOF_MAX98390_SPEAKER_AMP_PRESENT | - SOF_RT5682_SSP_AMP(1) | - SOF_RT5682_NUM_HDMIDEV(4) | - SOF_BT_OFFLOAD_SSP(2) | - SOF_SSP_BT_OFFLOAD_PRESENT), - }, - { - .name = "adl_mx98360_rt5682", - .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN | - SOF_RT5682_SSP_CODEC(0) | - SOF_SPEAKER_AMP_PRESENT | - SOF_MAX98360A_SPEAKER_AMP_PRESENT | - SOF_RT5682_SSP_AMP(1) | - SOF_RT5682_NUM_HDMIDEV(4) | - SOF_BT_OFFLOAD_SSP(2) | - SOF_SSP_BT_OFFLOAD_PRESENT), - }, { } }; MODULE_DEVICE_TABLE(platform, board_ids); @@ -1052,7 +1026,6 @@ MODULE_DESCRIPTION("SOF Audio Machine driver"); MODULE_AUTHOR("Bard Liao "); MODULE_AUTHOR("Sathya Prakash M R "); MODULE_AUTHOR("Brent Lu "); -MODULE_AUTHOR("Mac Chiang "); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(SND_SOC_INTEL_HDA_DSP_COMMON); MODULE_IMPORT_NS(SND_SOC_INTEL_SOF_MAXIM_COMMON); diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index da515eb1dd..76759b2099 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -36,6 +36,8 @@ static void log_quirks(struct device *dev) if (SOF_SSP_GET_PORT(sof_sdw_quirk)) dev_dbg(dev, "SSP port %ld\n", SOF_SSP_GET_PORT(sof_sdw_quirk)); + if (sof_sdw_quirk & SOF_RT715_DAI_ID_FIX) + dev_dbg(dev, "quirk SOF_RT715_DAI_ID_FIX enabled\n"); if (sof_sdw_quirk & SOF_SDW_NO_AGGREGATION) dev_dbg(dev, "quirk SOF_SDW_NO_AGGREGATION enabled\n"); } @@ -62,7 +64,8 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "09C6") }, - .driver_data = (void *)RT711_JD2, + .driver_data = (void *)(RT711_JD2 | + SOF_RT715_DAI_ID_FIX), }, { /* early version of SKU 09C6 */ @@ -71,7 +74,8 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0983") }, - .driver_data = (void *)RT711_JD2, + .driver_data = (void *)(RT711_JD2 | + SOF_RT715_DAI_ID_FIX), }, { .callback = sof_sdw_quirk_cb, @@ -80,6 +84,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "098F"), }, .driver_data = (void *)(RT711_JD2 | + SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, { @@ -89,6 +94,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0990"), }, .driver_data = (void *)(RT711_JD2 | + SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, /* IceLake devices */ @@ -120,17 +126,8 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A3E") }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2), - }, - { - /* another SKU of Dell Latitude 9520 */ - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A3F") - }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2), + RT711_JD2 | + SOF_RT715_DAI_ID_FIX), }, { /* Dell XPS 9710 */ @@ -141,6 +138,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | RT711_JD2 | + SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, { @@ -151,6 +149,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | RT711_JD2 | + SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, { @@ -211,6 +210,7 @@ static const struct 
dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | RT711_JD2 | + SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, { @@ -220,7 +220,8 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A45") }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2), + RT711_JD2 | + SOF_RT715_DAI_ID_FIX), }, /* AlderLake devices */ { @@ -231,6 +232,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(RT711_JD2_100K | SOF_SDW_TGL_HDMI | + SOF_RT715_DAI_ID_FIX | SOF_BT_OFFLOAD_SSP(2) | SOF_SSP_BT_OFFLOAD_PRESENT), }, @@ -246,75 +248,6 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_BT_OFFLOAD_SSP(2) | SOF_SSP_BT_OFFLOAD_PRESENT), }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0AF3"), - }, - /* No Jack */ - .driver_data = (void *)(SOF_SDW_TGL_HDMI | - SOF_SDW_FOUR_SPK), - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B00") - }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2 | - SOF_SDW_FOUR_SPK), - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B01") - }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2 | - SOF_SDW_FOUR_SPK), - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B11") - }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2 | - SOF_SDW_FOUR_SPK), - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B12") - }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2 | - SOF_SDW_FOUR_SPK), - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B13"), - }, - /* No Jack */ - .driver_data = (void *)SOF_SDW_TGL_HDMI, - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B29"), - }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | - RT711_JD2 | - SOF_SDW_FOUR_SPK), - }, {} }; @@ -347,7 +280,7 @@ int sdw_prepare(struct snd_pcm_substream *substream) /* Find stream from first CPU DAI */ dai = asoc_rtd_to_cpu(rtd, 0); - sdw_stream = snd_soc_dai_get_stream(dai, substream->stream); + sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream); if (IS_ERR(sdw_stream)) { dev_err(rtd->dev, "no stream found for DAI %s", dai->name); @@ -367,7 +300,7 @@ int sdw_trigger(struct snd_pcm_substream *substream, int cmd) /* Find stream from first CPU DAI */ dai = asoc_rtd_to_cpu(rtd, 0); - sdw_stream = snd_soc_dai_get_stream(dai, substream->stream); + sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream); if (IS_ERR(sdw_stream)) { dev_err(rtd->dev, "no stream found for DAI %s", dai->name); @@ -406,7 +339,7 @@ int sdw_hw_free(struct snd_pcm_substream *substream) /* Find stream from first CPU DAI */ dai = asoc_rtd_to_cpu(rtd, 0); - sdw_stream = snd_soc_dai_get_stream(dai, substream->stream); + sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream); if (IS_ERR(sdw_stream)) { dev_err(rtd->dev, "no stream found for DAI %s", dai->name); @@ -429,13 +362,26 @@ static const struct snd_soc_ops sdw_ops = { .shutdown = sdw_shutdown, }; +static int sof_sdw_mic_codec_mockup_init(struct 
snd_soc_card *card, + const struct snd_soc_acpi_link_adr *link, + struct snd_soc_dai_link *dai_links, + struct sof_sdw_codec_info *info, + bool playback) +{ + /* + * force DAI link to use same ID as RT715 and DMIC + * to reuse topologies + */ + dai_links->id = SDW_DMIC_DAI_ID; + return 0; +} + static struct sof_sdw_codec_info codec_info_list[] = { { .part_id = 0x700, .direction = {true, true}, .dai_name = "rt700-aif1", .init = sof_sdw_rt700_init, - .codec_type = SOF_SDW_CODEC_TYPE_JACK, }, { .part_id = 0x711, @@ -444,7 +390,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .dai_name = "rt711-sdca-aif1", .init = sof_sdw_rt711_sdca_init, .exit = sof_sdw_rt711_sdca_exit, - .codec_type = SOF_SDW_CODEC_TYPE_JACK, }, { .part_id = 0x711, @@ -453,7 +398,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .dai_name = "rt711-aif1", .init = sof_sdw_rt711_init, .exit = sof_sdw_rt711_exit, - .codec_type = SOF_SDW_CODEC_TYPE_JACK, }, { .part_id = 0x1308, @@ -462,14 +406,12 @@ static struct sof_sdw_codec_info codec_info_list[] = { .dai_name = "rt1308-aif", .ops = &sof_sdw_rt1308_i2s_ops, .init = sof_sdw_rt1308_init, - .codec_type = SOF_SDW_CODEC_TYPE_AMP, }, { .part_id = 0x1316, .direction = {true, true}, .dai_name = "rt1316-aif", .init = sof_sdw_rt1316_init, - .codec_type = SOF_SDW_CODEC_TYPE_AMP, }, { .part_id = 0x714, @@ -478,7 +420,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .ignore_pch_dmic = true, .dai_name = "rt715-aif2", .init = sof_sdw_rt715_sdca_init, - .codec_type = SOF_SDW_CODEC_TYPE_MIC, }, { .part_id = 0x715, @@ -487,7 +428,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .ignore_pch_dmic = true, .dai_name = "rt715-aif2", .init = sof_sdw_rt715_sdca_init, - .codec_type = SOF_SDW_CODEC_TYPE_MIC, }, { .part_id = 0x714, @@ -496,7 +436,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .ignore_pch_dmic = true, .dai_name = "rt715-aif2", .init = sof_sdw_rt715_init, - .codec_type = SOF_SDW_CODEC_TYPE_MIC, }, { .part_id = 0x715, @@ -505,7 +444,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .ignore_pch_dmic = true, .dai_name = "rt715-aif2", .init = sof_sdw_rt715_init, - .codec_type = SOF_SDW_CODEC_TYPE_MIC, }, { .part_id = 0x8373, @@ -513,14 +451,12 @@ static struct sof_sdw_codec_info codec_info_list[] = { .dai_name = "max98373-aif1", .init = sof_sdw_mx8373_init, .codec_card_late_probe = sof_sdw_mx8373_late_probe, - .codec_type = SOF_SDW_CODEC_TYPE_AMP, }, { .part_id = 0x5682, .direction = {true, true}, .dai_name = "rt5682-sdw", .init = sof_sdw_rt5682_init, - .codec_type = SOF_SDW_CODEC_TYPE_JACK, }, { .part_id = 0xaaaa, /* generic codec mockup */ @@ -528,7 +464,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .direction = {true, true}, .dai_name = "sdw-mockup-aif1", .init = NULL, - .codec_type = SOF_SDW_CODEC_TYPE_JACK, }, { .part_id = 0xaa55, /* headset codec mockup */ @@ -536,7 +471,6 @@ static struct sof_sdw_codec_info codec_info_list[] = { .direction = {true, true}, .dai_name = "sdw-mockup-aif1", .init = NULL, - .codec_type = SOF_SDW_CODEC_TYPE_JACK, }, { .part_id = 0x55aa, /* amplifier mockup */ @@ -544,14 +478,13 @@ static struct sof_sdw_codec_info codec_info_list[] = { .direction = {true, false}, .dai_name = "sdw-mockup-aif1", .init = NULL, - .codec_type = SOF_SDW_CODEC_TYPE_AMP, }, { .part_id = 0x5555, .version_id = 0, .direction = {false, true}, .dai_name = "sdw-mockup-aif1", - .codec_type = SOF_SDW_CODEC_TYPE_MIC, + .init = sof_sdw_mic_codec_mockup_init, }, }; @@ -599,11 +532,10 @@ static inline int 
find_codec_info_acpi(const u8 *acpi_id) * Since some sdw slaves may be aggregated, the CPU DAI number * may be larger than the number of BE dailinks. */ -static int get_sdw_dailink_info(struct device *dev, const struct snd_soc_acpi_link_adr *links, +static int get_sdw_dailink_info(const struct snd_soc_acpi_link_adr *links, int *sdw_be_num, int *sdw_cpu_dai_num) { const struct snd_soc_acpi_link_adr *link; - int _codec_type = SOF_SDW_CODEC_TYPE_JACK; bool group_visited[SDW_MAX_GROUPS]; bool no_aggregation; int i; @@ -629,12 +561,6 @@ static int get_sdw_dailink_info(struct device *dev, const struct snd_soc_acpi_li if (codec_index < 0) return codec_index; - if (codec_info_list[codec_index].codec_type < _codec_type) - dev_warn(dev, - "Unexpected address table ordering. Expected order: jack -> amp -> mic\n"); - - _codec_type = codec_info_list[codec_index].codec_type; - endpoint = link->adr_d->endpoints; /* count DAI number for playback and capture */ @@ -893,14 +819,14 @@ static int get_slave_info(const struct snd_soc_acpi_link_adr *adr_link, } static int create_sdw_dailink(struct snd_soc_card *card, - struct device *dev, int *link_index, + struct device *dev, int *be_index, struct snd_soc_dai_link *dai_links, int sdw_be_num, int sdw_cpu_dai_num, struct snd_soc_dai_link_component *cpus, const struct snd_soc_acpi_link_adr *link, int *cpu_id, bool *group_generated, struct snd_soc_codec_conf *codec_conf, - int codec_count, int *link_id, + int codec_count, int *codec_conf_index, bool *ignore_pch_dmic) { @@ -958,19 +884,6 @@ static int create_sdw_dailink(struct snd_soc_card *card, if (codec_info_list[codec_index].ignore_pch_dmic) *ignore_pch_dmic = true; - /* Shift the first amplifier's *link_id to SDW_AMP_DAI_ID */ - if (codec_info_list[codec_index].codec_type == SOF_SDW_CODEC_TYPE_AMP && - *link_id < SDW_AMP_DAI_ID) - *link_id = SDW_AMP_DAI_ID; - - /* - * DAI ID is fixed at SDW_DMIC_DAI_ID for MICs to - * keep sdw DMIC and HDMI setting static in UCM - */ - if (codec_info_list[codec_index].codec_type == SOF_SDW_CODEC_TYPE_MIC && - *link_id < SDW_DMIC_DAI_ID) - *link_id = SDW_DMIC_DAI_ID; - cpu_dai_index = *cpu_id; for_each_pcm_streams(stream) { char *name, *cpu_name; @@ -1009,12 +922,8 @@ static int create_sdw_dailink(struct snd_soc_card *card, cpus[cpu_dai_index++].dai_name = cpu_name; } - /* - * We create sdw dai links at first stage, so link index should - * not be larger than sdw_be_num - */ - if (*link_index >= sdw_be_num) { - dev_err(dev, "invalid dai link index %d", *link_index); + if (*be_index >= sdw_be_num) { + dev_err(dev, " invalid be dai index %d", *be_index); return -EINVAL; } @@ -1025,19 +934,18 @@ static int create_sdw_dailink(struct snd_soc_card *card, playback = (stream == SNDRV_PCM_STREAM_PLAYBACK); capture = (stream == SNDRV_PCM_STREAM_CAPTURE); - init_dai_link(dev, dai_links + *link_index, (*link_id)++, name, + init_dai_link(dev, dai_links + *be_index, *be_index, name, playback, capture, cpus + *cpu_id, cpu_dai_num, codecs, codec_num, NULL, &sdw_ops); - /* * SoundWire DAILINKs use 'stream' functions and Bank Switch operations * based on wait_for_completion(), tag them as 'nonatomic'. 
*/ - dai_links[*link_index].nonatomic = true; + dai_links[*be_index].nonatomic = true; - ret = set_codec_init_func(card, link, dai_links + (*link_index)++, + ret = set_codec_init_func(card, link, dai_links + (*be_index)++, playback, group_id); if (ret < 0) { dev_err(dev, "failed to init codec %d", codec_index); @@ -1051,6 +959,17 @@ static int create_sdw_dailink(struct snd_soc_card *card, return 0; } +/* + * DAI link IDs of SSP, DMIC and HDMI are based on the last link ID + * used by the sdw links. Since a sdw codec's init function may change + * its link's ID, the next BE ID is read back from the links array. + */ +static inline int get_next_be_id(struct snd_soc_dai_link *links, + int be_id) +{ + return links[be_id - 1].id + 1; +} + #define IDISP_CODEC_MASK 0x4 static int sof_card_codec_conf_alloc(struct device *dev, @@ -1107,7 +1026,7 @@ static int sof_card_dai_links_create(struct device *dev, bool group_generated[SDW_MAX_GROUPS]; int ssp_codec_index, ssp_mask; struct snd_soc_dai_link *links; - int num_links, link_index = 0; + int num_links, link_id = 0; char *name, *cpu_name; int total_cpu_dai_num; int sdw_cpu_dai_num; @@ -1143,7 +1062,7 @@ static int sof_card_dai_links_create(struct device *dev, ssp_num = ssp_codec_index >= 0 ? hweight_long(ssp_mask) : 0; comp_num = hdmi_num + ssp_num; - ret = get_sdw_dailink_info(dev, mach_params->links, + ret = get_sdw_dailink_info(mach_params->links, &sdw_be_num, &sdw_cpu_dai_num); if (ret < 0) { dev_err(dev, "failed to get sdw link info %d", ret); @@ -1207,18 +1126,24 @@ static int sof_card_dai_links_create(struct device *dev, group_generated[endpoint->group_id]) continue; - ret = create_sdw_dailink(card, dev, &link_index, links, sdw_be_num, + ret = create_sdw_dailink(card, dev, &be_id, links, sdw_be_num, sdw_cpu_dai_num, cpus, adr_link, &cpu_id, group_generated, codec_conf, codec_conf_count, - &be_id, &codec_conf_index, + &codec_conf_index, &ignore_pch_dmic); if (ret < 0) { - dev_err(dev, "failed to create dai link %d", link_index); - return ret; + dev_err(dev, "failed to create dai link %d", be_id); + return -ENOMEM; } } + /* non-sdw DAI follows sdw DAI */ + link_id = be_id; + + /* get BE ID for non-sdw DAI */ + be_id = get_next_be_id(links, be_id); + SSP: /* SSP */ if (!ssp_num) @@ -1258,17 +1183,17 @@ static int sof_card_dai_links_create(struct device *dev, playback = info->direction[SNDRV_PCM_STREAM_PLAYBACK]; capture = info->direction[SNDRV_PCM_STREAM_CAPTURE]; - init_dai_link(dev, links + link_index, be_id, name, + init_dai_link(dev, links + link_id, be_id, name, playback, capture, cpus + cpu_id, 1, ssp_components, 1, NULL, info->ops); - ret = info->init(card, NULL, links + link_index, info, 0); + ret = info->init(card, NULL, links + link_id, info, 0); if (ret < 0) return ret; - INC_ID(be_id, cpu_id, link_index); + INC_ID(be_id, cpu_id, link_id); } DMIC: @@ -1279,21 +1204,21 @@ static int sof_card_dai_links_create(struct device *dev, goto HDMI; } cpus[cpu_id].dai_name = "DMIC01 Pin"; - init_dai_link(dev, links + link_index, be_id, "dmic01", + init_dai_link(dev, links + link_id, be_id, "dmic01", 0, 1, // DMIC only supports capture cpus + cpu_id, 1, dmic_component, 1, sof_sdw_dmic_init, NULL); - INC_ID(be_id, cpu_id, link_index); + INC_ID(be_id, cpu_id, link_id); cpus[cpu_id].dai_name = "DMIC16k Pin"; - init_dai_link(dev, links + link_index, be_id, "dmic16k", + init_dai_link(dev, links + link_id, be_id, "dmic16k", 0, 1, // DMIC only supports capture cpus + cpu_id, 1, dmic_component, 1, /* don't call sof_sdw_dmic_init() twice */ NULL, NULL); - INC_ID(be_id, cpu_id, link_index); +
INC_ID(be_id, cpu_id, link_id); } HDMI: @@ -1331,12 +1256,12 @@ static int sof_card_dai_links_create(struct device *dev, return -ENOMEM; cpus[cpu_id].dai_name = cpu_name; - init_dai_link(dev, links + link_index, be_id, name, + init_dai_link(dev, links + link_id, be_id, name, 1, 0, // HDMI only supports playback cpus + cpu_id, 1, idisp_components + i, 1, sof_sdw_hdmi_init, NULL); - INC_ID(be_id, cpu_id, link_index); + INC_ID(be_id, cpu_id, link_id); } if (sof_sdw_quirk & SOF_SSP_BT_OFFLOAD_PRESENT) { @@ -1360,7 +1285,7 @@ static int sof_card_dai_links_create(struct device *dev, return -ENOMEM; cpus[cpu_id].dai_name = cpu_name; - init_dai_link(dev, links + link_index, be_id, name, 1, 1, + init_dai_link(dev, links + link_id, be_id, name, 1, 1, cpus + cpu_id, 1, ssp_components, 1, NULL, NULL); } diff --git a/sound/soc/intel/boards/sof_sdw_common.h b/sound/soc/intel/boards/sof_sdw_common.h index e2457738a3..b35f5a9b96 100644 --- a/sound/soc/intel/boards/sof_sdw_common.h +++ b/sound/soc/intel/boards/sof_sdw_common.h @@ -15,7 +15,6 @@ #define MAX_NO_PROPS 2 #define MAX_HDMI_NUM 4 -#define SDW_AMP_DAI_ID 2 #define SDW_DMIC_DAI_ID 4 #define SDW_MAX_CPU_DAIS 16 #define SDW_INTEL_BIDIR_PDI_BASE 2 @@ -43,6 +42,7 @@ enum { #define SOF_SDW_PCH_DMIC BIT(6) #define SOF_SSP_PORT(x) (((x) & GENMASK(5, 0)) << 7) #define SOF_SSP_GET_PORT(quirk) (((quirk) >> 7) & GENMASK(5, 0)) +#define SOF_RT715_DAI_ID_FIX BIT(13) #define SOF_SDW_NO_AGGREGATION BIT(14) /* BT audio offload: reserve 3 bits for future */ @@ -52,14 +52,9 @@ enum { (((quirk) << SOF_BT_OFFLOAD_SSP_SHIFT) & SOF_BT_OFFLOAD_SSP_MASK) #define SOF_SSP_BT_OFFLOAD_PRESENT BIT(18) -#define SOF_SDW_CODEC_TYPE_JACK 0 -#define SOF_SDW_CODEC_TYPE_AMP 1 -#define SOF_SDW_CODEC_TYPE_MIC 2 - struct sof_sdw_codec_info { const int part_id; const int version_id; - const int codec_type; int amp_num; const u8 acpi_id[ACPI_ID_LEN]; const bool direction[2]; // playback & capture support diff --git a/sound/soc/intel/boards/sof_sdw_rt715.c b/sound/soc/intel/boards/sof_sdw_rt715.c index 7c068dc6b9..c8af3780cb 100644 --- a/sound/soc/intel/boards/sof_sdw_rt715.c +++ b/sound/soc/intel/boards/sof_sdw_rt715.c @@ -30,6 +30,13 @@ int sof_sdw_rt715_init(struct snd_soc_card *card, struct sof_sdw_codec_info *info, bool playback) { + /* + * DAI ID is fixed at SDW_DMIC_DAI_ID for 715 to + * keep sdw DMIC and HDMI setting static in UCM + */ + if (sof_sdw_quirk & SOF_RT715_DAI_ID_FIX) + dai_links->id = SDW_DMIC_DAI_ID; + dai_links->init = rt715_rtd_init; return 0; diff --git a/sound/soc/intel/boards/sof_sdw_rt715_sdca.c b/sound/soc/intel/boards/sof_sdw_rt715_sdca.c index ca0cf3db2e..85d3d8c355 100644 --- a/sound/soc/intel/boards/sof_sdw_rt715_sdca.c +++ b/sound/soc/intel/boards/sof_sdw_rt715_sdca.c @@ -30,6 +30,13 @@ int sof_sdw_rt715_sdca_init(struct snd_soc_card *card, struct sof_sdw_codec_info *info, bool playback) { + /* + * DAI ID is fixed at SDW_DMIC_DAI_ID for 715-SDCA to + * keep sdw DMIC and HDMI setting static in UCM + */ + if (sof_sdw_quirk & SOF_RT715_DAI_ID_FIX) + dai_links->id = SDW_DMIC_DAI_ID; + dai_links->init = rt715_sdca_rtd_init; return 0; diff --git a/sound/soc/intel/catpt/pcm.c b/sound/soc/intel/catpt/pcm.c index 939a9b801d..ebb27daeb1 100644 --- a/sound/soc/intel/catpt/pcm.c +++ b/sound/soc/intel/catpt/pcm.c @@ -259,9 +259,9 @@ static enum catpt_channel_config catpt_get_channel_config(u32 num_channels) static int catpt_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { + struct catpt_dev *cdev = dev_get_drvdata(dai->dev); struct 
catpt_stream_template *template; struct catpt_stream_runtime *stream; - struct catpt_dev *cdev = dev_get_drvdata(dai->dev); struct resource *res; int ret; @@ -306,8 +306,8 @@ static int catpt_dai_startup(struct snd_pcm_substream *substream, static void catpt_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct catpt_stream_runtime *stream; struct catpt_dev *cdev = dev_get_drvdata(dai->dev); + struct catpt_stream_runtime *stream; stream = snd_soc_dai_get_dma_data(dai, substream); @@ -329,9 +329,9 @@ static int catpt_set_dspvol(struct catpt_dev *cdev, u8 stream_id, long *ctlvol); static int catpt_dai_apply_usettings(struct snd_soc_dai *dai, struct catpt_stream_runtime *stream) { + struct catpt_dev *cdev = dev_get_drvdata(dai->dev); struct snd_soc_component *component = dai->component; struct snd_kcontrol *pos; - struct catpt_dev *cdev = dev_get_drvdata(dai->dev); const char *name; int ret; u32 id = stream->info.stream_hw_id; @@ -374,12 +374,12 @@ static int catpt_dai_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { - struct snd_pcm_runtime *rtm = substream->runtime; - struct snd_dma_buffer *dmab; + struct catpt_dev *cdev = dev_get_drvdata(dai->dev); struct catpt_stream_runtime *stream; struct catpt_audio_format afmt; struct catpt_ring_info rinfo; - struct catpt_dev *cdev = dev_get_drvdata(dai->dev); + struct snd_pcm_runtime *rtm = substream->runtime; + struct snd_dma_buffer *dmab; int ret; stream = snd_soc_dai_get_dma_data(dai, substream); @@ -427,8 +427,8 @@ static int catpt_dai_hw_params(struct snd_pcm_substream *substream, static int catpt_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct catpt_stream_runtime *stream; struct catpt_dev *cdev = dev_get_drvdata(dai->dev); + struct catpt_stream_runtime *stream; stream = snd_soc_dai_get_dma_data(dai, substream); if (!stream->allocated) @@ -444,8 +444,8 @@ static int catpt_dai_hw_free(struct snd_pcm_substream *substream, static int catpt_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct catpt_stream_runtime *stream; struct catpt_dev *cdev = dev_get_drvdata(dai->dev); + struct catpt_stream_runtime *stream; int ret; stream = snd_soc_dai_get_dma_data(dai, substream); @@ -467,9 +467,9 @@ static int catpt_dai_prepare(struct snd_pcm_substream *substream, static int catpt_dai_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { - struct snd_pcm_runtime *runtime = substream->runtime; - struct catpt_stream_runtime *stream; struct catpt_dev *cdev = dev_get_drvdata(dai->dev); + struct catpt_stream_runtime *stream; + struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t pos; int ret; @@ -595,8 +595,9 @@ static int catpt_component_open(struct snd_soc_component *component, { struct snd_soc_pcm_runtime *rtm = substream->private_data; - if (!rtm->dai_link->no_pcm) - snd_soc_set_runtime_hwparams(substream, &catpt_pcm_hardware); + if (rtm->dai_link->no_pcm) + return 0; + snd_soc_set_runtime_hwparams(substream, &catpt_pcm_hardware); return 0; } @@ -604,10 +605,10 @@ static snd_pcm_uframes_t catpt_component_pointer(struct snd_soc_component *component, struct snd_pcm_substream *substream) { + struct catpt_dev *cdev = dev_get_drvdata(component->dev); + struct catpt_stream_runtime *stream; struct snd_soc_pcm_runtime *rtm = substream->private_data; struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtm, 0); - struct catpt_stream_runtime *stream; - struct catpt_dev 
*cdev = dev_get_drvdata(component->dev); u32 pos; if (rtm->dai_link->no_pcm) @@ -632,8 +633,8 @@ static int catpt_dai_pcm_new(struct snd_soc_pcm_runtime *rtm, struct snd_soc_dai *dai) { struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtm, 0); - struct catpt_ssp_device_format devfmt; struct catpt_dev *cdev = dev_get_drvdata(dai->dev); + struct catpt_ssp_device_format devfmt; int ret; devfmt.iface = dai->driver->id; @@ -893,8 +894,8 @@ static int catpt_stream_volume_get(struct snd_kcontrol *kcontrol, { struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); - struct catpt_stream_runtime *stream; struct catpt_dev *cdev = dev_get_drvdata(component->dev); + struct catpt_stream_runtime *stream; long *ctlvol = (long *)kcontrol->private_value; u32 dspvol; int i; @@ -925,8 +926,8 @@ static int catpt_stream_volume_put(struct snd_kcontrol *kcontrol, { struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); - struct catpt_stream_runtime *stream; struct catpt_dev *cdev = dev_get_drvdata(component->dev); + struct catpt_stream_runtime *stream; long *ctlvol = (long *)kcontrol->private_value; int ret, i; @@ -1001,8 +1002,8 @@ static int catpt_loopback_switch_put(struct snd_kcontrol *kcontrol, { struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); - struct catpt_stream_runtime *stream; struct catpt_dev *cdev = dev_get_drvdata(component->dev); + struct catpt_stream_runtime *stream; bool mute; int ret; diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index f32bcb2b2e..a0f6a69c70 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -74,15 +74,6 @@ static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = { } }; -static const struct snd_soc_acpi_adr_device rt711_sdca_2_adr[] = { - { - .adr = 0x000230025D071101ull, - .num_endpoints = 1, - .endpoints = &single_endpoint, - .name_prefix = "rt711" - } -}; - static const struct snd_soc_acpi_adr_device rt1316_1_group1_adr[] = { { .adr = 0x000131025D131601ull, /* unique ID is set for some reason */ @@ -110,24 +101,6 @@ static const struct snd_soc_acpi_adr_device rt1316_3_group1_adr[] = { } }; -static const struct snd_soc_acpi_adr_device rt1316_0_group2_adr[] = { - { - .adr = 0x000031025D131601ull, - .num_endpoints = 1, - .endpoints = &spk_l_endpoint, - .name_prefix = "rt1316-1" - } -}; - -static const struct snd_soc_acpi_adr_device rt1316_1_group2_adr[] = { - { - .adr = 0x000130025D131601ull, - .num_endpoints = 1, - .endpoints = &spk_r_endpoint, - .name_prefix = "rt1316-2" - } -}; - static const struct snd_soc_acpi_adr_device rt1316_2_single_adr[] = { { .adr = 0x000230025D131601ull, @@ -236,63 +209,6 @@ static const struct snd_soc_acpi_link_adr adl_sdca_3_in_1[] = { {} }; -static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link2_rt1316_link01_rt714_link3[] = { - { - .mask = BIT(2), - .num_adr = ARRAY_SIZE(rt711_sdca_2_adr), - .adr_d = rt711_sdca_2_adr, - }, - { - .mask = BIT(0), - .num_adr = ARRAY_SIZE(rt1316_0_group2_adr), - .adr_d = rt1316_0_group2_adr, - }, - { - .mask = BIT(1), - .num_adr = ARRAY_SIZE(rt1316_1_group2_adr), - .adr_d = rt1316_1_group2_adr, - }, - { - .mask = BIT(3), - .num_adr = ARRAY_SIZE(rt714_3_adr), - .adr_d = rt714_3_adr, - }, - {} -}; - -static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link12_rt714_link0[] = { - { - .mask = BIT(1), - .num_adr = ARRAY_SIZE(rt1316_1_group1_adr), - .adr_d = rt1316_1_group1_adr, - }, - { - .mask = 
BIT(2), - .num_adr = ARRAY_SIZE(rt1316_2_group1_adr), - .adr_d = rt1316_2_group1_adr, - }, - { - .mask = BIT(0), - .num_adr = ARRAY_SIZE(rt714_0_adr), - .adr_d = rt714_0_adr, - }, - {} -}; - -static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link2_rt714_link3[] = { - { - .mask = BIT(2), - .num_adr = ARRAY_SIZE(rt1316_2_single_adr), - .adr_d = rt1316_2_single_adr, - }, - { - .mask = BIT(3), - .num_adr = ARRAY_SIZE(rt714_3_adr), - .adr_d = rt714_3_adr, - }, - {} -}; - static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link2_rt714_link0[] = { { .mask = BIT(2), @@ -364,29 +280,9 @@ static const struct snd_soc_acpi_codecs adl_max98357a_amp = { .codecs = {"MX98357A"} }; -static const struct snd_soc_acpi_codecs adl_max98360a_amp = { - .num_codecs = 1, - .codecs = {"MX98360A"} -}; - -static const struct snd_soc_acpi_codecs adl_rt5682_rt5682s_hp = { - .num_codecs = 2, - .codecs = {"10EC5682", "RTL5682"}, -}; - -static const struct snd_soc_acpi_codecs adl_rt1019p_amp = { - .num_codecs = 1, - .codecs = {"RTL1019"} -}; - -static const struct snd_soc_acpi_codecs adl_max98390_amp = { - .num_codecs = 1, - .codecs = {"MX98390"} -}; - struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { { - .comp_ids = &adl_rt5682_rt5682s_hp, + .id = "10EC5682", .drv_name = "adl_mx98373_rt5682", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98373_amp, @@ -394,59 +290,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { .sof_tplg_filename = "sof-adl-max98373-rt5682.tplg", }, { - .comp_ids = &adl_rt5682_rt5682s_hp, - .drv_name = "adl_mx98357_rt5682", + .id = "10EC5682", + .drv_name = "adl_mx98357a_rt5682", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98357a_amp, .sof_fw_filename = "sof-adl.ri", .sof_tplg_filename = "sof-adl-max98357a-rt5682.tplg", }, - { - .comp_ids = &adl_rt5682_rt5682s_hp, - .drv_name = "adl_mx98360_rt5682", - .machine_quirk = snd_soc_acpi_codec_list, - .quirk_data = &adl_max98360a_amp, - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-max98360a-rt5682.tplg", - }, - { - .id = "10508825", - .drv_name = "adl_rt1019p_nau8825", - .machine_quirk = snd_soc_acpi_codec_list, - .quirk_data = &adl_rt1019p_amp, - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-rt1019-nau8825.tplg", - }, - { - .id = "10508825", - .drv_name = "adl_max98373_nau8825", - .machine_quirk = snd_soc_acpi_codec_list, - .quirk_data = &adl_max98373_amp, - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-max98373-nau8825.tplg", - }, - { - .id = "10508825", - .drv_name = "adl_mx98360a_nau8825", - .machine_quirk = snd_soc_acpi_codec_list, - .quirk_data = &adl_max98360a_amp, - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-mx98360a-nau8825.tplg", - }, - { - .id = "10508825", - .drv_name = "sof_nau8825", - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-nau8825.tplg", - }, - { - .comp_ids = &adl_rt5682_rt5682s_hp, - .drv_name = "adl_max98390_rt5682", - .machine_quirk = snd_soc_acpi_codec_list, - .quirk_data = &adl_max98390_amp, - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-max98390-rt5682.tplg", - }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_adl_machines); @@ -471,27 +321,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = { .drv_name = "sof_sdw", .sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l13-rt714-l2.tplg", }, - { - .link_mask = 0xF, /* 4 active links required */ - .links = adl_sdw_rt711_link2_rt1316_link01_rt714_link3, - .drv_name = "sof_sdw", - 
.sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-rt711-l2-rt1316-l01-rt714-l3.tplg", - }, - { - .link_mask = 0xC, /* rt1316 on link2 & rt714 on link3 */ - .links = adl_sdw_rt1316_link2_rt714_link3, - .drv_name = "sof_sdw", - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-rt1316-l2-mono-rt714-l3.tplg", - }, - { - .link_mask = 0x7, /* rt714 on link0 & two rt1316s on link1 and link2 */ - .links = adl_sdw_rt1316_link12_rt714_link0, - .drv_name = "sof_sdw", - .sof_fw_filename = "sof-adl.ri", - .sof_tplg_filename = "sof-adl-rt1316-l12-rt714-l0.tplg", - }, { .link_mask = 0x5, /* 2 active links required */ .links = adl_sdw_rt1316_link2_rt714_link0, diff --git a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c index 342d340522..576407b5da 100644 --- a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c @@ -41,7 +41,7 @@ static struct snd_soc_acpi_mach *apl_quirk(void *arg) return mach; } -static const struct snd_soc_acpi_codecs bxt_codecs = { +static struct snd_soc_acpi_codecs bxt_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} }; @@ -82,12 +82,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_bxt_machines[] = { .sof_fw_filename = "sof-apl.ri", .sof_tplg_filename = "sof-apl-tdf8532.tplg", }, - { - .id = "ESSX8336", - .drv_name = "sof-essx8336", - .sof_fw_filename = "sof-apl.ri", - .sof_tplg_filename = "sof-apl-es8336.tplg", - }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_bxt_machines); diff --git a/sound/soc/intel/common/soc-acpi-intel-byt-match.c b/sound/soc/intel/common/soc-acpi-intel-byt-match.c index 1420009918..510a5f38b7 100644 --- a/sound/soc/intel/common/soc-acpi-intel-byt-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-byt-match.c @@ -120,29 +120,9 @@ static struct snd_soc_acpi_mach *byt_quirk(void *arg) } } -static const struct snd_soc_acpi_codecs rt5640_comp_ids = { - .num_codecs = 3, - .codecs = { "10EC5640", "10EC5642", "INTCCFFD"}, -}; - -static const struct snd_soc_acpi_codecs wm5102_comp_ids = { - .num_codecs = 2, - .codecs = { "WM510204", "WM510205"}, -}; - -static const struct snd_soc_acpi_codecs da7213_comp_ids = { - .num_codecs = 2, - .codecs = { "DGLS7212", "DGLS7213"}, -}; - -static const struct snd_soc_acpi_codecs rt5645_comp_ids = { - .num_codecs = 2, - .codecs = { "10EC5645", "10EC5648"}, -}; - struct snd_soc_acpi_mach snd_soc_acpi_intel_baytrail_machines[] = { { - .comp_ids = &rt5640_comp_ids, + .id = "10EC5640", .drv_name = "bytcr_rt5640", .fw_filename = "intel/fw_sst_0f28.bin", .board = "bytcr_rt5640", @@ -150,6 +130,22 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_baytrail_machines[] = { .sof_fw_filename = "sof-byt.ri", .sof_tplg_filename = "sof-byt-rt5640.tplg", }, + { + .id = "10EC5642", + .drv_name = "bytcr_rt5640", + .fw_filename = "intel/fw_sst_0f28.bin", + .board = "bytcr_rt5640", + .sof_fw_filename = "sof-byt.ri", + .sof_tplg_filename = "sof-byt-rt5640.tplg", + }, + { + .id = "INTCCFFD", + .drv_name = "bytcr_rt5640", + .fw_filename = "intel/fw_sst_0f28.bin", + .board = "bytcr_rt5640", + .sof_fw_filename = "sof-byt.ri", + .sof_tplg_filename = "sof-byt-rt5640.tplg", + }, { .id = "10EC5651", .drv_name = "bytcr_rt5651", @@ -159,7 +155,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_baytrail_machines[] = { .sof_tplg_filename = "sof-byt-rt5651.tplg", }, { - .comp_ids = &wm5102_comp_ids, + .id = "WM510204", .drv_name = "bytcr_wm5102", .fw_filename = "intel/fw_sst_0f28.bin", .board = "bytcr_wm5102", @@ -167,7 +163,23 @@ 
struct snd_soc_acpi_mach snd_soc_acpi_intel_baytrail_machines[] = { .sof_tplg_filename = "sof-byt-wm5102.tplg", }, { - .comp_ids = &da7213_comp_ids, + .id = "WM510205", + .drv_name = "bytcr_wm5102", + .fw_filename = "intel/fw_sst_0f28.bin", + .board = "bytcr_wm5102", + .sof_fw_filename = "sof-byt.ri", + .sof_tplg_filename = "sof-byt-wm5102.tplg", + }, + { + .id = "DLGS7212", + .drv_name = "bytcht_da7213", + .fw_filename = "intel/fw_sst_0f28.bin", + .board = "bytcht_da7213", + .sof_fw_filename = "sof-byt.ri", + .sof_tplg_filename = "sof-byt-da7213.tplg", + }, + { + .id = "DLGS7213", .drv_name = "bytcht_da7213", .fw_filename = "intel/fw_sst_0f28.bin", .board = "bytcht_da7213", @@ -190,7 +202,15 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_baytrail_machines[] = { }, /* some Baytrail platforms rely on RT5645, use CHT machine driver */ { - .comp_ids = &rt5645_comp_ids, + .id = "10EC5645", + .drv_name = "cht-bsw-rt5645", + .fw_filename = "intel/fw_sst_0f28.bin", + .board = "cht-bsw", + .sof_fw_filename = "sof-byt.ri", + .sof_tplg_filename = "sof-byt-rt5645.tplg", + }, + { + .id = "10EC5648", .drv_name = "cht-bsw-rt5645", .fw_filename = "intel/fw_sst_0f28.bin", .board = "cht-bsw", diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c index c60a5e8e7b..227424236f 100644 --- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-cht-match.c @@ -51,31 +51,10 @@ static struct snd_soc_acpi_mach *cht_quirk(void *arg) return mach; } -static const struct snd_soc_acpi_codecs rt5640_comp_ids = { - .num_codecs = 2, - .codecs = { "10EC5640", "10EC3276" }, -}; - -static const struct snd_soc_acpi_codecs rt5670_comp_ids = { - .num_codecs = 2, - .codecs = { "10EC5670", "10EC5672" }, -}; - -static const struct snd_soc_acpi_codecs rt5645_comp_ids = { - .num_codecs = 3, - .codecs = { "10EC5645", "10EC5650", "10EC3270" }, -}; - -static const struct snd_soc_acpi_codecs da7213_comp_ids = { - .num_codecs = 2, - .codecs = { "DGLS7212", "DGLS7213"}, - -}; - /* Cherryview-based platforms: CherryTrail and Braswell */ struct snd_soc_acpi_mach snd_soc_acpi_intel_cherrytrail_machines[] = { { - .comp_ids = &rt5670_comp_ids, + .id = "10EC5670", .drv_name = "cht-bsw-rt5672", .fw_filename = "intel/fw_sst_22a8.bin", .board = "cht-bsw", @@ -83,7 +62,31 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cherrytrail_machines[] = { .sof_tplg_filename = "sof-cht-rt5670.tplg", }, { - .comp_ids = &rt5645_comp_ids, + .id = "10EC5672", + .drv_name = "cht-bsw-rt5672", + .fw_filename = "intel/fw_sst_22a8.bin", + .board = "cht-bsw", + .sof_fw_filename = "sof-cht.ri", + .sof_tplg_filename = "sof-cht-rt5670.tplg", + }, + { + .id = "10EC5645", + .drv_name = "cht-bsw-rt5645", + .fw_filename = "intel/fw_sst_22a8.bin", + .board = "cht-bsw", + .sof_fw_filename = "sof-cht.ri", + .sof_tplg_filename = "sof-cht-rt5645.tplg", + }, + { + .id = "10EC5650", + .drv_name = "cht-bsw-rt5645", + .fw_filename = "intel/fw_sst_22a8.bin", + .board = "cht-bsw", + .sof_fw_filename = "sof-cht.ri", + .sof_tplg_filename = "sof-cht-rt5645.tplg", + }, + { + .id = "10EC3270", .drv_name = "cht-bsw-rt5645", .fw_filename = "intel/fw_sst_22a8.bin", .board = "cht-bsw", @@ -107,7 +110,15 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cherrytrail_machines[] = { .sof_tplg_filename = "sof-cht-nau8824.tplg", }, { - .comp_ids = &da7213_comp_ids, + .id = "DLGS7212", + .drv_name = "bytcht_da7213", + .fw_filename = "intel/fw_sst_22a8.bin", + .board = "bytcht_da7213", + .sof_fw_filename = 
"sof-cht.ri", + .sof_tplg_filename = "sof-cht-da7213.tplg", + }, + { + .id = "DLGS7213", .drv_name = "bytcht_da7213", .fw_filename = "intel/fw_sst_22a8.bin", .board = "bytcht_da7213", @@ -124,7 +135,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cherrytrail_machines[] = { }, /* some CHT-T platforms rely on RT5640, use Baytrail machine driver */ { - .comp_ids = &rt5640_comp_ids, + .id = "10EC5640", .drv_name = "bytcr_rt5640", .fw_filename = "intel/fw_sst_22a8.bin", .board = "bytcr_rt5640", @@ -132,6 +143,14 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cherrytrail_machines[] = { .sof_fw_filename = "sof-cht.ri", .sof_tplg_filename = "sof-cht-rt5640.tplg", }, + { + .id = "10EC3276", + .drv_name = "bytcr_rt5640", + .fw_filename = "intel/fw_sst_22a8.bin", + .board = "bytcr_rt5640", + .sof_fw_filename = "sof-cht.ri", + .sof_tplg_filename = "sof-cht-rt5640.tplg", + }, { .id = "10EC5682", .drv_name = "sof_rt5682", diff --git a/sound/soc/intel/common/soc-acpi-intel-cml-match.c b/sound/soc/intel/common/soc-acpi-intel-cml-match.c index 4eebc79d4b..b591c6fd13 100644 --- a/sound/soc/intel/common/soc-acpi-intel-cml-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-cml-match.c @@ -9,22 +9,22 @@ #include #include -static const struct snd_soc_acpi_codecs rt1011_spk_codecs = { +static struct snd_soc_acpi_codecs rt1011_spk_codecs = { .num_codecs = 1, .codecs = {"10EC1011"} }; -static const struct snd_soc_acpi_codecs rt1015_spk_codecs = { +static struct snd_soc_acpi_codecs rt1015_spk_codecs = { .num_codecs = 1, .codecs = {"10EC1015"} }; -static const struct snd_soc_acpi_codecs max98357a_spk_codecs = { +static struct snd_soc_acpi_codecs max98357a_spk_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} }; -static const struct snd_soc_acpi_codecs max98390_spk_codecs = { +static struct snd_soc_acpi_codecs max98390_spk_codecs = { .num_codecs = 1, .codecs = {"MX98390"} }; @@ -81,12 +81,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cml_machines[] = { .sof_fw_filename = "sof-cml.ri", .sof_tplg_filename = "sof-cml-da7219-max98390.tplg", }, - { - .id = "ESSX8336", - .drv_name = "sof-essx8336", - .sof_fw_filename = "sof-cml.ri", - .sof_tplg_filename = "sof-cml-es8336.tplg", - }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_cml_machines); diff --git a/sound/soc/intel/common/soc-acpi-intel-glk-match.c b/sound/soc/intel/common/soc-acpi-intel-glk-match.c index 8492b7e2a9..da1e151190 100644 --- a/sound/soc/intel/common/soc-acpi-intel-glk-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-glk-match.c @@ -9,7 +9,7 @@ #include #include -static const struct snd_soc_acpi_codecs glk_codecs = { +static struct snd_soc_acpi_codecs glk_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} }; @@ -40,14 +40,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[] = { .sof_fw_filename = "sof-glk.ri", .sof_tplg_filename = "sof-glk-rt5682.tplg", }, - { - .id = "RTL5682", - .drv_name = "glk_rt5682_max98357a", - .machine_quirk = snd_soc_acpi_codec_list, - .quirk_data = &glk_codecs, - .sof_fw_filename = "sof-glk.ri", - .sof_tplg_filename = "sof-glk-rt5682.tplg", - }, { .id = "10134242", .drv_name = "glk_cs4242_mx98357a", @@ -57,12 +49,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[] = { .sof_fw_filename = "sof-glk.ri", .sof_tplg_filename = "sof-glk-cs42l42.tplg", }, - { - .id = "ESSX8336", - .drv_name = "sof-essx8336", - .sof_fw_filename = "sof-glk.ri", - .sof_tplg_filename = "sof-glk-es8336.tplg", - }, + {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_glk_machines); diff --git 
a/sound/soc/intel/common/soc-acpi-intel-jsl-match.c b/sound/soc/intel/common/soc-acpi-intel-jsl-match.c index 278ec196da..69ff7286d3 100644 --- a/sound/soc/intel/common/soc-acpi-intel-jsl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-jsl-match.c @@ -9,31 +9,26 @@ #include #include -static const struct snd_soc_acpi_codecs jsl_7219_98373_codecs = { +static struct snd_soc_acpi_codecs jsl_7219_98373_codecs = { .num_codecs = 1, .codecs = {"MX98373"} }; -static const struct snd_soc_acpi_codecs rt1015_spk = { +static struct snd_soc_acpi_codecs rt1015_spk = { .num_codecs = 1, .codecs = {"10EC1015"} }; -static const struct snd_soc_acpi_codecs rt1015p_spk = { +static struct snd_soc_acpi_codecs rt1015p_spk = { .num_codecs = 1, .codecs = {"RTL1015"} }; -static const struct snd_soc_acpi_codecs mx98360a_spk = { +static struct snd_soc_acpi_codecs mx98360a_spk = { .num_codecs = 1, .codecs = {"MX98360A"} }; -static const struct snd_soc_acpi_codecs rt5682_rt5682s_hp = { - .num_codecs = 2, - .codecs = {"10EC5682", "RTL5682"}, -}; - /* * When adding new entry to the snd_soc_acpi_intel_jsl_machines array, * use .quirk_data member to distinguish different machine driver, @@ -55,7 +50,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_jsl_machines[] = { .sof_tplg_filename = "sof-jsl-da7219-mx98360a.tplg", }, { - .comp_ids = &rt5682_rt5682s_hp, + .id = "10EC5682", .drv_name = "jsl_rt5682_rt1015", .sof_fw_filename = "sof-jsl.ri", .machine_quirk = snd_soc_acpi_codec_list, @@ -63,7 +58,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_jsl_machines[] = { .sof_tplg_filename = "sof-jsl-rt5682-rt1015.tplg", }, { - .comp_ids = &rt5682_rt5682s_hp, + .id = "10EC5682", .drv_name = "jsl_rt5682_rt1015p", .sof_fw_filename = "sof-jsl.ri", .machine_quirk = snd_soc_acpi_codec_list, @@ -71,8 +66,8 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_jsl_machines[] = { .sof_tplg_filename = "sof-jsl-rt5682-rt1015.tplg", }, { - .comp_ids = &rt5682_rt5682s_hp, - .drv_name = "jsl_rt5682_mx98360", + .id = "10EC5682", + .drv_name = "jsl_rt5682_mx98360a", .sof_fw_filename = "sof-jsl.ri", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &mx98360a_spk, @@ -86,12 +81,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_jsl_machines[] = { .quirk_data = &mx98360a_spk, .sof_tplg_filename = "sof-jsl-cs42l42-mx98360a.tplg", }, - { - .id = "ESSX8336", - .drv_name = "sof-essx8336", - .sof_fw_filename = "sof-jsl.ri", - .sof_tplg_filename = "sof-jsl-es8336.tplg", - }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_jsl_machines); diff --git a/sound/soc/intel/common/soc-acpi-intel-kbl-match.c b/sound/soc/intel/common/soc-acpi-intel-kbl-match.c index 4e817f559d..741bf2f9e0 100644 --- a/sound/soc/intel/common/soc-acpi-intel-kbl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-kbl-match.c @@ -12,32 +12,32 @@ static struct skl_machine_pdata skl_dmic_data; -static const struct snd_soc_acpi_codecs kbl_codecs = { +static struct snd_soc_acpi_codecs kbl_codecs = { .num_codecs = 1, .codecs = {"10508825"} }; -static const struct snd_soc_acpi_codecs kbl_poppy_codecs = { +static struct snd_soc_acpi_codecs kbl_poppy_codecs = { .num_codecs = 1, .codecs = {"10EC5663"} }; -static const struct snd_soc_acpi_codecs kbl_5663_5514_codecs = { +static struct snd_soc_acpi_codecs kbl_5663_5514_codecs = { .num_codecs = 2, .codecs = {"10EC5663", "10EC5514"} }; -static const struct snd_soc_acpi_codecs kbl_7219_98357_codecs = { +static struct snd_soc_acpi_codecs kbl_7219_98357_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} }; -static const struct snd_soc_acpi_codecs 
kbl_7219_98927_codecs = { +static struct snd_soc_acpi_codecs kbl_7219_98927_codecs = { .num_codecs = 1, .codecs = {"MX98927"} }; -static const struct snd_soc_acpi_codecs kbl_7219_98373_codecs = { +static struct snd_soc_acpi_codecs kbl_7219_98373_codecs = { .num_codecs = 1, .codecs = {"MX98373"} }; diff --git a/sound/soc/intel/common/soc-acpi-intel-skl-match.c b/sound/soc/intel/common/soc-acpi-intel-skl-match.c index 75302e9567..961df8d6b5 100644 --- a/sound/soc/intel/common/soc-acpi-intel-skl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-skl-match.c @@ -12,7 +12,7 @@ static struct skl_machine_pdata skl_dmic_data; -static const struct snd_soc_acpi_codecs skl_codecs = { +static struct snd_soc_acpi_codecs skl_codecs = { .num_codecs = 1, .codecs = {"10508825"} }; diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c index da31bb3cca..11801b905e 100644 --- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c @@ -358,22 +358,17 @@ static const struct snd_soc_acpi_codecs tgl_rt1011_amp = { .codecs = {"10EC1011"} }; -static const struct snd_soc_acpi_codecs tgl_rt5682_rt5682s_hp = { - .num_codecs = 2, - .codecs = {"10EC5682", "RTL5682"}, -}; - struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_machines[] = { { - .comp_ids = &tgl_rt5682_rt5682s_hp, - .drv_name = "tgl_mx98357_rt5682", + .id = "10EC5682", + .drv_name = "tgl_mx98357a_rt5682", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &tgl_codecs, .sof_fw_filename = "sof-tgl.ri", .sof_tplg_filename = "sof-tgl-max98357a-rt5682.tplg", }, { - .comp_ids = &tgl_rt5682_rt5682s_hp, + .id = "10EC5682", .drv_name = "tgl_mx98373_rt5682", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &tgl_max98373_amp, @@ -381,19 +376,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_machines[] = { .sof_tplg_filename = "sof-tgl-max98373-rt5682.tplg", }, { - .comp_ids = &tgl_rt5682_rt5682s_hp, + .id = "10EC5682", .drv_name = "tgl_rt1011_rt5682", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &tgl_rt1011_amp, .sof_fw_filename = "sof-tgl.ri", .sof_tplg_filename = "sof-tgl-rt1011-rt5682.tplg", }, - { - .id = "ESSX8336", - .drv_name = "sof-essx8336", - .sof_fw_filename = "sof-tgl.ri", - .sof_tplg_filename = "sof-tgl-es8336.tplg", - }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_tgl_machines); diff --git a/sound/soc/intel/common/soc-intel-quirks.h b/sound/soc/intel/common/soc-intel-quirks.h index de4e550c5b..a93987ab7f 100644 --- a/sound/soc/intel/common/soc-intel-quirks.h +++ b/sound/soc/intel/common/soc-intel-quirks.h @@ -9,13 +9,34 @@ #ifndef _SND_SOC_INTEL_QUIRKS_H #define _SND_SOC_INTEL_QUIRKS_H -#include - #if IS_ENABLED(CONFIG_X86) #include +#include +#include #include +#define SOC_INTEL_IS_CPU(soc, type) \ +static inline bool soc_intel_is_##soc(void) \ +{ \ + static const struct x86_cpu_id soc##_cpu_ids[] = { \ + X86_MATCH_INTEL_FAM6_MODEL(type, NULL), \ + {} \ + }; \ + const struct x86_cpu_id *id; \ + \ + id = x86_match_cpu(soc##_cpu_ids); \ + if (id) \ + return true; \ + return false; \ +} + +SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT); +SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT); +SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT); +SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS); +SOC_INTEL_IS_CPU(cml, KABYLAKE_L); + static inline bool soc_intel_is_byt_cr(struct platform_device *pdev) { /* @@ -93,6 +114,30 @@ static inline bool soc_intel_is_byt_cr(struct platform_device *pdev) return false; } +static inline bool soc_intel_is_byt(void) +{ + return false; 
+} + +static inline bool soc_intel_is_cht(void) +{ + return false; +} + +static inline bool soc_intel_is_apl(void) +{ + return false; +} + +static inline bool soc_intel_is_glk(void) +{ + return false; +} + +static inline bool soc_intel_is_cml(void) +{ + return false; +} #endif -#endif /* _SND_SOC_INTEL_QUIRKS_H */ + #endif /* _SND_SOC_INTEL_QUIRKS_H */ diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 2439a574ac..64226072f0 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -13,6 +13,108 @@ #include "skl.h" #include "skl-i2s.h" +static struct nhlt_specific_cfg *skl_get_specific_cfg( + struct device *dev, struct nhlt_fmt *fmt, + u8 no_ch, u32 rate, u16 bps, u8 linktype) +{ + struct nhlt_specific_cfg *sp_config; + struct wav_fmt *wfmt; + struct nhlt_fmt_cfg *fmt_config = fmt->fmt_config; + int i; + + dev_dbg(dev, "Format count =%d\n", fmt->fmt_count); + + for (i = 0; i < fmt->fmt_count; i++) { + wfmt = &fmt_config->fmt_ext.fmt; + dev_dbg(dev, "ch=%d fmt=%d s_rate=%d\n", wfmt->channels, + wfmt->bits_per_sample, wfmt->samples_per_sec); + if (wfmt->channels == no_ch && wfmt->bits_per_sample == bps) { + /* + * if link type is dmic ignore rate check as the blob is + * generic for all rates + */ + sp_config = &fmt_config->config; + if (linktype == NHLT_LINK_DMIC) + return sp_config; + + if (wfmt->samples_per_sec == rate) + return sp_config; + } + + fmt_config = (struct nhlt_fmt_cfg *)(fmt_config->config.caps + + fmt_config->config.size); + } + + return NULL; +} + +static void dump_config(struct device *dev, u32 instance_id, u8 linktype, + u8 s_fmt, u8 num_channels, u32 s_rate, u8 dirn, u16 bps) +{ + dev_dbg(dev, "Input configuration\n"); + dev_dbg(dev, "ch=%d fmt=%d s_rate=%d\n", num_channels, s_fmt, s_rate); + dev_dbg(dev, "vbus_id=%d link_type=%d\n", instance_id, linktype); + dev_dbg(dev, "bits_per_sample=%d\n", bps); +} + +static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt, + u32 instance_id, u8 link_type, u8 dirn, u8 dev_type) +{ + dev_dbg(dev, "vbus_id=%d link_type=%d dir=%d dev_type = %d\n", + epnt->virtual_bus_id, epnt->linktype, + epnt->direction, epnt->device_type); + + if ((epnt->virtual_bus_id == instance_id) && + (epnt->linktype == link_type) && + (epnt->direction == dirn)) { + /* do not check dev_type for DMIC link type */ + if (epnt->linktype == NHLT_LINK_DMIC) + return true; + + if (epnt->device_type == dev_type) + return true; + } + + return false; +} + +struct nhlt_specific_cfg +*skl_get_ep_blob(struct skl_dev *skl, u32 instance, u8 link_type, + u8 s_fmt, u8 num_ch, u32 s_rate, + u8 dirn, u8 dev_type) +{ + struct nhlt_fmt *fmt; + struct nhlt_endpoint *epnt; + struct hdac_bus *bus = skl_to_bus(skl); + struct device *dev = bus->dev; + struct nhlt_specific_cfg *sp_config; + struct nhlt_acpi_table *nhlt = skl->nhlt; + u16 bps = (s_fmt == 16) ? 
16 : 32; + u8 j; + + dump_config(dev, instance, link_type, s_fmt, num_ch, s_rate, dirn, bps); + + epnt = (struct nhlt_endpoint *)nhlt->desc; + + dev_dbg(dev, "endpoint count =%d\n", nhlt->endpoint_count); + + for (j = 0; j < nhlt->endpoint_count; j++) { + if (skl_check_ep_match(dev, epnt, instance, link_type, + dirn, dev_type)) { + fmt = (struct nhlt_fmt *)(epnt->config.caps + + epnt->config.size); + sp_config = skl_get_specific_cfg(dev, fmt, num_ch, + s_rate, bps, link_type); + if (sp_config) + return sp_config; + } + + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); + } + + return NULL; +} + static void skl_nhlt_trim_space(char *trim) { char *s = trim; diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 55f310e91b..e4aa366d35 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -317,7 +317,6 @@ static int skl_pcm_hw_params(struct snd_pcm_substream *substream, dev_dbg(dai->dev, "dma_id=%d\n", dma_id); p_params.s_fmt = snd_pcm_format_width(params_format(params)); - p_params.s_cont = snd_pcm_format_physical_width(params_format(params)); p_params.ch = params_channels(params); p_params.s_freq = params_rate(params); p_params.host_dma_id = dma_id; @@ -406,7 +405,6 @@ static int skl_be_hw_params(struct snd_pcm_substream *substream, struct skl_pipe_params p_params = {0}; p_params.s_fmt = snd_pcm_format_width(params_format(params)); - p_params.s_cont = snd_pcm_format_physical_width(params_format(params)); p_params.ch = params_channels(params); p_params.s_freq = params_rate(params); p_params.stream = substream->stream; @@ -564,11 +562,13 @@ static int skl_link_hw_params(struct snd_pcm_substream *substream, stream_tag = hdac_stream(link_dev)->stream_tag; - /* set the hdac_stream in the codec dai */ - snd_soc_dai_set_stream(codec_dai, hdac_stream(link_dev), substream->stream); + /* set the stream tag in the codec dai dma params */ + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + snd_soc_dai_set_tdm_slot(codec_dai, stream_tag, 0, 0, 0); + else + snd_soc_dai_set_tdm_slot(codec_dai, 0, stream_tag, 0, 0); p_params.s_fmt = snd_pcm_format_width(params_format(params)); - p_params.s_cont = snd_pcm_format_physical_width(params_format(params)); p_params.ch = params_channels(params); p_params.s_freq = params_rate(params); p_params.stream = substream->stream; diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 9bdf020a2b..b036852d68 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -285,7 +285,7 @@ static int skl_tplg_update_be_blob(struct snd_soc_dapm_widget *w, { struct skl_module_cfg *m_cfg = w->priv; int link_type, dir; - u32 ch, s_freq, s_fmt, s_cont; + u32 ch, s_freq, s_fmt; struct nhlt_specific_cfg *cfg; u8 dev_type = skl_tplg_be_dev_type(m_cfg->dev_type); int fmt_idx = m_cfg->fmt_idx; @@ -301,8 +301,7 @@ static int skl_tplg_update_be_blob(struct snd_soc_dapm_widget *w, link_type = NHLT_LINK_DMIC; dir = SNDRV_PCM_STREAM_CAPTURE; s_freq = m_iface->inputs[0].fmt.s_freq; - s_fmt = m_iface->inputs[0].fmt.valid_bit_depth; - s_cont = m_iface->inputs[0].fmt.bit_depth; + s_fmt = m_iface->inputs[0].fmt.bit_depth; ch = m_iface->inputs[0].fmt.channels; break; @@ -311,14 +310,12 @@ static int skl_tplg_update_be_blob(struct snd_soc_dapm_widget *w, if (m_cfg->hw_conn_type == SKL_CONN_SOURCE) { dir = SNDRV_PCM_STREAM_PLAYBACK; s_freq = m_iface->outputs[0].fmt.s_freq; - s_fmt = m_iface->outputs[0].fmt.valid_bit_depth; - s_cont = 
m_iface->outputs[0].fmt.bit_depth; + s_fmt = m_iface->outputs[0].fmt.bit_depth; ch = m_iface->outputs[0].fmt.channels; } else { dir = SNDRV_PCM_STREAM_CAPTURE; s_freq = m_iface->inputs[0].fmt.s_freq; - s_fmt = m_iface->inputs[0].fmt.valid_bit_depth; - s_cont = m_iface->inputs[0].fmt.bit_depth; + s_fmt = m_iface->inputs[0].fmt.bit_depth; ch = m_iface->inputs[0].fmt.channels; } break; @@ -328,17 +325,16 @@ static int skl_tplg_update_be_blob(struct snd_soc_dapm_widget *w, } /* update the blob based on virtual bus_id and default params */ - cfg = intel_nhlt_get_endpoint_blob(skl->dev, skl->nhlt, m_cfg->vbus_id, - link_type, s_fmt, s_cont, ch, - s_freq, dir, dev_type); + cfg = skl_get_ep_blob(skl, m_cfg->vbus_id, link_type, + s_fmt, ch, s_freq, dir, dev_type); if (cfg) { m_cfg->formats_config[SKL_PARAM_INIT].caps_size = cfg->size; m_cfg->formats_config[SKL_PARAM_INIT].caps = (u32 *)&cfg->caps; } else { dev_err(skl->dev, "Blob NULL for id %x type %d dirn %d\n", m_cfg->vbus_id, link_type, dir); - dev_err(skl->dev, "PCM: ch %d, freq %d, fmt %d/%d\n", - ch, s_freq, s_fmt, s_cont); + dev_err(skl->dev, "PCM: ch %d, freq %d, fmt %d\n", + ch, s_freq, s_fmt); return -EIO; } @@ -1853,11 +1849,10 @@ static int skl_tplg_be_fill_pipe_params(struct snd_soc_dai *dai, pipe_fmt = &pipe->configs[pipe->pipe_config_idx].in_fmt; /* update the blob based on virtual bus_id*/ - cfg = intel_nhlt_get_endpoint_blob(dai->dev, skl->nhlt, - mconfig->vbus_id, link_type, - pipe_fmt->bps, params->s_cont, - pipe_fmt->channels, pipe_fmt->freq, - pipe->direction, dev_type); + cfg = skl_get_ep_blob(skl, mconfig->vbus_id, link_type, + pipe_fmt->bps, pipe_fmt->channels, + pipe_fmt->freq, pipe->direction, + dev_type); if (cfg) { mconfig->formats_config[SKL_PARAM_INIT].caps_size = cfg->size; mconfig->formats_config[SKL_PARAM_INIT].caps = (u32 *)&cfg->caps; @@ -3642,7 +3637,7 @@ static int skl_manifest_load(struct snd_soc_component *cmpnt, int index, return 0; } -static int skl_tplg_complete(struct snd_soc_component *component) +static void skl_tplg_complete(struct snd_soc_component *component) { struct snd_soc_dobj *dobj; struct snd_soc_acpi_mach *mach; @@ -3651,7 +3646,7 @@ static int skl_tplg_complete(struct snd_soc_component *component) val = kmalloc(sizeof(*val), GFP_KERNEL); if (!val) - return -ENOMEM; + return; mach = dev_get_platdata(component->card->dev); list_for_each_entry(dobj, &component->dobj_list, list) { @@ -3676,9 +3671,7 @@ static int skl_tplg_complete(struct snd_soc_component *component) } } } - kfree(val); - return 0; } static struct snd_soc_tplg_ops skl_tplg_ops = { diff --git a/sound/soc/intel/skylake/skl-topology.h b/sound/soc/intel/skylake/skl-topology.h index 22963634fb..f0695b2ac5 100644 --- a/sound/soc/intel/skylake/skl-topology.h +++ b/sound/soc/intel/skylake/skl-topology.h @@ -284,7 +284,6 @@ struct skl_pipe_params { u32 ch; u32 s_freq; u32 s_fmt; - u32 s_cont; u8 linktype; snd_pcm_format_t format; int link_index; diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index aeca58246f..5b1a15e399 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -439,7 +439,7 @@ static int skl_free(struct hdac_bus *bus) skl->init_done = 0; /* to be sure */ - snd_hdac_stop_streams_and_chip(bus); + snd_hdac_ext_stop_streams(bus); if (bus->irq >= 0) free_irq(bus->irq, (void *)bus); @@ -952,7 +952,6 @@ static int skl_first_init(struct hdac_bus *bus) /* allow 64bit DMA address if supported by H/W */ if (dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(64))) 
dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(32)); - dma_set_max_seg_size(bus->dev, UINT_MAX); /* initialize streams */ snd_hdac_ext_stream_init_all @@ -1097,7 +1096,7 @@ static void skl_shutdown(struct pci_dev *pci) if (!skl->init_done) return; - snd_hdac_stop_streams_and_chip(bus); + snd_hdac_ext_stop_streams(bus); list_for_each_entry(s, &bus->stream_list, list) { stream = stream_to_hdac_ext_stream(s); snd_hdac_ext_stream_decouple(bus, stream, false); diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h index f55f8b3dbd..33ed274fc0 100644 --- a/sound/soc/intel/skylake/skl.h +++ b/sound/soc/intel/skylake/skl.h @@ -165,6 +165,10 @@ struct skl_dsp_ops { int skl_platform_unregister(struct device *dev); int skl_platform_register(struct device *dev); +struct nhlt_specific_cfg *skl_get_ep_blob(struct skl_dev *skl, u32 instance, + u8 link_type, u8 s_fmt, u8 num_ch, + u32 s_rate, u8 dirn, u8 dev_type); + int skl_nhlt_update_topology_bin(struct skl_dev *skl); int skl_init_dsp(struct skl_dev *skl); int skl_free_dsp(struct skl_dev *skl); diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 0d154350f1..81ad2dcee9 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -120,7 +120,7 @@ config SND_SOC_MT8183 config SND_SOC_MT8183_MT6358_TS3A227E_MAX98357A tristate "ASoC Audio driver for MT8183 with MT6358 TS3A227E MAX98357A RT1015 codec" - depends on I2C && GPIOLIB + depends on I2C depends on SND_SOC_MT8183 select SND_SOC_MT6358 select SND_SOC_MAX98357A @@ -138,7 +138,7 @@ config SND_SOC_MT8183_MT6358_TS3A227E_MAX98357A config SND_SOC_MT8183_DA7219_MAX98357A tristate "ASoC Audio driver for MT8183 with DA7219 MAX98357A RT1015 codec" - depends on SND_SOC_MT8183 && I2C && GPIOLIB + depends on SND_SOC_MT8183 && I2C select SND_SOC_MT6358 select SND_SOC_MAX98357A select SND_SOC_RT1015 @@ -173,7 +173,7 @@ config SND_SOC_MT8192 config SND_SOC_MT8192_MT6359_RT1015_RT5682 tristate "ASoC Audio driver for MT8192 with MT6359 RT1015 RT5682 codec" - depends on I2C && GPIOLIB + depends on I2C depends on SND_SOC_MT8192 && MTK_PMIC_WRAP select SND_SOC_MT6359 select SND_SOC_RT1015 @@ -200,12 +200,11 @@ config SND_SOC_MT8195 config SND_SOC_MT8195_MT6359_RT1019_RT5682 tristate "ASoC Audio driver for MT8195 with MT6359 RT1019 RT5682 codec" - depends on I2C && GPIOLIB + depends on I2C depends on SND_SOC_MT8195 && MTK_PMIC_WRAP select SND_SOC_MT6359 select SND_SOC_RT1015P select SND_SOC_RT5682_I2C - select SND_SOC_RT5682S select SND_SOC_DMIC select SND_SOC_HDMI_CODEC help @@ -213,19 +212,3 @@ config SND_SOC_MT8195_MT6359_RT1019_RT5682 with the MT6359 RT1019 RT5682 audio codec. Select Y if you have such device. If unsure select "N". - -config SND_SOC_MT8195_MT6359_RT1011_RT5682 - tristate "ASoC Audio driver for MT8195 with MT6359 RT1011 RT5682 codec" - depends on I2C && GPIOLIB - depends on SND_SOC_MT8195 && MTK_PMIC_WRAP - select SND_SOC_MT6359 - select SND_SOC_RT1011 - select SND_SOC_RT5682_I2C - select SND_SOC_RT5682S - select SND_SOC_DMIC - select SND_SOC_HDMI_CODEC - help - This adds ASoC driver for Mediatek MT8195 boards - with the MT6359 RT1011 RT5682 audio codec. - Select Y if you have such device. - If unsure select "N". 
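A note on the lookup reintroduced in skl-nhlt.c above: skl_get_ep_blob() stands in for the common intel_nhlt_get_endpoint_blob() helper (and drops its s_cont/physical-width argument), and its core is a walk over variable-length NHLT records. Below is a minimal, self-contained model of that walk; the struct layout and the names (endpoint, next_endpoint, find_endpoint) are simplified stand-ins for illustration, not the kernel's nhlt definitions.

#include <stdint.h>

struct endpoint {                    /* stand-in for struct nhlt_endpoint */
	uint32_t length;             /* total size of this record in bytes */
	uint8_t  link_type;          /* e.g. SSP, DMIC, HDA */
	uint8_t  direction;          /* render or capture */
	/* device ids and the variable-size config blob follow */
};

static const struct endpoint *next_endpoint(const struct endpoint *ep)
{
	/* records are variable length: advance by the record's own size */
	return (const struct endpoint *)((const uint8_t *)ep + ep->length);
}

static const struct endpoint *
find_endpoint(const struct endpoint *first, unsigned int count,
	      uint8_t link_type, uint8_t direction)
{
	const struct endpoint *ep = first;
	unsigned int i;

	for (i = 0; i < count; i++, ep = next_endpoint(ep))
		if (ep->link_type == link_type && ep->direction == direction)
			return ep;

	return NULL;	/* skl_tplg_update_be_blob() turns a miss into -EIO */
}

The same advance-by-own-length pattern repeats one level down: skl_get_specific_cfg() steps through the per-endpoint nhlt_fmt_cfg entries via config.caps + config.size, which is why neither level can use plain array indexing.
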
diff --git a/sound/soc/mediatek/common/mtk-afe-fe-dai.c b/sound/soc/mediatek/common/mtk-afe-fe-dai.c index 395be97f13..e95c7c018e 100644 --- a/sound/soc/mediatek/common/mtk-afe-fe-dai.c +++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.c @@ -288,6 +288,7 @@ const struct snd_soc_dai_ops mtk_afe_fe_ops = { }; EXPORT_SYMBOL_GPL(mtk_afe_fe_ops); +static DEFINE_MUTEX(irqs_lock); int mtk_dynamic_irq_acquire(struct mtk_base_afe *afe) { int i; @@ -350,7 +351,7 @@ int mtk_afe_resume(struct snd_soc_component *component) struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component); struct device *dev = afe->dev; struct regmap *regmap = afe->regmap; - int i; + int i = 0; if (pm_runtime_status_suspended(dev) || !afe->suspended) return 0; diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c index 0f178de92a..bc3d046647 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c @@ -1474,7 +1474,9 @@ static struct platform_driver mt2701_afe_pcm_driver = { .driver = { .name = "mt2701-audio", .of_match_table = mt2701_afe_pcm_dt_match, +#ifdef CONFIG_PM .pm = &mt2701_afe_pm_ops, +#endif }, .probe = mt2701_afe_pcm_dev_probe, .remove = mt2701_afe_pcm_dev_remove, diff --git a/sound/soc/mediatek/mt2701/mt2701-cs42448.c b/sound/soc/mediatek/mt2701/mt2701-cs42448.c index d9fd6eb786..44a8d5cfb0 100644 --- a/sound/soc/mediatek/mt2701/mt2701-cs42448.c +++ b/sound/soc/mediatek/mt2701/mt2701-cs42448.c @@ -146,7 +146,7 @@ static int mt2701_cs42448_be_ops_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_soc_ops mt2701_cs42448_be_ops = { +static struct snd_soc_ops mt2701_cs42448_be_ops = { .hw_params = mt2701_cs42448_be_ops_hw_params }; diff --git a/sound/soc/mediatek/mt2701/mt2701-wm8960.c b/sound/soc/mediatek/mt2701/mt2701-wm8960.c index f56de1b918..414e422c0e 100644 --- a/sound/soc/mediatek/mt2701/mt2701-wm8960.c +++ b/sound/soc/mediatek/mt2701/mt2701-wm8960.c @@ -40,7 +40,7 @@ static int mt2701_wm8960_be_ops_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_soc_ops mt2701_wm8960_be_ops = { +static struct snd_soc_ops mt2701_wm8960_be_ops = { .hw_params = mt2701_wm8960_be_ops_hw_params }; diff --git a/sound/soc/mediatek/mt6797/mt6797-afe-pcm.c b/sound/soc/mediatek/mt6797/mt6797-afe-pcm.c index fb4abec9aa..3d68e4726e 100644 --- a/sound/soc/mediatek/mt6797/mt6797-afe-pcm.c +++ b/sound/soc/mediatek/mt6797/mt6797-afe-pcm.c @@ -901,7 +901,9 @@ static struct platform_driver mt6797_afe_pcm_driver = { .driver = { .name = "mt6797-audio", .of_match_table = mt6797_afe_pcm_dt_match, +#ifdef CONFIG_PM .pm = &mt6797_afe_pm_ops, +#endif }, .probe = mt6797_afe_pcm_dev_probe, .remove = mt6797_afe_pcm_dev_remove, diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c index 3149493043..6350390414 100644 --- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c +++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c @@ -1054,7 +1054,6 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) int irq_id; struct mtk_base_afe *afe; struct mt8173_afe_private *afe_priv; - struct snd_soc_component *comp_pcm, *comp_hdmi; ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(33)); if (ret) @@ -1143,55 +1142,23 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) if (ret) goto err_pm_disable; - comp_pcm = devm_kzalloc(&pdev->dev, sizeof(*comp_pcm), GFP_KERNEL); - if (!comp_pcm) { - ret = -ENOMEM; - goto err_pm_disable; - } - 
- ret = snd_soc_component_initialize(comp_pcm, - &mt8173_afe_pcm_dai_component, - &pdev->dev); + ret = devm_snd_soc_register_component(&pdev->dev, + &mt8173_afe_pcm_dai_component, + mt8173_afe_pcm_dais, + ARRAY_SIZE(mt8173_afe_pcm_dais)); if (ret) goto err_pm_disable; -#ifdef CONFIG_DEBUG_FS - comp_pcm->debugfs_prefix = "pcm"; -#endif - - ret = snd_soc_add_component(comp_pcm, - mt8173_afe_pcm_dais, - ARRAY_SIZE(mt8173_afe_pcm_dais)); + ret = devm_snd_soc_register_component(&pdev->dev, + &mt8173_afe_hdmi_dai_component, + mt8173_afe_hdmi_dais, + ARRAY_SIZE(mt8173_afe_hdmi_dais)); if (ret) goto err_pm_disable; - comp_hdmi = devm_kzalloc(&pdev->dev, sizeof(*comp_hdmi), GFP_KERNEL); - if (!comp_hdmi) { - ret = -ENOMEM; - goto err_pm_disable; - } - - ret = snd_soc_component_initialize(comp_hdmi, - &mt8173_afe_hdmi_dai_component, - &pdev->dev); - if (ret) - goto err_pm_disable; - -#ifdef CONFIG_DEBUG_FS - comp_hdmi->debugfs_prefix = "hdmi"; -#endif - - ret = snd_soc_add_component(comp_hdmi, - mt8173_afe_hdmi_dais, - ARRAY_SIZE(mt8173_afe_hdmi_dais)); - if (ret) - goto err_cleanup_components; - dev_info(&pdev->dev, "MT8173 AFE driver initialized.\n"); return 0; -err_cleanup_components: - snd_soc_unregister_component(&pdev->dev); err_pm_disable: pm_runtime_disable(&pdev->dev); return ret; @@ -1199,8 +1166,6 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) static int mt8173_afe_pcm_dev_remove(struct platform_device *pdev) { - snd_soc_unregister_component(&pdev->dev); - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8173_afe_runtime_suspend(&pdev->dev); diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c index 4cb90da892..3bdd493131 100644 --- a/sound/soc/mediatek/mt8173/mt8173-max98090.c +++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c @@ -177,6 +177,9 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); + if (ret) + dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", + __func__, ret); of_node_put(codec_node); of_node_put(platform_node); @@ -193,7 +196,9 @@ static struct platform_driver mt8173_max98090_driver = { .driver = { .name = "mt8173-max98090", .of_match_table = mt8173_max98090_dt_match, +#ifdef CONFIG_PM .pm = &snd_soc_pm_ops, +#endif }, .probe = mt8173_max98090_dev_probe, }; diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c index b55122b99f..390da5bf72 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c @@ -215,6 +215,9 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); + if (ret) + dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", + __func__, ret); of_node_put(platform_node); return ret; @@ -230,7 +233,9 @@ static struct platform_driver mt8173_rt5650_rt5514_driver = { .driver = { .name = "mtk-rt5650-rt5514", .of_match_table = mt8173_rt5650_rt5514_dt_match, +#ifdef CONFIG_PM .pm = &snd_soc_pm_ops, +#endif }, .probe = mt8173_rt5650_rt5514_dev_probe, }; diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c index 5716d92990..c8e4e85e10 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c @@ -282,6 +282,9 @@ static int 
mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); + if (ret) + dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", + __func__, ret); of_node_put(platform_node); return ret; @@ -297,7 +300,9 @@ static struct platform_driver mt8173_rt5650_rt5676_driver = { .driver = { .name = "mtk-rt5650-rt5676", .of_match_table = mt8173_rt5650_rt5676_dt_match, +#ifdef CONFIG_PM .pm = &snd_soc_pm_ops, +#endif }, .probe = mt8173_rt5650_rt5676_dev_probe, }; diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c index fc164f4f95..e168d31f44 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c @@ -30,15 +30,15 @@ static struct mt8173_rt5650_platform_data mt8173_rt5650_priv = { }; static const struct snd_soc_dapm_widget mt8173_rt5650_widgets[] = { - SND_SOC_DAPM_SPK("Ext Spk", NULL), + SND_SOC_DAPM_SPK("Speaker", NULL), SND_SOC_DAPM_MIC("Int Mic", NULL), SND_SOC_DAPM_HP("Headphone", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), }; static const struct snd_soc_dapm_route mt8173_rt5650_routes[] = { - {"Ext Spk", NULL, "SPOL"}, - {"Ext Spk", NULL, "SPOR"}, + {"Speaker", NULL, "SPOL"}, + {"Speaker", NULL, "SPOR"}, {"DMIC L1", NULL, "Int Mic"}, {"DMIC R1", NULL, "Int Mic"}, {"Headphone", NULL, "HPOL"}, @@ -48,7 +48,7 @@ static const struct snd_soc_dapm_route mt8173_rt5650_routes[] = { }; static const struct snd_kcontrol_new mt8173_rt5650_controls[] = { - SOC_DAPM_PIN_SWITCH("Ext Spk"), + SOC_DAPM_PIN_SWITCH("Speaker"), SOC_DAPM_PIN_SWITCH("Int Mic"), SOC_DAPM_PIN_SWITCH("Headphone"), SOC_DAPM_PIN_SWITCH("Headset Mic"), @@ -320,6 +320,9 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); + if (ret) + dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", + __func__, ret); of_node_put(platform_node); return ret; @@ -335,7 +338,9 @@ static struct platform_driver mt8173_rt5650_driver = { .driver = { .name = "mtk-rt5650", .of_match_table = mt8173_rt5650_dt_match, +#ifdef CONFIG_PM .pm = &snd_soc_pm_ops, +#endif }, .probe = mt8173_rt5650_dev_probe, }; diff --git a/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c b/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c index 86c8a523fe..14e77df06b 100644 --- a/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c +++ b/sound/soc/mediatek/mt8183/mt8183-afe-pcm.c @@ -1279,7 +1279,9 @@ static struct platform_driver mt8183_afe_pcm_driver = { .driver = { .name = "mt8183-audio", .of_match_table = mt8183_afe_pcm_dt_match, +#ifdef CONFIG_PM .pm = &mt8183_afe_pm_ops, +#endif }, .probe = mt8183_afe_pcm_dev_probe, .remove = mt8183_afe_pcm_dev_remove, diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index 718505c754..bda103211e 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -155,9 +155,9 @@ static const struct snd_soc_ops mt8183_da7219_rt1015_i2s_ops = { static int mt8183_i2s_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { - /* fix BE i2s format to S32_LE, clean param mask first */ + /* fix BE i2s format to 32bit, clean param mask first */ snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), - 0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST); + 0, SNDRV_PCM_FORMAT_LAST); params_set_format(params, SNDRV_PCM_FORMAT_S32_LE); @@ -167,9 +167,9 @@ 
static int mt8183_i2s_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, static int mt8183_rt1015_i2s_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { - /* fix BE i2s format to S24_LE, clean param mask first */ + /* fix BE i2s format to 32bit, clean param mask first */ snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), - 0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST); + 0, SNDRV_PCM_FORMAT_LAST); params_set_format(params, SNDRV_PCM_FORMAT_S24_LE); @@ -685,6 +685,7 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) struct snd_soc_dai_link *dai_link; struct mt8183_da7219_max98357_priv *priv; struct pinctrl *pinctrl; + const struct of_device_id *match; int ret, i; platform_node = of_parse_phandle(pdev->dev.of_node, @@ -694,9 +695,11 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) return -EINVAL; } - card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev); - if (!card) + match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev); + if (!match || !match->data) return -EINVAL; + + card = (struct snd_soc_card *)match->data; card->dev = &pdev->dev; hdmi_codec = of_parse_phandle(pdev->dev.of_node, diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c index b0ec5ebd4f..c7b10c48c6 100644 --- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c @@ -94,11 +94,11 @@ static const struct snd_soc_ops mt8183_mt6358_rt1015_i2s_ops = { static int mt8183_i2s_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { - dev_dbg(rtd->dev, "%s(), fix format to S32_LE\n", __func__); + dev_dbg(rtd->dev, "%s(), fix format to 32bit\n", __func__); - /* fix BE i2s format to S32_LE, clean param mask first */ + /* fix BE i2s format to 32bit, clean param mask first */ snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), - 0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST); + 0, SNDRV_PCM_FORMAT_LAST); params_set_format(params, SNDRV_PCM_FORMAT_S32_LE); return 0; @@ -107,11 +107,11 @@ static int mt8183_i2s_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, static int mt8183_rt1015_i2s_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { - dev_dbg(rtd->dev, "%s(), fix format to S24_LE\n", __func__); + dev_dbg(rtd->dev, "%s(), fix format to 32bit\n", __func__); - /* fix BE i2s format to S24_LE, clean param mask first */ + /* fix BE i2s format to 32bit, clean param mask first */ snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), - 0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST); + 0, SNDRV_PCM_FORMAT_LAST); params_set_format(params, SNDRV_PCM_FORMAT_S24_LE); return 0; @@ -335,7 +335,7 @@ static void mt8183_mt6358_tdm_shutdown(struct snd_pcm_substream *substream) __func__, ret); } -static const struct snd_soc_ops mt8183_mt6358_tdm_ops = { +static struct snd_soc_ops mt8183_mt6358_tdm_ops = { .startup = mt8183_mt6358_tdm_startup, .shutdown = mt8183_mt6358_tdm_shutdown, }; @@ -637,6 +637,7 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) struct device_node *platform_node, *ec_codec, *hdmi_codec; struct snd_soc_dai_link *dai_link; struct mt8183_mt6358_ts3a227_max98357_priv *priv; + const struct of_device_id *match; int ret, i; platform_node = of_parse_phandle(pdev->dev.of_node, @@ -646,9 +647,11 @@ 
mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) return -EINVAL; } - card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev); - if (!card) + match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev); + if (!match || !match->data) return -EINVAL; + + card = (struct snd_soc_card *)match->data; card->dev = &pdev->dev; ec_codec = of_parse_phandle(pdev->dev.of_node, "mediatek,ec-codec", 0); diff --git a/sound/soc/mediatek/mt8192/mt8192-afe-pcm.c b/sound/soc/mediatek/mt8192/mt8192-afe-pcm.c index e1e4ca9315..31c280339c 100644 --- a/sound/soc/mediatek/mt8192/mt8192-afe-pcm.c +++ b/sound/soc/mediatek/mt8192/mt8192-afe-pcm.c @@ -2381,7 +2381,9 @@ static struct platform_driver mt8192_afe_pcm_driver = { .driver = { .name = "mt8192-audio", .of_match_table = mt8192_afe_pcm_dt_match, +#ifdef CONFIG_PM .pm = &mt8192_afe_pm_ops, +#endif }, .probe = mt8192_afe_pcm_dev_probe, .remove = mt8192_afe_pcm_dev_remove, diff --git a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c index f7daad1bfe..24a5d0adec 100644 --- a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c +++ b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c @@ -350,9 +350,9 @@ static int mt8192_mt6359_hdmi_init(struct snd_soc_pcm_runtime *rtd) static int mt8192_i2s_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { - /* fix BE i2s format to S24_LE, clean param mask first */ + /* fix BE i2s format to 32bit, clean param mask first */ snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), - 0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST); + 0, SNDRV_PCM_FORMAT_LAST); params_set_format(params, SNDRV_PCM_FORMAT_S24_LE); @@ -1106,6 +1106,7 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev) struct device_node *platform_node, *hdmi_codec; int ret, i; struct snd_soc_dai_link *dai_link; + const struct of_device_id *match; struct mt8192_mt6359_priv *priv; platform_node = of_parse_phandle(pdev->dev.of_node, @@ -1115,9 +1116,11 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev) return -EINVAL; } - card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev); - if (!card) + match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev); + if (!match || !match->data) return -EINVAL; + + card = (struct snd_soc_card *)match->data; card->dev = &pdev->dev; hdmi_codec = of_parse_phandle(pdev->dev.of_node, diff --git a/sound/soc/mediatek/mt8195/Makefile b/sound/soc/mediatek/mt8195/Makefile index e5f0df5010..44775f400b 100644 --- a/sound/soc/mediatek/mt8195/Makefile +++ b/sound/soc/mediatek/mt8195/Makefile @@ -13,4 +13,3 @@ obj-$(CONFIG_SND_SOC_MT8195) += snd-soc-mt8195-afe.o # machine driver obj-$(CONFIG_SND_SOC_MT8195_MT6359_RT1019_RT5682) += mt8195-mt6359-rt1019-rt5682.o -obj-$(CONFIG_SND_SOC_MT8195_MT6359_RT1011_RT5682) += mt8195-mt6359-rt1011-rt5682.o diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c index c2543f4cff..8420b2c713 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c @@ -326,7 +326,7 @@ int mt8195_afe_enable_reg_rw_clk(struct mtk_base_afe *afe) { struct mt8195_afe_private *afe_priv = afe->platform_priv; int i; - static const unsigned int clk_array[] = { + unsigned int clk_array[] = { MT8195_CLK_SCP_ADSP_AUDIODSP, /* bus clock for infra */ MT8195_CLK_TOP_AUDIO_H_SEL, /* clock for ADSP bus */ MT8195_CLK_TOP_AUDIO_LOCAL_BUS_SEL, /* bus 
clock for DRAM access */ @@ -347,7 +347,7 @@ int mt8195_afe_disable_reg_rw_clk(struct mtk_base_afe *afe) { struct mt8195_afe_private *afe_priv = afe->platform_priv; int i; - static const unsigned int clk_array[] = { + unsigned int clk_array[] = { MT8195_CLK_AUD_A1SYS, MT8195_CLK_AUD_A1SYS_HP, MT8195_CLK_AUD_AFE, @@ -380,11 +380,11 @@ static int mt8195_afe_enable_timing_sys(struct mtk_base_afe *afe) { struct mt8195_afe_private *afe_priv = afe->platform_priv; int i; - static const unsigned int clk_array[] = { + unsigned int clk_array[] = { MT8195_CLK_AUD_A1SYS, MT8195_CLK_AUD_A2SYS, }; - static const unsigned int cg_array[] = { + unsigned int cg_array[] = { MT8195_TOP_CG_A1SYS_TIMING, MT8195_TOP_CG_A2SYS_TIMING, MT8195_TOP_CG_26M_TIMING, @@ -403,11 +403,11 @@ static int mt8195_afe_disable_timing_sys(struct mtk_base_afe *afe) { struct mt8195_afe_private *afe_priv = afe->platform_priv; int i; - static const unsigned int clk_array[] = { + unsigned int clk_array[] = { MT8195_CLK_AUD_A2SYS, MT8195_CLK_AUD_A1SYS, }; - static const unsigned int cg_array[] = { + unsigned int cg_array[] = { MT8195_TOP_CG_26M_TIMING, MT8195_TOP_CG_A2SYS_TIMING, MT8195_TOP_CG_A1SYS_TIMING, diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c index e425f86847..2edb40fe27 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "mt8195-afe-common.h" #include "mt8195-afe-clk.h" @@ -2233,7 +2232,7 @@ static const struct mtk_base_memif_data memif_data[MT8195_AFE_MEMIF_NUM] = { }, }; -static const struct mtk_base_irq_data irq_data_array[MT8195_AFE_IRQ_NUM] = { +static const struct mtk_base_irq_data irq_data[MT8195_AFE_IRQ_NUM] = { [MT8195_AFE_IRQ_1] = { .id = MT8195_AFE_IRQ_1, .irq_cnt_reg = -1, @@ -3058,16 +3057,11 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev) { struct mtk_base_afe *afe; struct mt8195_afe_private *afe_priv; + struct resource *res; struct device *dev = &pdev->dev; int i, irq_id, ret; struct snd_soc_component *component; - ret = of_reserved_mem_device_init(dev); - if (ret) { - dev_err(dev, "failed to assign memory region: %d\n", ret); - return ret; - } - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(33)); if (ret) return ret; @@ -3084,7 +3078,8 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev) afe_priv = afe->platform_priv; afe->dev = &pdev->dev; - afe->base_addr = devm_platform_ioremap_resource(pdev, 0); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + afe->base_addr = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(afe->base_addr)) return PTR_ERR(afe->base_addr); @@ -3107,7 +3102,7 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev) return -ENOMEM; for (i = 0; i < afe->irqs_size; i++) - afe->irqs[i].irq_data = &irq_data_array[i]; + afe->irqs[i].irq_data = &irq_data[i]; /* init memif */ afe->memif_size = MT8195_AFE_MEMIF_NUM; @@ -3271,7 +3266,9 @@ static struct platform_driver mt8195_afe_pcm_driver = { .driver = { .name = "mt8195-audio", .of_match_table = mt8195_afe_pcm_dt_match, +#ifdef CONFIG_PM .pm = &mt8195_afe_pm_ops, +#endif }, .probe = mt8195_afe_pcm_dev_probe, .remove = mt8195_afe_pcm_dev_remove, diff --git a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c index e0670e0dbd..740aa6ddda 100644 --- a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c +++ b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c @@ -59,93 +59,93 @@ 
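/*
 * The clk_array/cg_array hunks above toggle `static const` on local ID
 * tables. With `static const` the table is emitted once into .rodata;
 * without it, the array is rebuilt on the stack on every call. A sketch of
 * the pattern, assuming a priv->clk[] table indexed by these IDs as in the
 * AFE driver (struct and IDs here are placeholders):
 */
#include <linux/clk.h>
#include <linux/kernel.h>

struct sketch_clk_priv {
	struct clk *clk[8];
};

static int sketch_enable_clk_list(struct sketch_clk_priv *priv)
{
	static const unsigned int clk_ids[] = { 0, 1, 2 };
	int i, ret;

	for (i = 0; i < ARRAY_SIZE(clk_ids); i++) {
		ret = clk_prepare_enable(priv->clk[clk_ids[i]]);
		if (ret)
			return ret;
	}

	return 0;
}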
struct afe_gate { static const struct afe_gate aud_clks[CLK_AUD_NR_CLK] = { /* AUD0 */ - GATE_AUD0(CLK_AUD_AFE, "aud_afe", "top_a1sys_hp", 2), - GATE_AUD0(CLK_AUD_LRCK_CNT, "aud_lrck_cnt", "top_a1sys_hp", 4), - GATE_AUD0(CLK_AUD_SPDIFIN_TUNER_APLL, "aud_spdifin_tuner_apll", "top_apll4", 10), - GATE_AUD0(CLK_AUD_SPDIFIN_TUNER_DBG, "aud_spdifin_tuner_dbg", "top_apll4", 11), - GATE_AUD0(CLK_AUD_UL_TML, "aud_ul_tml", "top_a1sys_hp", 18), - GATE_AUD0(CLK_AUD_APLL1_TUNER, "aud_apll1_tuner", "top_apll1", 19), - GATE_AUD0(CLK_AUD_APLL2_TUNER, "aud_apll2_tuner", "top_apll2", 20), - GATE_AUD0(CLK_AUD_TOP0_SPDF, "aud_top0_spdf", "top_aud_iec_clk", 21), - GATE_AUD0(CLK_AUD_APLL, "aud_apll", "top_apll1", 23), - GATE_AUD0(CLK_AUD_APLL2, "aud_apll2", "top_apll2", 24), - GATE_AUD0(CLK_AUD_DAC, "aud_dac", "top_a1sys_hp", 25), - GATE_AUD0(CLK_AUD_DAC_PREDIS, "aud_dac_predis", "top_a1sys_hp", 26), - GATE_AUD0(CLK_AUD_TML, "aud_tml", "top_a1sys_hp", 27), - GATE_AUD0(CLK_AUD_ADC, "aud_adc", "top_a1sys_hp", 28), - GATE_AUD0(CLK_AUD_DAC_HIRES, "aud_dac_hires", "top_audio_h", 31), + GATE_AUD0(CLK_AUD_AFE, "aud_afe", "a1sys_hp_sel", 2), + GATE_AUD0(CLK_AUD_LRCK_CNT, "aud_lrck_cnt", "a1sys_hp_sel", 4), + GATE_AUD0(CLK_AUD_SPDIFIN_TUNER_APLL, "aud_spdifin_tuner_apll", "apll4_sel", 10), + GATE_AUD0(CLK_AUD_SPDIFIN_TUNER_DBG, "aud_spdifin_tuner_dbg", "apll4_sel", 11), + GATE_AUD0(CLK_AUD_UL_TML, "aud_ul_tml", "a1sys_hp_sel", 18), + GATE_AUD0(CLK_AUD_APLL1_TUNER, "aud_apll1_tuner", "apll1_sel", 19), + GATE_AUD0(CLK_AUD_APLL2_TUNER, "aud_apll2_tuner", "apll2_sel", 20), + GATE_AUD0(CLK_AUD_TOP0_SPDF, "aud_top0_spdf", "aud_iec_sel", 21), + GATE_AUD0(CLK_AUD_APLL, "aud_apll", "apll1_sel", 23), + GATE_AUD0(CLK_AUD_APLL2, "aud_apll2", "apll2_sel", 24), + GATE_AUD0(CLK_AUD_DAC, "aud_dac", "a1sys_hp_sel", 25), + GATE_AUD0(CLK_AUD_DAC_PREDIS, "aud_dac_predis", "a1sys_hp_sel", 26), + GATE_AUD0(CLK_AUD_TML, "aud_tml", "a1sys_hp_sel", 27), + GATE_AUD0(CLK_AUD_ADC, "aud_adc", "a1sys_hp_sel", 28), + GATE_AUD0(CLK_AUD_DAC_HIRES, "aud_dac_hires", "audio_h_sel", 31), /* AUD1 */ - GATE_AUD1(CLK_AUD_A1SYS_HP, "aud_a1sys_hp", "top_a1sys_hp", 2), - GATE_AUD1(CLK_AUD_AFE_DMIC1, "aud_afe_dmic1", "top_a1sys_hp", 10), - GATE_AUD1(CLK_AUD_AFE_DMIC2, "aud_afe_dmic2", "top_a1sys_hp", 11), - GATE_AUD1(CLK_AUD_AFE_DMIC3, "aud_afe_dmic3", "top_a1sys_hp", 12), - GATE_AUD1(CLK_AUD_AFE_DMIC4, "aud_afe_dmic4", "top_a1sys_hp", 13), - GATE_AUD1(CLK_AUD_AFE_26M_DMIC_TM, "aud_afe_26m_dmic_tm", "top_a1sys_hp", 14), - GATE_AUD1(CLK_AUD_UL_TML_HIRES, "aud_ul_tml_hires", "top_audio_h", 16), - GATE_AUD1(CLK_AUD_ADC_HIRES, "aud_adc_hires", "top_audio_h", 17), - GATE_AUD1(CLK_AUD_ADDA6_ADC, "aud_adda6_adc", "top_a1sys_hp", 18), - GATE_AUD1(CLK_AUD_ADDA6_ADC_HIRES, "aud_adda6_adc_hires", "top_audio_h", 19), + GATE_AUD1(CLK_AUD_A1SYS_HP, "aud_a1sys_hp", "a1sys_hp_sel", 2), + GATE_AUD1(CLK_AUD_AFE_DMIC1, "aud_afe_dmic1", "a1sys_hp_sel", 10), + GATE_AUD1(CLK_AUD_AFE_DMIC2, "aud_afe_dmic2", "a1sys_hp_sel", 11), + GATE_AUD1(CLK_AUD_AFE_DMIC3, "aud_afe_dmic3", "a1sys_hp_sel", 12), + GATE_AUD1(CLK_AUD_AFE_DMIC4, "aud_afe_dmic4", "a1sys_hp_sel", 13), + GATE_AUD1(CLK_AUD_AFE_26M_DMIC_TM, "aud_afe_26m_dmic_tm", "a1sys_hp_sel", 14), + GATE_AUD1(CLK_AUD_UL_TML_HIRES, "aud_ul_tml_hires", "audio_h_sel", 16), + GATE_AUD1(CLK_AUD_ADC_HIRES, "aud_adc_hires", "audio_h_sel", 17), + GATE_AUD1(CLK_AUD_ADDA6_ADC, "aud_adda6_adc", "a1sys_hp_sel", 18), + GATE_AUD1(CLK_AUD_ADDA6_ADC_HIRES, "aud_adda6_adc_hires", "audio_h_sel", 19), /* AUD3 */ - GATE_AUD3(CLK_AUD_LINEIN_TUNER, 
"aud_linein_tuner", "top_apll5", 5), - GATE_AUD3(CLK_AUD_EARC_TUNER, "aud_earc_tuner", "top_apll3", 7), + GATE_AUD3(CLK_AUD_LINEIN_TUNER, "aud_linein_tuner", "apll5_sel", 5), + GATE_AUD3(CLK_AUD_EARC_TUNER, "aud_earc_tuner", "apll3_sel", 7), /* AUD4 */ - GATE_AUD4(CLK_AUD_I2SIN, "aud_i2sin", "top_a1sys_hp", 0), - GATE_AUD4(CLK_AUD_TDM_IN, "aud_tdm_in", "top_a1sys_hp", 1), - GATE_AUD4(CLK_AUD_I2S_OUT, "aud_i2s_out", "top_a1sys_hp", 6), - GATE_AUD4(CLK_AUD_TDM_OUT, "aud_tdm_out", "top_a1sys_hp", 7), - GATE_AUD4(CLK_AUD_HDMI_OUT, "aud_hdmi_out", "top_a1sys_hp", 8), - GATE_AUD4(CLK_AUD_ASRC11, "aud_asrc11", "top_a1sys_hp", 16), - GATE_AUD4(CLK_AUD_ASRC12, "aud_asrc12", "top_a1sys_hp", 17), + GATE_AUD4(CLK_AUD_I2SIN, "aud_i2sin", "a1sys_hp_sel", 0), + GATE_AUD4(CLK_AUD_TDM_IN, "aud_tdm_in", "a1sys_hp_sel", 1), + GATE_AUD4(CLK_AUD_I2S_OUT, "aud_i2s_out", "a1sys_hp_sel", 6), + GATE_AUD4(CLK_AUD_TDM_OUT, "aud_tdm_out", "a1sys_hp_sel", 7), + GATE_AUD4(CLK_AUD_HDMI_OUT, "aud_hdmi_out", "a1sys_hp_sel", 8), + GATE_AUD4(CLK_AUD_ASRC11, "aud_asrc11", "a1sys_hp_sel", 16), + GATE_AUD4(CLK_AUD_ASRC12, "aud_asrc12", "a1sys_hp_sel", 17), GATE_AUD4(CLK_AUD_MULTI_IN, "aud_multi_in", "mphone_slave_b", 19), - GATE_AUD4(CLK_AUD_INTDIR, "aud_intdir", "top_intdir", 20), - GATE_AUD4(CLK_AUD_A1SYS, "aud_a1sys", "top_a1sys_hp", 21), - GATE_AUD4(CLK_AUD_A2SYS, "aud_a2sys", "top_a2sys_hf", 22), - GATE_AUD4(CLK_AUD_PCMIF, "aud_pcmif", "top_a1sys_hp", 24), - GATE_AUD4(CLK_AUD_A3SYS, "aud_a3sys", "top_a3sys_hf", 30), - GATE_AUD4(CLK_AUD_A4SYS, "aud_a4sys", "top_a4sys_hf", 31), + GATE_AUD4(CLK_AUD_INTDIR, "aud_intdir", "intdir_sel", 20), + GATE_AUD4(CLK_AUD_A1SYS, "aud_a1sys", "a1sys_hp_sel", 21), + GATE_AUD4(CLK_AUD_A2SYS, "aud_a2sys", "a2sys_sel", 22), + GATE_AUD4(CLK_AUD_PCMIF, "aud_pcmif", "a1sys_hp_sel", 24), + GATE_AUD4(CLK_AUD_A3SYS, "aud_a3sys", "a3sys_sel", 30), + GATE_AUD4(CLK_AUD_A4SYS, "aud_a4sys", "a4sys_sel", 31), /* AUD5 */ - GATE_AUD5(CLK_AUD_MEMIF_UL1, "aud_memif_ul1", "top_a1sys_hp", 0), - GATE_AUD5(CLK_AUD_MEMIF_UL2, "aud_memif_ul2", "top_a1sys_hp", 1), - GATE_AUD5(CLK_AUD_MEMIF_UL3, "aud_memif_ul3", "top_a1sys_hp", 2), - GATE_AUD5(CLK_AUD_MEMIF_UL4, "aud_memif_ul4", "top_a1sys_hp", 3), - GATE_AUD5(CLK_AUD_MEMIF_UL5, "aud_memif_ul5", "top_a1sys_hp", 4), - GATE_AUD5(CLK_AUD_MEMIF_UL6, "aud_memif_ul6", "top_a1sys_hp", 5), - GATE_AUD5(CLK_AUD_MEMIF_UL8, "aud_memif_ul8", "top_a1sys_hp", 7), - GATE_AUD5(CLK_AUD_MEMIF_UL9, "aud_memif_ul9", "top_a1sys_hp", 8), - GATE_AUD5(CLK_AUD_MEMIF_UL10, "aud_memif_ul10", "top_a1sys_hp", 9), - GATE_AUD5(CLK_AUD_MEMIF_DL2, "aud_memif_dl2", "top_a1sys_hp", 18), - GATE_AUD5(CLK_AUD_MEMIF_DL3, "aud_memif_dl3", "top_a1sys_hp", 19), - GATE_AUD5(CLK_AUD_MEMIF_DL6, "aud_memif_dl6", "top_a1sys_hp", 22), - GATE_AUD5(CLK_AUD_MEMIF_DL7, "aud_memif_dl7", "top_a1sys_hp", 23), - GATE_AUD5(CLK_AUD_MEMIF_DL8, "aud_memif_dl8", "top_a1sys_hp", 24), - GATE_AUD5(CLK_AUD_MEMIF_DL10, "aud_memif_dl10", "top_a1sys_hp", 26), - GATE_AUD5(CLK_AUD_MEMIF_DL11, "aud_memif_dl11", "top_a1sys_hp", 27), + GATE_AUD5(CLK_AUD_MEMIF_UL1, "aud_memif_ul1", "a1sys_hp_sel", 0), + GATE_AUD5(CLK_AUD_MEMIF_UL2, "aud_memif_ul2", "a1sys_hp_sel", 1), + GATE_AUD5(CLK_AUD_MEMIF_UL3, "aud_memif_ul3", "a1sys_hp_sel", 2), + GATE_AUD5(CLK_AUD_MEMIF_UL4, "aud_memif_ul4", "a1sys_hp_sel", 3), + GATE_AUD5(CLK_AUD_MEMIF_UL5, "aud_memif_ul5", "a1sys_hp_sel", 4), + GATE_AUD5(CLK_AUD_MEMIF_UL6, "aud_memif_ul6", "a1sys_hp_sel", 5), + GATE_AUD5(CLK_AUD_MEMIF_UL8, "aud_memif_ul8", "a1sys_hp_sel", 7), + GATE_AUD5(CLK_AUD_MEMIF_UL9, 
"aud_memif_ul9", "a1sys_hp_sel", 8), + GATE_AUD5(CLK_AUD_MEMIF_UL10, "aud_memif_ul10", "a1sys_hp_sel", 9), + GATE_AUD5(CLK_AUD_MEMIF_DL2, "aud_memif_dl2", "a1sys_hp_sel", 18), + GATE_AUD5(CLK_AUD_MEMIF_DL3, "aud_memif_dl3", "a1sys_hp_sel", 19), + GATE_AUD5(CLK_AUD_MEMIF_DL6, "aud_memif_dl6", "a1sys_hp_sel", 22), + GATE_AUD5(CLK_AUD_MEMIF_DL7, "aud_memif_dl7", "a1sys_hp_sel", 23), + GATE_AUD5(CLK_AUD_MEMIF_DL8, "aud_memif_dl8", "a1sys_hp_sel", 24), + GATE_AUD5(CLK_AUD_MEMIF_DL10, "aud_memif_dl10", "a1sys_hp_sel", 26), + GATE_AUD5(CLK_AUD_MEMIF_DL11, "aud_memif_dl11", "a1sys_hp_sel", 27), /* AUD6 */ - GATE_AUD6(CLK_AUD_GASRC0, "aud_gasrc0", "top_asm_h", 0), - GATE_AUD6(CLK_AUD_GASRC1, "aud_gasrc1", "top_asm_h", 1), - GATE_AUD6(CLK_AUD_GASRC2, "aud_gasrc2", "top_asm_h", 2), - GATE_AUD6(CLK_AUD_GASRC3, "aud_gasrc3", "top_asm_h", 3), - GATE_AUD6(CLK_AUD_GASRC4, "aud_gasrc4", "top_asm_h", 4), - GATE_AUD6(CLK_AUD_GASRC5, "aud_gasrc5", "top_asm_h", 5), - GATE_AUD6(CLK_AUD_GASRC6, "aud_gasrc6", "top_asm_h", 6), - GATE_AUD6(CLK_AUD_GASRC7, "aud_gasrc7", "top_asm_h", 7), - GATE_AUD6(CLK_AUD_GASRC8, "aud_gasrc8", "top_asm_h", 8), - GATE_AUD6(CLK_AUD_GASRC9, "aud_gasrc9", "top_asm_h", 9), - GATE_AUD6(CLK_AUD_GASRC10, "aud_gasrc10", "top_asm_h", 10), - GATE_AUD6(CLK_AUD_GASRC11, "aud_gasrc11", "top_asm_h", 11), - GATE_AUD6(CLK_AUD_GASRC12, "aud_gasrc12", "top_asm_h", 12), - GATE_AUD6(CLK_AUD_GASRC13, "aud_gasrc13", "top_asm_h", 13), - GATE_AUD6(CLK_AUD_GASRC14, "aud_gasrc14", "top_asm_h", 14), - GATE_AUD6(CLK_AUD_GASRC15, "aud_gasrc15", "top_asm_h", 15), - GATE_AUD6(CLK_AUD_GASRC16, "aud_gasrc16", "top_asm_h", 16), - GATE_AUD6(CLK_AUD_GASRC17, "aud_gasrc17", "top_asm_h", 17), - GATE_AUD6(CLK_AUD_GASRC18, "aud_gasrc18", "top_asm_h", 18), - GATE_AUD6(CLK_AUD_GASRC19, "aud_gasrc19", "top_asm_h", 19), + GATE_AUD6(CLK_AUD_GASRC0, "aud_gasrc0", "asm_h_sel", 0), + GATE_AUD6(CLK_AUD_GASRC1, "aud_gasrc1", "asm_h_sel", 1), + GATE_AUD6(CLK_AUD_GASRC2, "aud_gasrc2", "asm_h_sel", 2), + GATE_AUD6(CLK_AUD_GASRC3, "aud_gasrc3", "asm_h_sel", 3), + GATE_AUD6(CLK_AUD_GASRC4, "aud_gasrc4", "asm_h_sel", 4), + GATE_AUD6(CLK_AUD_GASRC5, "aud_gasrc5", "asm_h_sel", 5), + GATE_AUD6(CLK_AUD_GASRC6, "aud_gasrc6", "asm_h_sel", 6), + GATE_AUD6(CLK_AUD_GASRC7, "aud_gasrc7", "asm_h_sel", 7), + GATE_AUD6(CLK_AUD_GASRC8, "aud_gasrc8", "asm_h_sel", 8), + GATE_AUD6(CLK_AUD_GASRC9, "aud_gasrc9", "asm_h_sel", 9), + GATE_AUD6(CLK_AUD_GASRC10, "aud_gasrc10", "asm_h_sel", 10), + GATE_AUD6(CLK_AUD_GASRC11, "aud_gasrc11", "asm_h_sel", 11), + GATE_AUD6(CLK_AUD_GASRC12, "aud_gasrc12", "asm_h_sel", 12), + GATE_AUD6(CLK_AUD_GASRC13, "aud_gasrc13", "asm_h_sel", 13), + GATE_AUD6(CLK_AUD_GASRC14, "aud_gasrc14", "asm_h_sel", 14), + GATE_AUD6(CLK_AUD_GASRC15, "aud_gasrc15", "asm_h_sel", 15), + GATE_AUD6(CLK_AUD_GASRC16, "aud_gasrc16", "asm_h_sel", 16), + GATE_AUD6(CLK_AUD_GASRC17, "aud_gasrc17", "asm_h_sel", 17), + GATE_AUD6(CLK_AUD_GASRC18, "aud_gasrc18", "asm_h_sel", 18), + GATE_AUD6(CLK_AUD_GASRC19, "aud_gasrc19", "asm_h_sel", 19), }; int mt8195_audsys_clk_register(struct mtk_base_afe *afe) diff --git a/sound/soc/mediatek/mt8195/mt8195-dai-adda.c b/sound/soc/mediatek/mt8195/mt8195-dai-adda.c index f04bd17813..878dec0b69 100644 --- a/sound/soc/mediatek/mt8195/mt8195-dai-adda.c +++ b/sound/soc/mediatek/mt8195/mt8195-dai-adda.c @@ -788,11 +788,9 @@ static int init_adda_priv_data(struct mtk_base_afe *afe) { struct mt8195_afe_private *afe_priv = afe->platform_priv; struct mtk_dai_adda_priv *adda_priv; - static const int adda_dai_list[] = { - 
MT8195_AFE_IO_DL_SRC, - MT8195_AFE_IO_UL_SRC1, - MT8195_AFE_IO_UL_SRC2 - }; + int adda_dai_list[] = { MT8195_AFE_IO_DL_SRC, + MT8195_AFE_IO_UL_SRC1, + MT8195_AFE_IO_UL_SRC2}; int i; for (i = 0; i < ARRAY_SIZE(adda_dai_list); i++) { diff --git a/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c b/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c index c02c10da36..7378e42f27 100644 --- a/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c +++ b/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c @@ -1316,7 +1316,7 @@ static int mt8195_afe_disable_etdm(struct mtk_base_afe *afe, int dai_id) } out: spin_unlock_irqrestore(&afe_priv->afe_ctrl_lock, flags); - return ret; + return 0; } static int etdm_cowork_slv_sel(int id, int slave_mode) @@ -2094,7 +2094,7 @@ static int mtk_dai_etdm_set_sysclk(struct snd_soc_dai *dai, { struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai); struct mt8195_afe_private *afe_priv = afe->platform_priv; - struct mtk_dai_etdm_priv *etdm_data; + struct mtk_dai_etdm_priv *etdm_data = afe_priv->dai_priv[dai->id]; int dai_id; dev_dbg(dai->dev, "%s id %d freq %u, dir %d\n", diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c index 29c2d3407c..a3fa8efc8f 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c @@ -1,12 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -/* - * mt8195-mt6359-rt1019-rt5682.c -- - * MT8195-MT6359-RT1019-RT5682 ALSA SoC machine driver - * - * Copyright (c) 2021 MediaTek Inc. - * Author: Trevor Wu - * YC Hung - */ +// +// mt8195-mt6359-rt1019-rt5682.c -- +// MT8195-MT6359-RT1019-RT6358 ALSA SoC machine driver +// +// Copyright (c) 2021 MediaTek Inc. +// Author: Trevor Wu +// #include #include @@ -14,12 +13,10 @@ #include #include #include -#include #include #include "../../codecs/mt6359.h" #include "../../codecs/rt5682.h" #include "../common/mtk-afe-platform-driver.h" -#include "mt8195-afe-clk.h" #include "mt8195-afe-common.h" #define RT1019_CODEC_DAI "HiFi" @@ -28,26 +25,10 @@ #define RT5682_CODEC_DAI "rt5682-aif1" #define RT5682_DEV0_NAME "rt5682.2-001a" -#define RT5682S_CODEC_DAI "rt5682s-aif1" -#define RT5682S_DEV0_NAME "rt5682s.2-001a" - -#define SOF_DMA_DL2 "SOF_DMA_DL2" -#define SOF_DMA_DL3 "SOF_DMA_DL3" -#define SOF_DMA_UL4 "SOF_DMA_UL4" -#define SOF_DMA_UL5 "SOF_DMA_UL5" - -struct sof_conn_stream { - const char *normal_link; - const char *sof_link; - const char *sof_dma; - int stream_dir; -}; - struct mt8195_mt6359_rt1019_rt5682_priv { struct snd_soc_jack headset_jack; struct snd_soc_jack dp_jack; struct snd_soc_jack hdmi_jack; - struct clk *i2so1_mclk; }; static const struct snd_soc_dapm_widget @@ -55,10 +36,6 @@ static const struct snd_soc_dapm_widget SND_SOC_DAPM_SPK("Speakers", NULL), SND_SOC_DAPM_HP("Headphone Jack", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), - SND_SOC_DAPM_MIXER(SOF_DMA_DL2, SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_MIXER(SOF_DMA_DL3, SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_MIXER(SOF_DMA_UL4, SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_MIXER(SOF_DMA_UL5, SND_SOC_NOPM, 0, 0, NULL, 0), }; static const struct snd_soc_dapm_route mt8195_mt6359_rt1019_rt5682_routes[] = { @@ -68,16 +45,6 @@ static const struct snd_soc_dapm_route mt8195_mt6359_rt1019_rt5682_routes[] = { { "Headphone Jack", NULL, "HPOL" }, { "Headphone Jack", NULL, "HPOR" }, { "IN1P", NULL, "Headset Mic" }, - /* SOF Uplink */ - {SOF_DMA_UL4, NULL, "O034"}, - {SOF_DMA_UL4, NULL, "O035"}, - {SOF_DMA_UL5, NULL, "O036"}, - 
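/*
 * The hw_params_fixup hunks in the mt8192 and mt8195 machine drivers pin
 * the BE i2s format with the same two calls: clear every bit in the format
 * mask, then set exactly one format. Condensed from those hunks:
 */
#include <sound/pcm_params.h>
#include <sound/soc.h>

static int sketch_be_fmt_fixup(struct snd_soc_pcm_runtime *rtd,
			       struct snd_pcm_hw_params *params)
{
	/* clear the whole format mask, then pin the BE to S24_LE */
	snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT),
			     0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST);
	params_set_format(params, SNDRV_PCM_FORMAT_S24_LE);

	return 0;
}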
{SOF_DMA_UL5, NULL, "O037"}, - /* SOF Downlink */ - {"I070", NULL, SOF_DMA_DL2}, - {"I071", NULL, SOF_DMA_DL2}, - {"I020", NULL, SOF_DMA_DL3}, - {"I021", NULL, SOF_DMA_DL3}, }; static const struct snd_kcontrol_new mt8195_mt6359_rt1019_rt5682_controls[] = { @@ -94,6 +61,8 @@ static int mt8195_rt5682_etdm_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0); unsigned int rate = params_rate(params); + unsigned int mclk_fs_ratio = 128; + unsigned int mclk_fs = rate * mclk_fs_ratio; int bitwidth; int ret; @@ -109,22 +78,25 @@ static int mt8195_rt5682_etdm_hw_params(struct snd_pcm_substream *substream, return ret; } - ret = snd_soc_dai_set_pll(codec_dai, RT5682_PLL1, RT5682_PLL1_S_MCLK, - rate * 256, rate * 512); + ret = snd_soc_dai_set_pll(codec_dai, RT5682_PLL1, + RT5682_PLL1_S_BCLK1, + params_rate(params) * 64, + params_rate(params) * 512); if (ret) { dev_err(card->dev, "failed to set pll\n"); return ret; } - ret = snd_soc_dai_set_sysclk(codec_dai, RT5682_SCLK_S_PLL1, - rate * 512, SND_SOC_CLOCK_IN); + ret = snd_soc_dai_set_sysclk(codec_dai, + RT5682_SCLK_S_PLL1, + params_rate(params) * 512, + SND_SOC_CLOCK_IN); if (ret) { dev_err(card->dev, "failed to set sysclk\n"); return ret; } - return snd_soc_dai_set_sysclk(cpu_dai, 0, rate * 256, - SND_SOC_CLOCK_OUT); + return snd_soc_dai_set_sysclk(cpu_dai, 0, mclk_fs, SND_SOC_CLOCK_OUT); } static const struct snd_soc_ops mt8195_rt5682_etdm_ops = { @@ -319,14 +291,8 @@ static int mt8195_rt5682_init(struct snd_soc_pcm_runtime *rtd) struct mt8195_mt6359_rt1019_rt5682_priv *priv = snd_soc_card_get_drvdata(rtd->card); struct snd_soc_jack *jack = &priv->headset_jack; - struct snd_soc_component *cmpnt_afe = - snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME); - struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt_afe); - struct mt8195_afe_private *afe_priv = afe->platform_priv; int ret; - priv->i2so1_mclk = afe_priv->clk[MT8195_CLK_TOP_APLL12_DIV2]; - ret = snd_soc_card_jack_new(rtd->card, "Headset Jack", SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | @@ -354,7 +320,7 @@ static int mt8195_rt5682_init(struct snd_soc_pcm_runtime *rtd) static int mt8195_etdm_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { - /* fix BE i2s format to S24_LE, clean param mask first */ + /* fix BE i2s format to 32bit, clean param mask first */ snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), 0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST); @@ -422,7 +388,7 @@ static int mt8195_dptx_hw_params(struct snd_pcm_substream *substream, SND_SOC_CLOCK_OUT); } -static const struct snd_soc_ops mt8195_dptx_ops = { +static struct snd_soc_ops mt8195_dptx_ops = { .hw_params = mt8195_dptx_hw_params, }; @@ -462,7 +428,7 @@ static int mt8195_dptx_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { - /* fix BE i2s format to S24_LE, clean param mask first */ + /* fix BE i2s format to 32bit, clean param mask first */ snd_mask_reset_range(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), 0, (__force unsigned int)SNDRV_PCM_FORMAT_LAST); @@ -563,48 +529,6 @@ static const struct snd_soc_ops mt8195_capture_ops = { .startup = mt8195_capture_startup, }; -static int mt8195_set_bias_level_post(struct snd_soc_card *card, - struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level) -{ - struct snd_soc_component *component = dapm->component; - struct mt8195_mt6359_rt1019_rt5682_priv *priv 
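/*
 * The rt5682 hw_params hunk above re-sources PLL1 from BCLK1 (a 64*fs
 * reference) instead of MCLK, then feeds the codec sysclk from PLL1 at
 * 512*fs. Condensed sketch of that clocking chain (assumes <sound/soc.h>
 * and the rt5682 header for the RT5682_* IDs):
 */
static int sketch_rt5682_clocking(struct snd_soc_dai *codec_dai,
				  struct snd_soc_dai *cpu_dai,
				  unsigned int rate)
{
	int ret;

	/* PLL1 input: BCLK1 at 64*fs; output: 512*fs */
	ret = snd_soc_dai_set_pll(codec_dai, RT5682_PLL1, RT5682_PLL1_S_BCLK1,
				  rate * 64, rate * 512);
	if (ret)
		return ret;

	/* codec system clock taken from PLL1 */
	ret = snd_soc_dai_set_sysclk(codec_dai, RT5682_SCLK_S_PLL1,
				     rate * 512, SND_SOC_CLOCK_IN);
	if (ret)
		return ret;

	/* CPU DAI still outputs a 128*fs master clock on the pad */
	return snd_soc_dai_set_sysclk(cpu_dai, 0, rate * 128,
				      SND_SOC_CLOCK_OUT);
}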
= - snd_soc_card_get_drvdata(card); - int ret; - - /* - * It's required to control mclk directly in the set_bias_level_post - * function for rt5682 and rt5682s codec, or the unexpected pop happens - * at the end of playback. - */ - if (!component || - (strcmp(component->name, RT5682_DEV0_NAME) && - strcmp(component->name, RT5682S_DEV0_NAME))) - return 0; - - - switch (level) { - case SND_SOC_BIAS_OFF: - if (!__clk_is_enabled(priv->i2so1_mclk)) - return 0; - - clk_disable_unprepare(priv->i2so1_mclk); - dev_dbg(card->dev, "Disable i2so1 mclk\n"); - break; - case SND_SOC_BIAS_ON: - ret = clk_prepare_enable(priv->i2so1_mclk); - if (ret) { - dev_err(card->dev, "Can't enable i2so1 mclk: %d\n", ret); - return ret; - } - dev_dbg(card->dev, "Enable i2so1 mclk\n"); - break; - default: - break; - } - - return 0; -} - enum { DAI_LINK_DL2_FE, DAI_LINK_DL3_FE, @@ -632,17 +556,8 @@ enum { DAI_LINK_PCM1_BE, DAI_LINK_UL_SRC1_BE, DAI_LINK_UL_SRC2_BE, - DAI_LINK_REGULAR_LAST = DAI_LINK_UL_SRC2_BE, - DAI_LINK_SOF_START, - DAI_LINK_SOF_DL2_BE = DAI_LINK_SOF_START, - DAI_LINK_SOF_DL3_BE, - DAI_LINK_SOF_UL4_BE, - DAI_LINK_SOF_UL5_BE, - DAI_LINK_SOF_END = DAI_LINK_SOF_UL5_BE, }; -#define DAI_LINK_REGULAR_NUM (DAI_LINK_REGULAR_LAST + 1) - /* FE */ SND_SOC_DAILINK_DEFS(DL2_FE, DAILINK_COMP_ARRAY(COMP_CPU("DL2")), @@ -743,12 +658,14 @@ SND_SOC_DAILINK_DEFS(ETDM1_IN_BE, SND_SOC_DAILINK_DEFS(ETDM2_IN_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM2_IN")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), + DAILINK_COMP_ARRAY(COMP_CODEC(RT5682_DEV0_NAME, + RT5682_CODEC_DAI)), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), + DAILINK_COMP_ARRAY(COMP_CODEC(RT5682_DEV0_NAME, + RT5682_CODEC_DAI)), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE, @@ -781,154 +698,6 @@ SND_SOC_DAILINK_DEFS(UL_SRC2_BE, "mt6359-snd-codec-aif2")), DAILINK_COMP_ARRAY(COMP_EMPTY())); -SND_SOC_DAILINK_DEFS(AFE_SOF_DL2, - DAILINK_COMP_ARRAY(COMP_CPU("SOF_DL2")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -SND_SOC_DAILINK_DEFS(AFE_SOF_DL3, - DAILINK_COMP_ARRAY(COMP_CPU("SOF_DL3")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -SND_SOC_DAILINK_DEFS(AFE_SOF_UL4, - DAILINK_COMP_ARRAY(COMP_CPU("SOF_UL4")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -SND_SOC_DAILINK_DEFS(AFE_SOF_UL5, - DAILINK_COMP_ARRAY(COMP_CPU("SOF_UL5")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -static const struct sof_conn_stream g_sof_conn_streams[] = { - { "ETDM2_OUT_BE", "AFE_SOF_DL2", SOF_DMA_DL2, SNDRV_PCM_STREAM_PLAYBACK}, - { "ETDM1_OUT_BE", "AFE_SOF_DL3", SOF_DMA_DL3, SNDRV_PCM_STREAM_PLAYBACK}, - { "UL_SRC1_BE", "AFE_SOF_UL4", SOF_DMA_UL4, SNDRV_PCM_STREAM_CAPTURE}, - { "ETDM2_IN_BE", "AFE_SOF_UL5", SOF_DMA_UL5, SNDRV_PCM_STREAM_CAPTURE}, -}; - -/* fixup the BE DAI link to match any values from topology */ -static int mt8195_dai_link_fixup(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_card *card = rtd->card; - struct snd_soc_dai_link *sof_dai_link = NULL; - struct snd_soc_pcm_runtime *runtime; - struct snd_soc_dai *cpu_dai; - int i, j, ret = 0; - - for (i = 0; i < ARRAY_SIZE(g_sof_conn_streams); i++) { - const struct sof_conn_stream *conn = &g_sof_conn_streams[i]; - - if (strcmp(rtd->dai_link->name, conn->normal_link)) - continue; - - for_each_card_rtds(card, runtime) { - if (strcmp(runtime->dai_link->name, 
conn->sof_link)) - continue; - - for_each_rtd_cpu_dais(runtime, j, cpu_dai) { - if (cpu_dai->stream_active[conn->stream_dir] > 0) { - sof_dai_link = runtime->dai_link; - break; - } - } - break; - } - - if (sof_dai_link && sof_dai_link->be_hw_params_fixup) - ret = sof_dai_link->be_hw_params_fixup(runtime, params); - - break; - } - - if (!strcmp(rtd->dai_link->name, "ETDM2_IN_BE") || - !strcmp(rtd->dai_link->name, "ETDM1_OUT_BE")) { - mt8195_etdm_hw_params_fixup(runtime, params); - } - - return ret; -} - -static int mt8195_mt6359_rt1019_rt5682_card_late_probe(struct snd_soc_card *card) -{ - struct snd_soc_pcm_runtime *runtime; - struct snd_soc_component *sof_comp = NULL; - int i; - - /* 1. find sof component */ - for_each_card_rtds(card, runtime) { - for (i = 0; i < runtime->num_components; i++) { - if (!runtime->components[i]->driver->name) - continue; - if (!strcmp(runtime->components[i]->driver->name, "sof-audio-component")) { - sof_comp = runtime->components[i]; - break; - } - } - } - - if (!sof_comp) { - dev_info(card->dev, " probe without component\n"); - return 0; - } - /* 2. add route path and fixup callback */ - for (i = 0; i < ARRAY_SIZE(g_sof_conn_streams); i++) { - const struct sof_conn_stream *conn = &g_sof_conn_streams[i]; - struct snd_soc_pcm_runtime *sof_rtd = NULL; - struct snd_soc_pcm_runtime *normal_rtd = NULL; - struct snd_soc_pcm_runtime *rtd = NULL; - - for_each_card_rtds(card, rtd) { - if (!strcmp(rtd->dai_link->name, conn->sof_link)) { - sof_rtd = rtd; - continue; - } - if (!strcmp(rtd->dai_link->name, conn->normal_link)) { - normal_rtd = rtd; - continue; - } - if (normal_rtd && sof_rtd) - break; - } - if (normal_rtd && sof_rtd) { - int j; - struct snd_soc_dai *cpu_dai; - - for_each_rtd_cpu_dais(sof_rtd, j, cpu_dai) { - struct snd_soc_dapm_route route; - struct snd_soc_dapm_path *p = NULL; - struct snd_soc_dapm_widget *play_widget = - cpu_dai->playback_widget; - struct snd_soc_dapm_widget *cap_widget = - cpu_dai->capture_widget; - memset(&route, 0, sizeof(route)); - if (conn->stream_dir == SNDRV_PCM_STREAM_CAPTURE && - cap_widget) { - snd_soc_dapm_widget_for_each_sink_path(cap_widget, p) { - route.source = conn->sof_dma; - route.sink = p->sink->name; - snd_soc_dapm_add_routes(&card->dapm, &route, 1); - } - } else if (conn->stream_dir == SNDRV_PCM_STREAM_PLAYBACK && - play_widget){ - snd_soc_dapm_widget_for_each_source_path(play_widget, p) { - route.source = p->source->name; - route.sink = conn->sof_dma; - snd_soc_dapm_add_routes(&card->dapm, &route, 1); - } - } else { - dev_err(cpu_dai->dev, "stream dir and widget not pair\n"); - } - } - normal_rtd->dai_link->be_hw_params_fixup = mt8195_dai_link_fixup; - } - } - - return 0; -} - static struct snd_soc_dai_link mt8195_mt6359_rt1019_rt5682_dai_links[] = { /* FE */ [DAI_LINK_DL2_FE] = { @@ -1123,6 +892,7 @@ static struct snd_soc_dai_link mt8195_mt6359_rt1019_rt5682_dai_links[] = { /* BE */ [DAI_LINK_DL_SRC_BE] = { .name = "DL_SRC_BE", + .init = mt8195_mt6359_init, .no_pcm = 1, .dpcm_playback = 1, SND_SOC_DAILINK_REG(DL_SRC_BE), @@ -1191,7 +961,6 @@ static struct snd_soc_dai_link mt8195_mt6359_rt1019_rt5682_dai_links[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS, - .dpcm_playback = 1, .dpcm_capture = 1, SND_SOC_DAILINK_REG(PCM1_BE), }, @@ -1207,31 +976,6 @@ static struct snd_soc_dai_link mt8195_mt6359_rt1019_rt5682_dai_links[] = { .dpcm_capture = 1, SND_SOC_DAILINK_REG(UL_SRC2_BE), }, - /* SOF BE */ - [DAI_LINK_SOF_DL2_BE] = { - .name = "AFE_SOF_DL2", - .no_pcm = 1, - .dpcm_playback = 
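/*
 * The removed late_probe above wires the SOF DMA widgets to the matching
 * BE widgets at card-init time rather than through the static route table.
 * The core of that is snd_soc_dapm_add_routes() on the card-level DAPM
 * context; a minimal sketch (widget names are placeholders):
 */
#include <linux/string.h>
#include <sound/soc.h>

static int sketch_add_route(struct snd_soc_card *card,
			    const char *src, const char *sink)
{
	struct snd_soc_dapm_route route;

	memset(&route, 0, sizeof(route));
	route.source = src;	/* e.g. a CPU DAI playback widget */
	route.sink = sink;	/* e.g. a "SOF_DMA_DL2" mixer */

	/* routes added here land in the card-level DAPM context */
	return snd_soc_dapm_add_routes(&card->dapm, &route, 1);
}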
1, - SND_SOC_DAILINK_REG(AFE_SOF_DL2), - }, - [DAI_LINK_SOF_DL3_BE] = { - .name = "AFE_SOF_DL3", - .no_pcm = 1, - .dpcm_playback = 1, - SND_SOC_DAILINK_REG(AFE_SOF_DL3), - }, - [DAI_LINK_SOF_UL4_BE] = { - .name = "AFE_SOF_UL4", - .no_pcm = 1, - .dpcm_capture = 1, - SND_SOC_DAILINK_REG(AFE_SOF_UL4), - }, - [DAI_LINK_SOF_UL5_BE] = { - .name = "AFE_SOF_UL5", - .no_pcm = 1, - .dpcm_capture = 1, - SND_SOC_DAILINK_REG(AFE_SOF_UL5), - }, }; static struct snd_soc_card mt8195_mt6359_rt1019_rt5682_soc_card = { @@ -1245,83 +989,18 @@ static struct snd_soc_card mt8195_mt6359_rt1019_rt5682_soc_card = { .num_dapm_widgets = ARRAY_SIZE(mt8195_mt6359_rt1019_rt5682_widgets), .dapm_routes = mt8195_mt6359_rt1019_rt5682_routes, .num_dapm_routes = ARRAY_SIZE(mt8195_mt6359_rt1019_rt5682_routes), - .set_bias_level_post = mt8195_set_bias_level_post, }; -static int mt8195_dailink_parse_of(struct snd_soc_card *card, struct device_node *np, - const char *propname) -{ - struct device *dev = card->dev; - struct snd_soc_dai_link *link; - const char *dai_name = NULL; - int i, j, ret, num_links; - - num_links = of_property_count_strings(np, "mediatek,dai-link"); - - if (num_links < 0 || num_links > ARRAY_SIZE(mt8195_mt6359_rt1019_rt5682_dai_links)) { - dev_dbg(dev, "number of dai-link is invalid\n"); - return -EINVAL; - } - - card->dai_link = devm_kcalloc(dev, num_links, sizeof(*link), GFP_KERNEL); - if (!card->dai_link) - return -ENOMEM; - - card->num_links = 0; - link = card->dai_link; - - for (i = 0; i < num_links; i++) { - ret = of_property_read_string_index(np, propname, i, &dai_name); - if (ret) { - dev_dbg(dev, "ASoC: Property '%s' index %d could not be read: %d\n", - propname, i, ret); - return -EINVAL; - } - - for (j = 0; j < ARRAY_SIZE(mt8195_mt6359_rt1019_rt5682_dai_links); j++) { - if (!strcmp(dai_name, mt8195_mt6359_rt1019_rt5682_dai_links[j].name)) { - memcpy(link, &mt8195_mt6359_rt1019_rt5682_dai_links[j], - sizeof(struct snd_soc_dai_link)); - link++; - card->num_links++; - break; - } - } - } - - if (card->num_links != num_links) - return -EINVAL; - - return 0; -} - static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev) { struct snd_soc_card *card = &mt8195_mt6359_rt1019_rt5682_soc_card; + struct device_node *platform_node; struct snd_soc_dai_link *dai_link; - struct mt8195_mt6359_rt1019_rt5682_priv *priv; - struct device_node *platform_node, *adsp_node, *dp_node, *hdmi_node; - int is5682s = 0; - int init6359 = 0; - int sof_on = 0; + struct mt8195_mt6359_rt1019_rt5682_priv *priv = NULL; int ret, i; card->dev = &pdev->dev; - ret = snd_soc_of_parse_card_name(card, "model"); - if (ret) { - dev_err(&pdev->dev, "%s new card name parsing error %d\n", - __func__, ret); - return ret; - } - - if (strstr(card->name, "_5682s")) - is5682s = 1; - - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - platform_node = of_parse_phandle(pdev->dev.of_node, "mediatek,platform", 0); if (!platform_node) { @@ -1329,79 +1008,51 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev) return -EINVAL; } - adsp_node = of_parse_phandle(pdev->dev.of_node, "mediatek,adsp", 0); - if (adsp_node) - sof_on = 1; - - dp_node = of_parse_phandle(pdev->dev.of_node, "mediatek,dptx-codec", 0); - hdmi_node = of_parse_phandle(pdev->dev.of_node, - "mediatek,hdmi-codec", 0); - - if (of_property_read_bool(pdev->dev.of_node, "mediatek,dai-link")) { - ret = mt8195_dailink_parse_of(card, pdev->dev.of_node, - "mediatek,dai-link"); - if (ret) { - dev_dbg(&pdev->dev, 
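/*
 * mt8195_dailink_parse_of() above builds card->dai_link from only the
 * names listed in the "mediatek,dai-link" string property, so DT can
 * select a subset of the static link table. The DT accessors it relies on
 * are sketched here (helper name hypothetical):
 */
#include <linux/of.h>

static int sketch_count_and_walk_links(struct device_node *np)
{
	const char *name;
	int i, num, ret;

	num = of_property_count_strings(np, "mediatek,dai-link");
	if (num < 0)
		return num;

	for (i = 0; i < num; i++) {
		ret = of_property_read_string_index(np, "mediatek,dai-link",
						    i, &name);
		if (ret)
			return ret;
		/* look up "name" in the static dai_link table here */
	}

	return num;
}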
"Parse dai-link fail\n"); - return -EINVAL; - } - } else { - if (!sof_on) - card->num_links = DAI_LINK_REGULAR_NUM; - } - for_each_card_prelinks(card, i, dai_link) { - if (!dai_link->platforms->name) { - if (!strncmp(dai_link->name, "AFE_SOF", strlen("AFE_SOF")) && sof_on) - dai_link->platforms->of_node = adsp_node; - else - dai_link->platforms->of_node = platform_node; - } + if (!dai_link->platforms->name) + dai_link->platforms->of_node = platform_node; if (strcmp(dai_link->name, "DPTX_BE") == 0) { - if (!dp_node) { + dai_link->codecs->of_node = + of_parse_phandle(pdev->dev.of_node, + "mediatek,dptx-codec", 0); + if (!dai_link->codecs->of_node) { dev_dbg(&pdev->dev, "No property 'dptx-codec'\n"); } else { - dai_link->codecs->of_node = dp_node; dai_link->codecs->name = NULL; dai_link->codecs->dai_name = "i2s-hifi"; dai_link->init = mt8195_dptx_codec_init; } - } else if (strcmp(dai_link->name, "ETDM3_OUT_BE") == 0) { - if (!hdmi_node) { + } + + if (strcmp(dai_link->name, "ETDM3_OUT_BE") == 0) { + dai_link->codecs->of_node = + of_parse_phandle(pdev->dev.of_node, + "mediatek,hdmi-codec", 0); + if (!dai_link->codecs->of_node) { dev_dbg(&pdev->dev, "No property 'hdmi-codec'\n"); } else { - dai_link->codecs->of_node = hdmi_node; dai_link->codecs->name = NULL; dai_link->codecs->dai_name = "i2s-hifi"; dai_link->init = mt8195_hdmi_codec_init; } - } else if (strcmp(dai_link->name, "ETDM1_OUT_BE") == 0 || - strcmp(dai_link->name, "ETDM2_IN_BE") == 0) { - dai_link->codecs->name = - is5682s ? RT5682S_DEV0_NAME : RT5682_DEV0_NAME; - dai_link->codecs->dai_name = - is5682s ? RT5682S_CODEC_DAI : RT5682_CODEC_DAI; - } else if (strcmp(dai_link->name, "DL_SRC_BE") == 0 || - strcmp(dai_link->name, "UL_SRC1_BE") == 0 || - strcmp(dai_link->name, "UL_SRC2_BE") == 0) { - if (!init6359) { - dai_link->init = mt8195_mt6359_init; - init6359 = 1; - } } } - if (sof_on) - card->late_probe = mt8195_mt6359_rt1019_rt5682_card_late_probe; + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + of_node_put(platform_node); + return -ENOMEM; + } snd_soc_card_set_drvdata(card, priv); ret = devm_snd_soc_register_card(&pdev->dev, card); + if (ret) + dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", + __func__, ret); of_node_put(platform_node); - of_node_put(adsp_node); - of_node_put(dp_node); - of_node_put(hdmi_node); return ret; } @@ -1433,6 +1084,5 @@ module_platform_driver(mt8195_mt6359_rt1019_rt5682_driver); /* Module information */ MODULE_DESCRIPTION("MT8195-MT6359-RT1019-RT5682 ALSA SoC machine driver"); MODULE_AUTHOR("Trevor Wu "); -MODULE_AUTHOR("YC Hung "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_ALIAS("mt8195_mt6359_rt1019_rt5682 soc card"); diff --git a/sound/soc/meson/aiu-encoder-spdif.c b/sound/soc/meson/aiu-encoder-spdif.c index 97da60db2c..de85091397 100644 --- a/sound/soc/meson/aiu-encoder-spdif.c +++ b/sound/soc/meson/aiu-encoder-spdif.c @@ -113,7 +113,7 @@ static int aiu_encoder_spdif_hw_params(struct snd_pcm_substream *substream, val |= AIU_958_MISC_MODE_32BITS; break; default: - dev_err(dai->dev, "Unsupported physical width\n"); + dev_err(dai->dev, "Unsupport physical width\n"); return -EINVAL; } diff --git a/sound/soc/meson/aiu.c b/sound/soc/meson/aiu.c index d299a70db7..ba15d5762b 100644 --- a/sound/soc/meson/aiu.c +++ b/sound/soc/meson/aiu.c @@ -218,23 +218,34 @@ static int aiu_clk_get(struct device *dev) int ret; aiu->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(aiu->pclk)) - return dev_err_probe(dev, PTR_ERR(aiu->pclk), "Can't get the aiu pclk\n"); + if 
(IS_ERR(aiu->pclk)) { + if (PTR_ERR(aiu->pclk) != -EPROBE_DEFER) + dev_err(dev, "Can't get the aiu pclk\n"); + return PTR_ERR(aiu->pclk); + } aiu->spdif_mclk = devm_clk_get(dev, "spdif_mclk"); - if (IS_ERR(aiu->spdif_mclk)) - return dev_err_probe(dev, PTR_ERR(aiu->spdif_mclk), - "Can't get the aiu spdif master clock\n"); + if (IS_ERR(aiu->spdif_mclk)) { + if (PTR_ERR(aiu->spdif_mclk) != -EPROBE_DEFER) + dev_err(dev, "Can't get the aiu spdif master clock\n"); + return PTR_ERR(aiu->spdif_mclk); + } ret = aiu_clk_bulk_get(dev, aiu_i2s_ids, ARRAY_SIZE(aiu_i2s_ids), &aiu->i2s); - if (ret) - return dev_err_probe(dev, ret, "Can't get the i2s clocks\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Can't get the i2s clocks\n"); + return ret; + } ret = aiu_clk_bulk_get(dev, aiu_spdif_ids, ARRAY_SIZE(aiu_spdif_ids), &aiu->spdif); - if (ret) - return dev_err_probe(dev, ret, "Can't get the spdif clocks\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Can't get the spdif clocks\n"); + return ret; + } ret = clk_prepare_enable(aiu->pclk); if (ret) { @@ -270,8 +281,11 @@ static int aiu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, aiu); ret = device_reset(dev); - if (ret) - return dev_err_probe(dev, ret, "Failed to reset device\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to reset device\n"); + return ret; + } regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c index cbbaa55d92..2b77010c2c 100644 --- a/sound/soc/meson/axg-card.c +++ b/sound/soc/meson/axg-card.c @@ -320,7 +320,6 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np, dai_link->cpus = cpu; dai_link->num_cpus = 1; - dai_link->nonatomic = true; ret = meson_card_parse_dai(card, np, &dai_link->cpus->of_node, &dai_link->cpus->dai_name); diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index bccfb770b3..b9af2d513e 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -351,12 +351,20 @@ int axg_fifo_probe(struct platform_device *pdev) } fifo->pclk = devm_clk_get(dev, NULL); - if (IS_ERR(fifo->pclk)) - return dev_err_probe(dev, PTR_ERR(fifo->pclk), "failed to get pclk\n"); + if (IS_ERR(fifo->pclk)) { + if (PTR_ERR(fifo->pclk) != -EPROBE_DEFER) + dev_err(dev, "failed to get pclk: %ld\n", + PTR_ERR(fifo->pclk)); + return PTR_ERR(fifo->pclk); + } fifo->arb = devm_reset_control_get_exclusive(dev, NULL); - if (IS_ERR(fifo->arb)) - return dev_err_probe(dev, PTR_ERR(fifo->arb), "failed to get arb reset\n"); + if (IS_ERR(fifo->arb)) { + if (PTR_ERR(fifo->arb) != -EPROBE_DEFER) + dev_err(dev, "failed to get arb reset: %ld\n", + PTR_ERR(fifo->arb)); + return PTR_ERR(fifo->arb); + } fifo->irq = of_irq_get(dev->of_node, 0); if (fifo->irq <= 0) { diff --git a/sound/soc/meson/axg-pdm.c b/sound/soc/meson/axg-pdm.c index 672e43a972..bfd37d49a7 100644 --- a/sound/soc/meson/axg-pdm.c +++ b/sound/soc/meson/axg-pdm.c @@ -586,6 +586,7 @@ static int axg_pdm_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct axg_pdm *priv; void __iomem *regs; + int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -610,16 +611,28 @@ static int axg_pdm_probe(struct platform_device *pdev) } priv->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(priv->pclk)) - return dev_err_probe(dev, PTR_ERR(priv->pclk), "failed to get pclk\n"); + if (IS_ERR(priv->pclk)) { + ret = PTR_ERR(priv->pclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, 
"failed to get pclk: %d\n", ret); + return ret; + } priv->dclk = devm_clk_get(dev, "dclk"); - if (IS_ERR(priv->dclk)) - return dev_err_probe(dev, PTR_ERR(priv->dclk), "failed to get dclk\n"); + if (IS_ERR(priv->dclk)) { + ret = PTR_ERR(priv->dclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get dclk: %d\n", ret); + return ret; + } priv->sysclk = devm_clk_get(dev, "sysclk"); - if (IS_ERR(priv->sysclk)) - return dev_err_probe(dev, PTR_ERR(priv->sysclk), "failed to get dclk\n"); + if (IS_ERR(priv->sysclk)) { + ret = PTR_ERR(priv->sysclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get dclk: %d\n", ret); + return ret; + } return devm_snd_soc_register_component(dev, &axg_pdm_component_drv, &axg_pdm_dai_drv, 1); diff --git a/sound/soc/meson/axg-spdifin.c b/sound/soc/meson/axg-spdifin.c index 4ba44e0d65..d0d09f945b 100644 --- a/sound/soc/meson/axg-spdifin.c +++ b/sound/soc/meson/axg-spdifin.c @@ -454,6 +454,7 @@ static int axg_spdifin_probe(struct platform_device *pdev) struct axg_spdifin *priv; struct snd_soc_dai_driver *dai_drv; void __iomem *regs; + int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -478,12 +479,20 @@ static int axg_spdifin_probe(struct platform_device *pdev) } priv->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(priv->pclk)) - return dev_err_probe(dev, PTR_ERR(priv->pclk), "failed to get pclk\n"); + if (IS_ERR(priv->pclk)) { + ret = PTR_ERR(priv->pclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get pclk: %d\n", ret); + return ret; + } priv->refclk = devm_clk_get(dev, "refclk"); - if (IS_ERR(priv->refclk)) - return dev_err_probe(dev, PTR_ERR(priv->refclk), "failed to get mclk\n"); + if (IS_ERR(priv->refclk)) { + ret = PTR_ERR(priv->refclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get mclk: %d\n", ret); + return ret; + } dai_drv = axg_spdifin_get_dai_drv(dev, priv); if (IS_ERR(dai_drv)) { diff --git a/sound/soc/meson/axg-spdifout.c b/sound/soc/meson/axg-spdifout.c index 3960d082e1..e769a5ee6e 100644 --- a/sound/soc/meson/axg-spdifout.c +++ b/sound/soc/meson/axg-spdifout.c @@ -403,6 +403,7 @@ static int axg_spdifout_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct axg_spdifout *priv; void __iomem *regs; + int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -421,12 +422,20 @@ static int axg_spdifout_probe(struct platform_device *pdev) } priv->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(priv->pclk)) - return dev_err_probe(dev, PTR_ERR(priv->pclk), "failed to get pclk\n"); + if (IS_ERR(priv->pclk)) { + ret = PTR_ERR(priv->pclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get pclk: %d\n", ret); + return ret; + } priv->mclk = devm_clk_get(dev, "mclk"); - if (IS_ERR(priv->mclk)) - return dev_err_probe(dev, PTR_ERR(priv->mclk), "failed to get mclk\n"); + if (IS_ERR(priv->mclk)) { + ret = PTR_ERR(priv->mclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get mclk: %d\n", ret); + return ret; + } return devm_snd_soc_register_component(dev, &axg_spdifout_component_drv, axg_spdifout_dai_drv, ARRAY_SIZE(axg_spdifout_dai_drv)); diff --git a/sound/soc/meson/axg-tdm-formatter.c b/sound/soc/meson/axg-tdm-formatter.c index 9883dc777f..cab7fa2851 100644 --- a/sound/soc/meson/axg-tdm-formatter.c +++ b/sound/soc/meson/axg-tdm-formatter.c @@ -255,6 +255,7 @@ int axg_tdm_formatter_probe(struct platform_device *pdev) const struct axg_tdm_formatter_driver *drv; struct axg_tdm_formatter *formatter; void __iomem *regs; + int ret; drv = 
of_device_get_match_data(dev); if (!drv) { @@ -281,34 +282,57 @@ int axg_tdm_formatter_probe(struct platform_device *pdev) /* Peripharal clock */ formatter->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(formatter->pclk)) - return dev_err_probe(dev, PTR_ERR(formatter->pclk), "failed to get pclk\n"); + if (IS_ERR(formatter->pclk)) { + ret = PTR_ERR(formatter->pclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get pclk: %d\n", ret); + return ret; + } /* Formatter bit clock */ formatter->sclk = devm_clk_get(dev, "sclk"); - if (IS_ERR(formatter->sclk)) - return dev_err_probe(dev, PTR_ERR(formatter->sclk), "failed to get sclk\n"); + if (IS_ERR(formatter->sclk)) { + ret = PTR_ERR(formatter->sclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get sclk: %d\n", ret); + return ret; + } /* Formatter sample clock */ formatter->lrclk = devm_clk_get(dev, "lrclk"); - if (IS_ERR(formatter->lrclk)) - return dev_err_probe(dev, PTR_ERR(formatter->lrclk), "failed to get lrclk\n"); + if (IS_ERR(formatter->lrclk)) { + ret = PTR_ERR(formatter->lrclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get lrclk: %d\n", ret); + return ret; + } /* Formatter bit clock input multiplexer */ formatter->sclk_sel = devm_clk_get(dev, "sclk_sel"); - if (IS_ERR(formatter->sclk_sel)) - return dev_err_probe(dev, PTR_ERR(formatter->sclk_sel), "failed to get sclk_sel\n"); + if (IS_ERR(formatter->sclk_sel)) { + ret = PTR_ERR(formatter->sclk_sel); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get sclk_sel: %d\n", ret); + return ret; + } /* Formatter sample clock input multiplexer */ formatter->lrclk_sel = devm_clk_get(dev, "lrclk_sel"); - if (IS_ERR(formatter->lrclk_sel)) - return dev_err_probe(dev, PTR_ERR(formatter->lrclk_sel), - "failed to get lrclk_sel\n"); + if (IS_ERR(formatter->lrclk_sel)) { + ret = PTR_ERR(formatter->lrclk_sel); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get lrclk_sel: %d\n", ret); + return ret; + } /* Formatter dedicated reset line */ formatter->reset = devm_reset_control_get_optional_exclusive(dev, NULL); - if (IS_ERR(formatter->reset)) - return dev_err_probe(dev, PTR_ERR(formatter->reset), "failed to get reset\n"); + if (IS_ERR(formatter->reset)) { + ret = PTR_ERR(formatter->reset); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get reset: %d\n", ret); + return ret; + } return devm_snd_soc_register_component(dev, drv->component_drv, NULL, 0); diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index 0c31934a96..87cac440b3 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -351,29 +351,13 @@ static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, return 0; } -static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, - int cmd, +static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct axg_tdm_stream *ts = - snd_soc_dai_get_dma_data(dai, substream); + struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - axg_tdm_stream_start(ts); - break; - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - case SNDRV_PCM_TRIGGER_STOP: - axg_tdm_stream_stop(ts); - break; - default: - return -EINVAL; - } - - return 0; + /* Force all attached formatters to update */ + return axg_tdm_stream_reset(ts); } static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai) @@ 
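/*
 * In the tdm-interface probe just below, mclk is optional (the DAI may be
 * a clock consumer), so -ENOENT is translated into a NULL clk; the clk API
 * treats a NULL clk as a no-op, while any other error still fails the
 * probe. Sketch of that pattern:
 */
#include <linux/clk.h>
#include <linux/err.h>

static int sketch_get_optional_mclk(struct device *dev, struct clk **out)
{
	struct clk *clk = devm_clk_get(dev, "mclk");

	if (IS_ERR(clk)) {
		if (PTR_ERR(clk) != -ENOENT)
			return PTR_ERR(clk);
		clk = NULL;	/* absent: clk_prepare_enable(NULL) is a no-op */
	}

	*out = clk;
	return 0;
}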
-413,8 +397,8 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = axg_tdm_iface_startup, .hw_params = axg_tdm_iface_hw_params, + .prepare = axg_tdm_iface_prepare, .hw_free = axg_tdm_iface_hw_free, - .trigger = axg_tdm_iface_trigger, }; /* TDM Backend DAIs */ @@ -533,13 +517,21 @@ static int axg_tdm_iface_probe(struct platform_device *pdev) /* Bit clock provided on the pad */ iface->sclk = devm_clk_get(dev, "sclk"); - if (IS_ERR(iface->sclk)) - return dev_err_probe(dev, PTR_ERR(iface->sclk), "failed to get sclk\n"); + if (IS_ERR(iface->sclk)) { + ret = PTR_ERR(iface->sclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get sclk: %d\n", ret); + return ret; + } /* Sample clock provided on the pad */ iface->lrclk = devm_clk_get(dev, "lrclk"); - if (IS_ERR(iface->lrclk)) - return dev_err_probe(dev, PTR_ERR(iface->lrclk), "failed to get lrclk\n"); + if (IS_ERR(iface->lrclk)) { + ret = PTR_ERR(iface->lrclk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get lrclk: %d\n", ret); + return ret; + } /* * mclk maybe be missing when the cpu dai is in slave mode and @@ -550,10 +542,13 @@ static int axg_tdm_iface_probe(struct platform_device *pdev) iface->mclk = devm_clk_get(dev, "mclk"); if (IS_ERR(iface->mclk)) { ret = PTR_ERR(iface->mclk); - if (ret == -ENOENT) + if (ret == -ENOENT) { iface->mclk = NULL; - else - return dev_err_probe(dev, ret, "failed to get mclk\n"); + } else { + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get mclk: %d\n", ret); + return ret; + } } return devm_snd_soc_register_component(dev, diff --git a/sound/soc/meson/meson-card-utils.c b/sound/soc/meson/meson-card-utils.c index 2d8d5717fd..415cc0046e 100644 --- a/sound/soc/meson/meson-card-utils.c +++ b/sound/soc/meson/meson-card-utils.c @@ -85,9 +85,11 @@ int meson_card_parse_dai(struct snd_soc_card *card, ret = of_parse_phandle_with_args(node, "sound-dai", "#sound-dai-cells", 0, &args); - if (ret) - return dev_err_probe(card->dev, ret, "can't parse dai\n"); - + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(card->dev, "can't parse dai %d\n", ret); + return ret; + } *dai_of_node = args.np; return snd_soc_get_dai_name(&args, dai_name); @@ -300,7 +302,6 @@ int meson_card_probe(struct platform_device *pdev) priv->card.owner = THIS_MODULE; priv->card.dev = dev; - priv->card.driver_name = dev->driver->name; priv->match_data = data; ret = snd_soc_of_parse_card_name(&priv->card, "model"); diff --git a/sound/soc/meson/meson-codec-glue.c b/sound/soc/meson/meson-codec-glue.c index 2870cfad81..d07270d17c 100644 --- a/sound/soc/meson/meson-codec-glue.c +++ b/sound/soc/meson/meson-codec-glue.c @@ -113,6 +113,9 @@ int meson_codec_glue_output_startup(struct snd_pcm_substream *substream, /* Replace link params with the input params */ rtd->dai_link->params = &in_data->params; + if (!in_data->fmt) + return 0; + return snd_soc_runtime_set_dai_fmt(rtd, in_data->fmt); } EXPORT_SYMBOL_GPL(meson_codec_glue_output_startup); diff --git a/sound/soc/meson/t9015.c b/sound/soc/meson/t9015.c index a9b8c4e77d..4c1349dd1e 100644 --- a/sound/soc/meson/t9015.c +++ b/sound/soc/meson/t9015.c @@ -258,12 +258,18 @@ static int t9015_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); priv->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(priv->pclk)) - return dev_err_probe(dev, PTR_ERR(priv->pclk), "failed to get core clock\n"); + if (IS_ERR(priv->pclk)) { + if (PTR_ERR(priv->pclk) != -EPROBE_DEFER) + dev_err(dev, "failed to get core clock\n"); + return PTR_ERR(priv->pclk); 
+ } priv->avdd = devm_regulator_get(dev, "AVDD"); - if (IS_ERR(priv->avdd)) - return dev_err_probe(dev, PTR_ERR(priv->avdd), "failed to AVDD\n"); + if (IS_ERR(priv->avdd)) { + if (PTR_ERR(priv->avdd) != -EPROBE_DEFER) + dev_err(dev, "failed to AVDD\n"); + return PTR_ERR(priv->avdd); + } ret = clk_prepare_enable(priv->pclk); if (ret) { diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c index 2412dc7e65..a6407f4388 100644 --- a/sound/soc/mxs/mxs-sgtl5000.c +++ b/sound/soc/mxs/mxs-sgtl5000.c @@ -160,8 +160,12 @@ static int mxs_sgtl5000_probe(struct platform_device *pdev) } ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) - return dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", + ret); + return ret; + } return 0; } diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index cf3e151bb6..cc7c1de2f1 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -38,7 +38,6 @@ config SND_SOC_LPASS_SC7180 config SND_SOC_STORM tristate "ASoC I2S support for Storm boards" - depends on GPIOLIB select SND_SOC_LPASS_IPQ806X select SND_SOC_MAX98357A help @@ -85,25 +84,6 @@ config SND_SOC_QDSP6_ASM_DAI select SND_SOC_COMPRESS tristate -config SND_SOC_QDSP6_APM_DAI - tristate - select SND_SOC_COMPRESS - -config SND_SOC_QDSP6_APM_LPASS_DAI - tristate - -config SND_SOC_QDSP6_APM - tristate - select SND_SOC_QDSP6_APM_DAI - select SND_SOC_QDSP6_APM_LPASS_DAI - -config SND_SOC_QDSP6_PRM_LPASS_CLOCKS - tristate - -config SND_SOC_QDSP6_PRM - tristate - select SND_SOC_QDSP6_PRM_LPASS_CLOCKS - config SND_SOC_QDSP6 tristate "SoC ALSA audio driver for QDSP6" depends on QCOM_APR @@ -117,9 +97,6 @@ config SND_SOC_QDSP6 select SND_SOC_QDSP6_ROUTING select SND_SOC_QDSP6_ASM select SND_SOC_QDSP6_ASM_DAI - select SND_SOC_TOPOLOGY - select SND_SOC_QDSP6_APM - select SND_SOC_QDSP6_PRM help To add support for MSM QDSP6 Soc Audio. This will enable sound soc platform specific @@ -164,12 +141,11 @@ config SND_SOC_SM8250 config SND_SOC_SC7180 tristate "SoC Machine driver for SC7180 boards" - depends on I2C && GPIOLIB + depends on I2C select SND_SOC_QCOM_COMMON select SND_SOC_LPASS_SC7180 select SND_SOC_MAX98357A select SND_SOC_RT5682_I2C - select SND_SOC_RT5682S select SND_SOC_ADAU7002 help To add support for audio on Qualcomm Technologies Inc. 
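/*
 * The apq8016 hunks below drop the msm8916-qdsp6 support, including the
 * per-port refcounting of the LPAIF bit clock. Since several BE streams
 * can share one MI2S port, that code only enabled the clock on first open
 * and disabled it on last close. Condensed from the removed helpers
 * (struct apq8016_sbc_data, LPAIF_BIT_CLK, and MI2S_BCLK_RATE = 1536000
 * are as defined in that code):
 */
static int sketch_mi2s_clk_get(struct apq8016_sbc_data *data,
			       struct snd_soc_dai *cpu_dai, int mi2s)
{
	if (++data->mi2s_clk_count[mi2s] > 1)
		return 0;	/* already running for another stream */

	return snd_soc_dai_set_sysclk(cpu_dai, LPAIF_BIT_CLK,
				      MI2S_BCLK_RATE, 0);
}

static void sketch_mi2s_clk_put(struct apq8016_sbc_data *data,
				struct snd_soc_dai *cpu_dai, int mi2s)
{
	if (--data->mi2s_clk_count[mi2s] > 0)
		return;		/* port still in use */

	snd_soc_dai_set_sysclk(cpu_dai, LPAIF_BIT_CLK, 0, 0);
}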
diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c index f9d6937532..ba2a98268e 100644 --- a/sound/soc/qcom/apq8016_sbc.c +++ b/sound/soc/qcom/apq8016_sbc.c @@ -17,9 +17,6 @@ #include #include #include "common.h" -#include "qdsp6/q6afe.h" - -#define MI2S_COUNT (MI2S_QUATERNARY + 1) struct apq8016_sbc_data { struct snd_soc_card card; @@ -27,7 +24,6 @@ struct apq8016_sbc_data { void __iomem *spkr_iomux; struct snd_soc_jack jack; bool jack_setup; - int mi2s_clk_count[MI2S_COUNT]; }; #define MIC_CTRL_TER_WS_SLAVE_SEL BIT(21) @@ -42,10 +38,10 @@ struct apq8016_sbc_data { #define SPKR_CTL_TLMM_WS_EN_SEL_MASK GENMASK(19, 18) #define SPKR_CTL_TLMM_WS_EN_SEL_SEC BIT(18) #define DEFAULT_MCLK_RATE 9600000 -#define MI2S_BCLK_RATE 1536000 -static int apq8016_dai_init(struct snd_soc_pcm_runtime *rtd, int mi2s) +static int apq8016_sbc_dai_init(struct snd_soc_pcm_runtime *rtd) { + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); struct snd_soc_dai *codec_dai; struct snd_soc_component *component; struct snd_soc_card *card = rtd->card; @@ -53,7 +49,7 @@ static int apq8016_dai_init(struct snd_soc_pcm_runtime *rtd, int mi2s) int i, rval; u32 value; - switch (mi2s) { + switch (cpu_dai->id) { case MI2S_PRIMARY: writel(readl(pdata->spkr_iomux) | SPKR_CTL_PRI_WS_SLAVE_SEL_11, pdata->spkr_iomux); @@ -132,13 +128,6 @@ static int apq8016_dai_init(struct snd_soc_pcm_runtime *rtd, int mi2s) return 0; } -static int apq8016_sbc_dai_init(struct snd_soc_pcm_runtime *rtd) -{ - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - - return apq8016_dai_init(rtd, cpu_dai->id); -} - static void apq8016_sbc_add_ops(struct snd_soc_card *card) { struct snd_soc_dai_link *link; @@ -148,113 +137,6 @@ static void apq8016_sbc_add_ops(struct snd_soc_card *card) link->init = apq8016_sbc_dai_init; } -static int qdsp6_dai_get_lpass_id(struct snd_soc_dai *cpu_dai) -{ - switch (cpu_dai->id) { - case PRIMARY_MI2S_RX: - case PRIMARY_MI2S_TX: - return MI2S_PRIMARY; - case SECONDARY_MI2S_RX: - case SECONDARY_MI2S_TX: - return MI2S_SECONDARY; - case TERTIARY_MI2S_RX: - case TERTIARY_MI2S_TX: - return MI2S_TERTIARY; - case QUATERNARY_MI2S_RX: - case QUATERNARY_MI2S_TX: - return MI2S_QUATERNARY; - default: - return -EINVAL; - } -} - -static int msm8916_qdsp6_dai_init(struct snd_soc_pcm_runtime *rtd) -{ - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - - snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_CBS_CFS); - return apq8016_dai_init(rtd, qdsp6_dai_get_lpass_id(cpu_dai)); -} - -static int msm8916_qdsp6_startup(struct snd_pcm_substream *substream) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_card *card = rtd->card; - struct apq8016_sbc_data *data = snd_soc_card_get_drvdata(card); - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - int mi2s, ret; - - mi2s = qdsp6_dai_get_lpass_id(cpu_dai); - if (mi2s < 0) - return mi2s; - - if (++data->mi2s_clk_count[mi2s] > 1) - return 0; - - ret = snd_soc_dai_set_sysclk(cpu_dai, LPAIF_BIT_CLK, MI2S_BCLK_RATE, 0); - if (ret) - dev_err(card->dev, "Failed to enable LPAIF bit clk: %d\n", ret); - return ret; -} - -static void msm8916_qdsp6_shutdown(struct snd_pcm_substream *substream) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_card *card = rtd->card; - struct apq8016_sbc_data *data = snd_soc_card_get_drvdata(card); - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - int mi2s, ret; - - mi2s = qdsp6_dai_get_lpass_id(cpu_dai); - if (mi2s < 0) - return; - - if (--data->mi2s_clk_count[mi2s] > 0) - 
return; - - ret = snd_soc_dai_set_sysclk(cpu_dai, LPAIF_BIT_CLK, 0, 0); - if (ret) - dev_err(card->dev, "Failed to disable LPAIF bit clk: %d\n", ret); -} - -static const struct snd_soc_ops msm8916_qdsp6_be_ops = { - .startup = msm8916_qdsp6_startup, - .shutdown = msm8916_qdsp6_shutdown, -}; - -static int msm8916_qdsp6_be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_hw_params *params) -{ - struct snd_interval *rate = hw_param_interval(params, - SNDRV_PCM_HW_PARAM_RATE); - struct snd_interval *channels = hw_param_interval(params, - SNDRV_PCM_HW_PARAM_CHANNELS); - struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); - - rate->min = rate->max = 48000; - channels->min = channels->max = 2; - snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S16_LE); - - return 0; -} - -static void msm8916_qdsp6_add_ops(struct snd_soc_card *card) -{ - struct snd_soc_dai_link *link; - int i; - - /* Make it obvious to userspace that QDSP6 is used */ - card->components = "qdsp6"; - - for_each_card_prelinks(card, i, link) { - if (link->no_pcm) { - link->init = msm8916_qdsp6_dai_init; - link->ops = &msm8916_qdsp6_be_ops; - link->be_hw_params_fixup = msm8916_qdsp6_be_hw_params_fixup; - } - } -} - static const struct snd_soc_dapm_widget apq8016_sbc_dapm_widgets[] = { SND_SOC_DAPM_MIC("Handset Mic", NULL), @@ -266,16 +148,11 @@ static const struct snd_soc_dapm_widget apq8016_sbc_dapm_widgets[] = { static int apq8016_sbc_platform_probe(struct platform_device *pdev) { - void (*add_ops)(struct snd_soc_card *card); struct device *dev = &pdev->dev; struct snd_soc_card *card; struct apq8016_sbc_data *data; int ret; - add_ops = device_get_match_data(&pdev->dev); - if (!add_ops) - return -EINVAL; - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -300,13 +177,12 @@ static int apq8016_sbc_platform_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, data); - add_ops(card); + apq8016_sbc_add_ops(card); return devm_snd_soc_register_card(&pdev->dev, card); } static const struct of_device_id apq8016_sbc_device_id[] __maybe_unused = { - { .compatible = "qcom,apq8016-sbc-sndcard", .data = apq8016_sbc_add_ops }, - { .compatible = "qcom,msm8916-qdsp6-sndcard", .data = msm8916_qdsp6_add_ops }, + { .compatible = "qcom,apq8016-sbc-sndcard" }, {}, }; MODULE_DEVICE_TABLE(of, apq8016_sbc_device_id); diff --git a/sound/soc/qcom/apq8096.c b/sound/soc/qcom/apq8096.c index c7b7d0864d..1a69baefc5 100644 --- a/sound/soc/qcom/apq8096.c +++ b/sound/soc/qcom/apq8096.c @@ -60,7 +60,7 @@ static int msm_snd_hw_params(struct snd_pcm_substream *substream, return ret; } -static const struct snd_soc_ops apq8096_ops = { +static struct snd_soc_ops apq8096_ops = { .hw_params = msm_snd_hw_params, }; diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index c407684ce1..09af007007 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -26,12 +26,6 @@ int qcom_snd_parse_of(struct snd_soc_card *card) return ret; } - if (of_property_read_bool(dev->of_node, "widgets")) { - ret = snd_soc_of_parse_audio_simple_widgets(card, "widgets"); - if (ret) - return ret; - } - /* DAPM routes */ if (of_property_read_bool(dev->of_node, "audio-routing")) { ret = snd_soc_of_parse_audio_routing(card, "audio-routing"); @@ -45,16 +39,12 @@ int qcom_snd_parse_of(struct snd_soc_card *card) return ret; } - ret = snd_soc_of_parse_pin_switches(card, "pin-switches"); - if (ret) - return ret; - ret = snd_soc_of_parse_aux_devs(card, "aux-devs"); if (ret) return ret; /* Populate links */ - 
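/*
 * The common.c hunk below switches between the "available" child-node
 * helpers and the unfiltered ones; of_get_available_child_count() and
 * for_each_available_child_of_node() skip nodes with status = "disabled".
 * Whichever pair is used, the count and the loop must apply the same
 * filter, or the allocated dai_link array won't match what the loop
 * populates. Sketch:
 */
#include <linux/of.h>

static int sketch_count_links(struct device_node *parent)
{
	struct device_node *np;
	int num = of_get_available_child_count(parent);

	/* the loop must use the same filter as the count above */
	for_each_available_child_of_node(parent, np) {
		/* populate one dai_link per enabled child node */
	}

	return num;
}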
num_links = of_get_available_child_count(dev->of_node); + num_links = of_get_child_count(dev->of_node); /* Allocate the DAI link array */ card->dai_link = devm_kcalloc(dev, num_links, sizeof(*link), GFP_KERNEL); @@ -64,7 +54,7 @@ int qcom_snd_parse_of(struct snd_soc_card *card) card->num_links = num_links; link = card->dai_link; - for_each_available_child_of_node(dev->of_node, np) { + for_each_child_of_node(dev->of_node, np) { dlc = devm_kzalloc(dev, 2 * sizeof(*dlc), GFP_KERNEL); if (!dlc) { ret = -ENOMEM; @@ -104,8 +94,9 @@ int qcom_snd_parse_of(struct snd_soc_card *card) ret = snd_soc_of_get_dai_name(cpu, &link->cpus->dai_name); if (ret) { - dev_err_probe(card->dev, ret, - "%s: error getting cpu dai name\n", link->name); + if (ret != -EPROBE_DEFER) + dev_err(card->dev, "%s: error getting cpu dai name: %d\n", + link->name, ret); goto err; } @@ -125,8 +116,9 @@ int qcom_snd_parse_of(struct snd_soc_card *card) if (codec) { ret = snd_soc_of_get_dai_link_codecs(dev, codec, link); if (ret < 0) { - dev_err_probe(card->dev, ret, - "%s: codec dai not found\n", link->name); + if (ret != -EPROBE_DEFER) + dev_err(card->dev, "%s: codec dai not found: %d\n", + link->name, ret); goto err; } diff --git a/sound/soc/qcom/qdsp6/Makefile b/sound/soc/qcom/qdsp6/Makefile index 3963bf2346..3c1dd9f32f 100644 --- a/sound/soc/qcom/qdsp6/Makefile +++ b/sound/soc/qcom/qdsp6/Makefile @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -snd-q6dsp-common-objs := q6dsp-common.o q6dsp-lpass-ports.o q6dsp-lpass-clocks.o -snd-q6apm-objs := q6apm.o audioreach.o topology.o - -obj-$(CONFIG_SND_SOC_QDSP6_COMMON) += snd-q6dsp-common.o +obj-$(CONFIG_SND_SOC_QDSP6_COMMON) += q6dsp-common.o obj-$(CONFIG_SND_SOC_QDSP6_CORE) += q6core.o obj-$(CONFIG_SND_SOC_QDSP6_AFE) += q6afe.o obj-$(CONFIG_SND_SOC_QDSP6_AFE_DAI) += q6afe-dai.o @@ -11,9 +8,3 @@ obj-$(CONFIG_SND_SOC_QDSP6_ADM) += q6adm.o obj-$(CONFIG_SND_SOC_QDSP6_ROUTING) += q6routing.o obj-$(CONFIG_SND_SOC_QDSP6_ASM) += q6asm.o obj-$(CONFIG_SND_SOC_QDSP6_ASM_DAI) += q6asm-dai.o - -obj-$(CONFIG_SND_SOC_QDSP6_APM) += snd-q6apm.o -obj-$(CONFIG_SND_SOC_QDSP6_APM_DAI) += q6apm-dai.o -obj-$(CONFIG_SND_SOC_QDSP6_APM_LPASS_DAI) += q6apm-lpass-dais.o -obj-$(CONFIG_SND_SOC_QDSP6_PRM) += q6prm.o -obj-$(CONFIG_SND_SOC_QDSP6_PRM_LPASS_CLOCKS) += q6prm-clocks.o diff --git a/sound/soc/qcom/qdsp6/q6adm.c b/sound/soc/qcom/qdsp6/q6adm.c index 72c5719f1d..3d831b6355 100644 --- a/sound/soc/qcom/qdsp6/q6adm.c +++ b/sound/soc/qcom/qdsp6/q6adm.c @@ -390,7 +390,7 @@ struct q6copp *q6adm_open(struct device *dev, int port_id, int path, int rate, int ret = 0; if (port_id < 0) { - dev_err(dev, "Invalid port_id %d\n", port_id); + dev_err(dev, "Invalid port_id 0x%x\n", port_id); return ERR_PTR(-EINVAL); } @@ -508,7 +508,7 @@ int q6adm_matrix_map(struct device *dev, int path, int port_idx = payload_map.port_id[i]; if (port_idx < 0) { - dev_err(dev, "Invalid port_id %d\n", + dev_err(dev, "Invalid port_id 0x%x\n", payload_map.port_id[i]); kfree(pkt); return -EINVAL; diff --git a/sound/soc/qcom/qdsp6/q6afe-clocks.c b/sound/soc/qcom/qdsp6/q6afe-clocks.c index 1ccab64ff0..9431656283 100644 --- a/sound/soc/qcom/qdsp6/q6afe-clocks.c +++ b/sound/soc/qcom/qdsp6/q6afe-clocks.c @@ -7,18 +7,115 @@ #include #include #include -#include "q6dsp-lpass-clocks.h" +#include +#include #include "q6afe.h" #define Q6AFE_CLK(id) { \ .clk_id = id, \ - .q6dsp_clk_id = Q6AFE_##id, \ + .afe_clk_id = Q6AFE_##id, \ .name = #id, \ .rate = 19200000, \ } +#define Q6AFE_VOTE_CLK(id, blkid, n) { \ + .clk_id = id, \ + .afe_clk_id = blkid, \ 
+ .name = n, \ + } -static const struct q6dsp_clk_init q6afe_clks[] = { +struct q6afe_clk_init { + int clk_id; + int afe_clk_id; + char *name; + int rate; +}; + +struct q6afe_clk { + struct device *dev; + int afe_clk_id; + int attributes; + int rate; + uint32_t handle; + struct clk_hw hw; +}; + +#define to_q6afe_clk(_hw) container_of(_hw, struct q6afe_clk, hw) + +struct q6afe_cc { + struct device *dev; + struct q6afe_clk *clks[Q6AFE_MAX_CLK_ID]; +}; + +static int clk_q6afe_prepare(struct clk_hw *hw) +{ + struct q6afe_clk *clk = to_q6afe_clk(hw); + + return q6afe_set_lpass_clock(clk->dev, clk->afe_clk_id, clk->attributes, + Q6AFE_LPASS_CLK_ROOT_DEFAULT, clk->rate); +} + +static void clk_q6afe_unprepare(struct clk_hw *hw) +{ + struct q6afe_clk *clk = to_q6afe_clk(hw); + + q6afe_set_lpass_clock(clk->dev, clk->afe_clk_id, clk->attributes, + Q6AFE_LPASS_CLK_ROOT_DEFAULT, 0); +} + +static int clk_q6afe_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct q6afe_clk *clk = to_q6afe_clk(hw); + + clk->rate = rate; + + return 0; +} + +static unsigned long clk_q6afe_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct q6afe_clk *clk = to_q6afe_clk(hw); + + return clk->rate; +} + +static long clk_q6afe_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + return rate; +} + +static const struct clk_ops clk_q6afe_ops = { + .prepare = clk_q6afe_prepare, + .unprepare = clk_q6afe_unprepare, + .set_rate = clk_q6afe_set_rate, + .round_rate = clk_q6afe_round_rate, + .recalc_rate = clk_q6afe_recalc_rate, +}; + +static int clk_vote_q6afe_block(struct clk_hw *hw) +{ + struct q6afe_clk *clk = to_q6afe_clk(hw); + + return q6afe_vote_lpass_core_hw(clk->dev, clk->afe_clk_id, + clk_hw_get_name(&clk->hw), &clk->handle); +} + +static void clk_unvote_q6afe_block(struct clk_hw *hw) +{ + struct q6afe_clk *clk = to_q6afe_clk(hw); + + q6afe_unvote_lpass_core_hw(clk->dev, clk->afe_clk_id, clk->handle); +} + +static const struct clk_ops clk_vote_q6afe_ops = { + .prepare = clk_vote_q6afe_block, + .unprepare = clk_unvote_q6afe_block, +}; + +static const struct q6afe_clk_init q6afe_clks[] = { Q6AFE_CLK(LPASS_CLK_ID_PRI_MI2S_IBIT), Q6AFE_CLK(LPASS_CLK_ID_PRI_MI2S_EBIT), Q6AFE_CLK(LPASS_CLK_ID_SEC_MI2S_IBIT), @@ -79,28 +176,88 @@ static const struct q6dsp_clk_init q6afe_clks[] = { Q6AFE_CLK(LPASS_CLK_ID_RX_CORE_MCLK), Q6AFE_CLK(LPASS_CLK_ID_RX_CORE_NPL_MCLK), Q6AFE_CLK(LPASS_CLK_ID_VA_CORE_2X_MCLK), - Q6DSP_VOTE_CLK(LPASS_HW_AVTIMER_VOTE, + Q6AFE_VOTE_CLK(LPASS_HW_AVTIMER_VOTE, Q6AFE_LPASS_CORE_AVTIMER_BLOCK, "LPASS_AVTIMER_MACRO"), - Q6DSP_VOTE_CLK(LPASS_HW_MACRO_VOTE, + Q6AFE_VOTE_CLK(LPASS_HW_MACRO_VOTE, Q6AFE_LPASS_CORE_HW_MACRO_BLOCK, "LPASS_HW_MACRO"), - Q6DSP_VOTE_CLK(LPASS_HW_DCODEC_VOTE, + Q6AFE_VOTE_CLK(LPASS_HW_DCODEC_VOTE, Q6AFE_LPASS_CORE_HW_DCODEC_BLOCK, "LPASS_HW_DCODEC"), }; -static const struct q6dsp_clk_desc q6dsp_clk_q6afe __maybe_unused = { - .clks = q6afe_clks, - .num_clks = ARRAY_SIZE(q6afe_clks), - .lpass_set_clk = q6afe_set_lpass_clock, - .lpass_vote_clk = q6afe_vote_lpass_core_hw, - .lpass_unvote_clk = q6afe_unvote_lpass_core_hw, -}; +static struct clk_hw *q6afe_of_clk_hw_get(struct of_phandle_args *clkspec, + void *data) +{ + struct q6afe_cc *cc = data; + unsigned int idx = clkspec->args[0]; + unsigned int attr = clkspec->args[1]; + + if (idx >= Q6AFE_MAX_CLK_ID || attr > LPASS_CLK_ATTRIBUTE_COUPLE_DIVISOR) { + dev_err(cc->dev, "Invalid clk specifier (%d, %d)\n", idx, attr); + return ERR_PTR(-EINVAL); + } + + if 
(cc->clks[idx]) { + cc->clks[idx]->attributes = attr; + return &cc->clks[idx]->hw; + } + + return ERR_PTR(-ENOENT); +} + +static int q6afe_clock_dev_probe(struct platform_device *pdev) +{ + struct q6afe_cc *cc; + struct device *dev = &pdev->dev; + int i, ret; + + cc = devm_kzalloc(dev, sizeof(*cc), GFP_KERNEL); + if (!cc) + return -ENOMEM; + + cc->dev = dev; + for (i = 0; i < ARRAY_SIZE(q6afe_clks); i++) { + unsigned int id = q6afe_clks[i].clk_id; + struct clk_init_data init = { + .name = q6afe_clks[i].name, + }; + struct q6afe_clk *clk; + + clk = devm_kzalloc(dev, sizeof(*clk), GFP_KERNEL); + if (!clk) + return -ENOMEM; + + clk->dev = dev; + clk->afe_clk_id = q6afe_clks[i].afe_clk_id; + clk->rate = q6afe_clks[i].rate; + clk->hw.init = &init; + + if (clk->rate) + init.ops = &clk_q6afe_ops; + else + init.ops = &clk_vote_q6afe_ops; + + cc->clks[id] = clk; + + ret = devm_clk_hw_register(dev, &clk->hw); + if (ret) + return ret; + } + + ret = devm_of_clk_add_hw_provider(dev, q6afe_of_clk_hw_get, cc); + if (ret) + return ret; + + dev_set_drvdata(dev, cc); + + return 0; +} #ifdef CONFIG_OF static const struct of_device_id q6afe_clock_device_id[] = { - { .compatible = "qcom,q6afe-clocks", .data = &q6dsp_clk_q6afe }, + { .compatible = "qcom,q6afe-clocks" }, {}, }; MODULE_DEVICE_TABLE(of, q6afe_clock_device_id); @@ -111,7 +268,7 @@ static struct platform_driver q6afe_clock_platform_driver = { .name = "q6afe-clock", .of_match_table = of_match_ptr(q6afe_clock_device_id), }, - .probe = q6dsp_clock_dev_probe, + .probe = q6afe_clock_dev_probe, }; module_platform_driver(q6afe_clock_platform_driver); diff --git a/sound/soc/qcom/qdsp6/q6afe-dai.c b/sound/soc/qcom/qdsp6/q6afe-dai.c index 8bb7452b8f..ac8f7324e9 100644 --- a/sound/soc/qcom/qdsp6/q6afe-dai.c +++ b/sound/soc/qcom/qdsp6/q6afe-dai.c @@ -11,9 +11,91 @@ #include #include #include -#include "q6dsp-lpass-ports.h" #include "q6afe.h" +#define Q6AFE_TDM_PB_DAI(pre, num, did) { \ + .playback = { \ + .stream_name = pre" TDM"#num" Playback", \ + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ + SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000 |\ + SNDRV_PCM_RATE_176400, \ + .formats = SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE, \ + .channels_min = 1, \ + .channels_max = 8, \ + .rate_min = 8000, \ + .rate_max = 176400, \ + }, \ + .name = #did, \ + .ops = &q6tdm_ops, \ + .id = did, \ + .probe = msm_dai_q6_dai_probe, \ + .remove = msm_dai_q6_dai_remove, \ + } + +#define Q6AFE_TDM_CAP_DAI(pre, num, did) { \ + .capture = { \ + .stream_name = pre" TDM"#num" Capture", \ + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ + SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000 |\ + SNDRV_PCM_RATE_176400, \ + .formats = SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE, \ + .channels_min = 1, \ + .channels_max = 8, \ + .rate_min = 8000, \ + .rate_max = 176400, \ + }, \ + .name = #did, \ + .ops = &q6tdm_ops, \ + .id = did, \ + .probe = msm_dai_q6_dai_probe, \ + .remove = msm_dai_q6_dai_remove, \ + } + +#define Q6AFE_CDC_DMA_RX_DAI(did) { \ + .playback = { \ + .stream_name = #did" Playback", \ + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ + SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000 |\ + SNDRV_PCM_RATE_176400, \ + .formats = SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE, \ + .channels_min = 1, \ + .channels_max = 8, \ + .rate_min = 8000, \ + .rate_max = 176400, \ + }, \ + .name = #did, \ + .ops = &q6dma_ops, \ + .id = did, \ + .probe = 
msm_dai_q6_dai_probe, \ + .remove = msm_dai_q6_dai_remove, \ + } + +#define Q6AFE_CDC_DMA_TX_DAI(did) { \ + .capture = { \ + .stream_name = #did" Capture", \ + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ + SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000 |\ + SNDRV_PCM_RATE_176400, \ + .formats = SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE, \ + .channels_min = 1, \ + .channels_max = 8, \ + .rate_min = 8000, \ + .rate_max = 176400, \ + }, \ + .name = #did, \ + .ops = &q6dma_ops, \ + .id = did, \ + .probe = msm_dai_q6_dai_probe, \ + .remove = msm_dai_q6_dai_remove, \ + } struct q6afe_dai_priv_data { uint32_t sd_line_mask; @@ -702,6 +784,591 @@ static int msm_dai_q6_dai_remove(struct snd_soc_dai *dai) return 0; } +static struct snd_soc_dai_driver q6afe_dais[] = { + { + .playback = { + .stream_name = "HDMI Playback", + .rates = SNDRV_PCM_RATE_48000 | + SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 2, + .channels_max = 8, + .rate_max = 192000, + .rate_min = 48000, + }, + .ops = &q6hdmi_ops, + .id = HDMI_RX, + .name = "HDMI", + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .name = "SLIMBUS_0_RX", + .ops = &q6slim_ops, + .id = SLIMBUS_0_RX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .playback = { + .stream_name = "Slimbus Playback", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .name = "SLIMBUS_0_TX", + .ops = &q6slim_ops, + .id = SLIMBUS_0_TX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .capture = { + .stream_name = "Slimbus Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .playback = { + .stream_name = "Slimbus1 Playback", + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 2, + .rate_min = 8000, + .rate_max = 192000, + }, + .name = "SLIMBUS_1_RX", + .ops = &q6slim_ops, + .id = SLIMBUS_1_RX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .name = "SLIMBUS_1_TX", + .ops = &q6slim_ops, + .id = SLIMBUS_1_TX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .capture = { + .stream_name = "Slimbus1 Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .playback = { + .stream_name = "Slimbus2 Playback", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + .name = "SLIMBUS_2_RX", + .ops = &q6slim_ops, 
+ .id = SLIMBUS_2_RX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + + }, { + .name = "SLIMBUS_2_TX", + .ops = &q6slim_ops, + .id = SLIMBUS_2_TX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .capture = { + .stream_name = "Slimbus2 Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .playback = { + .stream_name = "Slimbus3 Playback", + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 2, + .rate_min = 8000, + .rate_max = 192000, + }, + .name = "SLIMBUS_3_RX", + .ops = &q6slim_ops, + .id = SLIMBUS_3_RX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + + }, { + .name = "SLIMBUS_3_TX", + .ops = &q6slim_ops, + .id = SLIMBUS_3_TX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .capture = { + .stream_name = "Slimbus3 Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .playback = { + .stream_name = "Slimbus4 Playback", + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 2, + .rate_min = 8000, + .rate_max = 192000, + }, + .name = "SLIMBUS_4_RX", + .ops = &q6slim_ops, + .id = SLIMBUS_4_RX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + + }, { + .name = "SLIMBUS_4_TX", + .ops = &q6slim_ops, + .id = SLIMBUS_4_TX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .capture = { + .stream_name = "Slimbus4 Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .playback = { + .stream_name = "Slimbus5 Playback", + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 2, + .rate_min = 8000, + .rate_max = 192000, + }, + .name = "SLIMBUS_5_RX", + .ops = &q6slim_ops, + .id = SLIMBUS_5_RX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + + }, { + .name = "SLIMBUS_5_TX", + .ops = &q6slim_ops, + .id = SLIMBUS_5_TX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .capture = { + .stream_name = "Slimbus5 Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .playback = { + .stream_name = "Slimbus6 Playback", + .rates = 
SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 2, + .rate_min = 8000, + .rate_max = 192000, + }, + .ops = &q6slim_ops, + .name = "SLIMBUS_6_RX", + .id = SLIMBUS_6_RX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + + }, { + .name = "SLIMBUS_6_TX", + .ops = &q6slim_ops, + .id = SLIMBUS_6_TX, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + .capture = { + .stream_name = "Slimbus6 Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + }, { + .playback = { + .stream_name = "Primary MI2S Playback", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .id = PRIMARY_MI2S_RX, + .name = "PRI_MI2S_RX", + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .capture = { + .stream_name = "Primary MI2S Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .id = PRIMARY_MI2S_TX, + .name = "PRI_MI2S_TX", + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .playback = { + .stream_name = "Secondary MI2S Playback", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .name = "SEC_MI2S_RX", + .id = SECONDARY_MI2S_RX, + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .capture = { + .stream_name = "Secondary MI2S Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .id = SECONDARY_MI2S_TX, + .name = "SEC_MI2S_TX", + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .playback = { + .stream_name = "Tertiary MI2S Playback", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .name = "TERT_MI2S_RX", + .id = TERTIARY_MI2S_RX, + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .capture = { + .stream_name = "Tertiary MI2S Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .id = TERTIARY_MI2S_TX, + .name = "TERT_MI2S_TX", + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .playback = { + .stream_name = "Quaternary MI2S Playback", + .rates = 
SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .name = "QUAT_MI2S_RX", + .id = QUATERNARY_MI2S_RX, + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .capture = { + .stream_name = "Quaternary MI2S Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .id = QUATERNARY_MI2S_TX, + .name = "QUAT_MI2S_TX", + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .playback = { + .stream_name = "Quinary MI2S Playback", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 192000, + }, + .id = QUINARY_MI2S_RX, + .name = "QUIN_MI2S_RX", + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, { + .capture = { + .stream_name = "Quinary MI2S Capture", + .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 | + SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels_min = 1, + .channels_max = 8, + .rate_min = 8000, + .rate_max = 48000, + }, + .id = QUINARY_MI2S_TX, + .name = "QUIN_MI2S_TX", + .ops = &q6i2s_ops, + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, + Q6AFE_TDM_PB_DAI("Primary", 0, PRIMARY_TDM_RX_0), + Q6AFE_TDM_PB_DAI("Primary", 1, PRIMARY_TDM_RX_1), + Q6AFE_TDM_PB_DAI("Primary", 2, PRIMARY_TDM_RX_2), + Q6AFE_TDM_PB_DAI("Primary", 3, PRIMARY_TDM_RX_3), + Q6AFE_TDM_PB_DAI("Primary", 4, PRIMARY_TDM_RX_4), + Q6AFE_TDM_PB_DAI("Primary", 5, PRIMARY_TDM_RX_5), + Q6AFE_TDM_PB_DAI("Primary", 6, PRIMARY_TDM_RX_6), + Q6AFE_TDM_PB_DAI("Primary", 7, PRIMARY_TDM_RX_7), + Q6AFE_TDM_CAP_DAI("Primary", 0, PRIMARY_TDM_TX_0), + Q6AFE_TDM_CAP_DAI("Primary", 1, PRIMARY_TDM_TX_1), + Q6AFE_TDM_CAP_DAI("Primary", 2, PRIMARY_TDM_TX_2), + Q6AFE_TDM_CAP_DAI("Primary", 3, PRIMARY_TDM_TX_3), + Q6AFE_TDM_CAP_DAI("Primary", 4, PRIMARY_TDM_TX_4), + Q6AFE_TDM_CAP_DAI("Primary", 5, PRIMARY_TDM_TX_5), + Q6AFE_TDM_CAP_DAI("Primary", 6, PRIMARY_TDM_TX_6), + Q6AFE_TDM_CAP_DAI("Primary", 7, PRIMARY_TDM_TX_7), + Q6AFE_TDM_PB_DAI("Secondary", 0, SECONDARY_TDM_RX_0), + Q6AFE_TDM_PB_DAI("Secondary", 1, SECONDARY_TDM_RX_1), + Q6AFE_TDM_PB_DAI("Secondary", 2, SECONDARY_TDM_RX_2), + Q6AFE_TDM_PB_DAI("Secondary", 3, SECONDARY_TDM_RX_3), + Q6AFE_TDM_PB_DAI("Secondary", 4, SECONDARY_TDM_RX_4), + Q6AFE_TDM_PB_DAI("Secondary", 5, SECONDARY_TDM_RX_5), + Q6AFE_TDM_PB_DAI("Secondary", 6, SECONDARY_TDM_RX_6), + Q6AFE_TDM_PB_DAI("Secondary", 7, SECONDARY_TDM_RX_7), + Q6AFE_TDM_CAP_DAI("Secondary", 0, SECONDARY_TDM_TX_0), + Q6AFE_TDM_CAP_DAI("Secondary", 1, SECONDARY_TDM_TX_1), + Q6AFE_TDM_CAP_DAI("Secondary", 2, SECONDARY_TDM_TX_2), + Q6AFE_TDM_CAP_DAI("Secondary", 3, SECONDARY_TDM_TX_3), + Q6AFE_TDM_CAP_DAI("Secondary", 4, SECONDARY_TDM_TX_4), + Q6AFE_TDM_CAP_DAI("Secondary", 5, SECONDARY_TDM_TX_5), + Q6AFE_TDM_CAP_DAI("Secondary", 6, SECONDARY_TDM_TX_6), + Q6AFE_TDM_CAP_DAI("Secondary", 7, SECONDARY_TDM_TX_7), + Q6AFE_TDM_PB_DAI("Tertiary", 0, TERTIARY_TDM_RX_0), + Q6AFE_TDM_PB_DAI("Tertiary", 1, TERTIARY_TDM_RX_1), + Q6AFE_TDM_PB_DAI("Tertiary", 2, 
TERTIARY_TDM_RX_2), + Q6AFE_TDM_PB_DAI("Tertiary", 3, TERTIARY_TDM_RX_3), + Q6AFE_TDM_PB_DAI("Tertiary", 4, TERTIARY_TDM_RX_4), + Q6AFE_TDM_PB_DAI("Tertiary", 5, TERTIARY_TDM_RX_5), + Q6AFE_TDM_PB_DAI("Tertiary", 6, TERTIARY_TDM_RX_6), + Q6AFE_TDM_PB_DAI("Tertiary", 7, TERTIARY_TDM_RX_7), + Q6AFE_TDM_CAP_DAI("Tertiary", 0, TERTIARY_TDM_TX_0), + Q6AFE_TDM_CAP_DAI("Tertiary", 1, TERTIARY_TDM_TX_1), + Q6AFE_TDM_CAP_DAI("Tertiary", 2, TERTIARY_TDM_TX_2), + Q6AFE_TDM_CAP_DAI("Tertiary", 3, TERTIARY_TDM_TX_3), + Q6AFE_TDM_CAP_DAI("Tertiary", 4, TERTIARY_TDM_TX_4), + Q6AFE_TDM_CAP_DAI("Tertiary", 5, TERTIARY_TDM_TX_5), + Q6AFE_TDM_CAP_DAI("Tertiary", 6, TERTIARY_TDM_TX_6), + Q6AFE_TDM_CAP_DAI("Tertiary", 7, TERTIARY_TDM_TX_7), + Q6AFE_TDM_PB_DAI("Quaternary", 0, QUATERNARY_TDM_RX_0), + Q6AFE_TDM_PB_DAI("Quaternary", 1, QUATERNARY_TDM_RX_1), + Q6AFE_TDM_PB_DAI("Quaternary", 2, QUATERNARY_TDM_RX_2), + Q6AFE_TDM_PB_DAI("Quaternary", 3, QUATERNARY_TDM_RX_3), + Q6AFE_TDM_PB_DAI("Quaternary", 4, QUATERNARY_TDM_RX_4), + Q6AFE_TDM_PB_DAI("Quaternary", 5, QUATERNARY_TDM_RX_5), + Q6AFE_TDM_PB_DAI("Quaternary", 6, QUATERNARY_TDM_RX_6), + Q6AFE_TDM_PB_DAI("Quaternary", 7, QUATERNARY_TDM_RX_7), + Q6AFE_TDM_CAP_DAI("Quaternary", 0, QUATERNARY_TDM_TX_0), + Q6AFE_TDM_CAP_DAI("Quaternary", 1, QUATERNARY_TDM_TX_1), + Q6AFE_TDM_CAP_DAI("Quaternary", 2, QUATERNARY_TDM_TX_2), + Q6AFE_TDM_CAP_DAI("Quaternary", 3, QUATERNARY_TDM_TX_3), + Q6AFE_TDM_CAP_DAI("Quaternary", 4, QUATERNARY_TDM_TX_4), + Q6AFE_TDM_CAP_DAI("Quaternary", 5, QUATERNARY_TDM_TX_5), + Q6AFE_TDM_CAP_DAI("Quaternary", 6, QUATERNARY_TDM_TX_6), + Q6AFE_TDM_CAP_DAI("Quaternary", 7, QUATERNARY_TDM_TX_7), + Q6AFE_TDM_PB_DAI("Quinary", 0, QUINARY_TDM_RX_0), + Q6AFE_TDM_PB_DAI("Quinary", 1, QUINARY_TDM_RX_1), + Q6AFE_TDM_PB_DAI("Quinary", 2, QUINARY_TDM_RX_2), + Q6AFE_TDM_PB_DAI("Quinary", 3, QUINARY_TDM_RX_3), + Q6AFE_TDM_PB_DAI("Quinary", 4, QUINARY_TDM_RX_4), + Q6AFE_TDM_PB_DAI("Quinary", 5, QUINARY_TDM_RX_5), + Q6AFE_TDM_PB_DAI("Quinary", 6, QUINARY_TDM_RX_6), + Q6AFE_TDM_PB_DAI("Quinary", 7, QUINARY_TDM_RX_7), + Q6AFE_TDM_CAP_DAI("Quinary", 0, QUINARY_TDM_TX_0), + Q6AFE_TDM_CAP_DAI("Quinary", 1, QUINARY_TDM_TX_1), + Q6AFE_TDM_CAP_DAI("Quinary", 2, QUINARY_TDM_TX_2), + Q6AFE_TDM_CAP_DAI("Quinary", 3, QUINARY_TDM_TX_3), + Q6AFE_TDM_CAP_DAI("Quinary", 4, QUINARY_TDM_TX_4), + Q6AFE_TDM_CAP_DAI("Quinary", 5, QUINARY_TDM_TX_5), + Q6AFE_TDM_CAP_DAI("Quinary", 6, QUINARY_TDM_TX_6), + Q6AFE_TDM_CAP_DAI("Quinary", 7, QUINARY_TDM_TX_7), + { + .playback = { + .stream_name = "Display Port Playback", + .rates = SNDRV_PCM_RATE_48000 | + SNDRV_PCM_RATE_96000 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE, + .channels_min = 2, + .channels_max = 8, + .rate_max = 192000, + .rate_min = 48000, + }, + .ops = &q6hdmi_ops, + .id = DISPLAY_PORT_RX, + .name = "DISPLAY_PORT", + .probe = msm_dai_q6_dai_probe, + .remove = msm_dai_q6_dai_remove, + }, + Q6AFE_CDC_DMA_RX_DAI(WSA_CODEC_DMA_RX_0), + Q6AFE_CDC_DMA_TX_DAI(WSA_CODEC_DMA_TX_0), + Q6AFE_CDC_DMA_RX_DAI(WSA_CODEC_DMA_RX_1), + Q6AFE_CDC_DMA_TX_DAI(WSA_CODEC_DMA_TX_1), + Q6AFE_CDC_DMA_TX_DAI(WSA_CODEC_DMA_TX_2), + Q6AFE_CDC_DMA_TX_DAI(VA_CODEC_DMA_TX_0), + Q6AFE_CDC_DMA_TX_DAI(VA_CODEC_DMA_TX_1), + Q6AFE_CDC_DMA_TX_DAI(VA_CODEC_DMA_TX_2), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_0), + Q6AFE_CDC_DMA_TX_DAI(TX_CODEC_DMA_TX_0), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_1), + Q6AFE_CDC_DMA_TX_DAI(TX_CODEC_DMA_TX_1), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_2), + 
Q6AFE_CDC_DMA_TX_DAI(TX_CODEC_DMA_TX_2), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_3), + Q6AFE_CDC_DMA_TX_DAI(TX_CODEC_DMA_TX_3), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_4), + Q6AFE_CDC_DMA_TX_DAI(TX_CODEC_DMA_TX_4), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_5), + Q6AFE_CDC_DMA_TX_DAI(TX_CODEC_DMA_TX_5), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_6), + Q6AFE_CDC_DMA_RX_DAI(RX_CODEC_DMA_RX_7), +}; + +static int q6afe_of_xlate_dai_name(struct snd_soc_component *component, + const struct of_phandle_args *args, + const char **dai_name) +{ + int id = args->args[0]; + int ret = -EINVAL; + int i; + + for (i = 0; i < ARRAY_SIZE(q6afe_dais); i++) { + if (q6afe_dais[i].id == id) { + *dai_name = q6afe_dais[i].name; + ret = 0; + break; + } + } + + return ret; +} + static const struct snd_soc_dapm_widget q6afe_dai_widgets[] = { SND_SOC_DAPM_AIF_IN("HDMI_RX", NULL, 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SLIMBUS_0_RX", NULL, 0, SND_SOC_NOPM, 0, 0), @@ -960,7 +1627,7 @@ static const struct snd_soc_component_driver q6afe_dai_component = { .num_dapm_widgets = ARRAY_SIZE(q6afe_dai_widgets), .dapm_routes = q6afe_dapm_routes, .num_dapm_routes = ARRAY_SIZE(q6afe_dapm_routes), - .of_xlate_dai_name = q6dsp_audio_ports_of_xlate_dai_name, + .of_xlate_dai_name = q6afe_of_xlate_dai_name, }; @@ -1048,29 +1715,19 @@ static void of_q6afe_parse_dai_data(struct device *dev, static int q6afe_dai_dev_probe(struct platform_device *pdev) { - struct q6dsp_audio_port_dai_driver_config cfg; - struct snd_soc_dai_driver *dais; struct q6afe_dai_data *dai_data; struct device *dev = &pdev->dev; - int num_dais; dai_data = devm_kzalloc(dev, sizeof(*dai_data), GFP_KERNEL); if (!dai_data) return -ENOMEM; dev_set_drvdata(dev, dai_data); + of_q6afe_parse_dai_data(dev, dai_data); - cfg.probe = msm_dai_q6_dai_probe; - cfg.remove = msm_dai_q6_dai_remove; - cfg.q6hdmi_ops = &q6hdmi_ops; - cfg.q6slim_ops = &q6slim_ops; - cfg.q6i2s_ops = &q6i2s_ops; - cfg.q6tdm_ops = &q6tdm_ops; - cfg.q6dma_ops = &q6dma_ops; - dais = q6dsp_audio_ports_set_config(dev, &cfg, &num_dais); - - return devm_snd_soc_register_component(dev, &q6afe_dai_component, dais, num_dais); + return devm_snd_soc_register_component(dev, &q6afe_dai_component, + q6afe_dais, ARRAY_SIZE(q6afe_dais)); } #ifdef CONFIG_OF @@ -1090,5 +1747,5 @@ static struct platform_driver q6afe_dai_platform_driver = { }; module_platform_driver(q6afe_dai_platform_driver); -MODULE_DESCRIPTION("Q6 Audio Frontend dai driver"); +MODULE_DESCRIPTION("Q6 Audio Fronend dai driver"); MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c index 928fd23e2c..18c90bb492 100644 --- a/sound/soc/qcom/qdsp6/q6routing.c +++ b/sound/soc/qcom/qdsp6/q6routing.c @@ -372,12 +372,6 @@ int q6routing_stream_open(int fedai_id, int perf_mode, } session = &routing_data->sessions[stream_id - 1]; - if (session->port_id < 0) { - dev_err(routing_data->dev, "Routing not setup for MultiMedia%d Session\n", - session->fedai_id); - return -EINVAL; - } - pdata = &routing_data->port_data[session->port_id]; mutex_lock(&routing_data->lock); diff --git a/sound/soc/qcom/sc7180.c b/sound/soc/qcom/sc7180.c index 37225ef256..768566bb57 100644 --- a/sound/soc/qcom/sc7180.c +++ b/sound/soc/qcom/sc7180.c @@ -17,7 +17,6 @@ #include #include "../codecs/rt5682.h" -#include "../codecs/rt5682s.h" #include "common.h" #include "lpass.h" @@ -129,21 +128,7 @@ static int sc7180_snd_startup(struct snd_pcm_substream *substream) struct sc7180_snd_data *data = snd_soc_card_get_drvdata(card); struct snd_soc_dai *cpu_dai = 
asoc_rtd_to_cpu(rtd, 0); struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0); - int pll_id, pll_source, pll_in, pll_out, clk_id, ret; - - if (!strcmp(codec_dai->name, "rt5682-aif1")) { - pll_source = RT5682_PLL1_S_MCLK; - pll_id = 0; - clk_id = RT5682_SCLK_S_PLL1; - pll_out = RT5682_PLL1_FREQ; - pll_in = DEFAULT_MCLK_RATE; - } else if (!strcmp(codec_dai->name, "rt5682s-aif1")) { - pll_source = RT5682S_PLL_S_MCLK; - pll_id = RT5682S_PLL2; - clk_id = RT5682S_SCLK_S_PLL2; - pll_out = RT5682_PLL1_FREQ; - pll_in = DEFAULT_MCLK_RATE; - } + int ret; switch (cpu_dai->id) { case MI2S_PRIMARY: @@ -160,15 +145,16 @@ static int sc7180_snd_startup(struct snd_pcm_substream *substream) SND_SOC_DAIFMT_I2S); /* Configure PLL1 for codec */ - ret = snd_soc_dai_set_pll(codec_dai, pll_id, pll_source, - pll_in, pll_out); + ret = snd_soc_dai_set_pll(codec_dai, 0, RT5682_PLL1_S_MCLK, + DEFAULT_MCLK_RATE, RT5682_PLL1_FREQ); if (ret) { dev_err(rtd->dev, "can't set codec pll: %d\n", ret); return ret; } /* Configure sysclk for codec */ - ret = snd_soc_dai_set_sysclk(codec_dai, clk_id, pll_out, + ret = snd_soc_dai_set_sysclk(codec_dai, RT5682_SCLK_S_PLL1, + RT5682_PLL1_FREQ, SND_SOC_CLOCK_IN); if (ret) dev_err(rtd->dev, "snd_soc_dai_set_sysclk err = %d\n", diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c index 5c1d13eccb..0adfc57089 100644 --- a/sound/soc/qcom/sdm845.c +++ b/sound/soc/qcom/sdm845.c @@ -33,7 +33,6 @@ struct sdm845_snd_data { struct snd_soc_jack jack; bool jack_setup; - bool slim_port_setup; bool stream_prepared[AFE_PORT_MAX]; struct snd_soc_card *card; uint32_t pri_mi2s_clk_count; @@ -57,8 +56,8 @@ static int sdm845_slim_snd_hw_params(struct snd_pcm_substream *substream, int ret = 0, i; for_each_rtd_codec_dais(rtd, i, codec_dai) { - sruntime = snd_soc_dai_get_stream(codec_dai, - substream->stream); + sruntime = snd_soc_dai_get_sdw_stream(codec_dai, + substream->stream); if (sruntime != ERR_PTR(-ENOTSUPP)) pdata->sruntime[cpu_dai->id] = sruntime; @@ -225,7 +224,6 @@ static int sdm845_dai_init(struct snd_soc_pcm_runtime *rtd) struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0); struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); struct sdm845_snd_data *pdata = snd_soc_card_get_drvdata(card); - struct snd_soc_dai_link *link = rtd->dai_link; struct snd_jack *jack; /* * Codec SLIMBUS configuration @@ -278,10 +276,6 @@ static int sdm845_dai_init(struct snd_soc_pcm_runtime *rtd) } break; case SLIMBUS_0_RX...SLIMBUS_6_TX: - /* setting up wcd multiple times for slim port is redundant */ - if (pdata->slim_port_setup || !link->no_pcm) - return 0; - for_each_rtd_codec_dais(rtd, i, codec_dai) { rval = snd_soc_dai_set_channel_map(codec_dai, ARRAY_SIZE(tx_ch), @@ -301,10 +295,8 @@ static int sdm845_dai_init(struct snd_soc_pcm_runtime *rtd) dev_warn(card->dev, "Failed to set jack: %d\n", rval); return rval; } + } - - pdata->slim_port_setup = true; - break; default: break; diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c index 114a29e01c..fe8fd7367e 100644 --- a/sound/soc/qcom/sm8250.c +++ b/sound/soc/qcom/sm8250.c @@ -8,8 +8,6 @@ #include #include #include -#include -#include #include "qdsp6/q6afe.h" #include "common.h" @@ -20,66 +18,8 @@ struct sm8250_snd_data { bool stream_prepared[AFE_PORT_MAX]; struct snd_soc_card *card; struct sdw_stream_runtime *sruntime[AFE_PORT_MAX]; - struct snd_soc_jack jack; - bool jack_setup; }; -static int sm8250_snd_init(struct snd_soc_pcm_runtime *rtd) -{ - struct sm8250_snd_data *data = snd_soc_card_get_drvdata(rtd->card); - struct snd_soc_dai 
*cpu_dai = asoc_rtd_to_cpu(rtd, 0); - struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0); - struct snd_soc_card *card = rtd->card; - int rval, i; - - if (!data->jack_setup) { - struct snd_jack *jack; - - rval = snd_soc_card_jack_new(card, "Headset Jack", - SND_JACK_HEADSET | SND_JACK_LINEOUT | - SND_JACK_MECHANICAL | - SND_JACK_BTN_0 | SND_JACK_BTN_1 | - SND_JACK_BTN_2 | SND_JACK_BTN_3 | - SND_JACK_BTN_4 | SND_JACK_BTN_5, - &data->jack, NULL, 0); - - if (rval < 0) { - dev_err(card->dev, "Unable to add Headphone Jack\n"); - return rval; - } - - jack = data->jack.jack; - - snd_jack_set_key(jack, SND_JACK_BTN_0, KEY_MEDIA); - snd_jack_set_key(jack, SND_JACK_BTN_1, KEY_VOICECOMMAND); - snd_jack_set_key(jack, SND_JACK_BTN_2, KEY_VOLUMEUP); - snd_jack_set_key(jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN); - data->jack_setup = true; - } - - switch (cpu_dai->id) { - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - for_each_rtd_codec_dais(rtd, i, codec_dai) { - rval = snd_soc_component_set_jack(codec_dai->component, - &data->jack, NULL); - if (rval != 0 && rval != -ENOTSUPP) { - dev_warn(card->dev, "Failed to set jack: %d\n", rval); - return rval; - } - } - - break; - default: - break; - } - - - return 0; -} - static int sm8250_be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { @@ -129,15 +69,9 @@ static int sm8250_snd_hw_params(struct snd_pcm_substream *substream, switch (cpu_dai->id) { case WSA_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: for_each_rtd_codec_dais(rtd, i, codec_dai) { - sruntime = snd_soc_dai_get_stream(codec_dai, - substream->stream); + sruntime = snd_soc_dai_get_sdw_stream(codec_dai, + substream->stream); if (sruntime != ERR_PTR(-ENOTSUPP)) pdata->sruntime[cpu_dai->id] = sruntime; } @@ -195,12 +129,6 @@ static int sm8250_snd_prepare(struct snd_pcm_substream *substream) switch (cpu_dai->id) { case WSA_CODEC_DMA_RX_0: case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: return sm8250_snd_wsa_dma_prepare(substream); default: break; @@ -219,12 +147,6 @@ static int sm8250_snd_hw_free(struct snd_pcm_substream *substream) switch (cpu_dai->id) { case WSA_CODEC_DMA_RX_0: case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: if (sruntime && data->stream_prepared[cpu_dai->id]) { sdw_disable_stream(sruntime); sdw_deprepare_stream(sruntime); @@ -252,7 +174,6 @@ static void sm8250_add_be_ops(struct snd_soc_card *card) for_each_card_prelinks(card, i, link) { if (link->no_pcm == 1) { - link->init = sm8250_snd_init; link->be_hw_params_fixup = sm8250_be_hw_params_fixup; link->ops = &sm8250_be_ops; } diff --git a/sound/soc/rockchip/Kconfig b/sound/soc/rockchip/Kconfig index 42f76bc0fb..053097b73e 100644 --- a/sound/soc/rockchip/Kconfig +++ b/sound/soc/rockchip/Kconfig @@ -16,17 +16,6 @@ config SND_SOC_ROCKCHIP_I2S Rockchip I2S device. The device supports upto maximum of 8 channels each for play and record. 
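Aside on the sdm845 and sm8250 hw_params hunks above: both compare the value returned by snd_soc_dai_get_sdw_stream() against ERR_PTR(-ENOTSUPP). Codec DAIs that have no SoundWire stream report that exact sentinel rather than NULL, so the machine drivers filter on the specific error pointer instead of a generic IS_ERR() check. A minimal, self-contained sketch of the kernel's pointer/errno encoding follows; the helpers are simplified copies of <linux/err.h>, and get_stream() is a hypothetical stand-in for the DAI call, not the real API:

#include <stdio.h>

#define MAX_ERRNO	4095
#define ENOTSUPP	524	/* kernel-internal errno, not exported to userspace */

/* Simplified copies of the <linux/err.h> helpers. */
static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

/* Hypothetical stand-in for snd_soc_dai_get_sdw_stream(). */
static void *get_stream(int has_stream)
{
	static int stream_object;

	return has_stream ? (void *)&stream_object : ERR_PTR(-ENOTSUPP);
}

int main(void)
{
	void *sruntime = get_stream(0);

	/* Same shape as the for_each_rtd_codec_dais() loops above. */
	if (sruntime != ERR_PTR(-ENOTSUPP))
		printf("stream runtime at %p\n", sruntime);
	else
		printf("no stream, err=%ld\n", PTR_ERR(sruntime));

	return 0;
}

Because the error code is packed into the pointer value itself, a single equality test distinguishes "this DAI simply has no stream" from both real streams and other error pointers.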
-config SND_SOC_ROCKCHIP_I2S_TDM - tristate "Rockchip I2S/TDM Device Driver" - depends on HAVE_CLK && SND_SOC_ROCKCHIP - select SND_SOC_GENERIC_DMAENGINE_PCM - help - Say Y or M if you want to add support for the I2S/TDM driver for - Rockchip I2S/TDM devices, found in Rockchip SoCs. These devices - interface between the AHB bus and the I2S bus, and support up to a - maximum of 8 channels each for playback and recording. - - config SND_SOC_ROCKCHIP_PDM tristate "Rockchip PDM Controller Driver" depends on HAVE_CLK && SND_SOC_ROCKCHIP diff --git a/sound/soc/rockchip/Makefile b/sound/soc/rockchip/Makefile index 30c57c0d76..65e814d460 100644 --- a/sound/soc/rockchip/Makefile +++ b/sound/soc/rockchip/Makefile @@ -1,14 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 # ROCKCHIP Platform Support snd-soc-rockchip-i2s-objs := rockchip_i2s.o -snd-soc-rockchip-i2s-tdm-objs := rockchip_i2s_tdm.o +snd-soc-rockchip-pcm-objs := rockchip_pcm.o snd-soc-rockchip-pdm-objs := rockchip_pdm.o snd-soc-rockchip-spdif-objs := rockchip_spdif.o -obj-$(CONFIG_SND_SOC_ROCKCHIP_I2S) += snd-soc-rockchip-i2s.o +obj-$(CONFIG_SND_SOC_ROCKCHIP_I2S) += snd-soc-rockchip-i2s.o snd-soc-rockchip-pcm.o obj-$(CONFIG_SND_SOC_ROCKCHIP_PDM) += snd-soc-rockchip-pdm.o obj-$(CONFIG_SND_SOC_ROCKCHIP_SPDIF) += snd-soc-rockchip-spdif.o -obj-$(CONFIG_SND_SOC_ROCKCHIP_I2S_TDM) += snd-soc-rockchip-i2s-tdm.o snd-soc-rockchip-max98090-objs := rockchip_max98090.o snd-soc-rockchip-rt5645-objs := rockchip_rt5645.o diff --git a/sound/soc/rockchip/rk3288_hdmi_analog.c b/sound/soc/rockchip/rk3288_hdmi_analog.c index b052642ea6..33a0077474 100644 --- a/sound/soc/rockchip/rk3288_hdmi_analog.c +++ b/sound/soc/rockchip/rk3288_hdmi_analog.c @@ -249,9 +249,13 @@ static int snd_rk_mc_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, machine); ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) - return dev_err_probe(&pdev->dev, ret, - "Soc register card failed\n"); + if (ret == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (ret) { + dev_err(&pdev->dev, + "Soc register card failed %d\n", ret); + return ret; + } return ret; } diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index a6d7656c20..7e89f5b0c2 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -20,6 +20,7 @@ #include #include "rockchip_i2s.h" +#include "rockchip_pcm.h" #define DRV_NAME "rockchip-i2s" @@ -755,7 +756,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev) goto err_suspend; } - ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); + ret = rockchip_pcm_platform_register(&pdev->dev); if (ret) { dev_err(&pdev->dev, "Could not register PCM\n"); goto err_suspend; diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 64d9891b64..38bd603eeb 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -20,12 +20,10 @@ #define PDM_DMA_BURST_SIZE (8) /* size * width: 8*4 = 32 bytes */ #define PDM_SIGNOFF_CLK_RATE (100000000) -#define PDM_PATH_MAX (4) enum rk_pdm_version { RK_PDM_RK3229, RK_PDM_RK3308, - RK_PDM_RV1126, }; struct rk_pdm_dev { @@ -123,55 +121,6 @@ static unsigned int get_pdm_ds_ratio(unsigned int sr) return ratio; } -static unsigned int get_pdm_cic_ratio(unsigned int clk) -{ - switch (clk) { - case 4096000: - case 5644800: - case 6144000: - return 0; - case 2048000: - case 2822400: - case 3072000: - return 1; - case 1024000: - case 1411200: - case 1536000: - return 2; - default: - return 1; - } -} - -static 
unsigned int samplerate_to_bit(unsigned int samplerate) -{ - switch (samplerate) { - case 8000: - case 11025: - case 12000: - return 0; - case 16000: - case 22050: - case 24000: - return 1; - case 32000: - return 2; - case 44100: - case 48000: - return 3; - case 64000: - case 88200: - case 96000: - return 4; - case 128000: - case 176400: - case 192000: - return 5; - default: - return 1; - } -} - static inline struct rk_pdm_dev *to_info(struct snd_soc_dai *dai) { return snd_soc_dai_get_drvdata(dai); @@ -217,8 +166,7 @@ static int rockchip_pdm_hw_params(struct snd_pcm_substream *substream, if (ret) return -EINVAL; - if (pdm->version == RK_PDM_RK3308 || - pdm->version == RK_PDM_RV1126) { + if (pdm->version == RK_PDM_RK3308) { rational_best_approximation(clk_out, clk_src, GENMASK(16 - 1, 0), GENMASK(16 - 1, 0), @@ -246,18 +194,8 @@ static int rockchip_pdm_hw_params(struct snd_pcm_substream *substream, PDM_CLK_FD_RATIO_MSK, val); } - - if (pdm->version == RK_PDM_RV1126) { - val = get_pdm_cic_ratio(clk_out); - regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, PDM_CIC_RATIO_MSK, val); - val = samplerate_to_bit(samplerate); - regmap_update_bits(pdm->regmap, PDM_CTRL0, - PDM_SAMPLERATE_MSK, PDM_SAMPLERATE(val)); - } else { - val = get_pdm_ds_ratio(samplerate); - regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, PDM_DS_RATIO_MSK, val); - } - + val = get_pdm_ds_ratio(samplerate); + regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, PDM_DS_RATIO_MSK, val); regmap_update_bits(pdm->regmap, PDM_HPF_CTRL, PDM_HPF_CF_MSK, PDM_HPF_60HZ); regmap_update_bits(pdm->regmap, PDM_HPF_CTRL, @@ -503,10 +441,9 @@ static bool rockchip_pdm_precious_reg(struct device *dev, unsigned int reg) } static const struct reg_default rockchip_pdm_reg_defaults[] = { - { PDM_CTRL0, 0x78000017 }, - { PDM_CTRL1, 0x0bb8ea60 }, - { PDM_CLK_CTRL, 0x0000e401 }, - { PDM_DMA_CTRL, 0x0000001f }, + {0x04, 0x78000017}, + {0x08, 0x0bb8ea60}, + {0x18, 0x0000001f}, }; static const struct regmap_config rockchip_pdm_regmap_config = { @@ -532,44 +469,12 @@ static const struct of_device_id rockchip_pdm_match[] __maybe_unused = { .data = (void *)RK_PDM_RK3308 }, { .compatible = "rockchip,rk3308-pdm", .data = (void *)RK_PDM_RK3308 }, - { .compatible = "rockchip,rk3568-pdm", - .data = (void *)RK_PDM_RV1126 }, - { .compatible = "rockchip,rv1126-pdm", - .data = (void *)RK_PDM_RV1126 }, {}, }; MODULE_DEVICE_TABLE(of, rockchip_pdm_match); -static int rockchip_pdm_path_parse(struct rk_pdm_dev *pdm, struct device_node *node) -{ - unsigned int path[PDM_PATH_MAX]; - int cnt = 0, ret = 0, i = 0, val = 0, msk = 0; - - cnt = of_count_phandle_with_args(node, "rockchip,path-map", - NULL); - if (cnt != PDM_PATH_MAX) - return cnt; - - ret = of_property_read_u32_array(node, "rockchip,path-map", - path, cnt); - if (ret) - return ret; - - for (i = 0; i < cnt; i++) { - if (path[i] >= PDM_PATH_MAX) - return -EINVAL; - msk |= PDM_PATH_MASK(i); - val |= PDM_PATH(i, path[i]); - } - - regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, msk, val); - - return 0; -} - static int rockchip_pdm_probe(struct platform_device *pdev) { - struct device_node *node = pdev->dev.of_node; const struct of_device_id *match; struct rk_pdm_dev *pdm; struct resource *res; @@ -635,11 +540,6 @@ static int rockchip_pdm_probe(struct platform_device *pdev) } rockchip_pdm_rxctrl(pdm, 0); - - ret = rockchip_pdm_path_parse(pdm, node); - if (ret != 0 && ret != -ENOENT) - goto err_suspend; - ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); if (ret) { dev_err(&pdev->dev, "could not register pcm: %d\n", ret); diff 
--git a/sound/soc/rockchip/rockchip_pdm.h b/sound/soc/rockchip/rockchip_pdm.h index cab977272e..8e5bbafef7 100644 --- a/sound/soc/rockchip/rockchip_pdm.h +++ b/sound/soc/rockchip/rockchip_pdm.h @@ -41,8 +41,6 @@ #define PDM_PATH1_EN BIT(28) #define PDM_PATH0_EN BIT(27) #define PDM_HWT_EN BIT(26) -#define PDM_SAMPLERATE_MSK GENMASK(7, 5) -#define PDM_SAMPLERATE(x) ((x) << 5) #define PDM_VDW_MSK (0x1f << 0) #define PDM_VDW(X) ((X - 1) << 0) @@ -53,9 +51,6 @@ #define PDM_FD_DENOMINATOR_MSK GENMASK(15, 0) /* PDM CLK CTRL */ -#define PDM_PATH_SHIFT(x) (8 + (x) * 2) -#define PDM_PATH_MASK(x) (0x3 << PDM_PATH_SHIFT(x)) -#define PDM_PATH(x, v) ((v) << PDM_PATH_SHIFT(x)) #define PDM_CLK_FD_RATIO_MSK BIT(6) #define PDM_CLK_FD_RATIO_40 (0X0 << 6) #define PDM_CLK_FD_RATIO_35 BIT(6) @@ -71,7 +66,6 @@ #define PDM_CLK_1280FS (0x2 << 0) #define PDM_CLK_2560FS (0x3 << 0) #define PDM_CLK_5120FS (0x4 << 0) -#define PDM_CIC_RATIO_MSK (0x3 << 0) /* PDM HPF CTRL */ #define PDM_HPF_LE BIT(3) diff --git a/sound/soc/samsung/aries_wm8994.c b/sound/soc/samsung/aries_wm8994.c index 5265e546b1..313ab650f8 100644 --- a/sound/soc/samsung/aries_wm8994.c +++ b/sound/soc/samsung/aries_wm8994.c @@ -585,16 +585,19 @@ static int aries_audio_probe(struct platform_device *pdev) extcon_np = of_parse_phandle(np, "extcon", 0); priv->usb_extcon = extcon_find_edev_by_node(extcon_np); - if (IS_ERR(priv->usb_extcon)) - return dev_err_probe(dev, PTR_ERR(priv->usb_extcon), - "Failed to get extcon device"); + if (IS_ERR(priv->usb_extcon)) { + if (PTR_ERR(priv->usb_extcon) != -EPROBE_DEFER) + dev_err(dev, "Failed to get extcon device"); + return PTR_ERR(priv->usb_extcon); + } of_node_put(extcon_np); priv->adc = devm_iio_channel_get(dev, "headset-detect"); - if (IS_ERR(priv->adc)) - return dev_err_probe(dev, PTR_ERR(priv->adc), - "Failed to get ADC channel"); - + if (IS_ERR(priv->adc)) { + if (PTR_ERR(priv->adc) != -EPROBE_DEFER) + dev_err(dev, "Failed to get ADC channel"); + return PTR_ERR(priv->adc); + } if (priv->adc->channel->type != IIO_VOLTAGE) return -EINVAL; diff --git a/sound/soc/samsung/arndale.c b/sound/soc/samsung/arndale.c index a5dc640d0d..606ac5e33a 100644 --- a/sound/soc/samsung/arndale.c +++ b/sound/soc/samsung/arndale.c @@ -174,8 +174,9 @@ static int arndale_audio_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(card->dev, card); if (ret) { - dev_err_probe(&pdev->dev, ret, - "snd_soc_register_card() failed\n"); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, + "snd_soc_register_card() failed: %d\n", ret); goto err_put_of_nodes; } return 0; diff --git a/sound/soc/samsung/littlemill.c b/sound/soc/samsung/littlemill.c index 34067cc314..390f2dd735 100644 --- a/sound/soc/samsung/littlemill.c +++ b/sound/soc/samsung/littlemill.c @@ -325,8 +325,9 @@ static int littlemill_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); + if (ret && ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", + ret); return ret; } diff --git a/sound/soc/samsung/lowland.c b/sound/soc/samsung/lowland.c index 7b12ccd2a9..998d10cf8c 100644 --- a/sound/soc/samsung/lowland.c +++ b/sound/soc/samsung/lowland.c @@ -183,8 +183,9 @@ static int lowland_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); + if (ret && ret != 
-EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", + ret); return ret; } diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c index 4ff12e2e70..ca643a488c 100644 --- a/sound/soc/samsung/odroid.c +++ b/sound/soc/samsung/odroid.c @@ -311,7 +311,9 @@ static int odroid_audio_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(dev, card); if (ret < 0) { - dev_err_probe(dev, ret, "snd_soc_register_card() failed\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "snd_soc_register_card() failed: %d\n", + ret); goto err_put_clk_i2s; } diff --git a/sound/soc/samsung/s3c-i2s-v2.c b/sound/soc/samsung/s3c-i2s-v2.c index de66cc422e..e9481187a0 100644 --- a/sound/soc/samsung/s3c-i2s-v2.c +++ b/sound/soc/samsung/s3c-i2s-v2.c @@ -397,8 +397,6 @@ static int s3c2412_i2s_trigger(struct snd_pcm_substream *substream, int cmd, /* clear again, just in case */ writel(0x0, i2s->regs + S3C2412_IISFIC); - fallthrough; - case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: if (!i2s->master) { diff --git a/sound/soc/samsung/smdk_wm8994.c b/sound/soc/samsung/smdk_wm8994.c index 821ad1eb1b..7661b63794 100644 --- a/sound/soc/samsung/smdk_wm8994.c +++ b/sound/soc/samsung/smdk_wm8994.c @@ -179,8 +179,8 @@ static int smdk_audio_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); + if (ret && ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card() failed:%d\n", ret); return ret; } diff --git a/sound/soc/samsung/smdk_wm8994pcm.c b/sound/soc/samsung/smdk_wm8994pcm.c index d77dc54cae..029448f5be 100644 --- a/sound/soc/samsung/smdk_wm8994pcm.c +++ b/sound/soc/samsung/smdk_wm8994pcm.c @@ -118,8 +118,8 @@ static int snd_smdk_probe(struct platform_device *pdev) smdk_pcm.dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, &smdk_pcm); - if (ret) - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n"); + if (ret && ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card failed %d\n", ret); return ret; } diff --git a/sound/soc/samsung/snow.c b/sound/soc/samsung/snow.c index 02372109c2..6da674e901 100644 --- a/sound/soc/samsung/snow.c +++ b/sound/soc/samsung/snow.c @@ -212,9 +212,12 @@ static int snow_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, priv); ret = devm_snd_soc_register_card(dev, card); - if (ret) - return dev_err_probe(&pdev->dev, ret, - "snd_soc_register_card failed\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, + "snd_soc_register_card failed (%d)\n", ret); + return ret; + } return ret; } diff --git a/sound/soc/samsung/speyside.c b/sound/soc/samsung/speyside.c index 37b1f4f60b..f5f6ba00d0 100644 --- a/sound/soc/samsung/speyside.c +++ b/sound/soc/samsung/speyside.c @@ -330,8 +330,9 @@ static int speyside_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); + if (ret && ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", + ret); return ret; } diff --git a/sound/soc/samsung/tm2_wm5110.c b/sound/soc/samsung/tm2_wm5110.c index d611ec9e53..a2c77e6def 100644 --- a/sound/soc/samsung/tm2_wm5110.c +++ b/sound/soc/samsung/tm2_wm5110.c @@ -612,7 +612,8 @@ static int tm2_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(dev, card); if (ret < 0) { - dev_err_probe(dev, ret, 
"Failed to register card\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to register card: %d\n", ret); goto dai_node_put; } diff --git a/sound/soc/samsung/tobermory.c b/sound/soc/samsung/tobermory.c index 8d3149a47a..15223d860c 100644 --- a/sound/soc/samsung/tobermory.c +++ b/sound/soc/samsung/tobermory.c @@ -229,8 +229,9 @@ static int tobermory_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) - dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); + if (ret && ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", + ret); return ret; } diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c index e8d98b362f..fa0cc08f70 100644 --- a/sound/soc/sh/rz-ssi.c +++ b/sound/soc/sh/rz-ssi.c @@ -1020,12 +1020,7 @@ static int rz_ssi_probe(struct platform_device *pdev) reset_control_deassert(ssi->rstc); pm_runtime_enable(&pdev->dev); - ret = pm_runtime_resume_and_get(&pdev->dev); - if (ret < 0) { - pm_runtime_disable(ssi->dev); - reset_control_assert(ssi->rstc); - return dev_err_probe(ssi->dev, ret, "pm_runtime_resume_and_get failed\n"); - } + pm_runtime_resume_and_get(&pdev->dev); spin_lock_init(&ssi->lock); dev_set_drvdata(&pdev->dev, ssi); diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c index 142476f139..395229bf5c 100644 --- a/sound/soc/soc-acpi.c +++ b/sound/soc/soc-acpi.c @@ -8,36 +8,14 @@ #include #include -static bool snd_soc_acpi_id_present(struct snd_soc_acpi_mach *machine) -{ - const struct snd_soc_acpi_codecs *comp_ids = machine->comp_ids; - int i; - - if (machine->id[0]) { - if (acpi_dev_present(machine->id, NULL, -1)) - return true; - } - - if (comp_ids) { - for (i = 0; i < comp_ids->num_codecs; i++) { - if (acpi_dev_present(comp_ids->codecs[i], NULL, -1)) { - strscpy(machine->id, comp_ids->codecs[i], ACPI_ID_LEN); - return true; - } - } - } - - return false; -} - struct snd_soc_acpi_mach * snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines) { struct snd_soc_acpi_mach *mach; struct snd_soc_acpi_mach *mach_alt; - for (mach = machines; mach->id[0] || mach->comp_ids; mach++) { - if (snd_soc_acpi_id_present(mach)) { + for (mach = machines; mach->id[0]; mach++) { + if (acpi_dev_present(mach->id, NULL, -1)) { if (mach->machine_quirk) { mach_alt = mach->machine_quirk(mach); if (!mach_alt) @@ -55,13 +33,16 @@ EXPORT_SYMBOL_GPL(snd_soc_acpi_find_machine); static acpi_status snd_soc_acpi_find_package(acpi_handle handle, u32 level, void *context, void **ret) { - struct acpi_device *adev = acpi_fetch_acpi_dev(handle); + struct acpi_device *adev; acpi_status status; struct snd_soc_acpi_package_context *pkg_ctx = context; pkg_ctx->data_valid = false; - if (adev && adev->status.present && adev->status.functional) { + if (acpi_bus_get_device(handle, &adev)) + return AE_OK; + + if (adev->status.present && adev->status.functional) { struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *myobj = NULL; diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index c0664f9499..8e8d917d22 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -13,10 +13,9 @@ #include #include -#define soc_component_ret(dai, ret) _soc_component_ret(dai, __func__, ret, -1) -#define soc_component_ret_reg_rw(dai, ret, reg) _soc_component_ret(dai, __func__, ret, reg) +#define soc_component_ret(dai, ret) _soc_component_ret(dai, __func__, ret) static inline int _soc_component_ret(struct snd_soc_component *component, - const char *func, 
int ret, int reg) + const char *func, int ret) { /* Positive/Zero values are not errors */ if (ret >= 0) @@ -28,14 +27,9 @@ static inline int _soc_component_ret(struct snd_soc_component *component, case -ENOTSUPP: break; default: - if (reg == -1) - dev_err(component->dev, - "ASoC: error at %s on %s: %d\n", - func, component->name, ret); - else - dev_err(component->dev, - "ASoC: error at %s on %s for register: [0x%08x] %d\n", - func, component->name, reg, ret); + dev_err(component->dev, + "ASoC: error at %s on %s: %d\n", + func, component->name, ret); } return ret; @@ -257,7 +251,8 @@ int snd_soc_component_set_jack(struct snd_soc_component *component, EXPORT_SYMBOL_GPL(snd_soc_component_set_jack); int snd_soc_component_module_get(struct snd_soc_component *component, - void *mark, int upon_open) + struct snd_pcm_substream *substream, + int upon_open) { int ret = 0; @@ -265,24 +260,25 @@ int snd_soc_component_module_get(struct snd_soc_component *component, !try_module_get(component->dev->driver->owner)) ret = -ENODEV; - /* mark module if succeeded */ + /* mark substream if succeeded */ if (ret == 0) - soc_component_mark_push(component, mark, module); + soc_component_mark_push(component, substream, module); return soc_component_ret(component, ret); } void snd_soc_component_module_put(struct snd_soc_component *component, - void *mark, int upon_open, int rollback) + struct snd_pcm_substream *substream, + int upon_open, int rollback) { - if (rollback && !soc_component_mark_match(component, mark, module)) + if (rollback && !soc_component_mark_match(component, substream, module)) return; if (component->driver->module_get_upon_open == !!upon_open) module_put(component->dev->driver->owner); - /* remove the mark from module */ - soc_component_mark_pop(component, mark, module); + /* remove marked substream */ + soc_component_mark_pop(component, substream, module); } int snd_soc_component_open(struct snd_soc_component *component, @@ -429,36 +425,43 @@ EXPORT_SYMBOL_GPL(snd_soc_component_exit_regmap); #endif -int snd_soc_component_compr_open(struct snd_soc_component *component, - struct snd_compr_stream *cstream) +int snd_soc_component_compr_open(struct snd_compr_stream *cstream) { - int ret = 0; + struct snd_soc_pcm_runtime *rtd = cstream->private_data; + struct snd_soc_component *component; + int i, ret; - if (component->driver->compress_ops && - component->driver->compress_ops->open) - ret = component->driver->compress_ops->open(component, cstream); - - /* mark substream if succeeded */ - if (ret == 0) + for_each_rtd_components(rtd, i, component) { + if (component->driver->compress_ops && + component->driver->compress_ops->open) { + ret = component->driver->compress_ops->open(component, cstream); + if (ret < 0) + return soc_component_ret(component, ret); + } soc_component_mark_push(component, cstream, compr_open); + } - return soc_component_ret(component, ret); + return 0; } EXPORT_SYMBOL_GPL(snd_soc_component_compr_open); -void snd_soc_component_compr_free(struct snd_soc_component *component, - struct snd_compr_stream *cstream, +void snd_soc_component_compr_free(struct snd_compr_stream *cstream, int rollback) { - if (rollback && !soc_component_mark_match(component, cstream, compr_open)) - return; + struct snd_soc_pcm_runtime *rtd = cstream->private_data; + struct snd_soc_component *component; + int i; - if (component->driver->compress_ops && - component->driver->compress_ops->free) - component->driver->compress_ops->free(component, cstream); + for_each_rtd_components(rtd, i, component) { + if 
(rollback && !soc_component_mark_match(component, cstream, compr_open)) + continue; - /* remove marked substream */ - soc_component_mark_pop(component, cstream, compr_open); + if (component->driver->compress_ops && + component->driver->compress_ops->free) + component->driver->compress_ops->free(component, cstream); + + soc_component_mark_pop(component, cstream, compr_open); + } } EXPORT_SYMBOL_GPL(snd_soc_component_compr_free); @@ -693,7 +696,7 @@ static unsigned int soc_component_read_no_lock( ret = -EIO; if (ret < 0) - return soc_component_ret_reg_rw(component, ret, reg); + return soc_component_ret(component, ret); return val; } @@ -729,7 +732,7 @@ static int soc_component_write_no_lock( else if (component->driver->write) ret = component->driver->write(component, reg, val); - return soc_component_ret_reg_rw(component, ret, reg); + return soc_component_ret(component, ret); } /** @@ -771,7 +774,7 @@ static int snd_soc_component_update_bits_legacy( mutex_unlock(&component->io_mutex); - return soc_component_ret_reg_rw(component, ret, reg); + return soc_component_ret(component, ret); } /** @@ -799,7 +802,7 @@ int snd_soc_component_update_bits(struct snd_soc_component *component, mask, val, &change); if (ret < 0) - return soc_component_ret_reg_rw(component, ret, reg); + return soc_component_ret(component, ret); return change; } EXPORT_SYMBOL_GPL(snd_soc_component_update_bits); @@ -835,7 +838,7 @@ int snd_soc_component_update_bits_async(struct snd_soc_component *component, mask, val, &change); if (ret < 0) - return soc_component_ret_reg_rw(component, ret, reg); + return soc_component_ret(component, ret); return change; } EXPORT_SYMBOL_GPL(snd_soc_component_update_bits_async); @@ -932,34 +935,6 @@ int snd_soc_pcm_component_pointer(struct snd_pcm_substream *substream) return 0; } -void snd_soc_pcm_component_delay(struct snd_pcm_substream *substream, - snd_pcm_sframes_t *cpu_delay, - snd_pcm_sframes_t *codec_delay) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - struct snd_soc_component *component; - snd_pcm_sframes_t delay; - int i; - - /* - * We're looking for the delay through the full audio path so it needs to - * be the maximum of the Components doing transmit and the maximum of the - * Components doing receive (ie, all CPUs and all CODECs) rather than - * just the maximum of all Components. 
- */ - for_each_rtd_components(rtd, i, component) { - if (!component->driver->delay) - continue; - - delay = component->driver->delay(component, substream); - - if (snd_soc_component_is_codec(component)) - *codec_delay = max(*codec_delay, delay); - else - *cpu_delay = max(*cpu_delay, delay); - } -} - int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg) { diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 8e2494a9f3..36060800e9 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -22,39 +22,6 @@ #include #include -static int snd_soc_compr_components_open(struct snd_compr_stream *cstream) -{ - struct snd_soc_pcm_runtime *rtd = cstream->private_data; - struct snd_soc_component *component; - int ret = 0; - int i; - - for_each_rtd_components(rtd, i, component) { - ret = snd_soc_component_module_get_when_open(component, cstream); - if (ret < 0) - break; - - ret = snd_soc_component_compr_open(component, cstream); - if (ret < 0) - break; - } - - return ret; -} - -static void snd_soc_compr_components_free(struct snd_compr_stream *cstream, - int rollback) -{ - struct snd_soc_pcm_runtime *rtd = cstream->private_data; - struct snd_soc_component *component; - int i; - - for_each_rtd_components(rtd, i, component) { - snd_soc_component_compr_free(component, cstream, rollback); - snd_soc_component_module_put_when_close(component, cstream, rollback); - } -} - static int soc_compr_clean(struct snd_compr_stream *cstream, int rollback) { struct snd_soc_pcm_runtime *rtd = cstream->private_data; @@ -77,7 +44,7 @@ static int soc_compr_clean(struct snd_compr_stream *cstream, int rollback) snd_soc_link_compr_shutdown(cstream, rollback); - snd_soc_compr_components_free(cstream, rollback); + snd_soc_component_compr_free(cstream, rollback); snd_soc_dai_compr_shutdown(cpu_dai, cstream, rollback); @@ -113,7 +80,7 @@ static int soc_compr_open(struct snd_compr_stream *cstream) if (ret < 0) goto err; - ret = snd_soc_compr_components_open(cstream); + ret = snd_soc_component_compr_open(cstream); if (ret < 0) goto err; @@ -170,7 +137,7 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream) if (ret < 0) goto out; - ret = snd_soc_compr_components_open(cstream); + ret = snd_soc_component_compr_open(cstream); if (ret < 0) goto open_err; @@ -193,7 +160,7 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream) return 0; machine_err: - snd_soc_compr_components_free(cstream, 1); + snd_soc_component_compr_free(cstream, 1); open_err: snd_soc_dai_compr_shutdown(cpu_dai, cstream, 1); out: @@ -238,7 +205,7 @@ static int soc_compr_free_fe(struct snd_compr_stream *cstream) snd_soc_link_compr_shutdown(cstream, 0); - snd_soc_compr_components_free(cstream, 0); + snd_soc_component_compr_free(cstream, 0); snd_soc_dai_compr_shutdown(cpu_dai, cstream, 0); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 9735026aa5..b84f461160 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -229,12 +229,31 @@ static void snd_soc_debugfs_exit(void) #else -static inline void soc_init_component_debugfs(struct snd_soc_component *component) { } -static inline void soc_cleanup_component_debugfs(struct snd_soc_component *component) { } -static inline void soc_init_card_debugfs(struct snd_soc_card *card) { } -static inline void soc_cleanup_card_debugfs(struct snd_soc_card *card) { } -static inline void snd_soc_debugfs_init(void) { } -static inline void snd_soc_debugfs_exit(void) { } +static inline void soc_init_component_debugfs( + struct 
snd_soc_component *component) +{ +} + +static inline void soc_cleanup_component_debugfs( + struct snd_soc_component *component) +{ +} + +static inline void soc_init_card_debugfs(struct snd_soc_card *card) +{ +} + +static inline void soc_cleanup_card_debugfs(struct snd_soc_card *card) +{ +} + +static inline void snd_soc_debugfs_init(void) +{ +} + +static inline void snd_soc_debugfs_exit(void) +{ +} #endif @@ -720,7 +739,9 @@ static void soc_resume_init(struct snd_soc_card *card) #else #define snd_soc_suspend NULL #define snd_soc_resume NULL -static inline void soc_resume_init(struct snd_soc_card *card) { } +static inline void soc_resume_init(struct snd_soc_card *card) +{ +} #endif static struct device_node @@ -1218,9 +1239,6 @@ int snd_soc_runtime_set_dai_fmt(struct snd_soc_pcm_runtime *rtd, unsigned int i; int ret; - if (!dai_fmt) - return 0; - for_each_rtd_codec_dais(rtd, i, codec_dai) { unsigned int codec_dai_fmt = dai_fmt; @@ -1237,6 +1255,7 @@ int snd_soc_runtime_set_dai_fmt(struct snd_soc_pcm_runtime *rtd, /* * Flip the polarity for the "CPU" end of a CODEC<->CODEC link + * the component which has non_legacy_dai_naming is Codec */ inv_dai_fmt = dai_fmt & ~SND_SOC_DAIFMT_MASTER_MASK; switch (dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) { @@ -1256,7 +1275,7 @@ int snd_soc_runtime_set_dai_fmt(struct snd_soc_pcm_runtime *rtd, for_each_rtd_cpu_dais(rtd, i, cpu_dai) { unsigned int fmt = dai_fmt; - if (snd_soc_component_is_codec(cpu_dai->component)) + if (cpu_dai->component->driver->non_legacy_dai_naming) fmt = inv_dai_fmt; ret = snd_soc_dai_set_fmt(cpu_dai, fmt); @@ -1285,9 +1304,11 @@ static int soc_init_pcm_runtime(struct snd_soc_card *card, return ret; snd_soc_runtime_get_dai_fmt(rtd); - ret = snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); - if (ret) - return ret; + if (dai_link->dai_fmt) { + ret = snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); + if (ret) + return ret; + } /* add DPCM sysfs entries */ soc_dpcm_debugfs_add(rtd); @@ -1363,6 +1384,9 @@ static void soc_remove_component(struct snd_soc_component *component, if (probed) snd_soc_component_remove(component); + /* For framework level robustness */ + snd_soc_component_set_jack(component, NULL, NULL); + list_del_init(&component->card_list); snd_soc_dapm_free(snd_soc_component_get_dapm(component)); soc_cleanup_component_debugfs(component); @@ -2336,6 +2360,7 @@ int snd_soc_register_card(struct snd_soc_card *card) mutex_init(&card->mutex); mutex_init(&card->dapm_mutex); mutex_init(&card->pcm_mutex); + spin_lock_init(&card->dpcm_lock); return snd_soc_bind_card(card); } @@ -2517,7 +2542,7 @@ static int snd_soc_register_dais(struct snd_soc_component *component, for (i = 0; i < count; i++) { dai = snd_soc_register_dai(component, dai_drv + i, count == 1 && - !snd_soc_component_is_codec(component)); + !component->driver->non_legacy_dai_naming); if (dai == NULL) { ret = -ENOMEM; goto err; @@ -2844,56 +2869,6 @@ int snd_soc_of_parse_audio_simple_widgets(struct snd_soc_card *card, } EXPORT_SYMBOL_GPL(snd_soc_of_parse_audio_simple_widgets); -int snd_soc_of_parse_pin_switches(struct snd_soc_card *card, const char *prop) -{ - const unsigned int nb_controls_max = 16; - const char **strings, *control_name; - struct snd_kcontrol_new *controls; - struct device *dev = card->dev; - unsigned int i, nb_controls; - int ret; - - if (!of_property_read_bool(dev->of_node, prop)) - return 0; - - strings = devm_kcalloc(dev, nb_controls_max, - sizeof(*strings), GFP_KERNEL); - if (!strings) - return -ENOMEM; - - ret = 
of_property_read_string_array(dev->of_node, prop, - strings, nb_controls_max); - if (ret < 0) - return ret; - - nb_controls = (unsigned int)ret; - - controls = devm_kcalloc(dev, nb_controls, - sizeof(*controls), GFP_KERNEL); - if (!controls) - return -ENOMEM; - - for (i = 0; i < nb_controls; i++) { - control_name = devm_kasprintf(dev, GFP_KERNEL, - "%s Switch", strings[i]); - if (!control_name) - return -ENOMEM; - - controls[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; - controls[i].name = control_name; - controls[i].info = snd_soc_dapm_info_pin_switch; - controls[i].get = snd_soc_dapm_get_pin_switch; - controls[i].put = snd_soc_dapm_put_pin_switch; - controls[i].private_value = (unsigned long)strings[i]; - } - - card->controls = controls; - card->num_controls = nb_controls; - - return 0; -} -EXPORT_SYMBOL_GPL(snd_soc_of_parse_pin_switches); - int snd_soc_of_get_slot_mask(struct device_node *np, const char *prop_name, unsigned int *mask) diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 6078afe335..3db0fcf243 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -453,6 +453,18 @@ void snd_soc_dai_shutdown(struct snd_soc_dai *dai, soc_dai_mark_pop(dai, substream, startup); } +snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai, + struct snd_pcm_substream *substream) +{ + int delay = 0; + + if (dai->driver->ops && + dai->driver->ops->delay) + delay = dai->driver->ops->delay(substream, dai); + + return delay; +} + int snd_soc_dai_compress_new(struct snd_soc_dai *dai, struct snd_soc_pcm_runtime *rtd, int num) { @@ -681,34 +693,6 @@ int snd_soc_pcm_dai_bespoke_trigger(struct snd_pcm_substream *substream, return 0; } -void snd_soc_pcm_dai_delay(struct snd_pcm_substream *substream, - snd_pcm_sframes_t *cpu_delay, - snd_pcm_sframes_t *codec_delay) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - struct snd_soc_dai *dai; - int i; - - /* - * We're looking for the delay through the full audio path so it needs to - * be the maximum of the DAIs doing transmit and the maximum of the DAIs - * doing receive (ie, all CPUs and all CODECs) rather than just the maximum - * of all DAIs. - */ - - /* for CPU */ - for_each_rtd_cpu_dais(rtd, i, dai) - if (dai->driver->ops && - dai->driver->ops->delay) - *cpu_delay = max(*cpu_delay, dai->driver->ops->delay(substream, dai)); - - /* for Codec */ - for_each_rtd_codec_dais(rtd, i, dai) - if (dai->driver->ops && - dai->driver->ops->delay) - *codec_delay = max(*codec_delay, dai->driver->ops->delay(substream, dai)); -} - int snd_soc_dai_compr_startup(struct snd_soc_dai *dai, struct snd_compr_stream *cstream) { diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index b06c568244..0479bb0005 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1331,13 +1331,11 @@ int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, return paths; } -EXPORT_SYMBOL_GPL(snd_soc_dapm_dai_get_connected_widgets); void snd_soc_dapm_dai_free_widgets(struct snd_soc_dapm_widget_list **list) { dapm_widget_list_free(list); } -EXPORT_SYMBOL_GPL(snd_soc_dapm_dai_free_widgets); /* * Handler for regulator supply widget. 
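
A note on the delay rework being reverted here: the soc-dai.c hunk above restores the per-DAI snd_soc_dai_delay() accessor, and the soc-pcm.c changes later in this series rebuild soc_pcm_pointer() on top of it, so the total stream delay written to runtime->delay becomes the maximum of the CPU-side DAI delays plus the maximum of the CODEC-side DAI delays. Below is a minimal standalone sketch of that aggregation; struct model_dai and the sample frame counts are invented stand-ins for the kernel's snd_soc_dai and its .delay() callback, kept only so the sketch compiles on its own.

/*
 * Standalone model of the delay aggregation used by soc_pcm_pointer():
 * one transmit side plus one receive side, not the sum of every DAI.
 */
#include <stdio.h>

struct model_dai {
	const char *name;
	long delay_frames;	/* what the DAI's .delay() op would report */
	int is_codec;
};

static long total_delay(const struct model_dai *dais, int n)
{
	long cpu_delay = 0, codec_delay = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (dais[i].is_codec) {
			if (dais[i].delay_frames > codec_delay)
				codec_delay = dais[i].delay_frames;
		} else {
			if (dais[i].delay_frames > cpu_delay)
				cpu_delay = dais[i].delay_frames;
		}
	}
	/*
	 * The kernel adds these on top of any base delay the component
	 * pointer callback already stored in runtime->delay.
	 */
	return cpu_delay + codec_delay;
}

int main(void)
{
	const struct model_dai dais[] = {
		{ "cpu-dai0",   64, 0 },
		{ "cpu-dai1",   32, 0 },
		{ "codec-dai0", 48, 1 },
	};

	/* 64 (max CPU) + 48 (max CODEC) = 112 frames */
	printf("runtime->delay = %ld frames\n", total_delay(dais, 3));
	return 0;
}

Summing one maximum per direction matches the comment removed from snd_soc_pcm_dai_delay() above: the delay of interest is through the full audio path, all CPUs and all CODECs, rather than just the maximum over all DAIs.
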
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index c54c8ca8d7..4aa48c74f2 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -15,10 +15,6 @@ #include <sound/dmaengine_pcm.h> -static unsigned int prealloc_buffer_size_kbytes = 512; -module_param(prealloc_buffer_size_kbytes, uint, 0444); -MODULE_PARM_DESC(prealloc_buffer_size_kbytes, "Preallocate DMA buffer size (KB)."); - /* * The platforms dmaengine driver does not support reporting the amount of * bytes that are still left to transfer. @@ -241,7 +237,7 @@ static int dmaengine_pcm_new(struct snd_soc_component *component, prealloc_buffer_size = config->prealloc_buffer_size; max_buffer_size = config->pcm_hardware->buffer_bytes_max; } else { - prealloc_buffer_size = prealloc_buffer_size_kbytes * 1024; + prealloc_buffer_size = 512 * 1024; max_buffer_size = SIZE_MAX; } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 03ea9591fb..ee3782ecd7 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -316,26 +316,26 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (sign_bit) mask = BIT(sign_bit + 1) - 1; - if (ucontrol->value.integer.value[0] < 0) - return -EINVAL; val = ucontrol->value.integer.value[0]; - if (mc->platform_max && val > mc->platform_max) + if (mc->platform_max && ((int)val + min) > mc->platform_max) return -EINVAL; if (val > max - min) return -EINVAL; + if (val < 0) + return -EINVAL; val = (val + min) & mask; if (invert) val = max - val; val_mask = mask << shift; val = val << shift; if (snd_soc_volsw_is_stereo(mc)) { - if (ucontrol->value.integer.value[1] < 0) - return -EINVAL; val2 = ucontrol->value.integer.value[1]; - if (mc->platform_max && val2 > mc->platform_max) + if (mc->platform_max && ((int)val2 + min) > mc->platform_max) return -EINVAL; if (val2 > max - min) return -EINVAL; + if (val2 < 0) + return -EINVAL; val2 = (val2 + min) & mask; if (invert) val2 = max - val2; @@ -430,13 +430,13 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, int ret; unsigned int val, val_mask; - if (ucontrol->value.integer.value[0] < 0) - return -EINVAL; val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; if (val > max - min) return -EINVAL; + if (val < 0) + return -EINVAL; val_mask = mask << shift; val = (val + min) & mask; val = val << shift; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 9a954680d4..48f71bb81a 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -27,37 +27,6 @@ #include <sound/soc-link.h> #include <sound/initval.h> -static inline void snd_soc_dpcm_mutex_lock(struct snd_soc_pcm_runtime *rtd) -{ - mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass); -} - -static inline void snd_soc_dpcm_mutex_unlock(struct snd_soc_pcm_runtime *rtd) -{ - mutex_unlock(&rtd->card->pcm_mutex); -} - -#define snd_soc_dpcm_mutex_assert_held(rtd) \ - lockdep_assert_held(&(rtd)->card->pcm_mutex) - -static inline void snd_soc_dpcm_stream_lock_irq(struct snd_soc_pcm_runtime *rtd, - int stream) -{ - snd_pcm_stream_lock_irq(snd_soc_dpcm_get_substream(rtd, stream)); -} - -#define snd_soc_dpcm_stream_lock_irqsave_nested(rtd, stream, flags) \ - snd_pcm_stream_lock_irqsave_nested(snd_soc_dpcm_get_substream(rtd, stream), flags) - -static inline void snd_soc_dpcm_stream_unlock_irq(struct snd_soc_pcm_runtime *rtd, - int stream) -{ - snd_pcm_stream_unlock_irq(snd_soc_dpcm_get_substream(rtd, stream)); -} - -#define snd_soc_dpcm_stream_unlock_irqrestore(rtd, stream, flags) \ -
snd_pcm_stream_unlock_irqrestore(snd_soc_dpcm_get_substream(rtd, stream), flags) - #define DPCM_MAX_BE_USERS 8 static inline const char *soc_cpu_dai_name(struct snd_soc_pcm_runtime *rtd) @@ -104,6 +73,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_pcm_hw_params *params = &fe->dpcm[stream].hw_params; struct snd_soc_dpcm *dpcm; ssize_t offset = 0; + unsigned long flags; /* FE state */ offset += scnprintf(buf + offset, size - offset, @@ -131,6 +101,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, goto out; } + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; @@ -151,6 +122,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_channels(params), params_rate(params)); } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); out: return offset; } @@ -173,13 +145,11 @@ static ssize_t dpcm_state_read_file(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - snd_soc_dpcm_mutex_lock(fe); for_each_pcm_streams(stream) if (snd_soc_dai_stream_valid(asoc_rtd_to_cpu(fe, 0), stream)) offset += dpcm_show_state(fe, stream, buf + offset, out_count - offset); - snd_soc_dpcm_mutex_unlock(fe); ret = simple_read_from_buffer(user_buf, count, ppos, buf, offset); @@ -251,14 +221,14 @@ static void dpcm_set_fe_update_state(struct snd_soc_pcm_runtime *fe, struct snd_pcm_substream *substream = snd_soc_dpcm_get_substream(fe, stream); - snd_soc_dpcm_stream_lock_irq(fe, stream); + snd_pcm_stream_lock_irq(substream); if (state == SND_SOC_DPCM_UPDATE_NO && fe->dpcm[stream].trigger_pending) { dpcm_fe_dai_do_trigger(substream, fe->dpcm[stream].trigger_pending - 1); fe->dpcm[stream].trigger_pending = 0; } fe->dpcm[stream].runtime_update = state; - snd_soc_dpcm_stream_unlock_irq(fe, stream); + snd_pcm_stream_unlock_irq(substream); } static void dpcm_set_be_update_state(struct snd_soc_pcm_runtime *be, @@ -286,7 +256,7 @@ void snd_soc_runtime_action(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai; int i; - snd_soc_dpcm_mutex_assert_held(rtd); + lockdep_assert_held(&rtd->card->pcm_mutex); for_each_rtd_dais(rtd, i, dai) snd_soc_dai_action(dai, stream, action); @@ -339,8 +309,6 @@ int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, { struct snd_soc_dpcm *dpcm; - snd_soc_dpcm_mutex_assert_held(fe); - for_each_dpcm_be(fe, dir, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; @@ -678,14 +646,14 @@ static int soc_pcm_components_close(struct snd_pcm_substream *substream, return ret; } -static int soc_pcm_clean(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_substream *substream, int rollback) +static int soc_pcm_clean(struct snd_pcm_substream *substream, int rollback) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct snd_soc_component *component; struct snd_soc_dai *dai; int i; - snd_soc_dpcm_mutex_assert_held(rtd); + mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass); if (!rollback) snd_soc_runtime_deactivate(rtd, substream->stream); @@ -697,6 +665,9 @@ static int soc_pcm_clean(struct snd_soc_pcm_runtime *rtd, soc_pcm_components_close(substream, rollback); + + mutex_unlock(&rtd->card->pcm_mutex); + snd_soc_pcm_component_pm_runtime_put(rtd, substream, rollback); for_each_rtd_components(rtd, i, component) @@ -711,21 +682,9 @@ static int soc_pcm_clean(struct snd_soc_pcm_runtime *rtd, * freed here. The cpu DAI, codec DAI, machine and components are also * shutdown. 
*/ -static int __soc_pcm_close(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_substream *substream) -{ - return soc_pcm_clean(rtd, substream, 0); -} - -/* PCM close ops for non-DPCM streams */ static int soc_pcm_close(struct snd_pcm_substream *substream) { - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - - snd_soc_dpcm_mutex_lock(rtd); - soc_pcm_clean(rtd, substream, 0); - snd_soc_dpcm_mutex_unlock(rtd); - return 0; + return soc_pcm_clean(substream, 0); } static int soc_hw_sanity_check(struct snd_pcm_substream *substream) @@ -771,21 +730,21 @@ static int soc_hw_sanity_check(struct snd_pcm_substream *substream) * then initialized and any private data can be allocated. This also calls * startup for the cpu DAI, component, machine and codec DAI. */ -static int __soc_pcm_open(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_substream *substream) +static int soc_pcm_open(struct snd_pcm_substream *substream) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct snd_soc_component *component; struct snd_soc_dai *dai; int i, ret = 0; - snd_soc_dpcm_mutex_assert_held(rtd); - for_each_rtd_components(rtd, i, component) pinctrl_pm_select_default_state(component->dev); ret = snd_soc_pcm_component_pm_runtime_get(rtd, substream); if (ret < 0) - goto err; + goto pm_err; + + mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass); ret = soc_pcm_components_open(substream); if (ret < 0) @@ -832,26 +791,16 @@ static int __soc_pcm_open(struct snd_soc_pcm_runtime *rtd, snd_soc_runtime_activate(rtd, substream->stream); ret = 0; err: + mutex_unlock(&rtd->card->pcm_mutex); +pm_err: if (ret < 0) { - soc_pcm_clean(rtd, substream, 1); + soc_pcm_clean(substream, 1); dev_err(rtd->dev, "%s() failed (%d)", __func__, ret); } return ret; } -/* PCM open ops for non-DPCM streams */ -static int soc_pcm_open(struct snd_pcm_substream *substream) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - int ret; - - snd_soc_dpcm_mutex_lock(rtd); - ret = __soc_pcm_open(rtd, substream); - snd_soc_dpcm_mutex_unlock(rtd); - return ret; -} - static void codec2codec_close_delayed_work(struct snd_soc_pcm_runtime *rtd) { /* @@ -867,13 +816,13 @@ static void codec2codec_close_delayed_work(struct snd_soc_pcm_runtime *rtd) * rate, etc. This function is non atomic and can be called multiple times, * it can refer to the runtime info. 
*/ -static int __soc_pcm_prepare(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_substream *substream) +static int soc_pcm_prepare(struct snd_pcm_substream *substream) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct snd_soc_dai *dai; int i, ret = 0; - snd_soc_dpcm_mutex_assert_held(rtd); + mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass); ret = snd_soc_link_prepare(substream); if (ret < 0) @@ -901,24 +850,14 @@ static int __soc_pcm_prepare(struct snd_soc_pcm_runtime *rtd, snd_soc_dai_digital_mute(dai, 0, substream->stream); out: + mutex_unlock(&rtd->card->pcm_mutex); + if (ret < 0) dev_err(rtd->dev, "ASoC: %s() failed (%d)\n", __func__, ret); return ret; } -/* PCM prepare ops for non-DPCM streams */ -static int soc_pcm_prepare(struct snd_pcm_substream *substream) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - int ret; - - snd_soc_dpcm_mutex_lock(rtd); - ret = __soc_pcm_prepare(rtd, substream); - snd_soc_dpcm_mutex_unlock(rtd); - return ret; -} - static void soc_pcm_codec_params_fixup(struct snd_pcm_hw_params *params, unsigned int mask) { @@ -930,20 +869,22 @@ static void soc_pcm_codec_params_fixup(struct snd_pcm_hw_params *params, interval->max = channels; } -static int soc_pcm_hw_clean(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_substream *substream, int rollback) +static int soc_pcm_hw_clean(struct snd_pcm_substream *substream, int rollback) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct snd_soc_dai *dai; int i; - snd_soc_dpcm_mutex_assert_held(rtd); + mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass); /* clear the corresponding DAIs parameters when going to be inactive */ for_each_rtd_dais(rtd, i, dai) { + int active = snd_soc_dai_stream_active(dai, substream->stream); + if (snd_soc_dai_active(dai) == 1) soc_pcm_set_dai_params(dai, NULL); - if (snd_soc_dai_stream_active(dai, substream->stream) == 1) + if (active == 1) snd_soc_dai_digital_mute(dai, 1, substream->stream); } @@ -957,32 +898,23 @@ static int soc_pcm_hw_clean(struct snd_soc_pcm_runtime *rtd, snd_soc_pcm_component_hw_free(substream, rollback); /* now free hw params for the DAIs */ - for_each_rtd_dais(rtd, i, dai) - if (snd_soc_dai_stream_valid(dai, substream->stream)) - snd_soc_dai_hw_free(dai, substream, rollback); + for_each_rtd_dais(rtd, i, dai) { + if (!snd_soc_dai_stream_valid(dai, substream->stream)) + continue; + snd_soc_dai_hw_free(dai, substream, rollback); + } + + mutex_unlock(&rtd->card->pcm_mutex); return 0; } /* * Frees resources allocated by hw_params, can be called multiple times */ -static int __soc_pcm_hw_free(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_substream *substream) -{ - return soc_pcm_hw_clean(rtd, substream, 0); -} - -/* hw_free PCM ops for non-DPCM streams */ static int soc_pcm_hw_free(struct snd_pcm_substream *substream) { - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - int ret; - - snd_soc_dpcm_mutex_lock(rtd); - ret = __soc_pcm_hw_free(rtd, substream); - snd_soc_dpcm_mutex_unlock(rtd); - return ret; + return soc_pcm_hw_clean(substream, 0); } /* @@ -990,15 +922,15 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream) * function can also be called multiple times and can allocate buffers * (using snd_pcm_lib_* ). It's non-atomic. 
*/ -static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, - struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) +static int soc_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct snd_soc_dai *cpu_dai; struct snd_soc_dai *codec_dai; int i, ret = 0; - snd_soc_dpcm_mutex_assert_held(rtd); + mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass); ret = soc_pcm_params_symmetry(substream, params); if (ret) @@ -1070,27 +1002,16 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_pcm_component_hw_params(substream, params); out: + mutex_unlock(&rtd->card->pcm_mutex); + if (ret < 0) { - soc_pcm_hw_clean(rtd, substream, 1); + soc_pcm_hw_clean(substream, 1); dev_err(rtd->dev, "ASoC: %s() failed (%d)\n", __func__, ret); } return ret; } -/* hw_params PCM ops for non-DPCM streams */ -static int soc_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - int ret; - - snd_soc_dpcm_mutex_lock(rtd); - ret = __soc_pcm_hw_params(rtd, substream, params); - snd_soc_dpcm_mutex_unlock(rtd); - return ret; -} - static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); @@ -1164,22 +1085,41 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) /* * soc level wrapper for pointer callback * If cpu_dai, codec_dai, component driver has the delay callback, then - * the runtime->delay will be updated via snd_soc_pcm_component/dai_delay(). + * the runtime->delay will be updated accordingly. */ static snd_pcm_uframes_t soc_pcm_pointer(struct snd_pcm_substream *substream) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_dai *cpu_dai; + struct snd_soc_dai *codec_dai; struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t offset = 0; + snd_pcm_sframes_t delay = 0; snd_pcm_sframes_t codec_delay = 0; snd_pcm_sframes_t cpu_delay = 0; + int i; + + /* clearing the previous total delay */ + runtime->delay = 0; offset = snd_soc_pcm_component_pointer(substream); - /* should be called *after* snd_soc_pcm_component_pointer() */ - snd_soc_pcm_dai_delay(substream, &cpu_delay, &codec_delay); - snd_soc_pcm_component_delay(substream, &cpu_delay, &codec_delay); + /* base delay if assigned in pointer callback */ + delay = runtime->delay; - runtime->delay = cpu_delay + codec_delay; + for_each_rtd_cpu_dais(rtd, i, cpu_dai) { + cpu_delay = max(cpu_delay, + snd_soc_dai_delay(cpu_dai, substream)); + } + delay += cpu_delay; + + for_each_rtd_codec_dais(rtd, i, codec_dai) { + codec_delay = max(codec_delay, + snd_soc_dai_delay(codec_dai, substream)); + } + delay += codec_delay; + + runtime->delay = delay; return offset; } @@ -1188,11 +1128,8 @@ static snd_pcm_uframes_t soc_pcm_pointer(struct snd_pcm_substream *substream) static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be, int stream) { - struct snd_pcm_substream *fe_substream; - struct snd_pcm_substream *be_substream; struct snd_soc_dpcm *dpcm; - - snd_soc_dpcm_mutex_assert_held(fe); + unsigned long flags; /* only add new dpcms */ for_each_dpcm_be(fe, stream, dpcm) { @@ -1200,21 +1137,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, return 0; } - fe_substream = snd_soc_dpcm_get_substream(fe, stream); - 
be_substream = snd_soc_dpcm_get_substream(be, stream); - - if (!fe_substream->pcm->nonatomic && be_substream->pcm->nonatomic) { - dev_err(be->dev, "%s: FE is atomic but BE is nonatomic, invalid configuration\n", - __func__); - return -EINVAL; - } - if (fe_substream->pcm->nonatomic && !be_substream->pcm->nonatomic) { - dev_warn(be->dev, "%s: FE is nonatomic but BE is not, forcing BE as nonatomic\n", - __func__); - be_substream->pcm->nonatomic = 1; - } - - dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_ATOMIC); + dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_KERNEL); if (!dpcm) return -ENOMEM; @@ -1222,10 +1145,10 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, dpcm->fe = fe; be->dpcm[stream].runtime = fe->dpcm[stream].runtime; dpcm->state = SND_SOC_DPCM_LINK_STATE_NEW; - snd_soc_dpcm_stream_lock_irq(fe, stream); + spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_add(&dpcm->list_be, &fe->dpcm[stream].be_clients); list_add(&dpcm->list_fe, &be->dpcm[stream].fe_clients); - snd_soc_dpcm_stream_unlock_irq(fe, stream); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); dev_dbg(fe->dev, "connected new DPCM %s path %s %s %s\n", stream ? "capture" : "playback", fe->dai_link->name, @@ -1268,11 +1191,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm, *d; - LIST_HEAD(deleted_dpcms); + unsigned long flags; - snd_soc_dpcm_mutex_assert_held(fe); - - snd_soc_dpcm_stream_lock_irq(fe, stream); for_each_dpcm_be_safe(fe, stream, dpcm, d) { dev_dbg(fe->dev, "ASoC: BE %s disconnect check for %s\n", stream ? "capture" : "playback", @@ -1288,16 +1208,12 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) /* BEs still alive need new FE */ dpcm_be_reparent(fe, dpcm->be, stream); - list_del(&dpcm->list_be); - list_move(&dpcm->list_fe, &deleted_dpcms); - } - snd_soc_dpcm_stream_unlock_irq(fe, stream); - - while (!list_empty(&deleted_dpcms)) { - dpcm = list_first_entry(&deleted_dpcms, struct snd_soc_dpcm, - list_fe); - list_del(&dpcm->list_fe); dpcm_remove_debugfs_state(dpcm); + + spin_lock_irqsave(&fe->card->dpcm_lock, flags); + list_del(&dpcm->list_be); + list_del(&dpcm->list_fe); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm); } } @@ -1346,7 +1262,8 @@ static int widget_in_list(struct snd_soc_dapm_widget_list *list, return 0; } -bool dpcm_end_walk_at_be(struct snd_soc_dapm_widget *widget, enum snd_soc_dapm_direction dir) +static bool dpcm_end_walk_at_be(struct snd_soc_dapm_widget *widget, + enum snd_soc_dapm_direction dir) { struct snd_soc_card *card = widget->dapm->card; struct snd_soc_pcm_runtime *rtd; @@ -1364,7 +1281,6 @@ bool dpcm_end_walk_at_be(struct snd_soc_dapm_widget *widget, enum snd_soc_dapm_d return false; } -EXPORT_SYMBOL_GPL(dpcm_end_walk_at_be); int dpcm_path_get(struct snd_soc_pcm_runtime *fe, int stream, struct snd_soc_dapm_widget_list **list) @@ -1479,16 +1395,6 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, if (!fe->dpcm[stream].runtime && !fe->fe_compr) continue; - /* - * Filter for systems with 'component_chaining' enabled. - * This helps to avoid unnecessary re-configuration of an - * already active BE on such systems. 
- */ - if (fe->card->component_chaining && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_NEW) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_CLOSE)) - continue; - /* newly connected FE and BE */ err = dpcm_be_connect(fe, be, stream); if (err < 0) { @@ -1523,9 +1429,12 @@ int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) dpcm_set_be_update_state(dpcm->be, stream, SND_SOC_DPCM_UPDATE_NO); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); } void dpcm_be_dai_stop(struct snd_soc_pcm_runtime *fe, int stream, @@ -1561,12 +1470,12 @@ void dpcm_be_dai_stop(struct snd_soc_pcm_runtime *fe, int stream, continue; if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) { - __soc_pcm_hw_free(be, be_substream); + soc_pcm_hw_free(be_substream); be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; } } - __soc_pcm_close(be, be_substream); + soc_pcm_close(be_substream); be_substream->runtime = NULL; be->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; } @@ -1614,7 +1523,7 @@ int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream) stream ? "capture" : "playback", be->dai_link->name); be_substream->runtime = be->dpcm[stream].runtime; - err = __soc_pcm_open(be, be_substream); + err = soc_pcm_open(be_substream); if (err < 0) { be->dpcm[stream].users--; if (be->dpcm[stream].users < 0) @@ -1625,7 +1534,7 @@ int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream) be->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; goto unwind; } - be->dpcm[stream].be_start = 0; + be->dpcm[stream].state = SND_SOC_DPCM_STATE_OPEN; count++; } @@ -1858,7 +1767,7 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) dev_dbg(fe->dev, "ASoC: open FE %s\n", fe->dai_link->name); /* start the DAI frontend */ - ret = __soc_pcm_open(fe, fe_substream); + ret = soc_pcm_open(fe_substream); if (ret < 0) goto unwind; @@ -1889,8 +1798,6 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream); int stream = substream->stream; - snd_soc_dpcm_mutex_assert_held(fe); - dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); /* shutdown the BEs */ @@ -1899,7 +1806,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) dev_dbg(fe->dev, "ASoC: close FE %s\n", fe->dai_link->name); /* now shutdown the frontend */ - __soc_pcm_close(fe, substream); + soc_pcm_close(substream); /* run the stream stop event */ dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); @@ -1944,7 +1851,7 @@ void dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream) dev_dbg(be->dev, "ASoC: hw_free BE %s\n", be->dai_link->name); - __soc_pcm_hw_free(be, be_substream); + soc_pcm_hw_free(be_substream); be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; } @@ -1955,13 +1862,13 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream); int stream = substream->stream; - snd_soc_dpcm_mutex_lock(fe); + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); dev_dbg(fe->dev, "ASoC: hw_free FE %s\n", fe->dai_link->name); /* call hw_free on the frontend */ - soc_pcm_hw_clean(fe, substream, 0); + soc_pcm_hw_free(substream); /* only hw_params backends that are either 
sinks or sources * to this frontend DAI */ @@ -1970,7 +1877,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); - snd_soc_dpcm_mutex_unlock(fe); + mutex_unlock(&fe->card->mutex); return 0; } @@ -2014,7 +1921,7 @@ int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream) dev_dbg(be->dev, "ASoC: hw_params BE %s\n", be->dai_link->name); - ret = __soc_pcm_hw_params(be, be_substream, &dpcm->hw_params); + ret = soc_pcm_hw_params(be_substream, &dpcm->hw_params); if (ret < 0) goto unwind; @@ -2044,7 +1951,7 @@ int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream) (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP)) continue; - __soc_pcm_hw_free(be, be_substream); + soc_pcm_hw_free(be_substream); } return ret; @@ -2056,7 +1963,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream); int ret, stream = substream->stream; - snd_soc_dpcm_mutex_lock(fe); + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); memcpy(&fe->dpcm[stream].hw_params, params, @@ -2070,7 +1977,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, params_channels(params), params_format(params)); /* call hw_params on the frontend */ - ret = __soc_pcm_hw_params(fe, substream, params); + ret = soc_pcm_hw_params(substream, params); if (ret < 0) dpcm_be_dai_hw_free(fe, stream); else @@ -2078,7 +1985,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, out: dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); - snd_soc_dpcm_mutex_unlock(fe); + mutex_unlock(&fe->card->mutex); if (ret < 0) dev_err(fe->dev, "ASoC: %s failed (%d)\n", __func__, ret); @@ -2091,7 +1998,6 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, { struct snd_soc_pcm_runtime *be; struct snd_soc_dpcm *dpcm; - unsigned long flags; int ret = 0; for_each_dpcm_be(fe, stream, dpcm) { @@ -2100,128 +2006,89 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, be = dpcm->be; be_substream = snd_soc_dpcm_get_substream(be, stream); - snd_soc_dpcm_stream_lock_irqsave_nested(be, stream, flags); - /* is this op for this BE ? 
*/ if (!snd_soc_dpcm_be_can_update(fe, be, stream)) - goto next; + continue; dev_dbg(be->dev, "ASoC: trigger BE %s cmd %d\n", be->dai_link->name, cmd); switch (cmd) { case SNDRV_PCM_TRIGGER_START: - if (!be->dpcm[stream].be_start && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && + if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) - goto next; - - be->dpcm[stream].be_start++; - if (be->dpcm[stream].be_start != 1) - goto next; + continue; ret = soc_pcm_trigger(be_substream, cmd); - if (ret) { - be->dpcm[stream].be_start--; - goto next; - } + if (ret) + goto end; be->dpcm[stream].state = SND_SOC_DPCM_STATE_START; break; case SNDRV_PCM_TRIGGER_RESUME: if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND)) - goto next; - - be->dpcm[stream].be_start++; - if (be->dpcm[stream].be_start != 1) - goto next; + continue; ret = soc_pcm_trigger(be_substream, cmd); - if (ret) { - be->dpcm[stream].be_start--; - goto next; - } + if (ret) + goto end; be->dpcm[stream].state = SND_SOC_DPCM_STATE_START; break; case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - if (!be->dpcm[stream].be_start && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) - goto next; - - be->dpcm[stream].be_start++; - if (be->dpcm[stream].be_start != 1) - goto next; + if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) + continue; ret = soc_pcm_trigger(be_substream, cmd); - if (ret) { - be->dpcm[stream].be_start--; - goto next; - } + if (ret) + goto end; be->dpcm[stream].state = SND_SOC_DPCM_STATE_START; break; case SNDRV_PCM_TRIGGER_STOP: if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) - goto next; + continue; - if (be->dpcm[stream].state == SND_SOC_DPCM_STATE_START) - be->dpcm[stream].be_start--; - - if (be->dpcm[stream].be_start != 0) - goto next; + if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream)) + continue; ret = soc_pcm_trigger(be_substream, cmd); - if (ret) { - if (be->dpcm[stream].state == SND_SOC_DPCM_STATE_START) - be->dpcm[stream].be_start++; - goto next; - } + if (ret) + goto end; be->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP; break; case SNDRV_PCM_TRIGGER_SUSPEND: if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) - goto next; + continue; - be->dpcm[stream].be_start--; - if (be->dpcm[stream].be_start != 0) - goto next; + if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream)) + continue; ret = soc_pcm_trigger(be_substream, cmd); - if (ret) { - be->dpcm[stream].be_start++; - goto next; - } + if (ret) + goto end; be->dpcm[stream].state = SND_SOC_DPCM_STATE_SUSPEND; break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) - goto next; + continue; - be->dpcm[stream].be_start--; - if (be->dpcm[stream].be_start != 0) - goto next; + if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream)) + continue; ret = soc_pcm_trigger(be_substream, cmd); - if (ret) { - be->dpcm[stream].be_start++; - goto next; - } + if (ret) + goto end; be->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED; break; } -next: - snd_soc_dpcm_stream_unlock_irqrestore(be, stream, flags); - if (ret) - break; } +end: if (ret < 0) dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n", __func__, be->dai_link->name, ret); @@ -2389,7 +2256,7 @@ int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) 
dev_dbg(be->dev, "ASoC: prepare BE %s\n", be->dai_link->name); - ret = __soc_pcm_prepare(be, be_substream); + ret = soc_pcm_prepare(be_substream); if (ret < 0) break; @@ -2407,7 +2274,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream); int stream = substream->stream, ret = 0; - snd_soc_dpcm_mutex_lock(fe); + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); dev_dbg(fe->dev, "ASoC: prepare FE %s\n", fe->dai_link->name); @@ -2426,7 +2293,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) goto out; /* call prepare on the frontend */ - ret = __soc_pcm_prepare(fe, substream); + ret = soc_pcm_prepare(substream); if (ret < 0) goto out; @@ -2434,7 +2301,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) out: dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); - snd_soc_dpcm_mutex_unlock(fe); + mutex_unlock(&fe->card->mutex); if (ret < 0) dev_err(fe->dev, "ASoC: %s() failed (%d)\n", __func__, ret); @@ -2485,6 +2352,7 @@ static int dpcm_run_update_startup(struct snd_soc_pcm_runtime *fe, int stream) struct snd_soc_dpcm *dpcm; enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream]; int ret = 0; + unsigned long flags; dev_dbg(fe->dev, "ASoC: runtime %s open on FE %s\n", stream ? "capture" : "playback", fe->dai_link->name); @@ -2553,6 +2421,7 @@ static int dpcm_run_update_startup(struct snd_soc_pcm_runtime *fe, int stream) dpcm_be_dai_shutdown(fe, stream); disconnect: /* disconnect any pending BEs */ + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; @@ -2564,6 +2433,7 @@ static int dpcm_run_update_startup(struct snd_soc_pcm_runtime *fe, int stream) be->dpcm[stream].state == SND_SOC_DPCM_STATE_NEW) dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); if (ret < 0) dev_err(fe->dev, "ASoC: %s() failed (%d)\n", __func__, ret); @@ -2638,7 +2508,7 @@ int snd_soc_dpcm_runtime_update(struct snd_soc_card *card) struct snd_soc_pcm_runtime *fe; int ret = 0; - mutex_lock_nested(&card->pcm_mutex, card->pcm_subclass); + mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_RUNTIME); /* shutdown all old paths first */ for_each_card_rtds(card, fe) { ret = soc_dpcm_fe_runtime_update(fe, 0); @@ -2654,7 +2524,7 @@ int snd_soc_dpcm_runtime_update(struct snd_soc_card *card) } out: - mutex_unlock(&card->pcm_mutex); + mutex_unlock(&card->mutex); return ret; } EXPORT_SYMBOL_GPL(snd_soc_dpcm_runtime_update); @@ -2665,8 +2535,6 @@ static void dpcm_fe_dai_cleanup(struct snd_pcm_substream *fe_substream) struct snd_soc_dpcm *dpcm; int stream = fe_substream->stream; - snd_soc_dpcm_mutex_assert_held(fe); - /* mark FE's links ready to prune */ for_each_dpcm_be(fe, stream, dpcm) dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; @@ -2681,12 +2549,12 @@ static int dpcm_fe_dai_close(struct snd_pcm_substream *fe_substream) struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(fe_substream); int ret; - snd_soc_dpcm_mutex_lock(fe); + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); ret = dpcm_fe_dai_shutdown(fe_substream); dpcm_fe_dai_cleanup(fe_substream); - snd_soc_dpcm_mutex_unlock(fe); + mutex_unlock(&fe->card->mutex); return ret; } @@ -2697,7 +2565,7 @@ static int dpcm_fe_dai_open(struct snd_pcm_substream *fe_substream) int ret; int stream = fe_substream->stream; - snd_soc_dpcm_mutex_lock(fe); + mutex_lock_nested(&fe->card->mutex, 
SND_SOC_CARD_CLASS_RUNTIME); fe->dpcm[stream].runtime = fe_substream->runtime; ret = dpcm_path_get(fe, stream, &list); @@ -2714,7 +2582,7 @@ static int dpcm_fe_dai_open(struct snd_pcm_substream *fe_substream) dpcm_clear_pending_state(fe, stream); dpcm_path_put(&list); open_end: - snd_soc_dpcm_mutex_unlock(fe); + mutex_unlock(&fe->card->mutex); return ret; } @@ -2975,8 +2843,10 @@ static int snd_soc_dpcm_check_state(struct snd_soc_pcm_runtime *fe, struct snd_soc_dpcm *dpcm; int state; int ret = 1; + unsigned long flags; int i; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) @@ -2990,6 +2860,7 @@ static int snd_soc_dpcm_check_state(struct snd_soc_pcm_runtime *fe, } } } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); /* it's safe to do this BE DAI */ return ret; diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 2630df024d..7459956d62 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -56,7 +56,7 @@ struct soc_tplg { const struct firmware *fw; /* runtime FW parsing */ - const u8 *pos; /* read position */ + const u8 *pos; /* read postion */ const u8 *hdr_pos; /* header position */ unsigned int pass; /* pass number */ @@ -78,7 +78,7 @@ struct soc_tplg { }; static int soc_tplg_process_headers(struct soc_tplg *tplg); -static int soc_tplg_complete(struct soc_tplg *tplg); +static void soc_tplg_complete(struct soc_tplg *tplg); /* check we dont overflow the data for this control chunk */ static int soc_tplg_check_elem_count(struct soc_tplg *tplg, size_t elem_size, @@ -312,12 +312,10 @@ static int soc_tplg_dai_link_load(struct soc_tplg *tplg, } /* tell the component driver that all firmware has been loaded in this request */ -static int soc_tplg_complete(struct soc_tplg *tplg) +static void soc_tplg_complete(struct soc_tplg *tplg) { if (tplg->ops && tplg->ops->complete) - return tplg->ops->complete(tplg->comp); - - return 0; + tplg->ops->complete(tplg->comp); } /* add a dynamic kcontrol */ @@ -351,7 +349,7 @@ static int soc_tplg_add_kcontrol(struct soc_tplg *tplg, struct snd_soc_component *comp = tplg->comp; return soc_tplg_add_dcontrol(comp->card->snd_card, - tplg->dev, k, comp->name_prefix, comp, kcontrol); + comp->dev, k, comp->name_prefix, comp, kcontrol); } /* remove a mixer kcontrol */ @@ -1475,6 +1473,10 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, goto widget; } + control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos; + dev_dbg(tplg->dev, "ASoC: template %s has %d controls of type %x\n", + w->name, w->num_kcontrols, control_hdr->type); + template.num_kcontrols = le32_to_cpu(w->num_kcontrols); kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL); if (!kc) @@ -1485,7 +1487,7 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, if (!kcontrol_type) goto err; - for (i = 0; i < le32_to_cpu(w->num_kcontrols); i++) { + for (i = 0; i < w->num_kcontrols; i++) { control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos; switch (le32_to_cpu(control_hdr->ops.info)) { case SND_SOC_TPLG_CTL_VOLSW: @@ -1534,8 +1536,6 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, } template.kcontrol_news = kc; - dev_dbg(tplg->dev, "ASoC: template %s with %d/%d/%d (mixer/enum/bytes) control\n", - w->name, mixer_count, enum_count, bytes_count); widget: ret = soc_tplg_widget_load(tplg, &template, w); @@ -1591,28 +1591,11 @@ static int soc_tplg_dapm_widget_elems_load(struct soc_tplg *tplg, struct snd_soc_tplg_dapm_widget *widget = (struct 
snd_soc_tplg_dapm_widget *) tplg->pos; int ret; - /* - * check if widget itself fits within topology file - * use sizeof instead of widget->size, as we can't be sure - * it is set properly yet (file may end before it is present) - */ - if (soc_tplg_get_offset(tplg) + sizeof(*widget) >= tplg->fw->size) { - dev_err(tplg->dev, "ASoC: invalid widget data size\n"); - return -EINVAL; - } - - /* check if widget has proper size */ if (le32_to_cpu(widget->size) != sizeof(*widget)) { dev_err(tplg->dev, "ASoC: invalid widget size\n"); return -EINVAL; } - /* check if widget private data fits within topology file */ - if (soc_tplg_get_offset(tplg) + le32_to_cpu(widget->priv.size) >= tplg->fw->size) { - dev_err(tplg->dev, "ASoC: invalid widget private data size\n"); - return -EINVAL; - } - ret = soc_tplg_dapm_widget_create(tplg, widget); if (ret < 0) { dev_err(tplg->dev, "ASoC: failed to load widget %s\n", @@ -2455,7 +2438,6 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, _manifest = manifest; } else { abi_match = false; - ret = manifest_new_ver(tplg, manifest, &_manifest); if (ret < 0) return ret; @@ -2486,14 +2468,6 @@ static int soc_valid_header(struct soc_tplg *tplg, return -EINVAL; } - if (soc_tplg_get_hdr_offset(tplg) + hdr->payload_size >= tplg->fw->size) { - dev_err(tplg->dev, - "ASoC: invalid header of type %d at offset %ld payload_size %d\n", - le32_to_cpu(hdr->type), soc_tplg_get_hdr_offset(tplg), - hdr->payload_size); - return -EINVAL; - } - /* big endian firmware objects not supported atm */ if (le32_to_cpu(hdr->magic) == SOC_TPLG_MAGIC_BIG_ENDIAN) { dev_err(tplg->dev, @@ -2653,7 +2627,7 @@ static int soc_tplg_load(struct soc_tplg *tplg) ret = soc_tplg_process_headers(tplg); if (ret == 0) - return soc_tplg_complete(tplg); + soc_tplg_complete(tplg); return ret; } @@ -2668,17 +2642,17 @@ int snd_soc_tplg_component_load(struct snd_soc_component *comp, /* * check if we have sane parameters: * comp - needs to exist to keep and reference data while parsing + * comp->dev - used for resource management and prints * comp->card - used for setting card related parameters - * comp->card->dev - used for resource management and prints * fw - we need it, as it is the very thing we parse */ - if (!comp || !comp->card || !comp->card->dev || !fw) + if (!comp || !comp->dev || !comp->card || !fw) return -EINVAL; /* setup parsing context */ memset(&tplg, 0, sizeof(tplg)); tplg.fw = fw; - tplg.dev = comp->card->dev; + tplg.dev = comp->dev; tplg.comp = comp; if (ops) { tplg.ops = ops; diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index a4efe7e52a..299b5d6ebf 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -63,23 +63,10 @@ static const struct snd_pcm_hardware dummy_dma_hardware = { .periods_max = 128, }; - -static const struct snd_soc_component_driver dummy_platform; - static int dummy_dma_open(struct snd_soc_component *component, struct snd_pcm_substream *substream) { struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); - int i; - - /* - * If there are other components associated with rtd, we shouldn't - * override their hwparams - */ - for_each_rtd_components(rtd, i, component) { - if (component->driver == &dummy_platform) - return 0; - } /* BE's dont need dummy params */ if (!rtd->dai_link->no_pcm) diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig index 1a7d6cefd3..cd659493b5 100644 --- a/sound/soc/sof/Kconfig +++ b/sound/soc/sof/Kconfig @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -menuconfig SND_SOC_SOF_TOPLEVEL +config 
SND_SOC_SOF_TOPLEVEL bool "Sound Open Firmware Support" help This adds support for Sound Open Firmware (SOF). SOF is free and @@ -40,18 +40,12 @@ config SND_SOC_SOF_ACPI_DEV config SND_SOC_SOF_OF tristate "SOF OF enumeration support" depends on OF || COMPILE_TEST + select SND_SOC_SOF help This adds support for Device Tree enumeration. This option is - required to enable i.MX8 or Mediatek devices. + required to enable i.MX8 devices. Say Y if you need this option. If unsure select "N". -config SND_SOC_SOF_OF_DEV - tristate - -config SND_SOC_SOF_COMPRESS - bool - select SND_SOC_COMPRESS - config SND_SOC_SOF_DEBUG_PROBES bool "SOF enable data probing" select SND_SOC_COMPRESS @@ -63,7 +57,7 @@ config SND_SOC_SOF_DEBUG_PROBES config SND_SOC_SOF_DEVELOPER_SUPPORT bool "SOF developer options support" - depends on EXPERT && SND_SOC_SOF + depends on EXPERT help This option unlocks SOF developer options for debug/performance/ code hardening. @@ -194,14 +188,6 @@ config SND_SOC_SOF_DEBUG_IPC_FLOOD_TEST Say Y if you want to enable IPC flood test. If unsure, select "N". -config SND_SOC_SOF_DEBUG_IPC_MSG_INJECTOR - bool "SOF enable IPC message injector" - help - This option enables the IPC message injector which can be used to send - crafted IPC messages to the DSP to test its robustness. - Say Y if you want to enable the IPC message injector. - If unsure, select "N". - config SND_SOC_SOF_DEBUG_RETAIN_DSP_CONTEXT bool "SOF retain DSP context on any FW exceptions" help @@ -233,10 +219,8 @@ config SND_SOC_SOF_PROBE_WORK_QUEUE When selected, the probe is handled in two steps, for example to avoid lockdeps if request_module is used in the probe. -source "sound/soc/sof/amd/Kconfig" source "sound/soc/sof/imx/Kconfig" source "sound/soc/sof/intel/Kconfig" -source "sound/soc/sof/mediatek/Kconfig" source "sound/soc/sof/xtensa/Kconfig" endif diff --git a/sound/soc/sof/Makefile b/sound/soc/sof/Makefile index 964b429146..606d8137cd 100644 --- a/sound/soc/sof/Makefile +++ b/sound/soc/sof/Makefile @@ -1,10 +1,8 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) snd-sof-objs := core.o ops.o loader.o ipc.o pcm.o pm.o debug.o topology.o\ - control.o trace.o utils.o sof-audio.o stream-ipc.o - -snd-sof-$(CONFIG_SND_SOC_SOF_DEBUG_PROBES) += sof-probes.o -snd-sof-$(CONFIG_SND_SOC_SOF_COMPRESS) += compress.o + control.o trace.o utils.o sof-audio.o +snd-sof-$(CONFIG_SND_SOC_SOF_DEBUG_PROBES) += probe.o compress.o snd-sof-pci-objs := sof-pci-dev.o snd-sof-acpi-objs := sof-acpi-dev.o @@ -17,11 +15,9 @@ obj-$(CONFIG_SND_SOC_SOF_NOCODEC) += snd-sof-nocodec.o obj-$(CONFIG_SND_SOC_SOF_ACPI_DEV) += snd-sof-acpi.o -obj-$(CONFIG_SND_SOC_SOF_OF_DEV) += snd-sof-of.o +obj-$(CONFIG_SND_SOC_SOF_OF) += snd-sof-of.o obj-$(CONFIG_SND_SOC_SOF_PCI_DEV) += snd-sof-pci.o obj-$(CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL) += intel/ obj-$(CONFIG_SND_SOC_SOF_IMX_TOPLEVEL) += imx/ -obj-$(CONFIG_SND_SOC_SOF_AMD_TOPLEVEL) += amd/ obj-$(CONFIG_SND_SOC_SOF_XTENSA) += xtensa/ -obj-$(CONFIG_SND_SOC_SOF_MTK_TOPLEVEL) += mediatek/ diff --git a/sound/soc/sof/compress.c b/sound/soc/sof/compress.c index 01ca85f0b8..57d5bf0a17 100644 --- a/sound/soc/sof/compress.c +++ b/sound/soc/sof/compress.c @@ -1,51 +1,147 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) // -// Copyright 2021 NXP +// This file is provided under a dual BSD/GPLv2 license. When using or +// redistributing this file, you may do so under either license. +// +// Copyright(c) 2019-2020 Intel Corporation. All rights reserved. 
+// +// Author: Cezary Rojewski // -// Author: Daniel Baluta #include -#include -#include -#include "sof-audio.h" -#include "sof-priv.h" +#include "compress.h" +#include "ops.h" +#include "probe.h" -static void snd_sof_compr_fragment_elapsed_work(struct work_struct *work) +const struct snd_compress_ops sof_probe_compressed_ops = { + .copy = sof_probe_compr_copy, +}; +EXPORT_SYMBOL(sof_probe_compressed_ops); + +int sof_probe_compr_open(struct snd_compr_stream *cstream, + struct snd_soc_dai *dai) { - struct snd_sof_pcm_stream *sps = - container_of(work, struct snd_sof_pcm_stream, - period_elapsed_work); + struct snd_sof_dev *sdev = + snd_soc_component_get_drvdata(dai->component); + int ret; - snd_compr_fragment_elapsed(sps->cstream); -} - -void snd_sof_compr_init_elapsed_work(struct work_struct *work) -{ - INIT_WORK(work, snd_sof_compr_fragment_elapsed_work); -} - -/* - * sof compr fragment elapse, this could be called in irq thread context - */ -void snd_sof_compr_fragment_elapsed(struct snd_compr_stream *cstream) -{ - struct snd_soc_component *component; - struct snd_soc_pcm_runtime *rtd; - struct snd_sof_pcm *spcm; - - if (!cstream) - return; - - rtd = cstream->private_data; - component = snd_soc_rtdcom_lookup(rtd, SOF_AUDIO_PCM_DRV_NAME); - - spcm = snd_sof_find_spcm_dai(component, rtd); - if (!spcm) { - dev_err(component->dev, - "fragment elapsed called for unknown stream!\n"); - return; + ret = snd_sof_probe_compr_assign(sdev, cstream, dai); + if (ret < 0) { + dev_err(dai->dev, "Failed to assign probe stream: %d\n", ret); + return ret; } - /* use the same workqueue-based solution as for PCM, cf. snd_sof_pcm_elapsed */ - schedule_work(&spcm->stream[cstream->direction].period_elapsed_work); + sdev->extractor_stream_tag = ret; + return 0; } +EXPORT_SYMBOL(sof_probe_compr_open); + +int sof_probe_compr_free(struct snd_compr_stream *cstream, + struct snd_soc_dai *dai) +{ + struct snd_sof_dev *sdev = + snd_soc_component_get_drvdata(dai->component); + struct sof_probe_point_desc *desc; + size_t num_desc; + int i, ret; + + /* disconnect all probe points */ + ret = sof_ipc_probe_points_info(sdev, &desc, &num_desc); + if (ret < 0) { + dev_err(dai->dev, "Failed to get probe points: %d\n", ret); + goto exit; + } + + for (i = 0; i < num_desc; i++) + sof_ipc_probe_points_remove(sdev, &desc[i].buffer_id, 1); + kfree(desc); + +exit: + ret = sof_ipc_probe_deinit(sdev); + if (ret < 0) + dev_err(dai->dev, "Failed to deinit probe: %d\n", ret); + + sdev->extractor_stream_tag = SOF_PROBE_INVALID_NODE_ID; + snd_compr_free_pages(cstream); + + return snd_sof_probe_compr_free(sdev, cstream, dai); +} +EXPORT_SYMBOL(sof_probe_compr_free); + +int sof_probe_compr_set_params(struct snd_compr_stream *cstream, + struct snd_compr_params *params, struct snd_soc_dai *dai) +{ + struct snd_compr_runtime *rtd = cstream->runtime; + struct snd_sof_dev *sdev = + snd_soc_component_get_drvdata(dai->component); + int ret; + + cstream->dma_buffer.dev.type = SNDRV_DMA_TYPE_DEV_SG; + cstream->dma_buffer.dev.dev = sdev->dev; + ret = snd_compr_malloc_pages(cstream, rtd->buffer_size); + if (ret < 0) + return ret; + + ret = snd_sof_probe_compr_set_params(sdev, cstream, params, dai); + if (ret < 0) + return ret; + + ret = sof_ipc_probe_init(sdev, sdev->extractor_stream_tag, + rtd->dma_bytes); + if (ret < 0) { + dev_err(dai->dev, "Failed to init probe: %d\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(sof_probe_compr_set_params); + +int sof_probe_compr_trigger(struct snd_compr_stream *cstream, int cmd, + struct snd_soc_dai 
*dai) +{ + struct snd_sof_dev *sdev = + snd_soc_component_get_drvdata(dai->component); + + return snd_sof_probe_compr_trigger(sdev, cstream, cmd, dai); +} +EXPORT_SYMBOL(sof_probe_compr_trigger); + +int sof_probe_compr_pointer(struct snd_compr_stream *cstream, + struct snd_compr_tstamp *tstamp, struct snd_soc_dai *dai) +{ + struct snd_sof_dev *sdev = + snd_soc_component_get_drvdata(dai->component); + + return snd_sof_probe_compr_pointer(sdev, cstream, tstamp, dai); +} +EXPORT_SYMBOL(sof_probe_compr_pointer); + +int sof_probe_compr_copy(struct snd_soc_component *component, + struct snd_compr_stream *cstream, + char __user *buf, size_t count) +{ + struct snd_compr_runtime *rtd = cstream->runtime; + unsigned int offset, n; + void *ptr; + int ret; + + if (count > rtd->buffer_size) + count = rtd->buffer_size; + + div_u64_rem(rtd->total_bytes_transferred, rtd->buffer_size, &offset); + ptr = rtd->dma_area + offset; + n = rtd->buffer_size - offset; + + if (count < n) { + ret = copy_to_user(buf, ptr, count); + } else { + ret = copy_to_user(buf, ptr, n); + ret += copy_to_user(buf + n, rtd->dma_area, count - n); + } + + if (ret) + return count - ret; + return count; +} +EXPORT_SYMBOL(sof_probe_compr_copy); diff --git a/sound/soc/sof/control.c b/sound/soc/sof/control.c index ef61936dad..a5dd728c58 100644 --- a/sound/soc/sof/control.c +++ b/sound/soc/sof/control.c @@ -65,32 +65,6 @@ static inline u32 ipc_to_mixer(u32 value, u32 *volume_map, int size) return i - 1; } -static void snd_sof_refresh_control(struct snd_sof_control *scontrol) -{ - struct sof_ipc_ctrl_data *cdata = scontrol->control_data; - struct snd_soc_component *scomp = scontrol->scomp; - int ret; - - if (!scontrol->comp_data_dirty) - return; - - if (!pm_runtime_active(scomp->dev)) - return; - - /* set the ABI header values */ - cdata->data->magic = SOF_ABI_MAGIC; - cdata->data->abi = SOF_ABI_VERSION; - - /* refresh the component data from DSP */ - scontrol->comp_data_dirty = false; - ret = snd_sof_ipc_set_get_comp_data(scontrol, false); - if (ret < 0) { - dev_err(scomp->dev, "error: failed to get control data: %d\n", ret); - /* Set the flag to re-try next time to get the data */ - scontrol->comp_data_dirty = true; - } -} - int snd_sof_volume_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -100,8 +74,6 @@ int snd_sof_volume_get(struct snd_kcontrol *kcontrol, struct sof_ipc_ctrl_data *cdata = scontrol->control_data; unsigned int i, channels = scontrol->num_channels; - snd_sof_refresh_control(scontrol); - /* read back each channel */ for (i = 0; i < channels; i++) ucontrol->value.integer.value[i] = @@ -134,7 +106,11 @@ int snd_sof_volume_put(struct snd_kcontrol *kcontrol, /* notify DSP of mixer updates */ if (pm_runtime_active(scomp->dev)) - snd_sof_ipc_set_get_comp_data(scontrol, true); + snd_sof_ipc_set_get_comp_data(scontrol, + SOF_IPC_COMP_SET_VALUE, + SOF_CTRL_TYPE_VALUE_CHAN_GET, + SOF_CTRL_CMD_VOLUME, + true); return change; } @@ -169,8 +145,6 @@ int snd_sof_switch_get(struct snd_kcontrol *kcontrol, struct sof_ipc_ctrl_data *cdata = scontrol->control_data; unsigned int i, channels = scontrol->num_channels; - snd_sof_refresh_control(scontrol); - /* read back each channel */ for (i = 0; i < channels; i++) ucontrol->value.integer.value[i] = cdata->chanv[i].value; @@ -203,7 +177,11 @@ int snd_sof_switch_put(struct snd_kcontrol *kcontrol, /* notify DSP of mixer updates */ if (pm_runtime_active(scomp->dev)) - snd_sof_ipc_set_get_comp_data(scontrol, true); + snd_sof_ipc_set_get_comp_data(scontrol, + 
SOF_IPC_COMP_SET_VALUE, + SOF_CTRL_TYPE_VALUE_CHAN_GET, + SOF_CTRL_CMD_SWITCH, + true); return change; } @@ -217,8 +195,6 @@ int snd_sof_enum_get(struct snd_kcontrol *kcontrol, struct sof_ipc_ctrl_data *cdata = scontrol->control_data; unsigned int i, channels = scontrol->num_channels; - snd_sof_refresh_control(scontrol); - /* read back each channel */ for (i = 0; i < channels; i++) ucontrol->value.enumerated.item[i] = cdata->chanv[i].value; @@ -248,7 +224,11 @@ int snd_sof_enum_put(struct snd_kcontrol *kcontrol, /* notify DSP of enum updates */ if (pm_runtime_active(scomp->dev)) - snd_sof_ipc_set_get_comp_data(scontrol, true); + snd_sof_ipc_set_get_comp_data(scontrol, + SOF_IPC_COMP_SET_VALUE, + SOF_CTRL_TYPE_VALUE_CHAN_GET, + SOF_CTRL_CMD_ENUM, + true); return change; } @@ -264,8 +244,6 @@ int snd_sof_bytes_get(struct snd_kcontrol *kcontrol, struct sof_abi_hdr *data = cdata->data; size_t size; - snd_sof_refresh_control(scontrol); - if (be->max > sizeof(ucontrol->value.bytes.data)) { dev_err_ratelimited(scomp->dev, "error: data max %d exceeds ucontrol data array size\n", @@ -322,7 +300,11 @@ int snd_sof_bytes_put(struct snd_kcontrol *kcontrol, /* notify DSP of byte control updates */ if (pm_runtime_active(scomp->dev)) - snd_sof_ipc_set_get_comp_data(scontrol, true); + snd_sof_ipc_set_get_comp_data(scontrol, + SOF_IPC_COMP_SET_DATA, + SOF_CTRL_TYPE_DATA_SET, + scontrol->cmd, + true); return 0; } @@ -367,7 +349,7 @@ int snd_sof_bytes_ext_put(struct snd_kcontrol *kcontrol, } /* Check that header id matches the command */ - if (header.numid != cdata->cmd) { + if (header.numid != scontrol->cmd) { dev_err_ratelimited(scomp->dev, "error: incorrect numid %d\n", header.numid); @@ -398,7 +380,11 @@ int snd_sof_bytes_ext_put(struct snd_kcontrol *kcontrol, /* notify DSP of byte control updates */ if (pm_runtime_active(scomp->dev)) - snd_sof_ipc_set_get_comp_data(scontrol, true); + snd_sof_ipc_set_get_comp_data(scontrol, + SOF_IPC_COMP_SET_DATA, + SOF_CTRL_TYPE_DATA_SET, + scontrol->cmd, + true); return 0; } @@ -435,7 +421,8 @@ int snd_sof_bytes_ext_volatile_get(struct snd_kcontrol *kcontrol, unsigned int _ cdata->data->magic = SOF_ABI_MAGIC; cdata->data->abi = SOF_ABI_VERSION; /* get all the component data from DSP */ - ret = snd_sof_ipc_set_get_comp_data(scontrol, false); + ret = snd_sof_ipc_set_get_comp_data(scontrol, SOF_IPC_COMP_GET_DATA, SOF_CTRL_TYPE_DATA_GET, + scontrol->cmd, false); if (ret < 0) goto out; @@ -456,7 +443,7 @@ int snd_sof_bytes_ext_volatile_get(struct snd_kcontrol *kcontrol, unsigned int _ goto out; } - header.numid = cdata->cmd; + header.numid = scontrol->cmd; header.length = data_size; if (copy_to_user(tlvd, &header, sizeof(struct snd_ctl_tlv))) { ret = -EFAULT; @@ -488,8 +475,6 @@ int snd_sof_bytes_ext_get(struct snd_kcontrol *kcontrol, (struct snd_ctl_tlv __user *)binary_data; size_t data_size; - snd_sof_refresh_control(scontrol); - /* * Decrement the limit by ext bytes header size to * ensure the user space buffer is not exceeded. 
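
Both bytes_ext handlers in this file exchange a snd_ctl_tlv header (numid + length words) followed by a sof_abi_hdr blob, and after this change the numid is checked against scontrol->cmd rather than the cached cdata->cmd. A minimal user-space sketch of that framing, under the assumption that the standard ALSA snd_ctl_tlv layout applies; build_bytes_tlv() is an illustrative name, not part of this patch:

#include <stdint.h>
#include <string.h>

/*
 * Sketch of the TLV wire format consumed by snd_sof_bytes_ext_put():
 * two u32 words of snd_ctl_tlv framing (numid, length), then the SOF
 * ABI blob whose first word is SOF_ABI_MAGIC. The total size must stay
 * within the "max minus ext bytes header" limit the kernel side checks.
 */
static size_t build_bytes_tlv(uint32_t *buf, uint32_t cmd_numid,
			      const void *abi_blob, uint32_t blob_size)
{
	buf[0] = cmd_numid;	/* must match scontrol->cmd in the kernel */
	buf[1] = blob_size;	/* snd_ctl_tlv.length */
	memcpy(&buf[2], abi_blob, blob_size);
	return 2 * sizeof(uint32_t) + blob_size;
}
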
@@ -516,7 +501,7 @@ int snd_sof_bytes_ext_get(struct snd_kcontrol *kcontrol, if (data_size > size) return -ENOSPC; - header.numid = cdata->cmd; + header.numid = scontrol->cmd; header.length = data_size; if (copy_to_user(tlvd, &header, sizeof(struct snd_ctl_tlv))) return -EFAULT; @@ -526,147 +511,3 @@ int snd_sof_bytes_ext_get(struct snd_kcontrol *kcontrol, return 0; } - -static void snd_sof_update_control(struct snd_sof_control *scontrol, - struct sof_ipc_ctrl_data *cdata) -{ - struct snd_soc_component *scomp = scontrol->scomp; - struct sof_ipc_ctrl_data *local_cdata; - int i; - - local_cdata = scontrol->control_data; - - if (cdata->cmd == SOF_CTRL_CMD_BINARY) { - if (cdata->num_elems != local_cdata->data->size) { - dev_err(scomp->dev, - "error: cdata binary size mismatch %u - %u\n", - cdata->num_elems, local_cdata->data->size); - return; - } - - /* copy the new binary data */ - memcpy(local_cdata->data, cdata->data, cdata->num_elems); - } else if (cdata->num_elems != scontrol->num_channels) { - dev_err(scomp->dev, - "error: cdata channel count mismatch %u - %d\n", - cdata->num_elems, scontrol->num_channels); - } else { - /* copy the new values */ - for (i = 0; i < cdata->num_elems; i++) - local_cdata->chanv[i].value = cdata->chanv[i].value; - } -} - -void snd_sof_control_notify(struct snd_sof_dev *sdev, - struct sof_ipc_ctrl_data *cdata) -{ - struct snd_soc_dapm_widget *widget; - struct snd_sof_control *scontrol; - struct snd_sof_widget *swidget; - struct snd_kcontrol *kc = NULL; - struct soc_mixer_control *sm; - struct soc_bytes_ext *be; - size_t expected_size; - struct soc_enum *se; - bool found = false; - int i, type; - - if (cdata->type == SOF_CTRL_TYPE_VALUE_COMP_GET || - cdata->type == SOF_CTRL_TYPE_VALUE_COMP_SET) { - dev_err(sdev->dev, - "Component data is not supported in control notification\n"); - return; - } - - /* Find the swidget first */ - list_for_each_entry(swidget, &sdev->widget_list, list) { - if (swidget->comp_id == cdata->comp_id) { - found = true; - break; - } - } - - if (!found) - return; - - /* Translate SOF cmd to TPLG type */ - switch (cdata->cmd) { - case SOF_CTRL_CMD_VOLUME: - case SOF_CTRL_CMD_SWITCH: - type = SND_SOC_TPLG_TYPE_MIXER; - break; - case SOF_CTRL_CMD_BINARY: - type = SND_SOC_TPLG_TYPE_BYTES; - break; - case SOF_CTRL_CMD_ENUM: - type = SND_SOC_TPLG_TYPE_ENUM; - break; - default: - dev_err(sdev->dev, "error: unknown cmd %u\n", cdata->cmd); - return; - } - - widget = swidget->widget; - for (i = 0; i < widget->num_kcontrols; i++) { - /* skip non matching types or non matching indexes within type */ - if (widget->dobj.widget.kcontrol_type[i] == type && - widget->kcontrol_news[i].index == cdata->index) { - kc = widget->kcontrols[i]; - break; - } - } - - if (!kc) - return; - - switch (cdata->cmd) { - case SOF_CTRL_CMD_VOLUME: - case SOF_CTRL_CMD_SWITCH: - sm = (struct soc_mixer_control *)kc->private_value; - scontrol = sm->dobj.private; - break; - case SOF_CTRL_CMD_BINARY: - be = (struct soc_bytes_ext *)kc->private_value; - scontrol = be->dobj.private; - break; - case SOF_CTRL_CMD_ENUM: - se = (struct soc_enum *)kc->private_value; - scontrol = se->dobj.private; - break; - default: - return; - } - - expected_size = sizeof(struct sof_ipc_ctrl_data); - switch (cdata->type) { - case SOF_CTRL_TYPE_VALUE_CHAN_GET: - case SOF_CTRL_TYPE_VALUE_CHAN_SET: - expected_size += cdata->num_elems * - sizeof(struct sof_ipc_ctrl_value_chan); - break; - case SOF_CTRL_TYPE_DATA_GET: - case SOF_CTRL_TYPE_DATA_SET: - expected_size += cdata->num_elems + sizeof(struct 
sof_abi_hdr); - break; - default: - return; - } - - if (cdata->rhdr.hdr.size != expected_size) { - dev_err(sdev->dev, "error: component notification size mismatch\n"); - return; - } - - if (cdata->num_elems) - /* - * The message includes the updated value/data, update the - * control's local cache using the received notification - */ - snd_sof_update_control(scontrol, cdata); - else - /* Mark the scontrol that the value/data is changed in SOF */ - scontrol->comp_data_dirty = true; - - snd_ctl_notify_one(swidget->scomp->card->snd_card, - SNDRV_CTL_EVENT_MASK_VALUE, kc, 0); -} diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index 8f32b5b12b..59d0d7b2b5 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -15,11 +15,11 @@ #include "sof-priv.h" #include "ops.h" #if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES) -#include "sof-probes.h" +#include "probe.h" #endif /* see SOF_DBG_ flags */ -static int sof_core_debug = IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_ENABLE_FIRMWARE_TRACE); +int sof_core_debug; module_param_named(sof_debug, sof_core_debug, int, 0444); MODULE_PARM_DESC(sof_debug, "SOF core debug options (0x0 all off)"); @@ -27,22 +27,6 @@ MODULE_PARM_DESC(sof_debug, "SOF core debug options (0x0 all off)"); #define TIMEOUT_DEFAULT_IPC_MS 500 #define TIMEOUT_DEFAULT_BOOT_MS 2000 -/** - * sof_debug_check_flag - check if a given flag(s) is set in sof_core_debug - * @mask: Flag or combination of flags to check - * - * Returns true if all bits set in mask is also set in sof_core_debug, otherwise - * false - */ -bool sof_debug_check_flag(int mask) -{ - if ((sof_core_debug & mask) == mask) - return true; - - return false; -} -EXPORT_SYMBOL(sof_debug_check_flag); - /* * FW Panic/fault handling. */ @@ -68,33 +52,23 @@ static const struct sof_panic_msg panic_msg[] = { {SOF_IPC_PANIC_ASSERT, "assertion failed"}, }; -/** - * sof_print_oops_and_stack - Handle the printing of DSP oops and stack trace - * @sdev: Pointer to the device's sdev - * @level: prink log level to use for the printing - * @panic_code: the panic code - * @tracep_code: tracepoint code - * @oops: Pointer to DSP specific oops data - * @panic_info: Pointer to the received panic information message - * @stack: Pointer to the call stack data - * @stack_words: Number of words in the stack data - * +/* * helper to be called from .dbg_dump callbacks. No error code is * provided, it's left as an exercise for the caller of .dbg_dump * (typically IPC or loader) */ -void sof_print_oops_and_stack(struct snd_sof_dev *sdev, const char *level, - u32 panic_code, u32 tracep_code, void *oops, - struct sof_ipc_panic_info *panic_info, - void *stack, size_t stack_words) +void snd_sof_get_status(struct snd_sof_dev *sdev, u32 panic_code, + u32 tracep_code, void *oops, + struct sof_ipc_panic_info *panic_info, + void *stack, size_t stack_words) { u32 code; int i; /* is firmware dead ? */ if ((panic_code & SOF_IPC_PANIC_MAGIC_MASK) != SOF_IPC_PANIC_MAGIC) { - dev_printk(level, sdev->dev, "unexpected fault %#010x trace %#010x\n", - panic_code, tracep_code); + dev_err(sdev->dev, "error: unexpected fault 0x%8.8x trace 0x%8.8x\n", + panic_code, tracep_code); return; /* no fault ? 
*/ } @@ -102,55 +76,54 @@ void sof_print_oops_and_stack(struct snd_sof_dev *sdev, const char *level, for (i = 0; i < ARRAY_SIZE(panic_msg); i++) { if (panic_msg[i].id == code) { - dev_printk(level, sdev->dev, "reason: %s (%#x)\n", - panic_msg[i].msg, code & SOF_IPC_PANIC_CODE_MASK); - dev_printk(level, sdev->dev, "trace point: %#010x\n", tracep_code); + dev_err(sdev->dev, "error: %s\n", panic_msg[i].msg); + dev_err(sdev->dev, "error: trace point %8.8x\n", + tracep_code); goto out; } } /* unknown error */ - dev_printk(level, sdev->dev, "unknown panic code: %#x\n", - code & SOF_IPC_PANIC_CODE_MASK); - dev_printk(level, sdev->dev, "trace point: %#010x\n", tracep_code); + dev_err(sdev->dev, "error: unknown reason %8.8x\n", panic_code); + dev_err(sdev->dev, "error: trace point %8.8x\n", tracep_code); out: - dev_printk(level, sdev->dev, "panic at %s:%d\n", panic_info->filename, - panic_info->linenum); - sof_oops(sdev, level, oops); - sof_stack(sdev, level, oops, stack, stack_words); + dev_err(sdev->dev, "error: panic at %s:%d\n", + panic_info->filename, panic_info->linenum); + sof_oops(sdev, oops); + sof_stack(sdev, oops, stack, stack_words); } -EXPORT_SYMBOL(sof_print_oops_and_stack); +EXPORT_SYMBOL(snd_sof_get_status); /* * FW Boot State Transition Diagram * - * +----------------------------------------------------------------------+ - * | | - * ------------------ ------------------ | - * | | | | | - * | BOOT_FAILED |<-------| READY_FAILED | | - * | |<--+ | | ------------------ | - * ------------------ | ------------------ | | | - * ^ | ^ | CRASHED |---+ | - * | | | | | | | - * (FW Boot Timeout) | (FW_READY FAIL) ------------------ | | - * | | | ^ | | - * | | | |(DSP Panic) | | - * ------------------ | | ------------------ | | - * | | | | | | | | - * | IN_PROGRESS |---------------+------------->| COMPLETE | | | - * | | (FW Boot OK) (FW_READY OK) | | | | - * ------------------ | ------------------ | | - * ^ | | | | - * | | | | | - * (FW Loading OK) | (System Suspend/Runtime Suspend) - * | | | | | - * | (FW Loading Fail) | | | - * ------------------ | ------------------ | | | - * | | | | |<-----+ | | - * | PREPARE |---+ | NOT_STARTED |<---------------------+ | - * | | | |<--------------------------+ + * +-----------------------------------------------------------------------+ + * | | + * ------------------ ------------------ | + * | | | | | + * | BOOT_FAILED | | READY_FAILED |-------------------------+ | + * | | | | | | + * ------------------ ------------------ | | + * ^ ^ | | + * | | | | + * (FW Boot Timeout) (FW_READY FAIL) | | + * | | | | + * | | | | + * ------------------ | ------------------ | | + * | | | | | | | + * | IN_PROGRESS |---------------+------------->| COMPLETE | | | + * | | (FW Boot OK) (FW_READY OK) | | | | + * ------------------ ------------------ | | + * ^ | | | + * | | | | + * (FW Loading OK) (System Suspend/Runtime Suspend) + * | | | | + * | | | | + * ------------------ ------------------ | | | + * | | | |<-----+ | | + * | PREPARE | | NOT_STARTED |<---------------------+ | + * | | | |<---------------------------+ * ------------------ ------------------ * | ^ | ^ * | | | | @@ -174,7 +147,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) return ret; } - sof_set_fw_state(sdev, SOF_FW_BOOT_PREPARE); + sdev->fw_state = SOF_FW_BOOT_PREPARE; /* check machine info */ ret = sof_machine_check(sdev); @@ -213,11 +186,10 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) if (ret < 0) { dev_err(sdev->dev, "error: failed to load DSP firmware %d\n", ret); - 
sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED); goto fw_load_err; } - sof_set_fw_state(sdev, SOF_FW_BOOT_IN_PROGRESS); + sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS; /* * Boot the firmware. The FW boot status will be modified @@ -227,11 +199,11 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) if (ret < 0) { dev_err(sdev->dev, "error: failed to boot DSP firmware %d\n", ret); - sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED); goto fw_run_err; } - if (sof_debug_check_flag(SOF_DBG_ENABLE_TRACE)) { + if (IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_ENABLE_FIRMWARE_TRACE) || + (sof_core_debug & SOF_DBG_ENABLE_TRACE)) { sdev->dtrace_is_supported = true; /* init DMA trace */ @@ -294,7 +266,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) snd_sof_remove(sdev); /* all resources freed, update state to match */ - sof_set_fw_state(sdev, SOF_FW_BOOT_NOT_STARTED); + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; sdev->first_boot = true; return ret; @@ -329,7 +301,7 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data) sdev->pdata = plat_data; sdev->first_boot = true; - sof_set_fw_state(sdev, SOF_FW_BOOT_NOT_STARTED); + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; #if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES) sdev->extractor_stream_tag = SOF_PROBE_INVALID_NODE_ID; #endif @@ -354,6 +326,9 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data) spin_lock_init(&sdev->hw_lock); mutex_init(&sdev->power_state_access); + if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) + INIT_WORK(&sdev->probe_work, sof_probe_work); + /* set default timeouts if none provided */ if (plat_data->desc->ipc_timeout == 0) sdev->ipc_timeout = TIMEOUT_DEFAULT_IPC_MS; @@ -365,7 +340,6 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data) sdev->boot_timeout = plat_data->desc->boot_timeout; if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) { - INIT_WORK(&sdev->probe_work, sof_probe_work); schedule_work(&sdev->probe_work); return 0; } @@ -391,15 +365,7 @@ int snd_sof_device_remove(struct device *dev) if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) cancel_work_sync(&sdev->probe_work); - /* - * Unregister machine driver. This will unbind the snd_card which - * will remove the component driver and unload the topology - * before freeing the snd_card. - */ - snd_sof_machine_unregister(sdev, pdata); - if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) { - snd_sof_free_trace(sdev); ret = snd_sof_dsp_power_down_notify(sdev); if (ret < 0) dev_warn(dev, "error: %d failed to prepare DSP for device removal", @@ -407,8 +373,16 @@ int snd_sof_device_remove(struct device *dev) snd_sof_ipc_free(sdev); snd_sof_free_debug(sdev); + snd_sof_free_trace(sdev); } + /* + * Unregister machine driver. This will unbind the snd_card which + * will remove the component driver and unload the topology + * before freeing the snd_card. + */ + snd_sof_machine_unregister(sdev, pdata); + /* * Unregistering the machine driver results in unloading the topology. 
* Some widgets, ex: scheduler, attempt to power down the core they are diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index 6d6757075f..a51a928ea4 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -20,7 +20,7 @@ #include "ops.h" #if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES) -#include "sof-probes.h" +#include "probe.h" /** * strsplit_u32 - Split string into sequence of u32 tokens @@ -336,104 +336,6 @@ static int sof_debug_ipc_flood_test(struct snd_sof_dev *sdev, } #endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_IPC_MSG_INJECTOR) -static ssize_t msg_inject_read(struct file *file, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct snd_sof_dfsentry *dfse = file->private_data; - struct sof_ipc_reply *rhdr = dfse->msg_inject_rx; - - if (!rhdr->hdr.size || !count || *ppos) - return 0; - - if (count > rhdr->hdr.size) - count = rhdr->hdr.size; - - if (copy_to_user(buffer, dfse->msg_inject_rx, count)) - return -EFAULT; - - *ppos += count; - return count; -} - -static ssize_t msg_inject_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct snd_sof_dfsentry *dfse = file->private_data; - struct snd_sof_dev *sdev = dfse->sdev; - struct sof_ipc_cmd_hdr *hdr = dfse->msg_inject_tx; - size_t size; - int ret, err; - - if (*ppos) - return 0; - - size = simple_write_to_buffer(dfse->msg_inject_tx, SOF_IPC_MSG_MAX_SIZE, - ppos, buffer, count); - if (size != count) - return size > 0 ? -EFAULT : size; - - ret = pm_runtime_get_sync(sdev->dev); - if (ret < 0 && ret != -EACCES) { - dev_err_ratelimited(sdev->dev, "%s: DSP resume failed: %d\n", - __func__, ret); - pm_runtime_put_noidle(sdev->dev); - goto out; - } - - /* send the message */ - memset(dfse->msg_inject_rx, 0, SOF_IPC_MSG_MAX_SIZE); - ret = sof_ipc_tx_message(sdev->ipc, hdr->cmd, dfse->msg_inject_tx, count, - dfse->msg_inject_rx, SOF_IPC_MSG_MAX_SIZE); - - pm_runtime_mark_last_busy(sdev->dev); - err = pm_runtime_put_autosuspend(sdev->dev); - if (err < 0) - dev_err_ratelimited(sdev->dev, "%s: DSP idle failed: %d\n", - __func__, err); - - /* return size if test is successful */ - if (ret >= 0) - ret = size; - -out: - return ret; -} - -static const struct file_operations msg_inject_fops = { - .open = simple_open, - .read = msg_inject_read, - .write = msg_inject_write, - .llseek = default_llseek, -}; - -static int snd_sof_debugfs_msg_inject_item(struct snd_sof_dev *sdev, - const char *name, mode_t mode, - const struct file_operations *fops) -{ - struct snd_sof_dfsentry *dfse; - - dfse = devm_kzalloc(sdev->dev, sizeof(*dfse), GFP_KERNEL); - if (!dfse) - return -ENOMEM; - - /* pre allocate the tx and rx buffers */ - dfse->msg_inject_tx = devm_kzalloc(sdev->dev, SOF_IPC_MSG_MAX_SIZE, GFP_KERNEL); - dfse->msg_inject_rx = devm_kzalloc(sdev->dev, SOF_IPC_MSG_MAX_SIZE, GFP_KERNEL); - if (!dfse->msg_inject_tx || !dfse->msg_inject_rx) - return -ENOMEM; - - dfse->type = SOF_DFSENTRY_TYPE_BUF; - dfse->sdev = sdev; - - debugfs_create_file(name, mode, sdev->debugfs_root, dfse, fops); - /* add to dfsentry list */ - list_add(&dfse->list, &sdev->dfsentry_list); - - return 0; -} -#endif - static ssize_t sof_dfsentry_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { @@ -644,10 +546,10 @@ static const struct file_operations sof_dfs_fops = { }; /* create FS entry for debug files that can expose DSP memories, registers */ -static int snd_sof_debugfs_io_item(struct snd_sof_dev *sdev, - void __iomem *base, size_t size, - const char *name, - enum sof_debugfs_access_type 
access_type) +int snd_sof_debugfs_io_item(struct snd_sof_dev *sdev, + void __iomem *base, size_t size, + const char *name, + enum sof_debugfs_access_type access_type) { struct snd_sof_dfsentry *dfse; @@ -684,21 +586,7 @@ static int snd_sof_debugfs_io_item(struct snd_sof_dev *sdev, return 0; } - -int snd_sof_debugfs_add_region_item_iomem(struct snd_sof_dev *sdev, - enum snd_sof_fw_blk_type blk_type, u32 offset, - size_t size, const char *name, - enum sof_debugfs_access_type access_type) -{ - int bar = snd_sof_dsp_get_bar_index(sdev, blk_type); - - if (bar < 0) - return bar; - - return snd_sof_debugfs_io_item(sdev, sdev->bar[bar] + offset, size, name, - access_type); -} -EXPORT_SYMBOL_GPL(snd_sof_debugfs_add_region_item_iomem); +EXPORT_SYMBOL_GPL(snd_sof_debugfs_io_item); /* create FS entry for debug files to expose kernel memory */ int snd_sof_debugfs_buf_item(struct snd_sof_dev *sdev, @@ -910,15 +798,6 @@ int snd_sof_dbg_init(struct snd_sof_dev *sdev) return err; #endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_IPC_MSG_INJECTOR) - err = snd_sof_debugfs_msg_inject_item(sdev, "ipc_msg_inject", 0644, - &msg_inject_fops); - - /* errors are only due to memory allocation, not debugfs */ - if (err < 0) - return err; -#endif - return 0; } EXPORT_SYMBOL_GPL(snd_sof_dbg_init); @@ -929,86 +808,18 @@ void snd_sof_free_debug(struct snd_sof_dev *sdev) } EXPORT_SYMBOL_GPL(snd_sof_free_debug); -static const struct soc_fw_state_info { - enum sof_fw_state state; - const char *name; -} fw_state_dbg[] = { - {SOF_FW_BOOT_NOT_STARTED, "SOF_FW_BOOT_NOT_STARTED"}, - {SOF_FW_BOOT_PREPARE, "SOF_FW_BOOT_PREPARE"}, - {SOF_FW_BOOT_IN_PROGRESS, "SOF_FW_BOOT_IN_PROGRESS"}, - {SOF_FW_BOOT_FAILED, "SOF_FW_BOOT_FAILED"}, - {SOF_FW_BOOT_READY_FAILED, "SOF_FW_BOOT_READY_FAILED"}, - {SOF_FW_BOOT_READY_OK, "SOF_FW_BOOT_READY_OK"}, - {SOF_FW_BOOT_COMPLETE, "SOF_FW_BOOT_COMPLETE"}, - {SOF_FW_CRASHED, "SOF_FW_CRASHED"}, -}; - -static void snd_sof_dbg_print_fw_state(struct snd_sof_dev *sdev, const char *level) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(fw_state_dbg); i++) { - if (sdev->fw_state == fw_state_dbg[i].state) { - dev_printk(level, sdev->dev, "fw_state: %s (%d)\n", - fw_state_dbg[i].name, i); - return; - } - } - - dev_printk(level, sdev->dev, "fw_state: UNKNOWN (%d)\n", sdev->fw_state); -} - -void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, const char *msg, u32 flags) -{ - char *level = flags & SOF_DBG_DUMP_OPTIONAL ? 
KERN_DEBUG : KERN_ERR; - bool print_all = sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS); - - if (flags & SOF_DBG_DUMP_OPTIONAL && !print_all) - return; - - if (sof_ops(sdev)->dbg_dump && !sdev->dbg_dump_printed) { - dev_printk(level, sdev->dev, - "------------[ DSP dump start ]------------\n"); - if (msg) - dev_printk(level, sdev->dev, "%s\n", msg); - snd_sof_dbg_print_fw_state(sdev, level); - sof_ops(sdev)->dbg_dump(sdev, flags); - dev_printk(level, sdev->dev, - "------------[ DSP dump end ]------------\n"); - if (!print_all) - sdev->dbg_dump_printed = true; - } else if (msg) { - dev_printk(level, sdev->dev, "%s\n", msg); - } -} -EXPORT_SYMBOL(snd_sof_dsp_dbg_dump); - -static void snd_sof_ipc_dump(struct snd_sof_dev *sdev) -{ - if (sof_ops(sdev)->ipc_dump && !sdev->ipc_dump_printed) { - dev_err(sdev->dev, "------------[ IPC dump start ]------------\n"); - sof_ops(sdev)->ipc_dump(sdev); - dev_err(sdev->dev, "------------[ IPC dump end ]------------\n"); - if (!sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS)) - sdev->ipc_dump_printed = true; - } -} - void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev) { if (IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_RETAIN_DSP_CONTEXT) || - sof_debug_check_flag(SOF_DBG_RETAIN_CTX)) { + (sof_core_debug & SOF_DBG_RETAIN_CTX)) { /* should we prevent DSP entering D3 ? */ - if (!sdev->ipc_dump_printed) - dev_info(sdev->dev, - "preventing DSP entering D3 state to preserve context\n"); + dev_info(sdev->dev, "info: preventing DSP entering D3 state to preserve context\n"); pm_runtime_get_noresume(sdev->dev); } /* dump vital information to the logs */ + snd_sof_dsp_dbg_dump(sdev, SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX); snd_sof_ipc_dump(sdev); - snd_sof_dsp_dbg_dump(sdev, "Firmware exception", - SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX); snd_sof_trace_notify_for_error(sdev); } EXPORT_SYMBOL(snd_sof_handle_fw_exception); diff --git a/sound/soc/sof/imx/Kconfig b/sound/soc/sof/imx/Kconfig index 9b8d5bb1e4..49d605cb09 100644 --- a/sound/soc/sof/imx/Kconfig +++ b/sound/soc/sof/imx/Kconfig @@ -11,33 +11,51 @@ config SND_SOC_SOF_IMX_TOPLEVEL if SND_SOC_SOF_IMX_TOPLEVEL -config SND_SOC_SOF_IMX_COMMON - tristate - select SND_SOC_SOF_OF_DEV - select SND_SOC_SOF - select SND_SOC_SOF_XTENSA - select SND_SOC_SOF_COMPRESS +config SND_SOC_SOF_IMX_OF + def_tristate SND_SOC_SOF_OF + select SND_SOC_SOF_IMX8 if SND_SOC_SOF_IMX8_SUPPORT + select SND_SOC_SOF_IMX8M if SND_SOC_SOF_IMX8M_SUPPORT help This option is not user-selectable but automagically handled by 'select' statements at a higher level. -config SND_SOC_SOF_IMX8 - tristate "SOF support for i.MX8" - depends on IMX_SCU - depends on IMX_DSP - select SND_SOC_SOF_IMX_COMMON +config SND_SOC_SOF_IMX_COMMON + tristate + help + This option is not user-selectable but automagically handled by + 'select' statements at a higher level. + +config SND_SOC_SOF_IMX8_SUPPORT + bool "SOF support for i.MX8" + depends on IMX_SCU=y || IMX_SCU=SND_SOC_SOF_IMX_OF + depends on IMX_DSP=y || IMX_DSP=SND_SOC_SOF_IMX_OF help This adds support for Sound Open Firmware for NXP i.MX8 platforms. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_IMX8M - tristate "SOF support for i.MX8M" - depends on IMX_DSP +config SND_SOC_SOF_IMX8 + tristate select SND_SOC_SOF_IMX_COMMON + select SND_SOC_SOF_XTENSA + help + This option is not user-selectable but automagically handled by + 'select' statements at a higher level. 
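
The "IMX_DSP=y || IMX_DSP=SND_SOC_SOF_IMX_OF" style dependencies above tie each dependency to be built-in, or a module exactly when the SOF OF glue is, so built-in SOF code can never reference module-only symbols. A hedged C illustration of the failure mode this guards against; imx_dsp_usable() is hypothetical, while IS_ENABLED()/IS_REACHABLE() come from <linux/kconfig.h>:

#include <linux/kconfig.h>

/*
 * IS_ENABLED() is true for =y and =m alike; IS_REACHABLE() is true only
 * when the symbol can actually be linked from the current object. The
 * Kconfig pattern above forces the two to agree, so a check like this
 * never silently fails for a built-in caller with a modular IMX_DSP.
 */
static inline bool imx_dsp_usable(void)
{
	return IS_REACHABLE(CONFIG_IMX_DSP);
}
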
+ +config SND_SOC_SOF_IMX8M_SUPPORT + bool "SOF support for i.MX8M" + depends on IMX_DSP=y || IMX_DSP=SND_SOC_SOF_OF help This adds support for Sound Open Firmware for NXP i.MX8M platforms. Say Y if you have such a device. If unsure select "N". -endif ## SND_SOC_SOF_IMX_TOPLEVEL +config SND_SOC_SOF_IMX8M + tristate + select SND_SOC_SOF_IMX_COMMON + select SND_SOC_SOF_XTENSA + help + This option is not user-selectable but automagically handled by + 'select' statements at a higher level. + +endif ## SND_SOC_SOF_IMX_IMX_TOPLEVEL diff --git a/sound/soc/sof/imx/imx-common.c b/sound/soc/sof/imx/imx-common.c index 36e3d414a1..8826ef94f0 100644 --- a/sound/soc/sof/imx/imx-common.c +++ b/sound/soc/sof/imx/imx-common.c @@ -69,33 +69,9 @@ void imx8_dump(struct snd_sof_dev *sdev, u32 flags) IMX8_STACK_DUMP_SIZE); /* Print the information to the console */ - sof_print_oops_and_stack(sdev, KERN_ERR, status, status, &xoops, - &panic_info, stack, IMX8_STACK_DUMP_SIZE); + snd_sof_get_status(sdev, status, status, &xoops, &panic_info, stack, + IMX8_STACK_DUMP_SIZE); } EXPORT_SYMBOL(imx8_dump); -int imx8_parse_clocks(struct snd_sof_dev *sdev, struct imx_clocks *clks) -{ - int ret; - - ret = devm_clk_bulk_get(sdev->dev, clks->num_dsp_clks, clks->dsp_clks); - if (ret) - dev_err(sdev->dev, "Failed to request DSP clocks\n"); - - return ret; -} -EXPORT_SYMBOL(imx8_parse_clocks); - -int imx8_enable_clocks(struct snd_sof_dev *sdev, struct imx_clocks *clks) -{ - return clk_bulk_prepare_enable(clks->num_dsp_clks, clks->dsp_clks); -} -EXPORT_SYMBOL(imx8_enable_clocks); - -void imx8_disable_clocks(struct snd_sof_dev *sdev, struct imx_clocks *clks) -{ - clk_bulk_disable_unprepare(clks->num_dsp_clks, clks->dsp_clks); -} -EXPORT_SYMBOL(imx8_disable_clocks); - MODULE_LICENSE("Dual BSD/GPL"); diff --git a/sound/soc/sof/imx/imx-common.h b/sound/soc/sof/imx/imx-common.h index ec4b3a5c74..1cc7d67041 100644 --- a/sound/soc/sof/imx/imx-common.h +++ b/sound/soc/sof/imx/imx-common.h @@ -3,8 +3,6 @@ #ifndef __IMX_COMMON_H__ #define __IMX_COMMON_H__ -#include - #define EXCEPT_MAX_HDR_SIZE 0x400 #define IMX8_STACK_DUMP_SIZE 32 @@ -15,13 +13,4 @@ void imx8_get_registers(struct snd_sof_dev *sdev, void imx8_dump(struct snd_sof_dev *sdev, u32 flags); -struct imx_clocks { - struct clk_bulk_data *dsp_clks; - int num_dsp_clks; -}; - -int imx8_parse_clocks(struct snd_sof_dev *sdev, struct imx_clocks *clks); -int imx8_enable_clocks(struct snd_sof_dev *sdev, struct imx_clocks *clks); -void imx8_disable_clocks(struct snd_sof_dev *sdev, struct imx_clocks *clks); - #endif diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c index f6baecbb57..7e9723a10d 100644 --- a/sound/soc/sof/imx/imx8.c +++ b/sound/soc/sof/imx/imx8.c @@ -21,7 +21,6 @@ #include #include #include "../ops.h" -#include "../sof-of-dev.h" #include "imx-common.h" /* DSP memories */ @@ -41,13 +40,6 @@ #define MBOX_OFFSET 0x800000 #define MBOX_SIZE 0x1000 -/* DSP clocks */ -static struct clk_bulk_data imx8_dsp_clks[] = { - { .id = "ipg" }, - { .id = "ocram" }, - { .id = "core" }, -}; - struct imx8_priv { struct device *dev; struct snd_sof_dev *sdev; @@ -64,9 +56,42 @@ struct imx8_priv { struct device **pd_dev; struct device_link **link; - struct imx_clocks *clks; }; +static void imx8_get_reply(struct snd_sof_dev *sdev) +{ + struct snd_sof_ipc_msg *msg = sdev->msg; + struct sof_ipc_reply reply; + int ret = 0; + + if (!msg) { + dev_warn(sdev->dev, "unexpected ipc interrupt\n"); + return; + } + + /* get reply */ + sof_mailbox_read(sdev, sdev->host_box.offset, &reply, 
sizeof(reply)); + + if (reply.error < 0) { + memcpy(msg->reply_data, &reply, sizeof(reply)); + ret = reply.error; + } else { + /* reply has correct size? */ + if (reply.hdr.size != msg->reply_size) { + dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n", + msg->reply_size, reply.hdr.size); + ret = -EINVAL; + } + + /* read the message */ + if (msg->reply_size > 0) + sof_mailbox_read(sdev, sdev->host_box.offset, + msg->reply_data, msg->reply_size); + } + + msg->reply_error = ret; +} + static int imx8_get_mailbox_offset(struct snd_sof_dev *sdev) { return MBOX_OFFSET; @@ -83,7 +108,8 @@ static void imx8_dsp_handle_reply(struct imx_dsp_ipc *ipc) unsigned long flags; spin_lock_irqsave(&priv->sdev->ipc_lock, flags); - snd_sof_ipc_process_reply(priv->sdev, 0); + imx8_get_reply(priv->sdev); + snd_sof_ipc_reply(priv->sdev, 0); spin_unlock_irqrestore(&priv->sdev->ipc_lock, flags); } @@ -97,7 +123,7 @@ static void imx8_dsp_handle_request(struct imx_dsp_ipc *ipc) /* Check to see if the message is a panic code (0x0dead***) */ if ((p & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) - snd_sof_dsp_panic(priv->sdev, p, true); + snd_sof_dsp_panic(priv->sdev, p); else snd_sof_ipc_msgs_rx(priv->sdev); } @@ -196,11 +222,6 @@ static int imx8_probe(struct snd_sof_dev *sdev) if (!priv) return -ENOMEM; - priv->clks = devm_kzalloc(&pdev->dev, sizeof(*priv->clks), GFP_KERNEL); - if (!priv->clks) - return -ENOMEM; - - sdev->num_cores = 1; sdev->pdata->hw_pdata = priv; priv->dev = sdev->dev; priv->sdev = sdev; @@ -313,18 +334,6 @@ static int imx8_probe(struct snd_sof_dev *sdev) /* set default mailbox offset for FW ready message */ sdev->dsp_box.offset = MBOX_OFFSET; - /* init clocks info */ - priv->clks->dsp_clks = imx8_dsp_clks; - priv->clks->num_dsp_clks = ARRAY_SIZE(imx8_dsp_clks); - - ret = imx8_parse_clocks(sdev, priv->clks); - if (ret < 0) - goto exit_pdev_unregister; - - ret = imx8_enable_clocks(sdev, priv->clks); - if (ret < 0) - goto exit_pdev_unregister; - return 0; exit_pdev_unregister: @@ -343,7 +352,6 @@ static int imx8_remove(struct snd_sof_dev *sdev) struct imx8_priv *priv = sdev->pdata->hw_pdata; int i; - imx8_disable_clocks(sdev, priv->clks); platform_device_unregister(priv->ipc_dev); for (i = 0; i < priv->num_domains; i++) { @@ -367,92 +375,20 @@ static int imx8_get_bar_index(struct snd_sof_dev *sdev, u32 type) } } -static void imx8_suspend(struct snd_sof_dev *sdev) +static void imx8_ipc_msg_data(struct snd_sof_dev *sdev, + struct snd_pcm_substream *substream, + void *p, size_t sz) { - int i; - struct imx8_priv *priv = (struct imx8_priv *)sdev->pdata->hw_pdata; - - for (i = 0; i < DSP_MU_CHAN_NUM; i++) - imx_dsp_free_channel(priv->dsp_ipc, i); - - imx8_disable_clocks(sdev, priv->clks); + sof_mailbox_read(sdev, sdev->dsp_box.offset, p, sz); } -static int imx8_resume(struct snd_sof_dev *sdev) +static int imx8_ipc_pcm_params(struct snd_sof_dev *sdev, + struct snd_pcm_substream *substream, + const struct sof_ipc_pcm_params_reply *reply) { - struct imx8_priv *priv = (struct imx8_priv *)sdev->pdata->hw_pdata; - int ret; - int i; - - ret = imx8_enable_clocks(sdev, priv->clks); - if (ret < 0) - return ret; - - for (i = 0; i < DSP_MU_CHAN_NUM; i++) - imx_dsp_request_channel(priv->dsp_ipc, i); - return 0; } -static int imx8_dsp_runtime_resume(struct snd_sof_dev *sdev) -{ - int ret; - const struct sof_dsp_power_state target_dsp_state = { - .state = SOF_DSP_PM_D0, - }; - - ret = imx8_resume(sdev); - if (ret < 0) - return ret; - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - 
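
The imx8_get_reply() added earlier in this file is duplicated almost verbatim by imx8m.c, atom.c and bdw.c later in this patch. A condensed sketch of the shared shape, written here as a hypothetical common helper (sof_mailbox_get_reply() is not something this patch introduces):

/*
 * Read the fixed-size reply header from the host mailbox, validate it
 * against the in-flight message, then pull the full payload on success;
 * error replies carry only the header. Every *_get_reply() copy in this
 * patch follows exactly this sequence before calling snd_sof_ipc_reply().
 */
static void sof_mailbox_get_reply(struct snd_sof_dev *sdev)
{
	struct snd_sof_ipc_msg *msg = sdev->msg;
	struct sof_ipc_reply reply;
	int ret = 0;

	if (!msg) {
		dev_warn(sdev->dev, "unexpected ipc interrupt\n");
		return;
	}

	sof_mailbox_read(sdev, sdev->host_box.offset, &reply, sizeof(reply));

	if (reply.error < 0) {
		/* error replies carry only the header */
		memcpy(msg->reply_data, &reply, sizeof(reply));
		ret = reply.error;
	} else {
		if (reply.hdr.size != msg->reply_size) {
			dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n",
				msg->reply_size, reply.hdr.size);
			ret = -EINVAL;
		}
		if (msg->reply_size > 0)
			sof_mailbox_read(sdev, sdev->host_box.offset,
					 msg->reply_data, msg->reply_size);
	}

	msg->reply_error = ret;
}
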
-static int imx8_dsp_runtime_suspend(struct snd_sof_dev *sdev) -{ - const struct sof_dsp_power_state target_dsp_state = { - .state = SOF_DSP_PM_D3, - }; - - imx8_suspend(sdev); - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - -static int imx8_dsp_suspend(struct snd_sof_dev *sdev, unsigned int target_state) -{ - const struct sof_dsp_power_state target_dsp_state = { - .state = target_state, - }; - - if (!pm_runtime_suspended(sdev->dev)) - imx8_suspend(sdev); - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - -static int imx8_dsp_resume(struct snd_sof_dev *sdev) -{ - int ret; - const struct sof_dsp_power_state target_dsp_state = { - .state = SOF_DSP_PM_D0, - }; - - ret = imx8_resume(sdev); - if (ret < 0) - return ret; - - if (pm_runtime_suspended(sdev->dev)) { - pm_runtime_disable(sdev->dev); - pm_runtime_set_active(sdev->dev); - pm_runtime_mark_last_busy(sdev->dev); - pm_runtime_enable(sdev->dev); - pm_runtime_idle(sdev->dev); - } - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - static struct snd_soc_dai_driver imx8_dai[] = { { .name = "esai0", @@ -478,16 +414,8 @@ static struct snd_soc_dai_driver imx8_dai[] = { }, }; -static int imx8_dsp_set_power_state(struct snd_sof_dev *sdev, - const struct sof_dsp_power_state *target_state) -{ - sdev->dsp_power_state = *target_state; - - return 0; -} - /* i.MX8 ops */ -static const struct snd_sof_dsp_ops sof_imx8_ops = { +struct snd_sof_dsp_ops sof_imx8_ops = { /* probe and remove */ .probe = imx8_probe, .remove = imx8_remove, @@ -498,9 +426,8 @@ static const struct snd_sof_dsp_ops sof_imx8_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, + /* Module IO */ + .read64 = sof_io_read64, /* ipc */ .send_msg = imx8_send_msg, @@ -508,8 +435,8 @@ static const struct snd_sof_dsp_ops sof_imx8_ops = { .get_mailbox_offset = imx8_get_mailbox_offset, .get_window_offset = imx8_get_window_offset, - .ipc_msg_data = sof_ipc_msg_data, - .ipc_pcm_params = sof_ipc_pcm_params, + .ipc_msg_data = imx8_ipc_msg_data, + .ipc_pcm_params = imx8_ipc_pcm_params, /* module loading */ .load_module = snd_sof_parse_module_memcpy, @@ -519,14 +446,9 @@ static const struct snd_sof_dsp_ops sof_imx8_ops = { /* Debug information */ .dbg_dump = imx8_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, - - /* stream callbacks */ - .pcm_open = sof_stream_pcm_open, - .pcm_close = sof_stream_pcm_close, /* Firmware ops */ - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, /* DAI drivers */ .drv = imx8_dai, @@ -538,19 +460,11 @@ static const struct snd_sof_dsp_ops sof_imx8_ops = { SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP, - - /* PM */ - .runtime_suspend = imx8_dsp_runtime_suspend, - .runtime_resume = imx8_dsp_runtime_resume, - - .suspend = imx8_dsp_suspend, - .resume = imx8_dsp_resume, - - .set_power_state = imx8_dsp_set_power_state, }; +EXPORT_SYMBOL(sof_imx8_ops); /* i.MX8X ops */ -static const struct snd_sof_dsp_ops sof_imx8x_ops = { +struct snd_sof_dsp_ops sof_imx8x_ops = { /* probe and remove */ .probe = imx8_probe, .remove = imx8_remove, @@ -561,9 +475,8 @@ static const struct snd_sof_dsp_ops sof_imx8x_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, + /* Module IO */ + .read64 = sof_io_read64, /* ipc */ .send_msg = 
imx8_send_msg, @@ -571,8 +484,8 @@ static const struct snd_sof_dsp_ops sof_imx8x_ops = { .get_mailbox_offset = imx8_get_mailbox_offset, .get_window_offset = imx8_get_window_offset, - .ipc_msg_data = sof_ipc_msg_data, - .ipc_pcm_params = sof_ipc_pcm_params, + .ipc_msg_data = imx8_ipc_msg_data, + .ipc_pcm_params = imx8_ipc_pcm_params, /* module loading */ .load_module = snd_sof_parse_module_memcpy, @@ -582,28 +495,14 @@ static const struct snd_sof_dsp_ops sof_imx8x_ops = { /* Debug information */ .dbg_dump = imx8_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, - - /* stream callbacks */ - .pcm_open = sof_stream_pcm_open, - .pcm_close = sof_stream_pcm_close, /* Firmware ops */ - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, /* DAI drivers */ .drv = imx8_dai, .num_drv = ARRAY_SIZE(imx8_dai), - /* PM */ - .runtime_suspend = imx8_dsp_runtime_suspend, - .runtime_resume = imx8_dsp_runtime_resume, - - .suspend = imx8_dsp_suspend, - .resume = imx8_dsp_resume, - - .set_power_state = imx8_dsp_set_power_state, - /* ALSA HW info flags */ .hw_info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | @@ -611,41 +510,7 @@ static const struct snd_sof_dsp_ops sof_imx8x_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP }; - -static struct sof_dev_desc sof_of_imx8qxp_desc = { - .default_fw_path = "imx/sof", - .default_tplg_path = "imx/sof-tplg", - .default_fw_filename = "sof-imx8x.ri", - .nocodec_tplg_filename = "sof-imx8-nocodec.tplg", - .ops = &sof_imx8x_ops, -}; - -static struct sof_dev_desc sof_of_imx8qm_desc = { - .default_fw_path = "imx/sof", - .default_tplg_path = "imx/sof-tplg", - .default_fw_filename = "sof-imx8.ri", - .nocodec_tplg_filename = "sof-imx8-nocodec.tplg", - .ops = &sof_imx8_ops, -}; - -static const struct of_device_id sof_of_imx8_ids[] = { - { .compatible = "fsl,imx8qxp-dsp", .data = &sof_of_imx8qxp_desc}, - { .compatible = "fsl,imx8qm-dsp", .data = &sof_of_imx8qm_desc}, - { } -}; -MODULE_DEVICE_TABLE(of, sof_of_imx8_ids); - -/* DT driver definition */ -static struct platform_driver snd_sof_of_imx8_driver = { - .probe = sof_of_probe, - .remove = sof_of_remove, - .driver = { - .name = "sof-audio-of-imx8", - .pm = &sof_of_pm, - .of_match_table = sof_of_imx8_ids, - }, -}; -module_platform_driver(snd_sof_of_imx8_driver); +EXPORT_SYMBOL(sof_imx8x_ops); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index 788e77bcb6..892e1482f9 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -6,13 +6,10 @@ // // Hardware interface for audio DSP on i.MX8M -#include #include -#include #include #include #include -#include #include #include @@ -20,32 +17,11 @@ #include #include "../ops.h" -#include "../sof-of-dev.h" #include "imx-common.h" #define MBOX_OFFSET 0x800000 #define MBOX_SIZE 0x1000 -static struct clk_bulk_data imx8m_dsp_clks[] = { - { .id = "ipg" }, - { .id = "ocram" }, - { .id = "core" }, -}; - -/* DAP registers */ -#define IMX8M_DAP_DEBUG 0x28800000 -#define IMX8M_DAP_DEBUG_SIZE (64 * 1024) -#define IMX8M_DAP_PWRCTL (0x4000 + 0x3020) -#define IMX8M_PWRCTL_CORERESET BIT(16) - -/* DSP audio mix registers */ -#define AudioDSP_REG0 0x100 -#define AudioDSP_REG1 0x104 -#define AudioDSP_REG2 0x108 -#define AudioDSP_REG3 0x10c - -#define AudioDSP_REG2_RUNSTALL BIT(5) - struct imx8m_priv { struct device *dev; struct snd_sof_dev *sdev; @@ -53,13 +29,42 @@ struct imx8m_priv { /* DSP IPC handler */ struct imx_dsp_ipc *dsp_ipc; struct 
platform_device *ipc_dev; - - struct imx_clocks *clks; - - void __iomem *dap; - struct regmap *regmap; }; +static void imx8m_get_reply(struct snd_sof_dev *sdev) +{ + struct snd_sof_ipc_msg *msg = sdev->msg; + struct sof_ipc_reply reply; + int ret = 0; + + if (!msg) { + dev_warn(sdev->dev, "unexpected ipc interrupt\n"); + return; + } + + /* get reply */ + sof_mailbox_read(sdev, sdev->host_box.offset, &reply, sizeof(reply)); + + if (reply.error < 0) { + memcpy(msg->reply_data, &reply, sizeof(reply)); + ret = reply.error; + } else { + /* reply has correct size? */ + if (reply.hdr.size != msg->reply_size) { + dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n", + msg->reply_size, reply.hdr.size); + ret = -EINVAL; + } + + /* read the message */ + if (msg->reply_size > 0) + sof_mailbox_read(sdev, sdev->host_box.offset, + msg->reply_data, msg->reply_size); + } + + msg->reply_error = ret; +} + static int imx8m_get_mailbox_offset(struct snd_sof_dev *sdev) { return MBOX_OFFSET; @@ -76,7 +81,8 @@ static void imx8m_dsp_handle_reply(struct imx_dsp_ipc *ipc) unsigned long flags; spin_lock_irqsave(&priv->sdev->ipc_lock, flags); - snd_sof_ipc_process_reply(priv->sdev, 0); + imx8m_get_reply(priv->sdev); + snd_sof_ipc_reply(priv->sdev, 0); spin_unlock_irqrestore(&priv->sdev->ipc_lock, flags); } @@ -90,7 +96,7 @@ static void imx8m_dsp_handle_request(struct imx_dsp_ipc *ipc) /* Check to see if the message is a panic code (0x0dead***) */ if ((p & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) - snd_sof_dsp_panic(priv->sdev, p, true); + snd_sof_dsp_panic(priv->sdev, p); else snd_sof_ipc_msgs_rx(priv->sdev); } @@ -116,34 +122,7 @@ static int imx8m_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *msg) */ static int imx8m_run(struct snd_sof_dev *sdev) { - struct imx8m_priv *priv = (struct imx8m_priv *)sdev->pdata->hw_pdata; - - regmap_update_bits(priv->regmap, AudioDSP_REG2, AudioDSP_REG2_RUNSTALL, 0); - - return 0; -} - -static int imx8m_reset(struct snd_sof_dev *sdev) -{ - struct imx8m_priv *priv = (struct imx8m_priv *)sdev->pdata->hw_pdata; - u32 pwrctl; - - /* put DSP into reset and stall */ - pwrctl = readl(priv->dap + IMX8M_DAP_PWRCTL); - pwrctl |= IMX8M_PWRCTL_CORERESET; - writel(pwrctl, priv->dap + IMX8M_DAP_PWRCTL); - - /* keep reset asserted for 10 cycles */ - usleep_range(1, 2); - - regmap_update_bits(priv->regmap, AudioDSP_REG2, - AudioDSP_REG2_RUNSTALL, AudioDSP_REG2_RUNSTALL); - - /* take the DSP out of reset and keep stalled for FW loading */ - pwrctl = readl(priv->dap + IMX8M_DAP_PWRCTL); - pwrctl &= ~IMX8M_PWRCTL_CORERESET; - writel(pwrctl, priv->dap + IMX8M_DAP_PWRCTL); - + /* TODO: start DSP using Audio MIX bits */ return 0; } @@ -163,11 +142,6 @@ static int imx8m_probe(struct snd_sof_dev *sdev) if (!priv) return -ENOMEM; - priv->clks = devm_kzalloc(&pdev->dev, sizeof(*priv->clks), GFP_KERNEL); - if (!priv->clks) - return -ENOMEM; - - sdev->num_cores = 1; sdev->pdata->hw_pdata = priv; priv->dev = sdev->dev; priv->sdev = sdev; @@ -200,13 +174,6 @@ static int imx8m_probe(struct snd_sof_dev *sdev) goto exit_pdev_unregister; } - priv->dap = devm_ioremap(sdev->dev, IMX8M_DAP_DEBUG, IMX8M_DAP_DEBUG_SIZE); - if (!priv->dap) { - dev_err(sdev->dev, "error: failed to map DAP debug memory area"); - ret = -ENODEV; - goto exit_pdev_unregister; - } - sdev->bar[SOF_FW_BLK_TYPE_IRAM] = devm_ioremap(sdev->dev, base, size); if (!sdev->bar[SOF_FW_BLK_TYPE_IRAM]) { dev_err(sdev->dev, "failed to ioremap base 0x%x size 0x%x\n", @@ -242,25 +209,6 @@ static int imx8m_probe(struct snd_sof_dev 
*sdev) /* set default mailbox offset for FW ready message */ sdev->dsp_box.offset = MBOX_OFFSET; - priv->regmap = syscon_regmap_lookup_by_compatible("fsl,dsp-ctrl"); - if (IS_ERR(priv->regmap)) { - dev_err(sdev->dev, "cannot find dsp-ctrl registers"); - ret = PTR_ERR(priv->regmap); - goto exit_pdev_unregister; - } - - /* init clocks info */ - priv->clks->dsp_clks = imx8m_dsp_clks; - priv->clks->num_dsp_clks = ARRAY_SIZE(imx8m_dsp_clks); - - ret = imx8_parse_clocks(sdev, priv->clks); - if (ret < 0) - goto exit_pdev_unregister; - - ret = imx8_enable_clocks(sdev, priv->clks); - if (ret < 0) - goto exit_pdev_unregister; - return 0; exit_pdev_unregister: @@ -272,7 +220,6 @@ static int imx8m_remove(struct snd_sof_dev *sdev) { struct imx8m_priv *priv = sdev->pdata->hw_pdata; - imx8_disable_clocks(sdev, priv->clks); platform_device_unregister(priv->ipc_dev); return 0; @@ -291,18 +238,21 @@ static int imx8m_get_bar_index(struct snd_sof_dev *sdev, u32 type) } } -static struct snd_soc_dai_driver imx8m_dai[] = { +static void imx8m_ipc_msg_data(struct snd_sof_dev *sdev, + struct snd_pcm_substream *substream, + void *p, size_t sz) { - .name = "sai1", - .playback = { - .channels_min = 1, - .channels_max = 32, - }, - .capture = { - .channels_min = 1, - .channels_max = 32, - }, -}, + sof_mailbox_read(sdev, sdev->dsp_box.offset, p, sz); +} + +static int imx8m_ipc_pcm_params(struct snd_sof_dev *sdev, + struct snd_pcm_substream *substream, + const struct sof_ipc_pcm_params_reply *reply) +{ + return 0; +} + +static struct snd_soc_dai_driver imx8m_dai[] = { { .name = "sai3", .playback = { @@ -316,116 +266,20 @@ static struct snd_soc_dai_driver imx8m_dai[] = { }, }; -static int imx8m_dsp_set_power_state(struct snd_sof_dev *sdev, - const struct sof_dsp_power_state *target_state) -{ - sdev->dsp_power_state = *target_state; - - return 0; -} - -static int imx8m_resume(struct snd_sof_dev *sdev) -{ - struct imx8m_priv *priv = (struct imx8m_priv *)sdev->pdata->hw_pdata; - int ret; - int i; - - ret = imx8_enable_clocks(sdev, priv->clks); - if (ret < 0) - return ret; - - for (i = 0; i < DSP_MU_CHAN_NUM; i++) - imx_dsp_request_channel(priv->dsp_ipc, i); - - return 0; -} - -static void imx8m_suspend(struct snd_sof_dev *sdev) -{ - struct imx8m_priv *priv = (struct imx8m_priv *)sdev->pdata->hw_pdata; - int i; - - for (i = 0; i < DSP_MU_CHAN_NUM; i++) - imx_dsp_free_channel(priv->dsp_ipc, i); - - imx8_disable_clocks(sdev, priv->clks); -} - -static int imx8m_dsp_runtime_resume(struct snd_sof_dev *sdev) -{ - int ret; - const struct sof_dsp_power_state target_dsp_state = { - .state = SOF_DSP_PM_D0, - }; - - ret = imx8m_resume(sdev); - if (ret < 0) - return ret; - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - -static int imx8m_dsp_runtime_suspend(struct snd_sof_dev *sdev) -{ - const struct sof_dsp_power_state target_dsp_state = { - .state = SOF_DSP_PM_D3, - }; - - imx8m_suspend(sdev); - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - -static int imx8m_dsp_resume(struct snd_sof_dev *sdev) -{ - int ret; - const struct sof_dsp_power_state target_dsp_state = { - .state = SOF_DSP_PM_D0, - }; - - ret = imx8m_resume(sdev); - if (ret < 0) - return ret; - - if (pm_runtime_suspended(sdev->dev)) { - pm_runtime_disable(sdev->dev); - pm_runtime_set_active(sdev->dev); - pm_runtime_mark_last_busy(sdev->dev); - pm_runtime_enable(sdev->dev); - pm_runtime_idle(sdev->dev); - } - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - -static int imx8m_dsp_suspend(struct snd_sof_dev *sdev, 
unsigned int target_state) -{ - const struct sof_dsp_power_state target_dsp_state = { - .state = target_state, - }; - - if (!pm_runtime_suspended(sdev->dev)) - imx8m_suspend(sdev); - - return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); -} - /* i.MX8 ops */ -static const struct snd_sof_dsp_ops sof_imx8m_ops = { +struct snd_sof_dsp_ops sof_imx8m_ops = { /* probe and remove */ .probe = imx8m_probe, .remove = imx8m_remove, /* DSP core boot */ .run = imx8m_run, - .reset = imx8m_reset, /* Block IO */ .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, + /* Module IO */ + .read64 = sof_io_read64, /* ipc */ .send_msg = imx8m_send_msg, @@ -433,8 +287,8 @@ static const struct snd_sof_dsp_ops sof_imx8m_ops = { .get_mailbox_offset = imx8m_get_mailbox_offset, .get_window_offset = imx8m_get_window_offset, - .ipc_msg_data = sof_ipc_msg_data, - .ipc_pcm_params = sof_ipc_pcm_params, + .ipc_msg_data = imx8m_ipc_msg_data, + .ipc_pcm_params = imx8m_ipc_pcm_params, /* module loading */ .load_module = snd_sof_parse_module_memcpy, @@ -444,58 +298,21 @@ static const struct snd_sof_dsp_ops sof_imx8m_ops = { /* Debug information */ .dbg_dump = imx8_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, - /* stream callbacks */ - .pcm_open = sof_stream_pcm_open, - .pcm_close = sof_stream_pcm_close, /* Firmware ops */ - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, /* DAI drivers */ .drv = imx8m_dai, .num_drv = ARRAY_SIZE(imx8m_dai), - .suspend = imx8m_dsp_suspend, - .resume = imx8m_dsp_resume, - - .runtime_suspend = imx8m_dsp_runtime_suspend, - .runtime_resume = imx8m_dsp_runtime_resume, - - .set_power_state = imx8m_dsp_set_power_state, - .hw_info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP, }; - -static struct sof_dev_desc sof_of_imx8mp_desc = { - .default_fw_path = "imx/sof", - .default_tplg_path = "imx/sof-tplg", - .default_fw_filename = "sof-imx8m.ri", - .nocodec_tplg_filename = "sof-imx8-nocodec.tplg", - .ops = &sof_imx8m_ops, -}; - -static const struct of_device_id sof_of_imx8m_ids[] = { - { .compatible = "fsl,imx8mp-dsp", .data = &sof_of_imx8mp_desc}, - { } -}; -MODULE_DEVICE_TABLE(of, sof_of_imx8m_ids); - -/* DT driver definition */ -static struct platform_driver snd_sof_of_imx8m_driver = { - .probe = sof_of_probe, - .remove = sof_of_remove, - .driver = { - .name = "sof-audio-of-imx8m", - .pm = &sof_of_pm, - .of_match_table = sof_of_imx8m_ids, - }, -}; -module_platform_driver(snd_sof_of_imx8m_driver); +EXPORT_SYMBOL(sof_imx8m_ops); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/sound/soc/sof/intel/Makefile b/sound/soc/sof/intel/Makefile index 1f473d4d84..feae487f02 100644 --- a/sound/soc/sof/intel/Makefile +++ b/sound/soc/sof/intel/Makefile @@ -3,11 +3,13 @@ snd-sof-acpi-intel-byt-objs := byt.o snd-sof-acpi-intel-bdw-objs := bdw.o +snd-sof-intel-ipc-objs := intel-ipc.o + snd-sof-intel-hda-common-objs := hda.o hda-loader.o hda-stream.o hda-trace.o \ hda-dsp.o hda-ipc.o hda-ctrl.o hda-pcm.o \ hda-dai.o hda-bus.o \ apl.o cnl.o tgl.o icl.o -snd-sof-intel-hda-common-$(CONFIG_SND_SOC_SOF_HDA_PROBES) += hda-probes.o +snd-sof-intel-hda-common-$(CONFIG_SND_SOC_SOF_HDA_PROBES) += hda-compress.o snd-sof-intel-hda-objs := hda-codec.o @@ -16,6 +18,7 @@ snd-sof-intel-atom-objs := atom.o obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += 
snd-sof-intel-atom.o obj-$(CONFIG_SND_SOC_SOF_BAYTRAIL) += snd-sof-acpi-intel-byt.o obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-acpi-intel-bdw.o +obj-$(CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC) += snd-sof-intel-ipc.o obj-$(CONFIG_SND_SOC_SOF_HDA_COMMON) += snd-sof-intel-hda-common.o obj-$(CONFIG_SND_SOC_SOF_HDA) += snd-sof-intel-hda.o diff --git a/sound/soc/sof/intel/apl.c b/sound/soc/sof/intel/apl.c index 810b8b6748..c7ed2b3d6a 100644 --- a/sound/soc/sof/intel/apl.c +++ b/sound/soc/sof/intel/apl.c @@ -42,10 +42,6 @@ const struct snd_sof_dsp_ops sof_apl_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* doorbell */ .irq_thread = hda_dsp_ipc_irq_thread, @@ -69,7 +65,6 @@ const struct snd_sof_dsp_ops sof_apl_ops = { .debug_map_count = ARRAY_SIZE(apl_dsp_debugfs), .dbg_dump = hda_dsp_dump, .ipc_dump = hda_ipc_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ .pcm_open = hda_dsp_pcm_open, @@ -78,7 +73,6 @@ const struct snd_sof_dsp_ops sof_apl_ops = { .pcm_hw_free = hda_dsp_stream_hw_free, .pcm_trigger = hda_dsp_pcm_trigger, .pcm_pointer = hda_dsp_pcm_pointer, - .pcm_ack = hda_dsp_pcm_ack, #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_PROBES) /* probe callbacks */ @@ -102,8 +96,9 @@ const struct snd_sof_dsp_ops sof_apl_ops = { /* parse platform specific extended manifest */ .parse_platform_ext_manifest = hda_dsp_ext_man_get_cavs_config_data, - /* dsp core get/put */ - .core_get = hda_dsp_core_get, + /* dsp core power up/down */ + .core_power_up = hda_dsp_enable_core, + .core_power_down = hda_dsp_core_reset_power_down, /* trace callback */ .trace_init = hda_dsp_trace_init, @@ -130,7 +125,7 @@ const struct snd_sof_dsp_ops sof_apl_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, }; EXPORT_SYMBOL_NS(sof_apl_ops, SND_SOC_SOF_INTEL_HDA_COMMON); @@ -147,6 +142,5 @@ const struct sof_intel_dsp_desc apl_chip_info = { .rom_init_timeout = 150, .ssp_count = APL_SSP_COUNT, .ssp_base_offset = APL_SSP_BASE_OFFSET, - .quirks = SOF_INTEL_PROCEN_FMT_QUIRK, }; EXPORT_SYMBOL_NS(apl_chip_info, SND_SOC_SOF_INTEL_HDA_COMMON); diff --git a/sound/soc/sof/intel/atom.c b/sound/soc/sof/intel/atom.c index ff5900b155..d8804efede 100644 --- a/sound/soc/sof/intel/atom.c +++ b/sound/soc/sof/intel/atom.c @@ -27,6 +27,7 @@ static void atom_host_done(struct snd_sof_dev *sdev); static void atom_dsp_done(struct snd_sof_dev *sdev); +static void atom_get_reply(struct snd_sof_dev *sdev); /* * Debug @@ -70,8 +71,8 @@ void atom_dump(struct snd_sof_dev *sdev, u32 flags) panic = snd_sof_dsp_read64(sdev, DSP_BAR, SHIM_IPCX); atom_get_registers(sdev, &xoops, &panic_info, stack, STACK_DUMP_SIZE); - sof_print_oops_and_stack(sdev, KERN_ERR, status, panic, &xoops, - &panic_info, stack, STACK_DUMP_SIZE); + snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, stack, + STACK_DUMP_SIZE); /* provide some context for firmware debug */ imrx = snd_sof_dsp_read64(sdev, DSP_BAR, SHIM_IMRX); @@ -153,7 +154,8 @@ irqreturn_t atom_irq_thread(int irq, void *context) * because the done bit can't be set in cmd_done function * which is triggered by msg */ - snd_sof_ipc_process_reply(sdev, ipcx); + atom_get_reply(sdev); + snd_sof_ipc_reply(sdev, ipcx); atom_dsp_done(sdev); @@ -165,8 +167,8 @@ irqreturn_t atom_irq_thread(int irq, void *context) /* Handle messages from DSP Core */ if ((ipcd & 
SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) { - snd_sof_dsp_panic(sdev, PANIC_OFFSET(ipcd) + MBOX_OFFSET, - true); + snd_sof_dsp_panic(sdev, PANIC_OFFSET(ipcd) + + MBOX_OFFSET); } else { snd_sof_ipc_msgs_rx(sdev); } @@ -193,6 +195,45 @@ int atom_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *msg) } EXPORT_SYMBOL_NS(atom_send_msg, SND_SOC_SOF_INTEL_ATOM_HIFI_EP); +static void atom_get_reply(struct snd_sof_dev *sdev) +{ + struct snd_sof_ipc_msg *msg = sdev->msg; + struct sof_ipc_reply reply; + int ret = 0; + + /* + * Sometimes, there is unexpected reply ipc arriving. The reply + * ipc belongs to none of the ipcs sent from driver. + * In this case, the driver must ignore the ipc. + */ + if (!msg) { + dev_warn(sdev->dev, "unexpected ipc interrupt raised!\n"); + return; + } + + /* get reply */ + sof_mailbox_read(sdev, sdev->host_box.offset, &reply, sizeof(reply)); + + if (reply.error < 0) { + memcpy(msg->reply_data, &reply, sizeof(reply)); + ret = reply.error; + } else { + /* reply correct size ? */ + if (reply.hdr.size != msg->reply_size) { + dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n", + msg->reply_size, reply.hdr.size); + ret = -EINVAL; + } + + /* read the message */ + if (msg->reply_size > 0) + sof_mailbox_read(sdev, sdev->host_box.offset, + msg->reply_data, msg->reply_size); + } + + msg->reply_error = ret; +} + int atom_get_mailbox_offset(struct snd_sof_dev *sdev) { return MBOX_OFFSET; @@ -242,8 +283,11 @@ int atom_run(struct snd_sof_dev *sdev) break; msleep(100); } - if (tries < 0) + if (tries < 0) { + dev_err(sdev->dev, "error: unable to run DSP firmware\n"); + atom_dump(sdev, SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX); return -ENODEV; + } /* return init core mask */ return 1; @@ -293,7 +337,7 @@ static const char *fixup_tplg_name(struct snd_sof_dev *sdev, return tplg_filename; } -struct snd_soc_acpi_mach *atom_machine_select(struct snd_sof_dev *sdev) +void atom_machine_select(struct snd_sof_dev *sdev) { struct snd_sof_pdata *sof_pdata = sdev->pdata; const struct sof_dev_desc *desc = sof_pdata->desc; @@ -304,7 +348,7 @@ struct snd_soc_acpi_mach *atom_machine_select(struct snd_sof_dev *sdev) mach = snd_soc_acpi_find_machine(desc->machines); if (!mach) { dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n"); - return NULL; + return; } pdev = to_platform_device(sdev->dev); @@ -322,13 +366,12 @@ struct snd_soc_acpi_mach *atom_machine_select(struct snd_sof_dev *sdev) if (!tplg_filename) { dev_dbg(sdev->dev, "error: no topology filename\n"); - return NULL; + return; } sof_pdata->tplg_filename = tplg_filename; mach->mach_params.acpi_ipc_irq_index = desc->irqindex_host_ipc; - - return mach; + sof_pdata->machine = mach; } EXPORT_SYMBOL_NS(atom_machine_select, SND_SOC_SOF_INTEL_ATOM_HIFI_EP); @@ -403,14 +446,14 @@ struct snd_soc_dai_driver atom_dai[] = { }; EXPORT_SYMBOL_NS(atom_dai, SND_SOC_SOF_INTEL_ATOM_HIFI_EP); -void atom_set_mach_params(struct snd_soc_acpi_mach *mach, +void atom_set_mach_params(const struct snd_soc_acpi_mach *mach, struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; struct snd_soc_acpi_mach_params *mach_params; - mach_params = &mach->mach_params; + mach_params = (struct snd_soc_acpi_mach_params *)&mach->mach_params; mach_params->platform = dev_name(sdev->dev); mach_params->num_dai_drivers = desc->ops->num_drv; mach_params->dai_drivers = desc->ops->drv; diff --git a/sound/soc/sof/intel/atom.h b/sound/soc/sof/intel/atom.h index b965e5e080..96a462c7a2 100644 --- 
a/sound/soc/sof/intel/atom.h +++ b/sound/soc/sof/intel/atom.h @@ -65,8 +65,8 @@ int atom_run(struct snd_sof_dev *sdev); int atom_reset(struct snd_sof_dev *sdev); void atom_dump(struct snd_sof_dev *sdev, u32 flags); -struct snd_soc_acpi_mach *atom_machine_select(struct snd_sof_dev *sdev); -void atom_set_mach_params(struct snd_soc_acpi_mach *mach, +void atom_machine_select(struct snd_sof_dev *sdev); +void atom_set_mach_params(const struct snd_soc_acpi_mach *mach, struct snd_sof_dev *sdev); extern struct snd_soc_dai_driver atom_dai[]; diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c index d627b7498d..89a6c1f04a 100644 --- a/sound/soc/sof/intel/bdw.c +++ b/sound/soc/sof/intel/bdw.c @@ -75,6 +75,7 @@ static const struct snd_sof_debugfs_map bdw_debugfs[] = { static void bdw_host_done(struct snd_sof_dev *sdev); static void bdw_dsp_done(struct snd_sof_dev *sdev); +static void bdw_get_reply(struct snd_sof_dev *sdev); /* * DSP Control. @@ -258,8 +259,8 @@ static void bdw_dump(struct snd_sof_dev *sdev, u32 flags) panic = snd_sof_dsp_read(sdev, BDW_DSP_BAR, SHIM_IPCX); bdw_get_registers(sdev, &xoops, &panic_info, stack, BDW_STACK_DUMP_SIZE); - sof_print_oops_and_stack(sdev, KERN_ERR, status, panic, &xoops, - &panic_info, stack, BDW_STACK_DUMP_SIZE); + snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, stack, + BDW_STACK_DUMP_SIZE); /* provide some context for firmware debug */ imrx = snd_sof_dsp_read(sdev, BDW_DSP_BAR, SHIM_IMRX); @@ -325,7 +326,8 @@ static irqreturn_t bdw_irq_thread(int irq, void *context) * because the done bit can't be set in cmd_done function * which is triggered by msg */ - snd_sof_ipc_process_reply(sdev, ipcx); + bdw_get_reply(sdev); + snd_sof_ipc_reply(sdev, ipcx); bdw_dsp_done(sdev); @@ -344,8 +346,8 @@ static irqreturn_t bdw_irq_thread(int irq, void *context) /* Handle messages from DSP Core */ if ((ipcd & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) { - snd_sof_dsp_panic(sdev, BDW_PANIC_OFFSET(ipcx) + MBOX_OFFSET, - true); + snd_sof_dsp_panic(sdev, BDW_PANIC_OFFSET(ipcx) + + MBOX_OFFSET); } else { snd_sof_ipc_msgs_rx(sdev); } @@ -370,6 +372,45 @@ static int bdw_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *msg) return 0; } +static void bdw_get_reply(struct snd_sof_dev *sdev) +{ + struct snd_sof_ipc_msg *msg = sdev->msg; + struct sof_ipc_reply reply; + int ret = 0; + + /* + * Sometimes, there is unexpected reply ipc arriving. The reply + * ipc belongs to none of the ipcs sent from driver. + * In this case, the driver must ignore the ipc. + */ + if (!msg) { + dev_warn(sdev->dev, "unexpected ipc interrupt raised!\n"); + return; + } + + /* get reply */ + sof_mailbox_read(sdev, sdev->host_box.offset, &reply, sizeof(reply)); + + if (reply.error < 0) { + memcpy(msg->reply_data, &reply, sizeof(reply)); + ret = reply.error; + } else { + /* reply correct size ? 
*/ + if (reply.hdr.size != msg->reply_size) { + dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n", + msg->reply_size, reply.hdr.size); + ret = -EINVAL; + } + + /* read the message */ + if (msg->reply_size > 0) + sof_mailbox_read(sdev, sdev->host_box.offset, + msg->reply_data, msg->reply_size); + } + + msg->reply_error = ret; +} + static int bdw_get_mailbox_offset(struct snd_sof_dev *sdev) { return MBOX_OFFSET; @@ -412,19 +453,10 @@ static int bdw_probe(struct snd_sof_dev *sdev) const struct sof_dev_desc *desc = pdata->desc; struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); - const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; int ret; - chip = get_chip_info(sdev->pdata); - if (!chip) { - dev_err(sdev->dev, "error: no such device supported\n"); - return -EIO; - } - - sdev->num_cores = chip->cores_num; - /* LPE base */ mmio = platform_get_resource(pdev, IORESOURCE_MEM, desc->resindex_lpe_base); @@ -503,13 +535,13 @@ static int bdw_probe(struct snd_sof_dev *sdev) return ret; } - /* set default mailbox offset for FW ready message */ - sdev->dsp_box.offset = MBOX_OFFSET; + /* set default mailbox */ + snd_sof_dsp_mailbox_init(sdev, MBOX_OFFSET, MBOX_SIZE, 0, 0); return ret; } -static struct snd_soc_acpi_mach *bdw_machine_select(struct snd_sof_dev *sdev) +static void bdw_machine_select(struct snd_sof_dev *sdev) { struct snd_sof_pdata *sof_pdata = sdev->pdata; const struct sof_dev_desc *desc = sof_pdata->desc; @@ -518,23 +550,22 @@ static struct snd_soc_acpi_mach *bdw_machine_select(struct snd_sof_dev *sdev) mach = snd_soc_acpi_find_machine(desc->machines); if (!mach) { dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n"); - return NULL; + return; } sof_pdata->tplg_filename = mach->sof_tplg_filename; mach->mach_params.acpi_ipc_irq_index = desc->irqindex_host_ipc; - - return mach; + sof_pdata->machine = mach; } -static void bdw_set_mach_params(struct snd_soc_acpi_mach *mach, +static void bdw_set_mach_params(const struct snd_soc_acpi_mach *mach, struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; struct snd_soc_acpi_mach_params *mach_params; - mach_params = &mach->mach_params; + mach_params = (struct snd_soc_acpi_mach_params *)&mach->mach_params; mach_params->platform = dev_name(sdev->dev); mach_params->num_dai_drivers = desc->ops->num_drv; mach_params->dai_drivers = desc->ops->drv; @@ -585,18 +616,14 @@ static const struct snd_sof_dsp_ops sof_bdw_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* ipc */ .send_msg = bdw_send_msg, .fw_ready = sof_fw_ready, .get_mailbox_offset = bdw_get_mailbox_offset, .get_window_offset = bdw_get_window_offset, - .ipc_msg_data = sof_ipc_msg_data, - .ipc_pcm_params = sof_ipc_pcm_params, + .ipc_msg_data = intel_ipc_msg_data, + .ipc_pcm_params = intel_ipc_pcm_params, /* machine driver */ .machine_select = bdw_machine_select, @@ -608,11 +635,10 @@ static const struct snd_sof_dsp_ops sof_bdw_ops = { .debug_map = bdw_debugfs, .debug_map_count = ARRAY_SIZE(bdw_debugfs), .dbg_dump = bdw_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ - .pcm_open = sof_stream_pcm_open, - .pcm_close = sof_stream_pcm_close, + .pcm_open = intel_pcm_open, + .pcm_close = intel_pcm_close, /* Module loading */ .load_module = snd_sof_parse_module_memcpy, @@ -631,7 +657,7 @@ static 
const struct snd_sof_dsp_ops sof_bdw_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_BATCH, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, }; static const struct sof_intel_dsp_desc bdw_chip_info = { diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c index dcfeaedb8f..8edaf6fdd2 100644 --- a/sound/soc/sof/intel/byt.c +++ b/sound/soc/sof/intel/byt.c @@ -113,19 +113,10 @@ static int byt_acpi_probe(struct snd_sof_dev *sdev) const struct sof_dev_desc *desc = pdata->desc; struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); - const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; int ret; - chip = get_chip_info(sdev->pdata); - if (!chip) { - dev_err(sdev->dev, "error: no such device supported\n"); - return -EIO; - } - - sdev->num_cores = chip->cores_num; - /* DSP DMA can only access low 31 bits of host memory */ ret = dma_coerce_mask_and_coherent(sdev->dev, DMA_BIT_MASK(31)); if (ret < 0) { @@ -235,10 +226,6 @@ static const struct snd_sof_dsp_ops sof_byt_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* doorbell */ .irq_handler = atom_irq_handler, .irq_thread = atom_irq_thread, @@ -249,8 +236,8 @@ static const struct snd_sof_dsp_ops sof_byt_ops = { .get_mailbox_offset = atom_get_mailbox_offset, .get_window_offset = atom_get_window_offset, - .ipc_msg_data = sof_ipc_msg_data, - .ipc_pcm_params = sof_ipc_pcm_params, + .ipc_msg_data = intel_ipc_msg_data, + .ipc_pcm_params = intel_ipc_pcm_params, /* machine driver */ .machine_select = atom_machine_select, @@ -262,11 +249,10 @@ static const struct snd_sof_dsp_ops sof_byt_ops = { .debug_map = byt_debugfs, .debug_map_count = ARRAY_SIZE(byt_debugfs), .dbg_dump = atom_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ - .pcm_open = sof_stream_pcm_open, - .pcm_close = sof_stream_pcm_close, + .pcm_open = intel_pcm_open, + .pcm_close = intel_pcm_close, /* module loading */ .load_module = snd_sof_parse_module_memcpy, @@ -289,7 +275,7 @@ static const struct snd_sof_dsp_ops sof_byt_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_BATCH, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, }; static const struct sof_intel_dsp_desc byt_chip_info = { @@ -317,10 +303,6 @@ static const struct snd_sof_dsp_ops sof_cht_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* doorbell */ .irq_handler = atom_irq_handler, .irq_thread = atom_irq_thread, @@ -331,8 +313,8 @@ static const struct snd_sof_dsp_ops sof_cht_ops = { .get_mailbox_offset = atom_get_mailbox_offset, .get_window_offset = atom_get_window_offset, - .ipc_msg_data = sof_ipc_msg_data, - .ipc_pcm_params = sof_ipc_pcm_params, + .ipc_msg_data = intel_ipc_msg_data, + .ipc_pcm_params = intel_ipc_pcm_params, /* machine driver */ .machine_select = atom_machine_select, @@ -344,11 +326,10 @@ static const struct snd_sof_dsp_ops sof_cht_ops = { .debug_map = cht_debugfs, .debug_map_count = ARRAY_SIZE(cht_debugfs), .dbg_dump = atom_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ - .pcm_open = sof_stream_pcm_open, - .pcm_close = sof_stream_pcm_close, + .pcm_open = intel_pcm_open, + .pcm_close = intel_pcm_close, /* module loading */ .load_module = snd_sof_parse_module_memcpy, @@ -372,7 
+353,7 @@ static const struct snd_sof_dsp_ops sof_cht_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_BATCH, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, }; static const struct sof_intel_dsp_desc cht_chip_info = { diff --git a/sound/soc/sof/intel/cnl.c b/sound/soc/sof/intel/cnl.c index e615125d57..e115e12a85 100644 --- a/sound/soc/sof/intel/cnl.c +++ b/sound/soc/sof/intel/cnl.c @@ -82,24 +82,9 @@ irqreturn_t cnl_ipc_irq_thread(int irq, void *context) msg, msg_ext); /* handle messages from DSP */ - if ((hipctdr & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) { - struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; - bool non_recoverable = true; - - /* - * This is a PANIC message! - * - * If it is arriving during firmware boot and it is not - * the last boot attempt then change the non_recoverable - * to false as the DSP might be able to boot in the next - * iteration(s) - */ - if (sdev->fw_state == SOF_FW_BOOT_IN_PROGRESS && - hda->boot_iteration < HDA_FW_BOOT_ATTEMPTS) - non_recoverable = false; - - snd_sof_dsp_panic(sdev, HDA_DSP_PANIC_OFFSET(msg_ext), - non_recoverable); + if ((hipctdr & SOF_IPC_PANIC_MAGIC_MASK) == + SOF_IPC_PANIC_MAGIC) { + snd_sof_dsp_panic(sdev, HDA_DSP_PANIC_OFFSET(msg_ext)); } else { snd_sof_ipc_msgs_rx(sdev); } @@ -262,10 +247,6 @@ const struct snd_sof_dsp_ops sof_cnl_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* doorbell */ .irq_thread = cnl_ipc_irq_thread, @@ -289,7 +270,6 @@ const struct snd_sof_dsp_ops sof_cnl_ops = { .debug_map_count = ARRAY_SIZE(cnl_dsp_debugfs), .dbg_dump = hda_dsp_dump, .ipc_dump = cnl_ipc_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ .pcm_open = hda_dsp_pcm_open, @@ -298,7 +278,6 @@ const struct snd_sof_dsp_ops sof_cnl_ops = { .pcm_hw_free = hda_dsp_stream_hw_free, .pcm_trigger = hda_dsp_pcm_trigger, .pcm_pointer = hda_dsp_pcm_pointer, - .pcm_ack = hda_dsp_pcm_ack, #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_PROBES) /* probe callbacks */ @@ -319,8 +298,9 @@ const struct snd_sof_dsp_ops sof_cnl_ops = { /* parse platform specific extended manifest */ .parse_platform_ext_manifest = hda_dsp_ext_man_get_cavs_config_data, - /* dsp core get/put */ - .core_get = hda_dsp_core_get, + /* dsp core power up/down */ + .core_power_up = hda_dsp_enable_core, + .core_power_down = hda_dsp_core_reset_power_down, /* firmware run */ .run = hda_dsp_cl_boot_firmware, @@ -350,7 +330,7 @@ const struct snd_sof_dsp_ops sof_cnl_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, }; EXPORT_SYMBOL_NS(sof_cnl_ops, SND_SOC_SOF_INTEL_HDA_COMMON); @@ -373,13 +353,6 @@ const struct sof_intel_dsp_desc cnl_chip_info = { }; EXPORT_SYMBOL_NS(cnl_chip_info, SND_SOC_SOF_INTEL_HDA_COMMON); -/* - * JasperLake is technically derived from IceLake, and should be in - * described in icl.c. However since JasperLake was designed with - * two cores, it cannot support the IceLake-specific power-up sequences - * which rely on core3. 
To simplify, JasperLake uses the CannonLake ops and - * is described in cnl.c - */ const struct sof_intel_dsp_desc jsl_chip_info = { /* Jasperlake */ .cores_num = 2, diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 2f3f4a733d..6744318de6 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -20,8 +20,6 @@ #include "../../codecs/hdac_hda.h" #endif /* CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC */ -#define CODEC_PROBE_RETRIES 3 - #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC) #define IDISP_VID_INTEL 0x80860000 @@ -123,15 +121,12 @@ static int hda_codec_probe(struct snd_sof_dev *sdev, int address, u32 hda_cmd = (address << 28) | (AC_NODE_ROOT << 20) | (AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID; u32 resp = -1; - int ret, retry = 0; - - do { - mutex_lock(&hbus->core.cmd_mutex); - snd_hdac_bus_send_cmd(&hbus->core, hda_cmd); - snd_hdac_bus_get_response(&hbus->core, address, &resp); - mutex_unlock(&hbus->core.cmd_mutex); - } while (resp == -1 && retry++ < CODEC_PROBE_RETRIES); + int ret; + mutex_lock(&hbus->core.cmd_mutex); + snd_hdac_bus_send_cmd(&hbus->core, hda_cmd); + snd_hdac_bus_get_response(&hbus->core, address, &resp); + mutex_unlock(&hbus->core.cmd_mutex); if (resp == -1) return -EIO; dev_dbg(sdev->dev, "HDA codec #%d probed OK: response: %x\n", diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c index 0c29bb196e..fa5f0a7189 100644 --- a/sound/soc/sof/intel/hda-ctrl.c +++ b/sound/soc/sof/intel/hda-ctrl.c @@ -353,7 +353,7 @@ void hda_dsp_ctrl_stop_chip(struct snd_sof_dev *sdev) snd_hdac_bus_stop_cmd_io(bus); #endif /* disable position buffer */ - if (bus->use_posbuf && bus->posbuf.addr) { + if (bus->posbuf.addr) { snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, SOF_HDA_ADSP_DPLBASE, 0); snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index cd12589355..6704dbcd10 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -16,11 +16,9 @@ #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) -#if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES) -#include "../sof-probes.h" -#endif - struct hda_pipe_params { + u8 host_dma_id; + u8 link_dma_id; u32 ch; u32 s_freq; u32 s_fmt; @@ -28,6 +26,7 @@ struct hda_pipe_params { snd_pcm_format_t format; int link_index; int stream; + unsigned int host_bps; unsigned int link_bps; }; @@ -153,50 +152,49 @@ static int hda_link_dma_params(struct hdac_ext_stream *stream, return 0; } -/* Update config for the DAI widget */ -static struct sof_ipc_dai_config *hda_dai_update_config(struct snd_soc_dapm_widget *w, - int channel) +/* Send DAI_CONFIG IPC to the DAI that matches the dai_name and direction */ +static int hda_link_config_ipc(struct sof_intel_hda_stream *hda_stream, + const char *dai_name, int channel, int dir) { - struct snd_sof_widget *swidget = w->dobj.private; struct sof_ipc_dai_config *config; struct snd_sof_dai *sof_dai; + struct sof_ipc_reply reply; + int ret = 0; - if (!swidget) - return NULL; + list_for_each_entry(sof_dai, &hda_stream->sdev->dai_list, list) { + if (!sof_dai->cpu_dai_name) + continue; - sof_dai = swidget->private; + if (!strcmp(dai_name, sof_dai->cpu_dai_name) && + dir == sof_dai->comp_dai.direction) { + config = sof_dai->dai_config; - if (!sof_dai || !sof_dai->dai_config) { - dev_err(swidget->scomp->dev, "error: No config for DAI %s\n", w->name); - return NULL; + if (!config) { + dev_err(hda_stream->sdev->dev, + "error: no config for DAI %s\n", + sof_dai->name); + return -EINVAL; + 
} + + /* update config with stream tag */ + config->hda.link_dma_ch = channel; + + /* send IPC */ + ret = sof_ipc_tx_message(hda_stream->sdev->ipc, + config->hdr.cmd, + config, + config->hdr.size, + &reply, sizeof(reply)); + + if (ret < 0) + dev_err(hda_stream->sdev->dev, + "error: failed to set dai config for %s\n", + sof_dai->name); + return ret; + } } - config = &sof_dai->dai_config[sof_dai->current_config]; - - /* update config with stream tag */ - config->hda.link_dma_ch = channel; - - return config; -} - -static int hda_link_dai_widget_update(struct sof_intel_hda_stream *hda_stream, - struct snd_soc_dapm_widget *w, - int channel, bool widget_setup) -{ - struct snd_sof_dev *sdev = hda_stream->sdev; - struct sof_ipc_dai_config *config; - - config = hda_dai_update_config(w, channel); - if (!config) { - dev_err(sdev->dev, "error: no config for DAI %s\n", w->name); - return -ENOENT; - } - - /* set up/free DAI widget and send DAI_CONFIG IPC */ - if (widget_setup) - return hda_ctrl_dai_widget_setup(w, SOF_DAI_CONFIG_FLAGS_2_STEP_STOP); - - return hda_ctrl_dai_widget_free(w, SOF_DAI_CONFIG_FLAGS_NONE); + return -EINVAL; } static int hda_link_hw_params(struct snd_pcm_substream *substream, @@ -210,7 +208,6 @@ static int hda_link_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0); struct sof_intel_hda_stream *hda_stream; struct hda_pipe_params p_params = {0}; - struct snd_soc_dapm_widget *w; struct hdac_ext_link *link; int stream_tag; int ret; @@ -229,13 +226,9 @@ static int hda_link_hw_params(struct snd_pcm_substream *substream, hda_stream = hstream_to_sof_hda_stream(link_dev); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - w = dai->playback_widget; - else - w = dai->capture_widget; - - /* set up the DAI widget and send the DAI_CONFIG with the new tag */ - ret = hda_link_dai_widget_update(hda_stream, w, stream_tag - 1, true); + /* update the DSP with the new tag */ + ret = hda_link_config_ipc(hda_stream, dai->name, stream_tag - 1, + substream->stream); if (ret < 0) return ret; @@ -243,13 +236,17 @@ static int hda_link_hw_params(struct snd_pcm_substream *substream, if (!link) return -EINVAL; - /* set the hdac_stream in the codec dai */ - snd_soc_dai_set_stream(codec_dai, hdac_stream(link_dev), substream->stream); + /* set the stream tag in the codec dai dma params */ + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + snd_soc_dai_set_tdm_slot(codec_dai, stream_tag, 0, 0, 0); + else + snd_soc_dai_set_tdm_slot(codec_dai, 0, stream_tag, 0, 0); p_params.s_fmt = snd_pcm_format_width(params_format(params)); p_params.ch = params_channels(params); p_params.s_freq = params_rate(params); p_params.stream = substream->stream; + p_params.link_dma_id = stream_tag - 1; p_params.link_index = link->index; p_params.format = params_format(params); @@ -280,36 +277,6 @@ static int hda_link_pcm_prepare(struct snd_pcm_substream *substream, dai); } -static int hda_link_dai_config_pause_push_ipc(struct snd_soc_dapm_widget *w) -{ - struct snd_sof_widget *swidget = w->dobj.private; - struct snd_soc_component *component = swidget->scomp; - struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); - struct sof_ipc_dai_config *config; - struct snd_sof_dai *sof_dai; - struct sof_ipc_reply reply; - int ret; - - sof_dai = swidget->private; - - if (!sof_dai || !sof_dai->dai_config) { - dev_err(sdev->dev, "No config for DAI %s\n", w->name); - return -EINVAL; - } - - config = &sof_dai->dai_config[sof_dai->current_config]; - - /* set PAUSE command flag 
*/ - config->flags = FIELD_PREP(SOF_DAI_CONFIG_FLAGS_CMD_MASK, SOF_DAI_CONFIG_FLAGS_PAUSE); - - ret = sof_ipc_tx_message(sdev->ipc, config->hdr.cmd, config, config->hdr.size, - &reply, sizeof(reply)); - if (ret < 0) - dev_err(sdev->dev, "DAI config for %s failed during pause push\n", w->name); - - return ret; -} - static int hda_link_pcm_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -317,7 +284,6 @@ static int hda_link_pcm_trigger(struct snd_pcm_substream *substream, snd_soc_dai_get_dma_data(dai, substream); struct sof_intel_hda_stream *hda_stream; struct snd_soc_pcm_runtime *rtd; - struct snd_soc_dapm_widget *w; struct hdac_ext_link *link; struct hdac_stream *hstream; struct hdac_bus *bus; @@ -335,22 +301,29 @@ static int hda_link_pcm_trigger(struct snd_pcm_substream *substream, hda_stream = hstream_to_sof_hda_stream(link_dev); dev_dbg(dai->dev, "In %s cmd=%d\n", __func__, cmd); - - w = snd_soc_dai_get_widget(dai, substream->stream); - switch (cmd) { + case SNDRV_PCM_TRIGGER_RESUME: + /* set up hw_params */ + ret = hda_link_pcm_prepare(substream, dai); + if (ret < 0) { + dev_err(dai->dev, + "error: setting up hw_params during resume\n"); + return ret; + } + + fallthrough; case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: snd_hdac_ext_link_stream_start(link_dev); break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: - snd_hdac_ext_link_stream_clear(link_dev); - /* - * free DAI widget during stop/suspend to keep widget use_count's balanced. + * clear link DMA channel. It will be assigned when + * hw_params is set up again after resume. */ - ret = hda_link_dai_widget_update(hda_stream, w, DMA_CHAN_INVALID, false); + ret = hda_link_config_ipc(hda_stream, dai->name, + DMA_CHAN_INVALID, substream->stream); if (ret < 0) return ret; @@ -360,13 +333,10 @@ static int hda_link_pcm_trigger(struct snd_pcm_substream *substream, } link_dev->link_prepared = 0; - break; + + fallthrough; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: snd_hdac_ext_link_stream_clear(link_dev); - - ret = hda_link_dai_config_pause_push_ipc(w); - if (ret < 0) - return ret; break; default: return -EINVAL; @@ -384,7 +354,6 @@ static int hda_link_hw_free(struct snd_pcm_substream *substream, struct hdac_stream *hstream; struct snd_soc_pcm_runtime *rtd; struct hdac_ext_stream *link_dev; - struct snd_soc_dapm_widget *w; int ret; hstream = substream->runtime->private_data; @@ -400,13 +369,9 @@ static int hda_link_hw_free(struct snd_pcm_substream *substream, hda_stream = hstream_to_sof_hda_stream(link_dev); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - w = dai->playback_widget; - else - w = dai->capture_widget; - - /* free the link DMA channel in the FW and the DAI widget */ - ret = hda_link_dai_widget_update(hda_stream, w, DMA_CHAN_INVALID, false); + /* free the link DMA channel in the FW */ + ret = hda_link_config_ipc(hda_stream, dai->name, DMA_CHAN_INVALID, + substream->stream); if (ret < 0) return ret; @@ -436,131 +401,61 @@ static const struct snd_soc_dai_ops hda_link_dai_ops = { .prepare = hda_link_pcm_prepare, }; -#endif +#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_PROBES) +#include "../compress.h" -/* only one flag used so far to harden hw_params/hw_free/trigger/prepare */ -struct ssp_dai_dma_data { - bool setup; +static struct snd_soc_cdai_ops sof_probe_compr_ops = { + .startup = sof_probe_compr_open, + .shutdown = sof_probe_compr_free, + .set_params = sof_probe_compr_set_params, + .trigger = sof_probe_compr_trigger, + .pointer = sof_probe_compr_pointer, }; -static 
int ssp_dai_setup_or_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai, - bool setup) -{ - struct snd_soc_component *component; - struct snd_sof_widget *swidget; - struct snd_soc_dapm_widget *w; - struct sof_ipc_fw_version *v; - struct snd_sof_dev *sdev; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - w = dai->playback_widget; - else - w = dai->capture_widget; - - swidget = w->dobj.private; - component = swidget->scomp; - sdev = snd_soc_component_get_drvdata(component); - v = &sdev->fw_ready.version; - - /* DAI_CONFIG IPC during hw_params is not supported in older firmware */ - if (v->abi_version < SOF_ABI_VER(3, 18, 0)) - return 0; - - if (setup) - return hda_ctrl_dai_widget_setup(w, SOF_DAI_CONFIG_FLAGS_NONE); - - return hda_ctrl_dai_widget_free(w, SOF_DAI_CONFIG_FLAGS_NONE); -} - -static int ssp_dai_startup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct ssp_dai_dma_data *dma_data; - - dma_data = kzalloc(sizeof(*dma_data), GFP_KERNEL); - if (!dma_data) - return -ENOMEM; - - snd_soc_dai_set_dma_data(dai, substream, dma_data); - - return 0; -} - -static int ssp_dai_setup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai, - bool setup) -{ - struct ssp_dai_dma_data *dma_data; - int ret = 0; - - dma_data = snd_soc_dai_get_dma_data(dai, substream); - if (!dma_data) { - dev_err(dai->dev, "%s: failed to get dma_data\n", __func__); - return -EIO; - } - - if (dma_data->setup != setup) { - ret = ssp_dai_setup_or_free(substream, dai, setup); - if (!ret) - dma_data->setup = setup; - } - return ret; -} +#endif +#endif static int ssp_dai_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { - /* params are ignored for now */ - return ssp_dai_setup(substream, dai, true); -} + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, SOF_AUDIO_PCM_DRV_NAME); + struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); + struct sof_ipc_fw_version *v = &sdev->fw_ready.version; + struct sof_ipc_dai_config *config; + struct snd_sof_dai *sof_dai; + struct sof_ipc_reply reply; + int ret; -static int ssp_dai_prepare(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - /* - * the SSP will only be reconfigured during resume operations and - * not in case of xruns - */ - return ssp_dai_setup(substream, dai, true); -} - -static int ssp_dai_trigger(struct snd_pcm_substream *substream, - int cmd, struct snd_soc_dai *dai) -{ - if (cmd != SNDRV_PCM_TRIGGER_SUSPEND) + /* DAI_CONFIG IPC during hw_params is not supported in older firmware */ + if (v->abi_version < SOF_ABI_VER(3, 18, 0)) return 0; - return ssp_dai_setup(substream, dai, false); -} + list_for_each_entry(sof_dai, &sdev->dai_list, list) { + if (!sof_dai->cpu_dai_name || !sof_dai->dai_config) + continue; -static int ssp_dai_hw_free(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - return ssp_dai_setup(substream, dai, false); -} + if (!strcmp(dai->name, sof_dai->cpu_dai_name) && + substream->stream == sof_dai->comp_dai.direction) { + config = &sof_dai->dai_config[sof_dai->current_config]; -static void ssp_dai_shutdown(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct ssp_dai_dma_data *dma_data; + /* send IPC */ + ret = sof_ipc_tx_message(sdev->ipc, config->hdr.cmd, config, + config->hdr.size, &reply, sizeof(reply)); - dma_data = snd_soc_dai_get_dma_data(dai, substream); - if (!dma_data) { - 
dev_err(dai->dev, "%s: failed to get dma_data\n", __func__); - return; + if (ret < 0) + dev_err(sdev->dev, "error: failed to set DAI config for %s\n", + sof_dai->name); + return ret; + } } - snd_soc_dai_set_dma_data(dai, substream, NULL); - kfree(dma_data); + + return 0; } static const struct snd_soc_dai_ops ssp_dai_ops = { - .startup = ssp_dai_startup, .hw_params = ssp_dai_hw_params, - .prepare = ssp_dai_prepare, - .trigger = ssp_dai_trigger, - .hw_free = ssp_dai_hw_free, - .shutdown = ssp_dai_shutdown, }; /* diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 916a257ea9..262a70791a 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -34,7 +34,7 @@ MODULE_PARM_DESC(enable_trace_D0I3_S0, * DSP Core control. */ -static int hda_dsp_core_reset_enter(struct snd_sof_dev *sdev, unsigned int core_mask) +int hda_dsp_core_reset_enter(struct snd_sof_dev *sdev, unsigned int core_mask) { u32 adspcs; u32 reset; @@ -73,7 +73,7 @@ static int hda_dsp_core_reset_enter(struct snd_sof_dev *sdev, unsigned int core_ return ret; } -static int hda_dsp_core_reset_leave(struct snd_sof_dev *sdev, unsigned int core_mask) +int hda_dsp_core_reset_leave(struct snd_sof_dev *sdev, unsigned int core_mask) { unsigned int crst; u32 adspcs; @@ -113,7 +113,7 @@ static int hda_dsp_core_reset_leave(struct snd_sof_dev *sdev, unsigned int core_ return ret; } -static int hda_dsp_core_stall_reset(struct snd_sof_dev *sdev, unsigned int core_mask) +int hda_dsp_core_stall_reset(struct snd_sof_dev *sdev, unsigned int core_mask) { /* stall core */ snd_sof_dsp_update_bits_unlocked(sdev, HDA_DSP_BAR, @@ -125,31 +125,6 @@ static int hda_dsp_core_stall_reset(struct snd_sof_dev *sdev, unsigned int core_ return hda_dsp_core_reset_enter(sdev, core_mask); } -static bool hda_dsp_core_is_enabled(struct snd_sof_dev *sdev, unsigned int core_mask) -{ - int val; - bool is_enable; - - val = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS); - -#define MASK_IS_EQUAL(v, m, field) ({ \ - u32 _m = field(m); \ - ((v) & _m) == _m; \ -}) - - is_enable = MASK_IS_EQUAL(val, core_mask, HDA_DSP_ADSPCS_CPA_MASK) && - MASK_IS_EQUAL(val, core_mask, HDA_DSP_ADSPCS_SPA_MASK) && - !(val & HDA_DSP_ADSPCS_CRST_MASK(core_mask)) && - !(val & HDA_DSP_ADSPCS_CSTALL_MASK(core_mask)); - -#undef MASK_IS_EQUAL - - dev_dbg(sdev->dev, "DSP core(s) enabled? %d : core_mask %x\n", - is_enable, core_mask); - - return is_enable; -} - int hda_dsp_core_run(struct snd_sof_dev *sdev, unsigned int core_mask) { int ret; @@ -181,7 +156,7 @@ int hda_dsp_core_run(struct snd_sof_dev *sdev, unsigned int core_mask) * Power Management. 
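
/*
 * A minimal standalone sketch (not part of this patch) of the test that
 * hda_dsp_core_is_enabled() encodes with MASK_IS_EQUAL in the hunk above:
 * every core in the mask must have both SPA (set power active) and CPA
 * (current power active) set, with no CRST (reset) or CSTALL (stall) bits.
 * The ADSPCS bit layout assumed below (CRST [7:0], CSTALL [15:8],
 * SPA [23:16], CPA [31:24]) follows the usual cAVS convention and is
 * illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ADSPCS_CRST(m)   ((uint32_t)(m) << 0)   /* core reset bits */
#define ADSPCS_CSTALL(m) ((uint32_t)(m) << 8)   /* core stall bits */
#define ADSPCS_SPA(m)    ((uint32_t)(m) << 16)  /* set power active */
#define ADSPCS_CPA(m)    ((uint32_t)(m) << 24)  /* current power active */

static bool cores_enabled(uint32_t adspcs, uint32_t core_mask)
{
	/* every core in the mask powered up, none held in reset or stall */
	return (adspcs & ADSPCS_SPA(core_mask)) == ADSPCS_SPA(core_mask) &&
	       (adspcs & ADSPCS_CPA(core_mask)) == ADSPCS_CPA(core_mask) &&
	       !(adspcs & ADSPCS_CRST(core_mask)) &&
	       !(adspcs & ADSPCS_CSTALL(core_mask));
}

int main(void)
{
	/* cores 0 and 1 powered and running */
	printf("%d\n", cores_enabled(ADSPCS_SPA(0x3) | ADSPCS_CPA(0x3), 0x3)); /* 1 */
	/* core 1 powered but still held in reset */
	printf("%d\n", cores_enabled(ADSPCS_SPA(0x3) | ADSPCS_CPA(0x3) |
				     ADSPCS_CRST(0x2), 0x3));                  /* 0 */
	return 0;
}
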
*/ -static int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask) +int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask) { unsigned int cpa; u32 adspcs; @@ -220,7 +195,7 @@ static int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mas return ret; } -static int hda_dsp_core_power_down(struct snd_sof_dev *sdev, unsigned int core_mask) +int hda_dsp_core_power_down(struct snd_sof_dev *sdev, unsigned int core_mask) { u32 adspcs; int ret; @@ -243,6 +218,32 @@ static int hda_dsp_core_power_down(struct snd_sof_dev *sdev, unsigned int core_m return ret; } +bool hda_dsp_core_is_enabled(struct snd_sof_dev *sdev, + unsigned int core_mask) +{ + int val; + bool is_enable; + + val = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS); + +#define MASK_IS_EQUAL(v, m, field) ({ \ + u32 _m = field(m); \ + ((v) & _m) == _m; \ +}) + + is_enable = MASK_IS_EQUAL(val, core_mask, HDA_DSP_ADSPCS_CPA_MASK) && + MASK_IS_EQUAL(val, core_mask, HDA_DSP_ADSPCS_SPA_MASK) && + !(val & HDA_DSP_ADSPCS_CRST_MASK(core_mask)) && + !(val & HDA_DSP_ADSPCS_CSTALL_MASK(core_mask)); + +#undef MASK_IS_EQUAL + + dev_dbg(sdev->dev, "DSP core(s) enabled? %d : core_mask %x\n", + is_enable, core_mask); + + return is_enable; +} + int hda_dsp_enable_core(struct snd_sof_dev *sdev, unsigned int core_mask) { struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; @@ -614,7 +615,7 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend) #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) struct hdac_bus *bus = sof_to_bus(sdev); #endif - int ret, j; + int ret; hda_sdw_int_enable(sdev, false); @@ -629,17 +630,13 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend) #endif /* power down DSP */ - ret = hda_dsp_core_reset_power_down(sdev, chip->host_managed_cores_mask); + ret = snd_sof_dsp_core_power_down(sdev, chip->host_managed_cores_mask); if (ret < 0) { dev_err(sdev->dev, "error: failed to power down core during suspend\n"); return ret; } - /* reset ref counts for all cores */ - for (j = 0; j < chip->cores_num; j++) - sdev->dsp_core_ref_count[j] = 0; - /* disable ppcap interrupt */ hda_dsp_ctrl_ppcap_enable(sdev, false); hda_dsp_ctrl_ppcap_int_enable(sdev, false); @@ -966,47 +963,3 @@ void hda_dsp_d0i3_work(struct work_struct *work) "error: failed to set DSP state %d substate %d\n", target_state.state, target_state.substate); } - -int hda_dsp_core_get(struct snd_sof_dev *sdev, int core) -{ - struct sof_ipc_pm_core_config pm_core_config = { - .hdr = { - .cmd = SOF_IPC_GLB_PM_MSG | SOF_IPC_PM_CORE_ENABLE, - .size = sizeof(pm_core_config), - }, - .enable_mask = sdev->enabled_cores_mask | BIT(core), - }; - int ret, ret1; - - /* power up core */ - ret = hda_dsp_enable_core(sdev, BIT(core)); - if (ret < 0) { - dev_err(sdev->dev, "failed to power up core %d with err: %d\n", - core, ret); - return ret; - } - - /* No need to send IPC for primary core or if FW boot is not complete */ - if (sdev->fw_state != SOF_FW_BOOT_COMPLETE || core == SOF_DSP_PRIMARY_CORE) - return 0; - - /* Now notify DSP for secondary cores */ - ret = sof_ipc_tx_message(sdev->ipc, pm_core_config.hdr.cmd, - &pm_core_config, sizeof(pm_core_config), - &pm_core_config, sizeof(pm_core_config)); - if (ret < 0) { - dev_err(sdev->dev, "failed to enable secondary core '%d' failed with %d\n", - core, ret); - goto power_down; - } - - return ret; - -power_down: - /* power down core if it is host managed and return the original error if this fails too */ - ret1 = hda_dsp_core_reset_power_down(sdev, 
BIT(core)); - if (ret1 < 0) - dev_err(sdev->dev, "failed to power down core: %d with err: %d\n", core, ret1); - - return ret; -} diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c index f0cf8019d7..acfeca4260 100644 --- a/sound/soc/sof/intel/hda-ipc.c +++ b/sound/soc/sof/intel/hda-ipc.c @@ -70,6 +70,7 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev) struct snd_sof_ipc_msg *msg = sdev->msg; struct sof_ipc_reply reply; struct sof_ipc_cmd_hdr *hdr; + int ret = 0; /* * Sometimes, there is unexpected reply ipc arriving. The reply @@ -93,11 +94,35 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev) reply.hdr.cmd = SOF_IPC_GLB_REPLY; reply.hdr.size = sizeof(reply); memcpy(msg->reply_data, &reply, sizeof(reply)); - - msg->reply_error = 0; - } else { - snd_sof_ipc_get_reply(sdev); + goto out; } + + /* get IPC reply from DSP in the mailbox */ + sof_mailbox_read(sdev, sdev->host_box.offset, &reply, + sizeof(reply)); + + if (reply.error < 0) { + memcpy(msg->reply_data, &reply, sizeof(reply)); + ret = reply.error; + } else { + /* reply correct size ? */ + if (reply.hdr.size != msg->reply_size && + /* getter payload is never known upfront */ + ((reply.hdr.cmd & SOF_GLB_TYPE_MASK) != SOF_IPC_GLB_PROBE)) { + dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n", + msg->reply_size, reply.hdr.size); + ret = -EINVAL; + } + + /* read the message */ + if (msg->reply_size > 0) + sof_mailbox_read(sdev, sdev->host_box.offset, + msg->reply_data, msg->reply_size); + } + +out: + msg->reply_error = ret; + } /* IPC handler thread */ @@ -173,23 +198,8 @@ irqreturn_t hda_dsp_ipc_irq_thread(int irq, void *context) /* handle messages from DSP */ if ((hipct & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) { - struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; - bool non_recoverable = true; - - /* - * This is a PANIC message! - * - * If it is arriving during firmware boot and it is not - * the last boot attempt then change the non_recoverable - * to false as the DSP might be able to boot in the next - * iteration(s) - */ - if (sdev->fw_state == SOF_FW_BOOT_IN_PROGRESS && - hda->boot_iteration < HDA_FW_BOOT_ATTEMPTS) - non_recoverable = false; - - snd_sof_dsp_panic(sdev, HDA_DSP_PANIC_OFFSET(msg_ext), - non_recoverable); + /* this is a PANIC message !! 
*/ + snd_sof_dsp_panic(sdev, HDA_DSP_PANIC_OFFSET(msg_ext)); } else { /* normal message - process normally */ snd_sof_ipc_msgs_rx(sdev); @@ -243,9 +253,9 @@ int hda_dsp_ipc_get_window_offset(struct snd_sof_dev *sdev, u32 id) return SRAM_WINDOW_OFFSET(id); } -int hda_ipc_msg_data(struct snd_sof_dev *sdev, - struct snd_pcm_substream *substream, - void *p, size_t sz) +void hda_ipc_msg_data(struct snd_sof_dev *sdev, + struct snd_pcm_substream *substream, + void *p, size_t sz) { if (!substream || !sdev->stream_box.size) { sof_mailbox_read(sdev, sdev->dsp_box.offset, p, sz); @@ -258,13 +268,10 @@ int hda_ipc_msg_data(struct snd_sof_dev *sdev, hda_stream.hstream); /* The stream might already be closed */ - if (!hstream) - return -ESTRPIPE; - - sof_mailbox_read(sdev, hda_stream->stream.posn_offset, p, sz); + if (hstream) + sof_mailbox_read(sdev, hda_stream->stream.posn_offset, + p, sz); } - - return 0; } int hda_ipc_pcm_params(struct snd_sof_dev *sdev, diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 33306d2023..6f4771bf9d 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -23,6 +23,7 @@ #include "../ops.h" #include "hda.h" +#define HDA_FW_BOOT_ATTEMPTS 3 #define HDA_CL_STREAM_FORMAT 0x40 static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsigned int format, @@ -87,14 +88,12 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; const struct sof_intel_dsp_desc *chip = hda->desc; unsigned int status; - unsigned long mask; - char *dump_msg; - u32 flags, j; + u32 flags; int ret; int i; /* step 1: power up corex */ - ret = hda_dsp_enable_core(sdev, chip->host_managed_cores_mask); + ret = snd_sof_dsp_core_power_up(sdev, chip->host_managed_cores_mask); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, "error: dsp core 0/1 power up failed\n"); @@ -149,8 +148,8 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) chip->ipc_ack_mask); /* step 5: power down cores that are no longer needed */ - ret = hda_dsp_core_reset_power_down(sdev, chip->host_managed_cores_mask & - ~(chip->init_core_mask)); + ret = snd_sof_dsp_core_power_down(sdev, chip->host_managed_cores_mask & + ~(chip->init_core_mask)); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, @@ -169,14 +168,8 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) HDA_DSP_REG_POLL_INTERVAL_US, chip->rom_init_timeout * USEC_PER_MSEC); - if (!ret) { - /* set enabled cores mask and increment ref count for cores in init_core_mask */ - sdev->enabled_cores_mask |= chip->init_core_mask; - mask = sdev->enabled_cores_mask; - for_each_set_bit(j, &mask, SOF_MAX_DSP_NUM_CORES) - sdev->dsp_core_ref_count[j]++; + if (!ret) return 0; - } if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, @@ -184,18 +177,15 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) __func__); err: - flags = SOF_DBG_DUMP_PCI | SOF_DBG_DUMP_MBOX | SOF_DBG_DUMP_OPTIONAL; + flags = SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_PCI | SOF_DBG_DUMP_MBOX; - /* after max boot attempts make sure that the dump is printed */ + /* force error log level after max boot attempts */ if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) - flags &= ~SOF_DBG_DUMP_OPTIONAL; + flags |= SOF_DBG_DUMP_FORCE_ERR_LEVEL; - dump_msg = kasprintf(GFP_KERNEL, "Boot iteration failed: %d/%d", - hda->boot_iteration, HDA_FW_BOOT_ATTEMPTS); - 
snd_sof_dsp_dbg_dump(sdev, dump_msg, flags); - hda_dsp_core_reset_power_down(sdev, chip->host_managed_cores_mask); + hda_dsp_dump(sdev, flags); + snd_sof_dsp_core_power_down(sdev, chip->host_managed_cores_mask); - kfree(dump_msg); return ret; } @@ -417,19 +407,17 @@ int hda_dsp_cl_boot_firmware(struct snd_sof_dev *sdev) hda_sdw_process_wakeen(sdev); /* - * Set the boot_iteration to the last attempt, indicating that the - * DSP ROM has been initialized and from this point there will be no - * retry done to boot. - * - * Continue with code loading and firmware boot + * at this point DSP ROM has been initialized and + * should be ready for code loading and firmware boot */ - hda->boot_iteration = HDA_FW_BOOT_ATTEMPTS; ret = cl_copy_fw(sdev, stream); - if (!ret) + if (!ret) { dev_dbg(sdev->dev, "Firmware download successful, booting...\n"); - else - snd_sof_dsp_dbg_dump(sdev, "Firmware download failed", - SOF_DBG_DUMP_PCI | SOF_DBG_DUMP_MBOX); + } else { + hda_dsp_dump(sdev, SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_PCI | SOF_DBG_DUMP_MBOX | + SOF_DBG_DUMP_FORCE_ERR_LEVEL); + dev_err(sdev->dev, "error: load fw failed ret: %d\n", ret); + } cleanup: /* @@ -487,6 +475,46 @@ int hda_dsp_post_fw_run(struct snd_sof_dev *sdev) return hda_dsp_ctrl_clock_power_gating(sdev, true); } +/* + * post fw run operations for ICL, + * Core 3 will be powered up and in stall when HPRO is enabled + */ +int hda_dsp_post_fw_run_icl(struct snd_sof_dev *sdev) +{ + struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; + int ret; + + if (sdev->first_boot) { + ret = hda_sdw_startup(sdev); + if (ret < 0) { + dev_err(sdev->dev, + "error: could not startup SoundWire links\n"); + return ret; + } + } + + hda_sdw_int_enable(sdev, true); + + /* + * The recommended HW programming sequence for ICL is to + * power up core 3 and keep it in stall if HPRO is enabled. + * Major difference between ICL and TGL, on ICL core 3 is managed by + * the host whereas on TGL it is handled by the firmware. 
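
/*
 * A minimal sketch, outside the patch, of the bounded boot-retry pattern the
 * hda-loader.c hunks above revolve around: error-level logging is suppressed
 * until hda->boot_iteration reaches HDA_FW_BOOT_ATTEMPTS, so transient ROM
 * init failures stay quiet while earlier attempts remain possible.
 * try_boot_once() is a hypothetical stand-in for one power-up/ROM-init pass.
 */
#include <stdio.h>

#define FW_BOOT_ATTEMPTS 3

static int try_boot_once(int iteration)
{
	return iteration < FW_BOOT_ATTEMPTS ? -19 /* -ENODEV */ : 0;
}

static int boot_with_retries(void)
{
	int iteration, ret = -19;

	for (iteration = 1; iteration <= FW_BOOT_ATTEMPTS; iteration++) {
		ret = try_boot_once(iteration);
		if (ret == 0)
			break;
		/* only the final attempt is worth an error-level message */
		if (iteration == FW_BOOT_ATTEMPTS)
			fprintf(stderr, "boot failed after %d attempts\n",
				iteration);
	}
	return ret;
}

int main(void)
{
	return boot_with_retries() ? 1 : 0; /* succeeds on the 3rd pass here */
}
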
+ */ + if (!hda->clk_config_lpro) { + ret = snd_sof_dsp_core_power_up(sdev, BIT(3)); + if (ret < 0) { + dev_err(sdev->dev, "error: dsp core power up failed on core 3\n"); + return ret; + } + + snd_sof_dsp_stall(sdev, BIT(3)); + } + + /* re-enable clock gating and power gating */ + return hda_dsp_ctrl_clock_power_gating(sdev, true); +} + int hda_dsp_ext_man_get_cavs_config_data(struct snd_sof_dev *sdev, const struct sof_ext_man_elem_header *hdr) { @@ -524,3 +552,24 @@ int hda_dsp_ext_man_get_cavs_config_data(struct snd_sof_dev *sdev, return 0; } + +int hda_dsp_core_stall_icl(struct snd_sof_dev *sdev, unsigned int core_mask) +{ + struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; + const struct sof_intel_dsp_desc *chip = hda->desc; + + /* make sure core_mask in host managed cores */ + core_mask &= chip->host_managed_cores_mask; + if (!core_mask) { + dev_err(sdev->dev, "error: core_mask is not in host managed cores\n"); + return -EINVAL; + } + + /* stall core */ + snd_sof_dsp_update_bits_unlocked(sdev, HDA_DSP_BAR, + HDA_DSP_REG_ADSPCS, + HDA_DSP_ADSPCS_CSTALL_MASK(core_mask), + HDA_DSP_ADSPCS_CSTALL_MASK(core_mask)); + + return 0; +} diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c index d78aa5d855..cc8ddef37f 100644 --- a/sound/soc/sof/intel/hda-pcm.c +++ b/sound/soc/sof/intel/hda-pcm.c @@ -32,10 +32,6 @@ static bool hda_always_enable_dmi_l1; module_param_named(always_enable_dmi_l1, hda_always_enable_dmi_l1, bool, 0444); MODULE_PARM_DESC(always_enable_dmi_l1, "SOF HDA always enable DMI l1"); -static bool hda_disable_rewinds = IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_DISABLE_REWINDS); -module_param_named(disable_rewinds, hda_disable_rewinds, bool, 0444); -MODULE_PARM_DESC(disable_rewinds, "SOF HDA disable rewinds"); - u32 hda_dsp_get_mult_div(struct snd_sof_dev *sdev, int rate) { switch (rate) { @@ -124,11 +120,8 @@ int hda_dsp_pcm_hw_params(struct snd_sof_dev *sdev, return ret; } - /* enable SPIB when rewinds are disabled */ - if (hda_disable_rewinds) - hda_dsp_stream_spib_config(sdev, stream, HDA_DSP_SPIB_ENABLE, 0); - else - hda_dsp_stream_spib_config(sdev, stream, HDA_DSP_SPIB_DISABLE, 0); + /* disable SPIB, to enable buffer wrap for stream */ + hda_dsp_stream_spib_config(sdev, stream, HDA_DSP_SPIB_DISABLE, 0); /* update no_stream_position flag for ipc params */ if (hda && hda->no_ipc_position) { @@ -147,29 +140,6 @@ int hda_dsp_pcm_hw_params(struct snd_sof_dev *sdev, return 0; } -/* update SPIB register with appl position */ -int hda_dsp_pcm_ack(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream) -{ - struct hdac_stream *hstream = substream->runtime->private_data; - struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream); - struct snd_pcm_runtime *runtime = substream->runtime; - ssize_t appl_pos, buf_size; - u32 spib; - - appl_pos = frames_to_bytes(runtime, runtime->control->appl_ptr); - buf_size = frames_to_bytes(runtime, runtime->buffer_size); - - spib = appl_pos % buf_size; - - /* Allowable value for SPIB is 1 byte to max buffer size */ - if (!spib) - spib = buf_size; - - sof_io_write(sdev, hext_stream->spib_addr, spib); - - return 0; -} - int hda_dsp_pcm_trigger(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream, int cmd) { @@ -202,74 +172,38 @@ snd_pcm_uframes_t hda_dsp_pcm_pointer(struct snd_sof_dev *sdev, goto found; } - switch (sof_hda_position_quirk) { - case SOF_HDA_POSITION_QUIRK_USE_SKYLAKE_LEGACY: - /* - * This legacy code, inherited from the Skylake driver, - * mixes DPIB registers and DPIB DDR updates and - * 
does not seem to follow any known hardware recommendations. - * It's not clear e.g. why there is a different flow - * for capture and playback, the only information that matters is - * what traffic class is used, and on all SOF-enabled platforms - * only VC0 is supported so the work-around was likely not necessary - * and quite possibly wrong. - */ - - /* DPIB/posbuf position mode: - * For Playback, Use DPIB register from HDA space which - * reflects the actual data transferred. - * For Capture, Use the position buffer for pointer, as DPIB - * is not accurate enough, its update may be completed - * earlier than the data written to DDR. - */ - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - pos = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, - AZX_REG_VS_SDXDPIB_XBASE + - (AZX_REG_VS_SDXDPIB_XINTERVAL * - hstream->index)); - } else { - /* - * For capture stream, we need more workaround to fix the - * position incorrect issue: - * - * 1. Wait at least 20us before reading position buffer after - * the interrupt generated(IOC), to make sure position update - * happens on frame boundary i.e. 20.833uSec for 48KHz. - * 2. Perform a dummy Read to DPIB register to flush DMA - * position value. - * 3. Read the DMA Position from posbuf. Now the readback - * value should be >= period boundary. - */ - usleep_range(20, 21); - snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, - AZX_REG_VS_SDXDPIB_XBASE + - (AZX_REG_VS_SDXDPIB_XINTERVAL * - hstream->index)); - pos = snd_hdac_stream_get_pos_posbuf(hstream); - } - break; - case SOF_HDA_POSITION_QUIRK_USE_DPIB_REGISTERS: - /* - * In case VC1 traffic is disabled this is the recommended option - */ + /* + * DPIB/posbuf position mode: + * For Playback, Use DPIB register from HDA space which + * reflects the actual data transferred. + * For Capture, Use the position buffer for pointer, as DPIB + * is not accurate enough, its update may be completed + * earlier than the data written to DDR. + */ + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { pos = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, AZX_REG_VS_SDXDPIB_XBASE + (AZX_REG_VS_SDXDPIB_XINTERVAL * hstream->index)); - break; - case SOF_HDA_POSITION_QUIRK_USE_DPIB_DDR_UPDATE: + } else { /* - * This is the recommended option when VC1 is enabled. - * While this isn't needed for SOF platforms it's added for - * consistency and debug. + * For capture stream, we need more workaround to fix the + * position incorrect issue: + * + * 1. Wait at least 20us before reading position buffer after + * the interrupt generated(IOC), to make sure position update + * happens on frame boundary i.e. 20.833uSec for 48KHz. + * 2. Perform a dummy Read to DPIB register to flush DMA + * position value. + * 3. Read the DMA Position from posbuf. Now the readback + * value should be >= period boundary. */ + usleep_range(20, 21); + snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, + AZX_REG_VS_SDXDPIB_XBASE + + (AZX_REG_VS_SDXDPIB_XINTERVAL * + hstream->index)); pos = snd_hdac_stream_get_pos_posbuf(hstream); - break; - default: - dev_err_once(sdev->dev, "hda_position_quirk value %d not supported\n", - sof_hda_position_quirk); - pos = 0; - break; } if (pos >= hstream->bufsize) @@ -300,13 +234,6 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev, return -EINVAL; } - /* - * if we want the .ack to work, we need to prevent the control from being mapped. - * The status can still be mapped. 
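
/*
 * A small illustrative sketch (values invented) of the SPIB arithmetic in
 * the hda_dsp_pcm_ack() removal above: the register takes a byte position
 * in [1, buffer_size], so an application pointer that lands exactly on a
 * buffer boundary wraps to the full buffer size rather than to 0.
 */
#include <stdio.h>

static unsigned long spib_from_appl_ptr(unsigned long appl_pos,
					unsigned long buf_size)
{
	unsigned long spib = appl_pos % buf_size;

	return spib ? spib : buf_size; /* 0 is not a legal SPIB value */
}

int main(void)
{
	printf("%lu\n", spib_from_appl_ptr(4096, 65536));   /* 4096 */
	printf("%lu\n", spib_from_appl_ptr(131072, 65536)); /* wraps to 65536 */
	return 0;
}
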
- */ - if (hda_disable_rewinds) - runtime->hw.info |= SNDRV_PCM_INFO_NO_REWINDS | SNDRV_PCM_INFO_SYNC_APPLPTR; - /* * All playback streams are DMI L1 capable, capture streams need * pause push/release to be disabled diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c index ba60807fbd..63c367478f 100644 --- a/sound/soc/sof/intel/hda-stream.c +++ b/sound/soc/sof/intel/hda-stream.c @@ -25,33 +25,6 @@ #define HDA_LTRP_GB_VALUE_US 95 -static inline const char *hda_hstream_direction_str(struct hdac_stream *hstream) -{ - if (hstream->direction == SNDRV_PCM_STREAM_PLAYBACK) - return "Playback"; - else - return "Capture"; -} - -static char *hda_hstream_dbg_get_stream_info_str(struct hdac_stream *hstream) -{ - struct snd_soc_pcm_runtime *rtd; - - if (hstream->substream) - rtd = asoc_substream_to_rtd(hstream->substream); - else if (hstream->cstream) - rtd = hstream->cstream->private_data; - else - /* Non audio DMA user, like dma-trace */ - return kasprintf(GFP_KERNEL, "-- (%s, stream_tag: %u)", - hda_hstream_direction_str(hstream), - hstream->stream_tag); - - return kasprintf(GFP_KERNEL, "dai_link \"%s\" (%s, stream_tag: %u)", - rtd->dai_link->name, hda_hstream_direction_str(hstream), - hstream->stream_tag); -} - /* * set up one of BDL entries for a stream */ @@ -116,13 +89,13 @@ int hda_dsp_stream_setup_bdl(struct snd_sof_dev *sdev, int remain, ioc; period_bytes = stream->period_bytes; - dev_dbg(sdev->dev, "%s: period_bytes:0x%x\n", __func__, period_bytes); + dev_dbg(sdev->dev, "period_bytes:0x%x\n", period_bytes); if (!period_bytes) period_bytes = stream->bufsize; periods = stream->bufsize / period_bytes; - dev_dbg(sdev->dev, "%s: periods:%d\n", __func__, periods); + dev_dbg(sdev->dev, "periods:%d\n", periods); remain = stream->bufsize % period_bytes; if (remain) @@ -271,64 +244,25 @@ int hda_dsp_stream_put(struct snd_sof_dev *sdev, int direction, int stream_tag) HDA_VS_INTEL_EM2_L1SEN, HDA_VS_INTEL_EM2_L1SEN); if (!found) { - dev_dbg(sdev->dev, "%s: stream_tag %d not opened!\n", - __func__, stream_tag); + dev_dbg(sdev->dev, "stream_tag %d not opened!\n", stream_tag); return -ENODEV; } return 0; } -static int hda_dsp_stream_reset(struct snd_sof_dev *sdev, struct hdac_stream *hstream) -{ - int sd_offset = SOF_STREAM_SD_OFFSET(hstream); - int timeout = HDA_DSP_STREAM_RESET_TIMEOUT; - u32 val; - - /* enter stream reset */ - snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, sd_offset, SOF_STREAM_SD_OFFSET_CRST, - SOF_STREAM_SD_OFFSET_CRST); - do { - val = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, sd_offset); - if (val & SOF_STREAM_SD_OFFSET_CRST) - break; - } while (--timeout); - if (timeout == 0) { - dev_err(sdev->dev, "timeout waiting for stream reset\n"); - return -ETIMEDOUT; - } - - timeout = HDA_DSP_STREAM_RESET_TIMEOUT; - - /* exit stream reset and wait to read a zero before reading any other register */ - snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, sd_offset, SOF_STREAM_SD_OFFSET_CRST, 0x0); - - /* wait for hardware to report that stream is out of reset */ - udelay(3); - do { - val = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, sd_offset); - if ((val & SOF_STREAM_SD_OFFSET_CRST) == 0) - break; - } while (--timeout); - if (timeout == 0) { - dev_err(sdev->dev, "timeout waiting for stream to exit reset\n"); - return -ETIMEDOUT; - } - - return 0; -} - int hda_dsp_stream_trigger(struct snd_sof_dev *sdev, struct hdac_ext_stream *stream, int cmd) { struct hdac_stream *hstream = &stream->hstream; int sd_offset = SOF_STREAM_SD_OFFSET(hstream); u32 dma_start = 
SOF_HDA_SD_CTL_DMA_START; - int ret = 0; + int ret; u32 run; /* cmd must be for audio stream */ switch (cmd) { + case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_START: snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_INTCTL, @@ -349,9 +283,14 @@ int hda_dsp_stream_trigger(struct snd_sof_dev *sdev, HDA_DSP_REG_POLL_INTERVAL_US, HDA_DSP_STREAM_RUN_TIMEOUT); - if (ret >= 0) - hstream->running = true; + if (ret < 0) { + dev_err(sdev->dev, + "error: %s: cmd %d: timeout on STREAM_SD_OFFSET read\n", + __func__, cmd); + return ret; + } + hstream->running = true; break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: @@ -367,32 +306,27 @@ int hda_dsp_stream_trigger(struct snd_sof_dev *sdev, HDA_DSP_REG_POLL_INTERVAL_US, HDA_DSP_STREAM_RUN_TIMEOUT); - if (ret >= 0) { - snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, - sd_offset + SOF_HDA_ADSP_REG_CL_SD_STS, - SOF_HDA_CL_DMA_SD_INT_MASK); - - hstream->running = false; - snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, - SOF_HDA_INTCTL, - 1 << hstream->index, 0x0); + if (ret < 0) { + dev_err(sdev->dev, + "error: %s: cmd %d: timeout on STREAM_SD_OFFSET read\n", + __func__, cmd); + return ret; } + + snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, sd_offset + + SOF_HDA_ADSP_REG_CL_SD_STS, + SOF_HDA_CL_DMA_SD_INT_MASK); + + hstream->running = false; + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_INTCTL, + 1 << hstream->index, 0x0); break; default: dev_err(sdev->dev, "error: unknown command: %d\n", cmd); return -EINVAL; } - if (ret < 0) { - char *stream_name = hda_hstream_dbg_get_stream_info_str(hstream); - - dev_err(sdev->dev, - "%s: cmd %d on %s: timeout on STREAM_SD_OFFSET read\n", - __func__, cmd, stream_name ? stream_name : "unknown stream"); - kfree(stream_name); - } - - return ret; + return 0; } /* minimal recommended programming for ICCMAX stream */ @@ -471,13 +405,12 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, struct snd_dma_buffer *dmab, struct snd_pcm_hw_params *params) { - const struct sof_intel_dsp_desc *chip = get_chip_info(sdev->pdata); struct hdac_bus *bus = sof_to_bus(sdev); struct hdac_stream *hstream = &stream->hstream; int sd_offset = SOF_STREAM_SD_OFFSET(hstream); - int ret; + int ret, timeout = HDA_DSP_STREAM_RESET_TIMEOUT; u32 dma_start = SOF_HDA_SD_CTL_DMA_START; - u32 mask; + u32 val, mask; u32 run; if (!stream) { @@ -507,12 +440,9 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, HDA_DSP_STREAM_RUN_TIMEOUT); if (ret < 0) { - char *stream_name = hda_hstream_dbg_get_stream_info_str(hstream); - dev_err(sdev->dev, - "%s: on %s: timeout on STREAM_SD_OFFSET read1\n", - __func__, stream_name ? 
stream_name : "unknown stream"); - kfree(stream_name); + "error: %s: timeout on STREAM_SD_OFFSET read1\n", + __func__); return ret; } @@ -522,9 +452,36 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, SOF_HDA_CL_DMA_SD_INT_MASK); /* stream reset */ - ret = hda_dsp_stream_reset(sdev, hstream); - if (ret < 0) - return ret; + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, sd_offset, 0x1, + 0x1); + udelay(3); + do { + val = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, + sd_offset); + if (val & 0x1) + break; + } while (--timeout); + if (timeout == 0) { + dev_err(sdev->dev, "error: stream reset failed\n"); + return -ETIMEDOUT; + } + + timeout = HDA_DSP_STREAM_RESET_TIMEOUT; + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, sd_offset, 0x1, + 0x0); + + /* wait for hardware to report that stream is out of reset */ + udelay(3); + do { + val = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, + sd_offset); + if ((val & 0x1) == 0) + break; + } while (--timeout); + if (timeout == 0) { + dev_err(sdev->dev, "error: timeout waiting for stream reset\n"); + return -ETIMEDOUT; + } if (hstream->posbuf) *hstream->posbuf = 0; @@ -549,12 +506,9 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, HDA_DSP_STREAM_RUN_TIMEOUT); if (ret < 0) { - char *stream_name = hda_hstream_dbg_get_stream_info_str(hstream); - dev_err(sdev->dev, - "%s: on %s: timeout on STREAM_SD_OFFSET read1\n", - __func__, stream_name ? stream_name : "unknown stream"); - kfree(stream_name); + "error: %s: timeout on STREAM_SD_OFFSET read2\n", + __func__); return ret; } @@ -584,7 +538,6 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, /* * Recommended hardware programming sequence for HDAudio DMA format - * on earlier platforms - this is not needed on newer platforms * * 1. Put DMA into coupled mode by clearing PPCTL.PROCEN bit * for corresponding stream index before the time of writing @@ -594,11 +547,9 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, * enable decoupled mode */ - if (chip->quirks & SOF_INTEL_PROCEN_FMT_QUIRK) { - /* couple host and link DMA, disable DSP features */ - snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL, - mask, 0); - } + /* couple host and link DMA, disable DSP features */ + snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL, + mask, 0); /* program stream format */ snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, @@ -606,11 +557,9 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, SOF_HDA_ADSP_REG_CL_SD_FORMAT, 0xffff, hstream->format_val); - if (chip->quirks & SOF_INTEL_PROCEN_FMT_QUIRK) { - /* decouple host and link DMA, enable DSP features */ - snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL, - mask, mask); - } + /* decouple host and link DMA, enable DSP features */ + snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL, + mask, mask); /* program last valid index */ snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, @@ -625,10 +574,9 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev, sd_offset + SOF_HDA_ADSP_REG_CL_SD_BDLPU, upper_32_bits(hstream->bdl.addr)); - /* enable position buffer, if needed */ - if (bus->use_posbuf && bus->posbuf.addr && - !(snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, SOF_HDA_ADSP_DPLBASE) - & SOF_HDA_ADSP_DPLBASE_ENABLE)) { + /* enable position buffer */ + if (!(snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, SOF_HDA_ADSP_DPLBASE) + & SOF_HDA_ADSP_DPLBASE_ENABLE)) { snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, SOF_HDA_ADSP_DPUBASE, upper_32_bits(bus->posbuf.addr)); snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, 
SOF_HDA_ADSP_DPLBASE, @@ -665,11 +613,6 @@ int hda_dsp_stream_hw_free(struct snd_sof_dev *sdev, hstream); struct hdac_bus *bus = sof_to_bus(sdev); u32 mask = 0x1 << stream->index; - int ret; - - ret = hda_dsp_stream_reset(sdev, stream); - if (ret < 0) - return ret; spin_lock_irq(&bus->reg_lock); /* couple host and link DMA if link DMA channel is idle */ @@ -678,8 +621,6 @@ int hda_dsp_stream_hw_free(struct snd_sof_dev *sdev, SOF_HDA_REG_PP_PPCTL, mask, 0); spin_unlock_irq(&bus->reg_lock); - hda_dsp_stream_spib_config(sdev, link_dev, HDA_DSP_SPIB_DISABLE, 0); - stream->substream = NULL; return 0; diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 1385695d77..ef92cca7ae 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -41,100 +41,15 @@ #define EXCEPT_MAX_HDR_SIZE 0x400 #define HDA_EXT_ROM_STATUS_SIZE 8 -int hda_ctrl_dai_widget_setup(struct snd_soc_dapm_widget *w, unsigned int quirk_flags) +static const struct sof_intel_dsp_desc + *get_chip_info(struct snd_sof_pdata *pdata) { - struct snd_sof_widget *swidget = w->dobj.private; - struct snd_soc_component *component = swidget->scomp; - struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); - struct sof_ipc_dai_config *config; - struct snd_sof_dai *sof_dai; - struct sof_ipc_reply reply; - int ret; + const struct sof_dev_desc *desc = pdata->desc; + const struct sof_intel_dsp_desc *chip_info; - sof_dai = swidget->private; + chip_info = desc->chip_info; - if (!sof_dai || !sof_dai->dai_config) { - dev_err(sdev->dev, "No config for DAI %s\n", w->name); - return -EINVAL; - } - - /* DAI already configured, reset it before reconfiguring it */ - if (sof_dai->configured) { - ret = hda_ctrl_dai_widget_free(w, SOF_DAI_CONFIG_FLAGS_NONE); - if (ret < 0) - return ret; - } - - config = &sof_dai->dai_config[sof_dai->current_config]; - - /* - * For static pipelines, the DAI widget would already be set up and calling - * sof_widget_setup() simply returns without doing anything. - * For dynamic pipelines, the DAI widget will be set up now. - */ - ret = sof_widget_setup(sdev, swidget); - if (ret < 0) { - dev_err(sdev->dev, "error: failed setting up DAI widget %s\n", w->name); - return ret; - } - - /* set HW_PARAMS flag along with quirks */ - config->flags = SOF_DAI_CONFIG_FLAGS_HW_PARAMS | - quirk_flags << SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT; - - - /* send DAI_CONFIG IPC */ - ret = sof_ipc_tx_message(sdev->ipc, config->hdr.cmd, config, config->hdr.size, - &reply, sizeof(reply)); - if (ret < 0) { - dev_err(sdev->dev, "error: failed setting DAI config for %s\n", w->name); - return ret; - } - - sof_dai->configured = true; - - return 0; -} - -int hda_ctrl_dai_widget_free(struct snd_soc_dapm_widget *w, unsigned int quirk_flags) -{ - struct snd_sof_widget *swidget = w->dobj.private; - struct snd_soc_component *component = swidget->scomp; - struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); - struct sof_ipc_dai_config *config; - struct snd_sof_dai *sof_dai; - struct sof_ipc_reply reply; - int ret; - - sof_dai = swidget->private; - - if (!sof_dai || !sof_dai->dai_config) { - dev_err(sdev->dev, "error: No config to free DAI %s\n", w->name); - return -EINVAL; - } - - /* nothing to do if hw_free() is called without restarting the stream after resume. 
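/*
 * Sketch of the DAI_CONFIG staging used by the removed
 * hda_ctrl_dai_widget_setup()/_free() pair above: the stage (HW_PARAMS or
 * HW_FREE) sits in the low flag bits, any quirks are shifted in on top,
 * and the config goes to the DSP as a single IPC message.
 */
config->flags = SOF_DAI_CONFIG_FLAGS_HW_PARAMS |
		(quirk_flags << SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT);
ret = sof_ipc_tx_message(sdev->ipc, config->hdr.cmd, config,
			 config->hdr.size, &reply, sizeof(reply));
if (!ret)
	sof_dai->configured = true;	/* the free path clears this even on
					 * IPC failure, keeping the widget
					 * use_count balanced */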
*/ - if (!sof_dai->configured) - return 0; - - config = &sof_dai->dai_config[sof_dai->current_config]; - - /* set HW_FREE flag along with any quirks */ - config->flags = SOF_DAI_CONFIG_FLAGS_HW_FREE | - quirk_flags << SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT; - - ret = sof_ipc_tx_message(sdev->ipc, config->hdr.cmd, config, config->hdr.size, - &reply, sizeof(reply)); - if (ret < 0) - dev_err(sdev->dev, "error: failed resetting DAI config for %s\n", w->name); - - /* - * Reset the configured_flag and free the widget even if the IPC fails to keep - * the widget use_count balanced - */ - sof_dai->configured = false; - - return sof_widget_free(sdev, swidget); + return chip_info; } #if IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) @@ -149,49 +64,36 @@ static int sdw_clock_stop_quirks = SDW_INTEL_CLK_STOP_BUS_RESET; module_param(sdw_clock_stop_quirks, int, 0444); MODULE_PARM_DESC(sdw_clock_stop_quirks, "SOF SoundWire clock stop quirks"); -static int sdw_dai_config_ipc(struct snd_sof_dev *sdev, - struct snd_soc_dapm_widget *w, - int link_id, int alh_stream_id, int dai_id, bool setup) -{ - struct snd_sof_widget *swidget = w->dobj.private; - struct sof_ipc_dai_config *config; - struct snd_sof_dai *sof_dai; - - if (!swidget) { - dev_err(sdev->dev, "error: No private data for widget %s\n", w->name); - return -EINVAL; - } - - sof_dai = swidget->private; - - if (!sof_dai || !sof_dai->dai_config) { - dev_err(sdev->dev, "error: No config for DAI %s\n", w->name); - return -EINVAL; - } - - config = &sof_dai->dai_config[sof_dai->current_config]; - - /* update config with link and stream ID */ - config->dai_index = (link_id << 8) | dai_id; - config->alh.stream_id = alh_stream_id; - - if (setup) - return hda_ctrl_dai_widget_setup(w, SOF_DAI_CONFIG_FLAGS_NONE); - - return hda_ctrl_dai_widget_free(w, SOF_DAI_CONFIG_FLAGS_NONE); -} - static int sdw_params_stream(struct device *dev, struct sdw_intel_stream_params_data *params_data) { struct snd_sof_dev *sdev = dev_get_drvdata(dev); struct snd_soc_dai *d = params_data->dai; - struct snd_soc_dapm_widget *w; + struct sof_ipc_dai_config config; + struct sof_ipc_reply reply; + int link_id = params_data->link_id; + int alh_stream_id = params_data->alh_stream_id; + int ret; + u32 size = sizeof(config); - w = snd_soc_dai_get_widget(d, params_data->stream); + memset(&config, 0, size); + config.hdr.size = size; + config.hdr.cmd = SOF_IPC_GLB_DAI_MSG | SOF_IPC_DAI_CONFIG; + config.type = SOF_DAI_INTEL_ALH; + config.dai_index = (link_id << 8) | (d->id); + config.alh.stream_id = alh_stream_id; - return sdw_dai_config_ipc(sdev, w, params_data->link_id, params_data->alh_stream_id, - d->id, true); + /* send message to DSP */ + ret = sof_ipc_tx_message(sdev->ipc, + config.hdr.cmd, &config, size, &reply, + sizeof(reply)); + if (ret < 0) { + dev_err(sdev->dev, + "error: failed to set DAI hw_params for link %d dai->id %d ALH %d\n", + link_id, d->id, alh_stream_id); + } + + return ret; } static int sdw_free_stream(struct device *dev, @@ -199,12 +101,30 @@ static int sdw_free_stream(struct device *dev, { struct snd_sof_dev *sdev = dev_get_drvdata(dev); struct snd_soc_dai *d = free_data->dai; - struct snd_soc_dapm_widget *w; + struct sof_ipc_dai_config config; + struct sof_ipc_reply reply; + int link_id = free_data->link_id; + int ret; + u32 size = sizeof(config); - w = snd_soc_dai_get_widget(d, free_data->stream); + memset(&config, 0, size); + config.hdr.size = size; + config.hdr.cmd = SOF_IPC_GLB_DAI_MSG | SOF_IPC_DAI_CONFIG; + config.type = SOF_DAI_INTEL_ALH; + config.dai_index = (link_id << 
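/*
 * The ALH DAI addressing built in sdw_params_stream() above packs the
 * SoundWire link number into the upper byte of dai_index and the DAI id
 * into the lower byte, so one field carries both:
 */
config.dai_index = (link_id << 8) | d->id;  /* e.g. link 2, dai 3 -> 0x0203 */
config.alh.stream_id = alh_stream_id;       /* PDI stream on that link */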
8) | d->id; + config.alh.stream_id = 0xFFFF; /* invalid value on purpose */ - /* send invalid stream_id */ - return sdw_dai_config_ipc(sdev, w, free_data->link_id, 0xFFFF, d->id, false); + /* send message to DSP */ + ret = sof_ipc_tx_message(sdev->ipc, + config.hdr.cmd, &config, size, &reply, + sizeof(reply)); + if (ret < 0) { + dev_err(sdev->dev, + "error: failed to free stream for link %d dai->id %d\n", + link_id, d->id); + } + + return ret; } static const struct sdw_intel_ops sdw_callback = { @@ -374,38 +294,7 @@ void hda_sdw_process_wakeen(struct snd_sof_dev *sdev) sdw_intel_process_wakeen_event(hdev->sdw); } -#else /* IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) */ -static inline int hda_sdw_acpi_scan(struct snd_sof_dev *sdev) -{ - return 0; -} - -static inline int hda_sdw_probe(struct snd_sof_dev *sdev) -{ - return 0; -} - -static inline int hda_sdw_exit(struct snd_sof_dev *sdev) -{ - return 0; -} - -static inline bool hda_dsp_check_sdw_irq(struct snd_sof_dev *sdev) -{ - return false; -} - -static inline irqreturn_t hda_dsp_sdw_thread(int irq, void *context) -{ - return IRQ_HANDLED; -} - -static inline bool hda_sdw_check_wakeen_irq(struct snd_sof_dev *sdev) -{ - return false; -} - -#endif /* IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) */ +#endif /* * Debug @@ -424,10 +313,6 @@ MODULE_PARM_DESC(use_msi, "SOF HDA use PCI MSI mode"); #define hda_use_msi (1) #endif -int sof_hda_position_quirk = SOF_HDA_POSITION_QUIRK_USE_DPIB_REGISTERS; -module_param_named(position_quirk, sof_hda_position_quirk, int, 0444); -MODULE_PARM_DESC(position_quirk, "SOF HDaudio position quirk"); - static char *hda_model; module_param(hda_model, charp, 0444); MODULE_PARM_DESC(hda_model, "Use the given HDA board model."); @@ -466,7 +351,7 @@ static const struct hda_dsp_msg_code hda_dsp_rom_msg[] = { {HDA_DSP_ROM_NULL_FW_ENTRY, "error: null FW entry point"}, }; -static void hda_dsp_get_status(struct snd_sof_dev *sdev, const char *level) +static void hda_dsp_get_status(struct snd_sof_dev *sdev) { u32 status; int i; @@ -476,8 +361,8 @@ static void hda_dsp_get_status(struct snd_sof_dev *sdev, const char *level) for (i = 0; i < ARRAY_SIZE(hda_dsp_rom_msg); i++) { if (status == hda_dsp_rom_msg[i].code) { - dev_printk(level, sdev->dev, "%s - code %8.8x\n", - hda_dsp_rom_msg[i].msg, status); + dev_err(sdev->dev, "%s - code %8.8x\n", + hda_dsp_rom_msg[i].msg, status); return; } } @@ -515,8 +400,7 @@ static void hda_dsp_get_registers(struct snd_sof_dev *sdev, } /* dump the first 8 dwords representing the extended ROM status */ -static void hda_dsp_dump_ext_rom_status(struct snd_sof_dev *sdev, const char *level, - u32 flags) +static void hda_dsp_dump_ext_rom_status(struct snd_sof_dev *sdev, u32 flags) { char msg[128]; int len = 0; @@ -528,30 +412,31 @@ static void hda_dsp_dump_ext_rom_status(struct snd_sof_dev *sdev, const char *le len += snprintf(msg + len, sizeof(msg) - len, " 0x%x", value); } - dev_printk(level, sdev->dev, "extended rom status: %s", msg); + sof_dev_dbg_or_err(sdev->dev, flags & SOF_DBG_DUMP_FORCE_ERR_LEVEL, + "extended rom status: %s", msg); } void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags) { - char *level = flags & SOF_DBG_DUMP_OPTIONAL ? KERN_DEBUG : KERN_ERR; struct sof_ipc_dsp_oops_xtensa xoops; struct sof_ipc_panic_info panic_info; u32 stack[HDA_DSP_STACK_DUMP_SIZE]; /* print ROM/FW status */ - hda_dsp_get_status(sdev, level); + hda_dsp_get_status(sdev); - if (flags & SOF_DBG_DUMP_REGS) { + /* print panic info if FW boot is complete. 
Otherwise, print the extended ROM status */ + if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) { u32 status = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_SRAM_REG_FW_STATUS); u32 panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_SRAM_REG_FW_TRACEP); hda_dsp_get_registers(sdev, &xoops, &panic_info, stack, HDA_DSP_STACK_DUMP_SIZE); - sof_print_oops_and_stack(sdev, level, status, panic, &xoops, - &panic_info, stack, HDA_DSP_STACK_DUMP_SIZE); + snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, + stack, HDA_DSP_STACK_DUMP_SIZE); } else { - hda_dsp_dump_ext_rom_status(sdev, level, flags); + hda_dsp_dump_ext_rom_status(sdev, flags); } } @@ -571,9 +456,12 @@ void hda_ipc_irq_dump(struct snd_sof_dev *sdev) ppsts = snd_sof_dsp_read(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPSTS); rirbsts = snd_hdac_chip_readb(bus, RIRBSTS); - dev_err(sdev->dev, "hda irq intsts 0x%8.8x intlctl 0x%8.8x rirb %2.2x\n", + dev_err(sdev->dev, + "error: hda irq intsts 0x%8.8x intlctl 0x%8.8x rirb %2.2x\n", intsts, intctl, rirbsts); - dev_err(sdev->dev, "dsp irq ppsts 0x%8.8x adspis 0x%8.8x\n", ppsts, adspis); + dev_err(sdev->dev, + "error: dsp irq ppsts 0x%8.8x adspis 0x%8.8x\n", + ppsts, adspis); } void hda_ipc_dump(struct snd_sof_dev *sdev) @@ -591,7 +479,8 @@ void hda_ipc_dump(struct snd_sof_dev *sdev) /* dump the IPC regs */ /* TODO: parse the raw msg */ - dev_err(sdev->dev, "host status 0x%8.8x dsp status 0x%8.8x mask 0x%8.8x\n", + dev_err(sdev->dev, + "error: host status 0x%8.8x dsp status 0x%8.8x mask 0x%8.8x\n", hipcie, hipct, hipcctl); } @@ -608,10 +497,7 @@ static int hda_init(struct snd_sof_dev *sdev) /* HDA bus init */ sof_hda_bus_init(bus, &pci->dev); - if (sof_hda_position_quirk == SOF_HDA_POSITION_QUIRK_USE_DPIB_REGISTERS) - bus->use_posbuf = 0; - else - bus->use_posbuf = 1; + bus->use_posbuf = 1; bus->bdl_pos_adj = 0; bus->sync_write = 1; @@ -908,8 +794,6 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) goto err; } - sdev->num_cores = chip->cores_num; - hdev = devm_kzalloc(sdev->dev, sizeof(*hdev), GFP_KERNEL); if (!hdev) return -ENOMEM; @@ -956,7 +840,6 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) dev_dbg(sdev->dev, "DMA mask is 32 bit\n"); dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)); } - dma_set_max_seg_size(&pci->dev, UINT_MAX); /* init streams */ ret = hda_dsp_stream_init(sdev); @@ -1046,9 +929,9 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) int hda_dsp_remove(struct snd_sof_dev *sdev) { struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; - const struct sof_intel_dsp_desc *chip = hda->desc; struct hdac_bus *bus = sof_to_bus(sdev); struct pci_dev *pci = to_pci_dev(sdev->dev); + const struct sof_intel_dsp_desc *chip = hda->desc; /* cancel any attempt for DSP D0I3 */ cancel_delayed_work_sync(&hda->d0i3_work); @@ -1073,7 +956,7 @@ int hda_dsp_remove(struct snd_sof_dev *sdev) /* disable cores */ if (chip) - hda_dsp_core_reset_power_down(sdev, chip->host_managed_cores_mask); + snd_sof_dsp_core_power_down(sdev, chip->host_managed_cores_mask); /* disable DSP */ snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL, @@ -1100,8 +983,7 @@ int hda_dsp_remove(struct snd_sof_dev *sdev) } #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) -static void hda_generic_machine_select(struct snd_sof_dev *sdev, - struct snd_soc_acpi_mach **mach) +static int hda_generic_machine_select(struct snd_sof_dev *sdev) { struct hdac_bus *bus = sof_to_bus(sdev); struct snd_soc_acpi_mach_params *mach_params; @@ -1133,7 +1015,7 @@ static void hda_generic_machine_select(struct snd_sof_dev *sdev, * - one HDMI codec, 
and/or * - one external HDAudio codec */ - if (!*mach && codec_num <= 2) { + if (!pdata->machine && codec_num <= 2) { hda_mach = snd_soc_acpi_intel_hda_machines; dev_info(bus->dev, "using HDA machine driver %s now\n", @@ -1148,9 +1030,10 @@ static void hda_generic_machine_select(struct snd_sof_dev *sdev, tplg_filename = hda_mach->sof_tplg_filename; ret = dmic_topology_fixup(sdev, &tplg_filename, idisp_str, &dmic_num); if (ret < 0) - return; + return ret; hda_mach->mach_params.dmic_num = dmic_num; + pdata->machine = hda_mach; pdata->tplg_filename = tplg_filename; if (codec_num == 2) { @@ -1160,22 +1043,23 @@ static void hda_generic_machine_select(struct snd_sof_dev *sdev, */ hda_mach->mach_params.link_mask = 0; } - - *mach = hda_mach; } } /* used by hda machine driver to create dai links */ - if (*mach) { - mach_params = &(*mach)->mach_params; + if (pdata->machine) { + mach_params = (struct snd_soc_acpi_mach_params *) + &pdata->machine->mach_params; mach_params->codec_mask = bus->codec_mask; mach_params->common_hdmi_codec_drv = hda_codec_use_common_hdmi; } + + return 0; } #else -static void hda_generic_machine_select(struct snd_sof_dev *sdev, - struct snd_soc_acpi_mach **mach) +static int hda_generic_machine_select(struct snd_sof_dev *sdev) { + return 0; } #endif @@ -1258,7 +1142,7 @@ static bool link_slaves_found(struct snd_sof_dev *sdev, return true; } -static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev) +static int hda_sdw_machine_select(struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct snd_soc_acpi_link_adr *link; @@ -1276,7 +1160,7 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev * machines, for mixed cases with I2C/I2S the detection relies * on the HID list. 
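/*
 * Condensed sketch of the generic-HDA fallback completed in the hunk
 * above: when no machine has matched and at most two codecs were found,
 * pick the generic HDA machine, fix up the DMIC topology filename, and
 * clear the SSP link mask when an external HDA codec is also present.
 */
if (!pdata->machine && codec_num <= 2) {
	hda_mach = snd_soc_acpi_intel_hda_machines;
	tplg_filename = hda_mach->sof_tplg_filename;
	ret = dmic_topology_fixup(sdev, &tplg_filename, idisp_str, &dmic_num);
	if (ret < 0)
		return ret;

	hda_mach->mach_params.dmic_num = dmic_num;
	pdata->machine = hda_mach;
	pdata->tplg_filename = tplg_filename;

	if (codec_num == 2)
		hda_mach->mach_params.link_mask = 0;  /* external codec present */
}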
*/ - if (link_mask) { + if (link_mask && !pdata->machine) { for (mach = pdata->desc->alt_machines; mach && mach->link_mask; mach++) { /* @@ -1311,6 +1195,7 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev if (mach && mach->link_mask) { int dmic_num = 0; + pdata->machine = mach; mach->mach_params.links = mach->links; mach->mach_params.link_mask = mach->link_mask; mach->mach_params.platform = dev_name(sdev->dev); @@ -1332,8 +1217,9 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev int ret; ret = dmic_topology_fixup(sdev, &tplg_filename, "", &dmic_num); + if (ret < 0) - return NULL; + return ret; pdata->tplg_filename = tplg_filename; } @@ -1343,36 +1229,35 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev "SoundWire machine driver %s topology %s\n", mach->drv_name, pdata->tplg_filename); - - return mach; + } else { + dev_info(sdev->dev, + "No SoundWire machine driver found\n"); } - - dev_info(sdev->dev, "No SoundWire machine driver found\n"); } - return NULL; + return 0; } #else -static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev) +static int hda_sdw_machine_select(struct snd_sof_dev *sdev) { - return NULL; + return 0; } #endif -void hda_set_mach_params(struct snd_soc_acpi_mach *mach, +void hda_set_mach_params(const struct snd_soc_acpi_mach *mach, struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; struct snd_soc_acpi_mach_params *mach_params; - mach_params = &mach->mach_params; + mach_params = (struct snd_soc_acpi_mach_params *)&mach->mach_params; mach_params->platform = dev_name(sdev->dev); mach_params->num_dai_drivers = desc->ops->num_drv; mach_params->dai_drivers = desc->ops->drv; } -struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) +void hda_machine_select(struct snd_sof_dev *sdev) { struct snd_sof_pdata *sof_pdata = sdev->pdata; const struct sof_dev_desc *desc = sof_pdata->desc; @@ -1387,6 +1272,8 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) if (!sof_pdata->tplg_filename) sof_pdata->tplg_filename = mach->sof_tplg_filename; + sof_pdata->machine = mach; + if (mach->link_mask) { mach->mach_params.links = mach->links; mach->mach_params.link_mask = mach->link_mask; @@ -1396,18 +1283,16 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) /* * If I2S fails, try SoundWire */ - if (!mach) - mach = hda_sdw_machine_select(sdev); + hda_sdw_machine_select(sdev); /* * Choose HDA generic machine driver if mach is NULL. * Otherwise, set certain mach params. 
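/*
 * Sketch of the SoundWire machine match performed in the loop above: a
 * candidate fits when every link it requires is present in the
 * controller's reported link_mask (a bitmask subset test, assumed here
 * from the surrounding logic) and link_slaves_found() confirms the
 * expected peripherals on those links. The inner per-link walk of
 * mach->links is elided.
 */
for (mach = pdata->desc->alt_machines; mach && mach->link_mask; mach++) {
	/* skip if the machine needs a link the controller doesn't have */
	if (mach->link_mask & ~link_mask)
		continue;

	/* ...then verify the ACPI-reported peripherals per link via
	 * link_slaves_found() before accepting the match... */
	break;
}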
*/ - hda_generic_machine_select(sdev, &mach); - if (!mach) - dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n"); + hda_generic_machine_select(sdev); - return mach; + if (!sof_pdata->machine) + dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n"); } int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 03a6bb7a16..4fdfb10864 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -273,7 +273,7 @@ #define BXT_D0I3_DELAY 5000 #define FW_CL_STREAM_NUMBER 0x1 -#define HDA_FW_BOOT_ATTEMPTS 3 +#define HDA_FW_BOOT_ATTEMPTS 3 /* ADSPCS - Audio DSP Control & Status */ @@ -487,18 +487,24 @@ struct sof_intel_hda_stream { (SOF_HDA_ADSP_SD_ENTRY_SIZE * ((s)->index) \ + SOF_HDA_ADSP_LOADER_BASE) -#define SOF_STREAM_SD_OFFSET_CRST 0x1 - /* * DSP Core services. */ int hda_dsp_probe(struct snd_sof_dev *sdev); int hda_dsp_remove(struct snd_sof_dev *sdev); +int hda_dsp_core_reset_enter(struct snd_sof_dev *sdev, + unsigned int core_mask); +int hda_dsp_core_reset_leave(struct snd_sof_dev *sdev, + unsigned int core_mask); +int hda_dsp_core_stall_reset(struct snd_sof_dev *sdev, unsigned int core_mask); int hda_dsp_core_run(struct snd_sof_dev *sdev, unsigned int core_mask); +int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask); int hda_dsp_enable_core(struct snd_sof_dev *sdev, unsigned int core_mask); +int hda_dsp_core_power_down(struct snd_sof_dev *sdev, unsigned int core_mask); +bool hda_dsp_core_is_enabled(struct snd_sof_dev *sdev, + unsigned int core_mask); int hda_dsp_core_reset_power_down(struct snd_sof_dev *sdev, unsigned int core_mask); -int hda_dsp_core_get(struct snd_sof_dev *sdev, int core); void hda_dsp_ipc_int_enable(struct snd_sof_dev *sdev); void hda_dsp_ipc_int_disable(struct snd_sof_dev *sdev); @@ -536,7 +542,6 @@ int hda_dsp_pcm_trigger(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream, int cmd); snd_pcm_uframes_t hda_dsp_pcm_pointer(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); -int hda_dsp_pcm_ack(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); /* * DSP Stream Operations. 
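/*
 * The overall fallback order implemented by hda_machine_select() in the
 * hunk above, as a sketch. snd_soc_acpi_find_machine() is the usual ACPI
 * matcher and is assumed here; this hunk does not show the call itself.
 */
mach = snd_soc_acpi_find_machine(desc->machines);  /* 1. I2S/ACPI match */
if (mach)
	sof_pdata->machine = mach;

hda_sdw_machine_select(sdev);     /* 2. SoundWire, if nothing matched yet */
hda_generic_machine_select(sdev); /* 3. generic HDA fallback */

if (!sof_pdata->machine)
	dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n");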
@@ -567,9 +572,9 @@ int hda_dsp_stream_spib_config(struct snd_sof_dev *sdev, struct hdac_ext_stream *stream, int enable, u32 size); -int hda_ipc_msg_data(struct snd_sof_dev *sdev, - struct snd_pcm_substream *substream, - void *p, size_t sz); +void hda_ipc_msg_data(struct snd_sof_dev *sdev, + struct snd_pcm_substream *substream, + void *p, size_t sz); int hda_ipc_pcm_params(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream, const struct sof_ipc_pcm_params_reply *reply); @@ -614,10 +619,14 @@ int hda_dsp_ipc_cmd_done(struct snd_sof_dev *sdev, int dir); */ int hda_dsp_cl_boot_firmware(struct snd_sof_dev *sdev); int hda_dsp_cl_boot_firmware_iccmax(struct snd_sof_dev *sdev); +int hda_dsp_cl_boot_firmware_iccmax_icl(struct snd_sof_dev *sdev); +int hda_dsp_cl_boot_firmware_skl(struct snd_sof_dev *sdev); /* pre and post fw run ops */ int hda_dsp_pre_fw_run(struct snd_sof_dev *sdev); int hda_dsp_post_fw_run(struct snd_sof_dev *sdev); +int hda_dsp_post_fw_run_icl(struct snd_sof_dev *sdev); +int hda_dsp_core_stall_icl(struct snd_sof_dev *sdev, unsigned int core_mask); /* parse platform specific ext manifest ops */ int hda_dsp_ext_man_get_cavs_config_data(struct snd_sof_dev *sdev, @@ -686,15 +695,45 @@ bool hda_common_check_sdw_irq(struct snd_sof_dev *sdev); #else +static inline int hda_sdw_acpi_scan(struct snd_sof_dev *sdev) +{ + return 0; +} + +static inline int hda_sdw_probe(struct snd_sof_dev *sdev) +{ + return 0; +} + static inline int hda_sdw_startup(struct snd_sof_dev *sdev) { return 0; } +static inline int hda_sdw_exit(struct snd_sof_dev *sdev) +{ + return 0; +} + static inline void hda_sdw_int_enable(struct snd_sof_dev *sdev, bool enable) { } +static inline bool hda_dsp_check_sdw_irq(struct snd_sof_dev *sdev) +{ + return false; +} + +static inline irqreturn_t hda_dsp_sdw_thread(int irq, void *context) +{ + return IRQ_HANDLED; +} + +static inline bool hda_sdw_check_wakeen_irq(struct snd_sof_dev *sdev) +{ + return false; +} + static inline void hda_sdw_process_wakeen(struct snd_sof_dev *sdev) { } @@ -728,22 +767,11 @@ extern const struct sof_intel_dsp_desc jsl_chip_info; extern const struct sof_intel_dsp_desc adls_chip_info; /* machine driver select */ -struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev); -void hda_set_mach_params(struct snd_soc_acpi_mach *mach, +void hda_machine_select(struct snd_sof_dev *sdev); +void hda_set_mach_params(const struct snd_soc_acpi_mach *mach, struct snd_sof_dev *sdev); /* PCI driver selection and probe */ int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id); -struct snd_sof_dai; -struct sof_ipc_dai_config; -int hda_ctrl_dai_widget_setup(struct snd_soc_dapm_widget *w, unsigned int quirk_flags); -int hda_ctrl_dai_widget_free(struct snd_soc_dapm_widget *w, unsigned int quirk_flags); - -#define SOF_HDA_POSITION_QUIRK_USE_SKYLAKE_LEGACY (0) /* previous implementation */ -#define SOF_HDA_POSITION_QUIRK_USE_DPIB_REGISTERS (1) /* recommended if VC0 only */ -#define SOF_HDA_POSITION_QUIRK_USE_DPIB_DDR_UPDATE (2) /* recommended with VC0 or VC1 */ - -extern int sof_hda_position_quirk; - #endif diff --git a/sound/soc/sof/intel/icl.c b/sound/soc/sof/intel/icl.c index f75e398396..ee095b8f2d 100644 --- a/sound/soc/sof/intel/icl.c +++ b/sound/soc/sof/intel/icl.c @@ -18,75 +18,12 @@ #include "hda-ipc.h" #include "../sof-audio.h" -#define ICL_DSP_HPRO_CORE_ID 3 - static const struct snd_sof_debugfs_map icl_dsp_debugfs[] = { {"hda", HDA_DSP_HDA_BAR, 0, 0x4000, SOF_DEBUGFS_ACCESS_ALWAYS}, {"pp", HDA_DSP_PP_BAR, 0, 0x1000, 
SOF_DEBUGFS_ACCESS_ALWAYS}, {"dsp", HDA_DSP_BAR, 0, 0x10000, SOF_DEBUGFS_ACCESS_ALWAYS}, }; -static int icl_dsp_core_stall(struct snd_sof_dev *sdev, unsigned int core_mask) -{ - struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; - const struct sof_intel_dsp_desc *chip = hda->desc; - - /* make sure core_mask in host managed cores */ - core_mask &= chip->host_managed_cores_mask; - if (!core_mask) { - dev_err(sdev->dev, "error: core_mask is not in host managed cores\n"); - return -EINVAL; - } - - /* stall core */ - snd_sof_dsp_update_bits_unlocked(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS, - HDA_DSP_ADSPCS_CSTALL_MASK(core_mask), - HDA_DSP_ADSPCS_CSTALL_MASK(core_mask)); - - return 0; -} - -/* - * post fw run operation for ICL. - * Core 3 will be powered up and in stall when HPRO is enabled - */ -static int icl_dsp_post_fw_run(struct snd_sof_dev *sdev) -{ - struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; - int ret; - - if (sdev->first_boot) { - ret = hda_sdw_startup(sdev); - if (ret < 0) { - dev_err(sdev->dev, "error: could not startup SoundWire links\n"); - return ret; - } - } - - hda_sdw_int_enable(sdev, true); - - /* - * The recommended HW programming sequence for ICL is to - * power up core 3 and keep it in stall if HPRO is enabled. - */ - if (!hda->clk_config_lpro) { - ret = hda_dsp_enable_core(sdev, BIT(ICL_DSP_HPRO_CORE_ID)); - if (ret < 0) { - dev_err(sdev->dev, "error: dsp core power up failed on core %d\n", - ICL_DSP_HPRO_CORE_ID); - return ret; - } - - sdev->enabled_cores_mask |= BIT(ICL_DSP_HPRO_CORE_ID); - sdev->dsp_core_ref_count[ICL_DSP_HPRO_CORE_ID]++; - - snd_sof_dsp_stall(sdev, BIT(ICL_DSP_HPRO_CORE_ID)); - } - - /* re-enable clock gating and power gating */ - return hda_dsp_ctrl_clock_power_gating(sdev, true); -} - /* Icelake ops */ const struct snd_sof_dsp_ops sof_icl_ops = { /* probe/remove/shutdown */ @@ -104,10 +41,6 @@ const struct snd_sof_dsp_ops sof_icl_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* doorbell */ .irq_thread = cnl_ipc_irq_thread, @@ -131,7 +64,6 @@ const struct snd_sof_dsp_ops sof_icl_ops = { .debug_map_count = ARRAY_SIZE(icl_dsp_debugfs), .dbg_dump = hda_dsp_dump, .ipc_dump = cnl_ipc_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ .pcm_open = hda_dsp_pcm_open, @@ -140,7 +72,6 @@ const struct snd_sof_dsp_ops sof_icl_ops = { .pcm_hw_free = hda_dsp_stream_hw_free, .pcm_trigger = hda_dsp_pcm_trigger, .pcm_pointer = hda_dsp_pcm_pointer, - .pcm_ack = hda_dsp_pcm_ack, #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_PROBES) /* probe callbacks */ @@ -156,17 +87,18 @@ const struct snd_sof_dsp_ops sof_icl_ops = { /* pre/post fw run */ .pre_fw_run = hda_dsp_pre_fw_run, - .post_fw_run = icl_dsp_post_fw_run, + .post_fw_run = hda_dsp_post_fw_run_icl, /* parse platform specific extended manifest */ .parse_platform_ext_manifest = hda_dsp_ext_man_get_cavs_config_data, - /* dsp core get/put */ - .core_get = hda_dsp_core_get, + /* dsp core power up/down */ + .core_power_up = hda_dsp_enable_core, + .core_power_down = hda_dsp_core_reset_power_down, /* firmware run */ .run = hda_dsp_cl_boot_firmware_iccmax, - .stall = icl_dsp_core_stall, + .stall = hda_dsp_core_stall_icl, /* trace callback */ .trace_init = hda_dsp_trace_init, @@ -193,7 +125,7 @@ const struct snd_sof_dsp_ops sof_icl_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = 
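/*
 * Sketch of the ICL-specific post-boot step the removed
 * icl_dsp_post_fw_run() above performs: with HPRO (i.e. not the LPRO
 * clock config), core 3 is powered up, accounted as enabled, and then
 * deliberately left stalled, per the recommended hardware sequence.
 */
if (!hda->clk_config_lpro) {
	ret = hda_dsp_enable_core(sdev, BIT(ICL_DSP_HPRO_CORE_ID));
	if (ret < 0)
		return ret;

	sdev->enabled_cores_mask |= BIT(ICL_DSP_HPRO_CORE_ID);
	sdev->dsp_core_ref_count[ICL_DSP_HPRO_CORE_ID]++;

	snd_sof_dsp_stall(sdev, BIT(ICL_DSP_HPRO_CORE_ID));
}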
&sof_xtensa_arch_ops, }; EXPORT_SYMBOL_NS(sof_icl_ops, SND_SOC_SOF_INTEL_HDA_COMMON); diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c index a023b3cc0a..f89e746c25 100644 --- a/sound/soc/sof/intel/pci-apl.c +++ b/sound/soc/sof/intel/pci-apl.c @@ -26,6 +26,7 @@ static const struct sof_dev_desc bxt_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &apl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -41,6 +42,7 @@ static const struct sof_dev_desc glk_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &apl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c index 40cf1cd000..f23257adf2 100644 --- a/sound/soc/sof/intel/pci-cnl.c +++ b/sound/soc/sof/intel/pci-cnl.c @@ -27,6 +27,7 @@ static const struct sof_dev_desc cnl_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &cnl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -43,6 +44,7 @@ static const struct sof_dev_desc cfl_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &cnl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -59,6 +61,7 @@ static const struct sof_dev_desc cml_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &cnl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c index 39c84121b3..2f60c28ae8 100644 --- a/sound/soc/sof/intel/pci-icl.c +++ b/sound/soc/sof/intel/pci-icl.c @@ -27,6 +27,7 @@ static const struct sof_dev_desc icl_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &icl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -42,6 +43,7 @@ static const struct sof_dev_desc jsl_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &jsl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index fd46210f17..beb2fb3cd0 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -27,6 +27,7 @@ static const struct sof_dev_desc tgl_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &tgl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -43,6 +44,7 @@ static const struct sof_dev_desc tglh_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &tglh_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -58,6 +60,7 @@ static const struct sof_dev_desc ehl_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &ehl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -74,6 +77,7 @@ static const struct 
sof_dev_desc adls_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &adls_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", @@ -90,6 +94,7 @@ static const struct sof_dev_desc adl_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = -1, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &tgl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c index f8c841caa3..4bded668b6 100644 --- a/sound/soc/sof/intel/pci-tng.c +++ b/sound/soc/sof/intel/pci-tng.c @@ -55,18 +55,9 @@ static int tangier_pci_probe(struct snd_sof_dev *sdev) struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; struct pci_dev *pci = to_pci_dev(sdev->dev); - const struct sof_intel_dsp_desc *chip; u32 base, size; int ret; - chip = get_chip_info(sdev->pdata); - if (!chip) { - dev_err(sdev->dev, "error: no such device supported\n"); - return -EIO; - } - - sdev->num_cores = chip->cores_num; - /* DSP DMA can only access low 31 bits of host memory */ ret = dma_coerce_mask_and_coherent(&pci->dev, DMA_BIT_MASK(31)); if (ret < 0) { @@ -151,10 +142,6 @@ const struct snd_sof_dsp_ops sof_tng_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* doorbell */ .irq_handler = atom_irq_handler, .irq_thread = atom_irq_thread, @@ -165,8 +152,8 @@ const struct snd_sof_dsp_ops sof_tng_ops = { .get_mailbox_offset = atom_get_mailbox_offset, .get_window_offset = atom_get_window_offset, - .ipc_msg_data = sof_ipc_msg_data, - .ipc_pcm_params = sof_ipc_pcm_params, + .ipc_msg_data = intel_ipc_msg_data, + .ipc_pcm_params = intel_ipc_pcm_params, /* machine driver */ .machine_select = atom_machine_select, @@ -178,11 +165,10 @@ const struct snd_sof_dsp_ops sof_tng_ops = { .debug_map = tng_debugfs, .debug_map_count = ARRAY_SIZE(tng_debugfs), .dbg_dump = atom_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ - .pcm_open = sof_stream_pcm_open, - .pcm_close = sof_stream_pcm_close, + .pcm_open = intel_pcm_open, + .pcm_close = intel_pcm_close, /* module loading */ .load_module = snd_sof_parse_module_memcpy, @@ -201,7 +187,7 @@ const struct snd_sof_dsp_ops sof_tng_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_BATCH, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, }; const struct sof_intel_dsp_desc tng_chip_info = { @@ -215,6 +201,7 @@ static const struct sof_dev_desc tng_desc = { .resindex_pcicfg_base = -1, .resindex_imr_base = 0, .irqindex_host_ipc = -1, + .resindex_dma_base = -1, .chip_info = &tng_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", diff --git a/sound/soc/sof/intel/shim.h b/sound/soc/sof/intel/shim.h index f36cd9d5eb..e9f7d4d7fc 100644 --- a/sound/soc/sof/intel/shim.h +++ b/sound/soc/sof/intel/shim.h @@ -151,9 +151,6 @@ #define PCI_PMCS 0x84 #define PCI_PMCS_PS_MASK 0x3 -/* Intel quirks */ -#define SOF_INTEL_PROCEN_FMT_QUIRK BIT(0) - /* DSP hardware descriptor */ struct sof_intel_dsp_desc { int cores_num; @@ -169,7 +166,6 @@ struct sof_intel_dsp_desc { int ssp_base_offset; /* base address of the SSPs */ u32 sdw_shim_base; u32 sdw_alh_base; - u32 quirks; bool (*check_sdw_irq)(struct snd_sof_dev *sdev); }; @@ -181,11 +177,4 @@ struct sof_intel_stream { size_t posn_offset; }; 
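/*
 * How callers consumed the get_chip_info() inline removed just below
 * (and re-added as a static helper in hda.c earlier in this patch), as
 * seen in the tangier_pci_probe() hunk above: look up the per-platform
 * DSP descriptor and derive the core count from it.
 */
const struct sof_intel_dsp_desc *chip = get_chip_info(sdev->pdata);

if (!chip) {
	dev_err(sdev->dev, "error: no such device supported\n");
	return -EIO;
}
sdev->num_cores = chip->cores_num;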
-static inline const struct sof_intel_dsp_desc *get_chip_info(struct snd_sof_pdata *pdata) -{ - const struct sof_dev_desc *desc = pdata->desc; - - return desc->chip_info; -} - #endif diff --git a/sound/soc/sof/intel/tgl.c b/sound/soc/sof/intel/tgl.c index 7f7929c5cb..199d41a7dc 100644 --- a/sound/soc/sof/intel/tgl.c +++ b/sound/soc/sof/intel/tgl.c @@ -20,46 +20,6 @@ static const struct snd_sof_debugfs_map tgl_dsp_debugfs[] = { {"dsp", HDA_DSP_BAR, 0, 0x10000, SOF_DEBUGFS_ACCESS_ALWAYS}, }; -static int tgl_dsp_core_get(struct snd_sof_dev *sdev, int core) -{ - struct sof_ipc_pm_core_config pm_core_config = { - .hdr = { - .cmd = SOF_IPC_GLB_PM_MSG | SOF_IPC_PM_CORE_ENABLE, - .size = sizeof(pm_core_config), - }, - .enable_mask = sdev->enabled_cores_mask | BIT(core), - }; - - /* power up primary core if not already powered up and return */ - if (core == SOF_DSP_PRIMARY_CORE) - return hda_dsp_enable_core(sdev, BIT(core)); - - /* notify DSP for secondary cores */ - return sof_ipc_tx_message(sdev->ipc, pm_core_config.hdr.cmd, - &pm_core_config, sizeof(pm_core_config), - &pm_core_config, sizeof(pm_core_config)); -} - -static int tgl_dsp_core_put(struct snd_sof_dev *sdev, int core) -{ - struct sof_ipc_pm_core_config pm_core_config = { - .hdr = { - .cmd = SOF_IPC_GLB_PM_MSG | SOF_IPC_PM_CORE_ENABLE, - .size = sizeof(pm_core_config), - }, - .enable_mask = sdev->enabled_cores_mask & ~BIT(core), - }; - - /* power down primary core and return */ - if (core == SOF_DSP_PRIMARY_CORE) - return hda_dsp_core_reset_power_down(sdev, BIT(core)); - - /* notify DSP for secondary cores */ - return sof_ipc_tx_message(sdev->ipc, pm_core_config.hdr.cmd, - &pm_core_config, sizeof(pm_core_config), - &pm_core_config, sizeof(pm_core_config)); -} - /* Tigerlake ops */ const struct snd_sof_dsp_ops sof_tgl_ops = { /* probe/remove/shutdown */ @@ -77,10 +37,6 @@ const struct snd_sof_dsp_ops sof_tgl_ops = { .block_read = sof_block_read, .block_write = sof_block_write, - /* Mailbox IO */ - .mailbox_read = sof_mailbox_read, - .mailbox_write = sof_mailbox_write, - /* doorbell */ .irq_thread = cnl_ipc_irq_thread, @@ -104,7 +60,6 @@ const struct snd_sof_dsp_ops sof_tgl_ops = { .debug_map_count = ARRAY_SIZE(tgl_dsp_debugfs), .dbg_dump = hda_dsp_dump, .ipc_dump = cnl_ipc_dump, - .debugfs_add_region_item = snd_sof_debugfs_add_region_item_iomem, /* stream callbacks */ .pcm_open = hda_dsp_pcm_open, @@ -113,7 +68,6 @@ const struct snd_sof_dsp_ops sof_tgl_ops = { .pcm_hw_free = hda_dsp_stream_hw_free, .pcm_trigger = hda_dsp_pcm_trigger, .pcm_pointer = hda_dsp_pcm_pointer, - .pcm_ack = hda_dsp_pcm_ack, #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_PROBES) /* probe callbacks */ @@ -134,9 +88,9 @@ const struct snd_sof_dsp_ops sof_tgl_ops = { /* parse platform specific extended manifest */ .parse_platform_ext_manifest = hda_dsp_ext_man_get_cavs_config_data, - /* dsp core get/put */ - .core_get = tgl_dsp_core_get, - .core_put = tgl_dsp_core_put, + /* dsp core power up/down */ + .core_power_up = hda_dsp_enable_core, + .core_power_down = hda_dsp_core_reset_power_down, /* firmware run */ .run = hda_dsp_cl_boot_firmware_iccmax, @@ -166,7 +120,7 @@ const struct snd_sof_dsp_ops sof_tgl_ops = { SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP, - .dsp_arch_ops = &sof_xtensa_arch_ops, + .arch_ops = &sof_xtensa_arch_ops, }; EXPORT_SYMBOL_NS(sof_tgl_ops, SND_SOC_SOF_INTEL_HDA_COMMON); diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c index 5bcf906d90..c2d07b783f 100644 --- a/sound/soc/sof/ipc.c +++ b/sound/soc/sof/ipc.c @@ -18,7 +18,7 @@ #include 
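/*
 * Sketch of the secondary-core policy in the removed tgl_dsp_core_get()/
 * tgl_dsp_core_put() above: the primary core is driven directly through
 * the HDA register helpers, while secondary cores are toggled by sending
 * the firmware an updated enable_mask over SOF_IPC_PM_CORE_ENABLE.
 * `enable` distinguishes the get and put directions.
 */
pm_core_config.enable_mask = enable ?
	(sdev->enabled_cores_mask | BIT(core)) :
	(sdev->enabled_cores_mask & ~BIT(core));

if (core == SOF_DSP_PRIMARY_CORE)
	return enable ? hda_dsp_enable_core(sdev, BIT(core)) :
			hda_dsp_core_reset_power_down(sdev, BIT(core));

return sof_ipc_tx_message(sdev->ipc, pm_core_config.hdr.cmd,
			  &pm_core_config, sizeof(pm_core_config),
			  &pm_core_config, sizeof(pm_core_config));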
"sof-audio.h" #include "ops.h" -static void ipc_trace_message(struct snd_sof_dev *sdev, u32 msg_type); +static void ipc_trace_message(struct snd_sof_dev *sdev, u32 msg_id); static void ipc_stream_message(struct snd_sof_dev *sdev, u32 msg_cmd); /* @@ -173,22 +173,7 @@ static void ipc_log_header(struct device *dev, u8 *text, u32 cmd) } break; case SOF_IPC_GLB_TRACE_MSG: - str = "GLB_TRACE_MSG"; - switch (type) { - case SOF_IPC_TRACE_DMA_PARAMS: - str2 = "DMA_PARAMS"; break; - case SOF_IPC_TRACE_DMA_POSITION: - str2 = "DMA_POSITION"; break; - case SOF_IPC_TRACE_DMA_PARAMS_EXT: - str2 = "DMA_PARAMS_EXT"; break; - case SOF_IPC_TRACE_FILTER_UPDATE: - str2 = "FILTER_UPDATE"; break; - case SOF_IPC_TRACE_DMA_FREE: - str2 = "DMA_FREE"; break; - default: - str2 = "unknown type"; break; - } - break; + str = "GLB_TRACE_MSG"; break; case SOF_IPC_GLB_TEST_MSG: str = "GLB_TEST_MSG"; switch (type) { @@ -207,29 +192,6 @@ static void ipc_log_header(struct device *dev, u8 *text, u32 cmd) str2 = "unknown type"; break; } break; - case SOF_IPC_GLB_PROBE: - str = "GLB_PROBE"; - switch (type) { - case SOF_IPC_PROBE_INIT: - str2 = "INIT"; break; - case SOF_IPC_PROBE_DEINIT: - str2 = "DEINIT"; break; - case SOF_IPC_PROBE_DMA_ADD: - str2 = "DMA_ADD"; break; - case SOF_IPC_PROBE_DMA_INFO: - str2 = "DMA_INFO"; break; - case SOF_IPC_PROBE_DMA_REMOVE: - str2 = "DMA_REMOVE"; break; - case SOF_IPC_PROBE_POINT_ADD: - str2 = "POINT_ADD"; break; - case SOF_IPC_PROBE_POINT_INFO: - str2 = "POINT_INFO"; break; - case SOF_IPC_PROBE_POINT_REMOVE: - str2 = "POINT_REMOVE"; break; - default: - str2 = "unknown type"; break; - } - break; default: str = "unknown GLB command"; break; } @@ -264,17 +226,15 @@ static int tx_wait_done(struct snd_sof_ipc *ipc, struct snd_sof_ipc_msg *msg, msecs_to_jiffies(sdev->ipc_timeout)); if (ret == 0) { - dev_err(sdev->dev, - "ipc tx timed out for %#x (msg/reply size: %d/%zu)\n", - hdr->cmd, hdr->size, msg->reply_size); + dev_err(sdev->dev, "error: ipc timed out for 0x%x size %d\n", + hdr->cmd, hdr->size); snd_sof_handle_fw_exception(ipc->sdev); ret = -ETIMEDOUT; } else { ret = msg->reply_error; if (ret < 0) { - dev_err(sdev->dev, - "ipc tx error for %#x (msg/reply size: %d/%zu): %d\n", - hdr->cmd, hdr->size, msg->reply_size, ret); + dev_err(sdev->dev, "error: ipc error for 0x%x size %zu\n", + hdr->cmd, msg->reply_size); } else { ipc_log_header(sdev->dev, "ipc tx succeeded", hdr->cmd); if (msg->reply_size) @@ -282,12 +242,6 @@ static int tx_wait_done(struct snd_sof_ipc *ipc, struct snd_sof_ipc_msg *msg, memcpy(reply_data, msg->reply_data, msg->reply_size); } - - /* re-enable dumps after successful IPC tx */ - if (sdev->ipc_dump_printed) { - sdev->dbg_dump_printed = false; - sdev->ipc_dump_printed = false; - } } return ret; @@ -302,7 +256,7 @@ static int sof_ipc_tx_message_unlocked(struct snd_sof_ipc *ipc, u32 header, struct snd_sof_ipc_msg *msg; int ret; - if (ipc->disable_ipc_tx || sdev->fw_state != SOF_FW_BOOT_COMPLETE) + if (ipc->disable_ipc_tx) return -ENODEV; /* @@ -332,7 +286,7 @@ static int sof_ipc_tx_message_unlocked(struct snd_sof_ipc *ipc, u32 header, spin_unlock_irq(&sdev->ipc_lock); - if (ret) { + if (ret < 0) { dev_err_ratelimited(sdev->dev, "error: ipc tx failed with error %d\n", ret); @@ -342,7 +296,10 @@ static int sof_ipc_tx_message_unlocked(struct snd_sof_ipc *ipc, u32 header, ipc_log_header(sdev->dev, "ipc tx", msg->header); /* now wait for completion */ - return tx_wait_done(ipc, msg, reply_data); + if (!ret) + ret = tx_wait_done(ipc, msg, reply_data); + + return ret; } /* send IPC 
message from host to DSP */ @@ -394,67 +351,6 @@ int sof_ipc_tx_message_no_pm(struct snd_sof_ipc *ipc, u32 header, } EXPORT_SYMBOL(sof_ipc_tx_message_no_pm); -/* Generic helper function to retrieve the reply */ -void snd_sof_ipc_get_reply(struct snd_sof_dev *sdev) -{ - struct snd_sof_ipc_msg *msg = sdev->msg; - struct sof_ipc_reply reply; - int ret = 0; - - /* - * Sometimes, there is unexpected reply ipc arriving. The reply - * ipc belongs to none of the ipcs sent from driver. - * In this case, the driver must ignore the ipc. - */ - if (!msg) { - dev_warn(sdev->dev, "unexpected ipc interrupt raised!\n"); - return; - } - - /* get the generic reply */ - snd_sof_dsp_mailbox_read(sdev, sdev->host_box.offset, &reply, - sizeof(reply)); - - if (reply.error < 0) { - memcpy(msg->reply_data, &reply, sizeof(reply)); - ret = reply.error; - } else if (!reply.hdr.size) { - /* Reply should always be >= sizeof(struct sof_ipc_reply) */ - if (msg->reply_size) - dev_err(sdev->dev, - "empty reply received, expected %zu bytes\n", - msg->reply_size); - else - dev_err(sdev->dev, "empty reply received\n"); - - ret = -EINVAL; - } else if (msg->reply_size > 0) { - if (reply.hdr.size == msg->reply_size) { - ret = 0; - } else if (reply.hdr.size < msg->reply_size) { - dev_dbg(sdev->dev, - "reply size (%u) is less than expected (%zu)\n", - reply.hdr.size, msg->reply_size); - - msg->reply_size = reply.hdr.size; - ret = 0; - } else { - dev_err(sdev->dev, - "reply size (%u) exceeds the buffer size (%zu)\n", - reply.hdr.size, msg->reply_size); - ret = -EINVAL; - } - - /* get the full message if reply.hdr.size <= msg->reply_size */ - if (!ret) - snd_sof_dsp_mailbox_read(sdev, sdev->host_box.offset, - msg->reply_data, msg->reply_size); - } - - msg->reply_error = ret; -} -EXPORT_SYMBOL(snd_sof_ipc_get_reply); - /* handle reply message from DSP */ void snd_sof_ipc_reply(struct snd_sof_dev *sdev, u32 msg_id) { @@ -473,52 +369,15 @@ void snd_sof_ipc_reply(struct snd_sof_dev *sdev, u32 msg_id) } EXPORT_SYMBOL(snd_sof_ipc_reply); -static void ipc_comp_notification(struct snd_sof_dev *sdev, - struct sof_ipc_cmd_hdr *hdr) -{ - u32 msg_type = hdr->cmd & SOF_CMD_TYPE_MASK; - struct sof_ipc_ctrl_data *cdata; - int ret; - - switch (msg_type) { - case SOF_IPC_COMP_GET_VALUE: - case SOF_IPC_COMP_GET_DATA: - cdata = kmalloc(hdr->size, GFP_KERNEL); - if (!cdata) - return; - - /* read back full message */ - ret = snd_sof_ipc_msg_data(sdev, NULL, cdata, hdr->size); - if (ret < 0) { - dev_err(sdev->dev, - "error: failed to read component event: %d\n", ret); - goto err; - } - break; - default: - dev_err(sdev->dev, "error: unhandled component message %#x\n", msg_type); - return; - } - - snd_sof_control_notify(sdev, cdata); - -err: - kfree(cdata); -} - /* DSP firmware has sent host a message */ void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev) { struct sof_ipc_cmd_hdr hdr; u32 cmd, type; - int err; + int err = 0; /* read back header */ - err = snd_sof_ipc_msg_data(sdev, NULL, &hdr, sizeof(hdr)); - if (err < 0) { - dev_warn(sdev->dev, "failed to read IPC header: %d\n", err); - return; - } + snd_sof_ipc_msg_data(sdev, NULL, &hdr, sizeof(hdr)); ipc_log_header(sdev->dev, "ipc rx", hdr.cmd); cmd = hdr.cmd & SOF_GLB_TYPE_MASK; @@ -534,9 +393,9 @@ void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev) if (sdev->fw_state == SOF_FW_BOOT_IN_PROGRESS) { err = sof_ops(sdev)->fw_ready(sdev, cmd); if (err < 0) - sof_set_fw_state(sdev, SOF_FW_BOOT_READY_FAILED); + sdev->fw_state = SOF_FW_BOOT_READY_FAILED; else - sof_set_fw_state(sdev, SOF_FW_BOOT_READY_OK); + 
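/*
 * The reply-size rules enforced by the removed snd_sof_ipc_get_reply()
 * above, condensed: a negative error code propagates, an empty header is
 * invalid, a shorter-than-expected reply shrinks reply_size, and a larger
 * one is rejected before it can overflow the reply buffer.
 */
if (reply.error < 0) {
	ret = reply.error;
} else if (!reply.hdr.size) {
	ret = -EINVAL;			/* must carry at least a header */
} else if (msg->reply_size > 0) {
	if (reply.hdr.size > msg->reply_size)
		ret = -EINVAL;		/* would overflow reply_data */
	else
		msg->reply_size = reply.hdr.size;	/* may shrink */
}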
sdev->fw_state = SOF_FW_BOOT_COMPLETE; /* wake up firmware loader */ wake_up(&sdev->boot_wait); @@ -545,9 +404,7 @@ void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev) case SOF_IPC_GLB_COMPOUND: case SOF_IPC_GLB_TPLG_MSG: case SOF_IPC_GLB_PM_MSG: - break; case SOF_IPC_GLB_COMP_MSG: - ipc_comp_notification(sdev, &hdr); break; case SOF_IPC_GLB_STREAM_MSG: /* need to pass msg id into the function */ @@ -569,22 +426,19 @@ EXPORT_SYMBOL(snd_sof_ipc_msgs_rx); * IPC trace mechanism. */ -static void ipc_trace_message(struct snd_sof_dev *sdev, u32 msg_type) +static void ipc_trace_message(struct snd_sof_dev *sdev, u32 msg_id) { struct sof_ipc_dma_trace_posn posn; - int ret; - switch (msg_type) { + switch (msg_id) { case SOF_IPC_TRACE_DMA_POSITION: /* read back full message */ - ret = snd_sof_ipc_msg_data(sdev, NULL, &posn, sizeof(posn)); - if (ret < 0) - dev_warn(sdev->dev, "failed to read trace position: %d\n", ret); - else - snd_sof_trace_update_pos(sdev, &posn); + snd_sof_ipc_msg_data(sdev, NULL, &posn, sizeof(posn)); + snd_sof_trace_update_pos(sdev, &posn); break; default: - dev_err(sdev->dev, "error: unhandled trace message %#x\n", msg_type); + dev_err(sdev->dev, "error: unhandled trace message %x\n", + msg_id); break; } } @@ -599,7 +453,7 @@ static void ipc_period_elapsed(struct snd_sof_dev *sdev, u32 msg_id) struct snd_sof_pcm_stream *stream; struct sof_ipc_stream_posn posn; struct snd_sof_pcm *spcm; - int direction, ret; + int direction; spcm = snd_sof_find_spcm_comp(scomp, msg_id, &direction); if (!spcm) { @@ -610,22 +464,15 @@ static void ipc_period_elapsed(struct snd_sof_dev *sdev, u32 msg_id) } stream = &spcm->stream[direction]; - ret = snd_sof_ipc_msg_data(sdev, stream->substream, &posn, sizeof(posn)); - if (ret < 0) { - dev_warn(sdev->dev, "failed to read stream position: %d\n", ret); - return; - } + snd_sof_ipc_msg_data(sdev, stream->substream, &posn, sizeof(posn)); dev_vdbg(sdev->dev, "posn : host 0x%llx dai 0x%llx wall 0x%llx\n", posn.host_posn, posn.dai_posn, posn.wallclock); memcpy(&stream->posn, &posn, sizeof(posn)); - if (spcm->pcm.compress) - snd_sof_compr_fragment_elapsed(stream->cstream); - else if (stream->substream->runtime && - !stream->substream->runtime->no_period_wakeup) - /* only inform ALSA for period_wakeup mode */ + /* only inform ALSA for period_wakeup mode */ + if (!stream->substream->runtime->no_period_wakeup) snd_sof_pcm_period_elapsed(stream->substream); } @@ -636,7 +483,7 @@ static void ipc_xrun(struct snd_sof_dev *sdev, u32 msg_id) struct snd_sof_pcm_stream *stream; struct sof_ipc_stream_posn posn; struct snd_sof_pcm *spcm; - int direction, ret; + int direction; spcm = snd_sof_find_spcm_comp(scomp, msg_id, &direction); if (!spcm) { @@ -646,11 +493,7 @@ static void ipc_xrun(struct snd_sof_dev *sdev, u32 msg_id) } stream = &spcm->stream[direction]; - ret = snd_sof_ipc_msg_data(sdev, stream->substream, &posn, sizeof(posn)); - if (ret < 0) { - dev_warn(sdev->dev, "failed to read overrun position: %d\n", ret); - return; - } + snd_sof_ipc_msg_data(sdev, stream->substream, &posn, sizeof(posn)); dev_dbg(sdev->dev, "posn XRUN: host %llx comp %d size %d\n", posn.host_posn, posn.xrun_comp_id, posn.xrun_size); @@ -677,7 +520,7 @@ static void ipc_stream_message(struct snd_sof_dev *sdev, u32 msg_cmd) ipc_xrun(sdev, msg_id); break; default: - dev_err(sdev->dev, "error: unhandled stream message %#x\n", + dev_err(sdev->dev, "error: unhandled stream message %x\n", msg_id); break; } @@ -722,6 +565,11 @@ static int sof_get_ctrl_copy_params(enum sof_ipc_ctrl_type ctrl_type, 
sparams->src = (u8 *)src->chanv; sparams->dst = (u8 *)dst->chanv; break; + case SOF_CTRL_TYPE_VALUE_COMP_GET: + case SOF_CTRL_TYPE_VALUE_COMP_SET: + sparams->src = (u8 *)src->compv; + sparams->dst = (u8 *)dst->compv; + break; case SOF_CTRL_TYPE_DATA_GET: case SOF_CTRL_TYPE_DATA_SET: sparams->src = (u8 *)src->data->data; @@ -741,7 +589,7 @@ static int sof_get_ctrl_copy_params(enum sof_ipc_ctrl_type ctrl_type, static int sof_set_get_large_ctrl_data(struct snd_sof_dev *sdev, struct sof_ipc_ctrl_data *cdata, struct sof_ipc_ctrl_data_params *sparams, - bool set) + bool send) { struct sof_ipc_ctrl_data *partdata; size_t send_bytes; @@ -756,7 +604,7 @@ static int sof_set_get_large_ctrl_data(struct snd_sof_dev *sdev, if (!partdata) return -ENOMEM; - if (set) + if (send) err = sof_get_ctrl_copy_params(cdata->type, cdata, partdata, sparams); else @@ -785,7 +633,7 @@ static int sof_set_get_large_ctrl_data(struct snd_sof_dev *sdev, msg_bytes -= send_bytes; partdata->elems_remaining = msg_bytes; - if (set) + if (send) memcpy(sparams->dst, sparams->src + offset, send_bytes); err = sof_ipc_tx_message_unlocked(sdev->ipc, @@ -797,7 +645,7 @@ static int sof_set_get_large_ctrl_data(struct snd_sof_dev *sdev, if (err < 0) break; - if (!set) + if (!send) memcpy(sparams->dst + offset, sparams->src, send_bytes); offset += pl_size; @@ -812,7 +660,11 @@ static int sof_set_get_large_ctrl_data(struct snd_sof_dev *sdev, /* * IPC get()/set() for kcontrols. */ -int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol, bool set) +int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol, + u32 ipc_cmd, + enum sof_ipc_ctrl_type ctrl_type, + enum sof_ipc_ctrl_cmd ctrl_cmd, + bool send) { struct snd_soc_component *scomp = scontrol->scomp; struct sof_ipc_ctrl_data *cdata = scontrol->control_data; @@ -820,69 +672,28 @@ int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol, bool set) struct sof_ipc_fw_ready *ready = &sdev->fw_ready; struct sof_ipc_fw_version *v = &ready->version; struct sof_ipc_ctrl_data_params sparams; - enum sof_ipc_ctrl_type ctrl_type; - struct snd_sof_widget *swidget; - bool widget_found = false; size_t send_bytes; - u32 ipc_cmd; int err; - list_for_each_entry(swidget, &sdev->widget_list, list) { - if (swidget->comp_id == scontrol->comp_id) { - widget_found = true; - break; - } - } - - if (!widget_found) { - dev_err(sdev->dev, "error: can't find widget with id %d\n", scontrol->comp_id); - return -EINVAL; - } - - /* - * Volatile controls should always be part of static pipelines and the widget use_count - * would always be > 0 in this case. For the others, just return the cached value if the - * widget is not set up. - */ - if (!swidget->use_count) - return 0; - /* read or write firmware volume */ if (scontrol->readback_offset != 0) { /* write/read value header via mmaped region */ send_bytes = sizeof(struct sof_ipc_ctrl_value_chan) * cdata->num_elems; - if (set) - err = snd_sof_dsp_block_write(sdev, SOF_FW_BLK_TYPE_IRAM, - scontrol->readback_offset, - cdata->chanv, send_bytes); + if (send) + snd_sof_dsp_block_write(sdev, sdev->mmio_bar, + scontrol->readback_offset, + cdata->chanv, send_bytes); else - err = snd_sof_dsp_block_read(sdev, SOF_FW_BLK_TYPE_IRAM, - scontrol->readback_offset, - cdata->chanv, send_bytes); - - if (err) - dev_err_once(sdev->dev, "error: %s TYPE_IRAM failed\n", - set ? 
"write to" : "read from"); - return err; - } - - /* - * Select the IPC cmd and the ctrl_type based on the ctrl_cmd and the - * direction - * Note: SOF_CTRL_TYPE_VALUE_COMP_* is not used and supported currently - * for ctrl_type - */ - if (cdata->cmd == SOF_CTRL_CMD_BINARY) { - ipc_cmd = set ? SOF_IPC_COMP_SET_DATA : SOF_IPC_COMP_GET_DATA; - ctrl_type = set ? SOF_CTRL_TYPE_DATA_SET : SOF_CTRL_TYPE_DATA_GET; - } else { - ipc_cmd = set ? SOF_IPC_COMP_SET_VALUE : SOF_IPC_COMP_GET_VALUE; - ctrl_type = set ? SOF_CTRL_TYPE_VALUE_CHAN_SET : SOF_CTRL_TYPE_VALUE_CHAN_GET; + snd_sof_dsp_block_read(sdev, sdev->mmio_bar, + scontrol->readback_offset, + cdata->chanv, send_bytes); + return 0; } cdata->rhdr.hdr.cmd = SOF_IPC_GLB_COMP_MSG | ipc_cmd; + cdata->cmd = ctrl_cmd; cdata->type = ctrl_type; cdata->comp_id = scontrol->comp_id; cdata->msg_index = 0; @@ -896,6 +707,13 @@ int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol, bool set) sparams.hdr_bytes = sizeof(struct sof_ipc_ctrl_data); sparams.elems = scontrol->num_channels; break; + case SOF_CTRL_TYPE_VALUE_COMP_GET: + case SOF_CTRL_TYPE_VALUE_COMP_SET: + sparams.msg_bytes = scontrol->num_channels * + sizeof(struct sof_ipc_ctrl_value_comp); + sparams.hdr_bytes = sizeof(struct sof_ipc_ctrl_data); + sparams.elems = scontrol->num_channels; + break; case SOF_CTRL_TYPE_DATA_GET: case SOF_CTRL_TYPE_DATA_SET: sparams.msg_bytes = cdata->data->size; @@ -934,7 +752,7 @@ int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol, bool set) return -EINVAL; } - err = sof_set_get_large_ctrl_data(sdev, cdata, &sparams, set); + err = sof_set_get_large_ctrl_data(sdev, cdata, &sparams, send); if (err < 0) dev_err(sdev->dev, "error: set/get large ctrl ipc comp %d\n", @@ -944,6 +762,22 @@ int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol, bool set) } EXPORT_SYMBOL(snd_sof_ipc_set_get_comp_data); +/* + * IPC layer enumeration. 
+ */ + +int snd_sof_dsp_mailbox_init(struct snd_sof_dev *sdev, u32 dspbox, + size_t dspbox_size, u32 hostbox, + size_t hostbox_size) +{ + sdev->dsp_box.offset = dspbox; + sdev->dsp_box.size = dspbox_size; + sdev->host_box.offset = hostbox; + sdev->host_box.size = hostbox_size; + return 0; +} +EXPORT_SYMBOL(snd_sof_dsp_mailbox_init); + int snd_sof_ipc_valid(struct snd_sof_dev *sdev) { struct sof_ipc_fw_ready *ready = &sdev->fw_ready; @@ -995,22 +829,6 @@ int snd_sof_ipc_valid(struct snd_sof_dev *sdev) } EXPORT_SYMBOL(snd_sof_ipc_valid); -int sof_ipc_init_msg_memory(struct snd_sof_dev *sdev) -{ - struct snd_sof_ipc_msg *msg; - - msg = &sdev->ipc->msg; - msg->msg_data = devm_kzalloc(sdev->dev, SOF_IPC_MSG_MAX_SIZE, GFP_KERNEL); - if (!msg->msg_data) - return -ENOMEM; - - msg->reply_data = devm_kzalloc(sdev->dev, SOF_IPC_MSG_MAX_SIZE, GFP_KERNEL); - if (!msg->reply_data) - return -ENOMEM; - - return 0; -} - struct snd_sof_ipc *snd_sof_ipc_init(struct snd_sof_dev *sdev) { struct snd_sof_ipc *ipc; @@ -1027,6 +845,17 @@ struct snd_sof_ipc *snd_sof_ipc_init(struct snd_sof_dev *sdev) /* indicate that we aren't sending a message ATM */ msg->ipc_complete = true; + /* pre-allocate message data */ + msg->msg_data = devm_kzalloc(sdev->dev, SOF_IPC_MSG_MAX_SIZE, + GFP_KERNEL); + if (!msg->msg_data) + return NULL; + + msg->reply_data = devm_kzalloc(sdev->dev, SOF_IPC_MSG_MAX_SIZE, + GFP_KERNEL); + if (!msg->reply_data) + return NULL; + init_waitqueue_head(&msg->waitq); return ipc; diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index 697f03565a..bb79c77775 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -13,7 +13,6 @@ #include #include #include -#include "sof-priv.h" #include "ops.h" static int get_ext_windows(struct snd_sof_dev *sdev, @@ -87,7 +86,7 @@ static int get_cc_info(struct snd_sof_dev *sdev, } /* parse the extended FW boot data structures from FW boot message */ -static int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 offset) +int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 bar, u32 offset) { struct sof_ipc_ext_data_hdr *ext_hdr; void *ext_data; @@ -98,16 +97,15 @@ static int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 offset) return -ENOMEM; /* get first header */ - snd_sof_dsp_block_read(sdev, SOF_FW_BLK_TYPE_SRAM, offset, ext_data, + snd_sof_dsp_block_read(sdev, bar, offset, ext_data, sizeof(*ext_hdr)); ext_hdr = ext_data; while (ext_hdr->hdr.cmd == SOF_IPC_FW_READY) { /* read in ext structure */ - snd_sof_dsp_block_read(sdev, SOF_FW_BLK_TYPE_SRAM, - offset + sizeof(*ext_hdr), - (void *)((u8 *)ext_data + sizeof(*ext_hdr)), - ext_hdr->hdr.size - sizeof(*ext_hdr)); + snd_sof_dsp_block_read(sdev, bar, offset + sizeof(*ext_hdr), + (void *)((u8 *)ext_data + sizeof(*ext_hdr)), + ext_hdr->hdr.size - sizeof(*ext_hdr)); dev_dbg(sdev->dev, "found ext header type %d size 0x%x\n", ext_hdr->type, ext_hdr->hdr.size); @@ -140,7 +138,7 @@ static int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 offset) /* move to next header */ offset += ext_hdr->hdr.size; - snd_sof_dsp_block_read(sdev, SOF_FW_BLK_TYPE_SRAM, offset, ext_data, + snd_sof_dsp_block_read(sdev, bar, offset, ext_data, sizeof(*ext_hdr)); ext_hdr = ext_data; } @@ -148,6 +146,7 @@ static int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 offset) kfree(ext_data); return ret; } +EXPORT_SYMBOL(snd_sof_fw_parse_ext_data); static int ext_man_get_fw_version(struct snd_sof_dev *sdev, const struct sof_ext_man_elem_header *hdr) @@ -373,6 +372,7 @@ static void 
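/*
 * Illustrative sketch (not part of the patch): the header walk performed by
 * snd_sof_fw_parse_ext_data() above. Each record begins with a
 * sof_ipc_ext_data_hdr; the loop reads a header, lets the caller consume the
 * body, then advances by hdr.size until the cmd field no longer reads
 * SOF_IPC_FW_READY. example_walk_ext_data() is a hypothetical name.
 */
static void example_walk_ext_data(struct snd_sof_dev *sdev, u32 bar,
				  u32 offset)
{
	struct sof_ipc_ext_data_hdr hdr;

	snd_sof_dsp_block_read(sdev, bar, offset, &hdr, sizeof(hdr));
	while (hdr.hdr.cmd == SOF_IPC_FW_READY) {
		/*
		 * a record body of hdr.hdr.size - sizeof(hdr) bytes follows
		 * the header and is dispatched on hdr.type
		 */
		offset += hdr.hdr.size;
		snd_sof_dsp_block_read(sdev, bar, offset, &hdr, sizeof(hdr));
	}
}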
sof_get_windows(struct snd_sof_dev *sdev) u32 debug_size = 0; u32 debug_offset = 0; int window_offset; + int bar; int i; if (!sdev->info_window) { @@ -380,6 +380,12 @@ static void sof_get_windows(struct snd_sof_dev *sdev) return; } + bar = snd_sof_dsp_get_bar_index(sdev, SOF_FW_BLK_TYPE_SRAM); + if (bar < 0) { + dev_err(sdev->dev, "error: have no bar mapping\n"); + return; + } + for (i = 0; i < sdev->info_window->num_windows; i++) { elem = &sdev->info_window->window[i]; @@ -394,53 +400,64 @@ static void sof_get_windows(struct snd_sof_dev *sdev) case SOF_IPC_REGION_UPBOX: inbox_offset = window_offset + elem->offset; inbox_size = elem->size; - snd_sof_debugfs_add_region_item(sdev, SOF_FW_BLK_TYPE_SRAM, - inbox_offset, - elem->size, "inbox", - SOF_DEBUGFS_ACCESS_D0_ONLY); + snd_sof_debugfs_io_item(sdev, + sdev->bar[bar] + + inbox_offset, + elem->size, "inbox", + SOF_DEBUGFS_ACCESS_D0_ONLY); break; case SOF_IPC_REGION_DOWNBOX: outbox_offset = window_offset + elem->offset; outbox_size = elem->size; - snd_sof_debugfs_add_region_item(sdev, SOF_FW_BLK_TYPE_SRAM, - outbox_offset, - elem->size, "outbox", - SOF_DEBUGFS_ACCESS_D0_ONLY); + snd_sof_debugfs_io_item(sdev, + sdev->bar[bar] + + outbox_offset, + elem->size, "outbox", + SOF_DEBUGFS_ACCESS_D0_ONLY); break; case SOF_IPC_REGION_TRACE: - snd_sof_debugfs_add_region_item(sdev, SOF_FW_BLK_TYPE_SRAM, - window_offset + elem->offset, - elem->size, "etrace", - SOF_DEBUGFS_ACCESS_D0_ONLY); + snd_sof_debugfs_io_item(sdev, + sdev->bar[bar] + + window_offset + + elem->offset, + elem->size, "etrace", + SOF_DEBUGFS_ACCESS_D0_ONLY); break; case SOF_IPC_REGION_DEBUG: debug_offset = window_offset + elem->offset; debug_size = elem->size; - snd_sof_debugfs_add_region_item(sdev, SOF_FW_BLK_TYPE_SRAM, - window_offset + elem->offset, - elem->size, "debug", - SOF_DEBUGFS_ACCESS_D0_ONLY); + snd_sof_debugfs_io_item(sdev, + sdev->bar[bar] + + window_offset + + elem->offset, + elem->size, "debug", + SOF_DEBUGFS_ACCESS_D0_ONLY); break; case SOF_IPC_REGION_STREAM: stream_offset = window_offset + elem->offset; stream_size = elem->size; - snd_sof_debugfs_add_region_item(sdev, SOF_FW_BLK_TYPE_SRAM, - stream_offset, - elem->size, "stream", - SOF_DEBUGFS_ACCESS_D0_ONLY); + snd_sof_debugfs_io_item(sdev, + sdev->bar[bar] + + stream_offset, + elem->size, "stream", + SOF_DEBUGFS_ACCESS_D0_ONLY); break; case SOF_IPC_REGION_REGS: - snd_sof_debugfs_add_region_item(sdev, SOF_FW_BLK_TYPE_SRAM, - window_offset + elem->offset, - elem->size, "regs", - SOF_DEBUGFS_ACCESS_D0_ONLY); + snd_sof_debugfs_io_item(sdev, + sdev->bar[bar] + + window_offset + + elem->offset, + elem->size, "regs", + SOF_DEBUGFS_ACCESS_D0_ONLY); break; case SOF_IPC_REGION_EXCEPTION: sdev->dsp_oops_offset = window_offset + elem->offset; - snd_sof_debugfs_add_region_item(sdev, SOF_FW_BLK_TYPE_SRAM, - window_offset + elem->offset, - elem->size, "exception", - SOF_DEBUGFS_ACCESS_D0_ONLY); + snd_sof_debugfs_io_item(sdev, + sdev->bar[bar] + + window_offset + + elem->offset, + elem->size, "exception", + SOF_DEBUGFS_ACCESS_D0_ONLY); break; default: dev_err(sdev->dev, "error: get illegal window info\n"); @@ -453,12 +470,8 @@ static void sof_get_windows(struct snd_sof_dev *sdev) return; } - sdev->dsp_box.offset = inbox_offset; - sdev->dsp_box.size = inbox_size; - - sdev->host_box.offset = outbox_offset; - sdev->host_box.size = outbox_size; - + snd_sof_dsp_mailbox_init(sdev, inbox_offset, inbox_size, + outbox_offset, outbox_size); sdev->stream_box.offset = stream_offset; sdev->stream_box.size = stream_size; @@ -480,6 +493,7 @@ int 
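/*
 * Illustrative sketch (not part of the patch): after this revert every
 * memory window is exposed through the raw-I/O debugfs helper, so each call
 * site resolves the SRAM BAR and adds the window offset itself, as the
 * switch above does per region. example_expose_window() is hypothetical.
 */
static int example_expose_window(struct snd_sof_dev *sdev, u32 offset,
				 size_t size, const char *name)
{
	int bar = snd_sof_dsp_get_bar_index(sdev, SOF_FW_BLK_TYPE_SRAM);

	if (bar < 0)
		return bar;

	return snd_sof_debugfs_io_item(sdev, sdev->bar[bar] + offset, size,
				       name, SOF_DEBUGFS_ACCESS_D0_ONLY);
}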
sof_fw_ready(struct snd_sof_dev *sdev, u32 msg_id) { struct sof_ipc_fw_ready *fw_ready = &sdev->fw_ready; int offset; + int bar; int ret; /* mailbox must be on 4k boundary */ @@ -489,6 +503,12 @@ int sof_fw_ready(struct snd_sof_dev *sdev, u32 msg_id) return offset; } + bar = snd_sof_dsp_get_bar_index(sdev, SOF_FW_BLK_TYPE_SRAM); + if (bar < 0) { + dev_err(sdev->dev, "error: have no bar mapping\n"); + return -EINVAL; + } + dev_dbg(sdev->dev, "ipc: DSP is ready 0x%8.8x offset 0x%x\n", msg_id, offset); @@ -496,17 +516,8 @@ int sof_fw_ready(struct snd_sof_dev *sdev, u32 msg_id) if (!sdev->first_boot) return 0; - /* - * copy data from the DSP FW ready offset - * Subsequent error handling is not needed for BLK_TYPE_SRAM - */ - ret = snd_sof_dsp_block_read(sdev, SOF_FW_BLK_TYPE_SRAM, offset, fw_ready, - sizeof(*fw_ready)); - if (ret) { - dev_err(sdev->dev, - "error: unable to read fw_ready, read from TYPE_SRAM failed\n"); - return ret; - } + /* copy data from the DSP FW ready offset */ + snd_sof_dsp_block_read(sdev, bar, offset, fw_ready, sizeof(*fw_ready)); /* make sure ABI version is compatible */ ret = snd_sof_ipc_valid(sdev); @@ -514,11 +525,12 @@ int sof_fw_ready(struct snd_sof_dev *sdev, u32 msg_id) return ret; /* now check for extended data */ - snd_sof_fw_parse_ext_data(sdev, offset + sizeof(struct sof_ipc_fw_ready)); + snd_sof_fw_parse_ext_data(sdev, bar, offset + + sizeof(struct sof_ipc_fw_ready)); sof_get_windows(sdev); - return sof_ipc_init_msg_memory(sdev); + return 0; } EXPORT_SYMBOL(sof_fw_ready); @@ -527,7 +539,7 @@ int snd_sof_parse_module_memcpy(struct snd_sof_dev *sdev, struct snd_sof_mod_hdr *module) { struct snd_sof_blk_hdr *block; - int count, ret; + int count, bar; u32 offset; size_t remaining; @@ -564,6 +576,13 @@ int snd_sof_parse_module_memcpy(struct snd_sof_dev *sdev, case SOF_FW_BLK_TYPE_DRAM: case SOF_FW_BLK_TYPE_SRAM: offset = block->offset; + bar = snd_sof_dsp_get_bar_index(sdev, block->type); + if (bar < 0) { + dev_err(sdev->dev, + "error: no BAR mapping for block type 0x%x\n", + block->type); + return bar; + } break; default: dev_err(sdev->dev, "error: bad type 0x%x for block 0x%x\n", @@ -581,13 +600,8 @@ int snd_sof_parse_module_memcpy(struct snd_sof_dev *sdev, block->size); return -EINVAL; } - ret = snd_sof_dsp_block_write(sdev, block->type, offset, - block + 1, block->size); - if (ret < 0) { - dev_err(sdev->dev, "error: write to block type 0x%x failed\n", - block->type); - return ret; - } + snd_sof_dsp_block_write(sdev, bar, offset, + block + 1, block->size); if (remaining < block->size) { dev_err(sdev->dev, "error: not enough data remaining\n"); @@ -786,16 +800,22 @@ int snd_sof_load_firmware_memcpy(struct snd_sof_dev *sdev) } EXPORT_SYMBOL(snd_sof_load_firmware_memcpy); +int snd_sof_load_firmware(struct snd_sof_dev *sdev) +{ + dev_dbg(sdev->dev, "loading firmware\n"); + + if (sof_ops(sdev)->load_firmware) + return sof_ops(sdev)->load_firmware(sdev); + return 0; +} +EXPORT_SYMBOL(snd_sof_load_firmware); + int snd_sof_run_firmware(struct snd_sof_dev *sdev) { int ret; init_waitqueue_head(&sdev->boot_wait); - /* (re-)enable dsp dump */ - sdev->dbg_dump_printed = false; - sdev->ipc_dump_printed = false; - /* create read-only fw_version debugfs to store boot version info */ if (sdev->first_boot) { ret = snd_sof_debugfs_buf_item(sdev, &sdev->fw_version, @@ -820,8 +840,7 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev) /* boot the firmware on the DSP */ ret = snd_sof_dsp_run(sdev); if (ret < 0) { - snd_sof_dsp_dbg_dump(sdev, "Failed to start DSP", - 
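/*
 * Illustrative sketch (not part of the patch): the "mailbox must be on 4k
 * boundary" rule checked at the top of sof_fw_ready() above. A negative
 * offset from the platform op is propagated as an error; a misaligned one
 * indicates a broken firmware image. example_check_mbox_offset() and the
 * -EINVAL choice for the misaligned case are assumptions.
 */
static int example_check_mbox_offset(struct snd_sof_dev *sdev, int offset)
{
	if (offset < 0)
		return offset;

	if (offset & 0xfff) {
		dev_err(sdev->dev, "error: mailbox offset 0x%x not 4k aligned\n",
			offset);
		return -EINVAL;
	}

	return 0;
}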
SOF_DBG_DUMP_MBOX | SOF_DBG_DUMP_PCI);
+ dev_err(sdev->dev, "error: failed to start DSP\n");
 return ret;
 }
@@ -835,13 +854,16 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
 sdev->fw_state > SOF_FW_BOOT_IN_PROGRESS,
 msecs_to_jiffies(sdev->boot_timeout));
 if (ret == 0) {
- snd_sof_dsp_dbg_dump(sdev, "Firmware boot failure due to timeout",
- SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX |
- SOF_DBG_DUMP_TEXT | SOF_DBG_DUMP_PCI);
+ dev_err(sdev->dev, "error: firmware boot failure\n");
+ snd_sof_dsp_dbg_dump(sdev, SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX |
+ SOF_DBG_DUMP_TEXT | SOF_DBG_DUMP_PCI | SOF_DBG_DUMP_FORCE_ERR_LEVEL);
+ sdev->fw_state = SOF_FW_BOOT_FAILED;
 return -EIO;
 }
- if (sdev->fw_state == SOF_FW_BOOT_READY_FAILED)
+ if (sdev->fw_state == SOF_FW_BOOT_COMPLETE)
+ dev_dbg(sdev->dev, "firmware boot complete\n");
+ else
 return -EIO; /* FW boots but fw_ready op failed */
 /* perform post fw run operations */
@@ -851,9 +873,6 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
 return ret;
 }
- dev_dbg(sdev->dev, "firmware boot complete\n");
- sof_set_fw_state(sdev, SOF_FW_BOOT_COMPLETE);
-
 return 0;
 }
 EXPORT_SYMBOL(snd_sof_run_firmware);
diff --git a/sound/soc/sof/ops.c b/sound/soc/sof/ops.c
index 235e2ef721..11ecebd079 100644
--- a/sound/soc/sof/ops.c
+++ b/sound/soc/sof/ops.c
@@ -142,46 +142,22 @@ void snd_sof_dsp_update_bits_forced(struct snd_sof_dev *sdev, u32 bar,
 }
 EXPORT_SYMBOL(snd_sof_dsp_update_bits_forced);
-/**
- * snd_sof_dsp_panic - handle a received DSP panic message
- * @sdev: Pointer to the device's sdev
- * @offset: offset of panic information
- * @non_recoverable: the panic is fatal, no recovery will be done by the caller
- */
-void snd_sof_dsp_panic(struct snd_sof_dev *sdev, u32 offset, bool non_recoverable)
+void snd_sof_dsp_panic(struct snd_sof_dev *sdev, u32 offset)
 {
+ dev_err(sdev->dev, "error: DSP panic!\n");
+
 /*
- * if DSP is not ready and the dsp_oops_offset is not yet set, use the
- * offset from the panic message.
+ * If the DSP was not ready and did not set dsp_oops_offset, take
+ * the offset from the panic message. The memory window setting
+ * should also be cross-checked against the panic message.
 */
 if (!sdev->dsp_oops_offset)
 sdev->dsp_oops_offset = offset;
+ else
+ dev_dbg(sdev->dev, "panic: dsp_oops_offset %zu offset %d\n",
+ sdev->dsp_oops_offset, offset);
- /*
- * Print warning if the offset from the panic message differs from
- * dsp_oops_offset
- */
- if (sdev->dsp_oops_offset != offset)
- dev_warn(sdev->dev,
- "%s: dsp_oops_offset %zu differs from panic offset %u\n",
- __func__, sdev->dsp_oops_offset, offset);
-
- /*
- * Set the fw_state to crashed only in case of non recoverable DSP panic
- * event.
- * Use different message within the snd_sof_dsp_dbg_dump() depending on
- * the non_recoverable flag. 
- */ - sdev->dbg_dump_printed = false; - if (non_recoverable) { - snd_sof_dsp_dbg_dump(sdev, "DSP panic!", - SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX); - sof_set_fw_state(sdev, SOF_FW_CRASHED); - snd_sof_trace_notify_for_error(sdev); - } else { - snd_sof_dsp_dbg_dump(sdev, - "DSP panic (recovery will be attempted)", - SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX); - } + snd_sof_dsp_dbg_dump(sdev, SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX); + snd_sof_trace_notify_for_error(sdev); } EXPORT_SYMBOL(snd_sof_dsp_panic); diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h index ffe7456e77..4a5d6e497f 100644 --- a/sound/soc/sof/ops.h +++ b/sound/soc/sof/ops.h @@ -72,68 +72,35 @@ static inline int snd_sof_dsp_reset(struct snd_sof_dev *sdev) return 0; } -/* dsp core get/put */ -static inline int snd_sof_dsp_core_get(struct snd_sof_dev *sdev, int core) +/* dsp core power up/power down */ +static inline int snd_sof_dsp_core_power_up(struct snd_sof_dev *sdev, + unsigned int core_mask) { - if (core > sdev->num_cores - 1) { - dev_err(sdev->dev, "invalid core id: %d for num_cores: %d\n", core, - sdev->num_cores); - return -EINVAL; + int ret = 0; + + core_mask &= ~sdev->enabled_cores_mask; + if (sof_ops(sdev)->core_power_up && core_mask) { + ret = sof_ops(sdev)->core_power_up(sdev, core_mask); + if (!ret) + sdev->enabled_cores_mask |= core_mask; } - if (sof_ops(sdev)->core_get) { - int ret; - - /* if current ref_count is > 0, increment it and return */ - if (sdev->dsp_core_ref_count[core] > 0) { - sdev->dsp_core_ref_count[core]++; - return 0; - } - - /* power up the core */ - ret = sof_ops(sdev)->core_get(sdev, core); - if (ret < 0) - return ret; - - /* increment ref_count */ - sdev->dsp_core_ref_count[core]++; - - /* and update enabled_cores_mask */ - sdev->enabled_cores_mask |= BIT(core); - - dev_dbg(sdev->dev, "Core %d powered up\n", core); - } - - return 0; + return ret; } -static inline int snd_sof_dsp_core_put(struct snd_sof_dev *sdev, int core) +static inline int snd_sof_dsp_core_power_down(struct snd_sof_dev *sdev, + unsigned int core_mask) { - if (core > sdev->num_cores - 1) { - dev_err(sdev->dev, "invalid core id: %d for num_cores: %d\n", core, - sdev->num_cores); - return -EINVAL; + int ret = 0; + + core_mask &= sdev->enabled_cores_mask; + if (sof_ops(sdev)->core_power_down && core_mask) { + ret = sof_ops(sdev)->core_power_down(sdev, core_mask); + if (!ret) + sdev->enabled_cores_mask &= ~core_mask; } - if (sof_ops(sdev)->core_put) { - int ret; - - /* decrement ref_count and return if it is > 0 */ - if (--(sdev->dsp_core_ref_count[core]) > 0) - return 0; - - /* power down the core */ - ret = sof_ops(sdev)->core_put(sdev, core); - if (ret < 0) - return ret; - - /* and update enabled_cores_mask */ - sdev->enabled_cores_mask &= ~BIT(core); - - dev_dbg(sdev->dev, "Core %d powered down\n", core); - } - - return 0; + return ret; } /* pre/post fw load */ @@ -274,17 +241,16 @@ snd_sof_dsp_set_power_state(struct snd_sof_dev *sdev, } /* debug */ -void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, const char *msg, u32 flags); - -static inline int snd_sof_debugfs_add_region_item(struct snd_sof_dev *sdev, - enum snd_sof_fw_blk_type blk_type, u32 offset, size_t size, - const char *name, enum sof_debugfs_access_type access_type) +static inline void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, u32 flags) { - if (sof_ops(sdev) && sof_ops(sdev)->debugfs_add_region_item) - return sof_ops(sdev)->debugfs_add_region_item(sdev, blk_type, offset, - size, name, access_type); + if (sof_ops(sdev)->dbg_dump) + 
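/*
 * Illustrative sketch (not part of the patch): the mask bookkeeping behind
 * the restored core_power_up/power_down helpers above. Only cores that are
 * not already enabled reach the platform op, and enabled_cores_mask changes
 * only when the op succeeds. example_power_up() is a hypothetical distilled
 * form of that logic.
 */
static int example_power_up(u32 *enabled_cores_mask, u32 core_mask,
			    int (*op)(u32 mask))
{
	int ret = 0;

	core_mask &= ~*enabled_cores_mask;	/* skip already-enabled cores */
	if (op && core_mask) {
		ret = op(core_mask);
		if (!ret)
			*enabled_cores_mask |= core_mask;
	}

	return ret;
}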
sof_ops(sdev)->dbg_dump(sdev, flags); +} - return 0; +static inline void snd_sof_ipc_dump(struct snd_sof_dev *sdev) +{ + if (sof_ops(sdev)->ipc_dump) + sof_ops(sdev)->ipc_dump(sdev); } /* register IO */ @@ -331,33 +297,16 @@ static inline u64 snd_sof_dsp_read64(struct snd_sof_dev *sdev, u32 bar, } /* block IO */ -static inline int snd_sof_dsp_block_read(struct snd_sof_dev *sdev, - enum snd_sof_fw_blk_type blk_type, - u32 offset, void *dest, size_t bytes) +static inline void snd_sof_dsp_block_read(struct snd_sof_dev *sdev, u32 bar, + u32 offset, void *dest, size_t bytes) { - return sof_ops(sdev)->block_read(sdev, blk_type, offset, dest, bytes); + sof_ops(sdev)->block_read(sdev, bar, offset, dest, bytes); } -static inline int snd_sof_dsp_block_write(struct snd_sof_dev *sdev, - enum snd_sof_fw_blk_type blk_type, - u32 offset, void *src, size_t bytes) +static inline void snd_sof_dsp_block_write(struct snd_sof_dev *sdev, u32 bar, + u32 offset, void *src, size_t bytes) { - return sof_ops(sdev)->block_write(sdev, blk_type, offset, src, bytes); -} - -/* mailbox IO */ -static inline void snd_sof_dsp_mailbox_read(struct snd_sof_dev *sdev, - u32 offset, void *dest, size_t bytes) -{ - if (sof_ops(sdev)->mailbox_read) - sof_ops(sdev)->mailbox_read(sdev, offset, dest, bytes); -} - -static inline void snd_sof_dsp_mailbox_write(struct snd_sof_dev *sdev, - u32 offset, void *src, size_t bytes) -{ - if (sof_ops(sdev)->mailbox_write) - sof_ops(sdev)->mailbox_write(sdev, offset, src, bytes); + sof_ops(sdev)->block_write(sdev, bar, offset, src, bytes); } /* ipc */ @@ -451,20 +400,12 @@ snd_sof_pcm_platform_trigger(struct snd_sof_dev *sdev, return 0; } -/* Firmware loading */ -static inline int snd_sof_load_firmware(struct snd_sof_dev *sdev) -{ - dev_dbg(sdev->dev, "loading firmware\n"); - - return sof_ops(sdev)->load_firmware(sdev); -} - /* host DSP message data */ -static inline int snd_sof_ipc_msg_data(struct snd_sof_dev *sdev, - struct snd_pcm_substream *substream, - void *p, size_t sz) +static inline void snd_sof_ipc_msg_data(struct snd_sof_dev *sdev, + struct snd_pcm_substream *substream, + void *p, size_t sz) { - return sof_ops(sdev)->ipc_msg_data(sdev, substream, p, sz); + sof_ops(sdev)->ipc_msg_data(sdev, substream, p, sz); } /* host configure DSP HW parameters */ @@ -487,16 +428,6 @@ snd_sof_pcm_platform_pointer(struct snd_sof_dev *sdev, return 0; } -/* pcm ack */ -static inline int snd_sof_pcm_platform_ack(struct snd_sof_dev *sdev, - struct snd_pcm_substream *substream) -{ - if (sof_ops(sdev) && sof_ops(sdev)->pcm_ack) - return sof_ops(sdev)->pcm_ack(sdev, substream); - - return 0; -} - #if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES) static inline int snd_sof_probe_compr_assign(struct snd_sof_dev *sdev, @@ -557,23 +488,36 @@ snd_sof_machine_unregister(struct snd_sof_dev *sdev, void *pdata) sof_ops(sdev)->machine_unregister(sdev, pdata); } -static inline struct snd_soc_acpi_mach * +static inline void snd_sof_machine_select(struct snd_sof_dev *sdev) { if (sof_ops(sdev) && sof_ops(sdev)->machine_select) - return sof_ops(sdev)->machine_select(sdev); - - return NULL; + sof_ops(sdev)->machine_select(sdev); } static inline void -snd_sof_set_mach_params(struct snd_soc_acpi_mach *mach, +snd_sof_set_mach_params(const struct snd_soc_acpi_mach *mach, struct snd_sof_dev *sdev) { if (sof_ops(sdev) && sof_ops(sdev)->set_mach_params) sof_ops(sdev)->set_mach_params(mach, sdev); } +static inline const struct snd_sof_dsp_ops +*sof_get_ops(const struct sof_dev_desc *d, + const struct sof_ops_table mach_ops[], int asize) 
+{ + int i; + + for (i = 0; i < asize; i++) { + if (d == mach_ops[i].desc) + return mach_ops[i].ops; + } + + /* not found */ + return NULL; +} + /** * snd_sof_dsp_register_poll_timeout - Periodically poll an address * until a condition is met or a timeout occurs @@ -643,5 +587,5 @@ int snd_sof_dsp_register_poll(struct snd_sof_dev *sdev, u32 bar, u32 offset, u32 mask, u32 target, u32 timeout_ms, u32 interval_us); -void snd_sof_dsp_panic(struct snd_sof_dev *sdev, u32 offset, bool non_recoverable); +void snd_sof_dsp_panic(struct snd_sof_dev *sdev, u32 offset); #endif diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index 37fb8e6cd4..9893b182da 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -17,7 +17,7 @@ #include "sof-audio.h" #include "ops.h" #if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES) -#include "sof-probes.h" +#include "compress.h" #endif /* Create DMA buffer page table for DSP */ @@ -57,7 +57,7 @@ static int sof_pcm_dsp_params(struct snd_sof_pcm *spcm, struct snd_pcm_substream /* * sof pcm period elapse work */ -static void snd_sof_pcm_period_elapsed_work(struct work_struct *work) +void snd_sof_pcm_period_elapsed_work(struct work_struct *work) { struct snd_sof_pcm_stream *sps = container_of(work, struct snd_sof_pcm_stream, @@ -66,11 +66,6 @@ static void snd_sof_pcm_period_elapsed_work(struct work_struct *work) snd_pcm_period_elapsed(sps->substream); } -void snd_sof_pcm_init_elapsed_work(struct work_struct *work) -{ - INIT_WORK(work, snd_sof_pcm_period_elapsed_work); -} - /* * sof pcm period elapse, this could be called at irq thread context. */ @@ -100,16 +95,14 @@ void snd_sof_pcm_period_elapsed(struct snd_pcm_substream *substream) } EXPORT_SYMBOL(snd_sof_pcm_period_elapsed); -int sof_pcm_dsp_pcm_free(struct snd_pcm_substream *substream, struct snd_sof_dev *sdev, - struct snd_sof_pcm *spcm) +static int sof_pcm_dsp_pcm_free(struct snd_pcm_substream *substream, + struct snd_sof_dev *sdev, + struct snd_sof_pcm *spcm) { struct sof_ipc_stream stream; struct sof_ipc_reply reply; int ret; - if (!spcm->prepared[substream->stream]) - return 0; - stream.hdr.size = sizeof(stream); stream.hdr.cmd = SOF_IPC_GLB_STREAM_MSG | SOF_IPC_STREAM_PCM_FREE; stream.comp_id = spcm->stream[substream->stream].comp_id; @@ -123,40 +116,6 @@ int sof_pcm_dsp_pcm_free(struct snd_pcm_substream *substream, struct snd_sof_dev return ret; } -static int sof_pcm_setup_connected_widgets(struct snd_sof_dev *sdev, - struct snd_soc_pcm_runtime *rtd, - struct snd_sof_pcm *spcm, int dir) -{ - struct snd_soc_dai *dai; - int ret, j; - - /* query DAPM for list of connected widgets and set them up */ - for_each_rtd_cpu_dais(rtd, j, dai) { - struct snd_soc_dapm_widget_list *list; - - ret = snd_soc_dapm_dai_get_connected_widgets(dai, dir, &list, - dpcm_end_walk_at_be); - if (ret < 0) { - dev_err(sdev->dev, "error: dai %s has no valid %s path\n", dai->name, - dir == SNDRV_PCM_STREAM_PLAYBACK ? 
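/*
 * Illustrative sketch (not part of the patch): intended use of sof_get_ops()
 * above. A platform driver maps its sof_dev_desc to the matching DSP ops at
 * probe time. The table content is a commented-out assumption; the imx
 * descriptors and ops it refers to appear further down in this patch.
 */
static const struct sof_ops_table example_mach_ops[] = {
	/* { .desc = &sof_of_imx8qxp_desc, .ops = &sof_imx8x_ops }, */
};

static const struct snd_sof_dsp_ops *
example_select_ops(const struct sof_dev_desc *desc)
{
	return sof_get_ops(desc, example_mach_ops,
			   ARRAY_SIZE(example_mach_ops));
}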
"playback" : "capture"); - return ret; - } - - spcm->stream[dir].list = list; - - ret = sof_widget_list_setup(sdev, spcm, dir); - if (ret < 0) { - dev_err(sdev->dev, "error: failed widget list set up for pcm %d dir %d\n", - spcm->pcm.pcm_id, dir); - spcm->stream[dir].list = NULL; - snd_soc_dapm_dai_free_widgets(&list); - return ret; - } - } - - return 0; -} - static int sof_pcm_hw_params(struct snd_soc_component *component, struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -181,9 +140,11 @@ static int sof_pcm_hw_params(struct snd_soc_component *component, * Handle repeated calls to hw_params() without free_pcm() in * between. At least ALSA OSS emulation depends on this. */ - ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); - if (ret < 0) - return ret; + if (spcm->prepared[substream->stream]) { + ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); + if (ret < 0) + return ret; + } dev_dbg(component->dev, "pcm: hw params stream %d dir %d\n", spcm->pcm.pcm_id, substream->stream); @@ -252,14 +213,7 @@ static int sof_pcm_hw_params(struct snd_soc_component *component, dev_dbg(component->dev, "stream_tag %d", pcm.params.stream_tag); - /* if this is a repeated hw_params without hw_free, skip setting up widgets */ - if (!spcm->stream[substream->stream].list) { - ret = sof_pcm_setup_connected_widgets(sdev, rtd, spcm, substream->stream); - if (ret < 0) - return ret; - } - - /* send hw_params IPC to the DSP */ + /* send IPC to the DSP */ ret = sof_ipc_tx_message(sdev->ipc, pcm.hdr.cmd, &pcm, sizeof(pcm), &ipc_params_reply, sizeof(ipc_params_reply)); if (ret < 0) { @@ -299,26 +253,20 @@ static int sof_pcm_hw_free(struct snd_soc_component *component, dev_dbg(component->dev, "pcm: free stream %d dir %d\n", spcm->pcm.pcm_id, substream->stream); - /* free PCM in the DSP */ - ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); - if (ret < 0) - err = ret; + if (spcm->prepared[substream->stream]) { + ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); + if (ret < 0) + err = ret; + } + cancel_work_sync(&spcm->stream[substream->stream].period_elapsed_work); - /* stop DMA */ ret = snd_sof_pcm_platform_hw_free(sdev, substream); if (ret < 0) { dev_err(component->dev, "error: platform hw free failed\n"); err = ret; } - /* free the DAPM widget list */ - ret = sof_widget_list_free(sdev, spcm, substream->stream); - if (ret < 0) - err = ret; - - cancel_work_sync(&spcm->stream[substream->stream].period_elapsed_work); - return err; } @@ -368,7 +316,6 @@ static int sof_pcm_trigger(struct snd_soc_component *component, struct sof_ipc_stream stream; struct sof_ipc_reply reply; bool reset_hw_params = false; - bool free_widget_list = false; bool ipc_first = false; int ret; @@ -395,6 +342,26 @@ static int sof_pcm_trigger(struct snd_soc_component *component, case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_RELEASE; break; + case SNDRV_PCM_TRIGGER_RESUME: + if (spcm->stream[substream->stream].suspend_ignored) { + /* + * this case will be triggered when INFO_RESUME is + * supported, no need to resume streams that remained + * enabled in D0ix. 
+ */ + spcm->stream[substream->stream].suspend_ignored = false; + return 0; + } + + /* set up hw_params */ + ret = sof_pcm_prepare(component, substream); + if (ret < 0) { + dev_err(component->dev, + "error: failed to set up hw_params upon resume\n"); + return ret; + } + + fallthrough; case SNDRV_PCM_TRIGGER_START: if (spcm->stream[substream->stream].suspend_ignored) { /* @@ -419,7 +386,6 @@ static int sof_pcm_trigger(struct snd_soc_component *component, spcm->stream[substream->stream].suspend_ignored = true; return 0; } - free_widget_list = true; fallthrough; case SNDRV_PCM_TRIGGER_STOP: stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_STOP; @@ -448,12 +414,8 @@ static int sof_pcm_trigger(struct snd_soc_component *component, snd_sof_pcm_platform_trigger(sdev, substream, cmd); /* free PCM if reset_hw_params is set and the STOP IPC is successful */ - if (!ret && reset_hw_params) { - ret = sof_pcm_stream_free(sdev, substream, spcm, substream->stream, - free_widget_list); - if (ret < 0) - return ret; - } + if (!ret && reset_hw_params) + ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); return ret; } @@ -787,18 +749,6 @@ int sof_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_pa channels->min = dai->dai_config->esai.tdm_slots; channels->max = dai->dai_config->esai.tdm_slots; - dev_dbg(component->dev, - "rate_min: %d rate_max: %d\n", rate->min, rate->max); - dev_dbg(component->dev, - "channels_min: %d channels_max: %d\n", - channels->min, channels->max); - break; - case SOF_DAI_MEDIATEK_AFE: - rate->min = dai->dai_config->afe.rate; - rate->max = dai->dai_config->afe.rate; - channels->min = dai->dai_config->afe.channels; - channels->max = dai->dai_config->afe.channels; - dev_dbg(component->dev, "rate_min: %d rate_max: %d\n", rate->min, rate->max); dev_dbg(component->dev, @@ -817,42 +767,6 @@ int sof_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_pa "channels_min: %d channels_max: %d\n", channels->min, channels->max); break; - case SOF_DAI_AMD_BT: - rate->min = dai->dai_config->acpbt.fsync_rate; - rate->max = dai->dai_config->acpbt.fsync_rate; - channels->min = dai->dai_config->acpbt.tdm_slots; - channels->max = dai->dai_config->acpbt.tdm_slots; - - dev_dbg(component->dev, - "AMD_BT rate_min: %d rate_max: %d\n", rate->min, rate->max); - dev_dbg(component->dev, - "AMD_BT channels_min: %d channels_max: %d\n", - channels->min, channels->max); - break; - case SOF_DAI_AMD_SP: - rate->min = dai->dai_config->acpsp.fsync_rate; - rate->max = dai->dai_config->acpsp.fsync_rate; - channels->min = dai->dai_config->acpsp.tdm_slots; - channels->max = dai->dai_config->acpsp.tdm_slots; - - dev_dbg(component->dev, - "AMD_SP rate_min: %d rate_max: %d\n", rate->min, rate->max); - dev_dbg(component->dev, - "AMD_SP channels_min: %d channels_max: %d\n", - channels->min, channels->max); - break; - case SOF_DAI_AMD_DMIC: - rate->min = dai->dai_config->acpdmic.fsync_rate; - rate->max = dai->dai_config->acpdmic.fsync_rate; - channels->min = dai->dai_config->acpdmic.tdm_slots; - channels->max = dai->dai_config->acpdmic.tdm_slots; - - dev_dbg(component->dev, - "AMD_DMIC rate_min: %d rate_max: %d\n", rate->min, rate->max); - dev_dbg(component->dev, - "AMD_DMIC channels_min: %d channels_max: %d\n", - channels->min, channels->max); - break; default: dev_err(component->dev, "error: invalid DAI type %d\n", dai->dai_config->type); @@ -896,14 +810,6 @@ static void sof_pcm_remove(struct snd_soc_component *component) snd_soc_tplg_component_remove(component); } -static int sof_pcm_ack(struct 
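/*
 * Illustrative sketch (not part of the patch): the TRIGGER_RESUME handling
 * restored above, condensed. Streams that stayed powered in D0ix are
 * skipped; everything else gets its hw_params re-applied before falling
 * through to the START processing. example_trigger_resume() is hypothetical.
 */
static int example_trigger_resume(struct snd_soc_component *component,
				  struct snd_pcm_substream *substream,
				  struct snd_sof_pcm *spcm)
{
	if (spcm->stream[substream->stream].suspend_ignored) {
		/* stream stayed enabled in D0ix, nothing to re-program */
		spcm->stream[substream->stream].suspend_ignored = false;
		return 0;
	}

	/* re-program the hw_params that were lost across suspend */
	return sof_pcm_prepare(component, substream);
}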
snd_soc_component *component, - struct snd_pcm_substream *substream) -{ - struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); - - return snd_sof_pcm_platform_ack(sdev, substream); -} - void snd_sof_new_platform_drv(struct snd_sof_dev *sdev) { struct snd_soc_component_driver *pd = &sdev->plat_drv; @@ -922,9 +828,12 @@ void snd_sof_new_platform_drv(struct snd_sof_dev *sdev) pd->hw_free = sof_pcm_hw_free; pd->trigger = sof_pcm_trigger; pd->pointer = sof_pcm_pointer; - pd->ack = sof_pcm_ack; +#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMPRESS) + pd->compress_ops = &sof_compressed_ops; +#endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES) + /* override cops when probe support is enabled */ pd->compress_ops = &sof_probe_compressed_ops; #endif pd->pcm_construct = sof_pcm_new; diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c index 197a88695f..c83fb62559 100644 --- a/sound/soc/sof/pm.c +++ b/sound/soc/sof/pm.c @@ -122,7 +122,7 @@ static int sof_resume(struct device *dev, bool runtime_resume) old_state == SOF_DSP_PM_D0) return 0; - sof_set_fw_state(sdev, SOF_FW_BOOT_PREPARE); + sdev->fw_state = SOF_FW_BOOT_PREPARE; /* load the firmware */ ret = snd_sof_load_firmware(sdev); @@ -130,11 +130,10 @@ static int sof_resume(struct device *dev, bool runtime_resume) dev_err(sdev->dev, "error: failed to load DSP firmware after resume %d\n", ret); - sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED); return ret; } - sof_set_fw_state(sdev, SOF_FW_BOOT_IN_PROGRESS); + sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS; /* * Boot the firmware. The FW boot status will be modified @@ -145,7 +144,6 @@ static int sof_resume(struct device *dev, bool runtime_resume) dev_err(sdev->dev, "error: failed to boot DSP firmware after resume %d\n", ret); - sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED); return ret; } @@ -159,7 +157,7 @@ static int sof_resume(struct device *dev, bool runtime_resume) } /* restore pipelines */ - ret = sof_set_up_pipelines(sdev, false); + ret = sof_restore_pipelines(sdev->dev); if (ret < 0) { dev_err(sdev->dev, "error: failed to restore pipeline after resume %d\n", @@ -193,7 +191,7 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) if (sdev->fw_state != SOF_FW_BOOT_COMPLETE) goto suspend; - /* prepare for streams to be resumed properly upon resume */ + /* set restore_stream for all streams during system suspend */ if (!runtime_suspend) { ret = sof_set_hw_params_upon_resume(sdev->dev); if (ret < 0) { @@ -210,8 +208,6 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) if (target_state == SOF_DSP_PM_D0) goto suspend; - sof_tear_down_pipelines(sdev, false); - /* release trace */ snd_sof_release_trace(sdev); @@ -259,7 +255,7 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) return ret; /* reset FW state */ - sof_set_fw_state(sdev, SOF_FW_BOOT_NOT_STARTED); + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; sdev->enabled_cores_mask = 0; return ret; @@ -314,14 +310,6 @@ int snd_sof_prepare(struct device *dev) /* will suspend to S3 by default */ sdev->system_suspend_target = SOF_SUSPEND_S3; - /* - * if the firmware is crashed or boot failed then we try to aim for S3 - * to reboot the firmware - */ - if (sdev->fw_state == SOF_FW_CRASHED || - sdev->fw_state == SOF_FW_BOOT_FAILED) - return 0; - if (!desc->use_acpi_target_states) return 0; diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index 9e76b79650..989912f2b7 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -8,540 +8,9 @@ // Author: Ranjani Sridharan // -#include #include 
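/*
 * Illustrative sketch (not part of the patch): the assignment order in
 * snd_sof_new_platform_drv() above matters. The probe compressed ops are
 * assigned after the generic ones so they win when both config options are
 * enabled. example_wire_compress_ops() is a hypothetical extraction.
 */
static void example_wire_compress_ops(struct snd_soc_component_driver *pd)
{
#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMPRESS)
	pd->compress_ops = &sof_compressed_ops;
#endif
#if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES)
	/* probe support overrides the generic compressed ops */
	pd->compress_ops = &sof_probe_compressed_ops;
#endif
}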
"sof-audio.h" #include "ops.h" -static int sof_kcontrol_setup(struct snd_sof_dev *sdev, struct snd_sof_control *scontrol) -{ - int ret; - - /* reset readback offset for scontrol */ - scontrol->readback_offset = 0; - - ret = snd_sof_ipc_set_get_comp_data(scontrol, true); - if (ret < 0) - dev_err(sdev->dev, "error: failed kcontrol value set for widget: %d\n", - scontrol->comp_id); - - return ret; -} - -static int sof_dai_config_setup(struct snd_sof_dev *sdev, struct snd_sof_dai *dai) -{ - struct sof_ipc_dai_config *config; - struct sof_ipc_reply reply; - int ret; - - config = &dai->dai_config[dai->current_config]; - if (!config) { - dev_err(sdev->dev, "error: no config for DAI %s\n", dai->name); - return -EINVAL; - } - - /* set NONE flag to clear all previous settings */ - config->flags = SOF_DAI_CONFIG_FLAGS_NONE; - - ret = sof_ipc_tx_message(sdev->ipc, config->hdr.cmd, config, config->hdr.size, - &reply, sizeof(reply)); - - if (ret < 0) - dev_err(sdev->dev, "error: failed to set dai config for %s\n", dai->name); - - return ret; -} - -static int sof_widget_kcontrol_setup(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget) -{ - struct snd_sof_control *scontrol; - int ret; - - /* set up all controls for the widget */ - list_for_each_entry(scontrol, &sdev->kcontrol_list, list) - if (scontrol->comp_id == swidget->comp_id) { - /* set kcontrol data in DSP */ - ret = sof_kcontrol_setup(sdev, scontrol); - if (ret < 0) { - dev_err(sdev->dev, "error: fail to set up kcontrols for widget %s\n", - swidget->widget->name); - return ret; - } - - /* - * Read back the data from the DSP for static widgets. This is particularly - * useful for binary kcontrols associated with static pipeline widgets to - * initialize the data size to match that in the DSP. - */ - if (swidget->dynamic_pipeline_widget) - continue; - - ret = snd_sof_ipc_set_get_comp_data(scontrol, false); - if (ret < 0) - dev_warn(sdev->dev, "Failed kcontrol get for control in widget %s\n", - swidget->widget->name); - } - - return 0; -} - -static void sof_reset_route_setup_status(struct snd_sof_dev *sdev, struct snd_sof_widget *widget) -{ - struct snd_sof_route *sroute; - - list_for_each_entry(sroute, &sdev->route_list, list) - if (sroute->src_widget == widget || sroute->sink_widget == widget) - sroute->setup = false; -} - -int sof_widget_free(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget) -{ - struct sof_ipc_free ipc_free = { - .hdr = { - .size = sizeof(ipc_free), - .cmd = SOF_IPC_GLB_TPLG_MSG, - }, - .id = swidget->comp_id, - }; - struct sof_ipc_reply reply; - int ret, ret1, core; - - if (!swidget->private) - return 0; - - /* only free when use_count is 0 */ - if (--swidget->use_count) - return 0; - - core = swidget->core; - - switch (swidget->id) { - case snd_soc_dapm_scheduler: - { - const struct sof_ipc_pipe_new *pipeline = swidget->private; - - core = pipeline->core; - ipc_free.hdr.cmd |= SOF_IPC_TPLG_PIPE_FREE; - break; - } - case snd_soc_dapm_buffer: - ipc_free.hdr.cmd |= SOF_IPC_TPLG_BUFFER_FREE; - break; - case snd_soc_dapm_dai_in: - case snd_soc_dapm_dai_out: - { - struct snd_sof_dai *dai = swidget->private; - - dai->configured = false; - fallthrough; - } - default: - ipc_free.hdr.cmd |= SOF_IPC_TPLG_COMP_FREE; - break; - } - - /* continue to disable core even if IPC fails */ - ret = sof_ipc_tx_message(sdev->ipc, ipc_free.hdr.cmd, &ipc_free, sizeof(ipc_free), - &reply, sizeof(reply)); - if (ret < 0) - dev_err(sdev->dev, "error: failed to free widget %s\n", swidget->widget->name); - - /* - * disable widget core. 
continue to route setup status and complete flag - * even if this fails and return the appropriate error - */ - ret1 = snd_sof_dsp_core_put(sdev, core); - if (ret1 < 0) { - dev_err(sdev->dev, "error: failed to disable target core: %d for widget %s\n", - core, swidget->widget->name); - if (!ret) - ret = ret1; - } - - /* reset route setup status for all routes that contain this widget */ - sof_reset_route_setup_status(sdev, swidget); - swidget->complete = 0; - - if (!ret) - dev_dbg(sdev->dev, "widget %s freed\n", swidget->widget->name); - - return ret; -} -EXPORT_SYMBOL(sof_widget_free); - -int sof_widget_setup(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget) -{ - struct sof_ipc_pipe_new *pipeline; - struct sof_ipc_comp_reply r; - struct sof_ipc_cmd_hdr *hdr; - struct sof_ipc_comp *comp; - struct snd_sof_dai *dai; - size_t ipc_size; - int ret; - int core; - - /* skip if there is no private data */ - if (!swidget->private) - return 0; - - /* widget already set up */ - if (++swidget->use_count > 1) - return 0; - - /* set core ID */ - core = swidget->core; - if (swidget->id == snd_soc_dapm_scheduler) { - pipeline = swidget->private; - core = pipeline->core; - } - - /* enable widget core */ - ret = snd_sof_dsp_core_get(sdev, core); - if (ret < 0) { - dev_err(sdev->dev, "error: failed to enable target core for widget %s\n", - swidget->widget->name); - goto use_count_dec; - } - - switch (swidget->id) { - case snd_soc_dapm_dai_in: - case snd_soc_dapm_dai_out: - ipc_size = sizeof(struct sof_ipc_comp_dai) + sizeof(struct sof_ipc_comp_ext); - comp = kzalloc(ipc_size, GFP_KERNEL); - if (!comp) { - ret = -ENOMEM; - goto core_put; - } - - dai = swidget->private; - dai->configured = false; - memcpy(comp, &dai->comp_dai, sizeof(struct sof_ipc_comp_dai)); - - /* append extended data to the end of the component */ - memcpy((u8 *)comp + sizeof(struct sof_ipc_comp_dai), &swidget->comp_ext, - sizeof(swidget->comp_ext)); - - ret = sof_ipc_tx_message(sdev->ipc, comp->hdr.cmd, comp, ipc_size, &r, sizeof(r)); - kfree(comp); - if (ret < 0) { - dev_err(sdev->dev, "error: failed to load widget %s\n", - swidget->widget->name); - goto core_put; - } - - ret = sof_dai_config_setup(sdev, dai); - if (ret < 0) { - dev_err(sdev->dev, "error: failed to load dai config for DAI %s\n", - swidget->widget->name); - - /* - * widget use_count and core ref_count will both be decremented by - * sof_widget_free() - */ - sof_widget_free(sdev, swidget); - return ret; - } - break; - case snd_soc_dapm_scheduler: - pipeline = swidget->private; - ret = sof_ipc_tx_message(sdev->ipc, pipeline->hdr.cmd, pipeline, - sizeof(*pipeline), &r, sizeof(r)); - break; - default: - hdr = swidget->private; - ret = sof_ipc_tx_message(sdev->ipc, hdr->cmd, swidget->private, hdr->size, - &r, sizeof(r)); - break; - } - if (ret < 0) { - dev_err(sdev->dev, "error: failed to load widget %s\n", swidget->widget->name); - goto core_put; - } - - /* restore kcontrols for widget */ - ret = sof_widget_kcontrol_setup(sdev, swidget); - if (ret < 0) { - dev_err(sdev->dev, "error: failed to restore kcontrols for widget %s\n", - swidget->widget->name); - /* - * widget use_count and core ref_count will both be decremented by - * sof_widget_free() - */ - sof_widget_free(sdev, swidget); - return ret; - } - - dev_dbg(sdev->dev, "widget %s setup complete\n", swidget->widget->name); - - return 0; - -core_put: - snd_sof_dsp_core_put(sdev, core); -use_count_dec: - swidget->use_count--; - return ret; -} -EXPORT_SYMBOL(sof_widget_setup); - -static int sof_route_setup_ipc(struct 
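/*
 * Illustrative sketch (not part of the patch): the reference-counting
 * discipline of the removed sof_widget_setup()/sof_widget_free() pair
 * above. Real work happens only on the 0->1 and 1->0 transitions, and a
 * failed setup rolls the count back. The example_* helpers are hypothetical.
 */
static int example_widget_get(int *use_count, int (*do_setup)(void))
{
	int ret;

	if (++(*use_count) > 1)
		return 0;	/* already set up for another stream */

	ret = do_setup();
	if (ret < 0)
		(*use_count)--;	/* roll back on failure */

	return ret;
}

static int example_widget_put(int *use_count, int (*do_free)(void))
{
	if (--(*use_count))
		return 0;	/* still in use elsewhere */

	return do_free();
}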
snd_sof_dev *sdev, struct snd_sof_route *sroute) -{ - struct sof_ipc_pipe_comp_connect *connect; - struct sof_ipc_reply reply; - int ret; - - /* skip if there's no private data */ - if (!sroute->private) - return 0; - - /* nothing to do if route is already set up */ - if (sroute->setup) - return 0; - - connect = sroute->private; - - dev_dbg(sdev->dev, "setting up route %s -> %s\n", - sroute->src_widget->widget->name, - sroute->sink_widget->widget->name); - - /* send ipc */ - ret = sof_ipc_tx_message(sdev->ipc, - connect->hdr.cmd, - connect, sizeof(*connect), - &reply, sizeof(reply)); - if (ret < 0) { - dev_err(sdev->dev, "%s: route setup failed %d\n", __func__, ret); - return ret; - } - - sroute->setup = true; - - return 0; -} - -static int sof_route_setup(struct snd_sof_dev *sdev, struct snd_soc_dapm_widget *wsource, - struct snd_soc_dapm_widget *wsink) -{ - struct snd_sof_widget *src_widget = wsource->dobj.private; - struct snd_sof_widget *sink_widget = wsink->dobj.private; - struct snd_sof_route *sroute; - bool route_found = false; - - /* ignore routes involving virtual widgets in topology */ - switch (src_widget->id) { - case snd_soc_dapm_out_drv: - case snd_soc_dapm_output: - case snd_soc_dapm_input: - return 0; - default: - break; - } - - switch (sink_widget->id) { - case snd_soc_dapm_out_drv: - case snd_soc_dapm_output: - case snd_soc_dapm_input: - return 0; - default: - break; - } - - /* find route matching source and sink widgets */ - list_for_each_entry(sroute, &sdev->route_list, list) - if (sroute->src_widget == src_widget && sroute->sink_widget == sink_widget) { - route_found = true; - break; - } - - if (!route_found) { - dev_err(sdev->dev, "error: cannot find SOF route for source %s -> %s sink\n", - wsource->name, wsink->name); - return -EINVAL; - } - - return sof_route_setup_ipc(sdev, sroute); -} - -static int sof_setup_pipeline_connections(struct snd_sof_dev *sdev, - struct snd_soc_dapm_widget_list *list, int dir) -{ - struct snd_soc_dapm_widget *widget; - struct snd_soc_dapm_path *p; - int ret; - int i; - - /* - * Set up connections between widgets in the sink/source paths based on direction. - * Some non-SOF widgets exist in topology either for compatibility or for the - * purpose of connecting a pipeline from a host to a DAI in order to receive the DAPM - * events. But they are not handled by the firmware. So ignore them. 
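/*
 * Illustrative sketch (not part of the patch): the filter applied by the
 * removed sof_route_setup() above. Routes that touch purely virtual DAPM
 * widgets exist only in topology and are never sent to the firmware.
 * example_widget_is_virtual() is a hypothetical name.
 */
static bool example_widget_is_virtual(enum snd_soc_dapm_type id)
{
	switch (id) {
	case snd_soc_dapm_out_drv:
	case snd_soc_dapm_output:
	case snd_soc_dapm_input:
		return true;	/* topology-only, not handled by the FW */
	default:
		return false;
	}
}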
- */ - if (dir == SNDRV_PCM_STREAM_PLAYBACK) { - for_each_dapm_widgets(list, i, widget) { - if (!widget->dobj.private) - continue; - - snd_soc_dapm_widget_for_each_sink_path(widget, p) - if (p->sink->dobj.private) { - ret = sof_route_setup(sdev, widget, p->sink); - if (ret < 0) - return ret; - } - } - } else { - for_each_dapm_widgets(list, i, widget) { - if (!widget->dobj.private) - continue; - - snd_soc_dapm_widget_for_each_source_path(widget, p) - if (p->source->dobj.private) { - ret = sof_route_setup(sdev, p->source, widget); - if (ret < 0) - return ret; - } - } - } - - return 0; -} - -int sof_widget_list_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm, int dir) -{ - struct snd_soc_dapm_widget_list *list = spcm->stream[dir].list; - struct snd_soc_dapm_widget *widget; - int i, ret, num_widgets; - - /* nothing to set up */ - if (!list) - return 0; - - /* set up widgets in the list */ - for_each_dapm_widgets(list, num_widgets, widget) { - struct snd_sof_widget *swidget = widget->dobj.private; - struct snd_sof_widget *pipe_widget; - - if (!swidget) - continue; - - /* - * The scheduler widget for a pipeline is not part of the connected DAPM - * widget list and it needs to be set up before the widgets in the pipeline - * are set up. The use_count for the scheduler widget is incremented for every - * widget in a given pipeline to ensure that it is freed only after the last - * widget in the pipeline is freed. - */ - pipe_widget = swidget->pipe_widget; - if (!pipe_widget) { - dev_err(sdev->dev, "error: no pipeline widget found for %s\n", - swidget->widget->name); - ret = -EINVAL; - goto widget_free; - } - - ret = sof_widget_setup(sdev, pipe_widget); - if (ret < 0) - goto widget_free; - - /* set up the widget */ - ret = sof_widget_setup(sdev, swidget); - if (ret < 0) { - sof_widget_free(sdev, pipe_widget); - goto widget_free; - } - } - - /* - * error in setting pipeline connections will result in route status being reset for - * routes that were successfully set up when the widgets are freed. - */ - ret = sof_setup_pipeline_connections(sdev, list, dir); - if (ret < 0) - goto widget_free; - - /* complete pipelines */ - for_each_dapm_widgets(list, i, widget) { - struct snd_sof_widget *swidget = widget->dobj.private; - struct snd_sof_widget *pipe_widget; - - if (!swidget) - continue; - - pipe_widget = swidget->pipe_widget; - if (!pipe_widget) { - dev_err(sdev->dev, "error: no pipeline widget found for %s\n", - swidget->widget->name); - ret = -EINVAL; - goto widget_free; - } - - if (pipe_widget->complete) - continue; - - pipe_widget->complete = snd_sof_complete_pipeline(sdev, pipe_widget); - if (pipe_widget->complete < 0) { - ret = pipe_widget->complete; - goto widget_free; - } - } - - return 0; - -widget_free: - /* free all widgets that have been set up successfully */ - for_each_dapm_widgets(list, i, widget) { - struct snd_sof_widget *swidget = widget->dobj.private; - - if (!swidget) - continue; - - if (!num_widgets--) - break; - - sof_widget_free(sdev, swidget); - sof_widget_free(sdev, swidget->pipe_widget); - } - - return ret; -} - -int sof_widget_list_free(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm, int dir) -{ - struct snd_soc_dapm_widget_list *list = spcm->stream[dir].list; - struct snd_soc_dapm_widget *widget; - int i, ret; - int ret1 = 0; - - /* nothing to free */ - if (!list) - return 0; - - /* - * Free widgets in the list. This can fail but continue freeing other widgets to keep - * use_counts balanced. 
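/*
 * Illustrative sketch (not part of the patch): the ordering constraint in
 * the removed sof_widget_list_setup() above. The scheduler (pipe) widget is
 * not part of the connected-widget list and must be set up before each
 * widget of its pipeline; its use_count rises once per such widget so it is
 * freed last. example_setup_with_pipe() is a hypothetical name.
 */
static int example_setup_with_pipe(struct snd_sof_dev *sdev,
				   struct snd_sof_widget *swidget)
{
	int ret = sof_widget_setup(sdev, swidget->pipe_widget);

	if (ret < 0)
		return ret;

	ret = sof_widget_setup(sdev, swidget);
	if (ret < 0)
		sof_widget_free(sdev, swidget->pipe_widget);

	return ret;
}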
- */ - for_each_dapm_widgets(list, i, widget) { - struct snd_sof_widget *swidget = widget->dobj.private; - - if (!swidget) - continue; - - /* - * free widget and its pipe_widget. Either of these can fail, but free as many as - * possible before freeing the list and returning the error. - */ - ret = sof_widget_free(sdev, swidget); - if (ret < 0) - ret1 = ret; - - ret = sof_widget_free(sdev, swidget->pipe_widget); - if (ret < 0) - ret1 = ret; - } - - snd_soc_dapm_dai_free_widgets(&list); - spcm->stream[dir].list = NULL; - - return ret1; -} - /* * helper to determine if there are only D0i3 compatible * streams active @@ -624,6 +93,55 @@ int sof_set_hw_params_upon_resume(struct device *dev) return snd_sof_dsp_hw_params_upon_resume(sdev); } +static int sof_restore_kcontrols(struct device *dev) +{ + struct snd_sof_dev *sdev = dev_get_drvdata(dev); + struct snd_sof_control *scontrol; + int ipc_cmd, ctrl_type; + int ret = 0; + + /* restore kcontrol values */ + list_for_each_entry(scontrol, &sdev->kcontrol_list, list) { + /* reset readback offset for scontrol after resuming */ + scontrol->readback_offset = 0; + + /* notify DSP of kcontrol values */ + switch (scontrol->cmd) { + case SOF_CTRL_CMD_VOLUME: + case SOF_CTRL_CMD_ENUM: + case SOF_CTRL_CMD_SWITCH: + ipc_cmd = SOF_IPC_COMP_SET_VALUE; + ctrl_type = SOF_CTRL_TYPE_VALUE_CHAN_SET; + ret = snd_sof_ipc_set_get_comp_data(scontrol, + ipc_cmd, ctrl_type, + scontrol->cmd, + true); + break; + case SOF_CTRL_CMD_BINARY: + ipc_cmd = SOF_IPC_COMP_SET_DATA; + ctrl_type = SOF_CTRL_TYPE_DATA_SET; + ret = snd_sof_ipc_set_get_comp_data(scontrol, + ipc_cmd, ctrl_type, + scontrol->cmd, + true); + break; + + default: + break; + } + + if (ret < 0) { + dev_err(dev, + "error: failed kcontrol value set for widget: %d\n", + scontrol->comp_id); + + return ret; + } + } + + return 0; +} + const struct sof_ipc_pipe_new *snd_sof_pipeline_find(struct snd_sof_dev *sdev, int pipeline_id) { @@ -640,62 +158,142 @@ const struct sof_ipc_pipe_new *snd_sof_pipeline_find(struct snd_sof_dev *sdev, return NULL; } -int sof_set_up_pipelines(struct snd_sof_dev *sdev, bool verify) +int sof_restore_pipelines(struct device *dev) { - struct sof_ipc_fw_version *v = &sdev->fw_ready.version; + struct snd_sof_dev *sdev = dev_get_drvdata(dev); struct snd_sof_widget *swidget; struct snd_sof_route *sroute; + struct sof_ipc_pipe_new *pipeline; + struct snd_sof_dai *dai; + struct sof_ipc_cmd_hdr *hdr; + struct sof_ipc_comp *comp; + size_t ipc_size; int ret; /* restore pipeline components */ - list_for_each_entry(swidget, &sdev->widget_list, list) { - /* only set up the widgets belonging to static pipelines */ - if (!verify && swidget->dynamic_pipeline_widget) + list_for_each_entry_reverse(swidget, &sdev->widget_list, list) { + struct sof_ipc_comp_reply r; + + /* skip if there is no private data */ + if (!swidget->private) continue; - /* - * For older firmware, skip scheduler widgets in this loop, - * sof_widget_setup() will be called in the 'complete pipeline' loop - */ - if (v->abi_version < SOF_ABI_VER(3, 19, 0) && - swidget->id == snd_soc_dapm_scheduler) - continue; + ret = sof_pipeline_core_enable(sdev, swidget); + if (ret < 0) { + dev_err(dev, + "error: failed to enable target core: %d\n", + ret); - /* update DAI config. 
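/*
 * Illustrative sketch (not part of the patch): the cmd to (ipc_cmd,
 * ctrl_type) mapping that sof_restore_kcontrols() above applies per
 * kcontrol, distilled into a helper with a hypothetical name.
 */
static int example_ctrl_to_ipc(enum sof_ipc_ctrl_cmd cmd, u32 *ipc_cmd,
			       enum sof_ipc_ctrl_type *ctrl_type)
{
	switch (cmd) {
	case SOF_CTRL_CMD_VOLUME:
	case SOF_CTRL_CMD_ENUM:
	case SOF_CTRL_CMD_SWITCH:
		*ipc_cmd = SOF_IPC_COMP_SET_VALUE;
		*ctrl_type = SOF_CTRL_TYPE_VALUE_CHAN_SET;
		return 0;
	case SOF_CTRL_CMD_BINARY:
		*ipc_cmd = SOF_IPC_COMP_SET_DATA;
		*ctrl_type = SOF_CTRL_TYPE_DATA_SET;
		return 0;
	default:
		return -EINVAL;	/* nothing to restore for this control */
	}
}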
The IPC will be sent in sof_widget_setup() */ - if (WIDGET_IS_DAI(swidget->id)) { - struct snd_sof_dai *dai = swidget->private; - struct sof_ipc_dai_config *config; - - if (!dai || !dai->dai_config) - continue; - - config = dai->dai_config; - /* - * The link DMA channel would be invalidated for running - * streams but not for streams that were in the PAUSED - * state during suspend. So invalidate it here before setting - * the dai config in the DSP. - */ - if (config->type == SOF_DAI_INTEL_HDA) - config->hda.link_dma_ch = DMA_CHAN_INVALID; + return ret; } - ret = sof_widget_setup(sdev, swidget); - if (ret < 0) + switch (swidget->id) { + case snd_soc_dapm_dai_in: + case snd_soc_dapm_dai_out: + ipc_size = sizeof(struct sof_ipc_comp_dai) + + sizeof(struct sof_ipc_comp_ext); + comp = kzalloc(ipc_size, GFP_KERNEL); + if (!comp) + return -ENOMEM; + + dai = swidget->private; + memcpy(comp, &dai->comp_dai, + sizeof(struct sof_ipc_comp_dai)); + + /* append extended data to the end of the component */ + memcpy((u8 *)comp + sizeof(struct sof_ipc_comp_dai), + &swidget->comp_ext, sizeof(swidget->comp_ext)); + + ret = sof_ipc_tx_message(sdev->ipc, comp->hdr.cmd, + comp, ipc_size, + &r, sizeof(r)); + kfree(comp); + break; + case snd_soc_dapm_scheduler: + + /* + * During suspend, all DSP cores are powered off. + * Therefore upon resume, create the pipeline comp + * and power up the core that the pipeline is + * scheduled on. + */ + pipeline = swidget->private; + ret = sof_load_pipeline_ipc(dev, pipeline, &r); + break; + default: + hdr = swidget->private; + ret = sof_ipc_tx_message(sdev->ipc, hdr->cmd, + swidget->private, hdr->size, + &r, sizeof(r)); + break; + } + if (ret < 0) { + dev_err(dev, + "error: failed to load widget type %d with ID: %d\n", + swidget->widget->id, swidget->comp_id); + return ret; + } } /* restore pipeline connections */ - list_for_each_entry(sroute, &sdev->route_list, list) { + list_for_each_entry_reverse(sroute, &sdev->route_list, list) { + struct sof_ipc_pipe_comp_connect *connect; + struct sof_ipc_reply reply; - /* only set up routes belonging to static pipelines */ - if (!verify && (sroute->src_widget->dynamic_pipeline_widget || - sroute->sink_widget->dynamic_pipeline_widget)) + /* skip if there's no private data */ + if (!sroute->private) continue; - ret = sof_route_setup_ipc(sdev, sroute); + connect = sroute->private; + + /* send ipc */ + ret = sof_ipc_tx_message(sdev->ipc, + connect->hdr.cmd, + connect, sizeof(*connect), + &reply, sizeof(reply)); if (ret < 0) { - dev_err(sdev->dev, "%s: restore pipeline connections failed\n", __func__); + dev_err(dev, + "error: failed to load route sink %s control %s source %s\n", + sroute->route->sink, + sroute->route->control ? sroute->route->control + : "none", + sroute->route->source); + + return ret; + } + } + + /* restore dai links */ + list_for_each_entry_reverse(dai, &sdev->dai_list, list) { + struct sof_ipc_reply reply; + struct sof_ipc_dai_config *config = &dai->dai_config[dai->current_config]; + + if (!config) { + dev_err(dev, "error: no config for DAI %s\n", + dai->name); + continue; + } + + /* + * The link DMA channel would be invalidated for running + * streams but not for streams that were in the PAUSED + * state during suspend. So invalidate it here before setting + * the dai config in the DSP. 
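/*
 * Illustrative sketch (not part of the patch): the invalidation described in
 * the comment above and applied just after it in the restore loop. Running
 * streams are assigned a fresh link DMA channel on resume, while PAUSED
 * streams kept theirs across suspend, so the stale value must not be
 * re-programmed. example_invalidate_hda_chan() is a hypothetical name.
 */
static void example_invalidate_hda_chan(struct sof_ipc_dai_config *config)
{
	if (config->type == SOF_DAI_INTEL_HDA)
		config->hda.link_dma_ch = DMA_CHAN_INVALID;
}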
+ */ + if (config->type == SOF_DAI_INTEL_HDA) + config->hda.link_dma_ch = DMA_CHAN_INVALID; + + ret = sof_ipc_tx_message(sdev->ipc, + config->hdr.cmd, config, + config->hdr.size, + &reply, sizeof(reply)); + + if (ret < 0) { + dev_err(dev, + "error: failed to set dai config for %s\n", + dai->name); + return ret; } } @@ -704,148 +302,23 @@ int sof_set_up_pipelines(struct snd_sof_dev *sdev, bool verify) list_for_each_entry(swidget, &sdev->widget_list, list) { switch (swidget->id) { case snd_soc_dapm_scheduler: - /* only complete static pipelines */ - if (!verify && swidget->dynamic_pipeline_widget) - continue; - - if (v->abi_version < SOF_ABI_VER(3, 19, 0)) { - ret = sof_widget_setup(sdev, swidget); - if (ret < 0) - return ret; - } - swidget->complete = - snd_sof_complete_pipeline(sdev, swidget); + snd_sof_complete_pipeline(dev, swidget); break; default: break; } } - return 0; -} - -int sof_pcm_stream_free(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream, - struct snd_sof_pcm *spcm, int dir, bool free_widget_list) -{ - int ret; - - /* Send PCM_FREE IPC to reset pipeline */ - ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm); + /* restore pipeline kcontrols */ + ret = sof_restore_kcontrols(dev); if (ret < 0) - return ret; - - /* stop the DMA */ - ret = snd_sof_pcm_platform_hw_free(sdev, substream); - if (ret < 0) - return ret; - - /* free widget list */ - if (free_widget_list) { - ret = sof_widget_list_free(sdev, spcm, dir); - if (ret < 0) - dev_err(sdev->dev, "failed to free widgets during suspend\n"); - } + dev_err(dev, + "error: restoring kcontrols after resume\n"); return ret; } -/* - * Free the PCM, its associated widgets and set the prepared flag to false for all PCMs that - * did not get suspended(ex: paused streams) so the widgets can be set up again during resume. - */ -static int sof_tear_down_left_over_pipelines(struct snd_sof_dev *sdev) -{ - struct snd_sof_widget *swidget; - struct snd_sof_pcm *spcm; - int dir, ret; - - /* - * free all PCMs and their associated DAPM widgets if their connected DAPM widget - * list is not NULL. This should only be true for paused streams at this point. - * This is equivalent to the handling of FE DAI suspend trigger for running streams. - */ - list_for_each_entry(spcm, &sdev->pcm_list, list) - for_each_pcm_streams(dir) { - struct snd_pcm_substream *substream = spcm->stream[dir].substream; - - if (!substream || !substream->runtime) - continue; - - if (spcm->stream[dir].list) { - ret = sof_pcm_stream_free(sdev, substream, spcm, dir, true); - if (ret < 0) - return ret; - } - } - - /* - * free any left over DAI widgets. This is equivalent to the handling of suspend trigger - * for the BE DAI for running streams. - */ - list_for_each_entry(swidget, &sdev->widget_list, list) - if (WIDGET_IS_DAI(swidget->id) && swidget->use_count == 1) { - ret = sof_widget_free(sdev, swidget); - if (ret < 0) - return ret; - } - - return 0; -} - -/* - * For older firmware, this function doesn't free widgets for static pipelines during suspend. - * It only resets use_count for all widgets. - */ -int sof_tear_down_pipelines(struct snd_sof_dev *sdev, bool verify) -{ - struct sof_ipc_fw_version *v = &sdev->fw_ready.version; - struct snd_sof_widget *swidget; - struct snd_sof_route *sroute; - int ret; - - /* - * This function is called during suspend and for one-time topology verification during - * first boot. 
In both cases, there is no need to protect swidget->use_count and - * sroute->setup because during suspend all running streams are suspended and during - * topology loading the sound card unavailable to open PCMs. - */ - list_for_each_entry(swidget, &sdev->widget_list, list) { - if (swidget->dynamic_pipeline_widget) - continue; - - /* Do not free widgets for static pipelines with FW ABI older than 3.19 */ - if (!verify && !swidget->dynamic_pipeline_widget && - v->abi_version < SOF_ABI_VER(3, 19, 0)) { - swidget->use_count = 0; - swidget->complete = 0; - continue; - } - - ret = sof_widget_free(sdev, swidget); - if (ret < 0) - return ret; - } - - /* - * Tear down all pipelines associated with PCMs that did not get suspended - * and unset the prepare flag so that they can be set up again during resume. - * Skip this step for older firmware. - */ - if (!verify && v->abi_version >= SOF_ABI_VER(3, 19, 0)) { - ret = sof_tear_down_left_over_pipelines(sdev); - if (ret < 0) { - dev_err(sdev->dev, "failed to tear down paused pipelines\n"); - return ret; - } - } - - list_for_each_entry(sroute, &sdev->route_list, list) - sroute->setup = false; - - return 0; -} - /* * Generic object lookup APIs. */ @@ -1027,10 +500,9 @@ int sof_machine_check(struct snd_sof_dev *sdev) if (!IS_ENABLED(CONFIG_SND_SOC_SOF_FORCE_NOCODEC_MODE)) { /* find machine */ - mach = snd_sof_machine_select(sdev); - if (mach) { - sof_pdata->machine = mach; - snd_sof_set_mach_params(mach, sdev); + snd_sof_machine_select(sdev); + if (sof_pdata->machine) { + snd_sof_set_mach_params(sof_pdata->machine, sdev); return 0; } @@ -1052,7 +524,7 @@ int sof_machine_check(struct snd_sof_dev *sdev) sof_pdata->tplg_filename = desc->nocodec_tplg_filename; sof_pdata->machine = mach; - snd_sof_set_mach_params(mach, sdev); + snd_sof_set_mach_params(sof_pdata->machine, sdev); return 0; } diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index f3009e6b91..dc274e63ed 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -28,17 +28,13 @@ #define DMA_CHAN_INVALID 0xFFFFFFFF -#define WIDGET_IS_DAI(id) ((id) == snd_soc_dapm_dai_in || (id) == snd_soc_dapm_dai_out) - /* PCM stream, mapped to FW component */ struct snd_sof_pcm_stream { u32 comp_id; struct snd_dma_buffer page_table; struct sof_ipc_stream_posn posn; struct snd_pcm_substream *substream; - struct snd_compr_stream *cstream; struct work_struct period_elapsed_work; - struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */ bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */ /* * flag to indicate that the DSP pipelines should be kept @@ -70,45 +66,28 @@ struct snd_sof_control { int min_volume_step; /* min volume step for volume_table */ int max_volume_step; /* max volume step for volume_table */ int num_channels; - unsigned int access; u32 readback_offset; /* offset to mmapped data if used */ struct sof_ipc_ctrl_data *control_data; u32 size; /* cdata size */ + enum sof_ipc_ctrl_cmd cmd; u32 *volume_table; /* volume table computed from tlv data*/ struct list_head list; /* list in sdev control list */ struct snd_sof_led_control led_ctl; - - /* if true, the control's data needs to be updated from Firmware */ - bool comp_data_dirty; }; -struct snd_sof_widget; - /* ASoC SOF DAPM widget */ struct snd_sof_widget { struct snd_soc_component *scomp; int comp_id; int pipeline_id; int complete; - int use_count; /* use_count will be protected by the PCM mutex held by the core */ int core; int id; - /* - * Flag indicating if the widget should be 
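/*
 * Illustrative sketch (not part of the patch): the ABI gate used by the
 * removed teardown path in the hunk above. Static-pipeline widgets are only
 * freed on suspend when the firmware ABI is 3.19 or newer; older firmware
 * just has its use_counts reset. example_can_free_static_widgets() is a
 * hypothetical name.
 */
static bool example_can_free_static_widgets(const struct sof_ipc_fw_version *v)
{
	return v->abi_version >= SOF_ABI_VER(3, 19, 0);
}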
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index f3009e6b91..dc274e63ed 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -28,17 +28,13 @@
 #define DMA_CHAN_INVALID	0xFFFFFFFF
 
-#define WIDGET_IS_DAI(id) ((id) == snd_soc_dapm_dai_in || (id) == snd_soc_dapm_dai_out)
-
 /* PCM stream, mapped to FW component */
 struct snd_sof_pcm_stream {
 	u32 comp_id;
 	struct snd_dma_buffer page_table;
 	struct sof_ipc_stream_posn posn;
 	struct snd_pcm_substream *substream;
-	struct snd_compr_stream *cstream;
 	struct work_struct period_elapsed_work;
-	struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */
 	bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */
 	/*
 	 * flag to indicate that the DSP pipelines should be kept
@@ -70,45 +66,28 @@ struct snd_sof_control {
 	int min_volume_step; /* min volume step for volume_table */
 	int max_volume_step; /* max volume step for volume_table */
 	int num_channels;
-	unsigned int access;
 	u32 readback_offset; /* offset to mmapped data if used */
 	struct sof_ipc_ctrl_data *control_data;
 	u32 size;	/* cdata size */
+	enum sof_ipc_ctrl_cmd cmd;
 	u32 *volume_table; /* volume table computed from tlv data*/
 	struct list_head list; /* list in sdev control list */
 	struct snd_sof_led_control led_ctl;
-
-	/* if true, the control's data needs to be updated from Firmware */
-	bool comp_data_dirty;
 };
 
-struct snd_sof_widget;
-
 /* ASoC SOF DAPM widget */
 struct snd_sof_widget {
 	struct snd_soc_component *scomp;
 	int comp_id;
 	int pipeline_id;
 	int complete;
-	int use_count; /* use_count will be protected by the PCM mutex held by the core */
 	int core;
 	int id;
 
-	/*
-	 * Flag indicating if the widget should be set up dynamically when a PCM is opened.
-	 * This flag is only set for the scheduler type widget in topology. During topology
-	 * loading, this flag is propagated to all the widgets belonging to the same pipeline.
-	 * When this flag is not set, a widget is set up at the time of topology loading
-	 * and retained until the DSP enters D3. It will need to be set up again when resuming
-	 * from D3.
-	 */
-	bool dynamic_pipeline_widget;
-
 	struct snd_soc_dapm_widget *widget;
 	struct list_head list;	/* list in sdev widget list */
-	struct snd_sof_widget *pipe_widget;
 
 	/* extended data for UUID components */
 	struct sof_ipc_comp_ext comp_ext;
@@ -122,9 +101,6 @@ struct snd_sof_route {
 	struct snd_soc_dapm_route *route;
 	struct list_head list;	/* list in sdev route list */
-	struct snd_sof_widget *src_widget;
-	struct snd_sof_widget *sink_widget;
-	bool setup;
 
 	void *private;
 };
@@ -133,11 +109,11 @@ struct snd_sof_route {
 struct snd_sof_dai {
 	struct snd_soc_component *scomp;
 	const char *name;
+	const char *cpu_dai_name;
 
 	struct sof_ipc_comp_dai comp_dai;
 	int number_configs;
 	int current_config;
-	bool configured; /* DAI configured during BE hw_params */
 	struct sof_ipc_dai_config *dai_config;
 	struct list_head list;	/* list in sdev dai list */
 };
@@ -172,8 +148,6 @@ int snd_sof_bytes_ext_get(struct snd_kcontrol *kcontrol,
 			  unsigned int size);
 int snd_sof_bytes_ext_volatile_get(struct snd_kcontrol *kcontrol, unsigned int __user *binary_data,
 				   unsigned int size);
-void snd_sof_control_notify(struct snd_sof_dev *sdev,
-			    struct sof_ipc_ctrl_data *cdata);
 
 /*
  * Topology.
@@ -181,9 +155,15 @@ void snd_sof_control_notify(struct snd_sof_dev *sdev,
 * be freed by snd_soc_unregister_component,
 */
 int snd_sof_load_topology(struct snd_soc_component *scomp, const char *file);
-int snd_sof_complete_pipeline(struct snd_sof_dev *sdev,
+int snd_sof_complete_pipeline(struct device *dev,
 			      struct snd_sof_widget *swidget);
 
+int sof_load_pipeline_ipc(struct device *dev,
+			  struct sof_ipc_pipe_new *pipeline,
+			  struct sof_ipc_comp_reply *r);
+int sof_pipeline_core_enable(struct snd_sof_dev *sdev,
+			     const struct snd_sof_widget *swidget);
+
 /*
 * Stream IPC
 */
@@ -225,27 +205,22 @@ struct snd_sof_pcm *snd_sof_find_spcm_pcm_id(struct snd_soc_component *scomp,
 const struct sof_ipc_pipe_new *snd_sof_pipeline_find(struct snd_sof_dev *sdev,
 						     int pipeline_id);
 void snd_sof_pcm_period_elapsed(struct snd_pcm_substream *substream);
-void snd_sof_pcm_init_elapsed_work(struct work_struct *work);
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMPRESS)
-void snd_sof_compr_fragment_elapsed(struct snd_compr_stream *cstream);
-void snd_sof_compr_init_elapsed_work(struct work_struct *work);
-#else
-static inline void snd_sof_compr_fragment_elapsed(struct snd_compr_stream *cstream) { }
-static inline void snd_sof_compr_init_elapsed_work(struct work_struct *work) { }
-#endif
+void snd_sof_pcm_period_elapsed_work(struct work_struct *work);
 
 /*
 * Mixer IPC
 */
-int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol, bool set);
+int snd_sof_ipc_set_get_comp_data(struct snd_sof_control *scontrol,
+				  u32 ipc_cmd,
+				  enum sof_ipc_ctrl_type ctrl_type,
+				  enum sof_ipc_ctrl_cmd ctrl_cmd,
+				  bool send);
 
 /* DAI link fixup */
 int sof_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params);
 
 /* PM */
-int sof_set_up_pipelines(struct snd_sof_dev *sdev, bool verify);
-int sof_tear_down_pipelines(struct snd_sof_dev *sdev, bool verify);
+int sof_restore_pipelines(struct device *dev);
 int sof_set_hw_params_upon_resume(struct device *dev);
 bool snd_sof_stream_suspend_ignored(struct snd_sof_dev *sdev);
 bool snd_sof_dsp_only_d0i3_compatible_stream_active(struct snd_sof_dev *sdev);
@@ -254,14 +229,4 @@ bool snd_sof_dsp_only_d0i3_compatible_stream_active(struct snd_sof_dev *sdev);
 int sof_machine_register(struct snd_sof_dev *sdev, void *pdata);
 void sof_machine_unregister(struct snd_sof_dev *sdev, void *pdata);
 
-int sof_widget_setup(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget);
-int sof_widget_free(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget);
-
-/* PCM */
-int sof_widget_list_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm, int dir);
-int sof_widget_list_free(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm, int dir);
-int sof_pcm_dsp_pcm_free(struct snd_pcm_substream *substream, struct snd_sof_dev *sdev,
-			 struct snd_sof_pcm *spcm);
-int sof_pcm_stream_free(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream,
-			struct snd_sof_pcm *spcm, int dir, bool free_widget_list);
 #endif
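The restored five-argument snd_sof_ipc_set_get_comp_data() makes the caller spell out the IPC command and control type. A sketch of a read-back call, mirroring the snd_sof_cache_kcontrol_val() loop added to topology.c later in this patch (the helper name is illustrative):

	/* sketch: refresh a value-type control from the DSP; send=false means
	 * the result is only cached in scontrol->control_data
	 */
	static int example_refresh_control(struct snd_sof_control *scontrol)
	{
		return snd_sof_ipc_set_get_comp_data(scontrol,
						     SOF_IPC_COMP_GET_VALUE,
						     SOF_CTRL_TYPE_VALUE_CHAN_GET,
						     scontrol->cmd,
						     false);
	}
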
diff --git a/sound/soc/sof/sof-of-dev.c b/sound/soc/sof/sof-of-dev.c
index e3718638f9..d1a21edfa0 100644
--- a/sound/soc/sof/sof-of-dev.c
+++ b/sound/soc/sof/sof-of-dev.c
@@ -7,41 +7,65 @@
 #include
 #include
-#include
 #include
 #include
-#include "sof-of-dev.h"
 #include "ops.h"
 
-static char *fw_path;
-module_param(fw_path, charp, 0444);
-MODULE_PARM_DESC(fw_path, "alternate path for SOF firmware.");
+extern struct snd_sof_dsp_ops sof_imx8_ops;
+extern struct snd_sof_dsp_ops sof_imx8x_ops;
+extern struct snd_sof_dsp_ops sof_imx8m_ops;
 
-static char *tplg_path;
-module_param(tplg_path, charp, 0444);
-MODULE_PARM_DESC(tplg_path, "alternate path for SOF topology.");
+/* platform specific devices */
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_IMX8)
+static struct sof_dev_desc sof_of_imx8qxp_desc = {
+	.default_fw_path = "imx/sof",
+	.default_tplg_path = "imx/sof-tplg",
+	.default_fw_filename = "sof-imx8x.ri",
+	.nocodec_tplg_filename = "sof-imx8-nocodec.tplg",
+	.ops = &sof_imx8x_ops,
+};
 
-const struct dev_pm_ops sof_of_pm = {
+static struct sof_dev_desc sof_of_imx8qm_desc = {
+	.default_fw_path = "imx/sof",
+	.default_tplg_path = "imx/sof-tplg",
+	.default_fw_filename = "sof-imx8.ri",
+	.nocodec_tplg_filename = "sof-imx8-nocodec.tplg",
+	.ops = &sof_imx8_ops,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_IMX8M)
+static struct sof_dev_desc sof_of_imx8mp_desc = {
+	.default_fw_path = "imx/sof",
+	.default_tplg_path = "imx/sof-tplg",
+	.default_fw_filename = "sof-imx8m.ri",
+	.nocodec_tplg_filename = "sof-imx8-nocodec.tplg",
+	.ops = &sof_imx8m_ops,
+};
+#endif
+
+static const struct dev_pm_ops sof_of_pm = {
 	.prepare = snd_sof_prepare,
 	.complete = snd_sof_complete,
 	SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume)
 	SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume, NULL)
 };
-EXPORT_SYMBOL(sof_of_pm);
 
 static void sof_of_probe_complete(struct device *dev)
 {
 	/* allow runtime_pm */
 	pm_runtime_set_autosuspend_delay(dev, SND_SOF_SUSPEND_DELAY_MS);
 	pm_runtime_use_autosuspend(dev);
-	pm_runtime_mark_last_busy(dev);
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
 }
 
-int sof_of_probe(struct platform_device *pdev)
+static int sof_of_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct sof_dev_desc *desc;
@@ -66,15 +90,9 @@ int sof_of_probe(struct platform_device *pdev)
 	sof_pdata->dev = &pdev->dev;
 	sof_pdata->fw_filename = desc->default_fw_filename;
 
-	if (fw_path)
-		sof_pdata->fw_filename_prefix = fw_path;
-	else
-		sof_pdata->fw_filename_prefix = sof_pdata->desc->default_fw_path;
-
-	if (tplg_path)
-		sof_pdata->tplg_filename_prefix = tplg_path;
-	else
-		sof_pdata->tplg_filename_prefix = sof_pdata->desc->default_tplg_path;
+	/* TODO: read alternate fw and tplg filenames from DT */
+	sof_pdata->fw_filename_prefix = sof_pdata->desc->default_fw_path;
+	sof_pdata->tplg_filename_prefix = sof_pdata->desc->default_tplg_path;
 
 	/* set callback to be called on successful device probe to enable runtime_pm */
 	sof_pdata->sof_probe_complete = sof_of_probe_complete;
@@ -82,9 +100,8 @@ int sof_of_probe(struct platform_device *pdev)
 	/* call sof helper for DSP hardware probe */
 	return snd_sof_device_probe(dev, sof_pdata);
 }
-EXPORT_SYMBOL(sof_of_probe);
 
-int sof_of_remove(struct platform_device *pdev)
+static int sof_of_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
@@ -93,6 +110,29 @@ int sof_of_remove(struct platform_device *pdev)
 	return 0;
 }
-EXPORT_SYMBOL(sof_of_remove);
+
+static const struct of_device_id sof_of_ids[] = {
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_IMX8)
+	{ .compatible = "fsl,imx8qxp-dsp", .data = &sof_of_imx8qxp_desc},
+	{ .compatible = "fsl,imx8qm-dsp", .data = &sof_of_imx8qm_desc},
+#endif
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_IMX8M)
+	{ .compatible = "fsl,imx8mp-dsp", .data = &sof_of_imx8mp_desc},
+#endif
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sof_of_ids);
+
+/* DT driver definition */
+static struct platform_driver snd_sof_of_driver = {
+	.probe = sof_of_probe,
+	.remove = sof_of_remove,
+	.driver = {
+		.name = "sof-audio-of",
+		.pm = &sof_of_pm,
+		.of_match_table = sof_of_ids,
+	},
+};
+module_platform_driver(snd_sof_of_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
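The match table above wires one sof_dev_desc to each compatible string via the .data pointer. A hedged sketch of how probe code conventionally recovers it (the SOF probe body is outside this hunk, so the exact call used there is an assumption):

	#include <linux/of_device.h>

	/* sketch: .data from the matched of_device_id entry comes back as-is */
	static const struct sof_dev_desc *example_match_desc(struct platform_device *pdev)
	{
		return of_device_get_match_data(&pdev->dev);
	}
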
diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c
index 20c6ca37db..bc9e707656 100644
--- a/sound/soc/sof/sof-pci-dev.c
+++ b/sound/soc/sof/sof-pci-dev.c
@@ -59,23 +59,22 @@ static const struct dmi_system_id sof_tplg_table[] = {
 		},
 		.driver_data = "sof-adl-rt5682-ssp0-max98373-ssp2.tplg",
 	},
-	{
-		.callback = sof_tplg_cb,
-		.matches = {
-			DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Brya"),
-			DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98390_ALC5682I_I2S"),
-		},
-		.driver_data = "sof-adl-max98390-ssp2-rt5682-ssp0.tplg",
-	},
-
 	{}
 };
 
 static const struct dmi_system_id community_key_platforms[] = {
 	{
-		.ident = "Up boards",
+		.ident = "Up Squared",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+			DMI_MATCH(DMI_BOARD_NAME, "UP-APL01"),
+		}
+	},
+	{
+		.ident = "Up Extreme",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+			DMI_MATCH(DMI_BOARD_NAME, "UP-WHL01"),
 		}
 	},
 	{
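Both tables follow the stock DMI quirk idiom: dmi_check_system() walks the table and fires .callback for every matching entry, and the callback can stash .driver_data (here a topology filename) before stopping the walk. A minimal sketch with illustrative names:

	#include <linux/dmi.h>

	static const char *example_tplg_name;

	/* sketch: invoked by dmi_check_system(sof_tplg_table) on a match */
	static int example_tplg_cb(const struct dmi_system_id *id)
	{
		example_tplg_name = id->driver_data;
		return 1;	/* non-zero stops the table walk */
	}
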
diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h
index 087935192c..fd8423172d 100644
--- a/sound/soc/sof/sof-priv.h
+++ b/sound/soc/sof/sof-priv.h
@@ -20,31 +20,19 @@
 #include
 #include
 
-/* Flag definitions used in sof_core_debug (sof_debug module parameter) */
+/* debug flags */
 #define SOF_DBG_ENABLE_TRACE	BIT(0)
 #define SOF_DBG_RETAIN_CTX	BIT(1)	/* prevent DSP D3 on FW exception */
-#define SOF_DBG_VERIFY_TPLG	BIT(2) /* verify topology during load */
-#define SOF_DBG_DYNAMIC_PIPELINES_OVERRIDE	BIT(3) /* 0: use topology token
-							* 1: override topology
-							*/
-#define SOF_DBG_DYNAMIC_PIPELINES_ENABLE	BIT(4) /* 0: use static pipelines
-							* 1: use dynamic pipelines
-							*/
-#define SOF_DBG_DISABLE_MULTICORE		BIT(5) /* schedule all pipelines/widgets
-							* on primary core
-							*/
-#define SOF_DBG_PRINT_ALL_DUMPS		BIT(6) /* Print all ipc and dsp dumps */
 
-/* Flag definitions used for controlling the DSP dump behavior */
 #define SOF_DBG_DUMP_REGS		BIT(0)
 #define SOF_DBG_DUMP_MBOX		BIT(1)
 #define SOF_DBG_DUMP_TEXT		BIT(2)
 #define SOF_DBG_DUMP_PCI		BIT(3)
-/* Output this dump (at the DEBUG level) only when SOF_DBG_PRINT_ALL_DUMPS is set */
-#define SOF_DBG_DUMP_OPTIONAL		BIT(4)
+#define SOF_DBG_DUMP_FORCE_ERR_LEVEL	BIT(4) /* used to dump dsp status with error log level */
+
 
 /* global debug state set by SOF_DBG_ flags */
-bool sof_debug_check_flag(int mask);
+extern int sof_core_debug;
 
 /* max BARs mmaped devices can use */
 #define SND_SOF_BARS	8
@@ -73,9 +61,6 @@ bool sof_debug_check_flag(int mask);
 /* So far the primary core on all DSPs has ID 0 */
 #define SOF_DSP_PRIMARY_CORE	0
 
-/* max number of DSP cores */
-#define SOF_MAX_DSP_NUM_CORES	8
-
 /* DSP power state */
 enum sof_dsp_power_states {
 	SOF_DSP_PM_D0,
@@ -98,16 +83,6 @@ enum sof_system_suspend_state {
 	SOF_SUSPEND_S3,
 };
 
-enum sof_dfsentry_type {
-	SOF_DFSENTRY_TYPE_IOMEM = 0,
-	SOF_DFSENTRY_TYPE_BUF,
-};
-
-enum sof_debugfs_access_type {
-	SOF_DEBUGFS_ACCESS_ALWAYS = 0,
-	SOF_DEBUGFS_ACCESS_D0_ONLY,
-};
-
 struct snd_sof_dev;
 struct snd_sof_ipc_msg;
 struct snd_sof_ipc;
@@ -132,8 +107,10 @@ struct snd_sof_dsp_ops {
 	int (*run)(struct snd_sof_dev *sof_dev); /* mandatory */
 	int (*stall)(struct snd_sof_dev *sof_dev, unsigned int core_mask); /* optional */
 	int (*reset)(struct snd_sof_dev *sof_dev); /* optional */
-	int (*core_get)(struct snd_sof_dev *sof_dev, int core); /* optional */
-	int (*core_put)(struct snd_sof_dev *sof_dev, int core); /* optional */
+	int (*core_power_up)(struct snd_sof_dev *sof_dev,
+			     unsigned int core_mask); /* optional */
+	int (*core_power_down)(struct snd_sof_dev *sof_dev,
+			       unsigned int core_mask); /* optional */
 
 	/*
 	 * Register IO: only used by respective drivers themselves,
@@ -150,20 +127,12 @@ struct snd_sof_dsp_ops {
 			  void __iomem *addr); /* optional */
 
 	/* memcpy IO */
-	int (*block_read)(struct snd_sof_dev *sof_dev,
-			  enum snd_sof_fw_blk_type type, u32 offset,
-			  void *dest, size_t size); /* mandatory */
-	int (*block_write)(struct snd_sof_dev *sof_dev,
-			   enum snd_sof_fw_blk_type type, u32 offset,
-			   void *src, size_t size); /* mandatory */
-
-	/* Mailbox IO */
-	void (*mailbox_read)(struct snd_sof_dev *sof_dev,
-			     u32 offset, void *dest,
-			     size_t size); /* optional */
-	void (*mailbox_write)(struct snd_sof_dev *sof_dev,
-			      u32 offset, void *src,
-			      size_t size); /* optional */
+	void (*block_read)(struct snd_sof_dev *sof_dev, u32 bar,
+			   u32 offset, void *dest,
+			   size_t size); /* mandatory */
+	void (*block_write)(struct snd_sof_dev *sof_dev, u32 bar,
+			    u32 offset, void *src,
+			    size_t size); /* mandatory */
 
 	/* doorbell */
 	irqreturn_t (*irq_handler)(int irq, void *context); /* optional */
@@ -209,9 +178,6 @@ struct snd_sof_dsp_ops {
 	snd_pcm_uframes_t (*pcm_pointer)(struct snd_sof_dev *sdev,
 					 struct snd_pcm_substream *substream); /* optional */
 
-	/* pcm ack */
-	int (*pcm_ack)(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); /* optional */
-
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_PROBES)
 	/* Except for probe_pointer, all probe ops are mandatory */
 	int (*probe_assign)(struct snd_sof_dev *sdev,
@@ -234,9 +200,9 @@ struct snd_sof_dsp_ops {
 #endif
 
 	/* host read DSP stream data */
-	int (*ipc_msg_data)(struct snd_sof_dev *sdev,
-			    struct snd_pcm_substream *substream,
-			    void *p, size_t sz); /* mandatory */
+	void (*ipc_msg_data)(struct snd_sof_dev *sdev,
+			     struct snd_pcm_substream *substream,
+			     void *p, size_t sz); /* mandatory */
 
 	/* host configure DSP HW parameters */
 	int (*ipc_pcm_params)(struct snd_sof_dev *sdev,
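The restored block_read/block_write ops address memory by BAR index rather than by firmware block type. A sketch of the usual memcpy-based implementation, assuming the BARs were ioremapped into sdev->bar[] as the mmaped SOF platforms do:

	#include <linux/io.h>

	/* sketch: raw I/O against an ioremapped BAR */
	static void example_block_read(struct snd_sof_dev *sdev, u32 bar,
				       u32 offset, void *dest, size_t size)
	{
		memcpy_fromio(dest, sdev->bar[bar] + offset, size);
	}

	static void example_block_write(struct snd_sof_dev *sdev, u32 bar,
					u32 offset, void *src, size_t size)
	{
		memcpy_toio(sdev->bar[bar] + offset, src, size);
	}
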
@@ -271,10 +237,6 @@ struct snd_sof_dsp_ops {
 	void (*dbg_dump)(struct snd_sof_dev *sof_dev, u32 flags); /* optional */
 	void (*ipc_dump)(struct snd_sof_dev *sof_dev); /* optional */
-	int (*debugfs_add_region_item)(struct snd_sof_dev *sdev,
-				       enum snd_sof_fw_blk_type blk_type, u32 offset,
-				       size_t size, const char *name,
-				       enum sof_debugfs_access_type access_type); /* optional */
 
 	/* host DMA trace initialization */
 	int (*trace_init)(struct snd_sof_dev *sdev,
@@ -295,8 +257,8 @@ struct snd_sof_dsp_ops {
 				 void *pdata); /* optional */
 	void (*machine_unregister)(struct snd_sof_dev *sdev,
 				   void *pdata); /* optional */
-	struct snd_soc_acpi_mach * (*machine_select)(struct snd_sof_dev *sdev); /* optional */
-	void (*set_mach_params)(struct snd_soc_acpi_mach *mach,
+	void (*machine_select)(struct snd_sof_dev *sdev); /* optional */
+	void (*set_mach_params)(const struct snd_soc_acpi_mach *mach,
 				struct snd_sof_dev *sdev); /* optional */
 
 	/* DAI ops */
@@ -306,17 +268,33 @@ struct snd_sof_dsp_ops {
 	/* ALSA HW info flags, will be stored in snd_pcm_runtime.hw.info */
 	u32 hw_info;
 
-	const struct dsp_arch_ops *dsp_arch_ops;
+	const struct sof_arch_ops *arch_ops;
 };
 
 /* DSP architecture specific callbacks for oops and stack dumps */
-struct dsp_arch_ops {
-	void (*dsp_oops)(struct snd_sof_dev *sdev, const char *level, void *oops);
-	void (*dsp_stack)(struct snd_sof_dev *sdev, const char *level, void *oops,
+struct sof_arch_ops {
+	void (*dsp_oops)(struct snd_sof_dev *sdev, void *oops);
+	void (*dsp_stack)(struct snd_sof_dev *sdev, void *oops,
 			  u32 *stack, u32 stack_words);
 };
 
-#define sof_dsp_arch_ops(sdev) ((sdev)->pdata->desc->ops->dsp_arch_ops)
+#define sof_arch_ops(sdev) ((sdev)->pdata->desc->ops->arch_ops)
+
+/* DSP device HW descriptor mapping between bus ID and ops */
+struct sof_ops_table {
+	const struct sof_dev_desc *desc;
+	const struct snd_sof_dsp_ops *ops;
+};
+
+enum sof_dfsentry_type {
+	SOF_DFSENTRY_TYPE_IOMEM = 0,
+	SOF_DFSENTRY_TYPE_BUF,
+};
+
+enum sof_debugfs_access_type {
+	SOF_DEBUGFS_ACCESS_ALWAYS = 0,
+	SOF_DEBUGFS_ACCESS_D0_ONLY,
+};
 
 /* FS entry for debug files that can expose DSP memories, registers */
 struct snd_sof_dfsentry {
@@ -331,10 +309,6 @@ struct snd_sof_dfsentry {
 	enum sof_debugfs_access_type access_type;
 #if ENABLE_DEBUGFS_CACHEBUF
 	char *cache_buf; /* buffer to cache the contents of debugfs memory */
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_IPC_MSG_INJECTOR)
-	void *msg_inject_tx;
-	void *msg_inject_rx;
 #endif
 	struct snd_sof_dev *sdev;
 	struct list_head list; /* list in sdev dfsentry list */
@@ -377,6 +351,15 @@ struct snd_sof_ipc_msg {
 	bool ipc_complete;
 };
 
+enum snd_sof_fw_state {
+	SOF_FW_BOOT_NOT_STARTED = 0,
+	SOF_FW_BOOT_PREPARE,
+	SOF_FW_BOOT_IN_PROGRESS,
+	SOF_FW_BOOT_FAILED,
+	SOF_FW_BOOT_READY_FAILED, /* firmware booted but fw_ready op failed */
+	SOF_FW_BOOT_COMPLETE,
+};
+
 /*
 * SOF Device Level.
 */
@@ -401,7 +384,7 @@ struct snd_sof_dev {
 
 	/* DSP firmware boot */
 	wait_queue_head_t boot_wait;
-	enum sof_fw_state fw_state;
+	enum snd_sof_fw_state fw_state;
 	bool first_boot;
 
 	/* work queue in case the probe is implemented in two steps */
@@ -430,8 +413,6 @@ struct snd_sof_dev {
 	/* debug */
 	struct dentry *debugfs_root;
 	struct list_head dfsentry_list;
-	bool dbg_dump_printed;
-	bool ipc_dump_printed;
 
 	/* firmware loader */
 	struct snd_dma_buffer dmab;
@@ -474,18 +455,6 @@ struct snd_sof_dev {
 
 	bool msi_enabled;
 
-	/* DSP core context */
-	u32 num_cores;
-
-	/*
-	 * ref count per core that will be modified during system suspend/resume and during pcm
-	 * hw_params/hw_free. This doesn't need to be protected with a mutex because pcm
-	 * hw_params/hw_free are already protected by the PCM mutex in the ALSA framework in
-	 * sound/core/ when streams are active and during system suspend/resume, streams are
-	 * already suspended.
-	 */
-	int dsp_core_ref_count[SOF_MAX_DSP_NUM_CORES];
-
 	void *private;			/* core does not touch this */
 };
 
@@ -516,23 +485,27 @@ int snd_sof_create_page_table(struct device *dev,
 /*
 * Firmware loading.
 */
+int snd_sof_load_firmware(struct snd_sof_dev *sdev);
 int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev);
 int snd_sof_load_firmware_memcpy(struct snd_sof_dev *sdev);
 int snd_sof_run_firmware(struct snd_sof_dev *sdev);
 int snd_sof_parse_module_memcpy(struct snd_sof_dev *sdev,
 				struct snd_sof_mod_hdr *module);
 void snd_sof_fw_unload(struct snd_sof_dev *sdev);
+int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 bar, u32 offset);
 
 /*
 * IPC low level APIs.
 */
 struct snd_sof_ipc *snd_sof_ipc_init(struct snd_sof_dev *sdev);
 void snd_sof_ipc_free(struct snd_sof_dev *sdev);
-void snd_sof_ipc_get_reply(struct snd_sof_dev *sdev);
 void snd_sof_ipc_reply(struct snd_sof_dev *sdev, u32 msg_id);
 void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev);
 int snd_sof_ipc_stream_pcm_params(struct snd_sof_dev *sdev,
 				  struct sof_ipc_pcm_params *params);
+int snd_sof_dsp_mailbox_init(struct snd_sof_dev *sdev, u32 dspbox,
+			     size_t dspbox_size, u32 hostbox,
+			     size_t hostbox_size);
 int snd_sof_ipc_valid(struct snd_sof_dev *sdev);
 int sof_ipc_tx_message(struct snd_sof_ipc *ipc, u32 header,
 		       void *msg_data, size_t msg_bytes, void *reply_data,
@@ -540,12 +513,6 @@ int sof_ipc_tx_message(struct snd_sof_ipc *ipc, u32 header,
 int sof_ipc_tx_message_no_pm(struct snd_sof_ipc *ipc, u32 header,
 			     void *msg_data, size_t msg_bytes,
 			     void *reply_data, size_t reply_bytes);
-int sof_ipc_init_msg_memory(struct snd_sof_dev *sdev);
-static inline void snd_sof_ipc_process_reply(struct snd_sof_dev *sdev, u32 msg_id)
-{
-	snd_sof_ipc_get_reply(sdev);
-	snd_sof_ipc_reply(sdev, msg_id);
-}
 
 /*
 * Trace/debug
@@ -555,53 +522,45 @@ void snd_sof_release_trace(struct snd_sof_dev *sdev);
 void snd_sof_free_trace(struct snd_sof_dev *sdev);
 int snd_sof_dbg_init(struct snd_sof_dev *sdev);
 void snd_sof_free_debug(struct snd_sof_dev *sdev);
+int snd_sof_debugfs_io_item(struct snd_sof_dev *sdev,
+			    void __iomem *base, size_t size,
+			    const char *name,
+			    enum sof_debugfs_access_type access_type);
 int snd_sof_debugfs_buf_item(struct snd_sof_dev *sdev,
 			     void *base, size_t size,
 			     const char *name, mode_t mode);
 int snd_sof_trace_update_pos(struct snd_sof_dev *sdev,
 			     struct sof_ipc_dma_trace_posn *posn);
 void snd_sof_trace_notify_for_error(struct snd_sof_dev *sdev);
-void sof_print_oops_and_stack(struct snd_sof_dev *sdev, const char *level,
-			      u32 panic_code, u32 tracep_code, void *oops,
-			      struct sof_ipc_panic_info *panic_info,
-			      void *stack, size_t stack_words);
+void snd_sof_get_status(struct snd_sof_dev *sdev, u32 panic_code,
+			u32 tracep_code, void *oops,
+			struct sof_ipc_panic_info *panic_info,
+			void *stack, size_t stack_words);
 int snd_sof_init_trace_ipc(struct snd_sof_dev *sdev);
 void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev);
 int snd_sof_dbg_memory_info_init(struct snd_sof_dev *sdev);
-int snd_sof_debugfs_add_region_item_iomem(struct snd_sof_dev *sdev,
-		enum snd_sof_fw_blk_type blk_type, u32 offset, size_t size,
-		const char *name, enum sof_debugfs_access_type access_type);
+
+/*
+ * Platform specific ops.
+ */
+extern struct snd_compress_ops sof_compressed_ops;
 
 /*
 * DSP Architectures.
 */
-static inline void sof_stack(struct snd_sof_dev *sdev, const char *level,
-			     void *oops, u32 *stack, u32 stack_words)
+static inline void sof_stack(struct snd_sof_dev *sdev, void *oops, u32 *stack,
+			     u32 stack_words)
 {
-	sof_dsp_arch_ops(sdev)->dsp_stack(sdev, level, oops, stack,
-					  stack_words);
+	sof_arch_ops(sdev)->dsp_stack(sdev, oops, stack, stack_words);
 }
 
-static inline void sof_oops(struct snd_sof_dev *sdev, const char *level, void *oops)
+static inline void sof_oops(struct snd_sof_dev *sdev, void *oops)
{
-	if (sof_dsp_arch_ops(sdev)->dsp_oops)
-		sof_dsp_arch_ops(sdev)->dsp_oops(sdev, level, oops);
+	if (sof_arch_ops(sdev)->dsp_oops)
+		sof_arch_ops(sdev)->dsp_oops(sdev, oops);
 }
 
-extern const struct dsp_arch_ops sof_xtensa_arch_ops;
-
-/*
- * Firmware state tracking
- */
-static inline void sof_set_fw_state(struct snd_sof_dev *sdev,
-				    enum sof_fw_state new_state)
-{
-	if (sdev->fw_state == new_state)
-		return;
-
-	dev_dbg(sdev->dev, "fw_state change: %d -> %d\n", sdev->fw_state, new_state);
-	sdev->fw_state = new_state;
-}
+extern const struct sof_arch_ops sof_xtensa_arch_ops;
 
 /*
 * Utilities
@@ -614,24 +573,33 @@ void sof_mailbox_write(struct snd_sof_dev *sdev, u32 offset,
 		       void *message, size_t bytes);
 void sof_mailbox_read(struct snd_sof_dev *sdev, u32 offset,
 		      void *message, size_t bytes);
-int sof_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_type,
-		    u32 offset, void *src, size_t size);
-int sof_block_read(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_type,
-		   u32 offset, void *dest, size_t size);
+void sof_block_write(struct snd_sof_dev *sdev, u32 bar, u32 offset, void *src,
+		     size_t size);
+void sof_block_read(struct snd_sof_dev *sdev, u32 bar, u32 offset, void *dest,
+		    size_t size);
 
 int sof_fw_ready(struct snd_sof_dev *sdev, u32 msg_id);
 
-int sof_ipc_msg_data(struct snd_sof_dev *sdev,
-		     struct snd_pcm_substream *substream,
-		     void *p, size_t sz);
-int sof_ipc_pcm_params(struct snd_sof_dev *sdev,
-		       struct snd_pcm_substream *substream,
-		       const struct sof_ipc_pcm_params_reply *reply);
+void intel_ipc_msg_data(struct snd_sof_dev *sdev,
+			struct snd_pcm_substream *substream,
+			void *p, size_t sz);
+int intel_ipc_pcm_params(struct snd_sof_dev *sdev,
+			 struct snd_pcm_substream *substream,
+			 const struct sof_ipc_pcm_params_reply *reply);
 
-int sof_stream_pcm_open(struct snd_sof_dev *sdev,
-			struct snd_pcm_substream *substream);
-int sof_stream_pcm_close(struct snd_sof_dev *sdev,
-			 struct snd_pcm_substream *substream);
+int intel_pcm_open(struct snd_sof_dev *sdev,
+		   struct snd_pcm_substream *substream);
+int intel_pcm_close(struct snd_sof_dev *sdev,
+		    struct snd_pcm_substream *substream);
 
 int sof_machine_check(struct snd_sof_dev *sdev);
+
+#define sof_dev_dbg_or_err(dev, is_err, fmt, ...)			\
+	do {								\
+		if (is_err)						\
+			dev_err(dev, "error: " fmt, __VA_ARGS__);	\
+		else							\
+			dev_dbg(dev, fmt, __VA_ARGS__);			\
+	} while (0)
+
 #endif
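The sof_dev_dbg_or_err() macro added at the end of the header folds the common "error or debug depending on outcome" logging into one call site; usage looks like this (the wrapper is illustrative):

	/* sketch: log level picked by the boolean second argument */
	static void example_log_status(struct snd_sof_dev *sdev, int ret)
	{
		sof_dev_dbg_or_err(sdev->dev, ret < 0, "boot status %d\n", ret);
	}
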
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index e72dcae5e7..1bb2dcf37f 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -376,10 +376,6 @@ static const struct sof_dai_types sof_dais[] = {
 	{"ALH", SOF_DAI_INTEL_ALH},
 	{"SAI", SOF_DAI_IMX_SAI},
 	{"ESAI", SOF_DAI_IMX_ESAI},
-	{"ACP", SOF_DAI_AMD_BT},
-	{"ACPSP", SOF_DAI_AMD_SP},
-	{"ACPDMIC", SOF_DAI_AMD_DMIC},
-	{"AFE", SOF_DAI_MEDIATEK_AFE},
 };
 
 static enum sof_ipc_dai_type find_dai(const char *name)
@@ -576,12 +572,6 @@ static const struct sof_topology_token sched_tokens[] = {
 	 offsetof(struct sof_ipc_pipe_new, time_domain), 0},
 };
 
-static const struct sof_topology_token pipeline_tokens[] = {
-	{SOF_TKN_SCHED_DYNAMIC_PIPELINE, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16,
-	 offsetof(struct snd_sof_widget, dynamic_pipeline_widget), 0},
-
-};
-
 /* volume */
 static const struct sof_topology_token volume_tokens[] = {
 	{SOF_TKN_VOLUME_RAMP_STEP_TYPE, SND_SOC_TPLG_TUPLE_TYPE_WORD,
@@ -807,19 +797,6 @@ static const struct sof_topology_token led_tokens[] = {
 	 get_token_u32, offsetof(struct snd_sof_led_control, direction), 0},
 };
 
-/* AFE */
-static const struct sof_topology_token afe_tokens[] = {
-	{SOF_TKN_MEDIATEK_AFE_RATE,
-	 SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
-	 offsetof(struct sof_ipc_dai_mtk_afe_params, rate), 0},
-	{SOF_TKN_MEDIATEK_AFE_CH,
-	 SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
-	 offsetof(struct sof_ipc_dai_mtk_afe_params, channels), 0},
-	{SOF_TKN_MEDIATEK_AFE_FORMAT,
-	 SND_SOC_TPLG_TUPLE_TYPE_STRING, get_token_comp_format,
-	 offsetof(struct sof_ipc_dai_mtk_afe_params, format), 0},
-};
-
 static int sof_parse_uuid_tokens(struct snd_soc_component *scomp,
 				 void *object,
 				 const struct sof_topology_token *tokens,
@@ -1090,11 +1067,11 @@ static int sof_control_load_volume(struct snd_soc_component *scomp,
 
 	/* set cmd for mixer control */
 	if (le32_to_cpu(mc->max) == 1) {
-		scontrol->control_data->cmd = SOF_CTRL_CMD_SWITCH;
+		scontrol->cmd = SOF_CTRL_CMD_SWITCH;
 		goto skip;
 	}
 
-	scontrol->control_data->cmd = SOF_CTRL_CMD_VOLUME;
+	scontrol->cmd = SOF_CTRL_CMD_VOLUME;
 
 	/* extract tlv data */
 	if (!kc->tlv.p || get_tlv_data(kc->tlv.p, tlv) < 0) {
@@ -1165,7 +1142,7 @@ static int sof_control_load_enum(struct snd_soc_component *scomp,
 	scontrol->comp_id = sdev->next_comp_id;
 	scontrol->num_channels = le32_to_cpu(ec->num_channels);
 	scontrol->control_data->index = kc->index;
-	scontrol->control_data->cmd = SOF_CTRL_CMD_ENUM;
+	scontrol->cmd = SOF_CTRL_CMD_ENUM;
 
 	dev_dbg(scomp->dev, "tplg: load kcontrol index %d chans %d comp_id %d\n",
 		scontrol->comp_id, scontrol->num_channels, scontrol->comp_id);
@@ -1211,7 +1188,7 @@ static int sof_control_load_bytes(struct snd_soc_component *scomp,
 	}
 
 	scontrol->comp_id = sdev->next_comp_id;
-	scontrol->control_data->cmd = SOF_CTRL_CMD_BINARY;
+	scontrol->cmd = SOF_CTRL_CMD_BINARY;
 	scontrol->control_data->index = kc->index;
 
 	dev_dbg(scomp->dev, "tplg: load kcontrol index %d chans %d\n",
@@ -1273,7 +1250,6 @@ static int sof_control_load(struct snd_soc_component *scomp, int index,
 		return -ENOMEM;
 
 	scontrol->scomp = scomp;
-	scontrol->access = kc->access;
 
 	switch (le32_to_cpu(hdr->ops.info)) {
 	case SND_SOC_TPLG_CTL_VOLSW:
@@ -1346,6 +1322,69 @@ static int sof_control_unload(struct snd_soc_component *scomp,
 * DAI Topology
 */
 
+/* Static DSP core power management so far, should be extended in the future */
+static int sof_core_enable(struct snd_sof_dev *sdev, int core)
+{
+	struct sof_ipc_pm_core_config pm_core_config = {
+		.hdr = {
+			.cmd = SOF_IPC_GLB_PM_MSG | SOF_IPC_PM_CORE_ENABLE,
+			.size = sizeof(pm_core_config),
+		},
+		.enable_mask = sdev->enabled_cores_mask | BIT(core),
+	};
+	int ret;
+
+	if (sdev->enabled_cores_mask & BIT(core))
+		return 0;
+
+	/* power up the core if it is host managed */
+	ret = snd_sof_dsp_core_power_up(sdev, BIT(core));
+	if (ret < 0) {
+		dev_err(sdev->dev, "error: %d powering up core %d\n",
+			ret, core);
+		return ret;
+	}
+
+	/* Now notify DSP */
+	ret = sof_ipc_tx_message(sdev->ipc, pm_core_config.hdr.cmd,
+				 &pm_core_config, sizeof(pm_core_config),
+				 &pm_core_config, sizeof(pm_core_config));
+	if (ret < 0) {
+		dev_err(sdev->dev, "error: core %d enable ipc failure %d\n",
+			core, ret);
+		goto err;
+	}
+	return ret;
+err:
+	/* power down core if it is host managed and return the original error if this fails too */
+	if (snd_sof_dsp_core_power_down(sdev, BIT(core)) < 0)
+		dev_err(sdev->dev, "error: powering down core %d\n", core);
+
+	return ret;
+}
+
+int sof_pipeline_core_enable(struct snd_sof_dev *sdev,
+			     const struct snd_sof_widget *swidget)
+{
+	const struct sof_ipc_pipe_new *pipeline;
+	int ret;
+
+	if (swidget->id == snd_soc_dapm_scheduler) {
+		pipeline = swidget->private;
+	} else {
+		pipeline = snd_sof_pipeline_find(sdev, swidget->pipeline_id);
+		if (!pipeline)
+			return -ENOENT;
+	}
+
+	/* First enable the pipeline core */
+	ret = sof_core_enable(sdev, pipeline->core);
+	if (ret < 0)
+		return ret;
+
+	return sof_core_enable(sdev, swidget->core);
+}
+
 static int sof_connect_dai_widget(struct snd_soc_component *scomp,
 				  struct snd_soc_dapm_widget *w,
 				  struct snd_soc_tplg_dapm_widget *tw,
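sof_core_enable() above tracks host-managed cores as plain bits in sdev->enabled_cores_mask, which is why the early-exit test and the IPC's enable_mask are simple bit operations. The arithmetic, spelled out with illustrative helpers:

	/* sketch: BIT(core) and (1 << core) build the same single-core mask */
	static bool example_core_is_enabled(u32 mask, int core)
	{
		return mask & BIT(core);
	}

	static u32 example_mask_with_core(u32 mask, int core)
	{
		return mask | BIT(core);
	}
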
@@ -1473,8 +1512,10 @@ static struct sof_ipc_comp *sof_comp_alloc(struct snd_sof_widget *swidget,
 static int sof_widget_load_dai(struct snd_soc_component *scomp, int index,
 			       struct snd_sof_widget *swidget,
 			       struct snd_soc_tplg_dapm_widget *tw,
+			       struct sof_ipc_comp_reply *r,
 			       struct snd_sof_dai *dai)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_dai *comp_dai;
 	size_t ipc_size = sizeof(*comp_dai);
@@ -1511,7 +1552,10 @@ static int sof_widget_load_dai(struct snd_soc_component *scomp, int index,
 		swidget->widget->name, comp_dai->type, comp_dai->dai_index);
 	sof_dbg_comp_config(scomp, &comp_dai->config);
 
-	if (dai) {
+	ret = sof_ipc_tx_message(sdev->ipc, comp_dai->comp.hdr.cmd,
+				 comp_dai, ipc_size, r, sizeof(*r));
+
+	if (ret == 0 && dai) {
 		dai->scomp = scomp;
 
 		/*
@@ -1533,8 +1577,10 @@ static int sof_widget_load_dai(struct snd_soc_component *scomp, int index,
 
 static int sof_widget_load_buffer(struct snd_soc_component *scomp, int index,
 				  struct snd_sof_widget *swidget,
-				  struct snd_soc_tplg_dapm_widget *tw)
+				  struct snd_soc_tplg_dapm_widget *tw,
+				  struct sof_ipc_comp_reply *r)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_buffer *buffer;
 	int ret;
@@ -1566,7 +1612,15 @@ static int sof_widget_load_buffer(struct snd_soc_component *scomp, int index,
 
 	swidget->private = buffer;
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, buffer->comp.hdr.cmd, buffer,
+				 sizeof(*buffer), r, sizeof(*r));
+	if (ret < 0) {
+		dev_err(scomp->dev, "error: buffer %s load failed\n",
+			swidget->widget->name);
+		kfree(buffer);
+	}
+
+	return ret;
 }
 
 /* bind PCM ID to host component ID */
@@ -1595,8 +1649,10 @@ static int spcm_bind(struct snd_soc_component *scomp, struct snd_sof_pcm *spcm,
 static int sof_widget_load_pcm(struct snd_soc_component *scomp, int index,
 			       struct snd_sof_widget *swidget,
 			       enum sof_ipc_stream_direction dir,
-			       struct snd_soc_tplg_dapm_widget *tw)
+			       struct snd_soc_tplg_dapm_widget *tw,
+			       struct sof_ipc_comp_reply *r)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_host *host;
 	size_t ipc_size = sizeof(*host);
@@ -1635,7 +1691,10 @@ static int sof_widget_load_pcm(struct snd_soc_component *scomp, int index,
 
 	swidget->private = host;
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, host->comp.hdr.cmd, host,
+				 ipc_size, r, sizeof(*r));
+	if (ret >= 0)
+		return ret;
 err:
 	kfree(host);
 	return ret;
@@ -1644,9 +1703,28 @@ static int sof_widget_load_pcm(struct snd_soc_component *scomp, int index,
 /*
 * Pipeline Topology
 */
+int sof_load_pipeline_ipc(struct device *dev,
+			  struct sof_ipc_pipe_new *pipeline,
+			  struct sof_ipc_comp_reply *r)
+{
+	struct snd_sof_dev *sdev = dev_get_drvdata(dev);
+	int ret = sof_core_enable(sdev, pipeline->core);
+
+	if (ret < 0)
+		return ret;
+
+	ret = sof_ipc_tx_message(sdev->ipc, pipeline->hdr.cmd, pipeline,
+				 sizeof(*pipeline), r, sizeof(*r));
+	if (ret < 0)
+		dev_err(dev, "error: load pipeline ipc failure\n");
+
+	return ret;
+}
+
 static int sof_widget_load_pipeline(struct snd_soc_component *scomp, int index,
 				    struct snd_sof_widget *swidget,
-				    struct snd_soc_tplg_dapm_widget *tw)
+				    struct snd_soc_tplg_dapm_widget *tw,
+				    struct sof_ipc_comp_reply *r)
 {
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_pipe_new *pipeline;
@@ -1686,30 +1764,16 @@ static int sof_widget_load_pipeline(struct snd_soc_component *scomp, int index,
 		goto err;
 	}
 
-	ret = sof_parse_tokens(scomp, swidget, pipeline_tokens,
-			       ARRAY_SIZE(pipeline_tokens), private->array,
-			       le32_to_cpu(private->size));
-	if (ret != 0) {
-		dev_err(scomp->dev, "error: parse dynamic pipeline token failed %d\n",
-			private->size);
-		goto err;
-	}
-
-	if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE))
-		pipeline->core = SOF_DSP_PRIMARY_CORE;
-
-	if (sof_debug_check_flag(SOF_DBG_DYNAMIC_PIPELINES_OVERRIDE))
-		swidget->dynamic_pipeline_widget =
-			sof_debug_check_flag(SOF_DBG_DYNAMIC_PIPELINES_ENABLE);
-
-	dev_dbg(scomp->dev, "pipeline %s: period %d pri %d mips %d core %d frames %d dynamic %d\n",
+	dev_dbg(scomp->dev, "pipeline %s: period %d pri %d mips %d core %d frames %d\n",
 		swidget->widget->name, pipeline->period, pipeline->priority,
-		pipeline->period_mips, pipeline->core, pipeline->frames_per_sched,
-		swidget->dynamic_pipeline_widget);
+		pipeline->period_mips, pipeline->core, pipeline->frames_per_sched);
 
 	swidget->private = pipeline;
 
-	return 0;
+	/* send ipc's to create pipeline comp and power up schedule core */
+	ret = sof_load_pipeline_ipc(scomp->dev, pipeline, r);
+	if (ret >= 0)
+		return ret;
 err:
 	kfree(pipeline);
 	return ret;
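Every sof_widget_load_*() converted above ends in the same idiom: populate a sof_ipc_* struct whose header carries the command, send it with sof_ipc_tx_message(), then treat the transport return code and the DSP reply error as two separate failure channels. Condensed into one illustrative helper:

	/* sketch: the send-and-check-both-errors pattern used by the loaders */
	static int example_send_comp(struct snd_sof_dev *sdev,
				     struct sof_ipc_comp *comp, size_t size)
	{
		struct sof_ipc_comp_reply reply;
		int ret;

		ret = sof_ipc_tx_message(sdev->ipc, comp->hdr.cmd, comp, size,
					 &reply, sizeof(reply));
		if (ret < 0)
			return ret;		/* transport failure */

		return reply.rhdr.error;	/* 0, or a DSP-side error */
	}
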
@@ -1721,8 +1785,10 @@ static int sof_widget_load_pipeline(struct snd_soc_component *scomp, int index,
 
 static int sof_widget_load_mixer(struct snd_soc_component *scomp, int index,
 				 struct snd_sof_widget *swidget,
-				 struct snd_soc_tplg_dapm_widget *tw)
+				 struct snd_soc_tplg_dapm_widget *tw,
+				 struct sof_ipc_comp_reply *r)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_mixer *mixer;
 	size_t ipc_size = sizeof(*mixer);
@@ -1751,7 +1817,12 @@ static int sof_widget_load_mixer(struct snd_soc_component *scomp, int index,
 
 	swidget->private = mixer;
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, mixer->comp.hdr.cmd, mixer,
+				 ipc_size, r, sizeof(*r));
+	if (ret < 0)
+		kfree(mixer);
+
+	return ret;
 }
 
 /*
@@ -1759,8 +1830,10 @@
 */
 static int sof_widget_load_mux(struct snd_soc_component *scomp, int index,
 			       struct snd_sof_widget *swidget,
-			       struct snd_soc_tplg_dapm_widget *tw)
+			       struct snd_soc_tplg_dapm_widget *tw,
+			       struct sof_ipc_comp_reply *r)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_mux *mux;
 	size_t ipc_size = sizeof(*mux);
@@ -1789,7 +1862,12 @@ static int sof_widget_load_mux(struct snd_soc_component *scomp, int index,
 
 	swidget->private = mux;
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, mux->comp.hdr.cmd, mux,
+				 ipc_size, r, sizeof(*r));
+	if (ret < 0)
+		kfree(mux);
+
+	return ret;
 }
 
 /*
@@ -1798,7 +1876,8 @@ static int sof_widget_load_mux(struct snd_soc_component *scomp, int index,
 * PGA Topology
 */
 
 static int sof_widget_load_pga(struct snd_soc_component *scomp, int index,
 			       struct snd_sof_widget *swidget,
-			       struct snd_soc_tplg_dapm_widget *tw)
+			       struct snd_soc_tplg_dapm_widget *tw,
+			       struct sof_ipc_comp_reply *r)
 {
 	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
@@ -1858,7 +1937,10 @@ static int sof_widget_load_pga(struct snd_soc_component *scomp, int index,
 		}
 	}
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, volume->comp.hdr.cmd, volume,
+				 ipc_size, r, sizeof(*r));
+	if (ret >= 0)
+		return ret;
 err:
 	kfree(volume);
 	return ret;
@@ -1870,8 +1952,10 @@ static int sof_widget_load_pga(struct snd_soc_component *scomp, int index,
 
 static int sof_widget_load_src(struct snd_soc_component *scomp, int index,
 			       struct snd_sof_widget *swidget,
-			       struct snd_soc_tplg_dapm_widget *tw)
+			       struct snd_soc_tplg_dapm_widget *tw,
+			       struct sof_ipc_comp_reply *r)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_src *src;
 	size_t ipc_size = sizeof(*src);
@@ -1910,7 +1994,10 @@ static int sof_widget_load_src(struct snd_soc_component *scomp, int index,
 
 	swidget->private = src;
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, src->comp.hdr.cmd, src,
+				 ipc_size, r, sizeof(*r));
+	if (ret >= 0)
+		return ret;
 err:
 	kfree(src);
 	return ret;
@@ -1922,8 +2009,10 @@ static int sof_widget_load_src(struct snd_soc_component *scomp, int index,
 
 static int sof_widget_load_asrc(struct snd_soc_component *scomp, int index,
 				struct snd_sof_widget *swidget,
-				struct snd_soc_tplg_dapm_widget *tw)
+				struct snd_soc_tplg_dapm_widget *tw,
+				struct sof_ipc_comp_reply *r)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_asrc *asrc;
 	size_t ipc_size = sizeof(*asrc);
@@ -1964,7 +2053,10 @@ static int sof_widget_load_asrc(struct snd_soc_component *scomp, int index,
 
 	swidget->private = asrc;
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, asrc->comp.hdr.cmd, asrc,
+				 ipc_size, r, sizeof(*r));
+	if (ret >= 0)
+		return ret;
 err:
 	kfree(asrc);
 	return ret;
@@ -1976,8 +2068,10 @@ static int sof_widget_load_asrc(struct snd_soc_component *scomp, int index,
 
 static int sof_widget_load_siggen(struct snd_soc_component *scomp, int index,
 				  struct snd_sof_widget *swidget,
-				  struct snd_soc_tplg_dapm_widget *tw)
+				  struct snd_soc_tplg_dapm_widget *tw,
+				  struct sof_ipc_comp_reply *r)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_tone *tone;
 	size_t ipc_size = sizeof(*tone);
@@ -2016,7 +2110,10 @@ static int sof_widget_load_siggen(struct snd_soc_component *scomp, int index,
 
 	swidget->private = tone;
 
-	return 0;
+	ret = sof_ipc_tx_message(sdev->ipc, tone->comp.hdr.cmd, tone,
+				 ipc_size, r, sizeof(*r));
+	if (ret >= 0)
+		return ret;
 err:
 	kfree(tone);
 	return ret;
@@ -2076,7 +2173,7 @@ static int sof_get_control_data(struct snd_soc_component *scomp,
 		*size += wdata[i].pdata->size;
 
 		/* get data type */
-		switch (wdata[i].control->control_data->cmd) {
+		switch (wdata[i].control->cmd) {
 		case SOF_CTRL_CMD_VOLUME:
 		case SOF_CTRL_CMD_ENUM:
 		case SOF_CTRL_CMD_SWITCH:
@@ -2098,8 +2195,10 @@ static int sof_get_control_data(struct snd_soc_component *scomp,
 static int sof_process_load(struct snd_soc_component *scomp, int index,
 			    struct snd_sof_widget *swidget,
 			    struct snd_soc_tplg_dapm_widget *tw,
+			    struct sof_ipc_comp_reply *r,
 			    int type)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_soc_dapm_widget *widget = swidget->widget;
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_process *process;
@@ -2173,6 +2272,33 @@ static int sof_process_load(struct snd_soc_component *scomp, int index,
 	process->size = ipc_data_size;
 
 	swidget->private = process;
+
+	ret = sof_ipc_tx_message(sdev->ipc, process->comp.hdr.cmd, process,
+				 ipc_size, r, sizeof(*r));
+
+	if (ret < 0) {
+		dev_err(scomp->dev, "error: create process failed\n");
+		goto err;
+	}
+
+	/* we sent the data in single message so return */
+	if (ipc_data_size)
+		goto out;
+
+	/* send control data with large message supported method */
+	for (i = 0; i < widget->num_kcontrols; i++) {
+		wdata[i].control->readback_offset = 0;
+		ret = snd_sof_ipc_set_get_comp_data(wdata[i].control,
+						    wdata[i].ipc_cmd,
+						    wdata[i].ctrl_type,
+						    wdata[i].control->cmd,
+						    true);
+		if (ret != 0) {
+			dev_err(scomp->dev, "error: send control failed\n");
+			break;
+		}
+	}
+
 err:
 	if (ret < 0)
 		kfree(process);
@@ -2188,7 +2314,8 @@ static int sof_process_load(struct snd_soc_component *scomp, int index,
 
 static int sof_widget_load_process(struct snd_soc_component *scomp, int index,
 				   struct snd_sof_widget *swidget,
-				   struct snd_soc_tplg_dapm_widget *tw)
+				   struct snd_soc_tplg_dapm_widget *tw,
+				   struct sof_ipc_comp_reply *r)
 {
 	struct snd_soc_tplg_private *private = &tw->priv;
 	struct sof_ipc_comp_process config;
@@ -2214,7 +2341,8 @@ static int sof_widget_load_process(struct snd_soc_component *scomp, int index,
 	}
 
 	/* now load process specific data and send IPC */
-	ret = sof_process_load(scomp, index, swidget, tw, find_process_comp_type(config.type));
+	ret = sof_process_load(scomp, index, swidget, tw, r,
+			       find_process_comp_type(config.type));
 	if (ret < 0) {
 		dev_err(scomp->dev, "error: process loading failed\n");
 		return ret;
@@ -2263,6 +2391,8 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct snd_sof_widget *swidget;
 	struct snd_sof_dai *dai;
+	struct sof_ipc_comp_reply reply;
+	struct snd_sof_control *scontrol;
 	struct sof_ipc_comp comp = {
 		.core = SOF_DSP_PRIMARY_CORE,
 	};
@@ -2279,6 +2409,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 	swidget->id = w->id;
 	swidget->pipeline_id = index;
 	swidget->private = NULL;
+	memset(&reply, 0, sizeof(reply));
 
 	dev_dbg(scomp->dev, "tplg: ready widget id %d pipe %d type %d name : %s stream %s\n",
 		swidget->comp_id, index, swidget->id, tw->name,
@@ -2295,11 +2426,16 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 		return ret;
 	}
 
-	if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE))
-		comp.core = SOF_DSP_PRIMARY_CORE;
-
 	swidget->core = comp.core;
 
+	/* default is primary core, safe to call for already enabled cores */
+	ret = sof_core_enable(sdev, comp.core);
+	if (ret < 0) {
+		dev_err(scomp->dev, "error: enable core: %d\n", ret);
+		kfree(swidget);
+		return ret;
+	}
+
 	ret = sof_parse_tokens(scomp, &swidget->comp_ext, comp_ext_tokens,
 			       ARRAY_SIZE(comp_ext_tokens), tw->priv.array,
 			       le32_to_cpu(tw->priv.size));
@@ -2320,51 +2456,57 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 			return -ENOMEM;
 		}
 
-		ret = sof_widget_load_dai(scomp, index, swidget, tw, dai);
-		if (!ret)
-			ret = sof_connect_dai_widget(scomp, w, tw, dai);
-		if (ret < 0) {
+		ret = sof_widget_load_dai(scomp, index, swidget, tw, &reply, dai);
+		if (ret == 0) {
+			sof_connect_dai_widget(scomp, w, tw, dai);
+			list_add(&dai->list, &sdev->dai_list);
+			swidget->private = dai;
+		} else {
 			kfree(dai);
-			break;
 		}
-		list_add(&dai->list, &sdev->dai_list);
-		swidget->private = dai;
 		break;
 	case snd_soc_dapm_mixer:
-		ret = sof_widget_load_mixer(scomp, index, swidget, tw);
+		ret = sof_widget_load_mixer(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_pga:
-		ret = sof_widget_load_pga(scomp, index, swidget, tw);
+		ret = sof_widget_load_pga(scomp, index, swidget, tw, &reply);
+		/* Find scontrol for this pga and set readback offset */
+		list_for_each_entry(scontrol, &sdev->kcontrol_list, list) {
+			if (scontrol->comp_id == swidget->comp_id) {
+				scontrol->readback_offset = reply.offset;
+				break;
+			}
+		}
 		break;
 	case snd_soc_dapm_buffer:
-		ret = sof_widget_load_buffer(scomp, index, swidget, tw);
+		ret = sof_widget_load_buffer(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_scheduler:
-		ret = sof_widget_load_pipeline(scomp, index, swidget, tw);
+		ret = sof_widget_load_pipeline(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_aif_out:
 		ret = sof_widget_load_pcm(scomp, index, swidget,
-					  SOF_IPC_STREAM_CAPTURE, tw);
+					  SOF_IPC_STREAM_CAPTURE, tw, &reply);
 		break;
 	case snd_soc_dapm_aif_in:
 		ret = sof_widget_load_pcm(scomp, index, swidget,
-					  SOF_IPC_STREAM_PLAYBACK, tw);
+					  SOF_IPC_STREAM_PLAYBACK, tw, &reply);
 		break;
 	case snd_soc_dapm_src:
-		ret = sof_widget_load_src(scomp, index, swidget, tw);
+		ret = sof_widget_load_src(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_asrc:
-		ret = sof_widget_load_asrc(scomp, index, swidget, tw);
+		ret = sof_widget_load_asrc(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_siggen:
-		ret = sof_widget_load_siggen(scomp, index, swidget, tw);
+		ret = sof_widget_load_siggen(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_effect:
-		ret = sof_widget_load_process(scomp, index, swidget, tw);
+		ret = sof_widget_load_process(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_mux:
 	case snd_soc_dapm_demux:
-		ret = sof_widget_load_mux(scomp, index, swidget, tw);
+		ret = sof_widget_load_mux(scomp, index, swidget, tw, &reply);
 		break;
 	case snd_soc_dapm_switch:
 	case snd_soc_dapm_dai_link:
@@ -2375,12 +2517,12 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 	}
 
 	/* check IPC reply */
-	if (ret < 0) {
+	if (ret < 0 || reply.rhdr.error < 0) {
 		dev_err(scomp->dev,
-			"error: failed to add widget id %d type %d name : %s stream %s\n",
+			"error: DSP failed to add widget id %d type %d name : %s stream %s reply %d\n",
 			tw->shift, swidget->id, tw->name,
 			strnlen(tw->sname, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) > 0
-				? tw->sname : "none");
+				? tw->sname : "none", reply.rhdr.error);
 		kfree(swidget);
 		return ret;
 	}
@@ -2422,8 +2564,10 @@ static int sof_route_unload(struct snd_soc_component *scomp,
 static int sof_widget_unload(struct snd_soc_component *scomp,
 			     struct snd_soc_dobj *dobj)
 {
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	const struct snd_kcontrol_new *kc;
 	struct snd_soc_dapm_widget *widget;
+	struct sof_ipc_pipe_new *pipeline;
 	struct snd_sof_control *scontrol;
 	struct snd_sof_widget *swidget;
 	struct soc_mixer_control *sm;
@@ -2450,6 +2594,24 @@ static int sof_widget_unload(struct snd_soc_component *scomp,
 			list_del(&dai->list);
 		}
 		break;
+	case snd_soc_dapm_scheduler:
+
+		/* power down the pipeline schedule core */
+		pipeline = swidget->private;
+
+		/*
+		 * Runtime PM should still function normally if topology loading fails and
+		 * its components are unloaded. Do not power down the primary core so that the
+		 * CTX_SAVE IPC can succeed during runtime suspend.
+		 */
+		if (pipeline->core == SOF_DSP_PRIMARY_CORE)
+			break;
+
+		ret = snd_sof_dsp_core_power_down(sdev, 1 << pipeline->core);
+		if (ret < 0)
+			dev_err(scomp->dev, "error: powering down pipeline schedule core %d\n",
+				pipeline->core);
+		break;
 	default:
 		break;
 	}
@@ -2518,10 +2680,8 @@ static int sof_dai_load(struct snd_soc_component *scomp, int index,
 	for_each_pcm_streams(stream) {
 		spcm->stream[stream].comp_id = COMP_ID_UNASSIGNED;
-		if (pcm->compress)
-			snd_sof_compr_init_elapsed_work(&spcm->stream[stream].period_elapsed_work);
-		else
-			snd_sof_pcm_init_elapsed_work(&spcm->stream[stream].period_elapsed_work);
+		INIT_WORK(&spcm->stream[stream].period_elapsed_work,
+			  snd_sof_pcm_period_elapsed_work);
 	}
 
 	spcm->pcm = *pcm;
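The sof_dai_load() hunk above re-points every stream's work item at one handler with INIT_WORK(). The handler side of that pattern recovers the enclosing stream with container_of(); a sketch of the expected shape (the body of snd_sof_pcm_period_elapsed_work itself is not part of this patch, so this is an assumption):

	#include <linux/workqueue.h>

	/* sketch: map the work item back to its snd_sof_pcm_stream */
	static void example_period_elapsed_work(struct work_struct *work)
	{
		struct snd_sof_pcm_stream *sps =
			container_of(work, struct snd_sof_pcm_stream,
				     period_elapsed_work);

		snd_sof_pcm_period_elapsed(sps->substream);
	}
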
@@ -2677,6 +2837,9 @@ static int sof_set_dai_config_multi(struct snd_sof_dev *sdev, u32 size,
 			continue;
 
 		if (strcmp(link->name, dai->name) == 0) {
+			struct sof_ipc_reply reply;
+			int ret;
+
 			/*
 			 * the same dai config will be applied to all DAIs in
 			 * the same dai link. We have to ensure that the ipc
@@ -2688,6 +2851,18 @@ static int sof_set_dai_config_multi(struct snd_sof_dev *sdev, u32 size,
 			dev_dbg(sdev->dev, "set DAI config for %s index %d\n",
 				dai->name, config[curr_conf].dai_index);
 
+			/* send message to DSP */
+			ret = sof_ipc_tx_message(sdev->ipc,
+						 config[curr_conf].hdr.cmd,
+						 &config[curr_conf], size,
+						 &reply, sizeof(reply));
+
+			if (ret < 0) {
+				dev_err(sdev->dev,
+					"error: failed to set DAI config for %s index %d\n",
+					dai->name, config[curr_conf].dai_index);
+				return ret;
+			}
 
 			dai->number_configs = num_conf;
 			dai->current_config = curr_conf;
@@ -2695,6 +2870,9 @@ static int sof_set_dai_config_multi(struct snd_sof_dev *sdev, u32 size,
 			if (!dai->dai_config)
 				return -ENOMEM;
 
+			/* set cpu_dai_name */
+			dai->cpu_dai_name = link->cpus->dai_name;
+
 			found = 1;
 		}
 	}
@@ -2764,12 +2942,12 @@ static int sof_link_ssp_load(struct snd_soc_component *scomp, int index,
 		config[i].ssp.rx_slots = le32_to_cpu(hw_config[i].rx_slots);
 		config[i].ssp.tx_slots = le32_to_cpu(hw_config[i].tx_slots);
 
-		dev_dbg(scomp->dev, "tplg: config SSP%d fmt %#x mclk %d bclk %d fclk %d width (%d)%d slots %d mclk id %d quirks %d clks_control %#x\n",
+		dev_dbg(scomp->dev, "tplg: config SSP%d fmt 0x%x mclk %d bclk %d fclk %d width (%d)%d slots %d mclk id %d quirks %d\n",
 			config[i].dai_index, config[i].format,
 			config[i].ssp.mclk_rate, config[i].ssp.bclk_rate,
 			config[i].ssp.fsync_rate, config[i].ssp.sample_valid_bits,
 			config[i].ssp.tdm_slot_width, config[i].ssp.tdm_slots,
-			config[i].ssp.mclk_id, config[i].ssp.quirks, config[i].ssp.clks_control);
+			config[i].ssp.mclk_id, config[i].ssp.quirks);
 
 		/* validate SSP fsync rate and channel count */
 		if (config[i].ssp.fsync_rate < 8000 || config[i].ssp.fsync_rate > 192000) {
@@ -2909,144 +3087,6 @@ static int sof_link_esai_load(struct snd_soc_component *scomp, int index,
 	return ret;
 }
 
-static int sof_link_acp_dmic_load(struct snd_soc_component *scomp, int index,
-				  struct snd_soc_dai_link *link,
-				  struct snd_soc_tplg_link_config *cfg,
-				  struct snd_soc_tplg_hw_config *hw_config,
-				  struct sof_ipc_dai_config *config)
-{
-	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
-	u32 size = sizeof(*config);
-	int ret;
-
-	/* handle master/slave and inverted clocks */
-	sof_dai_set_format(hw_config, config);
-
-	/* init IPC */
-	memset(&config->acpdmic, 0, sizeof(struct sof_ipc_dai_acp_params));
-	config->hdr.size = size;
-
-	config->acpdmic.fsync_rate = le32_to_cpu(hw_config->fsync_rate);
-	config->acpdmic.tdm_slots = le32_to_cpu(hw_config->tdm_slots);
-
-	dev_info(scomp->dev, "ACP_DMIC config ACP%d channel %d rate %d\n",
-		 config->dai_index, config->acpdmic.tdm_slots,
-		 config->acpdmic.fsync_rate);
-
-	/* set config for all DAI's with name matching the link name */
-	ret = sof_set_dai_config(sdev, size, link, config);
-	if (ret < 0)
-		dev_err(scomp->dev, "ACP_DMIC failed to save DAI config for ACP%d\n",
-			config->dai_index);
-	return ret;
-}
-
-static int sof_link_acp_bt_load(struct snd_soc_component *scomp, int index,
-				struct snd_soc_dai_link *link,
-				struct snd_soc_tplg_link_config *cfg,
-				struct snd_soc_tplg_hw_config *hw_config,
-				struct sof_ipc_dai_config *config)
-{
-	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
-	u32 size = sizeof(*config);
-	int ret;
-
-	/* handle master/slave and inverted clocks */
-	sof_dai_set_format(hw_config, config);
-
-	/* init IPC */
-	memset(&config->acpbt, 0, sizeof(struct sof_ipc_dai_acp_params));
-	config->hdr.size = size;
-
-	config->acpbt.fsync_rate = le32_to_cpu(hw_config->fsync_rate);
-	config->acpbt.tdm_slots = le32_to_cpu(hw_config->tdm_slots);
-
-	dev_info(scomp->dev, "ACP_BT config ACP%d channel %d rate %d\n",
-		 config->dai_index, config->acpbt.tdm_slots,
-		 config->acpbt.fsync_rate);
-
-	/* set config for all DAI's with name matching the link name */
-	ret = sof_set_dai_config(sdev, size, link, config);
-	if (ret < 0)
-		dev_err(scomp->dev, "ACP_BT failed to save DAI config for ACP%d\n",
-			config->dai_index);
-	return ret;
-}
-
-static int sof_link_acp_sp_load(struct snd_soc_component *scomp, int index,
-				struct snd_soc_dai_link *link,
-				struct snd_soc_tplg_link_config *cfg,
-				struct snd_soc_tplg_hw_config *hw_config,
-				struct sof_ipc_dai_config *config)
-{
-	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
-	u32 size = sizeof(*config);
-	int ret;
-
-	/* handle master/slave and inverted clocks */
-	sof_dai_set_format(hw_config, config);
-
-	/* init IPC */
-	memset(&config->acpsp, 0, sizeof(struct sof_ipc_dai_acp_params));
-	config->hdr.size = size;
-
-	config->acpsp.fsync_rate = le32_to_cpu(hw_config->fsync_rate);
-	config->acpsp.tdm_slots = le32_to_cpu(hw_config->tdm_slots);
-
-	dev_info(scomp->dev, "ACP_SP config ACP%d channel %d rate %d\n",
-		 config->dai_index, config->acpsp.tdm_slots,
-		 config->acpsp.fsync_rate);
-
-	/* set config for all DAI's with name matching the link name */
-	ret = sof_set_dai_config(sdev, size, link, config);
-	if (ret < 0)
-		dev_err(scomp->dev, "ACP_SP failed to save DAI config for ACP%d\n",
-			config->dai_index);
-	return ret;
-}
-
-static int sof_link_afe_load(struct snd_soc_component *scomp, int index,
-			     struct snd_soc_dai_link *link,
-			     struct snd_soc_tplg_link_config *cfg,
-			     struct snd_soc_tplg_hw_config *hw_config,
-			     struct sof_ipc_dai_config *config)
-{
-	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
-	struct snd_soc_tplg_private *private = &cfg->priv;
-	struct snd_soc_dai *dai;
-	u32 size = sizeof(*config);
-	int ret;
-
-	config->hdr.size = size;
-
-	/* get any bespoke DAI tokens */
-	ret = sof_parse_tokens(scomp, &config->afe, afe_tokens,
-			       ARRAY_SIZE(afe_tokens), private->array,
-			       le32_to_cpu(private->size));
-	if (ret != 0) {
-		dev_err(scomp->dev, "parse afe tokens failed %d\n",
-			le32_to_cpu(private->size));
-		return ret;
-	}
-
-	dev_dbg(scomp->dev, "AFE config rate %d channels %d format:%d\n",
-		config->afe.rate, config->afe.channels, config->afe.format);
-
-	dai = snd_soc_find_dai(link->cpus);
-	if (!dai) {
-		dev_err(scomp->dev, "%s: failed to find dai %s", __func__, link->cpus->dai_name);
-		return -EINVAL;
-	}
-
-	config->afe.stream_id = DMA_CHAN_INVALID;
-
-	ret = sof_set_dai_config(sdev, size, link, config);
-	if (ret < 0)
-		dev_err(scomp->dev, "failed to process afe dai link %s", link->name);
-
-	return ret;
-}
-
 static int sof_link_dmic_load(struct snd_soc_component *scomp, int index,
 			      struct snd_soc_dai_link *link,
 			      struct snd_soc_tplg_link_config *cfg,
@@ -3332,19 +3372,6 @@ static int sof_link_load(struct snd_soc_component *scomp, int index,
 	case SOF_DAI_IMX_ESAI:
 		ret = sof_link_esai_load(scomp, index, link, cfg, hw_config + curr_conf, config);
 		break;
-	case SOF_DAI_AMD_BT:
-		ret = sof_link_acp_bt_load(scomp, index, link, cfg, hw_config + curr_conf, config);
-		break;
-	case SOF_DAI_AMD_SP:
-		ret = sof_link_acp_sp_load(scomp, index, link, cfg, hw_config + curr_conf, config);
-		break;
-	case SOF_DAI_AMD_DMIC:
-		ret = sof_link_acp_dmic_load(scomp, index, link, cfg, hw_config + curr_conf,
-					     config);
-		break;
-	case SOF_DAI_MEDIATEK_AFE:
-		ret = sof_link_afe_load(scomp, index, link, cfg, hw_config + curr_conf, config);
-		break;
 	default:
 		dev_err(scomp->dev, "error: invalid DAI type %d\n", common_config.type);
 		ret = -EINVAL;
@@ -3365,6 +3392,7 @@ static int sof_route_load(struct snd_soc_component *scomp, int index,
 	struct snd_sof_widget *source_swidget, *sink_swidget;
 	struct snd_soc_dobj *dobj = &route->dobj;
 	struct snd_sof_route *sroute;
+	struct sof_ipc_reply reply;
 	int ret = 0;
 
 	/* allocate memory for sroute and connect */
@@ -3439,11 +3467,33 @@ static int sof_route_load(struct snd_soc_component *scomp, int index,
 			route->source, route->sink);
 		goto err;
 	} else {
+		ret = sof_ipc_tx_message(sdev->ipc,
+					 connect->hdr.cmd,
+					 connect, sizeof(*connect),
+					 &reply, sizeof(reply));
+
+		/* check IPC return value */
+		if (ret < 0) {
+			dev_err(scomp->dev, "error: failed to add route sink %s control %s source %s\n",
+				route->sink,
+				route->control ? route->control : "none",
+				route->source);
+			goto err;
+		}
+
+		/* check IPC reply */
+		if (reply.error < 0) {
+			dev_err(scomp->dev, "error: DSP failed to add route sink %s control %s source %s result %d\n",
+				route->sink,
+				route->control ? route->control : "none",
+				route->source, reply.error);
+			ret = reply.error;
+			goto err;
+		}
+
 		sroute->route = route;
 		dobj->private = sroute;
 		sroute->private = connect;
-		sroute->src_widget = source_swidget;
-		sroute->sink_widget = sink_widget;
 
 		/* add route to route list */
 		list_add(&sroute->list, &sdev->route_list);
@@ -3457,14 +3507,59 @@ static int sof_route_load(struct snd_soc_component *scomp, int index,
 	return ret;
 }
 
-int snd_sof_complete_pipeline(struct snd_sof_dev *sdev,
+/* Function to set the initial value of SOF kcontrols.
+ * The value will be stored in scontrol->control_data
+ */
+static int snd_sof_cache_kcontrol_val(struct snd_soc_component *scomp)
+{
+	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
+	struct snd_sof_control *scontrol = NULL;
+	int ipc_cmd, ctrl_type;
+	int ret = 0;
+
+	list_for_each_entry(scontrol, &sdev->kcontrol_list, list) {
+
+		/* notify DSP of kcontrol values */
+		switch (scontrol->cmd) {
+		case SOF_CTRL_CMD_VOLUME:
+		case SOF_CTRL_CMD_ENUM:
+		case SOF_CTRL_CMD_SWITCH:
+			ipc_cmd = SOF_IPC_COMP_GET_VALUE;
+			ctrl_type = SOF_CTRL_TYPE_VALUE_CHAN_GET;
+			break;
+		case SOF_CTRL_CMD_BINARY:
+			ipc_cmd = SOF_IPC_COMP_GET_DATA;
+			ctrl_type = SOF_CTRL_TYPE_DATA_GET;
+			break;
+		default:
+			dev_err(scomp->dev,
+				"error: Invalid scontrol->cmd: %d\n",
+				scontrol->cmd);
+			return -EINVAL;
+		}
+		ret = snd_sof_ipc_set_get_comp_data(scontrol,
+						    ipc_cmd, ctrl_type,
+						    scontrol->cmd,
+						    false);
+		if (ret < 0) {
+			dev_warn(scomp->dev,
+				 "error: kcontrol value get for widget: %d\n",
+				 scontrol->comp_id);
+		}
+	}
+
+	return ret;
+}
+
+int snd_sof_complete_pipeline(struct device *dev,
 			      struct snd_sof_widget *swidget)
 {
+	struct snd_sof_dev *sdev = dev_get_drvdata(dev);
 	struct sof_ipc_pipe_ready ready;
 	struct sof_ipc_reply reply;
 	int ret;
 
-	dev_dbg(sdev->dev, "tplg: complete pipeline %s id %d\n",
+	dev_dbg(dev, "tplg: complete pipeline %s id %d\n",
 		swidget->widget->name, swidget->comp_id);
 
 	memset(&ready, 0, sizeof(ready));
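sof_route_load() above now emits the connect IPC itself; the payload is just the pair of component IDs under a topology-message header. A sketch of filling one, assuming struct sof_ipc_pipe_comp_connect from sound/sof/topology.h:

	/* sketch: a DAPM route reduced to its component-connect payload */
	static void example_fill_connect(struct sof_ipc_pipe_comp_connect *connect,
					 u32 source_id, u32 sink_id)
	{
		connect->hdr.size = sizeof(*connect);
		connect->hdr.cmd = SOF_IPC_GLB_TPLG_MSG | SOF_IPC_TPLG_COMP_CONNECT;
		connect->source_id = source_id;
		connect->sink_id = sink_id;
	}
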
- * The function checks if @swidget is associated with any volatile controls. If so, setting - * the dynamic_pipeline_widget is disallowed. - */ -static int sof_set_pipe_widget(struct snd_sof_dev *sdev, struct snd_sof_widget *pipe_widget, - struct snd_sof_widget *swidget) -{ - struct snd_sof_control *scontrol; - - if (pipe_widget->dynamic_pipeline_widget) { - /* dynamic widgets cannot have volatile kcontrols */ - list_for_each_entry(scontrol, &sdev->kcontrol_list, list) - if (scontrol->comp_id == swidget->comp_id && - (scontrol->access & SNDRV_CTL_ELEM_ACCESS_VOLATILE)) { - dev_err(sdev->dev, - "error: volatile control found for dynamic widget %s\n", - swidget->widget->name); - return -EINVAL; - } - } - - /* set the pipe_widget and apply the dynamic_pipeline_widget_flag */ - swidget->pipe_widget = pipe_widget; - swidget->dynamic_pipeline_widget = pipe_widget->dynamic_pipeline_widget; - - return 0; -} - /* completion - called at completion of firmware loading */ -static int sof_complete(struct snd_soc_component *scomp) +static void sof_complete(struct snd_soc_component *scomp) { struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp); - struct snd_sof_widget *swidget, *comp_swidget; - int ret; + struct snd_sof_widget *swidget; - /* set the pipe_widget and apply the dynamic_pipeline_widget_flag */ + /* some widget types require completion notification */ list_for_each_entry(swidget, &sdev->widget_list, list) { + if (swidget->complete) + continue; + switch (swidget->id) { case snd_soc_dapm_scheduler: - /* - * Apply the dynamic_pipeline_widget flag and set the pipe_widget field - * for all widgets that have the same pipeline ID as the scheduler widget - */ - list_for_each_entry(comp_swidget, &sdev->widget_list, list) - if (comp_swidget->pipeline_id == swidget->pipeline_id) { - ret = sof_set_pipe_widget(sdev, swidget, comp_swidget); - if (ret < 0) - return ret; - } + swidget->complete = + snd_sof_complete_pipeline(scomp->dev, swidget); break; default: break; } } - - /* verify topology components loading including dynamic pipelines */ - if (sof_debug_check_flag(SOF_DBG_VERIFY_TPLG)) { - ret = sof_set_up_pipelines(sdev, true); - if (ret < 0) { - dev_err(sdev->dev, "error: topology verification failed %d\n", ret); - return ret; - } - - ret = sof_tear_down_pipelines(sdev, true); - if (ret < 0) { - dev_err(sdev->dev, "error: topology tear down pipelines failed %d\n", ret); - return ret; - } - } - - /* set up static pipelines */ - return sof_set_up_pipelines(sdev, false); + /* + * cache initial values of SOF kcontrols by reading DSP value over + * IPC.
It may be overwritten by alsa-mixer after booting up + */ + snd_sof_cache_kcontrol_val(scomp); } /* manifest - optional to inform component of manifest */ diff --git a/sound/soc/sof/trace.c b/sound/soc/sof/trace.c index f13024c8eb..58f6ca5cf4 100644 --- a/sound/soc/sof/trace.c +++ b/sound/soc/sof/trace.c @@ -417,7 +417,7 @@ int snd_sof_init_trace_ipc(struct snd_sof_dev *sdev) "error: fail in snd_sof_dma_trace_init %d\n", ret); return ret; } - dev_dbg(sdev->dev, "%s: stream_tag: %d\n", __func__, params.stream_tag); + dev_dbg(sdev->dev, "stream_tag: %d\n", params.stream_tag); /* send IPC to the DSP */ ret = sof_ipc_tx_message(sdev->ipc, @@ -480,8 +480,7 @@ int snd_sof_init_trace(struct snd_sof_dev *sdev) goto table_err; sdev->dma_trace_pages = ret; - dev_dbg(sdev->dev, "%s: dma_trace_pages: %d\n", - __func__, sdev->dma_trace_pages); + dev_dbg(sdev->dev, "dma_trace_pages: %d\n", sdev->dma_trace_pages); if (sdev->first_boot) { ret = trace_debugfs_create(sdev); @@ -539,10 +538,6 @@ EXPORT_SYMBOL(snd_sof_trace_notify_for_error); void snd_sof_release_trace(struct snd_sof_dev *sdev) { - struct sof_ipc_fw_ready *ready = &sdev->fw_ready; - struct sof_ipc_fw_version *v = &ready->version; - struct sof_ipc_cmd_hdr hdr; - struct sof_ipc_reply ipc_reply; int ret; if (!sdev->dtrace_is_supported || !sdev->dtrace_is_enabled) @@ -553,20 +548,6 @@ void snd_sof_release_trace(struct snd_sof_dev *sdev) dev_err(sdev->dev, "error: snd_sof_dma_trace_trigger: stop: %d\n", ret); - /* - * stop and free trace DMA in the DSP. TRACE_DMA_FREE is only supported from - * ABI 3.20.0 onwards - */ - if (v->abi_version >= SOF_ABI_VER(3, 20, 0)) { - hdr.size = sizeof(hdr); - hdr.cmd = SOF_IPC_GLB_TRACE_MSG | SOF_IPC_TRACE_DMA_FREE; - - ret = sof_ipc_tx_message(sdev->ipc, hdr.cmd, &hdr, hdr.size, - &ipc_reply, sizeof(ipc_reply)); - if (ret < 0) - dev_err(sdev->dev, "DMA_TRACE_FREE failed with error: %d\n", ret); - } - ret = snd_sof_dma_trace_release(sdev); if (ret < 0) dev_err(sdev->dev, diff --git a/sound/soc/sof/utils.c b/sound/soc/sof/utils.c index 66fa6602fb..5539d3afbe 100644 --- a/sound/soc/sof/utils.c +++ b/sound/soc/sof/utils.c @@ -14,7 +14,6 @@ #include #include #include "sof-priv.h" -#include "ops.h" /* * Register IO @@ -73,21 +72,15 @@ EXPORT_SYMBOL(sof_mailbox_read); * Memory copy. 
*/ -int sof_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_type, - u32 offset, void *src, size_t size) +void sof_block_write(struct snd_sof_dev *sdev, u32 bar, u32 offset, void *src, + size_t size) { - int bar = snd_sof_dsp_get_bar_index(sdev, blk_type); + void __iomem *dest = sdev->bar[bar] + offset; const u8 *src_byte = src; - void __iomem *dest; u32 affected_mask; u32 tmp; int m, n; - if (bar < 0) - return bar; - - dest = sdev->bar[bar] + offset; - m = size / 4; n = size % 4; @@ -107,22 +100,15 @@ int sof_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_type, tmp |= *(u32 *)(src_byte + m * 4) & affected_mask; iowrite32(tmp, dest + m * 4); } - - return 0; } EXPORT_SYMBOL(sof_block_write); -int sof_block_read(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_type, - u32 offset, void *dest, size_t size) +void sof_block_read(struct snd_sof_dev *sdev, u32 bar, u32 offset, void *dest, + size_t size) { - int bar = snd_sof_dsp_get_bar_index(sdev, blk_type); + void __iomem *src = sdev->bar[bar] + offset; - if (bar < 0) - return bar; - - memcpy_fromio(dest, sdev->bar[bar] + offset, size); - - return 0; + memcpy_fromio(dest, src, size); } EXPORT_SYMBOL(sof_block_read); diff --git a/sound/soc/sof/xtensa/core.c b/sound/soc/sof/xtensa/core.c index bebbe3a286..f6e3411b33 100644 --- a/sound/soc/sof/xtensa/core.c +++ b/sound/soc/sof/xtensa/core.c @@ -81,39 +81,33 @@ static const struct xtensa_exception_cause xtensa_exception_causes[] = { }; /* only need xtensa atm */ -static void xtensa_dsp_oops(struct snd_sof_dev *sdev, const char *level, void *oops) +static void xtensa_dsp_oops(struct snd_sof_dev *sdev, void *oops) { struct sof_ipc_dsp_oops_xtensa *xoops = oops; int i; - dev_printk(level, sdev->dev, "error: DSP Firmware Oops\n"); + dev_err(sdev->dev, "error: DSP Firmware Oops\n"); for (i = 0; i < ARRAY_SIZE(xtensa_exception_causes); i++) { if (xtensa_exception_causes[i].id == xoops->exccause) { - dev_printk(level, sdev->dev, - "error: Exception Cause: %s, %s\n", - xtensa_exception_causes[i].msg, - xtensa_exception_causes[i].description); + dev_err(sdev->dev, "error: Exception Cause: %s, %s\n", + xtensa_exception_causes[i].msg, + xtensa_exception_causes[i].description); } } - dev_printk(level, sdev->dev, - "EXCCAUSE 0x%8.8x EXCVADDR 0x%8.8x PS 0x%8.8x SAR 0x%8.8x\n", - xoops->exccause, xoops->excvaddr, xoops->ps, xoops->sar); - dev_printk(level, sdev->dev, - "EPC1 0x%8.8x EPC2 0x%8.8x EPC3 0x%8.8x EPC4 0x%8.8x", - xoops->epc1, xoops->epc2, xoops->epc3, xoops->epc4); - dev_printk(level, sdev->dev, - "EPC5 0x%8.8x EPC6 0x%8.8x EPC7 0x%8.8x DEPC 0x%8.8x", - xoops->epc5, xoops->epc6, xoops->epc7, xoops->depc); - dev_printk(level, sdev->dev, - "EPS2 0x%8.8x EPS3 0x%8.8x EPS4 0x%8.8x EPS5 0x%8.8x", - xoops->eps2, xoops->eps3, xoops->eps4, xoops->eps5); - dev_printk(level, sdev->dev, - "EPS6 0x%8.8x EPS7 0x%8.8x INTENABL 0x%8.8x INTERRU 0x%8.8x", - xoops->eps6, xoops->eps7, xoops->intenable, xoops->interrupt); + dev_err(sdev->dev, "EXCCAUSE 0x%8.8x EXCVADDR 0x%8.8x PS 0x%8.8x SAR 0x%8.8x\n", + xoops->exccause, xoops->excvaddr, xoops->ps, xoops->sar); + dev_err(sdev->dev, "EPC1 0x%8.8x EPC2 0x%8.8x EPC3 0x%8.8x EPC4 0x%8.8x", + xoops->epc1, xoops->epc2, xoops->epc3, xoops->epc4); + dev_err(sdev->dev, "EPC5 0x%8.8x EPC6 0x%8.8x EPC7 0x%8.8x DEPC 0x%8.8x", + xoops->epc5, xoops->epc6, xoops->epc7, xoops->depc); + dev_err(sdev->dev, "EPS2 0x%8.8x EPS3 0x%8.8x EPS4 0x%8.8x EPS5 0x%8.8x", + xoops->eps2, xoops->eps3, xoops->eps4, xoops->eps5); + dev_err(sdev->dev, "EPS6 
0x%8.8x EPS7 0x%8.8x INTENABL 0x%8.8x INTERRU 0x%8.8x", + xoops->eps6, xoops->eps7, xoops->intenable, xoops->interrupt); } -static void xtensa_stack(struct snd_sof_dev *sdev, const char *level, void *oops, - u32 *stack, u32 stack_words) +static void xtensa_stack(struct snd_sof_dev *sdev, void *oops, u32 *stack, + u32 stack_words) { struct sof_ipc_dsp_oops_xtensa *xoops = oops; u32 stack_ptr = xoops->plat_hdr.stackptr; @@ -121,7 +115,7 @@ static void xtensa_stack(struct snd_sof_dev *sdev, const char *level, void *oops unsigned char buf[4 * 8 + 3 + 1]; int i; - dev_printk(level, sdev->dev, "stack dump from 0x%8.8x\n", stack_ptr); + dev_err(sdev->dev, "stack dump from 0x%8.8x\n", stack_ptr); /* * example output: @@ -130,11 +124,11 @@ static void xtensa_stack(struct snd_sof_dev *sdev, const char *level, void *oops for (i = 0; i < stack_words; i += 4) { hex_dump_to_buffer(stack + i, 16, 16, 4, buf, sizeof(buf), false); - dev_printk(level, sdev->dev, "0x%08x: %s\n", stack_ptr + i * 4, buf); + dev_err(sdev->dev, "0x%08x: %s\n", stack_ptr + i * 4, buf); } } -const struct dsp_arch_ops sof_xtensa_arch_ops = { +const struct sof_arch_ops sof_xtensa_arch_ops = { .dsp_oops = xtensa_dsp_oops, .dsp_stack = xtensa_stack, }; diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index 6ee714542b..e6078f50e5 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -12,7 +12,7 @@ #include #include #include -#include + #include #include #include @@ -334,8 +334,6 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, priv); - pm_runtime_enable(&pdev->dev); - ret = devm_snd_soc_register_component(&pdev->dev, &stm32_adfsdm_dai_component, &priv->dai_drv, 1); @@ -375,7 +373,6 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) static int stm32_adfsdm_remove(struct platform_device *pdev) { snd_soc_unregister_component(&pdev->dev); - pm_runtime_disable(&pdev->dev); return 0; } diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index ac5dff4d16..717f45a834 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -1045,24 +1044,36 @@ static int stm32_i2s_parse_dt(struct platform_device *pdev, /* Get clocks */ i2s->pclk = devm_clk_get(&pdev->dev, "pclk"); - if (IS_ERR(i2s->pclk)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2s->pclk), - "Could not get pclk\n"); + if (IS_ERR(i2s->pclk)) { + if (PTR_ERR(i2s->pclk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get pclk: %ld\n", + PTR_ERR(i2s->pclk)); + return PTR_ERR(i2s->pclk); + } i2s->i2sclk = devm_clk_get(&pdev->dev, "i2sclk"); - if (IS_ERR(i2s->i2sclk)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2s->i2sclk), - "Could not get i2sclk\n"); + if (IS_ERR(i2s->i2sclk)) { + if (PTR_ERR(i2s->i2sclk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get i2sclk: %ld\n", + PTR_ERR(i2s->i2sclk)); + return PTR_ERR(i2s->i2sclk); + } i2s->x8kclk = devm_clk_get(&pdev->dev, "x8k"); - if (IS_ERR(i2s->x8kclk)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2s->x8kclk), - "Could not get x8k parent clock\n"); + if (IS_ERR(i2s->x8kclk)) { + if (PTR_ERR(i2s->x8kclk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get x8k parent clock: %ld\n", + PTR_ERR(i2s->x8kclk)); + return PTR_ERR(i2s->x8kclk); + } i2s->x11kclk = devm_clk_get(&pdev->dev, "x11k"); - if (IS_ERR(i2s->x11kclk)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2s->x11kclk), - "Could not get x11k parent clock\n"); + if 
(IS_ERR(i2s->x11kclk)) { + if (PTR_ERR(i2s->x11kclk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get x11k parent clock: %ld\n", + PTR_ERR(i2s->x11kclk)); + return PTR_ERR(i2s->x11kclk); + } /* Register mclk provider if requested */ if (of_find_property(np, "#clock-cells", NULL)) { @@ -1085,10 +1096,12 @@ static int stm32_i2s_parse_dt(struct platform_device *pdev, /* Reset */ rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); - if (IS_ERR(rst)) - return dev_err_probe(&pdev->dev, PTR_ERR(rst), - "Reset controller error\n"); - + if (IS_ERR(rst)) { + if (PTR_ERR(rst) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Reset controller error %ld\n", + PTR_ERR(rst)); + return PTR_ERR(rst); + } reset_control_assert(rst); udelay(2); reset_control_deassert(rst); @@ -1100,7 +1113,6 @@ static int stm32_i2s_remove(struct platform_device *pdev) { snd_dmaengine_pcm_unregister(&pdev->dev); snd_soc_unregister_component(&pdev->dev); - pm_runtime_disable(&pdev->dev); return 0; } @@ -1131,15 +1143,19 @@ static int stm32_i2s_probe(struct platform_device *pdev) i2s->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "pclk", i2s->base, i2s->regmap_conf); - if (IS_ERR(i2s->regmap)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2s->regmap), - "Regmap init error\n"); - - pm_runtime_enable(&pdev->dev); + if (IS_ERR(i2s->regmap)) { + if (PTR_ERR(i2s->regmap) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Regmap init error %ld\n", + PTR_ERR(i2s->regmap)); + return PTR_ERR(i2s->regmap); + } ret = snd_dmaengine_pcm_register(&pdev->dev, &stm32_i2s_pcm_config, 0); - if (ret) - return dev_err_probe(&pdev->dev, ret, "PCM DMA register error\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "PCM DMA register error %d\n", ret); + return ret; + } ret = snd_soc_register_component(&pdev->dev, &stm32_i2s_component, i2s->dai_drv, 1); diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c index 8e21e6f886..058757c721 100644 --- a/sound/soc/stm/stm32_sai.c +++ b/sound/soc/stm/stm32_sai.c @@ -173,20 +173,29 @@ static int stm32_sai_probe(struct platform_device *pdev) if (!STM_SAI_IS_F4(sai)) { sai->pclk = devm_clk_get(&pdev->dev, "pclk"); - if (IS_ERR(sai->pclk)) - return dev_err_probe(&pdev->dev, PTR_ERR(sai->pclk), - "missing bus clock pclk\n"); + if (IS_ERR(sai->pclk)) { + if (PTR_ERR(sai->pclk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "missing bus clock pclk: %ld\n", + PTR_ERR(sai->pclk)); + return PTR_ERR(sai->pclk); + } } sai->clk_x8k = devm_clk_get(&pdev->dev, "x8k"); - if (IS_ERR(sai->clk_x8k)) - return dev_err_probe(&pdev->dev, PTR_ERR(sai->clk_x8k), - "missing x8k parent clock\n"); + if (IS_ERR(sai->clk_x8k)) { + if (PTR_ERR(sai->clk_x8k) != -EPROBE_DEFER) + dev_err(&pdev->dev, "missing x8k parent clock: %ld\n", + PTR_ERR(sai->clk_x8k)); + return PTR_ERR(sai->clk_x8k); + } sai->clk_x11k = devm_clk_get(&pdev->dev, "x11k"); - if (IS_ERR(sai->clk_x11k)) - return dev_err_probe(&pdev->dev, PTR_ERR(sai->clk_x11k), - "missing x11k parent clock\n"); + if (IS_ERR(sai->clk_x11k)) { + if (PTR_ERR(sai->clk_x11k) != -EPROBE_DEFER) + dev_err(&pdev->dev, "missing x11k parent clock: %ld\n", + PTR_ERR(sai->clk_x11k)); + return PTR_ERR(sai->clk_x11k); + } /* init irqs */ sai->irq = platform_get_irq(pdev, 0); @@ -195,10 +204,12 @@ static int stm32_sai_probe(struct platform_device *pdev) /* reset */ rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); - if (IS_ERR(rst)) - return dev_err_probe(&pdev->dev, PTR_ERR(rst), - "Reset controller error\n"); - + if (IS_ERR(rst)) { + if (PTR_ERR(rst) != 
-EPROBE_DEFER) + dev_err(&pdev->dev, "Reset controller error %ld\n", + PTR_ERR(rst)); + return PTR_ERR(rst); + } reset_control_assert(rst); udelay(2); reset_control_deassert(rst); diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index dd636af81c..9c3b8e2096 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1294,7 +1294,7 @@ static struct snd_soc_dai_driver stm32_sai_playback_dai = { .id = 1, /* avoid call to fmt_single_name() */ .playback = { .channels_min = 1, - .channels_max = 16, + .channels_max = 2, .rate_min = 8000, .rate_max = 192000, .rates = SNDRV_PCM_RATE_CONTINUOUS, @@ -1312,7 +1312,7 @@ static struct snd_soc_dai_driver stm32_sai_capture_dai = { .id = 1, /* avoid call to fmt_single_name() */ .capture = { .channels_min = 1, - .channels_max = 16, + .channels_max = 2, .rate_min = 8000, .rate_max = 192000, .rates = SNDRV_PCM_RATE_CONTINUOUS, @@ -1379,9 +1379,12 @@ static int stm32_sai_sub_parse_of(struct platform_device *pdev, */ sai->regmap = devm_regmap_init_mmio(&pdev->dev, base, sai->regmap_config); - if (IS_ERR(sai->regmap)) - return dev_err_probe(&pdev->dev, PTR_ERR(sai->regmap), - "Regmap init error\n"); + if (IS_ERR(sai->regmap)) { + if (PTR_ERR(sai->regmap) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Regmap init error %ld\n", + PTR_ERR(sai->regmap)); + return PTR_ERR(sai->regmap); + } /* Get direction property */ if (of_property_match_string(np, "dma-names", "tx") >= 0) { @@ -1469,9 +1472,12 @@ static int stm32_sai_sub_parse_of(struct platform_device *pdev, of_node_put(args.np); sai->sai_ck = devm_clk_get(&pdev->dev, "sai_ck"); - if (IS_ERR(sai->sai_ck)) - return dev_err_probe(&pdev->dev, PTR_ERR(sai->sai_ck), - "Missing kernel clock sai_ck\n"); + if (IS_ERR(sai->sai_ck)) { + if (PTR_ERR(sai->sai_ck) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Missing kernel clock sai_ck: %ld\n", + PTR_ERR(sai->sai_ck)); + return PTR_ERR(sai->sai_ck); + } ret = clk_prepare(sai->pdata->pclk); if (ret < 0) @@ -1545,8 +1551,11 @@ static int stm32_sai_sub_probe(struct platform_device *pdev) conf = &stm32_sai_pcm_config_spdif; ret = snd_dmaengine_pcm_register(&pdev->dev, conf, 0); - if (ret) - return dev_err_probe(&pdev->dev, ret, "Could not register pcm dma\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not register pcm dma\n"); + return ret; + } ret = snd_soc_register_component(&pdev->dev, &stm32_component, &sai->cpu_dai_drv, 1); diff --git a/sound/soc/stm/stm32_spdifrx.c b/sound/soc/stm/stm32_spdifrx.c index 6f7882c4fe..48145f5535 100644 --- a/sound/soc/stm/stm32_spdifrx.c +++ b/sound/soc/stm/stm32_spdifrx.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -406,9 +405,12 @@ static int stm32_spdifrx_dma_ctrl_register(struct device *dev, int ret; spdifrx->ctrl_chan = dma_request_chan(dev, "rx-ctrl"); - if (IS_ERR(spdifrx->ctrl_chan)) - return dev_err_probe(dev, PTR_ERR(spdifrx->ctrl_chan), - "dma_request_slave_channel error\n"); + if (IS_ERR(spdifrx->ctrl_chan)) { + if (PTR_ERR(spdifrx->ctrl_chan) != -EPROBE_DEFER) + dev_err(dev, "dma_request_slave_channel error %ld\n", + PTR_ERR(spdifrx->ctrl_chan)); + return PTR_ERR(spdifrx->ctrl_chan); + } spdifrx->dmab = devm_kzalloc(dev, sizeof(struct snd_dma_buffer), GFP_KERNEL); @@ -927,9 +929,12 @@ static int stm32_spdifrx_parse_of(struct platform_device *pdev, spdifrx->phys_addr = res->start; spdifrx->kclk = devm_clk_get(&pdev->dev, "kclk"); - if (IS_ERR(spdifrx->kclk)) - return dev_err_probe(&pdev->dev, PTR_ERR(spdifrx->kclk), - "Could not get 
kclk\n"); + if (IS_ERR(spdifrx->kclk)) { + if (PTR_ERR(spdifrx->kclk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get kclk: %ld\n", + PTR_ERR(spdifrx->kclk)); + return PTR_ERR(spdifrx->kclk); + } spdifrx->irq = platform_get_irq(pdev, 0); if (spdifrx->irq < 0) @@ -950,7 +955,6 @@ static int stm32_spdifrx_remove(struct platform_device *pdev) snd_dmaengine_pcm_unregister(&pdev->dev); snd_soc_unregister_component(&pdev->dev); - pm_runtime_disable(&pdev->dev); return 0; } @@ -981,9 +985,12 @@ static int stm32_spdifrx_probe(struct platform_device *pdev) spdifrx->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "kclk", spdifrx->base, spdifrx->regmap_conf); - if (IS_ERR(spdifrx->regmap)) - return dev_err_probe(&pdev->dev, PTR_ERR(spdifrx->regmap), - "Regmap init error\n"); + if (IS_ERR(spdifrx->regmap)) { + if (PTR_ERR(spdifrx->regmap) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Regmap init error %ld\n", + PTR_ERR(spdifrx->regmap)); + return PTR_ERR(spdifrx->regmap); + } ret = devm_request_irq(&pdev->dev, spdifrx->irq, stm32_spdifrx_isr, 0, dev_name(&pdev->dev), spdifrx); @@ -993,20 +1000,23 @@ static int stm32_spdifrx_probe(struct platform_device *pdev) } rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); - if (IS_ERR(rst)) - return dev_err_probe(&pdev->dev, PTR_ERR(rst), - "Reset controller error\n"); - + if (IS_ERR(rst)) { + if (PTR_ERR(rst) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Reset controller error %ld\n", + PTR_ERR(rst)); + return PTR_ERR(rst); + } reset_control_assert(rst); udelay(2); reset_control_deassert(rst); - pm_runtime_enable(&pdev->dev); - pcm_config = &stm32_spdifrx_pcm_config; ret = snd_dmaengine_pcm_register(&pdev->dev, pcm_config, 0); - if (ret) - return dev_err_probe(&pdev->dev, ret, "PCM DMA register error\n"); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "PCM DMA register error %d\n", ret); + return ret; + } ret = snd_soc_register_component(&pdev->dev, &stm32_spdifrx_component, diff --git a/sound/soc/sunxi/sun4i-codec.c b/sound/soc/sunxi/sun4i-codec.c index 60712f24ad..da597e456b 100644 --- a/sound/soc/sunxi/sun4i-codec.c +++ b/sound/soc/sunxi/sun4i-codec.c @@ -1752,7 +1752,8 @@ static int sun4i_codec_probe(struct platform_device *pdev) GPIOD_OUT_LOW); if (IS_ERR(scodec->gpio_pa)) { ret = PTR_ERR(scodec->gpio_pa); - dev_err_probe(&pdev->dev, ret, "Failed to get pa gpio\n"); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "Failed to get pa gpio: %d\n", ret); return ret; } diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index 17090f4315..a10949bf0c 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -21,8 +21,6 @@ #include #include #include -#include -#include #include #include #include @@ -188,7 +186,6 @@ struct sun4i_spdif_dev { struct regmap *regmap; struct snd_dmaengine_dai_dma_data dma_params_tx; const struct sun4i_spdif_quirks *quirks; - spinlock_t lock; }; static void sun4i_spdif_configure(struct sun4i_spdif_dev *host) @@ -388,122 +385,11 @@ static int sun4i_spdif_trigger(struct snd_pcm_substream *substream, int cmd, return ret; } -static int sun4i_spdif_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958; - uinfo->count = 1; - - return 0; -} - -static int sun4i_spdif_get_status_mask(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - u8 *status = ucontrol->value.iec958.status; - - status[0] = 0xff; - status[1] = 0xff; - status[2] = 0xff; - status[3] = 0xff; - status[4] = 0xff; - 
status[5] = 0x03; - - return 0; -} - -static int sun4i_spdif_get_status(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *cpu_dai = snd_kcontrol_chip(kcontrol); - struct sun4i_spdif_dev *host = snd_soc_dai_get_drvdata(cpu_dai); - u8 *status = ucontrol->value.iec958.status; - unsigned long flags; - unsigned int reg; - - spin_lock_irqsave(&host->lock, flags); - - regmap_read(host->regmap, SUN4I_SPDIF_TXCHSTA0, ®); - - status[0] = reg & 0xff; - status[1] = (reg >> 8) & 0xff; - status[2] = (reg >> 16) & 0xff; - status[3] = (reg >> 24) & 0xff; - - regmap_read(host->regmap, SUN4I_SPDIF_TXCHSTA1, ®); - - status[4] = reg & 0xff; - status[5] = (reg >> 8) & 0x3; - - spin_unlock_irqrestore(&host->lock, flags); - - return 0; -} - -static int sun4i_spdif_set_status(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *cpu_dai = snd_kcontrol_chip(kcontrol); - struct sun4i_spdif_dev *host = snd_soc_dai_get_drvdata(cpu_dai); - u8 *status = ucontrol->value.iec958.status; - unsigned long flags; - unsigned int reg; - bool chg0, chg1; - - spin_lock_irqsave(&host->lock, flags); - - reg = (u32)status[3] << 24; - reg |= (u32)status[2] << 16; - reg |= (u32)status[1] << 8; - reg |= (u32)status[0]; - - regmap_update_bits_check(host->regmap, SUN4I_SPDIF_TXCHSTA0, - GENMASK(31,0), reg, &chg0); - - reg = (u32)status[5] << 8; - reg |= (u32)status[4]; - - regmap_update_bits_check(host->regmap, SUN4I_SPDIF_TXCHSTA1, - GENMASK(9,0), reg, &chg1); - - reg = SUN4I_SPDIF_TXCFG_CHSTMODE; - if (status[0] & IEC958_AES0_NONAUDIO) - reg |= SUN4I_SPDIF_TXCFG_NONAUDIO; - - regmap_update_bits(host->regmap, SUN4I_SPDIF_TXCFG, - SUN4I_SPDIF_TXCFG_CHSTMODE | - SUN4I_SPDIF_TXCFG_NONAUDIO, reg); - - spin_unlock_irqrestore(&host->lock, flags); - - return chg0 || chg1; -} - -static struct snd_kcontrol_new sun4i_spdif_controls[] = { - { - .access = SNDRV_CTL_ELEM_ACCESS_READ, - .iface = SNDRV_CTL_ELEM_IFACE_PCM, - .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, MASK), - .info = sun4i_spdif_info, - .get = sun4i_spdif_get_status_mask - }, - { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, - .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT), - .info = sun4i_spdif_info, - .get = sun4i_spdif_get_status, - .put = sun4i_spdif_set_status - } -}; - static int sun4i_spdif_soc_dai_probe(struct snd_soc_dai *dai) { struct sun4i_spdif_dev *host = snd_soc_dai_get_drvdata(dai); snd_soc_dai_init_dma_data(dai, &host->dma_params_tx, NULL); - snd_soc_add_dai_controls(dai, sun4i_spdif_controls, - ARRAY_SIZE(sun4i_spdif_controls)); - return 0; } @@ -626,7 +512,6 @@ static int sun4i_spdif_probe(struct platform_device *pdev) return -ENOMEM; host->pdev = pdev; - spin_lock_init(&host->lock); /* Initialize this copy of the CPU DAI driver structure */ memcpy(&host->cpu_dai_drv, &sun4i_spdif_dai, sizeof(sun4i_spdif_dai)); diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 0bea2162f6..518bfb724a 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -21,7 +21,6 @@ #include #include #include -#include #define SUN8I_SYSCLK_CTL 0x00c #define SUN8I_SYSCLK_CTL_AIF1CLK_ENA 11 @@ -73,12 +72,6 @@ #define SUN8I_AIF1_MXR_SRC_AD0R_MXR_SRC_AIF2DACR 10 #define SUN8I_AIF1_MXR_SRC_AD0R_MXR_SRC_ADCR 9 #define SUN8I_AIF1_MXR_SRC_AD0R_MXR_SRC_AIF2DACL 8 -#define SUN8I_AIF1_VOL_CTRL1 0x050 -#define SUN8I_AIF1_VOL_CTRL1_AD0L_VOL 8 -#define SUN8I_AIF1_VOL_CTRL1_AD0R_VOL 0 -#define SUN8I_AIF1_VOL_CTRL3 0x058 -#define SUN8I_AIF1_VOL_CTRL3_DA0L_VOL 8 -#define 
SUN8I_AIF1_VOL_CTRL3_DA0R_VOL 0 #define SUN8I_AIF2_ADCDAT_CTRL 0x084 #define SUN8I_AIF2_ADCDAT_CTRL_AIF2_ADCL_ENA 15 #define SUN8I_AIF2_ADCDAT_CTRL_AIF2_ADCR_ENA 14 @@ -98,12 +91,6 @@ #define SUN8I_AIF2_MXR_SRC_ADCR_MXR_SRC_AIF1DA1R 10 #define SUN8I_AIF2_MXR_SRC_ADCR_MXR_SRC_AIF2DACL 9 #define SUN8I_AIF2_MXR_SRC_ADCR_MXR_SRC_ADCR 8 -#define SUN8I_AIF2_VOL_CTRL1 0x090 -#define SUN8I_AIF2_VOL_CTRL1_ADCL_VOL 8 -#define SUN8I_AIF2_VOL_CTRL1_ADCR_VOL 0 -#define SUN8I_AIF2_VOL_CTRL2 0x098 -#define SUN8I_AIF2_VOL_CTRL2_DACL_VOL 8 -#define SUN8I_AIF2_VOL_CTRL2_DACR_VOL 0 #define SUN8I_AIF3_CLK_CTRL_AIF3_CLK_SRC_AIF1 (0x0 << 0) #define SUN8I_AIF3_CLK_CTRL_AIF3_CLK_SRC_AIF2 (0x1 << 0) #define SUN8I_AIF3_CLK_CTRL_AIF3_CLK_SRC_AIF1CLK (0x2 << 0) @@ -115,14 +102,8 @@ #define SUN8I_ADC_DIG_CTRL_ENAD 15 #define SUN8I_ADC_DIG_CTRL_ADOUT_DTS 2 #define SUN8I_ADC_DIG_CTRL_ADOUT_DLY 1 -#define SUN8I_ADC_VOL_CTRL 0x104 -#define SUN8I_ADC_VOL_CTRL_ADCL_VOL 8 -#define SUN8I_ADC_VOL_CTRL_ADCR_VOL 0 #define SUN8I_DAC_DIG_CTRL 0x120 #define SUN8I_DAC_DIG_CTRL_ENDA 15 -#define SUN8I_DAC_VOL_CTRL 0x124 -#define SUN8I_DAC_VOL_CTRL_DACL_VOL 8 -#define SUN8I_DAC_VOL_CTRL_DACR_VOL 0 #define SUN8I_DAC_MXR_SRC 0x130 #define SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA0L 15 #define SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA1L 14 @@ -715,41 +696,6 @@ static struct snd_soc_dai_driver sun8i_codec_dais[] = { }, }; -static const DECLARE_TLV_DB_SCALE(sun8i_codec_vol_scale, -12000, 75, 1); - -static const struct snd_kcontrol_new sun8i_codec_controls[] = { - SOC_DOUBLE_TLV("AIF1 AD0 Capture Volume", - SUN8I_AIF1_VOL_CTRL1, - SUN8I_AIF1_VOL_CTRL1_AD0L_VOL, - SUN8I_AIF1_VOL_CTRL1_AD0R_VOL, - 0xc0, 0, sun8i_codec_vol_scale), - SOC_DOUBLE_TLV("AIF1 DA0 Playback Volume", - SUN8I_AIF1_VOL_CTRL3, - SUN8I_AIF1_VOL_CTRL3_DA0L_VOL, - SUN8I_AIF1_VOL_CTRL3_DA0R_VOL, - 0xc0, 0, sun8i_codec_vol_scale), - SOC_DOUBLE_TLV("AIF2 ADC Capture Volume", - SUN8I_AIF2_VOL_CTRL1, - SUN8I_AIF2_VOL_CTRL1_ADCL_VOL, - SUN8I_AIF2_VOL_CTRL1_ADCR_VOL, - 0xc0, 0, sun8i_codec_vol_scale), - SOC_DOUBLE_TLV("AIF2 DAC Playback Volume", - SUN8I_AIF2_VOL_CTRL2, - SUN8I_AIF2_VOL_CTRL2_DACL_VOL, - SUN8I_AIF2_VOL_CTRL2_DACR_VOL, - 0xc0, 0, sun8i_codec_vol_scale), - SOC_DOUBLE_TLV("ADC Capture Volume", - SUN8I_ADC_VOL_CTRL, - SUN8I_ADC_VOL_CTRL_ADCL_VOL, - SUN8I_ADC_VOL_CTRL_ADCR_VOL, - 0xc0, 0, sun8i_codec_vol_scale), - SOC_DOUBLE_TLV("DAC Playback Volume", - SUN8I_DAC_VOL_CTRL, - SUN8I_DAC_VOL_CTRL_DACL_VOL, - SUN8I_DAC_VOL_CTRL_DACR_VOL, - 0xc0, 0, sun8i_codec_vol_scale), -}; - static int sun8i_codec_aif_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -1269,8 +1215,6 @@ static int sun8i_codec_component_probe(struct snd_soc_component *component) } static const struct snd_soc_component_driver sun8i_soc_component = { - .controls = sun8i_codec_controls, - .num_controls = ARRAY_SIZE(sun8i_codec_controls), .dapm_widgets = sun8i_codec_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(sun8i_codec_dapm_widgets), .dapm_routes = sun8i_codec_dapm_routes, diff --git a/sound/soc/tegra/Kconfig b/sound/soc/tegra/Kconfig index cd454871d6..83c87f35a7 100644 --- a/sound/soc/tegra/Kconfig +++ b/sound/soc/tegra/Kconfig @@ -108,54 +108,6 @@ config SND_SOC_TEGRA210_ADMAIF channel. Buffer size is configurable for each ADMAIIF channel. Say Y or M if you want to add support for Tegra210 ADMAIF module. 
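/*
 * The sun8i-codec hunk above drops six stereo volume controls that all
 * share one TLV dB scale (-120 dB to 0 dB in 0.75 dB steps, muting at
 * the minimum). A minimal sketch of that ASoC pattern follows; the
 * register name and bit shifts here are invented placeholders for
 * illustration, not the codec's own.
 */
#include <sound/soc.h>
#include <sound/tlv.h>

#define EXAMPLE_VOL_CTRL	0x50	/* hypothetical volume register */
#define EXAMPLE_VOL_L		8	/* left-channel bitfield shift */
#define EXAMPLE_VOL_R		0	/* right-channel bitfield shift */

/* min -120.00 dB, 0.75 dB per step, step 0 mutes */
static const DECLARE_TLV_DB_SCALE(example_vol_scale, -12000, 75, 1);

static const struct snd_kcontrol_new example_controls[] = {
	/* one stereo control: two bitfields in one register, shared scale */
	SOC_DOUBLE_TLV("Example Playback Volume", EXAMPLE_VOL_CTRL,
		       EXAMPLE_VOL_L, EXAMPLE_VOL_R, 0xc0, 0,
		       example_vol_scale),
};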
-config SND_SOC_TEGRA210_MVC - tristate "Tegra210 MVC module" - help - Config to enable the digital Master Volume Controller (MVC) which - provides gain or attenuation to a digital signal path. It can be - used in input or output signal path. It can be used either for - per-stream volume control or for master volume control. - Say Y or M if you want to add support for Tegra210 MVC module. - -config SND_SOC_TEGRA210_SFC - tristate "Tegra210 SFC module" - help - Config to enable the Sampling Frequency Converter (SFC) which - converts the sampling frequency of input signal to another - frequency. It supports sampling frequency conversion of streams - upto 2 channels (stereo). - Say Y or M if you want to add support for Tegra210 SFC module. - -config SND_SOC_TEGRA210_AMX - tristate "Tegra210 AMX module" - help - Config to enable the Audio Multiplexer (AMX) which can multiplex - four input streams (each of up to 16 channels) and generate - output stream (of up to 16 channels). A byte RAM helps to form an - output frame by any combination of bytes from the input frames. - Say Y or M if you want to add support for Tegra210 AMX module. - -config SND_SOC_TEGRA210_ADX - tristate "Tegra210 ADX module" - help - Config to enable the Audio Demultiplexer (ADX) which takes an - input stream (up to 16 channels) and demultiplexes it into four - output streams (each of up to 16 channels). A byte RAM helps to - form output frames by any combination of bytes from the input - frame. Its design is identical to that of byte RAM in the AMX - except that the data flow direction is reversed. - Say Y or M if you want to add support for Tegra210 ADX module. - -config SND_SOC_TEGRA210_MIXER - tristate "Tegra210 Mixer module" - help - Config to enable the Mixer module which can help to mix multiple - audio streams. It supports mixing of upto 10 input streams, - where each stream can contain maximum of 8 channels. It supports - 5 output each of which can be a mix of any combination of 10 - input streams. - Say Y or M if you want to add support for Tegra210 Mixer module. 
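/*
 * The five Kconfig entries removed above cover Tegra210 AHUB client
 * modules (MVC, SFC, AMX, ADX, MIXER). The AMX help text describes a
 * byte RAM that assembles each output frame from arbitrary bytes of the
 * four input frames, with the ADX reversing the data flow. A minimal
 * conceptual sketch of that byte-map idea follows -- plain C with
 * invented names (amx_byte_map, amx_assemble), not the driver's actual
 * programming model.
 */
#include <stdint.h>
#include <stddef.h>

#define AMX_INPUTS	4
#define FRAME_BYTES	64	/* 16 channels x 32-bit samples */

struct amx_byte_map {
	uint8_t input;	/* which of the four input frames, 0..3 */
	uint8_t offset;	/* byte offset within that input frame */
};

/* Build one output frame by copying the mapped byte into each slot. */
static void amx_assemble(uint8_t out[FRAME_BYTES],
			 const uint8_t in[AMX_INPUTS][FRAME_BYTES],
			 const struct amx_byte_map map[FRAME_BYTES])
{
	for (size_t i = 0; i < FRAME_BYTES; i++)
		out[i] = in[map[i].input][map[i].offset];
}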
- config SND_SOC_TEGRA_AUDIO_GRAPH_CARD tristate "Audio Graph Card based Tegra driver" depends on SND_AUDIO_GRAPH_CARD diff --git a/sound/soc/tegra/Makefile b/sound/soc/tegra/Makefile index f19d56690a..e2cec9ae31 100644 --- a/sound/soc/tegra/Makefile +++ b/sound/soc/tegra/Makefile @@ -13,11 +13,6 @@ snd-soc-tegra210-dmic-objs := tegra210_dmic.o snd-soc-tegra210-i2s-objs := tegra210_i2s.o snd-soc-tegra186-dspk-objs := tegra186_dspk.o snd-soc-tegra210-admaif-objs := tegra210_admaif.o -snd-soc-tegra210-mvc-objs := tegra210_mvc.o -snd-soc-tegra210-sfc-objs := tegra210_sfc.o -snd-soc-tegra210-amx-objs := tegra210_amx.o -snd-soc-tegra210-adx-objs := tegra210_adx.o -snd-soc-tegra210-mixer-objs := tegra210_mixer.o obj-$(CONFIG_SND_SOC_TEGRA) += snd-soc-tegra-pcm.o obj-$(CONFIG_SND_SOC_TEGRA20_AC97) += snd-soc-tegra20-ac97.o @@ -31,11 +26,6 @@ obj-$(CONFIG_SND_SOC_TEGRA210_AHUB) += snd-soc-tegra210-ahub.o obj-$(CONFIG_SND_SOC_TEGRA210_I2S) += snd-soc-tegra210-i2s.o obj-$(CONFIG_SND_SOC_TEGRA186_DSPK) += snd-soc-tegra186-dspk.o obj-$(CONFIG_SND_SOC_TEGRA210_ADMAIF) += snd-soc-tegra210-admaif.o -obj-$(CONFIG_SND_SOC_TEGRA210_MVC) += snd-soc-tegra210-mvc.o -obj-$(CONFIG_SND_SOC_TEGRA210_SFC) += snd-soc-tegra210-sfc.o -obj-$(CONFIG_SND_SOC_TEGRA210_AMX) += snd-soc-tegra210-amx.o -obj-$(CONFIG_SND_SOC_TEGRA210_ADX) += snd-soc-tegra210-adx.o -obj-$(CONFIG_SND_SOC_TEGRA210_MIXER) += snd-soc-tegra210-mixer.o # Tegra machine Support snd-soc-tegra-wm8903-objs := tegra_wm8903.o diff --git a/sound/soc/tegra/tegra20_i2s.c b/sound/soc/tegra/tegra20_i2s.c index 27365a877e..266d2cab9f 100644 --- a/sound/soc/tegra/tegra20_i2s.c +++ b/sound/soc/tegra/tegra20_i2s.c @@ -262,59 +262,10 @@ static int tegra20_i2s_probe(struct snd_soc_dai *dai) return 0; } -static const unsigned int tegra20_i2s_rates[] = { - 8000, 11025, 16000, 22050, 32000, 44100, 48000, 64000, 88200, 96000 -}; - -static int tegra20_i2s_filter_rates(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) -{ - struct snd_interval *r = hw_param_interval(params, rule->var); - struct snd_soc_dai *dai = rule->private; - struct tegra20_i2s *i2s = dev_get_drvdata(dai->dev); - struct clk *parent = clk_get_parent(i2s->clk_i2s); - long i, parent_rate, valid_rates = 0; - - parent_rate = clk_get_rate(parent); - if (parent_rate <= 0) { - dev_err(dai->dev, "Can't get parent clock rate: %ld\n", - parent_rate); - return parent_rate ?: -EINVAL; - } - - for (i = 0; i < ARRAY_SIZE(tegra20_i2s_rates); i++) { - if (parent_rate % (tegra20_i2s_rates[i] * 128) == 0) - valid_rates |= BIT(i); - } - - /* - * At least one rate must be valid, otherwise the parent clock isn't - * audio PLL. Nothing should be filtered in this case. 
- */ - if (!valid_rates) - valid_rates = BIT(ARRAY_SIZE(tegra20_i2s_rates)) - 1; - - return snd_interval_list(r, ARRAY_SIZE(tegra20_i2s_rates), - tegra20_i2s_rates, valid_rates); -} - -static int tegra20_i2s_startup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - if (!device_property_read_bool(dai->dev, "nvidia,fixed-parent-rate")) - return 0; - - return snd_pcm_hw_rule_add(substream->runtime, 0, - SNDRV_PCM_HW_PARAM_RATE, - tegra20_i2s_filter_rates, dai, - SNDRV_PCM_HW_PARAM_RATE, -1); -} - static const struct snd_soc_dai_ops tegra20_i2s_dai_ops = { .set_fmt = tegra20_i2s_set_fmt, .hw_params = tegra20_i2s_hw_params, .trigger = tegra20_i2s_trigger, - .startup = tegra20_i2s_startup, }; static const struct snd_soc_dai_driver tegra20_i2s_dai_template = { diff --git a/sound/soc/tegra/tegra20_spdif.c b/sound/soc/tegra/tegra20_spdif.c index d09cd7ee68..7751575cd6 100644 --- a/sound/soc/tegra/tegra20_spdif.c +++ b/sound/soc/tegra/tegra20_spdif.c @@ -7,15 +7,12 @@ */ #include -#include #include #include #include -#include #include #include #include -#include #include #include #include @@ -25,12 +22,12 @@ #include "tegra20_spdif.h" +#define DRV_NAME "tegra20-spdif" + static __maybe_unused int tegra20_spdif_runtime_suspend(struct device *dev) { struct tegra20_spdif *spdif = dev_get_drvdata(dev); - regcache_cache_only(spdif->regmap, true); - clk_disable_unprepare(spdif->clk_spdif_out); return 0; @@ -41,45 +38,23 @@ static __maybe_unused int tegra20_spdif_runtime_resume(struct device *dev) struct tegra20_spdif *spdif = dev_get_drvdata(dev); int ret; - ret = reset_control_assert(spdif->reset); - if (ret) - return ret; - ret = clk_prepare_enable(spdif->clk_spdif_out); if (ret) { dev_err(dev, "clk_enable failed: %d\n", ret); return ret; } - usleep_range(10, 100); - - ret = reset_control_deassert(spdif->reset); - if (ret) - goto disable_clocks; - - regcache_cache_only(spdif->regmap, false); - regcache_mark_dirty(spdif->regmap); - - ret = regcache_sync(spdif->regmap); - if (ret) - goto disable_clocks; - return 0; - -disable_clocks: - clk_disable_unprepare(spdif->clk_spdif_out); - - return ret; } static int tegra20_spdif_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct snd_soc_dai *dai) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { - struct tegra20_spdif *spdif = dev_get_drvdata(dai->dev); + struct device *dev = dai->dev; + struct tegra20_spdif *spdif = snd_soc_dai_get_drvdata(dai); unsigned int mask = 0, val = 0; int ret, spdifclock; - long rate; mask |= TEGRA20_SPDIF_CTRL_PACK | TEGRA20_SPDIF_CTRL_BIT_MODE_MASK; @@ -94,14 +69,6 @@ static int tegra20_spdif_hw_params(struct snd_pcm_substream *substream, regmap_update_bits(spdif->regmap, TEGRA20_SPDIF_CTRL, mask, val); - /* - * FIFO trigger level must be bigger than DMA burst or equal to it, - * otherwise data is discarded on overflow. 
- */ - regmap_update_bits(spdif->regmap, TEGRA20_SPDIF_DATA_FIFO_CSR, - TEGRA20_SPDIF_DATA_FIFO_CSR_TX_ATN_LVL_MASK, - TEGRA20_SPDIF_DATA_FIFO_CSR_TX_ATN_LVL_TU4_WORD_FULL); - switch (params_rate(params)) { case 32000: spdifclock = 4096000; @@ -130,16 +97,10 @@ static int tegra20_spdif_hw_params(struct snd_pcm_substream *substream, ret = clk_set_rate(spdif->clk_spdif_out, spdifclock); if (ret) { - dev_err(dai->dev, "Can't set SPDIF clock rate: %d\n", ret); + dev_err(dev, "Can't set SPDIF clock rate: %d\n", ret); return ret; } - rate = clk_get_rate(spdif->clk_spdif_out); - if (rate != spdifclock) - dev_warn_once(dai->dev, - "SPDIF clock rate %d doesn't match requested rate %lu\n", - spdifclock, rate); - return 0; } @@ -157,9 +118,9 @@ static void tegra20_spdif_stop_playback(struct tegra20_spdif *spdif) } static int tegra20_spdif_trigger(struct snd_pcm_substream *substream, int cmd, - struct snd_soc_dai *dai) + struct snd_soc_dai *dai) { - struct tegra20_spdif *spdif = dev_get_drvdata(dai->dev); + struct tegra20_spdif *spdif = snd_soc_dai_get_drvdata(dai); switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -179,62 +140,9 @@ static int tegra20_spdif_trigger(struct snd_pcm_substream *substream, int cmd, return 0; } -static int tegra20_spdif_filter_rates(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) -{ - struct snd_interval *r = hw_param_interval(params, rule->var); - struct snd_soc_dai *dai = rule->private; - struct tegra20_spdif *spdif = dev_get_drvdata(dai->dev); - struct clk *parent = clk_get_parent(spdif->clk_spdif_out); - const unsigned int rates[] = { 32000, 44100, 48000 }; - long i, parent_rate, valid_rates = 0; - - parent_rate = clk_get_rate(parent); - if (parent_rate <= 0) { - dev_err(dai->dev, "Can't get parent clock rate: %ld\n", - parent_rate); - return parent_rate ?: -EINVAL; - } - - for (i = 0; i < ARRAY_SIZE(rates); i++) { - if (parent_rate % (rates[i] * 128) == 0) - valid_rates |= BIT(i); - } - - /* - * At least one rate must be valid, otherwise the parent clock isn't - * audio PLL. Nothing should be filtered in this case. - */ - if (!valid_rates) - valid_rates = BIT(ARRAY_SIZE(rates)) - 1; - - return snd_interval_list(r, ARRAY_SIZE(rates), rates, valid_rates); -} - -static int tegra20_spdif_startup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - if (!device_property_read_bool(dai->dev, "nvidia,fixed-parent-rate")) - return 0; - - /* - * SPDIF and I2S share audio PLL. HDMI takes audio packets from SPDIF - * and audio may not work on some TVs if clock rate isn't precise. - * - * PLL rate is controlled by I2S side. Filter out audio rates that - * don't match PLL rate at the start of stream to allow both SPDIF - * and I2S work simultaneously, assuming that PLL rate won't be - * changed later on. 
- */ - return snd_pcm_hw_rule_add(substream->runtime, 0, - SNDRV_PCM_HW_PARAM_RATE, - tegra20_spdif_filter_rates, dai, - SNDRV_PCM_HW_PARAM_RATE, -1); -} - static int tegra20_spdif_probe(struct snd_soc_dai *dai) { - struct tegra20_spdif *spdif = dev_get_drvdata(dai->dev); + struct tegra20_spdif *spdif = snd_soc_dai_get_drvdata(dai); dai->capture_dma_data = NULL; dai->playback_dma_data = &spdif->playback_dma_data; @@ -243,27 +151,26 @@ static int tegra20_spdif_probe(struct snd_soc_dai *dai) } static const struct snd_soc_dai_ops tegra20_spdif_dai_ops = { - .hw_params = tegra20_spdif_hw_params, - .trigger = tegra20_spdif_trigger, - .startup = tegra20_spdif_startup, + .hw_params = tegra20_spdif_hw_params, + .trigger = tegra20_spdif_trigger, }; static struct snd_soc_dai_driver tegra20_spdif_dai = { - .name = "tegra20-spdif", + .name = DRV_NAME, .probe = tegra20_spdif_probe, .playback = { .stream_name = "Playback", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | - SNDRV_PCM_RATE_48000, + SNDRV_PCM_RATE_48000, .formats = SNDRV_PCM_FMTBIT_S16_LE, }, .ops = &tegra20_spdif_dai_ops, }; static const struct snd_soc_component_driver tegra20_spdif_component = { - .name = "tegra20-spdif", + .name = DRV_NAME, }; static bool tegra20_spdif_wr_rd_reg(struct device *dev, unsigned int reg) @@ -344,7 +251,7 @@ static const struct regmap_config tegra20_spdif_regmap_config = { static int tegra20_spdif_platform_probe(struct platform_device *pdev) { struct tegra20_spdif *spdif; - struct resource *mem; + struct resource *mem, *dmareq; void __iomem *regs; int ret; @@ -355,77 +262,89 @@ static int tegra20_spdif_platform_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, spdif); - spdif->reset = devm_reset_control_get_exclusive(&pdev->dev, NULL); - if (IS_ERR(spdif->reset)) { - dev_err(&pdev->dev, "Can't retrieve spdif reset\n"); - return PTR_ERR(spdif->reset); - } - - spdif->clk_spdif_out = devm_clk_get(&pdev->dev, "out"); + spdif->clk_spdif_out = devm_clk_get(&pdev->dev, "spdif_out"); if (IS_ERR(spdif->clk_spdif_out)) { - dev_err(&pdev->dev, "Could not retrieve spdif clock\n"); - return PTR_ERR(spdif->clk_spdif_out); + pr_err("Can't retrieve spdif clock\n"); + ret = PTR_ERR(spdif->clk_spdif_out); + return ret; } regs = devm_platform_get_and_ioremap_resource(pdev, 0, &mem); if (IS_ERR(regs)) return PTR_ERR(regs); + dmareq = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!dmareq) { + dev_err(&pdev->dev, "No DMA resource\n"); + return -ENODEV; + } + spdif->regmap = devm_regmap_init_mmio(&pdev->dev, regs, - &tegra20_spdif_regmap_config); + &tegra20_spdif_regmap_config); if (IS_ERR(spdif->regmap)) { dev_err(&pdev->dev, "regmap init failed\n"); - return PTR_ERR(spdif->regmap); + ret = PTR_ERR(spdif->regmap); + return ret; } spdif->playback_dma_data.addr = mem->start + TEGRA20_SPDIF_DATA_OUT; spdif->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; spdif->playback_dma_data.maxburst = 4; + spdif->playback_dma_data.slave_id = dmareq->start; - ret = devm_pm_runtime_enable(&pdev->dev); - if (ret) - return ret; + pm_runtime_enable(&pdev->dev); - ret = devm_snd_soc_register_component(&pdev->dev, - &tegra20_spdif_component, - &tegra20_spdif_dai, 1); + ret = snd_soc_register_component(&pdev->dev, &tegra20_spdif_component, + &tegra20_spdif_dai, 1); if (ret) { dev_err(&pdev->dev, "Could not register DAI: %d\n", ret); - return ret; + ret = -ENOMEM; + goto err_pm_disable; } - ret = devm_tegra_pcm_platform_register(&pdev->dev); + ret = 
tegra_pcm_platform_register(&pdev->dev); if (ret) { dev_err(&pdev->dev, "Could not register PCM: %d\n", ret); - return ret; + goto err_unregister_component; } + return 0; + +err_unregister_component: + snd_soc_unregister_component(&pdev->dev); +err_pm_disable: + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static int tegra20_spdif_platform_remove(struct platform_device *pdev) +{ + tegra_pcm_platform_unregister(&pdev->dev); + snd_soc_unregister_component(&pdev->dev); + + pm_runtime_disable(&pdev->dev); + return 0; } static const struct dev_pm_ops tegra20_spdif_pm_ops = { SET_RUNTIME_PM_OPS(tegra20_spdif_runtime_suspend, tegra20_spdif_runtime_resume, NULL) - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) }; -static const struct of_device_id tegra20_spdif_of_match[] = { - { .compatible = "nvidia,tegra20-spdif", }, - {}, -}; -MODULE_DEVICE_TABLE(of, tegra20_spdif_of_match); - static struct platform_driver tegra20_spdif_driver = { .driver = { - .name = "tegra20-spdif", + .name = DRV_NAME, .pm = &tegra20_spdif_pm_ops, - .of_match_table = tegra20_spdif_of_match, }, .probe = tegra20_spdif_platform_probe, + .remove = tegra20_spdif_platform_remove, }; + module_platform_driver(tegra20_spdif_driver); MODULE_AUTHOR("Stephen Warren "); MODULE_DESCRIPTION("Tegra20 SPDIF ASoC driver"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/sound/soc/tegra/tegra20_spdif.h b/sound/soc/tegra/tegra20_spdif.h index ff4b79e205..1973ffc2d5 100644 --- a/sound/soc/tegra/tegra20_spdif.h +++ b/sound/soc/tegra/tegra20_spdif.h @@ -451,7 +451,6 @@ struct tegra20_spdif { struct snd_dmaengine_dai_dma_data capture_dma_data; struct snd_dmaengine_dai_dma_data playback_dma_data; struct regmap *regmap; - struct reset_control *reset; }; #endif diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c index 388b815443..1b2f7cb8c6 100644 --- a/sound/soc/tegra/tegra210_ahub.c +++ b/sound/soc/tegra/tegra210_ahub.c @@ -108,68 +108,14 @@ static struct snd_soc_dai_driver tegra210_ahub_dais[] = { DAI(ADMAIF8), DAI(ADMAIF9), DAI(ADMAIF10), - /* XBAR <-> I2S <-> Codec */ DAI(I2S1), DAI(I2S2), DAI(I2S3), DAI(I2S4), DAI(I2S5), - /* XBAR <- DMIC <- Codec */ DAI(DMIC1), DAI(DMIC2), DAI(DMIC3), - /* XBAR -> SFC -> XBAR */ - DAI(SFC1 RX), - DAI(SFC1 TX), - DAI(SFC2 RX), - DAI(SFC2 TX), - DAI(SFC3 RX), - DAI(SFC3 TX), - DAI(SFC4 RX), - DAI(SFC4 TX), - /* XBAR -> MVC -> XBAR */ - DAI(MVC1 RX), - DAI(MVC1 TX), - DAI(MVC2 RX), - DAI(MVC2 TX), - /* XBAR -> AMX(4:1) -> XBAR */ - DAI(AMX1 RX1), - DAI(AMX1 RX2), - DAI(AMX1 RX3), - DAI(AMX1 RX4), - DAI(AMX1), - DAI(AMX2 RX1), - DAI(AMX2 RX2), - DAI(AMX2 RX3), - DAI(AMX2 RX4), - DAI(AMX2), - /* XBAR -> ADX(1:4) -> XBAR */ - DAI(ADX1), - DAI(ADX1 TX1), - DAI(ADX1 TX2), - DAI(ADX1 TX3), - DAI(ADX1 TX4), - DAI(ADX2), - DAI(ADX2 TX1), - DAI(ADX2 TX2), - DAI(ADX2 TX3), - DAI(ADX2 TX4), - /* XBAR -> MIXER(10:5) -> XBAR */ - DAI(MIXER1 RX1), - DAI(MIXER1 RX2), - DAI(MIXER1 RX3), - DAI(MIXER1 RX4), - DAI(MIXER1 RX5), - DAI(MIXER1 RX6), - DAI(MIXER1 RX7), - DAI(MIXER1 RX8), - DAI(MIXER1 RX9), - DAI(MIXER1 RX10), - DAI(MIXER1 TX1), - DAI(MIXER1 TX2), - DAI(MIXER1 TX3), - DAI(MIXER1 TX4), - DAI(MIXER1 TX5), }; static struct snd_soc_dai_driver tegra186_ahub_dais[] = { @@ -193,93 +139,18 @@ static struct snd_soc_dai_driver tegra186_ahub_dais[] = { DAI(ADMAIF18), DAI(ADMAIF19), DAI(ADMAIF20), - /* XBAR <-> I2S <-> Codec */ DAI(I2S1), DAI(I2S2), DAI(I2S3), DAI(I2S4), DAI(I2S5), DAI(I2S6), - /* XBAR <- DMIC <- Codec */ DAI(DMIC1), 
DAI(DMIC2), DAI(DMIC3), DAI(DMIC4), - /* XBAR -> DSPK -> Codec */ DAI(DSPK1), DAI(DSPK2), - /* XBAR -> SFC -> XBAR */ - DAI(SFC1 RX), - DAI(SFC1 TX), - DAI(SFC2 RX), - DAI(SFC2 TX), - DAI(SFC3 RX), - DAI(SFC3 TX), - DAI(SFC4 RX), - DAI(SFC4 TX), - /* XBAR -> MVC -> XBAR */ - DAI(MVC1 RX), - DAI(MVC1 TX), - DAI(MVC2 RX), - DAI(MVC2 TX), - /* XBAR -> AMX(4:1) -> XBAR */ - DAI(AMX1 RX1), - DAI(AMX1 RX2), - DAI(AMX1 RX3), - DAI(AMX1 RX4), - DAI(AMX1), - DAI(AMX2 RX1), - DAI(AMX2 RX2), - DAI(AMX2 RX3), - DAI(AMX2 RX4), - DAI(AMX2), - DAI(AMX3 RX1), - DAI(AMX3 RX2), - DAI(AMX3 RX3), - DAI(AMX3 RX4), - DAI(AMX3), - DAI(AMX4 RX1), - DAI(AMX4 RX2), - DAI(AMX4 RX3), - DAI(AMX4 RX4), - DAI(AMX4), - /* XBAR -> ADX(1:4) -> XBAR */ - DAI(ADX1), - DAI(ADX1 TX1), - DAI(ADX1 TX2), - DAI(ADX1 TX3), - DAI(ADX1 TX4), - DAI(ADX2), - DAI(ADX2 TX1), - DAI(ADX2 TX2), - DAI(ADX2 TX3), - DAI(ADX2 TX4), - DAI(ADX3), - DAI(ADX3 TX1), - DAI(ADX3 TX2), - DAI(ADX3 TX3), - DAI(ADX3 TX4), - DAI(ADX4), - DAI(ADX4 TX1), - DAI(ADX4 TX2), - DAI(ADX4 TX3), - DAI(ADX4 TX4), - /* XBAR -> MIXER1(10:5) -> XBAR */ - DAI(MIXER1 RX1), - DAI(MIXER1 RX2), - DAI(MIXER1 RX3), - DAI(MIXER1 RX4), - DAI(MIXER1 RX5), - DAI(MIXER1 RX6), - DAI(MIXER1 RX7), - DAI(MIXER1 RX8), - DAI(MIXER1 RX9), - DAI(MIXER1 RX10), - DAI(MIXER1 TX1), - DAI(MIXER1 TX2), - DAI(MIXER1 TX3), - DAI(MIXER1 TX4), - DAI(MIXER1 TX5), }; static const char * const tegra210_ahub_mux_texts[] = { @@ -302,27 +173,6 @@ static const char * const tegra210_ahub_mux_texts[] = { "DMIC1", "DMIC2", "DMIC3", - "SFC1", - "SFC2", - "SFC3", - "SFC4", - "MVC1", - "MVC2", - "AMX1", - "AMX2", - "ADX1 TX1", - "ADX1 TX2", - "ADX1 TX3", - "ADX1 TX4", - "ADX2 TX1", - "ADX2 TX2", - "ADX2 TX3", - "ADX2 TX4", - "MIXER1 TX1", - "MIXER1 TX2", - "MIXER1 TX3", - "MIXER1 TX4", - "MIXER1 TX5", }; static const char * const tegra186_ahub_mux_texts[] = { @@ -357,42 +207,10 @@ static const char * const tegra186_ahub_mux_texts[] = { "DMIC2", "DMIC3", "DMIC4", - "SFC1", - "SFC2", - "SFC3", - "SFC4", - "MVC1", - "MVC2", - "AMX1", - "AMX2", - "AMX3", - "AMX4", - "ADX1 TX1", - "ADX1 TX2", - "ADX1 TX3", - "ADX1 TX4", - "ADX2 TX1", - "ADX2 TX2", - "ADX2 TX3", - "ADX2 TX4", - "ADX3 TX1", - "ADX3 TX2", - "ADX3 TX3", - "ADX3 TX4", - "ADX4 TX1", - "ADX4 TX2", - "ADX4 TX3", - "ADX4 TX4", - "MIXER1 TX1", - "MIXER1 TX2", - "MIXER1 TX3", - "MIXER1 TX4", - "MIXER1 TX5", }; static const unsigned int tegra210_ahub_mux_values[] = { 0, - /* ADMAIF */ MUX_VALUE(0, 0), MUX_VALUE(0, 1), MUX_VALUE(0, 2), @@ -403,47 +221,18 @@ static const unsigned int tegra210_ahub_mux_values[] = { MUX_VALUE(0, 7), MUX_VALUE(0, 8), MUX_VALUE(0, 9), - /* I2S */ MUX_VALUE(0, 16), MUX_VALUE(0, 17), MUX_VALUE(0, 18), MUX_VALUE(0, 19), MUX_VALUE(0, 20), - /* DMIC */ MUX_VALUE(2, 18), MUX_VALUE(2, 19), MUX_VALUE(2, 20), - /* SFC */ - MUX_VALUE(0, 24), - MUX_VALUE(0, 25), - MUX_VALUE(0, 26), - MUX_VALUE(0, 27), - /* MVC */ - MUX_VALUE(2, 8), - MUX_VALUE(2, 9), - /* AMX */ - MUX_VALUE(1, 8), - MUX_VALUE(1, 9), - /* ADX */ - MUX_VALUE(2, 24), - MUX_VALUE(2, 25), - MUX_VALUE(2, 26), - MUX_VALUE(2, 27), - MUX_VALUE(2, 28), - MUX_VALUE(2, 29), - MUX_VALUE(2, 30), - MUX_VALUE(2, 31), - /* MIXER */ - MUX_VALUE(1, 0), - MUX_VALUE(1, 1), - MUX_VALUE(1, 2), - MUX_VALUE(1, 3), - MUX_VALUE(1, 4), }; static const unsigned int tegra186_ahub_mux_values[] = { 0, - /* ADMAIF */ MUX_VALUE(0, 0), MUX_VALUE(0, 1), MUX_VALUE(0, 2), @@ -460,59 +249,20 @@ static const unsigned int tegra186_ahub_mux_values[] = { MUX_VALUE(0, 13), MUX_VALUE(0, 14), MUX_VALUE(0, 15), - /* I2S */ 
MUX_VALUE(0, 16), MUX_VALUE(0, 17), MUX_VALUE(0, 18), MUX_VALUE(0, 19), MUX_VALUE(0, 20), MUX_VALUE(0, 21), - /* ADMAIF */ MUX_VALUE(3, 16), MUX_VALUE(3, 17), MUX_VALUE(3, 18), MUX_VALUE(3, 19), - /* DMIC */ MUX_VALUE(2, 18), MUX_VALUE(2, 19), MUX_VALUE(2, 20), MUX_VALUE(2, 21), - /* SFC */ - MUX_VALUE(0, 24), - MUX_VALUE(0, 25), - MUX_VALUE(0, 26), - MUX_VALUE(0, 27), - /* MVC */ - MUX_VALUE(2, 8), - MUX_VALUE(2, 9), - /* AMX */ - MUX_VALUE(1, 8), - MUX_VALUE(1, 9), - MUX_VALUE(1, 10), - MUX_VALUE(1, 11), - /* ADX */ - MUX_VALUE(2, 24), - MUX_VALUE(2, 25), - MUX_VALUE(2, 26), - MUX_VALUE(2, 27), - MUX_VALUE(2, 28), - MUX_VALUE(2, 29), - MUX_VALUE(2, 30), - MUX_VALUE(2, 31), - MUX_VALUE(3, 0), - MUX_VALUE(3, 1), - MUX_VALUE(3, 2), - MUX_VALUE(3, 3), - MUX_VALUE(3, 4), - MUX_VALUE(3, 5), - MUX_VALUE(3, 6), - MUX_VALUE(3, 7), - /* MIXER */ - MUX_VALUE(1, 0), - MUX_VALUE(1, 1), - MUX_VALUE(1, 2), - MUX_VALUE(1, 3), - MUX_VALUE(1, 4), }; /* Controls for t210 */ @@ -531,32 +281,6 @@ MUX_ENUM_CTRL_DECL(t210_i2s2_tx, 0x11); MUX_ENUM_CTRL_DECL(t210_i2s3_tx, 0x12); MUX_ENUM_CTRL_DECL(t210_i2s4_tx, 0x13); MUX_ENUM_CTRL_DECL(t210_i2s5_tx, 0x14); -MUX_ENUM_CTRL_DECL(t210_sfc1_tx, 0x18); -MUX_ENUM_CTRL_DECL(t210_sfc2_tx, 0x19); -MUX_ENUM_CTRL_DECL(t210_sfc3_tx, 0x1a); -MUX_ENUM_CTRL_DECL(t210_sfc4_tx, 0x1b); -MUX_ENUM_CTRL_DECL(t210_mvc1_tx, 0x48); -MUX_ENUM_CTRL_DECL(t210_mvc2_tx, 0x49); -MUX_ENUM_CTRL_DECL(t210_amx11_tx, 0x50); -MUX_ENUM_CTRL_DECL(t210_amx12_tx, 0x51); -MUX_ENUM_CTRL_DECL(t210_amx13_tx, 0x52); -MUX_ENUM_CTRL_DECL(t210_amx14_tx, 0x53); -MUX_ENUM_CTRL_DECL(t210_amx21_tx, 0x54); -MUX_ENUM_CTRL_DECL(t210_amx22_tx, 0x55); -MUX_ENUM_CTRL_DECL(t210_amx23_tx, 0x56); -MUX_ENUM_CTRL_DECL(t210_amx24_tx, 0x57); -MUX_ENUM_CTRL_DECL(t210_adx1_tx, 0x58); -MUX_ENUM_CTRL_DECL(t210_adx2_tx, 0x59); -MUX_ENUM_CTRL_DECL(t210_mixer11_tx, 0x20); -MUX_ENUM_CTRL_DECL(t210_mixer12_tx, 0x21); -MUX_ENUM_CTRL_DECL(t210_mixer13_tx, 0x22); -MUX_ENUM_CTRL_DECL(t210_mixer14_tx, 0x23); -MUX_ENUM_CTRL_DECL(t210_mixer15_tx, 0x24); -MUX_ENUM_CTRL_DECL(t210_mixer16_tx, 0x25); -MUX_ENUM_CTRL_DECL(t210_mixer17_tx, 0x26); -MUX_ENUM_CTRL_DECL(t210_mixer18_tx, 0x27); -MUX_ENUM_CTRL_DECL(t210_mixer19_tx, 0x28); -MUX_ENUM_CTRL_DECL(t210_mixer110_tx, 0x29); /* Controls for t186 */ MUX_ENUM_CTRL_DECL_186(t186_admaif1_tx, 0x00); @@ -587,42 +311,6 @@ MUX_ENUM_CTRL_DECL_186(t186_admaif17_tx, 0x68); MUX_ENUM_CTRL_DECL_186(t186_admaif18_tx, 0x69); MUX_ENUM_CTRL_DECL_186(t186_admaif19_tx, 0x6a); MUX_ENUM_CTRL_DECL_186(t186_admaif20_tx, 0x6b); -MUX_ENUM_CTRL_DECL_186(t186_sfc1_tx, 0x18); -MUX_ENUM_CTRL_DECL_186(t186_sfc2_tx, 0x19); -MUX_ENUM_CTRL_DECL_186(t186_sfc3_tx, 0x1a); -MUX_ENUM_CTRL_DECL_186(t186_sfc4_tx, 0x1b); -MUX_ENUM_CTRL_DECL_186(t186_mvc1_tx, 0x48); -MUX_ENUM_CTRL_DECL_186(t186_mvc2_tx, 0x49); -MUX_ENUM_CTRL_DECL_186(t186_amx11_tx, 0x50); -MUX_ENUM_CTRL_DECL_186(t186_amx12_tx, 0x51); -MUX_ENUM_CTRL_DECL_186(t186_amx13_tx, 0x52); -MUX_ENUM_CTRL_DECL_186(t186_amx14_tx, 0x53); -MUX_ENUM_CTRL_DECL_186(t186_amx21_tx, 0x54); -MUX_ENUM_CTRL_DECL_186(t186_amx22_tx, 0x55); -MUX_ENUM_CTRL_DECL_186(t186_amx23_tx, 0x56); -MUX_ENUM_CTRL_DECL_186(t186_amx24_tx, 0x57); -MUX_ENUM_CTRL_DECL_186(t186_amx31_tx, 0x58); -MUX_ENUM_CTRL_DECL_186(t186_amx32_tx, 0x59); -MUX_ENUM_CTRL_DECL_186(t186_amx33_tx, 0x5a); -MUX_ENUM_CTRL_DECL_186(t186_amx34_tx, 0x5b); -MUX_ENUM_CTRL_DECL_186(t186_amx41_tx, 0x64); -MUX_ENUM_CTRL_DECL_186(t186_amx42_tx, 0x65); -MUX_ENUM_CTRL_DECL_186(t186_amx43_tx, 0x66); -MUX_ENUM_CTRL_DECL_186(t186_amx44_tx, 0x67); 
-MUX_ENUM_CTRL_DECL_186(t186_adx1_tx, 0x60); -MUX_ENUM_CTRL_DECL_186(t186_adx2_tx, 0x61); -MUX_ENUM_CTRL_DECL_186(t186_adx3_tx, 0x62); -MUX_ENUM_CTRL_DECL_186(t186_adx4_tx, 0x63); -MUX_ENUM_CTRL_DECL_186(t186_mixer11_tx, 0x20); -MUX_ENUM_CTRL_DECL_186(t186_mixer12_tx, 0x21); -MUX_ENUM_CTRL_DECL_186(t186_mixer13_tx, 0x22); -MUX_ENUM_CTRL_DECL_186(t186_mixer14_tx, 0x23); -MUX_ENUM_CTRL_DECL_186(t186_mixer15_tx, 0x24); -MUX_ENUM_CTRL_DECL_186(t186_mixer16_tx, 0x25); -MUX_ENUM_CTRL_DECL_186(t186_mixer17_tx, 0x26); -MUX_ENUM_CTRL_DECL_186(t186_mixer18_tx, 0x27); -MUX_ENUM_CTRL_DECL_186(t186_mixer19_tx, 0x28); -MUX_ENUM_CTRL_DECL_186(t186_mixer110_tx, 0x29); /* * The number of entries in, and order of, this array is closely tied to the @@ -648,47 +336,6 @@ static const struct snd_soc_dapm_widget tegra210_ahub_widgets[] = { TX_WIDGETS("DMIC1"), TX_WIDGETS("DMIC2"), TX_WIDGETS("DMIC3"), - WIDGETS("SFC1", t210_sfc1_tx), - WIDGETS("SFC2", t210_sfc2_tx), - WIDGETS("SFC3", t210_sfc3_tx), - WIDGETS("SFC4", t210_sfc4_tx), - WIDGETS("MVC1", t210_mvc1_tx), - WIDGETS("MVC2", t210_mvc2_tx), - WIDGETS("AMX1 RX1", t210_amx11_tx), - WIDGETS("AMX1 RX2", t210_amx12_tx), - WIDGETS("AMX1 RX3", t210_amx13_tx), - WIDGETS("AMX1 RX4", t210_amx14_tx), - WIDGETS("AMX2 RX1", t210_amx21_tx), - WIDGETS("AMX2 RX2", t210_amx22_tx), - WIDGETS("AMX2 RX3", t210_amx23_tx), - WIDGETS("AMX2 RX4", t210_amx24_tx), - TX_WIDGETS("AMX1"), - TX_WIDGETS("AMX2"), - WIDGETS("ADX1", t210_adx1_tx), - WIDGETS("ADX2", t210_adx2_tx), - TX_WIDGETS("ADX1 TX1"), - TX_WIDGETS("ADX1 TX2"), - TX_WIDGETS("ADX1 TX3"), - TX_WIDGETS("ADX1 TX4"), - TX_WIDGETS("ADX2 TX1"), - TX_WIDGETS("ADX2 TX2"), - TX_WIDGETS("ADX2 TX3"), - TX_WIDGETS("ADX2 TX4"), - WIDGETS("MIXER1 RX1", t210_mixer11_tx), - WIDGETS("MIXER1 RX2", t210_mixer12_tx), - WIDGETS("MIXER1 RX3", t210_mixer13_tx), - WIDGETS("MIXER1 RX4", t210_mixer14_tx), - WIDGETS("MIXER1 RX5", t210_mixer15_tx), - WIDGETS("MIXER1 RX6", t210_mixer16_tx), - WIDGETS("MIXER1 RX7", t210_mixer17_tx), - WIDGETS("MIXER1 RX8", t210_mixer18_tx), - WIDGETS("MIXER1 RX9", t210_mixer19_tx), - WIDGETS("MIXER1 RX10", t210_mixer110_tx), - TX_WIDGETS("MIXER1 TX1"), - TX_WIDGETS("MIXER1 TX2"), - TX_WIDGETS("MIXER1 TX3"), - TX_WIDGETS("MIXER1 TX4"), - TX_WIDGETS("MIXER1 TX5"), }; static const struct snd_soc_dapm_widget tegra186_ahub_widgets[] = { @@ -724,67 +371,6 @@ static const struct snd_soc_dapm_widget tegra186_ahub_widgets[] = { TX_WIDGETS("DMIC4"), WIDGETS("DSPK1", t186_dspk1_tx), WIDGETS("DSPK2", t186_dspk2_tx), - WIDGETS("SFC1", t186_sfc1_tx), - WIDGETS("SFC2", t186_sfc2_tx), - WIDGETS("SFC3", t186_sfc3_tx), - WIDGETS("SFC4", t186_sfc4_tx), - WIDGETS("MVC1", t186_mvc1_tx), - WIDGETS("MVC2", t186_mvc2_tx), - WIDGETS("AMX1 RX1", t186_amx11_tx), - WIDGETS("AMX1 RX2", t186_amx12_tx), - WIDGETS("AMX1 RX3", t186_amx13_tx), - WIDGETS("AMX1 RX4", t186_amx14_tx), - WIDGETS("AMX2 RX1", t186_amx21_tx), - WIDGETS("AMX2 RX2", t186_amx22_tx), - WIDGETS("AMX2 RX3", t186_amx23_tx), - WIDGETS("AMX2 RX4", t186_amx24_tx), - WIDGETS("AMX3 RX1", t186_amx31_tx), - WIDGETS("AMX3 RX2", t186_amx32_tx), - WIDGETS("AMX3 RX3", t186_amx33_tx), - WIDGETS("AMX3 RX4", t186_amx34_tx), - WIDGETS("AMX4 RX1", t186_amx41_tx), - WIDGETS("AMX4 RX2", t186_amx42_tx), - WIDGETS("AMX4 RX3", t186_amx43_tx), - WIDGETS("AMX4 RX4", t186_amx44_tx), - TX_WIDGETS("AMX1"), - TX_WIDGETS("AMX2"), - TX_WIDGETS("AMX3"), - TX_WIDGETS("AMX4"), - WIDGETS("ADX1", t186_adx1_tx), - WIDGETS("ADX2", t186_adx2_tx), - WIDGETS("ADX3", t186_adx3_tx), - WIDGETS("ADX4", t186_adx4_tx), - 
TX_WIDGETS("ADX1 TX1"), - TX_WIDGETS("ADX1 TX2"), - TX_WIDGETS("ADX1 TX3"), - TX_WIDGETS("ADX1 TX4"), - TX_WIDGETS("ADX2 TX1"), - TX_WIDGETS("ADX2 TX2"), - TX_WIDGETS("ADX2 TX3"), - TX_WIDGETS("ADX2 TX4"), - TX_WIDGETS("ADX3 TX1"), - TX_WIDGETS("ADX3 TX2"), - TX_WIDGETS("ADX3 TX3"), - TX_WIDGETS("ADX3 TX4"), - TX_WIDGETS("ADX4 TX1"), - TX_WIDGETS("ADX4 TX2"), - TX_WIDGETS("ADX4 TX3"), - TX_WIDGETS("ADX4 TX4"), - WIDGETS("MIXER1 RX1", t186_mixer11_tx), - WIDGETS("MIXER1 RX2", t186_mixer12_tx), - WIDGETS("MIXER1 RX3", t186_mixer13_tx), - WIDGETS("MIXER1 RX4", t186_mixer14_tx), - WIDGETS("MIXER1 RX5", t186_mixer15_tx), - WIDGETS("MIXER1 RX6", t186_mixer16_tx), - WIDGETS("MIXER1 RX7", t186_mixer17_tx), - WIDGETS("MIXER1 RX8", t186_mixer18_tx), - WIDGETS("MIXER1 RX9", t186_mixer19_tx), - WIDGETS("MIXER1 RX10", t186_mixer110_tx), - TX_WIDGETS("MIXER1 TX1"), - TX_WIDGETS("MIXER1 TX2"), - TX_WIDGETS("MIXER1 TX3"), - TX_WIDGETS("MIXER1 TX4"), - TX_WIDGETS("MIXER1 TX5"), }; #define TEGRA_COMMON_MUX_ROUTES(name) \ @@ -806,28 +392,7 @@ static const struct snd_soc_dapm_widget tegra186_ahub_widgets[] = { { name " Mux", "I2S5", "I2S5 XBAR-RX" }, \ { name " Mux", "DMIC1", "DMIC1 XBAR-RX" }, \ { name " Mux", "DMIC2", "DMIC2 XBAR-RX" }, \ - { name " Mux", "DMIC3", "DMIC3 XBAR-RX" }, \ - { name " Mux", "SFC1", "SFC1 XBAR-RX" }, \ - { name " Mux", "SFC2", "SFC2 XBAR-RX" }, \ - { name " Mux", "SFC3", "SFC3 XBAR-RX" }, \ - { name " Mux", "SFC4", "SFC4 XBAR-RX" }, \ - { name " Mux", "MVC1", "MVC1 XBAR-RX" }, \ - { name " Mux", "MVC2", "MVC2 XBAR-RX" }, \ - { name " Mux", "AMX1", "AMX1 XBAR-RX" }, \ - { name " Mux", "AMX2", "AMX2 XBAR-RX" }, \ - { name " Mux", "ADX1 TX1", "ADX1 TX1 XBAR-RX" }, \ - { name " Mux", "ADX1 TX2", "ADX1 TX2 XBAR-RX" }, \ - { name " Mux", "ADX1 TX3", "ADX1 TX3 XBAR-RX" }, \ - { name " Mux", "ADX1 TX4", "ADX1 TX4 XBAR-RX" }, \ - { name " Mux", "ADX2 TX1", "ADX2 TX1 XBAR-RX" }, \ - { name " Mux", "ADX2 TX2", "ADX2 TX2 XBAR-RX" }, \ - { name " Mux", "ADX2 TX3", "ADX2 TX3 XBAR-RX" }, \ - { name " Mux", "ADX2 TX4", "ADX2 TX4 XBAR-RX" }, \ - { name " Mux", "MIXER1 TX1", "MIXER1 TX1 XBAR-RX" }, \ - { name " Mux", "MIXER1 TX2", "MIXER1 TX2 XBAR-RX" }, \ - { name " Mux", "MIXER1 TX3", "MIXER1 TX3 XBAR-RX" }, \ - { name " Mux", "MIXER1 TX4", "MIXER1 TX4 XBAR-RX" }, \ - { name " Mux", "MIXER1 TX5", "MIXER1 TX5 XBAR-RX" }, + { name " Mux", "DMIC3", "DMIC3 XBAR-RX" }, #define TEGRA186_ONLY_MUX_ROUTES(name) \ { name " Mux", "ADMAIF11", "ADMAIF11 XBAR-RX" }, \ @@ -841,17 +406,7 @@ static const struct snd_soc_dapm_widget tegra186_ahub_widgets[] = { { name " Mux", "ADMAIF19", "ADMAIF19 XBAR-RX" }, \ { name " Mux", "ADMAIF20", "ADMAIF20 XBAR-RX" }, \ { name " Mux", "I2S6", "I2S6 XBAR-RX" }, \ - { name " Mux", "DMIC4", "DMIC4 XBAR-RX" }, \ - { name " Mux", "AMX3", "AMX3 XBAR-RX" }, \ - { name " Mux", "AMX4", "AMX4 XBAR-RX" }, \ - { name " Mux", "ADX3 TX1", "ADX3 TX1 XBAR-RX" }, \ - { name " Mux", "ADX3 TX2", "ADX3 TX2 XBAR-RX" }, \ - { name " Mux", "ADX3 TX3", "ADX3 TX3 XBAR-RX" }, \ - { name " Mux", "ADX3 TX4", "ADX3 TX4 XBAR-RX" }, \ - { name " Mux", "ADX4 TX1", "ADX4 TX1 XBAR-RX" }, \ - { name " Mux", "ADX4 TX2", "ADX4 TX2 XBAR-RX" }, \ - { name " Mux", "ADX4 TX3", "ADX4 TX3 XBAR-RX" }, \ - { name " Mux", "ADX4 TX4", "ADX4 TX4 XBAR-RX" }, + { name " Mux", "DMIC4", "DMIC4 XBAR-RX" }, #define TEGRA210_MUX_ROUTES(name) \ TEGRA_COMMON_MUX_ROUTES(name) @@ -898,32 +453,6 @@ static const struct snd_soc_dapm_route tegra210_ahub_routes[] = { TEGRA210_MUX_ROUTES("I2S3") TEGRA210_MUX_ROUTES("I2S4") 
TEGRA210_MUX_ROUTES("I2S5") - TEGRA210_MUX_ROUTES("SFC1") - TEGRA210_MUX_ROUTES("SFC2") - TEGRA210_MUX_ROUTES("SFC3") - TEGRA210_MUX_ROUTES("SFC4") - TEGRA210_MUX_ROUTES("MVC1") - TEGRA210_MUX_ROUTES("MVC2") - TEGRA210_MUX_ROUTES("AMX1 RX1") - TEGRA210_MUX_ROUTES("AMX1 RX2") - TEGRA210_MUX_ROUTES("AMX1 RX3") - TEGRA210_MUX_ROUTES("AMX1 RX4") - TEGRA210_MUX_ROUTES("AMX2 RX1") - TEGRA210_MUX_ROUTES("AMX2 RX2") - TEGRA210_MUX_ROUTES("AMX2 RX3") - TEGRA210_MUX_ROUTES("AMX2 RX4") - TEGRA210_MUX_ROUTES("ADX1") - TEGRA210_MUX_ROUTES("ADX2") - TEGRA210_MUX_ROUTES("MIXER1 RX1") - TEGRA210_MUX_ROUTES("MIXER1 RX2") - TEGRA210_MUX_ROUTES("MIXER1 RX3") - TEGRA210_MUX_ROUTES("MIXER1 RX4") - TEGRA210_MUX_ROUTES("MIXER1 RX5") - TEGRA210_MUX_ROUTES("MIXER1 RX6") - TEGRA210_MUX_ROUTES("MIXER1 RX7") - TEGRA210_MUX_ROUTES("MIXER1 RX8") - TEGRA210_MUX_ROUTES("MIXER1 RX9") - TEGRA210_MUX_ROUTES("MIXER1 RX10") }; static const struct snd_soc_dapm_route tegra186_ahub_routes[] = { @@ -975,42 +504,6 @@ static const struct snd_soc_dapm_route tegra186_ahub_routes[] = { TEGRA186_MUX_ROUTES("I2S6") TEGRA186_MUX_ROUTES("DSPK1") TEGRA186_MUX_ROUTES("DSPK2") - TEGRA186_MUX_ROUTES("SFC1") - TEGRA186_MUX_ROUTES("SFC2") - TEGRA186_MUX_ROUTES("SFC3") - TEGRA186_MUX_ROUTES("SFC4") - TEGRA186_MUX_ROUTES("MVC1") - TEGRA186_MUX_ROUTES("MVC2") - TEGRA186_MUX_ROUTES("AMX1 RX1") - TEGRA186_MUX_ROUTES("AMX1 RX2") - TEGRA186_MUX_ROUTES("AMX1 RX3") - TEGRA186_MUX_ROUTES("AMX1 RX4") - TEGRA186_MUX_ROUTES("AMX2 RX1") - TEGRA186_MUX_ROUTES("AMX2 RX2") - TEGRA186_MUX_ROUTES("AMX2 RX3") - TEGRA186_MUX_ROUTES("AMX2 RX4") - TEGRA186_MUX_ROUTES("AMX3 RX1") - TEGRA186_MUX_ROUTES("AMX3 RX2") - TEGRA186_MUX_ROUTES("AMX3 RX3") - TEGRA186_MUX_ROUTES("AMX3 RX4") - TEGRA186_MUX_ROUTES("AMX4 RX1") - TEGRA186_MUX_ROUTES("AMX4 RX2") - TEGRA186_MUX_ROUTES("AMX4 RX3") - TEGRA186_MUX_ROUTES("AMX4 RX4") - TEGRA186_MUX_ROUTES("ADX1") - TEGRA186_MUX_ROUTES("ADX2") - TEGRA186_MUX_ROUTES("ADX3") - TEGRA186_MUX_ROUTES("ADX4") - TEGRA186_MUX_ROUTES("MIXER1 RX1") - TEGRA186_MUX_ROUTES("MIXER1 RX2") - TEGRA186_MUX_ROUTES("MIXER1 RX3") - TEGRA186_MUX_ROUTES("MIXER1 RX4") - TEGRA186_MUX_ROUTES("MIXER1 RX5") - TEGRA186_MUX_ROUTES("MIXER1 RX6") - TEGRA186_MUX_ROUTES("MIXER1 RX7") - TEGRA186_MUX_ROUTES("MIXER1 RX8") - TEGRA186_MUX_ROUTES("MIXER1 RX9") - TEGRA186_MUX_ROUTES("MIXER1 RX10") }; static const struct snd_soc_component_driver tegra210_ahub_component = { diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index a73404879a..2e549b6906 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -321,7 +321,7 @@ static int tegra_machine_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_soc_ops tegra_machine_snd_ops = { +static struct snd_soc_ops tegra_machine_snd_ops = { .hw_params = tegra_machine_hw_params, }; diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 468c8e77de..ef1e74d952 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -48,12 +48,6 @@ int tegra_pcm_platform_register(struct device *dev) } EXPORT_SYMBOL_GPL(tegra_pcm_platform_register); -int devm_tegra_pcm_platform_register(struct device *dev) -{ - return devm_snd_dmaengine_pcm_register(dev, &tegra_dmaengine_pcm_config, 0); -} -EXPORT_SYMBOL_GPL(devm_tegra_pcm_platform_register); - int tegra_pcm_platform_register_with_chan_names(struct device *dev, struct snd_dmaengine_pcm_config *config, char *txdmachan, char *rxdmachan) diff --git 
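Note on the snd_soc_ops hunks: the tegra_asoc_machine.c change above drops the const qualifier from tegra_machine_snd_ops, and the davinci-evm, omap-abe-twl6040 and mop500_ab8500 hunks further down do the same for their ops tables. As a minimal sketch (example_hw_params and example_ops are illustrative names, not part of this patch), the const form lets the compiler place the table in read-only data, while the non-const form is what you need once any member is assigned at runtime:

static int example_hw_params(struct snd_pcm_substream *substream,
			     struct snd_pcm_hw_params *params)
{
	return 0;	/* clock/format setup would go here */
}

/* may be placed in .rodata; no field can be written at runtime */
static const struct snd_soc_ops example_ops = {
	.hw_params = example_hw_params,
};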
a/sound/soc/tegra/tegra_pcm.h b/sound/soc/tegra/tegra_pcm.h index 2a36eea174..d602126c65 100644 --- a/sound/soc/tegra/tegra_pcm.h +++ b/sound/soc/tegra/tegra_pcm.h @@ -32,7 +32,6 @@ int tegra_pcm_hw_params(struct snd_soc_component *component, snd_pcm_uframes_t tegra_pcm_pointer(struct snd_soc_component *component, struct snd_pcm_substream *substream); int tegra_pcm_platform_register(struct device *dev); -int devm_tegra_pcm_platform_register(struct device *dev); int tegra_pcm_platform_register_with_chan_names(struct device *dev, struct snd_dmaengine_pcm_config *config, char *txdmachan, char *rxdmachan); diff --git a/sound/soc/ti/Kconfig b/sound/soc/ti/Kconfig index 40110e9a9e..1d9fe3fca1 100644 --- a/sound/soc/ti/Kconfig +++ b/sound/soc/ti/Kconfig @@ -212,7 +212,7 @@ config SND_SOC_DM365_VOICE_CODEC Say Y if you want to add support for SoC On-chip voice codec endchoice -config SND_SOC_DM365_SELECT_VOICE_CODECS +config SND_SOC_DM365_VOICE_CODEC_MODULE def_tristate y depends on SND_SOC_DM365_VOICE_CODEC && SND_SOC select MFD_DAVINCI_VOICECODEC diff --git a/sound/soc/ti/ams-delta.c b/sound/soc/ti/ams-delta.c index b1a32545ba..ecd24d412a 100644 --- a/sound/soc/ti/ams-delta.c +++ b/sound/soc/ti/ams-delta.c @@ -330,9 +330,10 @@ static void cx81801_close(struct tty_struct *tty) } /* Line discipline .hangup() */ -static void cx81801_hangup(struct tty_struct *tty) +static int cx81801_hangup(struct tty_struct *tty) { cx81801_close(tty); + return 0; } /* Line discipline .receive_buf() */ diff --git a/sound/soc/ti/davinci-evm.c b/sound/soc/ti/davinci-evm.c index 68d69e3268..b043a0070d 100644 --- a/sound/soc/ti/davinci-evm.c +++ b/sound/soc/ti/davinci-evm.c @@ -73,7 +73,7 @@ static int evm_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_soc_ops evm_ops = { +static struct snd_soc_ops evm_ops = { .startup = evm_startup, .shutdown = evm_shutdown, .hw_params = evm_hw_params, diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index 2c146b91fc..56a19eeec5 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -1870,10 +1870,12 @@ static int mcasp_reparent_fck(struct platform_device *pdev) static bool davinci_mcasp_have_gpiochip(struct davinci_mcasp *mcasp) { #ifdef CONFIG_OF_GPIO - return of_property_read_bool(mcasp->dev->of_node, "gpio-controller"); -#else - return false; + if (mcasp->dev->of_node && + of_property_read_bool(mcasp->dev->of_node, "gpio-controller")) + return true; #endif + + return false; } static int davinci_mcasp_get_config(struct davinci_mcasp *mcasp, @@ -2024,9 +2026,13 @@ static int davinci_mcasp_get_dma_type(struct davinci_mcasp *mcasp) tmp = mcasp->dma_data[SNDRV_PCM_STREAM_PLAYBACK].filter_data; chan = dma_request_chan(mcasp->dev, tmp); - if (IS_ERR(chan)) - return dev_err_probe(mcasp->dev, PTR_ERR(chan), - "Can't verify DMA configuration\n"); + if (IS_ERR(chan)) { + if (PTR_ERR(chan) != -EPROBE_DEFER) + dev_err(mcasp->dev, + "Can't verify DMA configuration (%ld)\n", + PTR_ERR(chan)); + return PTR_ERR(chan); + } if (WARN_ON(!chan->device || !chan->device->dev)) { dma_release_channel(chan); return -EINVAL; @@ -2224,6 +2230,9 @@ static int davinci_mcasp_init_gpiochip(struct davinci_mcasp *mcasp) mcasp->gpio_chip = davinci_mcasp_template_chip; mcasp->gpio_chip.label = dev_name(mcasp->dev); mcasp->gpio_chip.parent = mcasp->dev; +#ifdef CONFIG_OF_GPIO + mcasp->gpio_chip.of_node = mcasp->dev->of_node; +#endif return devm_gpiochip_add_data(mcasp->dev, &mcasp->gpio_chip, mcasp); } diff --git 
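For reference, the davinci-mcasp hunk above and the j721e-evm hunk below trade dev_err_probe() for open-coded -EPROBE_DEFER filtering. A sketch of the two spellings, assuming a probe context with a valid struct device *dev and a dma_request_chan() result in chan:

	/* helper form: logs and returns the error in one step; for
	 * -EPROBE_DEFER it records a deferral reason instead of logging */
	if (IS_ERR(chan))
		return dev_err_probe(dev, PTR_ERR(chan),
				     "Can't verify DMA configuration\n");

	/* open-coded form used after this change */
	if (IS_ERR(chan)) {
		if (PTR_ERR(chan) != -EPROBE_DEFER)
			dev_err(dev, "Can't verify DMA configuration (%ld)\n",
				PTR_ERR(chan));
		return PTR_ERR(chan);
	}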
a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c index 4077e15ec4..9347f982c3 100644 --- a/sound/soc/ti/j721e-evm.c +++ b/sound/soc/ti/j721e-evm.c @@ -464,9 +464,13 @@ static int j721e_get_clocks(struct device *dev, int ret; clocks->target = devm_clk_get(dev, prefix); - if (IS_ERR(clocks->target)) - return dev_err_probe(dev, PTR_ERR(clocks->target), - "failed to acquire %s\n", prefix); + if (IS_ERR(clocks->target)) { + ret = PTR_ERR(clocks->target); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to acquire %s: %d\n", + prefix, ret); + return ret; + } clk_name = kasprintf(GFP_KERNEL, "%s-48000", prefix); if (clk_name) { diff --git a/sound/soc/ti/omap-abe-twl6040.c b/sound/soc/ti/omap-abe-twl6040.c index da809c7f25..2e3d1eea77 100644 --- a/sound/soc/ti/omap-abe-twl6040.c +++ b/sound/soc/ti/omap-abe-twl6040.c @@ -96,7 +96,7 @@ static int omap_abe_dmic_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_soc_ops omap_abe_dmic_ops = { +static struct snd_soc_ops omap_abe_dmic_ops = { .hw_params = omap_abe_dmic_hw_params, }; diff --git a/sound/soc/ux500/mop500_ab8500.c b/sound/soc/ux500/mop500_ab8500.c index 3e654e708f..2c39c7a2fd 100644 --- a/sound/soc/ux500/mop500_ab8500.c +++ b/sound/soc/ux500/mop500_ab8500.c @@ -348,7 +348,7 @@ static int mop500_ab8500_hw_free(struct snd_pcm_substream *substream) return 0; } -const struct snd_soc_ops mop500_ab8500_ops[] = { +struct snd_soc_ops mop500_ab8500_ops[] = { { .hw_params = mop500_ab8500_hw_params, .hw_free = mop500_ab8500_hw_free, diff --git a/sound/soc/ux500/mop500_ab8500.h b/sound/soc/ux500/mop500_ab8500.h index 087ef246d8..8138a4e9aa 100644 --- a/sound/soc/ux500/mop500_ab8500.h +++ b/sound/soc/ux500/mop500_ab8500.h @@ -11,7 +11,7 @@ #ifndef MOP500_AB8500_H #define MOP500_AB8500_H -extern const struct snd_soc_ops mop500_ab8500_ops[]; +extern struct snd_soc_ops mop500_ab8500_ops[]; int mop500_ab8500_machine_init(struct snd_soc_pcm_runtime *rtd); void mop500_ab8500_remove(struct snd_soc_card *card); diff --git a/sound/soc/xilinx/xlnx_spdif.c b/sound/soc/xilinx/xlnx_spdif.c index cba0e868a7..e2ca087ade 100644 --- a/sound/soc/xilinx/xlnx_spdif.c +++ b/sound/soc/xilinx/xlnx_spdif.c @@ -237,6 +237,7 @@ MODULE_DEVICE_TABLE(of, xlnx_spdif_of_match); static int xlnx_spdif_probe(struct platform_device *pdev) { int ret; + struct resource *res; struct snd_soc_dai_driver *dai_drv; struct spdif_dev_data *ctx; @@ -272,10 +273,13 @@ static int xlnx_spdif_probe(struct platform_device *pdev) if (ctx->mode) { dai_drv = &xlnx_spdif_tx_dai; } else { - ret = platform_get_irq(pdev, 0); - if (ret < 0) + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res) { + dev_err(dev, "No IRQ resource found\n"); + ret = -ENODEV; goto clk_err; - ret = devm_request_irq(dev, ret, + } + ret = devm_request_irq(dev, res->start, xlnx_spdifrx_irq_handler, 0, "XLNX_SPDIF_RX", ctx); if (ret) { diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 3881e1c1b0..6b84f66e4a 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -688,7 +688,7 @@ static void dbri_cmdsend(struct snd_dbri *dbri, s32 *cmd, int len) { u32 dvma_addr = (u32)dbri->dma_dvma; s32 tmp, addr; - static int wait_id; + static int wait_id = 0; wait_id++; wait_id &= 0xffff; /* restrict it to a 16 bit counter. 
*/ @@ -1926,7 +1926,7 @@ static void dbri_process_interrupt_buffer(struct snd_dbri *dbri) static irqreturn_t snd_dbri_interrupt(int irq, void *dev_id) { struct snd_dbri *dbri = dev_id; - static int errcnt; + static int errcnt = 0; int x; if (dbri == NULL) @@ -2591,7 +2591,7 @@ static int dbri_probe(struct platform_device *op) struct snd_dbri *dbri; struct resource *rp; struct snd_card *card; - static int dev; + static int dev = 0; int irq; int err; diff --git a/sound/usb/card.c b/sound/usb/card.c index 2a1f76ff85..fbbe330a52 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -993,6 +993,8 @@ void snd_usb_unlock_shutdown(struct snd_usb_audio *chip) wake_up(&chip->shutdown_wait); } +#ifdef CONFIG_PM + int snd_usb_autoresume(struct snd_usb_audio *chip) { int i, err; @@ -1104,6 +1106,10 @@ static int usb_audio_resume(struct usb_interface *intf) atomic_dec(&chip->active); /* allow autopm after this point */ return err; } +#else +#define usb_audio_suspend NULL +#define usb_audio_resume NULL +#endif /* CONFIG_PM */ static const struct usb_device_id usb_audio_ids [] = { #include "quirks-table.h" diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 4dfe764167..98345a695d 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -271,7 +271,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, return -EINVAL; } - /* first, see if the ID we're looking at is a clock source already */ + /* first, see if the ID we're looking for is a clock source already */ source = snd_usb_find_clock_source(chip, entity_id, proto); if (source) { entity_id = GET_VAL(source, proto, bClockID); @@ -297,7 +297,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, goto find_source; } - /* the entity ID we are looking at is a selector. + /* the entity ID we are looking for is a selector. * find out what it currently selects */ ret = uac_clock_selector_get_val(chip, clock_id); if (ret < 0) { diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index a5641956ef..567514832b 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -145,7 +145,6 @@ static inline void check_mapped_dB(const struct usbmix_name_map *p, if (p && p->dB) { cval->dBmin = p->dB->min; cval->dBmax = p->dB->max; - cval->min_mute = p->dB->min_mute; cval->initialized = 1; } } @@ -362,8 +361,9 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, memset(buf, 0, sizeof(buf)); - if (snd_usb_lock_shutdown(chip)) - return -EIO; + ret = snd_usb_lock_shutdown(chip) ?
-EIO : 0; + if (ret) + goto error; idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8); ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), bRequest, @@ -372,7 +372,8 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, snd_usb_unlock_shutdown(chip); if (ret < 0) { - usb_audio_dbg(chip, +error: + usb_audio_err(chip, "cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n", request, validx, idx, cval->val_type); return ret; @@ -1207,32 +1208,12 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } } -/* forcibly initialize the current mixer value; if GET_CUR fails, set to - * the minimum as default - */ -static void init_cur_mix_raw(struct usb_mixer_elem_info *cval, int ch, int idx) -{ - int val, err; - - err = snd_usb_get_cur_mix_value(cval, ch, idx, &val); - if (!err) - return; - if (!cval->head.mixer->ignore_ctl_error) - usb_audio_warn(cval->head.mixer->chip, - "%d:%d: failed to get current value for ch %d (%d)\n", - cval->head.id, mixer_ctrl_intf(cval->head.mixer), - ch, err); - snd_usb_set_cur_mix_value(cval, ch, idx, cval->min); -} - /* * retrieve the minimum and maximum values for the specified control */ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, int default_min, struct snd_kcontrol *kctl) { - int i, idx; - /* for failsafe */ cval->min = default_min; cval->max = cval->min + 1; @@ -1245,6 +1226,7 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, } else { int minchn = 0; if (cval->cmask) { + int i; for (i = 0; i < MAX_CHANNELS; i++) if (cval->cmask & (1 << i)) { minchn = i + 1; @@ -1345,19 +1327,6 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, } } - /* initialize all elements */ - if (!cval->cmask) { - init_cur_mix_raw(cval, 0, 0); - } else { - idx = 0; - for (i = 0; i < MAX_CHANNELS; i++) { - if (cval->cmask & (1 << i)) { - init_cur_mix_raw(cval, i + 1, idx); - idx++; - } - } - } - return 0; } @@ -3633,6 +3602,7 @@ void snd_usb_mixer_disconnect(struct usb_mixer_interface *mixer) mixer->disconnected = true; } +#ifdef CONFIG_PM /* stop any bus activity of a mixer */ static void snd_usb_mixer_inactivate(struct usb_mixer_interface *mixer) { @@ -3678,14 +3648,17 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) err = snd_usb_set_cur_mix_value(cval, c + 1, idx, cval->cache_val[idx]); if (err < 0) - break; + return err; } idx++; } } else { /* master */ - if (cval->cached) - snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); + if (cval->cached) { + err = snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); + if (err < 0) + return err; + } } return 0; @@ -3711,6 +3684,7 @@ int snd_usb_mixer_resume(struct usb_mixer_interface *mixer) return snd_usb_mixer_activate(mixer); } +#endif void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, struct usb_mixer_interface *mixer, @@ -3719,5 +3693,7 @@ void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, list->mixer = mixer; list->id = unitid; list->dump = snd_usb_mixer_dump_cval; +#ifdef CONFIG_PM list->resume = restore_mixer_value; +#endif } diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index d43895c1ae..98ea24d91d 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -118,8 +118,10 @@ void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *_tlv); +#ifdef CONFIG_PM int snd_usb_mixer_suspend(struct usb_mixer_interface 
*mixer); int snd_usb_mixer_resume(struct usb_mixer_interface *mixer); +#endif int snd_usb_set_cur_mix_value(struct usb_mixer_elem_info *cval, int channel, int index, int value); diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 96991ddf50..55eea90ee9 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -6,9 +6,8 @@ */ struct usbmix_dB_map { - int min; - int max; - bool min_mute; + u32 min; + u32 max; }; struct usbmix_name_map { @@ -337,13 +336,6 @@ static const struct usbmix_name_map bose_companion5_map[] = { { 0 } /* terminator */ }; -/* Bose Revolve+ SoundLink, correction of dB maps */ -static const struct usbmix_dB_map bose_soundlink_dB = {-8283, -0, true}; -static const struct usbmix_name_map bose_soundlink_map[] = { - { 2, NULL, .dB = &bose_soundlink_dB }, - { 0 } /* terminator */ -}; - /* Sennheiser Communications Headset [PC 8], the dB value is reported as -6 negative maximum */ static const struct usbmix_dB_map sennheiser_pc8_dB = {-9500, 0}; static const struct usbmix_name_map sennheiser_pc8_map[] = { @@ -431,14 +423,6 @@ static const struct usbmix_name_map aorus_master_alc1220vb_map[] = { {} }; -/* MSI MPG X570S Carbon Max Wifi with ALC4080 */ -static const struct usbmix_name_map msi_mpg_x570s_carbon_max_wifi_alc4080_map[] = { - { 29, "Speaker Playback" }, - { 30, "Front Headphone Playback" }, - { 32, "IEC958 Playback" }, - {} -}; - /* * Control map entries */ @@ -537,11 +521,6 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x1020), .map = bose_companion5_map, }, - { - /* Bose Revolve+ SoundLink */ - .id = USB_ID(0x05a7, 0x40fa), - .map = bose_soundlink_map, - }, { /* Corsair Virtuoso SE (wired mode) */ .id = USB_ID(0x1b1c, 0x0a3d), @@ -585,10 +564,6 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .map = trx40_mobo_map, .connector_map = trx40_mobo_connector_map, }, - { /* MSI MPG X570S Carbon Max Wifi */ - .id = USB_ID(0x0db0, 0x419c), - .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map, - }, { /* MSI TRX40 */ .id = USB_ID(0x0db0, 0x543d), .map = trx40_mobo_map, diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index e447ddd685..d48729e6a3 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3280,6 +3280,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) return err; } +#ifdef CONFIG_PM void snd_usb_mixer_resume_quirk(struct usb_mixer_interface *mixer) { switch (mixer->chip->usb_id) { @@ -3288,6 +3289,7 @@ void snd_usb_mixer_resume_quirk(struct usb_mixer_interface *mixer) break; } } +#endif void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer, int unitid) diff --git a/sound/usb/mixer_quirks.h b/sound/usb/mixer_quirks.h index 4ba01ba3fe..52be26db55 100644 --- a/sound/usb/mixer_quirks.h +++ b/sound/usb/mixer_quirks.h @@ -14,7 +14,9 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, struct usb_mixer_elem_info *cval, int unitid, struct snd_kcontrol *kctl); +#ifdef CONFIG_PM void snd_usb_mixer_resume_quirk(struct usb_mixer_interface *mixer); +#endif #endif /* SND_USB_MIXER_QUIRKS_H */ diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index cec6e91afe..2e51fb031a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1086,13 +1086,6 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre if (err < 0) return err; - list_for_each_entry(fp, &subs->fmt_list, list) { - if (fp->implicit_fb) { - runtime->hw.info |= SNDRV_PCM_INFO_JOINT_DUPLEX; - break; - } - } - return 0; } diff --git 
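The USB-audio hunks in this area wrap the suspend/resume paths in #ifdef CONFIG_PM; power.h in the next hunk supplies static inline no-op fallbacks so callers build unchanged when power management is disabled. The general shape, sketched here for a hypothetical foo driver (foo_dev and the function names are illustrative):

#ifdef CONFIG_PM
int foo_autoresume(struct foo_dev *dev);
void foo_autosuspend(struct foo_dev *dev);
#else
static inline int foo_autoresume(struct foo_dev *dev)
{
	return 0;	/* no PM: treat the device as always awake */
}
static inline void foo_autosuspend(struct foo_dev *dev)
{
}
#endif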
a/sound/usb/power.h b/sound/usb/power.h index 396e3e5144..6004231a7c 100644 --- a/sound/usb/power.h +++ b/sound/usb/power.h @@ -21,7 +21,17 @@ struct snd_usb_power_domain * snd_usb_find_power_domain(struct usb_host_interface *ctrl_iface, unsigned char id); +#ifdef CONFIG_PM int snd_usb_autoresume(struct snd_usb_audio *chip); void snd_usb_autosuspend(struct snd_usb_audio *chip); +#else +static inline int snd_usb_autoresume(struct snd_usb_audio *chip) +{ + return 0; +} +static inline void snd_usb_autosuspend(struct snd_usb_audio *chip) +{ +} +#endif #endif /* __USBAUDIO_POWER_H */ diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index cfc1ea5397..c39cc6851e 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -668,15 +668,14 @@ static void i_usx2y_04int(struct urb *urb) static int usx2y_rate_set(struct usx2ydev *usx2y, int rate) { - int err = 0, i; - struct snd_usx2y_urb_seq *us = NULL; - int *usbdata = NULL; - const struct s_c2 *ra = rate == 48000 ? setrate_48000 : setrate_44100; + int err = 0, i; + struct snd_usx2y_urb_seq *us = NULL; + int *usbdata = NULL; + const struct s_c2 *ra = rate == 48000 ? setrate_48000 : setrate_44100; struct urb *urb; if (usx2y->rate != rate) { - us = kzalloc(struct_size(us, urb, NOOF_SETRATE_URBS), - GFP_KERNEL); + us = kzalloc(sizeof(*us) + sizeof(struct urb *) * NOOF_SETRATE_URBS, GFP_KERNEL); if (!us) { err = -ENOMEM; goto cleanup; diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c index aca2dc1989..f88c8f29cb 100644 --- a/sound/virtio/virtio_pcm_msg.c +++ b/sound/virtio/virtio_pcm_msg.c @@ -20,7 +20,7 @@ struct virtio_pcm_msg { struct virtio_snd_pcm_xfer xfer; struct virtio_snd_pcm_status status; size_t length; - struct scatterlist sgs[]; + struct scatterlist sgs[0]; }; /** @@ -146,7 +146,8 @@ int virtsnd_pcm_msg_alloc(struct virtio_pcm_substream *vss, int sg_num = virtsnd_pcm_sg_num(data, period_bytes); struct virtio_pcm_msg *msg; - msg = kzalloc(struct_size(msg, sgs, sg_num + 2), GFP_KERNEL); + msg = kzalloc(sizeof(*msg) + sizeof(*msg->sgs) * (sg_num + 2), + GFP_KERNEL); if (!msg) return -ENOMEM; diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 1c94eaff19..7aa9472749 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1261,7 +1261,7 @@ static int had_pcm_mmap(struct snd_pcm_substream *substream, { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return remap_pfn_range(vma, vma->vm_start, - substream->dma_buffer.addr >> PAGE_SHIFT, + substream->runtime->dma_addr >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); } @@ -1750,9 +1750,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) card_ctx->irq = irq; /* only 32bit addressable */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (ret) - return ret; + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); init_channel_allocations(); diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c index 4041748c12..2cb0a19be2 100644 --- a/sound/xen/xen_snd_front.c +++ b/sound/xen/xen_snd_front.c @@ -358,7 +358,6 @@ static struct xenbus_driver xen_driver = { .probe = xen_drv_probe, .remove = xen_drv_remove, .otherend_changed = sndback_changed, - .not_essential = true, }; static int __init xen_drv_init(void) diff --git a/tools/Makefile b/tools/Makefile index db2f7b8ebe..7e9d34ddd7 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -12,7 +12,6 @@ help: @echo ' acpi - ACPI tools' @echo ' bpf - misc BPF 
tools' @echo ' cgroup - cgroup tools' - @echo ' counter - counter tools' @echo ' cpupower - a tool for all things x86 CPU power' @echo ' debugging - tools for debugging' @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' @@ -24,6 +23,7 @@ help: @echo ' intel-speed-select - Intel Speed Select tool' @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' leds - LEDs tools' + @echo ' liblockdep - user-space wrapper for kernel locking-validator' @echo ' objtool - an ELF object analysis tool' @echo ' pci - PCI tools' @echo ' perf - Linux performance measurement and analysis tool' @@ -65,12 +65,15 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup counter firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE +cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE $(call descend,$@) bpf/%: FORCE $(call descend,$@) +liblockdep: FORCE + $(call descend,lib/lockdep) + libapi: FORCE $(call descend,lib/api) @@ -97,7 +100,7 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) -all: acpi cgroup counter cpupower gpio hv firewire \ +all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ @@ -109,9 +112,12 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) +liblockdep_install: + $(call descend,lib/lockdep,install) + selftests_install: $(call descend,testing/$(@:_install=),install) @@ -127,8 +133,8 @@ freefall_install: kvm_stat_install: $(call descend,kvm/$(@:_install=),install) -install: acpi_install cgroup_install counter_install cpupower_install gpio_install \ - hv_install firewire_install iio_install \ +install: acpi_install cgroup_install cpupower_install gpio_install \ + hv_install firewire_install iio_install liblockdep_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ @@ -141,9 +147,12 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: +cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) +liblockdep_clean: + $(call descend,lib/lockdep,clean) + libapi_clean: $(call descend,lib/api,clean) @@ -172,10 +181,10 @@ freefall_clean: build_clean: $(call descend,build,clean) -clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean 
firewire_clean \ +clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ - freefall_clean build_clean libbpf_clean libsubcmd_clean \ + freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ intel-speed-select_clean tracing_clean diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 11e8673945..5ef1c15e88 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -205,8 +205,6 @@ static void print_delayacct(struct taskstats *t) "RECLAIM %12s%15s%15s\n" " %15llu%15llu%15llums\n" "THRASHING%12s%15s%15s\n" - " %15llu%15llu%15llums\n" - "COMPACT %12s%15s%15s\n" " %15llu%15llu%15llums\n", "count", "real total", "virtual total", "delay total", "delay average", @@ -230,11 +228,7 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, - average_ms(t->thrashing_delay_total, t->thrashing_count), - "count", "delay total", "delay average", - (unsigned long long)t->compact_count, - (unsigned long long)t->compact_delay_total, - average_ms(t->compact_delay_total, t->compact_count)); + average_ms(t->thrashing_delay_total, t->thrashing_count)); } static void task_context_switch_counts(struct taskstats *t) diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index 749a2e3af8..578b3ee861 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -61,35 +61,27 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_PMC4, PERF_REG_POWERPC_PMC5, PERF_REG_POWERPC_PMC6, - PERF_REG_POWERPC_SDAR, - PERF_REG_POWERPC_SIAR, - /* Max mask value for interrupt regs w/o extended regs */ + /* Max regs without the extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, - /* Max mask value for interrupt regs including extended regs */ - PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_SIAR + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) +/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ +#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) + /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 - * includes 11 SPRS from MMCR0 to SIAR excluding the - * unsupported SPRS MMCR3, SIER2 and SIER3. + * includes 9 SPRS from MMCR0 to PMC6 excluding the + * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. */ -#define PERF_REG_PMU_MASK_300 \ - ((1ULL << PERF_REG_POWERPC_MMCR0) | (1ULL << PERF_REG_POWERPC_MMCR1) | \ - (1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \ - (1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \ - (1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \ - (1ULL << PERF_REG_POWERPC_PMC6) | (1ULL << PERF_REG_POWERPC_SDAR) | \ - (1ULL << PERF_REG_POWERPC_SIAR)) +#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 - * includes 14 SPRs from MMCR0 to SIAR. + * includes 12 SPRs from MMCR0 to PMC6. 
*/ -#define PERF_REG_PMU_MASK_31 \ - (PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \ - (1ULL << PERF_REG_POWERPC_SIER2) | (1ULL << PERF_REG_POWERPC_SIER3)) +#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) +#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 6db4e2932b..63d30dde20 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -277,7 +277,6 @@ #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -299,9 +298,6 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -317,7 +313,6 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index a4a39c3e0f..a7c413432b 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -476,7 +476,6 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ -#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 @@ -487,23 +486,6 @@ #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f -/* AMD Collaborative Processor Performance Control MSRs */ -#define MSR_AMD_CPPC_CAP1 0xc00102b0 -#define MSR_AMD_CPPC_ENABLE 0xc00102b1 -#define MSR_AMD_CPPC_CAP2 0xc00102b2 -#define MSR_AMD_CPPC_REQ 0xc00102b3 -#define MSR_AMD_CPPC_STATUS 0xc00102b4 - -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) - -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) - /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -643,8 +625,6 @@ #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc -#define MSR_IA32_XFD 0x000001c4 -#define MSR_IA32_XFD_ERR 0x000001c5 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_APICBASE 0x0000001b diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index aff774775c..b2d504f119 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -35,7 +35,11 @@ # define NEED_CMOV 0 #endif +#ifdef CONFIG_X86_USE_3DNOW +# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) +#else # define NEED_3DNOW 0 +#endif #if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) # define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index bf6e96011d..2ef1f6513c 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -373,23 +373,9 @@ struct kvm_debugregs { __u64 reserved[9]; }; -/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ +/* for KVM_CAP_XSAVE */ struct kvm_xsave { - /* - * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes - * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) - * respectively, when invoked on the vm file descriptor. - * - * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) - * will always be at least 4096. Currently, it is only greater - * than 4096 if a dynamic feature has been enabled with - * ``arch_prctl()``, but this may change in the future. - * - * The offsets of the state save areas in struct kvm_xsave follow - * the contents of CPUID leaf 0xD on the host. 
- */ __u32 region[1024]; - __u32 extra[0]; }; #define KVM_MAX_XCRS 16 @@ -452,9 +438,6 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 -/* attributes for system fd (group 0) */ -#define KVM_X86_XCOMP_GUEST_SUPP 0 - struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -521,8 +504,4 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 -/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ -#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ -#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ - #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index 500b96e71f..5a6aac9fa4 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -2,22 +2,16 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 -#define ARCH_GET_CPUID 0x1011 -#define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 -#define ARCH_GET_XCOMP_SUPP 0x1021 -#define ARCH_GET_XCOMP_PERM 0x1022 -#define ARCH_REQ_XCOMP_PERM 0x1023 -#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 -#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 - -#define ARCH_MAP_VDSO_X32 0x2001 -#define ARCH_MAP_VDSO_32 0x2002 -#define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf2343f3..1cc9da6e29 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy) rep movsq movl %edx, %ecx rep movsb - RET + ret SYM_FUNC_END(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) @@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb - RET + ret SYM_FUNC_END(memcpy_erms) SYM_FUNC_START_LOCAL(memcpy_orig) @@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - RET + retq .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - RET + retq .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - RET + retq .p2align 4 .Lless_3bytes: subl $1, %edx @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movb %cl, (%rdi) .Lend: - RET + retq SYM_FUNC_END(memcpy_orig) .popsection diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile index 566c3e0ee5..da59757753 100644 --- a/tools/bootconfig/Makefile +++ b/tools/bootconfig/Makefile @@ -15,9 +15,9 @@ CFLAGS = -Wall -g -I$(CURDIR)/include ALL_TARGETS := bootconfig ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) -all: $(ALL_PROGRAMS) test +all: $(ALL_PROGRAMS) -$(OUTPUT)bootconfig: main.c include/linux/bootconfig.h $(LIBSRC) +$(OUTPUT)bootconfig: main.c $(LIBSRC) $(CC) $(filter %.c,$^) $(CFLAGS) -o $@ test: $(ALL_PROGRAMS) test-bootconfig.sh diff --git 
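The tools/bootconfig header in the next hunk re-adds userspace copies of the kernel's skip_spaces() and strim() string helpers. A standalone sketch of their behaviour, assuming only libc (the main() harness is illustrative, not part of the patch):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static char *skip_spaces(const char *str)
{
	while (isspace((unsigned char)*str))
		++str;
	return (char *)str;
}

/* trim trailing whitespace in place, then skip leading whitespace */
static char *strim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && isspace((unsigned char)end[-1]))
		--end;
	*end = '\0';

	return skip_spaces(s);
}

int main(void)
{
	char buf[] = "   feature.option = value   ";

	printf("[%s]\n", strim(buf));	/* prints [feature.option = value] */
	return 0;
}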
a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h index 6784296a06..de7f30f99a 100644 --- a/tools/bootconfig/include/linux/bootconfig.h +++ b/tools/bootconfig/include/linux/bootconfig.h @@ -2,53 +2,10 @@ #ifndef _BOOTCONFIG_LINUX_BOOTCONFIG_H #define _BOOTCONFIG_LINUX_BOOTCONFIG_H -#include -#include -#include -#include -#include -#include -#include - +#include "../../../../include/linux/bootconfig.h" #ifndef fallthrough # define fallthrough #endif -#define WARN_ON(cond) \ - ((cond) ? printf("Internal warning(%s:%d, %s): %s\n", \ - __FILE__, __LINE__, __func__, #cond) : 0) - -#define unlikely(cond) (cond) - -/* Copied from lib/string.c */ -static inline char *skip_spaces(const char *str) -{ - while (isspace(*str)) - ++str; - return (char *)str; -} - -static inline char *strim(char *s) -{ - size_t size; - char *end; - - size = strlen(s); - if (!size) - return s; - - end = s + size - 1; - while (end >= s && isspace(*end)) - end--; - *(end + 1) = '\0'; - - return skip_spaces(s); -} - -#define __init -#define __initdata - -#include "../../../../include/linux/bootconfig.h" - #endif diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 156b62a163..fd67496a94 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -12,10 +12,9 @@ #include #include +#include #include -#define pr_err(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) - static int xbc_show_value(struct xbc_node *node, bool semicolon) { const char *val, *eol; @@ -177,7 +176,7 @@ static int load_xbc_from_initrd(int fd, char **buf) { struct stat stat; int ret; - uint32_t size = 0, csum = 0, rcsum; + u32 size = 0, csum = 0, rcsum; char magic[BOOTCONFIG_MAGIC_LEN]; const char *msg; @@ -201,11 +200,11 @@ static int load_xbc_from_initrd(int fd, char **buf) if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); - if (read(fd, &size, sizeof(uint32_t)) < 0) + if (read(fd, &size, sizeof(u32)) < 0) return pr_errno("Failed to read size", -errno); size = le32toh(size); - if (read(fd, &csum, sizeof(uint32_t)) < 0) + if (read(fd, &csum, sizeof(u32)) < 0) return pr_errno("Failed to read checksum", -errno); csum = le32toh(csum); @@ -230,7 +229,7 @@ static int load_xbc_from_initrd(int fd, char **buf) return -EINVAL; } - ret = xbc_init(*buf, size, &msg, NULL); + ret = xbc_init(*buf, &msg, NULL); /* Wrong data */ if (ret < 0) { pr_err("parse error: %s.\n", msg); @@ -270,7 +269,7 @@ static int init_xbc_with_error(char *buf, int len) if (!copy) return -ENOMEM; - ret = xbc_init(buf, len, &msg, &pos); + ret = xbc_init(buf, &msg, &pos); if (ret < 0) show_xbc_error(copy, msg, pos); free(copy); @@ -363,7 +362,7 @@ static int apply_xbc(const char *path, const char *xbc_path) size_t total_size; struct stat stat; const char *msg; - uint32_t size, csum; + u32 size, csum; int pos, pad; int ret, fd; @@ -377,13 +376,13 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Backup the bootconfig data */ data = calloc(size + BOOTCONFIG_ALIGN + - sizeof(uint32_t) + sizeof(uint32_t) + BOOTCONFIG_MAGIC_LEN, 1); + sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1); if (!data) return -ENOMEM; memcpy(data, buf, size); /* Check the data format */ - ret = xbc_init(buf, size, &msg, &pos); + ret = xbc_init(buf, &msg, &pos); if (ret < 0) { show_xbc_error(data, msg, pos); free(data); @@ -392,13 +391,12 @@ static int apply_xbc(const char *path, const char *xbc_path) return ret; } printf("Apply %s to %s\n", xbc_path, path); - xbc_get_info(&ret, 
NULL); printf("\tNumber of nodes: %d\n", ret); printf("\tSize: %u bytes\n", (unsigned int)size); printf("\tChecksum: %d\n", (unsigned int)csum); /* TODO: Check the options by schema */ - xbc_exit(); + xbc_destroy_all(); free(buf); /* Remove old boot config if exists */ @@ -425,17 +423,17 @@ static int apply_xbc(const char *path, const char *xbc_path) } /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ - total_size = stat.st_size + size + sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN; + total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN; pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; size += pad; /* Add a footer */ p = data + size; - *(uint32_t *)p = htole32(size); - p += sizeof(uint32_t); + *(u32 *)p = htole32(size); + p += sizeof(u32); - *(uint32_t *)p = htole32(csum); - p += sizeof(uint32_t); + *(u32 *)p = htole32(csum); + p += sizeof(u32); memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); p += BOOTCONFIG_MAGIC_LEN; diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index a736f64dc5..05ce4446b7 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# SPDX-License-Identifier: GPL-2.0-only *.d /bootstrap/ /bpftool diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index ac8487dcff..f89929c703 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# SPDX-License-Identifier: GPL-2.0-only include ../../../scripts/Makefile.include INSTALL ?= install @@ -24,7 +24,7 @@ man: man8 man8: $(DOC_MAN8) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) -RST2MAN_OPTS += --verbose --strip-comments +RST2MAN_OPTS += --verbose list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST)))) see_also = $(subst " ",, \ diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 342716f74e..4425d942dd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-btf ================ @@ -9,14 +7,13 @@ tool for inspection of BTF data :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **btf** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | + { **-B** | **--base-btf** } } *COMMANDS* := { **dump** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index a17e9aa314..13a217a250 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-cgroup ================ @@ -9,14 +7,13 @@ tool for inspection and simple manipulation of eBPF progs :Manual section: 8 -.. 
include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **cgroup** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **list** | **tree** | **attach** | **detach** | **help** } @@ -33,9 +30,9 @@ CGROUP COMMANDS | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | -| **sock_release** } +| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | +| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +| **sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -101,9 +98,9 @@ DESCRIPTION **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp6 socket (since 4.18); **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp4 socket (since 5.2); + an unconnected udp4 socket (since 5.2); **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp6 socket (since 5.2); + an unconnected udp6 socket (since 5.2); **sysctl** sysctl access (since 5.2); **getsockopt** call to getsockopt (since 5.3); **setsockopt** call to setsockopt (since 5.3); diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 4ce9a77bc1..ab9f57ee4c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - =============== bpftool-feature =============== @@ -9,14 +7,12 @@ tool for inspection of eBPF-related parameters for Linux kernel or net device :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **feature** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **probe** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index bc276388f4..2a137f8a4c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-gen ================ @@ -9,14 +7,13 @@ tool for BPF code-generation :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **gen** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-L** | **--use-loader** } } *COMMAND* := { **object** | **skeleton** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 84839d4886..471f363a72 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -1,5 +1,3 @@ -.. 
SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ============ bpftool-iter ============ @@ -9,14 +7,12 @@ tool to create BPF iterators :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **iter** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 52a4eee4af..9434349636 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-link ================ @@ -9,14 +7,13 @@ tool for inspection and simple manipulation of eBPF links :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **link** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 7c188a5984..1445cadc15 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-map ================ @@ -9,14 +7,13 @@ tool for inspection and simple manipulation of eBPF maps :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | @@ -55,7 +52,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** } + | **task_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index f4e0a51633..1ae0375e8f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-net ================ @@ -9,14 +7,12 @@ tool for inspection of netdev/tc related bpf prog attachments :Manual section: 8 -.. 
include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **net** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } @@ -35,44 +31,44 @@ NET COMMANDS DESCRIPTION =========== **bpftool net { show | list }** [ **dev** *NAME* ] - List bpf program attachments in the kernel networking subsystem. + List bpf program attachments in the kernel networking subsystem. - Currently, only device driver xdp attachments and tc filter - classification/action attachments are implemented, i.e., for - program types **BPF_PROG_TYPE_SCHED_CLS**, - **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. - For programs attached to a particular cgroup, e.g., - **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, - **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, - users can use **bpftool cgroup** to dump cgroup attachments. - For sk_{filter, skb, msg, reuseport} and lwt/seg6 - bpf programs, users should consult other tools, e.g., iproute2. + Currently, only device driver xdp attachments and tc filter + classification/action attachments are implemented, i.e., for + program types **BPF_PROG_TYPE_SCHED_CLS**, + **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. + For programs attached to a particular cgroup, e.g., + **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, + **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + users can use **bpftool cgroup** to dump cgroup attachments. + For sk_{filter, skb, msg, reuseport} and lwt/seg6 + bpf programs, users should consult other tools, e.g., iproute2. - The current output will start with all xdp program attachments, followed by - all tc class/qdisc bpf program attachments. Both xdp programs and - tc programs are ordered based on ifindex number. If multiple bpf - programs are attached to the same networking device through **tc filter**, - the order will be first all bpf programs attached to tc classes, then - all bpf programs attached to non-clsact qdiscs, and finally all - bpf programs attached to the root and clsact qdiscs. + The current output will start with all xdp program attachments, followed by + all tc class/qdisc bpf program attachments. Both xdp programs and + tc programs are ordered based on ifindex number. If multiple bpf + programs are attached to the same networking device through **tc filter**, + the order will be first all bpf programs attached to tc classes, then + all bpf programs attached to non-clsact qdiscs, and finally all + bpf programs attached to the root and clsact qdiscs. **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] - Attach bpf program *PROG* to network interface *NAME* with - the type specified by *ATTACH_TYPE*. A previously attached bpf program - can be replaced by running the command with the **overwrite** option. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + Attach bpf program *PROG* to network interface *NAME* with + the type specified by *ATTACH_TYPE*. A previously attached bpf program + can be replaced by running the command with the **overwrite** option. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. - *ATTACH_TYPE* can be one of: - **xdp** - try native XDP and fall back to generic XDP if the NIC driver does not support it; - **xdpgeneric** - Generic XDP. Runs at the generic XDP hook, once the packet has already entered the receive path as an skb; - **xdpdrv** - Native XDP. 
Runs at the earliest point in the driver's receive path; - **xdpoffload** - Offload XDP. Runs directly on the NIC for each received packet; + *ATTACH_TYPE* can be one of: + **xdp** - try native XDP and fall back to generic XDP if the NIC driver does not support it; + **xdpgeneric** - Generic XDP. Runs at the generic XDP hook, once the packet has already entered the receive path as an skb; + **xdpdrv** - Native XDP. Runs at the earliest point in the driver's receive path; + **xdpoffload** - Offload XDP. Runs directly on the NIC for each received packet; **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* - Detach the bpf program attached to network interface *NAME* with - the type specified by *ATTACH_TYPE*. To detach a bpf program, the same - *ATTACH_TYPE* previously used for attaching must be specified. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + Detach the bpf program attached to network interface *NAME* with - the type specified by *ATTACH_TYPE*. To detach a bpf program, the same + *ATTACH_TYPE* previously used for attaching must be specified. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. **bpftool net help** Print short help message. diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index 5fea633a82..ce52798a91 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-perf ================ @@ -9,14 +7,12 @@ tool for inspection of perf related bpf prog attachments :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **perf** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index a2e9359e55..f27265bd58 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ bpftool-prog ================ @@ -9,14 +7,12 @@ tool for inspection and simple manipulation of eBPF progs :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | { **-L** | **--use-loader** } } diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index ee53a122c0..02afc0fc14 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================== bpftool-struct_ops ================== @@ -9,14 +7,12 @@ tool to register/unregister/introspect BPF struct_ops :Manual section: 8 -.. 
include:: substitutions.rst - SYNOPSIS ======== **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **dump** | **register** | **unregister** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 7084dd9fa2..8ac86565c5 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - ================ BPFTOOL ================ @@ -9,8 +7,6 @@ tool for inspection and simple manipulation of eBPF programs and maps :Manual section: 8 -.. include:: substitutions.rst - SYNOPSIS ======== @@ -22,7 +18,8 @@ SYNOPSIS *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } - *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } + *OPTIONS* := { { **-V** | **--version** } | + { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *MAP-COMMANDS* := { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 908487b9c2..05d06c74dc 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -1,5 +1,3 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - -h, --help Print short help message (similar to **bpftool help**). @@ -22,12 +20,3 @@ Print all logs available, even debug-level information. This includes logs from libbpf as well as from the verifier, when attempting to load programs. - --l, --legacy - Use legacy libbpf mode which has more relaxed BPF program - requirements. By default, bpftool has more strict requirements - about section names, changes pinning logic and doesn't support - some of the older non-BTF map declarations. - - See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 - for details. 
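[Editor's note: the *ATTACH_TYPE* modes in the bpftool-net.rst description above map one-to-one onto the kernel's XDP attach flags. A minimal sketch of that mapping, assuming the libbpf API of this era (bpf_set_link_xdp_fd(), later superseded by bpf_xdp_attach()); attach_xdp() and its parameters are illustrative names, not part of the patch:

#include <bpf/libbpf.h>
#include <linux/if_link.h>
#include <net/if.h>
#include <stdbool.h>

/* Sketch: translate a bpftool-style attach mode into an XDP attach.
 * Error handling trimmed for brevity.
 */
static int attach_xdp(const char *dev, int prog_fd, __u32 mode_flag, bool overwrite)
{
	int ifindex = if_nametoindex(dev);
	__u32 flags = mode_flag;	/* 0 for "xdp",
					 * XDP_FLAGS_SKB_MODE for "xdpgeneric",
					 * XDP_FLAGS_DRV_MODE for "xdpdrv",
					 * XDP_FLAGS_HW_MODE for "xdpoffload" */

	if (!ifindex)
		return -1;
	if (!overwrite)
		flags |= XDP_FLAGS_UPDATE_IF_NOEXIST;	/* refuse to replace an
							 * already attached program */
	/* detaching is the same call with prog_fd == -1 */
	return bpf_set_link_xdp_fd(ifindex, prog_fd, flags);
}

Without **overwrite**, the XDP_FLAGS_UPDATE_IF_NOEXIST bit makes the kernel fail the request if a program is already attached; with it, the program is swapped in place.]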
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 83369f55df..cce52df3be 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# SPDX-License-Identifier: GPL-2.0-only include ../../scripts/Makefile.include ifeq ($(srctree),) @@ -13,55 +13,35 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf +BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) - _OUTPUT := $(OUTPUT) + LIBBPF_OUTPUT = $(OUTPUT)/libbpf/ + LIBBPF_PATH = $(LIBBPF_OUTPUT) + BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/ else - _OUTPUT := $(CURDIR) + LIBBPF_OUTPUT = + LIBBPF_PATH = $(BPF_DIR) + BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/ endif -BOOTSTRAP_OUTPUT := $(_OUTPUT)/bootstrap/ -LIBBPF_OUTPUT := $(_OUTPUT)/libbpf/ -LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include -LIBBPF_HDRS_DIR := $(LIBBPF_INCLUDE)/bpf -LIBBPF := $(LIBBPF_OUTPUT)libbpf.a - -LIBBPF_BOOTSTRAP_OUTPUT := $(BOOTSTRAP_OUTPUT)libbpf/ -LIBBPF_BOOTSTRAP_DESTDIR := $(LIBBPF_BOOTSTRAP_OUTPUT) -LIBBPF_BOOTSTRAP_INCLUDE := $(LIBBPF_BOOTSTRAP_DESTDIR)/include -LIBBPF_BOOTSTRAP_HDRS_DIR := $(LIBBPF_BOOTSTRAP_INCLUDE)/bpf -LIBBPF_BOOTSTRAP := $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a - -# We need to copy hashmap.h and nlattr.h which is not otherwise exported by -# libbpf, but still required by bpftool. -LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h) -LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h) +LIBBPF = $(LIBBPF_PATH)libbpf.a +LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ +LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) endif -$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR) $(LIBBPF_BOOTSTRAP_HDRS_DIR): +$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): $(QUIET_MKDIR)mkdir -p $@ -$(LIBBPF): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_OUTPUT) - $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= $(LIBBPF) install_headers +$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a -$(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_DIR) - $(call QUIET_INSTALL, $@) - $(Q)install -m 644 -t $(LIBBPF_HDRS_DIR) $< - -$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) +$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ - DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR) prefix= \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers - -$(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) - $(call QUIET_INSTALL, $@) - $(Q)install -m 644 -t $(LIBBPF_BOOTSTRAP_HDRS_DIR) $< + ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ $(LIBBPF)-clean: FORCE | $(LIBBPF_OUTPUT) $(call QUIET_CLEAN, libbpf) @@ -79,10 +59,11 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(if $(OUTPUT),$(OUTPUT),.) 
\ - -I$(LIBBPF_INCLUDE) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ - -I$(srctree)/tools/include/uapi + -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/lib \ + -I$(srctree)/tools/perf CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' ifneq ($(EXTRA_CFLAGS),) CFLAGS += $(EXTRA_CFLAGS) @@ -152,16 +133,10 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif -HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ - $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) - BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o) -$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP) - OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -$(OBJS): $(LIBBPF) $(LIBBPF_INTERNAL_HDRS) VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -184,13 +159,13 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif -$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP) +$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ - -g -O2 -Wall -target bpf -c $< -o $@ - $(Q)$(LLVM_STRIP) -g $@ + -I$(LIBBPF_PATH) \ + -I$(srctree)/tools/lib \ + -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ @@ -205,27 +180,25 @@ endif CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c - $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ + $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< -$(OUTPUT)feature.o: -ifneq ($(feature-zlib), 1) - $(error "No zlib found") -endif +$(OUTPUT)feature.o: | zdep $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) - $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ + $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ + $(LIBS_BOOTSTRAP) $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) -$(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) - $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ +$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT) + $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)%.o: %.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< feature-detect-clean: $(call QUIET_CLEAN, feature-detect) @@ -240,12 +213,10 @@ clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool $(Q)$(RM) -r -- $(OUTPUT)feature/ -install-bin: $(OUTPUT)bpftool +install: $(OUTPUT)bpftool $(call QUIET_INSTALL, bpftool) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool - -install: install-bin $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) @@ -268,7 +239,10 @@ doc-uninstall: FORCE: +zdep: + @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi + .SECONDARY: -.PHONY: all FORCE 
bootstrap clean install-bin install uninstall +.PHONY: all FORCE clean install uninstall zdep .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 493753a496..88e2bcf16c 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -261,7 +261,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty --bpffs --mapcompat --debug \ - --use-loader --base-btf --legacy' + --use-loader --base-btf' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -710,8 +710,7 @@ _bpftool() hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack sk_storage \ - struct_ops ringbuf inode_storage task_storage \ - bloom_filter' + struct_ops inode_storage task_storage ringbuf' COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 59833125ac..f7e5ff3586 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -8,14 +8,13 @@ #include #include #include -#include -#include -#include - #include #include -#include #include +#include +#include +#include +#include #include "json_writer.h" #include "main.h" @@ -38,13 +37,16 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_DECL_TAG] = "DECL_TAG", - [BTF_KIND_TYPE_TAG] = "TYPE_TAG", +}; + +struct btf_attach_table { + DECLARE_HASHTABLE(table, 16); }; struct btf_attach_point { __u32 obj_id; __u32 btf_id; + struct hlist_node hash; }; static const char *btf_int_enc_str(__u8 encoding) @@ -143,7 +145,6 @@ static int dump_btf_type(const struct btf *btf, __u32 id, case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_TYPEDEF: - case BTF_KIND_TYPE_TAG: if (json_output) jsonw_uint_field(w, "type_id", t->type); else @@ -327,7 +328,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf("\n\ttype_id=%u offset=%u size=%u", v->type, v->offset, v->size); - if (v->type < btf__type_cnt(btf)) { + if (v->type <= btf__get_nr_types(btf)) { vt = btf__type_by_id(btf, v->type); printf(" (%s '%s')", btf_kind_str[btf_kind_safe(btf_kind(vt))], @@ -346,17 +347,6 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf(" size=%u", t->size); break; } - case BTF_KIND_DECL_TAG: { - const struct btf_decl_tag *tag = (const void *)(t + 1); - - if (json_output) { - jsonw_uint_field(w, "type_id", t->type); - jsonw_int_field(w, "component_idx", tag->component_idx); - } else { - printf(" type_id=%u component_idx=%d", t->type, tag->component_idx); - } - break; - } default: break; } @@ -388,14 +378,14 @@ static int dump_btf_raw(const struct btf *btf, } } else { const struct btf *base; - int cnt = btf__type_cnt(btf); + int cnt = btf__get_nr_types(btf); int start_id = 1; base = btf__base_btf(btf); if (base) - start_id = btf__type_cnt(base); + start_id = btf__get_nr_types(base) + 1; - for (i = start_id; i < cnt; i++) { + for (i = start_id; i <= cnt; i++) { t = btf__type_by_id(btf, i); dump_btf_type(btf, i, t); } @@ -420,10 +410,9 @@ static int dump_btf_c(const struct btf *btf, struct btf_dump *d; int err = 0, i; - d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + d = btf_dump__new(btf, NULL, NULL, btf_dump_printf); + if 
(IS_ERR(d)) + return PTR_ERR(d); printf("#ifndef __VMLINUX_H__\n"); printf("#define __VMLINUX_H__\n"); @@ -439,9 +428,9 @@ static int dump_btf_c(const struct btf *btf, goto done; } } else { - int cnt = btf__type_cnt(btf); + int cnt = btf__get_nr_types(btf); - for (i = 1; i < cnt; i++) { + for (i = 1; i <= cnt; i++) { err = btf_dump__dump_type(d, i); if (err) goto done; @@ -550,8 +539,8 @@ static int do_dump(int argc, char **argv) } btf = btf__parse_split(*argv, base ?: base_btf); - err = libbpf_get_error(btf); - if (err) { + if (IS_ERR(btf)) { + err = -PTR_ERR(btf); btf = NULL; p_err("failed to load BTF from %s: %s", *argv, strerror(err)); @@ -644,8 +633,21 @@ static int btf_parse_fd(int *argc, char ***argv) return fd; } +static void delete_btf_table(struct btf_attach_table *tab) +{ + struct btf_attach_point *obj; + struct hlist_node *tmp; + + unsigned int bkt; + + hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { + hash_del(&obj->hash); + free(obj); + } +} + static int -build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, +build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, void *info, __u32 *len) { static const char * const names[] = { @@ -653,6 +655,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", }; + struct btf_attach_point *obj_node; __u32 btf_id, id = 0; int err; int fd; @@ -726,25 +729,28 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, if (!btf_id) continue; - err = hashmap__append(tab, u32_as_hash_field(btf_id), - u32_as_hash_field(id)); - if (err) { - p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s", - btf_id, id, strerror(errno)); + obj_node = calloc(1, sizeof(*obj_node)); + if (!obj_node) { + p_err("failed to allocate memory: %s", strerror(errno)); + err = -ENOMEM; goto err_free; } + + obj_node->obj_id = id; + obj_node->btf_id = btf_id; + hash_add(tab->table, &obj_node->hash, obj_node->btf_id); } return 0; err_free: - hashmap__free(tab); + delete_btf_table(tab); return err; } static int -build_btf_tables(struct hashmap *btf_prog_table, - struct hashmap *btf_map_table) +build_btf_tables(struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) { struct bpf_prog_info prog_info; __u32 prog_len = sizeof(prog_info); @@ -760,7 +766,7 @@ build_btf_tables(struct hashmap *btf_prog_table, err = build_btf_type_table(btf_map_table, BPF_OBJ_MAP, &map_info, &map_len); if (err) { - hashmap__free(btf_prog_table); + delete_btf_table(btf_prog_table); return err; } @@ -769,10 +775,10 @@ build_btf_tables(struct hashmap *btf_prog_table, static void show_btf_plain(struct bpf_btf_info *info, int fd, - struct hashmap *btf_prog_table, - struct hashmap *btf_map_table) + struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) { - struct hashmap_entry *entry; + struct btf_attach_point *obj; const char *name = u64_to_ptr(info->name); int n; @@ -786,30 +792,29 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("size %uB", info->btf_size); n = 0; - hashmap__for_each_key_entry(btf_prog_table, entry, - u32_as_hash_field(info->id)) { - printf("%s%u", n++ == 0 ? " prog_ids " : ",", - hash_field_as_u32(entry->value)); + hash_for_each_possible(btf_prog_table->table, obj, hash, info->id) { + if (obj->btf_id == info->id) + printf("%s%u", n++ == 0 ? " prog_ids " : ",", + obj->obj_id); } n = 0; - hashmap__for_each_key_entry(btf_map_table, entry, - u32_as_hash_field(info->id)) { - printf("%s%u", n++ == 0 ? 
" map_ids " : ",", - hash_field_as_u32(entry->value)); + hash_for_each_possible(btf_map_table->table, obj, hash, info->id) { + if (obj->btf_id == info->id) + printf("%s%u", n++ == 0 ? " map_ids " : ",", + obj->obj_id); } - - emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); } static void show_btf_json(struct bpf_btf_info *info, int fd, - struct hashmap *btf_prog_table, - struct hashmap *btf_map_table) + struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) { - struct hashmap_entry *entry; + struct btf_attach_point *obj; const char *name = u64_to_ptr(info->name); jsonw_start_object(json_wtr); /* btf object */ @@ -818,21 +823,23 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_name(json_wtr, "prog_ids"); jsonw_start_array(json_wtr); /* prog_ids */ - hashmap__for_each_key_entry(btf_prog_table, entry, - u32_as_hash_field(info->id)) { - jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); + hash_for_each_possible(btf_prog_table->table, obj, hash, + info->id) { + if (obj->btf_id == info->id) + jsonw_uint(json_wtr, obj->obj_id); } jsonw_end_array(json_wtr); /* prog_ids */ jsonw_name(json_wtr, "map_ids"); jsonw_start_array(json_wtr); /* map_ids */ - hashmap__for_each_key_entry(btf_map_table, entry, - u32_as_hash_field(info->id)) { - jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); + hash_for_each_possible(btf_map_table->table, obj, hash, + info->id) { + if (obj->btf_id == info->id) + jsonw_uint(json_wtr, obj->obj_id); } jsonw_end_array(json_wtr); /* map_ids */ - emit_obj_refs_json(refs_table, info->id, json_wtr); /* pids */ + emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ jsonw_bool_field(json_wtr, "kernel", info->kernel_btf); @@ -843,8 +850,8 @@ show_btf_json(struct bpf_btf_info *info, int fd, } static int -show_btf(int fd, struct hashmap *btf_prog_table, - struct hashmap *btf_map_table) +show_btf(int fd, struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) { struct bpf_btf_info info; __u32 len = sizeof(info); @@ -881,8 +888,8 @@ show_btf(int fd, struct hashmap *btf_prog_table, static int do_show(int argc, char **argv) { - struct hashmap *btf_prog_table; - struct hashmap *btf_map_table; + struct btf_attach_table btf_prog_table; + struct btf_attach_table btf_map_table; int err, fd = -1; __u32 id = 0; @@ -898,19 +905,9 @@ static int do_show(int argc, char **argv) return BAD_ARG(); } - btf_prog_table = hashmap__new(hash_fn_for_key_as_id, - equal_fn_for_key_as_id, NULL); - btf_map_table = hashmap__new(hash_fn_for_key_as_id, - equal_fn_for_key_as_id, NULL); - if (!btf_prog_table || !btf_map_table) { - hashmap__free(btf_prog_table); - hashmap__free(btf_map_table); - if (fd >= 0) - close(fd); - p_err("failed to create hashmap for object references"); - return -1; - } - err = build_btf_tables(btf_prog_table, btf_map_table); + hash_init(btf_prog_table.table); + hash_init(btf_map_table.table); + err = build_btf_tables(&btf_prog_table, &btf_map_table); if (err) { if (fd >= 0) close(fd); @@ -919,7 +916,7 @@ static int do_show(int argc, char **argv) build_obj_refs_table(&refs_table, BPF_OBJ_BTF); if (fd >= 0) { - err = show_btf(fd, btf_prog_table, btf_map_table); + err = show_btf(fd, &btf_prog_table, &btf_map_table); close(fd); goto exit_free; } @@ -951,7 +948,7 @@ static int do_show(int argc, char **argv) break; } - err = show_btf(fd, btf_prog_table, btf_map_table); + err = show_btf(fd, &btf_prog_table, &btf_map_table); close(fd); if (err) 
break; @@ -961,9 +958,9 @@ static int do_show(int argc, char **argv) jsonw_end_array(json_wtr); /* root array */ exit_free: - hashmap__free(btf_prog_table); - hashmap__free(btf_map_table); - delete_obj_refs_table(refs_table); + delete_btf_table(&btf_prog_table); + delete_btf_table(&btf_map_table); + delete_obj_refs_table(&refs_table); return err; } diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index f5dddf8ef4..9c25286a5c 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -32,16 +32,14 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, const struct btf_type *func_proto, __u32 prog_id) { + struct bpf_prog_info_linear *prog_info = NULL; const struct btf_type *func_type; - int prog_fd = -1, func_sig_len; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); const char *prog_name = NULL; + struct bpf_func_info *finfo; struct btf *prog_btf = NULL; - struct bpf_func_info finfo; - __u32 finfo_rec_size; + struct bpf_prog_info *info; + int prog_fd, func_sig_len; char prog_str[1024]; - int err; /* Get the ptr's func_proto */ func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0, @@ -54,30 +52,25 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, /* Get the bpf_prog's name. Obtain from func_info. */ prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (prog_fd < 0) + if (prog_fd == -1) goto print; - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (err) + prog_info = bpf_program__get_prog_info_linear(prog_fd, + 1UL << BPF_PROG_INFO_FUNC_INFO); + close(prog_fd); + if (IS_ERR(prog_info)) { + prog_info = NULL; goto print; + } + info = &prog_info->info; - if (!info.btf_id || !info.nr_func_info) + if (!info->btf_id || !info->nr_func_info) goto print; - - finfo_rec_size = info.func_info_rec_size; - memset(&info, 0, sizeof(info)); - info.nr_func_info = 1; - info.func_info_rec_size = finfo_rec_size; - info.func_info = ptr_to_u64(&finfo); - - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (err) - goto print; - - prog_btf = btf__load_from_kernel_by_id(info.btf_id); + prog_btf = btf__load_from_kernel_by_id(info->btf_id); if (libbpf_get_error(prog_btf)) goto print; - func_type = btf__type_by_id(prog_btf, finfo.type_id); + finfo = u64_to_ptr(info->func_info); + func_type = btf__type_by_id(prog_btf, finfo->type_id); if (!func_type || !btf_is_func(func_type)) goto print; @@ -99,8 +92,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, prog_str[sizeof(prog_str) - 1] = '\0'; jsonw_string(d->jw, prog_str); btf__free(prog_btf); - if (prog_fd >= 0) - close(prog_fd); + free(prog_info); return 0; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index fa8eb81343..d42d930a3e 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -22,7 +22,6 @@ #include #include -#include #include /* libbpf_num_possible_cpus */ #include "main.h" @@ -74,7 +73,6 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_XDP] = "xdp", [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", - [BPF_PERF_EVENT] = "perf_event", }; void p_err(const char *fmt, ...) 
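[Editor's note: the btf.c hunks above and the common.c, link.c, map.c and pids.c hunks below all move bpftool from libbpf's hashmap back to the kernel-style fixed-bucket hashtable in tools/include/linux/hashtable.h. A compact sketch of that pattern, assuming tools/include is on the include path as in bpftool's Makefile; struct entry, add_id(), count_id() and drop_all() are made-up names, and hash_init(tab) is expected to run once before use, as main() does for the pinned-object tables:

#include <linux/hashtable.h>
#include <linux/types.h>
#include <stdlib.h>

struct entry {
	__u32 id;
	struct hlist_node hash;
};

static DECLARE_HASHTABLE(tab, 16);	/* 2^16 statically sized buckets */

static void add_id(__u32 id)
{
	struct entry *e = calloc(1, sizeof(*e));

	if (!e)
		return;
	e->id = id;
	hash_add(tab, &e->hash, e->id);	/* the key only selects a bucket */
}

static int count_id(__u32 id)
{
	struct entry *obj;
	int n = 0;

	/* hash_for_each_possible() walks a single bucket, so entries whose
	 * keys merely collide still show up; that is why the hunks in this
	 * diff repeat explicit "obj->id == info->id" style checks.
	 */
	hash_for_each_possible(tab, obj, hash, id)
		if (obj->id == id)
			n++;
	return n;
}

static void drop_all(void)
{
	struct hlist_node *tmp;
	struct entry *obj;
	unsigned int bkt;

	/* the _safe variant permits hash_del() mid-walk, mirroring
	 * delete_btf_table() and delete_pinned_obj_table() */
	hash_for_each_safe(tab, bkt, tmp, obj, hash) {
		hash_del(&obj->hash);
		free(obj);
	}
}
]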
@@ -395,7 +393,7 @@ void print_hex_data_json(uint8_t *data, size_t len) } /* extra params for nftw cb */ -static struct hashmap *build_fn_table; +static struct pinned_obj_table *build_fn_table; static enum bpf_obj_type build_fn_type; static int do_build_table_cb(const char *fpath, const struct stat *sb, @@ -403,9 +401,9 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, { struct bpf_prog_info pinned_info; __u32 len = sizeof(pinned_info); + struct pinned_obj *obj_node; enum bpf_obj_type objtype; int fd, err = 0; - char *path; if (typeflag != FTW_F) goto out_ret; @@ -422,26 +420,28 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) goto out_close; - path = strdup(fpath); - if (!path) { + obj_node = calloc(1, sizeof(*obj_node)); + if (!obj_node) { err = -1; goto out_close; } - err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path); - if (err) { - p_err("failed to append entry to hashmap for ID %u, path '%s': %s", - pinned_info.id, path, strerror(errno)); + obj_node->id = pinned_info.id; + obj_node->path = strdup(fpath); + if (!obj_node->path) { + err = -1; + free(obj_node); goto out_close; } + hash_add(build_fn_table->table, &obj_node->hash, obj_node->id); out_close: close(fd); out_ret: return err; } -int build_pinned_obj_table(struct hashmap *tab, +int build_pinned_obj_table(struct pinned_obj_table *tab, enum bpf_obj_type type) { struct mntent *mntent = NULL; @@ -470,18 +470,17 @@ int build_pinned_obj_table(struct hashmap *tab, return err; } -void delete_pinned_obj_table(struct hashmap *map) +void delete_pinned_obj_table(struct pinned_obj_table *tab) { - struct hashmap_entry *entry; - size_t bkt; + struct pinned_obj *obj; + struct hlist_node *tmp; + unsigned int bkt; - if (!map) - return; - - hashmap__for_each_entry(map, entry, bkt) - free(entry->value); - - hashmap__free(map); + hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { + hash_del(&obj->hash); + free(obj->path); + free(obj); + } } unsigned int get_page_size(void) @@ -963,13 +962,3 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } - -size_t hash_fn_for_key_as_id(const void *key, void *ctx) -{ - return (size_t)key; -} - -bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) -{ - return k1 == k2; -} diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index e999159fa2..7f36385aa9 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -467,7 +467,7 @@ static bool probe_bpf_syscall(const char *define_prefix) { bool res; - bpf_prog_load(BPF_PROG_TYPE_UNSPEC, NULL, NULL, NULL, 0, NULL); + bpf_load_program(BPF_PROG_TYPE_UNSPEC, NULL, 0, NULL, 0, NULL, 0); res = (errno != ENOSYS); print_bool_feature("have_bpf_syscall", @@ -624,7 +624,6 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, */ switch (id) { case BPF_FUNC_trace_printk: - case BPF_FUNC_trace_vprintk: case BPF_FUNC_probe_write_user: if (!full_mode) continue; @@ -643,111 +642,15 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } static void -probe_misc_feature(struct bpf_insn *insns, size_t len, - const char *define_prefix, __u32 ifindex, - const char *feat_name, const char *plain_name, - const char *define_name) +probe_large_insn_limit(const char *define_prefix, __u32 ifindex) { - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .prog_ifindex = ifindex, - ); bool res; - int fd; - errno = 0; - fd = 
bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - insns, len, &opts); - res = fd >= 0 || !errno; - - if (fd >= 0) - close(fd); - - print_bool_feature(feat_name, plain_name, define_name, res, - define_prefix); -} - -/* - * Probe for availability of kernel commit (5.3): - * - * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") - */ -static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex) -{ - struct bpf_insn insns[BPF_MAXINSNS + 1]; - int i; - - for (i = 0; i < BPF_MAXINSNS; i++) - insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); - insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); - - probe_misc_feature(insns, ARRAY_SIZE(insns), - define_prefix, ifindex, - "have_large_insn_limit", + res = bpf_probe_large_insn_limit(ifindex); + print_bool_feature("have_large_insn_limit", "Large program size limit", - "LARGE_INSN_LIMIT"); -} - -/* - * Probe for bounded loop support introduced in commit 2589726d12a1 - * ("bpf: introduce bounded loops"). - */ -static void -probe_bounded_loops(const char *define_prefix, __u32 ifindex) -{ - struct bpf_insn insns[4] = { - BPF_MOV64_IMM(BPF_REG_0, 10), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2), - BPF_EXIT_INSN() - }; - - probe_misc_feature(insns, ARRAY_SIZE(insns), - define_prefix, ifindex, - "have_bounded_loops", - "Bounded loop support", - "BOUNDED_LOOPS"); -} - -/* - * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b - * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions"). - */ -static void -probe_v2_isa_extension(const char *define_prefix, __u32 ifindex) -{ - struct bpf_insn insns[4] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN() - }; - - probe_misc_feature(insns, ARRAY_SIZE(insns), - define_prefix, ifindex, - "have_v2_isa_extension", - "ISA extension v2", - "V2_ISA_EXTENSION"); -} - -/* - * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6 - * ("bpf: verifier support JMP32"). 
- */ -static void -probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) -{ - struct bpf_insn insns[4] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN() - }; - - probe_misc_feature(insns, ARRAY_SIZE(insns), - define_prefix, ifindex, - "have_v3_isa_extension", - "ISA extension v3", - "V3_ISA_EXTENSION"); + "LARGE_INSN_LIMIT", + res, define_prefix); } static void @@ -864,9 +767,6 @@ static void section_misc(const char *define_prefix, __u32 ifindex) "/*** eBPF misc features ***/", define_prefix); probe_large_insn_limit(define_prefix, ifindex); - probe_bounded_loops(define_prefix, ifindex); - probe_v2_isa_extension(define_prefix, ifindex); - probe_v3_isa_extension(define_prefix, ifindex); print_end_section(); } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b4695df2ea..d40d92bbf0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "json_writer.h" #include "main.h" @@ -33,11 +34,6 @@ static void sanitize_identifier(char *name) name[i] = '_'; } -static bool str_has_prefix(const char *str, const char *prefix) -{ - return strncmp(str, prefix, strlen(prefix)) == 0; -} - static bool str_has_suffix(const char *str, const char *suffix) { size_t i, n1 = strlen(str), n2 = strlen(suffix); @@ -72,47 +68,23 @@ static void get_header_guard(char *guard, const char *obj_name) guard[i] = toupper(guard[i]); } -static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz) +static const char *get_map_ident(const struct bpf_map *map) { - static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; const char *name = bpf_map__name(map); - int i, n; - if (!bpf_map__is_internal(map)) { - snprintf(buf, buf_sz, "%s", name); - return true; - } + if (!bpf_map__is_internal(map)) + return name; - for (i = 0, n = ARRAY_SIZE(sfxs); i < n; i++) { - const char *sfx = sfxs[i], *p; - - p = strstr(name, sfx); - if (p) { - snprintf(buf, buf_sz, "%s", p + 1); - sanitize_identifier(buf); - return true; - } - } - - return false; -} - -static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) -{ - static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; - int i, n; - - for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) { - const char *pfx = pfxs[i]; - - if (str_has_prefix(sec_name, pfx)) { - snprintf(buf, buf_sz, "%s", sec_name + 1); - sanitize_identifier(buf); - return true; - } - } - - return false; + if (str_has_suffix(name, ".data")) + return "data"; + else if (str_has_suffix(name, ".rodata")) + return "rodata"; + else if (str_has_suffix(name, ".bss")) + return "bss"; + else if (str_has_suffix(name, ".kconfig")) + return "kconfig"; + else + return NULL; } static void codegen_btf_dump_printf(void *ctx, const char *fmt, va_list args) @@ -129,14 +101,24 @@ static int codegen_datasec_def(struct bpf_object *obj, const char *sec_name = btf__name_by_offset(btf, sec->name_off); const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec); int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec); - char var_ident[256], sec_ident[256]; + const char *sec_ident; + char var_ident[256]; bool strip_mods = false; - if (!get_datasec_ident(sec_name, sec_ident, sizeof(sec_ident))) - return 0; - - if (strcmp(sec_name, ".kconfig") != 0) + if (strcmp(sec_name, ".data") == 0) { + sec_ident = "data"; strip_mods = true; + } else if (strcmp(sec_name, ".bss") == 0) { + sec_ident = "bss"; + strip_mods = true; + } 
else if (strcmp(sec_name, ".rodata") == 0) { + sec_ident = "rodata"; + strip_mods = true; + } else if (strcmp(sec_name, ".kconfig") == 0) { + sec_ident = "kconfig"; + } else { + return 0; + } printf(" struct %s__%s {\n", obj_name, sec_ident); for (i = 0; i < vlen; i++, sec_var++) { @@ -211,64 +193,24 @@ static int codegen_datasec_def(struct bpf_object *obj, static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) { struct btf *btf = bpf_object__btf(obj); - int n = btf__type_cnt(btf); + int n = btf__get_nr_types(btf); struct btf_dump *d; - struct bpf_map *map; - const struct btf_type *sec; - char sec_ident[256], map_ident[256]; int i, err = 0; - d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); + if (IS_ERR(d)) + return PTR_ERR(d); - bpf_object__for_each_map(map, obj) { - /* only generate definitions for memory-mapped internal maps */ - if (!bpf_map__is_internal(map)) - continue; - if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + for (i = 1; i <= n; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + + if (!btf_is_datasec(t)) continue; - if (!get_map_ident(map, map_ident, sizeof(map_ident))) - continue; - - sec = NULL; - for (i = 1; i < n; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); - const char *name; - - if (!btf_is_datasec(t)) - continue; - - name = btf__str_by_offset(btf, t->name_off); - if (!get_datasec_ident(name, sec_ident, sizeof(sec_ident))) - continue; - - if (strcmp(sec_ident, map_ident) == 0) { - sec = t; - break; - } - } - - /* In some cases (e.g., sections like .rodata.cst16 containing - * compiler allocated string constants only) there will be - * special internal maps with no corresponding DATASEC BTF - * type. In such case, generate empty structs for each such - * map. It will still be memory-mapped and its contents - * accessible from user-space through BPF skeleton. - */ - if (!sec) { - printf(" struct %s__%s {\n", obj_name, map_ident); - printf(" } *%s;\n", map_ident); - } else { - err = codegen_datasec_def(obj, btf, d, sec, obj_name); - if (err) - goto out; - } + err = codegen_datasec_def(obj, btf, d, t, obj_name); + if (err) + goto out; } - - out: btf_dump__free(d); return err; @@ -296,8 +238,8 @@ static void codegen(const char *template, ...) 
} else if (c == '\n') { break; } else { - p_err("unrecognized character at pos %td in template '%s': '%c'", - src - template - 1, template, c); + p_err("unrecognized character at pos %td in template '%s'", + src - template - 1, template); free(s); exit(-1); } @@ -444,7 +386,6 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) { struct bpf_program *prog; struct bpf_map *map; - char ident[256]; codegen("\ \n\ @@ -465,7 +406,10 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) } bpf_object__for_each_map(map, obj) { - if (!get_map_ident(map, ident, sizeof(ident))) + const char * ident; + + ident = get_map_ident(map); + if (!ident) continue; if (bpf_map__is_internal(map) && (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) @@ -486,16 +430,21 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard) { + struct bpf_object_load_attr load_attr = {}; DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); struct bpf_map *map; - char ident[256]; int err = 0; err = bpf_object__gen_loader(obj, &opts); if (err) return err; - err = bpf_object__load(obj); + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + err = bpf_object__load_xattr(&load_attr); if (err) { p_err("failed to load object file"); goto out; @@ -529,10 +478,12 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h ", obj_name, opts.data_sz); bpf_object__for_each_map(map, obj) { + const char *ident; const void *mmap_data = NULL; size_t mmap_size = 0; - if (!get_map_ident(map, ident, sizeof(ident))) + ident = get_map_ident(map); + if (!ident) continue; if (!bpf_map__is_internal(map) || @@ -594,15 +545,15 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h return err; \n\ ", obj_name); bpf_object__for_each_map(map, obj) { - const char *mmap_flags; + const char *ident, *mmap_flags; - if (!get_map_ident(map, ident, sizeof(ident))) + ident = get_map_ident(map); + if (!ident) continue; if (!bpf_map__is_internal(map) || !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) continue; - if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) mmap_flags = "PROT_READ"; else @@ -652,8 +603,7 @@ static int do_skeleton(int argc, char **argv) DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; - const char *file; - char ident[256]; + const char *file, *ident; struct bpf_program *prog; int fd, err = -1; struct bpf_map *map; @@ -713,22 +663,19 @@ static int do_skeleton(int argc, char **argv) if (obj_name[0] == '\0') get_obj_name(obj_name, file); opts.object_name = obj_name; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_mem(obj_data, file_sz, &opts); - err = libbpf_get_error(obj); - if (err) { + if (IS_ERR(obj)) { char err_buf[256]; - libbpf_strerror(err, err_buf, sizeof(err_buf)); + libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf)); p_err("failed to open BPF object file: %s", err_buf); obj = NULL; goto out; } bpf_object__for_each_map(map, obj) { - if (!get_map_ident(map, ident, sizeof(ident))) { + ident = get_map_ident(map); + if (!ident) { p_err("ignoring unrecognized internal map '%s'...", bpf_map__name(map)); continue; @@ -781,7 +728,8 @@ static int do_skeleton(int argc, char **argv) if 
(map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_map(map, obj) { - if (!get_map_ident(map, ident, sizeof(ident))) + ident = get_map_ident(map); + if (!ident) continue; if (use_loader) printf("\t\tstruct bpf_map_desc %s;\n", ident); @@ -855,10 +803,7 @@ static int do_skeleton(int argc, char **argv) } \n\ \n\ err = %1$s__create_skeleton(obj); \n\ - if (err) \n\ - goto err_out; \n\ - \n\ - err = bpf_object__open_skeleton(obj->skeleton, opts);\n\ + err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\ if (err) \n\ goto err_out; \n\ \n\ @@ -917,8 +862,6 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \n\ - static inline const void *%1$s__elf_bytes(size_t *sz); \n\ - \n\ static inline int \n\ %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ @@ -950,7 +893,9 @@ static int do_skeleton(int argc, char **argv) ); i = 0; bpf_object__for_each_map(map, obj) { - if (!get_map_ident(map, ident, sizeof(ident))) + ident = get_map_ident(map); + + if (!ident) continue; codegen("\ @@ -998,20 +943,10 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \n\ - s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ - \n\ - return 0; \n\ - err: \n\ - bpf_object__destroy_skeleton(s); \n\ - return -ENOMEM; \n\ - } \n\ - \n\ - static inline const void *%2$s__elf_bytes(size_t *sz) \n\ - { \n\ - *sz = %1$d; \n\ - return (const void *)\"\\ \n\ - " - , file_sz, obj_name); + s->data_sz = %d; \n\ + s->data = (void *)\"\\ \n\ + ", + file_sz); /* embed contents of BPF object file */ print_hex(obj_data, file_sz); @@ -1019,6 +954,11 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \"; \n\ + \n\ + return 0; \n\ + err: \n\ + bpf_object__destroy_skeleton(s); \n\ + return -ENOMEM; \n\ } \n\ \n\ #endif /* %s */ \n\ diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index f88fdc820d..84a9b01d95 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -46,8 +46,7 @@ static int do_pin(int argc, char **argv) } obj = bpf_object__open(objfile); - err = libbpf_get_error(obj); - if (err) { + if (IS_ERR(obj)) { p_err("can't open objfile %s", objfile); goto close_map_fd; } @@ -58,15 +57,15 @@ static int do_pin(int argc, char **argv) goto close_obj; } - prog = bpf_object__next_program(obj, NULL); + prog = bpf_program__next(NULL, obj); if (!prog) { p_err("can't find bpf program in objfile %s", objfile); goto close_obj; } link = bpf_program__attach_iter(prog, &iter_opts); - err = libbpf_get_error(link); - if (err) { + if (IS_ERR(link)) { + err = PTR_ERR(link); p_err("attach_iter failed for program %s", bpf_program__name(prog)); goto close_obj; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2c258db0d3..8cc3e36f8c 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -7,7 +7,6 @@ #include #include -#include #include "json_writer.h" #include "main.h" @@ -21,8 +20,6 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_NETNS] = "netns", }; -static struct hashmap *link_table; - static int link_parse_fd(int *argc, char ***argv) { int fd; @@ -159,18 +156,19 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) break; } - if (!hashmap__empty(link_table)) { - struct hashmap_entry *entry; + if (!hash_empty(link_table.table)) { + struct pinned_obj *obj; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(link_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hash_for_each_possible(link_table.table, obj, hash, info->id) { + if 
(obj->id == info->id) + jsonw_string(json_wtr, obj->path); + } jsonw_end_array(json_wtr); } - emit_obj_refs_json(refs_table, info->id, json_wtr); + emit_obj_refs_json(&refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -246,14 +244,15 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) break; } - if (!hashmap__empty(link_table)) { - struct hashmap_entry *entry; + if (!hash_empty(link_table.table)) { + struct pinned_obj *obj; - hashmap__for_each_key_entry(link_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hash_for_each_possible(link_table.table, obj, hash, info->id) { + if (obj->id == info->id) + printf("\n\tpinned %s", obj->path); + } } - emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); @@ -303,15 +302,8 @@ static int do_show(int argc, char **argv) __u32 id = 0; int err, fd; - if (show_pinned) { - link_table = hashmap__new(hash_fn_for_key_as_id, - equal_fn_for_key_as_id, NULL); - if (!link_table) { - p_err("failed to create hashmap for pinned paths"); - return -1; - } - build_pinned_obj_table(link_table, BPF_OBJ_LINK); - } + if (show_pinned) + build_pinned_obj_table(&link_table, BPF_OBJ_LINK); build_obj_refs_table(&refs_table, BPF_OBJ_LINK); if (argc == 2) { @@ -352,10 +344,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(refs_table); - - if (show_pinned) - delete_pinned_obj_table(link_table); + delete_obj_refs_table(&refs_table); return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 020e91a542..d27ec4f852 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -10,9 +10,8 @@ #include #include -#include -#include #include +#include #include "main.h" @@ -31,9 +30,11 @@ bool block_mount; bool verifier_logs; bool relaxed_maps; bool use_loader; -bool legacy_libbpf; struct btf *base_btf; -struct hashmap *refs_table; +struct pinned_obj_table prog_table; +struct pinned_obj_table map_table; +struct pinned_obj_table link_table; +struct obj_refs_table refs_table; static void __noreturn clean_and_exit(int i) { @@ -93,7 +94,6 @@ static int do_version(int argc, char **argv) jsonw_name(json_wtr, "features"); jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); - jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); jsonw_end_object(json_wtr); /* features */ @@ -107,10 +107,6 @@ static int do_version(int argc, char **argv) printf(" libbfd"); nb_features++; } - if (!legacy_libbpf) { - printf("%s libbpf_strict", nb_features++ ? "," : ""); - nb_features++; - } if (has_skeletons) printf("%s skeletons", nb_features++ ? 
"," : ""); printf("\n"); @@ -402,10 +398,8 @@ int main(int argc, char **argv) { "debug", no_argument, NULL, 'd' }, { "use-loader", no_argument, NULL, 'L' }, { "base-btf", required_argument, NULL, 'B' }, - { "legacy", no_argument, NULL, 'l' }, { 0 } }; - bool version_requested = false; int opt, ret; setlinebuf(stdout); @@ -417,13 +411,16 @@ int main(int argc, char **argv) block_mount = false; bin_name = argv[0]; + hash_init(prog_table.table); + hash_init(map_table.table); + hash_init(link_table.table); + opterr = 0; - while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", + while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", options, NULL)) >= 0) { switch (opt) { case 'V': - version_requested = true; - break; + return do_version(argc, argv); case 'h': return do_help(argc, argv); case 'p': @@ -465,9 +462,6 @@ int main(int argc, char **argv) case 'L': use_loader = true; break; - case 'l': - legacy_libbpf = true; - break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -477,25 +471,21 @@ int main(int argc, char **argv) } } - if (!legacy_libbpf) { - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - if (ret) - p_err("failed to enable libbpf strict mode: %d", ret); - } - argc -= optind; argv += optind; if (argc < 0) usage(); - if (version_requested) - return do_version(argc, argv); - ret = cmd_select(cmds, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); + if (show_pinned) { + delete_pinned_obj_table(&prog_table); + delete_pinned_obj_table(&map_table); + delete_pinned_obj_table(&link_table); + } btf__free(base_btf); return ret; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 8d76d937a6..90caa42aac 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -11,9 +11,9 @@ #include #include #include +#include #include -#include #include #include "json_writer.h" @@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ @@ -90,9 +90,11 @@ extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; -extern bool legacy_libbpf; extern struct btf *base_btf; -extern struct hashmap *refs_table; +extern struct pinned_obj_table prog_table; +extern struct pinned_obj_table map_table; +extern struct pinned_obj_table link_table; +extern struct obj_refs_table refs_table; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -106,12 +108,28 @@ void set_max_rlimit(void); int mount_tracefs(const char *target); +struct pinned_obj_table { + DECLARE_HASHTABLE(table, 16); +}; + +struct pinned_obj { + __u32 id; + char *path; + struct hlist_node hash; +}; + +struct obj_refs_table { + DECLARE_HASHTABLE(table, 16); +}; + struct obj_ref { int pid; char comm[16]; }; struct obj_refs { + struct hlist_node node; + __u32 id; int ref_cnt; struct obj_ref *refs; }; @@ -119,15 +137,15 @@ struct obj_refs { struct btf; struct bpf_line_info; -int build_pinned_obj_table(struct hashmap *table, +int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); -void delete_pinned_obj_table(struct hashmap *table); -__weak int build_obj_refs_table(struct hashmap **table, +void 
delete_pinned_obj_table(struct pinned_obj_table *tab); +__weak int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type); -__weak void delete_obj_refs_table(struct hashmap *table); -__weak void emit_obj_refs_json(struct hashmap *table, __u32 id, +__weak void delete_obj_refs_table(struct obj_refs_table *table); +__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr); -__weak void emit_obj_refs_plain(struct hashmap *table, __u32 id, +__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix); void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); @@ -241,23 +259,4 @@ int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, int print_all_levels(__maybe_unused enum libbpf_print_level level, const char *format, va_list args); - -size_t hash_fn_for_key_as_id(const void *key, void *ctx); -bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); - -static inline void *u32_as_hash_field(__u32 x) -{ - return (void *)(uintptr_t)x; -} - -static inline __u32 hash_field_as_u32(const void *x) -{ - return (__u32)(uintptr_t)x; -} - -static inline bool hashmap__empty(struct hashmap *map) -{ - return map ? hashmap__size(map) == 0 : true; -} - #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cc530a2298..407071d54a 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -17,7 +17,6 @@ #include #include -#include #include "json_writer.h" #include "main.h" @@ -53,13 +52,10 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_RINGBUF] = "ringbuf", [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", - [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); -static struct hashmap *map_table; - static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@ -539,18 +535,19 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hashmap__empty(map_table)) { - struct hashmap_entry *entry; + if (!hash_empty(map_table.table)) { + struct pinned_obj *obj; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(map_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hash_for_each_possible(map_table.table, obj, hash, info->id) { + if (obj->id == info->id) + jsonw_string(json_wtr, obj->path); + } jsonw_end_array(json_wtr); } - emit_obj_refs_json(refs_table, info->id, json_wtr); + emit_obj_refs_json(&refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -613,12 +610,13 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) } close(fd); - if (!hashmap__empty(map_table)) { - struct hashmap_entry *entry; + if (!hash_empty(map_table.table)) { + struct pinned_obj *obj; - hashmap__for_each_key_entry(map_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hash_for_each_possible(map_table.table, obj, hash, info->id) { + if (obj->id == info->id) + printf("\n\tpinned %s", obj->path); + } } printf("\n"); @@ -638,7 +636,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (frozen) printf("%sfrozen", info->btf_id ? 
" " : ""); - emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); return 0; @@ -696,15 +694,8 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) { - map_table = hashmap__new(hash_fn_for_key_as_id, - equal_fn_for_key_as_id, NULL); - if (!map_table) { - p_err("failed to create hashmap for pinned paths"); - return -1; - } - build_pinned_obj_table(map_table, BPF_OBJ_MAP); - } + if (show_pinned) + build_pinned_obj_table(&map_table, BPF_OBJ_MAP); build_obj_refs_table(&refs_table, BPF_OBJ_MAP); if (argc == 2) @@ -749,10 +740,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(refs_table); - - if (show_pinned) - delete_pinned_obj_table(map_table); + delete_obj_refs_table(&refs_table); return errno == ENOENT ? 0 : -1; } @@ -812,7 +800,7 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + if (IS_ERR(btf_vmlinux)) p_err("failed to get kernel btf"); } return btf_vmlinux; @@ -832,13 +820,13 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) static void free_map_kv_btf(struct btf *btf) { - if (!libbpf_get_error(btf) && btf != btf_vmlinux) + if (!IS_ERR(btf) && btf != btf_vmlinux) btf__free(btf); } static void free_btf_vmlinux(void) { - if (!libbpf_get_error(btf_vmlinux)) + if (!IS_ERR(btf_vmlinux)) btf__free(btf_vmlinux); } @@ -863,8 +851,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, if (wtr) { btf = get_map_kv_btf(info); - err = libbpf_get_error(btf); - if (err) { + if (IS_ERR(btf)) { + err = PTR_ERR(btf); goto exit_free; } @@ -1261,10 +1249,7 @@ static int do_pin(int argc, char **argv) static int do_create(int argc, char **argv) { - LIBBPF_OPTS(bpf_map_create_opts, attr); - enum bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC; - __u32 key_size = 0, value_size = 0, max_entries = 0; - const char *map_name = NULL; + struct bpf_create_map_attr attr = { NULL, }; const char *pinfile; int err = -1, fd; @@ -1279,30 +1264,30 @@ static int do_create(int argc, char **argv) if (is_prefix(*argv, "type")) { NEXT_ARG(); - if (map_type) { + if (attr.map_type) { p_err("map type already specified"); goto exit; } - map_type = map_type_from_str(*argv); - if ((int)map_type < 0) { + attr.map_type = map_type_from_str(*argv); + if ((int)attr.map_type < 0) { p_err("unrecognized map type: %s", *argv); goto exit; } NEXT_ARG(); } else if (is_prefix(*argv, "name")) { NEXT_ARG(); - map_name = GET_ARG(); + attr.name = GET_ARG(); } else if (is_prefix(*argv, "key")) { - if (parse_u32_arg(&argc, &argv, &key_size, + if (parse_u32_arg(&argc, &argv, &attr.key_size, "key size")) goto exit; } else if (is_prefix(*argv, "value")) { - if (parse_u32_arg(&argc, &argv, &value_size, + if (parse_u32_arg(&argc, &argv, &attr.value_size, "value size")) goto exit; } else if (is_prefix(*argv, "entries")) { - if (parse_u32_arg(&argc, &argv, &max_entries, + if (parse_u32_arg(&argc, &argv, &attr.max_entries, "max entries")) goto exit; } else if (is_prefix(*argv, "flags")) { @@ -1343,14 +1328,14 @@ static int do_create(int argc, char **argv) } } - if (!map_name) { + if (!attr.name) { p_err("map name not specified"); goto exit; } set_max_rlimit(); - fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); + fd = bpf_create_map_xattr(&attr); if (fd < 0) { p_err("map create 
failed: %s", strerror(errno)); goto exit; @@ -1481,7 +1466,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter }\n" + " task_storage }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 6b0c410152..825f29f93a 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -22,6 +22,7 @@ #include <sys/syscall.h> #include <bpf/bpf.h> +#include <perf-sys.h> #include "main.h" @@ -124,7 +125,7 @@ int do_event_pipe(int argc, char **argv) .wakeup_events = 1, }; struct bpf_map_info map_info = {}; - LIBBPF_OPTS(perf_buffer_raw_opts, opts); + struct perf_buffer_raw_opts opts = {}; struct event_pipe_ctx ctx = { .all_cpus = true, .cpu = -1, @@ -190,11 +191,14 @@ int do_event_pipe(int argc, char **argv) ctx.idx = 0; } + opts.attr = &perf_attr; + opts.event_cb = print_bpf_output; + opts.ctx = &ctx; opts.cpu_cnt = ctx.all_cpus ? 0 : 1; opts.cpus = &ctx.cpu; opts.map_keys = &ctx.idx; - pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &perf_attr, - print_bpf_output, &ctx, &opts); + + pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts); err = libbpf_get_error(pb); if (err) { p_err("failed to create perf buffer: %s (%d)", diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 56b598eee0..477e55d59c 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -6,37 +6,35 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> - #include <bpf/bpf.h> -#include <bpf/hashmap.h> #include "main.h" #include "skeleton/pid_iter.h" #ifdef BPFTOOL_WITHOUT_SKELETONS -int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) { return -ENOTSUP; } -void delete_obj_refs_table(struct hashmap *map) {} -void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) {} -void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) {} +void delete_obj_refs_table(struct obj_refs_table *table) {} +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} +void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) {} #else /* BPFTOOL_WITHOUT_SKELETONS */ #include "pid_iter.skel.h" -static void add_ref(struct hashmap *map, struct pid_iter_entry *e) +static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) { - struct hashmap_entry *entry; struct obj_refs *refs; struct obj_ref *ref; - int err, i; void *tmp; + int i; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) { - refs = entry->value; + hash_for_each_possible(table->table, refs, node, e->id) { + if (refs->id != e->id) + continue; for (i = 0; i < refs->ref_cnt; i++) { if (refs->refs[i].pid == e->pid) @@ -66,6 +64,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) return; } + refs->id = e->id; refs->refs = malloc(sizeof(*refs->refs)); if (!refs->refs) { free(refs); @@ -77,11 +76,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); refs->ref_cnt = 1; - - err = hashmap__append(map, u32_as_hash_field(e->id), refs); - if (err) - p_err("failed to append entry to hashmap for ID %u: %s", - e->id, strerror(errno)); + hash_add(table->table, &refs->node, e->id); }
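The restored add_ref() above keeps one struct obj_refs per object ID in a fixed-size, open-chained hash table rather than in libbpf's hashmap. Because hash_for_each_possible() only narrows a lookup to a single bucket, every walk has to re-check the id stored in the entry itself, and the key is duplicated into both the entry and hash_add(). A minimal self-contained sketch of that lookup-or-create pattern follows; it assumes the macros from tools/include/linux/hashtable.h, and the 16-bit bucket count and trimmed obj_refs layout are illustrative (the real struct also carries the per-PID refs[] array):

/* Sketch only. The table must be zeroed before use, e.g. with
 * hash_init(), as build_obj_refs_table() does in the hunk below.
 */
#include <stdlib.h>
#include <linux/types.h>
#include <linux/hashtable.h>	/* DECLARE_HASHTABLE, hash_add, ... */

struct obj_refs_table {
	DECLARE_HASHTABLE(table, 16);	/* 1 << 16 buckets, keyed by obj ID */
};

struct obj_refs {
	struct hlist_node node;
	__u32 id;		/* the key, duplicated into the entry */
	int ref_cnt;		/* trimmed: real struct also has refs[] */
};

static struct obj_refs *obj_refs_lookup_or_create(struct obj_refs_table *tab,
						  __u32 id)
{
	struct obj_refs *refs;

	/* hash_for_each_possible() walks one bucket; colliding IDs share
	 * the chain, hence the explicit comparison against refs->id.
	 */
	hash_for_each_possible(tab->table, refs, node, id)
		if (refs->id == id)
			return refs;

	refs = calloc(1, sizeof(*refs));
	if (!refs)
		return NULL;
	refs->id = id;
	hash_add(tab->table, &refs->node, id);	/* key = id, as in add_ref() */
	return refs;
}

The matching teardown, with hash_for_each_safe() and hash_del(), appears in the delete_obj_refs_table() hunk further on.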
static int __printf(2, 0) @@ -92,7 +87,7 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level, return 0; } -int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) { struct pid_iter_entry *e; char buf[4096 / sizeof(*e) * sizeof(*e)]; @@ -100,11 +95,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) int err, ret, fd = -1, i; libbpf_print_fn_t default_print; - *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!*map) { - p_err("failed to create hashmap for PID references"); - return -1; - } + hash_init(table->table); set_max_rlimit(); skel = pid_iter_bpf__open(); @@ -160,7 +151,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) e = (void *)buf; for (i = 0; i < ret; i++, e++) { - add_ref(*map, e); + add_ref(table, e); } } err = 0; @@ -171,44 +162,39 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) return err; } -void delete_obj_refs_table(struct hashmap *map) +void delete_obj_refs_table(struct obj_refs_table *table) { - struct hashmap_entry *entry; - size_t bkt; - - if (!map) - return; - - hashmap__for_each_entry(map, entry, bkt) { - struct obj_refs *refs = entry->value; + struct obj_refs *refs; + struct hlist_node *tmp; + unsigned int bkt; + hash_for_each_safe(table->table, bkt, tmp, refs, node) { + hash_del(&refs->node); free(refs->refs); free(refs); } - - hashmap__free(map); } -void emit_obj_refs_json(struct hashmap *map, __u32 id, +void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) { - struct hashmap_entry *entry; + struct obj_refs *refs; + struct obj_ref *ref; + int i; - if (hashmap__empty(map)) + if (hash_empty(table->table)) return; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { - struct obj_refs *refs = entry->value; - int i; - + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; if (refs->ref_cnt == 0) break; jsonw_name(json_writer, "pids"); jsonw_start_array(json_writer); for (i = 0; i < refs->ref_cnt; i++) { - struct obj_ref *ref = &refs->refs[i]; - + ref = &refs->refs[i]; jsonw_start_object(json_writer); jsonw_int_field(json_writer, "pid", ref->pid); jsonw_string_field(json_writer, "comm", ref->comm); @@ -219,24 +205,24 @@ void emit_obj_refs_json(struct hashmap *map, __u32 id, } } -void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) { - struct hashmap_entry *entry; + struct obj_refs *refs; + struct obj_ref *ref; + int i; - if (hashmap__empty(map)) + if (hash_empty(table->table)) return; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { - struct obj_refs *refs = entry->value; - int i; - + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; if (refs->ref_cnt == 0) break; printf("%s", prefix); for (i = 0; i < refs->ref_cnt; i++) { - struct obj_ref *ref = &refs->refs[i]; - + ref = &refs->refs[i]; printf("%s%s(%d)", i == 0 ? 
"" : ", ", ref->comm, ref->pid); } break; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2a21d50516..f8755beb3d 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -24,8 +24,8 @@ #include #include -#include #include +#include #include #include "cfg.h" @@ -85,8 +85,6 @@ static const char * const attach_type_strings[] = { [__MAX_BPF_ATTACH_TYPE] = NULL, }; -static struct hashmap *prog_table; - static enum bpf_attach_type parse_attach_type(const char *str) { enum bpf_attach_type type; @@ -100,76 +98,6 @@ static enum bpf_attach_type parse_attach_type(const char *str) return __MAX_BPF_ATTACH_TYPE; } -static int prep_prog_info(struct bpf_prog_info *const info, enum dump_mode mode, - void **info_data, size_t *const info_data_sz) -{ - struct bpf_prog_info holder = {}; - size_t needed = 0; - void *ptr; - - if (mode == DUMP_JITED) { - holder.jited_prog_len = info->jited_prog_len; - needed += info->jited_prog_len; - } else { - holder.xlated_prog_len = info->xlated_prog_len; - needed += info->xlated_prog_len; - } - - holder.nr_jited_ksyms = info->nr_jited_ksyms; - needed += info->nr_jited_ksyms * sizeof(__u64); - - holder.nr_jited_func_lens = info->nr_jited_func_lens; - needed += info->nr_jited_func_lens * sizeof(__u32); - - holder.nr_func_info = info->nr_func_info; - holder.func_info_rec_size = info->func_info_rec_size; - needed += info->nr_func_info * info->func_info_rec_size; - - holder.nr_line_info = info->nr_line_info; - holder.line_info_rec_size = info->line_info_rec_size; - needed += info->nr_line_info * info->line_info_rec_size; - - holder.nr_jited_line_info = info->nr_jited_line_info; - holder.jited_line_info_rec_size = info->jited_line_info_rec_size; - needed += info->nr_jited_line_info * info->jited_line_info_rec_size; - - if (needed > *info_data_sz) { - ptr = realloc(*info_data, needed); - if (!ptr) - return -1; - - *info_data = ptr; - *info_data_sz = needed; - } - ptr = *info_data; - - if (mode == DUMP_JITED) { - holder.jited_prog_insns = ptr_to_u64(ptr); - ptr += holder.jited_prog_len; - } else { - holder.xlated_prog_insns = ptr_to_u64(ptr); - ptr += holder.xlated_prog_len; - } - - holder.jited_ksyms = ptr_to_u64(ptr); - ptr += holder.nr_jited_ksyms * sizeof(__u64); - - holder.jited_func_lens = ptr_to_u64(ptr); - ptr += holder.nr_jited_func_lens * sizeof(__u32); - - holder.func_info = ptr_to_u64(ptr); - ptr += holder.nr_func_info * holder.func_info_rec_size; - - holder.line_info = ptr_to_u64(ptr); - ptr += holder.nr_line_info * holder.line_info_rec_size; - - holder.jited_line_info = ptr_to_u64(ptr); - ptr += holder.nr_jited_line_info * holder.jited_line_info_rec_size; - - *info = holder; - return 0; -} - static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) { struct timespec real_time_ts, boot_time_ts; @@ -489,18 +417,19 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hashmap__empty(prog_table)) { - struct hashmap_entry *entry; + if (!hash_empty(prog_table.table)) { + struct pinned_obj *obj; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(prog_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hash_for_each_possible(prog_table.table, obj, hash, info->id) { + if (obj->id == info->id) + jsonw_string(json_wtr, obj->path); + } jsonw_end_array(json_wtr); } - emit_obj_refs_json(refs_table, info->id, json_wtr); + emit_obj_refs_json(&refs_table, info->id, 
json_wtr); show_prog_metadata(fd, info->nr_map_ids); @@ -560,18 +489,19 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); - if (!hashmap__empty(prog_table)) { - struct hashmap_entry *entry; + if (!hash_empty(prog_table.table)) { + struct pinned_obj *obj; - hashmap__for_each_key_entry(prog_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hash_for_each_possible(prog_table.table, obj, hash, info->id) { + if (obj->id == info->id) + printf("\n\tpinned %s", obj->path); + } } if (info->btf_id) printf("\n\tbtf_id %d", info->btf_id); - emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); @@ -638,15 +568,8 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) { - prog_table = hashmap__new(hash_fn_for_key_as_id, - equal_fn_for_key_as_id, NULL); - if (!prog_table) { - p_err("failed to create hashmap for pinned paths"); - return -1; - } - build_pinned_obj_table(prog_table, BPF_OBJ_PROG); - } + if (show_pinned) + build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); build_obj_refs_table(&refs_table, BPF_OBJ_PROG); if (argc == 2) @@ -689,10 +612,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(refs_table); - - if (show_pinned) - delete_pinned_obj_table(prog_table); + delete_obj_refs_table(&refs_table); return err; } @@ -876,18 +796,16 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, static int do_dump(int argc, char **argv) { - struct bpf_prog_info info; - __u32 info_len = sizeof(info); - size_t info_data_sz = 0; - void *info_data = NULL; + struct bpf_prog_info_linear *info_linear; char *filepath = NULL; bool opcodes = false; bool visual = false; enum dump_mode mode; bool linum = false; - int nb_fds, i = 0; int *fds = NULL; + int nb_fds, i = 0; int err = -1; + __u64 arrays; if (is_prefix(*argv, "jited")) { if (disasm_init()) @@ -947,44 +865,43 @@ static int do_dump(int argc, char **argv) goto exit_close; } + if (mode == DUMP_JITED) + arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; + else + arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; + + arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + if (json_output && nb_fds > 1) jsonw_start_array(json_wtr); /* root array */ for (i = 0; i < nb_fds; i++) { - memset(&info, 0, sizeof(info)); - - err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); - if (err) { - p_err("can't get prog info: %s", strerror(errno)); - break; - } - - err = prep_prog_info(&info, mode, &info_data, &info_data_sz); - if (err) { - p_err("can't grow prog info_data"); - break; - } - - err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); - if (err) { + info_linear = bpf_program__get_prog_info_linear(fds[i], arrays); + if (IS_ERR_OR_NULL(info_linear)) { p_err("can't get prog info: %s", strerror(errno)); break; } if (json_output && nb_fds > 1) { jsonw_start_object(json_wtr); /* prog object */ - print_prog_header_json(&info); + print_prog_header_json(&info_linear->info); jsonw_name(json_wtr, "insns"); } else if (nb_fds > 1) { - print_prog_header_plain(&info); + print_prog_header_plain(&info_linear->info); } - err = prog_dump(&info, mode, filepath, opcodes, visual, linum); + err = prog_dump(&info_linear->info, mode, 
filepath, opcodes, + visual, linum); if (json_output && nb_fds > 1) jsonw_end_object(json_wtr); /* prog object */ else if (i != nb_fds - 1 && nb_fds > 1) printf("\n"); + free(info_linear); if (err) break; close(fds[i]); @@ -996,7 +913,6 @@ static int do_dump(int argc, char **argv) for (; i < nb_fds; i++) close(fds[i]); exit_free: - free(info_data); free(fds); return err; } @@ -1464,6 +1380,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, .relaxed_maps = relaxed_maps, ); + struct bpf_object_load_attr load_attr = { 0 }; enum bpf_attach_type expected_attach_type; struct map_replace *map_replace = NULL; struct bpf_program *prog = NULL, *pos; @@ -1485,6 +1402,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) while (argc) { if (is_prefix(*argv, "type")) { + char *type; + NEXT_ARG(); if (common_prog_type != BPF_PROG_TYPE_UNSPEC) { @@ -1494,26 +1413,21 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) if (!REQ_ARGS(1)) goto err_free_reuse_maps; - err = libbpf_prog_type_by_name(*argv, &common_prog_type, - &expected_attach_type); - if (err < 0) { - /* Put a '/' at the end of type to appease libbpf */ - char *type = malloc(strlen(*argv) + 2); - - if (!type) { - p_err("mem alloc failed"); - goto err_free_reuse_maps; - } - *type = 0; - strcat(type, *argv); - strcat(type, "/"); - - err = get_prog_type_by_name(type, &common_prog_type, - &expected_attach_type); - free(type); - if (err < 0) - goto err_free_reuse_maps; + /* Put a '/' at the end of type to appease libbpf */ + type = malloc(strlen(*argv) + 2); + if (!type) { + p_err("mem alloc failed"); + goto err_free_reuse_maps; } + *type = 0; + strcat(type, *argv); + strcat(type, "/"); + + err = get_prog_type_by_name(type, &common_prog_type, + &expected_attach_type); + free(type); + if (err < 0) + goto err_free_reuse_maps; NEXT_ARG(); } else if (is_prefix(*argv, "map")) { @@ -1597,10 +1511,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) set_max_rlimit(); - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - open_opts.kernel_log_level = 1 + 2 + 4; - obj = bpf_object__open_file(file, &open_opts); if (libbpf_get_error(obj)) { p_err("failed to open object file"); @@ -1655,7 +1565,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) j = 0; idx = 0; bpf_object__for_each_map(map, obj) { - if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + if (!bpf_map__is_offload_neutral(map)) bpf_map__set_ifindex(map, ifindex); if (j < old_map_fds && idx == map_replace[j].idx) { @@ -1680,7 +1590,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = bpf_object__load(obj); + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + err = bpf_object__load_xattr(&load_attr); if (err) { p_err("failed to load object file"); goto err_close_obj; @@ -1691,7 +1606,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; if (first_prog_only) { - prog = bpf_object__next_program(obj, NULL); + prog = bpf_program__next(NULL, obj); if (!prog) { p_err("object file doesn't contain any bpf program"); goto err_close_obj; @@ -1735,11 +1650,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) else bpf_object__unpin_programs(obj, pinfile); err_close_obj: - if 
(!legacy_libbpf) { - p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n" - "If it used to work for this object file but now doesn't, see --legacy option for more details.\n"); - } - bpf_object__close(obj); err_free_reuse_maps: for (i = 0; i < old_map_fds; i++) @@ -1772,19 +1682,17 @@ static int try_loader(struct gen_loader_opts *gen) sizeof(struct bpf_prog_desc)); int log_buf_sz = (1u << 24) - 1; int err, fds_before, fd_delta; - char *log_buf = NULL; + char *log_buf; ctx = alloca(ctx_sz); memset(ctx, 0, ctx_sz); ctx->sz = ctx_sz; - if (verifier_logs) { - ctx->log_level = 1 + 2 + 4; - ctx->log_size = log_buf_sz; - log_buf = malloc(log_buf_sz); - if (!log_buf) - return -ENOMEM; - ctx->log_buf = (long) log_buf; - } + ctx->log_level = 1; + ctx->log_size = log_buf_sz; + log_buf = malloc(log_buf_sz); + if (!log_buf) + return -ENOMEM; + ctx->log_buf = (long) log_buf; opts.ctx = ctx; opts.data = gen->data; opts.data_sz = gen->data_sz; @@ -1793,9 +1701,9 @@ static int try_loader(struct gen_loader_opts *gen) fds_before = count_open_fds(); err = bpf_load_and_run(&opts); fd_delta = count_open_fds() - fds_before; - if (err < 0 || verifier_logs) { + if (err < 0) { fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf); - if (fd_delta && err < 0) + if (fd_delta) fprintf(stderr, "loader prog leaked %d FDs\n", fd_delta); } @@ -1807,6 +1715,7 @@ static int do_loader(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); DECLARE_LIBBPF_OPTS(gen_loader_opts, gen); + struct bpf_object_load_attr load_attr = {}; struct bpf_object *obj; const char *file; int err = 0; @@ -1815,10 +1724,6 @@ static int do_loader(int argc, char **argv) return -1; file = GET_ARG(); - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - open_opts.kernel_log_level = 1 + 2 + 4; - obj = bpf_object__open_file(file, &open_opts); if (libbpf_get_error(obj)) { p_err("failed to open object file"); @@ -1829,7 +1734,12 @@ static int do_loader(int argc, char **argv) if (err) goto err_close_obj; - err = bpf_object__load(obj); + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + err = bpf_object__load_xattr(&load_attr); if (err) { p_err("failed to load object file"); goto err_close_obj; @@ -2099,58 +2009,41 @@ static void profile_print_readings(void) static char *profile_target_name(int tgt_fd) { - struct bpf_func_info func_info; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); + struct bpf_prog_info_linear *info_linear; + struct bpf_func_info *func_info; const struct btf_type *t; - __u32 func_info_rec_size; struct btf *btf = NULL; char *name = NULL; - int err; - err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); - if (err) { - p_err("failed to bpf_obj_get_info_by_fd for prog FD %d", tgt_fd); - goto out; + info_linear = bpf_program__get_prog_info_linear( + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("failed to get info_linear for prog FD %d", tgt_fd); + return NULL; } - if (info.btf_id == 0) { + if (info_linear->info.btf_id == 0) { p_err("prog FD %d doesn't have valid btf", tgt_fd); goto out; } - func_info_rec_size = info.func_info_rec_size; - if (info.nr_func_info == 0) { - p_err("bpf_obj_get_info_by_fd for prog FD %d found 0 func_info", tgt_fd); - goto out; - } - - memset(&info, 0, sizeof(info)); - info.nr_func_info = 1; - info.func_info_rec_size = func_info_rec_size; 
- info.func_info = ptr_to_u64(&func_info); - - err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); - if (err) { - p_err("failed to get func_info for prog FD %d", tgt_fd); - goto out; - } - - btf = btf__load_from_kernel_by_id(info.btf_id); + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); if (libbpf_get_error(btf)) { p_err("failed to load btf for prog FD %d", tgt_fd); goto out; } - t = btf__type_by_id(btf, func_info.type_id); + func_info = u64_to_ptr(info_linear->info.func_info); + t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { p_err("btf %d doesn't have type %d", - info.btf_id, func_info.type_id); + info_linear->info.btf_id, func_info[0].type_id); goto out; } name = strdup(btf__name_by_offset(btf, t->name_off)); out: btf__free(btf); + free(info_linear); return name; } diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index f70702fcb2..d9b4209729 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx) e.pid = task->tgid; e.id = get_obj_id(file->private_data, obj_type); - bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm), - task->group_leader->comm); + bpf_probe_read_kernel(&e.comm, sizeof(e.comm), + task->group_leader->comm); bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); return 0; diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 2f693b082b..ab2d229056 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void) return btf_vmlinux; btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + if (IS_ERR(btf_vmlinux)) p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); return btf_vmlinux; @@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) const char *st_ops_name; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (IS_ERR(kern_btf)) return ""; t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); @@ -63,7 +63,7 @@ static __s32 get_map_info_type_id(void) return map_info_type_id; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) { + if (IS_ERR(kern_btf)) { map_info_type_id = PTR_ERR(kern_btf); return map_info_type_id; } @@ -252,7 +252,7 @@ static struct res do_one_id(const char *id_str, work_func func, void *data, } fd = bpf_map_get_fd_by_id(id); - if (fd < 0) { + if (fd == -1) { p_err("can't get map by id (%lu): %s", id, strerror(errno)); res.nr_errs++; return res; @@ -415,7 +415,7 @@ static int do_dump(int argc, char **argv) } kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (IS_ERR(kern_btf)) return -1; if (!json_output) { @@ -479,7 +479,7 @@ static int do_unregister(int argc, char **argv) static int do_register(int argc, char **argv) { - LIBBPF_OPTS(bpf_object_open_opts, open_opts); + struct bpf_object_load_attr load_attr = {}; const struct bpf_map_def *def; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); @@ -494,17 +494,18 @@ static int do_register(int argc, char **argv) file = GET_ARG(); - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - open_opts.kernel_log_level = 1 + 2 + 4; - - obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) + obj = bpf_object__open(file); + if (IS_ERR_OR_NULL(obj)) return -1; set_max_rlimit(); - if (bpf_object__load(obj)) { + load_attr.obj = obj; + if (verifier_logs) + 
/* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + if (bpf_object__load_xattr(&load_attr)) { bpf_object__close(obj); return -1; } @@ -515,7 +516,7 @@ static int do_register(int argc, char **argv) continue; link = bpf_map__attach_struct_ops(map); - if (libbpf_get_error(link)) { + if (IS_ERR(link)) { p_err("can't register struct_ops %s: %s", bpf_map__name(map), strerror(-PTR_ERR(link))); @@ -595,7 +596,7 @@ int do_struct_ops(int argc, char **argv) err = cmd_select(cmds, argc, argv, do_help); - if (!libbpf_get_error(btf_vmlinux)) + if (!IS_ERR(btf_vmlinux)) btf__free(btf_vmlinux); return err; diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 320a88ac28..af9f9d3534 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -23,7 +23,6 @@ CC = $(HOSTCC) LD = $(HOSTLD) ARCH = $(HOSTARCH) RM ?= rm -CROSS_COMPILE = OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ @@ -31,33 +30,27 @@ LIBBPF_SRC := $(srctree)/tools/lib/bpf/ SUBCMD_SRC := $(srctree)/tools/lib/subcmd/ BPFOBJ := $(OUTPUT)/libbpf/libbpf.a -LIBBPF_OUT := $(abspath $(dir $(BPFOBJ)))/ SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a -LIBBPF_DESTDIR := $(LIBBPF_OUT) -LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)include - BINARY := $(OUTPUT)/resolve_btfids BINARY_IN := $(BINARY)-in.o all: $(BINARY) -$(OUTPUT) $(OUTPUT)/libsubcmd $(LIBBPF_OUT): +$(OUTPUT) $(OUTPUT)/libbpf $(OUTPUT)/libsubcmd: $(call msg,MKDIR,,$@) $(Q)mkdir -p $(@) $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= \ - $(abspath $@) install_headers +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) CFLAGS := -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_SRC) \ -I$(SUBCMD_SRC) LIBS = -lelf -lz @@ -65,7 +58,7 @@ LIBS = -lelf -lz export srctree OUTPUT CFLAGS Q include $(srctree)/tools/build/Makefile.include -$(BINARY_IN): $(BPFOBJ) fixdep FORCE | $(OUTPUT) +$(BINARY_IN): fixdep FORCE | $(OUTPUT) $(Q)$(MAKE) $(build)=resolve_btfids $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) @@ -75,8 +68,7 @@ $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) clean_objects := $(wildcard $(OUTPUT)/*.o \ $(OUTPUT)/.*.o.cmd \ $(OUTPUT)/.*.o.d \ - $(LIBBPF_OUT) \ - $(LIBBPF_DESTDIR) \ + $(OUTPUT)/libbpf \ $(OUTPUT)/libsubcmd \ $(OUTPUT)/resolve_btfids) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 5d26f3c6f9..45e0d64061 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -60,8 +60,8 @@ #include <linux/rbtree.h> #include <linux/zalloc.h> #include <linux/err.h> -#include <bpf/btf.h> -#include <bpf/libbpf.h> +#include <btf.h> +#include <libbpf.h> #include <parse-options.h> #define BTF_IDS_SECTION ".BTF_ids" @@ -83,14 +12,12 @@ struct btf_id { int cnt; }; int addr_cnt; - bool is_set; Elf64_Addr addr[ADDR_CNT]; }; struct object { const char *path; const char *btf; - const char *base_btf_path; struct { int fd; @@ -452,10 +450,8 @@ static int symbols_collect(struct object *obj) * in symbol's size, together with 'cnt' field hence * that - 1.
*/ - if (id) { + if (id) id->cnt = sym.st_size / sizeof(int) - 1; - id->is_set = true; - } } else { pr_err("FAILED unsupported prefix %s\n", prefix); return -1; @@ -481,36 +477,25 @@ static int symbols_resolve(struct object *obj) int nr_structs = obj->nr_structs; int nr_unions = obj->nr_unions; int nr_funcs = obj->nr_funcs; - struct btf *base_btf = NULL; int err, type_id; struct btf *btf; __u32 nr_types; - if (obj->base_btf_path) { - base_btf = btf__parse(obj->base_btf_path, NULL); - err = libbpf_get_error(base_btf); - if (err) { - pr_err("FAILED: load base BTF from %s: %s\n", - obj->base_btf_path, strerror(-err)); - return -1; - } - } - - btf = btf__parse_split(obj->btf ?: obj->path, base_btf); + btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); if (err) { pr_err("FAILED: load BTF from %s: %s\n", obj->btf ?: obj->path, strerror(-err)); - goto out; + return -1; } err = -1; - nr_types = btf__type_cnt(btf); + nr_types = btf__get_nr_types(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id < nr_types; type_id++) { + for (type_id = 1; type_id <= nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -560,7 +545,6 @@ static int symbols_resolve(struct object *obj) err = 0; out: - btf__free(base_btf); btf__free(btf); return err; } @@ -571,8 +555,9 @@ static int id_patch(struct object *obj, struct btf_id *id) int *ptr = data->d_buf; int i; - if (!id->id && !id->is_set) + if (!id->id) { pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); + } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; @@ -693,6 +678,7 @@ static const char * const resolve_btfids_usage[] = { int main(int argc, const char **argv) { + bool no_fail = false; struct object obj = { .efile = { .idlist_shndx = -1, @@ -709,8 +695,8 @@ int main(int argc, const char **argv) "be more verbose (show errors, etc)"), OPT_STRING(0, "btf", &obj.btf, "BTF data", "BTF data"), - OPT_STRING('b', "btf_base", &obj.base_btf_path, "file", - "path of file providing base BTF"), + OPT_BOOLEAN(0, "no-fail", &no_fail, + "do not fail if " BTF_IDS_SECTION " section is not found"), OPT_END() }; int err = -1; @@ -731,7 +717,9 @@ int main(int argc, const char **argv) */ if (obj.efile.idlist_shndx == -1 || obj.efile.symbols_shndx == -1) { - pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n"); + if (no_fail) + return 0; + pr_err("FAILED to find needed sections\n"); err = 0; goto out; } diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index da6de16a3d..3818ec511f 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -9,10 +9,10 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a -BPF_DESTDIR := $(BPFOBJ_OUTPUT) -BPF_INCLUDE := $(BPF_DESTDIR)/include -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) -CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS) +BPF_INCLUDE := $(BPFOBJ_OUTPUT) +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \ + -I$(abspath ../../include/uapi) +CFLAGS := -g -Wall # Try to detect best kernel BTF source KERNEL_REL := $(shell uname -r) @@ -33,7 +33,7 @@ endif .DELETE_ON_ERROR: -.PHONY: all clean runqslower libbpf_hdrs +.PHONY: all clean runqslower all: runqslower runqslower: $(OUTPUT)/runqslower @@ -46,15 +46,13 @@ clean: $(Q)$(RM) $(OUTPUT)runqslower $(Q)$(RM) -r .output -libbpf_hdrs: $(BPFOBJ) - 
$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ - $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs + $(OUTPUT)/runqslower.bpf.o -$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs +$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ @@ -83,9 +81,8 @@ else endif $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \ - DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@ -$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) +$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) + CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 9a5c1f008f..ab9353f2fd 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx) */ struct task_struct *prev = (struct task_struct *)ctx[1]; struct task_struct *next = (struct task_struct *)ctx[2]; - struct runq_event event = {}; + struct event event = {}; u64 *tsp, delta_us; long state; u32 pid; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index d78f414859..d897158449 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void) void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { - const struct runq_event *e = data; + const struct event *e = data; struct tm *tm; char ts[32]; time_t t; @@ -123,6 +123,7 @@ int main(int argc, char **argv) .parser = parse_arg, .doc = argp_program_doc, }; + struct perf_buffer_opts pb_opts; struct perf_buffer *pb = NULL; struct runqslower_bpf *obj; int err; @@ -164,8 +165,9 @@ int main(int argc, char **argv) printf("Tracing run queue latency higher than %llu us\n", env.min_us); printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); - pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, - handle_event, handle_lost_events, NULL, NULL); + pb_opts.sample_cb = handle_event; + pb_opts.lost_cb = handle_lost_events; + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); err = libbpf_get_error(pb); if (err) { pb = NULL; diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h index 4f70f07200..9db225425e 100644 --- a/tools/bpf/runqslower/runqslower.h +++ b/tools/bpf/runqslower/runqslower.h @@ -4,7 +4,7 @@ #define TASK_COMM_LEN 16 -struct runq_event { +struct event { char task[TASK_COMM_LEN]; __u64 delta_us; pid_t pid; diff --git a/tools/build/Build.include b/tools/build/Build.include index c2a95ab473..2cf3b1bde8 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX ### ## HOSTCC C flags -host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) +host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) # 
output directory for tests below TMPOUT = .tmp_$$$$ diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index ae61f46404..88dd7db55d 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -51,7 +51,6 @@ FEATURE_TESTS_BASIC := \ libslang \ libslang-include-subdir \ libtraceevent \ - libtracefs \ libcrypto \ libunwind \ pthread-attr-setaffinity-np \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1480910c79..e1e670014b 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -35,7 +35,6 @@ FILES= \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ - test-libtracefs.bin \ test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ @@ -90,7 +89,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap $(OUTPUT)test-hello.bin: $(BUILD) @@ -199,9 +198,6 @@ $(OUTPUT)test-libslang-include-subdir.bin: $(OUTPUT)test-libtraceevent.bin: $(BUILD) -ltraceevent -$(OUTPUT)test-libtracefs.bin: - $(BUILD) -ltracefs - $(OUTPUT)test-libcrypto.bin: $(BUILD) -lcrypto @@ -296,7 +292,7 @@ $(OUTPUT)test-jvmti-cmlr.bin: $(BUILD) $(OUTPUT)test-llvm.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++11 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ $(shell $(LLVM_CONFIG) --libs Core BPF) \ @@ -304,12 +300,12 @@ $(OUTPUT)test-llvm.bin: > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-llvm-version.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++11 \ -I$(shell $(LLVM_CONFIG) --includedir) \ > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-clang.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++11 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ -Wl,--start-group -lclangBasic -lclangDriver \ diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index 727d22e34a..82070eadfc 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -14,12 +14,6 @@ # define __NR_bpf 349 # elif defined(__s390__) # define __NR_bpf 351 -# elif defined(__mips__) && defined(_ABIO32) -# define __NR_bpf 4355 -# elif defined(__mips__) && defined(_ABIN32) -# define __NR_bpf 6319 -# elif defined(__mips__) && defined(_ABI64) -# define __NR_bpf 5315 # else # error __NR_bpf not defined. libbpf does not support your arch. 
# endif diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 1d3a90d93f..1600b17dbb 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -11,7 +11,7 @@ from drgn.helpers.linux import list_for_each_entry, list_empty from drgn.helpers.linux import for_each_page from drgn.helpers.linux.cpumask import for_each_online_cpu from drgn.helpers.linux.percpu import per_cpu_ptr -from drgn import container_of, FaultError, Object, cast +from drgn import container_of, FaultError, Object DESC = """ @@ -69,15 +69,15 @@ def oo_objects(s): def count_partial(n, fn): - nr_objs = 0 - for slab in list_for_each_entry('struct slab', n.partial.address_of_(), - 'slab_list'): - nr_objs += fn(slab) - return nr_objs + nr_pages = 0 + for page in list_for_each_entry('struct page', n.partial.address_of_(), + 'lru'): + nr_pages += fn(page) + return nr_pages -def count_free(slab): - return slab.objects - slab.inuse +def count_free(page): + return page.objects - page.inuse def slub_get_slabinfo(s, cfg): @@ -145,14 +145,14 @@ def detect_kernel_config(): return cfg -def for_each_slab(prog): +def for_each_slab_page(prog): PGSlab = 1 << prog.constant('PG_slab') PGHead = 1 << prog.constant('PG_head') for page in for_each_page(prog): try: if page.flags.value_() & PGSlab: - yield cast('struct slab *', page) + yield page except FaultError: pass @@ -190,13 +190,13 @@ def main(): 'list'): obj_cgroups.add(ptr.value_()) - # look over all slab folios and look for objects belonging - # to the given memory cgroup - for slab in for_each_slab(prog): - objcg_vec_raw = slab.memcg_data.value_() + # look over all slab pages, belonging to non-root memcgs + # and look for objects belonging to the given memory cgroup + for page in for_each_slab_page(prog): + objcg_vec_raw = page.memcg_data.value_() if objcg_vec_raw == 0: continue - cache = slab.slab_cache + cache = page.slab_cache if not cache: continue addr = cache.value_() diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index b94a16ba5c..0076437f6e 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -279,7 +279,6 @@ static void print_event(struct iio_event_data *event) printf(", direction: %s", iio_ev_dir_text[dir]); printf("\n"); - fflush(stdout); } /* Enable or disable events in sysfs if the knob is available */ diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h index 9ab313e935..5d2ab38965 100644 --- a/tools/include/asm-generic/bitops.h +++ b/tools/include/asm-generic/bitops.h @@ -18,6 +18,7 @@ #include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> +#include <asm-generic/bitops/find.h> #ifndef _TOOLS_LINUX_BITOPS_H_ #error only <linux/bitops.h> can be included directly diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index ea97804d04..95611df1d2 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -1,10 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _TOOLS_LINUX_BITMAP_H -#define _TOOLS_LINUX_BITMAP_H +#ifndef _PERF_BITOPS_H +#define _PERF_BITOPS_H #include <string.h> #include <linux/bitops.h> -#include <linux/find.h> #include <stdlib.h> #include <linux/kernel.h> @@ -182,4 +181,4 @@ static inline int bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -#endif /* _TOOLS_LINUX_BITMAP_H */ +#endif /* _PERF_BITOPS_H */ diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h index 38edaa08f8..ad6fa21d97 100644 --- a/tools/include/linux/hash.h +++ b/tools/include/linux/hash.h @@ -62,7 +62,10 @@ static inline u32 __hash_32_generic(u32 val) return val * GOLDEN_RATIO_32; } -static
inline u32 hash_32(u32 val, unsigned int bits) +#ifndef HAVE_ARCH_HASH_32 +#define hash_32 hash_32_generic +#endif +static inline u32 hash_32_generic(u32 val, unsigned int bits) { /* High bits are more random, so use them. */ return __hash_32(val) >> (32 - bits); diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 9701e8307d..a7e54a08fb 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -7,7 +7,6 @@ #include <assert.h> #include <linux/build_bug.h> #include <linux/compiler.h> -#include <linux/math.h> #include <endian.h> #include <byteswap.h> @@ -15,6 +14,8 @@ #define UINT_MAX (~0U) #endif +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) #define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) @@ -51,6 +52,15 @@ _min1 < _min2 ? _min1 : _min2; }) #endif +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + #ifndef BUG_ON #ifdef NDEBUG #define BUG_ON(cond) do { if (cond) {} } while (0) @@ -92,9 +102,17 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); int scnprintf(char * buf, size_t size, const char * fmt, ...); int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); -#ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) -#endif + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) #define current_gfp_context(k) 0 #define synchronize_rcu() diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index aca52db2f3..7e72d975cb 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -66,17 +66,6 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func -/* - * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore - * for the case where a function is intentionally missing frame pointer setup, - * but otherwise needs objtool/ORC coverage when frame pointers are disabled.
- */ -#ifdef CONFIG_FRAME_POINTER -#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) -#else -#define STACK_FRAME_NON_STANDARD_FP(func) -#endif - #else /* __ASSEMBLY__ */ /* @@ -138,7 +127,6 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) -#define STACK_FRAME_NON_STANDARD_FP(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index 622266b197..c934572d93 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -37,4 +37,6 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) return true; } +#include <linux/lockdep.h> + #endif diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index c1c285fe49..3e2c6f2ed5 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -265,17 +265,12 @@ struct stat { * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively * - the system call is performed by calling the syscall instruction * - syscall return comes in rax - * - rcx and r11 are clobbered, others are preserved. + * - rcx and r8..r11 may be clobbered, others are preserved. * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. * - the syscall number is always specified last in order to allow to force * some registers before (gcc refuses a %-register at the last position). - * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 - * Calling Conventions. - * - * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI - * */ #define my_syscall0(num) \ @@ -285,9 +280,9 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a"(_ret) \ + : "=a" (_ret) \ : "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -300,10 +295,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a"(_ret) \ + : "=a" (_ret) \ : "r"(_arg1), \ "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -317,10 +312,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a"(_ret) \ + : "=a" (_ret) \ : "r"(_arg1), "r"(_arg2), \ "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -335,10 +330,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a"(_ret) \ + : "=a" (_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -354,10 +349,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a"(_ret) \ + : "=a" (_ret), "=r"(_arg4) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ + : "rcx", "r8", "r9", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -374,10 +369,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a"(_ret) \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ + : "rcx", "r9", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -395,7 +390,7 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a"(_ret) \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ : "r"(_arg1), "r"(_arg2),
"r"(_arg3), "r"(_arg4), "r"(_arg5), \ "r"(_arg6), "0"(_num) \ : "rcx", "r11", "memory", "cc" \ @@ -420,7 +415,7 @@ asm(".section .text\n" "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call "call main\n" // main() returns the status code, we'll exit with it. "mov %eax, %edi\n" // retrieve exit code (32 bit) - "mov $60, %eax\n" // NR_exit == 60 + "mov $60, %rax\n" // NR_exit == 60 "syscall\n" // really exit "hlt\n" // ensure it does not return ""); @@ -1571,12 +1566,6 @@ pid_t sys_getpid(void) return my_syscall0(__NR_getpid); } -static __attribute__((unused)) -pid_t sys_gettid(void) -{ - return my_syscall0(__NR_gettid); -} - static __attribute__((unused)) int sys_gettimeofday(struct timeval *tv, struct timezone *tz) { @@ -2035,18 +2024,6 @@ pid_t getpid(void) return ret; } -static __attribute__((unused)) -pid_t gettid(void) -{ - pid_t ret = sys_gettid(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - static __attribute__((unused)) int gettimeofday(struct timeval *tv, struct timezone *tz) { diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 1c48b0ae3b..1c5fb86d45 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -880,14 +880,8 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #define __NR_process_mrelease 448 __SYSCALL(__NR_process_mrelease, sys_process_mrelease) -#define __NR_futex_waitv 449 -__SYSCALL(__NR_futex_waitv, sys_futex_waitv) - -#define __NR_set_mempolicy_home_node 450 -__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) - #undef __NR_syscalls -#define __NR_syscalls 451 +#define __NR_syscalls 449 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 642808520d..3b810b53ba 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1096,24 +1096,6 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) -/** - * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. - * - * This queries metadata about a framebuffer. User-space fills - * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the - * struct as the output. - * - * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles - * will be filled with GEM buffer handles. Planes are valid until one has a - * zero handle -- this can be used to compute the number of planes. - * - * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid - * until one has a zero &drm_mode_fb_cmd2.pitches. - * - * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set - * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the - * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. - */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) /* diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 914ebd9290..bde5860b36 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -1522,12 +1522,6 @@ struct drm_i915_gem_caching { #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 -/* - * Do not add new tiling types here. The I915_TILING_* values are for - * de-tiling fence registers that no longer exist on modern platforms. 
Although - * the hardware may support new types of tiling in general (e.g., Tile4), we - * do not need to add them to the uapi that is specific to now-defunct ioctls. - */ #define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 @@ -1830,7 +1824,6 @@ struct drm_i915_gem_context_param { * Extensions: * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) - * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) */ #define I915_CONTEXT_PARAM_ENGINES 0xa @@ -1853,55 +1846,6 @@ struct drm_i915_gem_context_param { * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc - -/* - * I915_CONTEXT_PARAM_PROTECTED_CONTENT: - * - * Mark that the context makes use of protected content, which will result - * in the context being invalidated when the protected content session is. - * Given that the protected content session is killed on suspend, the device - * is kept awake for the lifetime of a protected context, so the user should - * make sure to dispose of them once done. - * This flag can only be set at context creation time and, when set to true, - * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE - * to false. This flag can't be set to true in conjunction with setting the - * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: - * - * .. code-block:: C - * - * struct drm_i915_gem_context_create_ext_setparam p_protected = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, - * .value = 1, - * } - * }; - * struct drm_i915_gem_context_create_ext_setparam p_norecover = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * .next_extension = to_user_pointer(&p_protected), - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_RECOVERABLE, - * .value = 0, - * } - * }; - * struct drm_i915_gem_context_create_ext create = { - * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, - * .extensions = to_user_pointer(&p_norecover); - * }; - * - * ctx_id = gem_context_create_ext(drm_fd, &create); - * - * In addition to the normal failure cases, setting this flag during context - * creation can result in the following errors: - * - * -ENODEV: feature not available - * -EPERM: trying to mark a recoverable or not bannable context as protected - */ -#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd /* Must be kept compact -- no holes and well documented */ __u64 value; @@ -2105,135 +2049,6 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ -/** - * struct i915_context_engines_parallel_submit - Configure engine for - * parallel submission. - * - * Setup a slot in the context engine map to allow multiple BBs to be submitted - * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU - * in parallel. Multiple hardware contexts are created internally in the i915 to - * run these BBs. Once a slot is configured for N BBs only N BBs can be - * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user - * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how - * many BBs there are based on the slot's configuration. The N BBs are the last - * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. 
- * - * The default placement behavior is to create implicit bonds between each - * context if each context maps to more than 1 physical engine (e.g. context is - * a virtual engine). Also we only allow contexts of same engine class and these - * contexts must be in logically contiguous order. Examples of the placement - * behavior are described below. Lastly, the default is to not allow BBs to be - * preempted mid-batch. Rather insert coordinated preemption points on all - * hardware contexts between each set of BBs. Flags could be added in the future - * to change both of these default behaviors. - * - * Returns -EINVAL if hardware context placement configuration is invalid or if - * the placement configuration isn't supported on the platform / submission - * interface. - * Returns -ENODEV if extension isn't supported on the platform / submission - * interface. - * - * .. code-block:: none - * - * Examples syntax: - * CS[X] = generic engine of same class, logical instance X - * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE - * - * Example 1 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=1, - * engines=CS[0],CS[1]) - * - * Results in the following valid placement: - * CS[0], CS[1] - * - * Example 2 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=2, - * engines=CS[0],CS[2],CS[1],CS[3]) - * - * Results in the following valid placements: - * CS[0], CS[1] - * CS[2], CS[3] - * - * This can be thought of as two virtual engines, each containing two - * engines thereby making a 2D array. However, there are bonds tying the - * entries together and placing restrictions on how they can be scheduled. - * Specifically, the scheduler can choose only vertical columns from the 2D - * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the - * scheduler wants to submit to CS[0], it must also choose CS[1] and vice - * versa. Same for CS[2] requires also using CS[3]. - * VE[0] = CS[0], CS[2] - * VE[1] = CS[1], CS[3] - * - * Example 3 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=2, - * engines=CS[0],CS[1],CS[1],CS[3]) - * - * Results in the following valid and invalid placements: - * CS[0], CS[1] - * CS[1], CS[3] - Not logically contiguous, return -EINVAL - */ -struct i915_context_engines_parallel_submit { - /** - * @base: base user extension. 
- */ - struct i915_user_extension base; - - /** - * @engine_index: slot for parallel engine - */ - __u16 engine_index; - - /** - * @width: number of contexts per parallel engine or in other words the - * number of batches in each submission - */ - __u16 width; - - /** - * @num_siblings: number of siblings per context or in other words the - * number of possible placements for each submission - */ - __u16 num_siblings; - - /** - * @mbz16: reserved for future use; must be zero - */ - __u16 mbz16; - - /** - * @flags: all undefined flags must be zero, currently not defined flags - */ - __u64 flags; - - /** - * @mbz64: reserved for future use; must be zero - */ - __u64 mbz64[3]; - - /** - * @engines: 2-d array of engine instances to configure parallel engine - * - * length = width (i) * num_siblings (j) - * index = j + i * num_siblings - */ - struct i915_engine_class_instance engines[0]; - -} __packed; - -#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ - struct i915_user_extension base; \ - __u16 engine_index; \ - __u16 width; \ - __u16 num_siblings; \ - __u16 mbz16; \ - __u64 flags; \ - __u64 mbz64[3]; \ - struct i915_engine_class_instance engines[N__]; \ -} __attribute__((packed)) name__ - /** * DOC: Context Engine Map uAPI * @@ -2293,7 +2108,6 @@ struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ -#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ struct i915_engine_class_instance engines[0]; } __attribute__((packed)); @@ -2912,20 +2726,14 @@ struct drm_i915_engine_info { /** @flags: Engine flags. */ __u64 flags; -#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0) /** @capabilities: Capabilities of this engine. */ __u64 capabilities; #define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) - /** @logical_instance: Logical instance of engine */ - __u16 logical_instance; - /** @rsvd1: Reserved fields. */ - __u16 rsvd1[3]; - /** @rsvd2: Reserved fields. */ - __u64 rsvd2[3]; + __u64 rsvd1[4]; }; /** @@ -3171,12 +2979,8 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see * struct drm_i915_gem_create_ext_memory_regions. - * - * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see - * struct drm_i915_gem_create_ext_protected_content. */ #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 -#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 __u64 extensions; }; @@ -3234,50 +3038,6 @@ struct drm_i915_gem_create_ext_memory_regions { __u64 regions; }; -/** - * struct drm_i915_gem_create_ext_protected_content - The - * I915_OBJECT_PARAM_PROTECTED_CONTENT extension. - * - * If this extension is provided, buffer contents are expected to be protected - * by PXP encryption and require decryption for scan out and processing. This - * is only possible on platforms that have PXP enabled, on all other scenarios - * using this extension will cause the ioctl to fail and return -ENODEV. The - * flags parameter is reserved for future expansion and must currently be set - * to zero. - * - * The buffer contents are considered invalid after a PXP session teardown. - * - * The encryption is guaranteed to be processed correctly only if the object - * is submitted with a context created using the - * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. 
This will also enable extra checks - * at submission time on the validity of the objects involved. - * - * Below is an example on how to create a protected object: - * - * .. code-block:: C - * - * struct drm_i915_gem_create_ext_protected_content protected_ext = { - * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT }, - * .flags = 0, - * }; - * struct drm_i915_gem_create_ext create_ext = { - * .size = PAGE_SIZE, - * .extensions = (uintptr_t)&protected_ext, - * }; - * - * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); - * if (err) ... - */ -struct drm_i915_gem_create_ext_protected_content { - /** @base: Extension link. See struct i915_user_extension. */ - struct i915_user_extension base; - /** @flags: reserved for future usage, currently MBZ */ - __u32 flags; -}; - -/* ID of the protected content session managed by i915 when PXP is active */ -#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf - #if defined(__cplusplus) } #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b0383d371b..791f31dd0a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -906,7 +906,6 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, - BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1275,13 +1274,6 @@ union bpf_attr { * struct stored as the * map value */ - /* Any per-map-type extra fields - * - * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the - * number of hash functions (if 0, the bloom filter will default - * to using 5 hash functions). - */ - __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1342,10 +1334,8 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; - __u32 core_relo_cnt; /* number of bpf_core_relo */ + __u32 :32; /* pad */ __aligned_u64 fd_array; /* array of FDs */ - __aligned_u64 core_relos; - __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1639,7 +1629,7 @@ union bpf_attr { * u32 bpf_get_smp_processor_id(void) * Description * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with migration disabled, which means that the + * all programs run with preemption disabled, which means that the * SMP processor id is stable during all the execution of the * program. * Return @@ -1746,7 +1736,7 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 33. + * which is currently set to 32. * Return * 0 on success, or a negative error in case of failure. * @@ -4056,7 +4046,7 @@ union bpf_attr { * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the size of *data* in bytes - must be a multiple of 8. + * The *data_len* is the size of *data* in bytes. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or @@ -4761,8 +4751,7 @@ union bpf_attr { * Each format specifier in **fmt** corresponds to one u64 element * in the **data** array. 
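The u64-array calling convention described here is shared by the printf-family helpers. A hedged sketch of it from inside a BPF program, using bpf_snprintf() as a representative of the family (identifiers are illustrative):

    __u64 pid = bpf_get_current_pid_tgid() >> 32;
    char comm[16], out[64];
    static const char fmt[] = "pid=%d comm=%s";
    __u64 data[2];

    bpf_get_current_comm(comm, sizeof(comm));
    data[0] = pid;                  /* one u64 per format specifier */
    data[1] = (__u64)(long)comm;    /* %s stores only the pointer value */
    bpf_snprintf(out, sizeof(out), fmt, data, sizeof(data));

Since each specifier consumes one __u64 slot, sizeof(data) is naturally a multiple of 8.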
For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* - * array. The *data_len* is the size of *data* in bytes - must be - * a multiple of 8. + * array. The *data_len* is the size of *data* in bytes. * * Formats **%s** and **%p{i,I}{4,6}** require to read kernel * memory. Reading kernel memory may fail due to either invalid @@ -4888,136 +4877,6 @@ union bpf_attr { * Get the struct pt_regs associated with **task**. * Return * A pointer to struct pt_regs. - * - * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags) - * Description - * Get branch trace from hardware engines like Intel LBR. The - * hardware engine is stopped shortly after the helper is - * called. Therefore, the user need to filter branch entries - * based on the actual use case. To capture branch trace - * before the trigger point of the BPF program, the helper - * should be called at the beginning of the BPF program. - * - * The data is stored as struct perf_branch_entry into output - * buffer *entries*. *size* is the size of *entries* in bytes. - * *flags* is reserved for now and must be zero. - * - * Return - * On success, number of bytes written to *buf*. On error, a - * negative value. - * - * **-EINVAL** if *flags* is not zero. - * - * **-ENOENT** if architecture does not support branch records. - * - * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len) - * Description - * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 - * to format and can handle more format args as a result. - * - * Arguments are to be used as in **bpf_seq_printf**\ () helper. - * Return - * The number of bytes written to the buffer, or a negative error - * in case of failure. - * - * struct unix_sock *bpf_skc_to_unix_sock(void *sk) - * Description - * Dynamically cast a *sk* pointer to a *unix_sock* pointer. - * Return - * *sk* if casting is valid, or **NULL** otherwise. - * - * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) - * Description - * Get the address of a kernel symbol, returned in *res*. *res* is - * set to 0 if the symbol is not found. - * Return - * On success, zero. On error, a negative value. - * - * **-EINVAL** if *flags* is not zero. - * - * **-EINVAL** if string *name* is not the same size as *name_sz*. - * - * **-ENOENT** if symbol is not found. - * - * **-EPERM** if caller does not have permission to obtain kernel address. - * - * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) - * Description - * Find vma of *task* that contains *addr*, call *callback_fn* - * function with *task*, *vma*, and *callback_ctx*. - * The *callback_fn* should be a static function and - * the *callback_ctx* should be a pointer to the stack. - * The *flags* is used to control certain aspects of the helper. - * Currently, the *flags* must be 0. - * - * The expected callback signature is - * - * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); - * - * Return - * 0 on success. - * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. - * **-EBUSY** if failed to try lock mmap_lock. - * **-EINVAL** for invalid **flags**. - * - * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) - * Description - * For **nr_loops**, call **callback_fn** function - * with **callback_ctx** as the context parameter. 
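A hedged sketch of the bpf_loop() calling pattern whose description begins here (the helper is removed by this patch; the callback and context names are illustrative):

    struct loop_ctx { __u64 sum; };

    static long loop_cb(__u32 index, void *ctx)
    {
            struct loop_ctx *lc = ctx;

            lc->sum += index;
            return 0;       /* 0 continues; 1 skips the remaining loops */
    }

    /* ... in the program body, with lc on the BPF stack: */
    struct loop_ctx lc = {};
    bpf_loop(100, loop_cb, &lc, 0);  /* flags must currently be 0 */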
- * The **callback_fn** should be a static function and - * the **callback_ctx** should be a pointer to the stack. - * The **flags** is used to control certain aspects of the helper. - * Currently, the **flags** must be 0. Currently, nr_loops is - * limited to 1 << 23 (~8 million) loops. - * - * long (\*callback_fn)(u32 index, void \*ctx); - * - * where **index** is the current index in the loop. The index - * is zero-indexed. - * - * If **callback_fn** returns 0, the helper will continue to the next - * loop. If return value is 1, the helper will skip the rest of - * the loops and return. Other return values are not used now, - * and will be rejected by the verifier. - * - * Return - * The number of loops performed, **-EINVAL** for invalid **flags**, - * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. - * - * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) - * Description - * Do strncmp() between **s1** and **s2**. **s1** doesn't need - * to be null-terminated and **s1_sz** is the maximum storage - * size of **s1**. **s2** must be a read-only string. - * Return - * An integer less than, equal to, or greater than zero - * if the first **s1_sz** bytes of **s1** is found to be - * less than, to match, or be greater than **s2**. - * - * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) - * Description - * Get **n**-th argument (zero based) of the traced function (for tracing programs) - * returned in **value**. - * - * Return - * 0 on success. - * **-EINVAL** if n >= arguments count of traced function. - * - * long bpf_get_func_ret(void *ctx, u64 *value) - * Description - * Get return value of the traced function (for tracing programs) - * in **value**. - * - * Return - * 0 on success. - * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. - * - * long bpf_get_func_arg_cnt(void *ctx) - * Description - * Get number of arguments of the traced function (for tracing programs). - * - * Return - * The number of arguments of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5196,16 +5055,6 @@ union bpf_attr { FN(get_func_ip), \ FN(get_attach_cookie), \ FN(task_pt_regs), \ - FN(get_branch_snapshot), \ - FN(trace_vprintk), \ - FN(skc_to_unix_sock), \ - FN(kallsyms_lookup_name), \ - FN(find_vma), \ - FN(loop), \ - FN(strncmp), \ - FN(get_func_arg), \ - FN(get_func_ret), \ - FN(get_func_arg_cnt), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5435,8 +5284,6 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u32 :32; /* Padding, future use. */ - __u64 hwtstamp; }; struct bpf_tunnel_key { @@ -5730,7 +5577,6 @@ struct bpf_prog_info { __u64 run_time_ns; __u64 run_cnt; __u64 recursion_misses; - __u32 verified_insns; } __attribute__((aligned(8))); struct bpf_map_info { @@ -5748,8 +5594,6 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; - __u32 :32; /* alignment pad */ - __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { @@ -6382,7 +6226,6 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ - __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ }; /* @@ -6415,78 +6258,4 @@ enum { BTF_F_ZERO = (1ULL << 3), }; -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations. 
It is emitted by llvm and passed to - * libbpf and later to the kernel. - */ -enum bpf_core_relo_kind { - BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ - BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ - BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ - BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ - BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ - BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ - BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ - BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ - BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ - BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ -}; - -/* - * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf - * and from libbpf to the kernel. - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * type or field; - * - access_str_off - offset into corresponding .BTF string section. String - * interpretation depends on specific relocation kind: - * - for field-based relocations, string encodes an accessed field using - * a sequence of field and array indices, separated by colon (:). It's - * conceptually very close to LLVM's getelementptr ([0]) instruction's - * arguments for identifying offset to a field. - * - for type-based relocations, strings is expected to be just "0"; - * - for enum value-based relocations, string contains an index of enum - * value within its enum type; - * - kind - one of enum bpf_core_relo_kind; - * - * Example: - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample *s = ...; - * int *x = &s->a; // encoded as "0:0" (a is field #0) - * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - * __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. - * - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { - __u32 insn_off; - __u32 type_id; - __u32 access_str_off; - enum bpf_core_relo_kind kind; -}; - #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index b0d8fea195..d27b1708ef 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. + * FUNC, FUNC_PROTO and VAR. * "type" is a type_id referring to another type. 
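As a concrete illustration of the size/type union described here, a hedged sketch of hand-encoding a BTF_KIND_PTR entry (the type ids are assumed, and the kind occupies bits 24-28 of "info" per the accessor macros in this header):

    struct btf_type ptr_type = {
            .name_off = 0,                  /* pointer types are anonymous */
            .info = BTF_KIND_PTR << 24,     /* kind = PTR, vlen = 0, kflag = 0 */
            .type = 2,                      /* refers to type id 2, e.g. a struct */
    };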
*/ union { @@ -56,30 +56,25 @@ struct btf_type { #define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) -enum { - BTF_KIND_UNKN = 0, /* Unknown */ - BTF_KIND_INT = 1, /* Integer */ - BTF_KIND_PTR = 2, /* Pointer */ - BTF_KIND_ARRAY = 3, /* Array */ - BTF_KIND_STRUCT = 4, /* Struct */ - BTF_KIND_UNION = 5, /* Union */ - BTF_KIND_ENUM = 6, /* Enumeration */ - BTF_KIND_FWD = 7, /* Forward */ - BTF_KIND_TYPEDEF = 8, /* Typedef */ - BTF_KIND_VOLATILE = 9, /* Volatile */ - BTF_KIND_CONST = 10, /* Const */ - BTF_KIND_RESTRICT = 11, /* Restrict */ - BTF_KIND_FUNC = 12, /* Function */ - BTF_KIND_FUNC_PROTO = 13, /* Function Proto */ - BTF_KIND_VAR = 14, /* Variable */ - BTF_KIND_DATASEC = 15, /* Section */ - BTF_KIND_FLOAT = 16, /* Floating point */ - BTF_KIND_DECL_TAG = 17, /* Decl Tag */ - BTF_KIND_TYPE_TAG = 18, /* Type Tag */ - - NR_BTF_KINDS, - BTF_KIND_MAX = NR_BTF_KINDS - 1, -}; +#define BTF_KIND_UNKN 0 /* Unknown */ +#define BTF_KIND_INT 1 /* Integer */ +#define BTF_KIND_PTR 2 /* Pointer */ +#define BTF_KIND_ARRAY 3 /* Array */ +#define BTF_KIND_STRUCT 4 /* Struct */ +#define BTF_KIND_UNION 5 /* Union */ +#define BTF_KIND_ENUM 6 /* Enumeration */ +#define BTF_KIND_FWD 7 /* Forward */ +#define BTF_KIND_TYPEDEF 8 /* Typedef */ +#define BTF_KIND_VOLATILE 9 /* Volatile */ +#define BTF_KIND_CONST 10 /* Const */ +#define BTF_KIND_RESTRICT 11 /* Restrict */ +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#define BTF_KIND_FLOAT 16 /* Floating point */ +#define BTF_KIND_MAX BTF_KIND_FLOAT +#define NR_BTF_KINDS (BTF_KIND_MAX + 1) /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -175,15 +170,4 @@ struct btf_var_secinfo { __u32 size; }; -/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe - * additional information related to the tag applied location. - * If component_idx == -1, the tag is applied to a struct, union, - * variable or function. Otherwise, it is applied to a struct/union - * member or a func argument, and component_idx indicates which member - * or argument (0 ... vlen-1). 
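The component_idx convention above maps directly onto clang's btf_decl_tag attribute. A hedged sketch (the attribute requires a recent clang; the struct and tag string are illustrative):

    struct s {
            int a;
            /* member #1 (zero-based): emits BTF_KIND_DECL_TAG with
             * component_idx == 1 */
            int b __attribute__((btf_decl_tag("user_data")));
    };

    /* tagging the struct itself instead would yield component_idx == -1 */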
- */ -struct btf_decl_tag { - __s32 component_idx; -}; - #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 6218f93f5c..b3610fdd1f 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -7,23 +7,24 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; - __u32 tx_packets; - __u32 rx_bytes; - __u32 tx_bytes; - __u32 rx_errors; - __u32 tx_errors; - __u32 rx_dropped; - __u32 tx_dropped; - __u32 multicast; + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ __u32 collisions; + /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; - __u32 rx_crc_errors; - __u32 rx_frame_errors; - __u32 rx_fifo_errors; - __u32 rx_missed_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -36,201 +37,29 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; + __u32 rx_nohandler; /* dropped, no handler found */ }; -/** - * struct rtnl_link_stats64 - The main device statistics structure. - * - * @rx_packets: Number of good packets received by the interface. - * For hardware interfaces counts all good packets received from the device - * by the host, including packets which host had to drop at various stages - * of processing (even in the driver). - * - * @tx_packets: Number of packets successfully transmitted. - * For hardware interfaces counts packets which host was able to successfully - * hand over to the device, which does not necessarily mean that packets - * had been successfully transmitted out of the device, only that device - * acknowledged it copied them out of host memory. - * - * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. - * - * For IEEE 802.3 devices should count the length of Ethernet Frames - * excluding the FCS. - * - * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. - * - * For IEEE 802.3 devices should count the length of Ethernet Frames - * excluding the FCS. - * - * @rx_errors: Total number of bad packets received on this network device. - * This counter must include events counted by @rx_length_errors, - * @rx_crc_errors, @rx_frame_errors and other errors not otherwise - * counted. - * - * @tx_errors: Total number of transmit problems. - * This counter must include events counter by @tx_aborted_errors, - * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, - * @tx_window_errors and other errors not otherwise counted. - * - * @rx_dropped: Number of packets received but not processed, - * e.g. due to lack of resources or unsupported protocol. 
- * For hardware interfaces this counter may include packets discarded - * due to L2 address filtering but should not include packets dropped - * by the device due to buffer exhaustion which are counted separately in - * @rx_missed_errors (since procfs folds those two counters together). - * - * @tx_dropped: Number of packets dropped on their way to transmission, - * e.g. due to lack of resources. - * - * @multicast: Multicast packets received. - * For hardware interfaces this statistic is commonly calculated - * at the device level (unlike @rx_packets) and therefore may include - * packets which did not reach the host. - * - * For IEEE 802.3 devices this counter may be equivalent to: - * - * - 30.3.1.1.21 aMulticastFramesReceivedOK - * - * @collisions: Number of collisions during packet transmissions. - * - * @rx_length_errors: Number of packets dropped due to invalid length. - * Part of aggregate "frame" errors in `/proc/net/dev`. - * - * For IEEE 802.3 devices this counter should be equivalent to a sum - * of the following attributes: - * - * - 30.3.1.1.23 aInRangeLengthErrors - * - 30.3.1.1.24 aOutOfRangeLengthField - * - 30.3.1.1.25 aFrameTooLongErrors - * - * @rx_over_errors: Receiver FIFO overflow event counter. - * - * Historically the count of overflow events. Such events may be - * reported in the receive descriptors or via interrupts, and may - * not correspond one-to-one with dropped packets. - * - * The recommended interpretation for high speed interfaces is - - * number of packets dropped because they did not fit into buffers - * provided by the host, e.g. packets larger than MTU or next buffer - * in the ring was not available for a scatter transfer. - * - * Part of aggregate "frame" errors in `/proc/net/dev`. - * - * This statistics was historically used interchangeably with - * @rx_fifo_errors. - * - * This statistic corresponds to hardware events and is not commonly used - * on software devices. - * - * @rx_crc_errors: Number of packets received with a CRC error. - * Part of aggregate "frame" errors in `/proc/net/dev`. - * - * For IEEE 802.3 devices this counter must be equivalent to: - * - * - 30.3.1.1.6 aFrameCheckSequenceErrors - * - * @rx_frame_errors: Receiver frame alignment errors. - * Part of aggregate "frame" errors in `/proc/net/dev`. - * - * For IEEE 802.3 devices this counter should be equivalent to: - * - * - 30.3.1.1.7 aAlignmentErrors - * - * @rx_fifo_errors: Receiver FIFO error counter. - * - * Historically the count of overflow events. Those events may be - * reported in the receive descriptors or via interrupts, and may - * not correspond one-to-one with dropped packets. - * - * This statistics was used interchangeably with @rx_over_errors. - * Not recommended for use in drivers for high speed interfaces. - * - * This statistic is used on software devices, e.g. to count software - * packet queue overflow (can) or sequencing errors (GRE). - * - * @rx_missed_errors: Count of packets missed by the host. - * Folded into the "drop" counter in `/proc/net/dev`. - * - * Counts number of packets dropped by the device due to lack - * of buffer space. This usually indicates that the host interface - * is slower than the network interface, or host is not keeping up - * with the receive packet rate. - * - * This statistic corresponds to hardware events and is not used - * on software devices. - * - * @tx_aborted_errors: - * Part of aggregate "carrier" errors in `/proc/net/dev`. 
- * For IEEE 802.3 devices capable of half-duplex operation this counter - * must be equivalent to: - * - * - 30.3.1.1.11 aFramesAbortedDueToXSColls - * - * High speed interfaces may use this counter as a general device - * discard counter. - * - * @tx_carrier_errors: Number of frame transmission errors due to loss - * of carrier during transmission. - * Part of aggregate "carrier" errors in `/proc/net/dev`. - * - * For IEEE 802.3 devices this counter must be equivalent to: - * - * - 30.3.1.1.13 aCarrierSenseErrors - * - * @tx_fifo_errors: Number of frame transmission errors due to device - * FIFO underrun / underflow. This condition occurs when the device - * begins transmission of a frame but is unable to deliver the - * entire frame to the transmitter in time for transmission. - * Part of aggregate "carrier" errors in `/proc/net/dev`. - * - * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for - * old half-duplex Ethernet. - * Part of aggregate "carrier" errors in `/proc/net/dev`. - * - * For IEEE 802.3 devices possibly equivalent to: - * - * - 30.3.2.1.4 aSQETestErrors - * - * @tx_window_errors: Number of frame transmission errors due - * to late collisions (for Ethernet - after the first 64B of transmission). - * Part of aggregate "carrier" errors in `/proc/net/dev`. - * - * For IEEE 802.3 devices this counter must be equivalent to: - * - * - 30.3.1.1.10 aLateCollisions - * - * @rx_compressed: Number of correctly received compressed packets. - * This counters is only meaningful for interfaces which support - * packet compression (e.g. CSLIP, PPP). - * - * @tx_compressed: Number of transmitted compressed packets. - * This counters is only meaningful for interfaces which support - * packet compression (e.g. CSLIP, PPP). - * - * @rx_nohandler: Number of packets received on the interface - * but dropped by the networking stack because the device is - * not designated to receive packets (e.g. backup link in a bond). 
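Several of the counters above note that they are folded into aggregate columns of /proc/net/dev. A hedged sketch of that folding, grouping the fields exactly as the kernel-doc above describes:

    static __u64 procfs_rx_frame(const struct rtnl_link_stats64 *s)
    {
            /* "frame" aggregates the four detailed rx error members above */
            return s->rx_length_errors + s->rx_over_errors +
                   s->rx_crc_errors + s->rx_frame_errors;
    }

    static __u64 procfs_rx_drop(const struct rtnl_link_stats64 *s)
    {
            /* rx_missed_errors is folded into "drop", as noted above */
            return s->rx_dropped + s->rx_missed_errors;
    }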
- */ +/* The main device statistics structure */ struct rtnl_link_stats64 { - __u64 rx_packets; - __u64 tx_packets; - __u64 rx_bytes; - __u64 tx_bytes; - __u64 rx_errors; - __u64 tx_errors; - __u64 rx_dropped; - __u64 tx_dropped; - __u64 multicast; + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ + __u64 multicast; /* multicast packets received */ __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; - __u64 rx_crc_errors; - __u64 rx_frame_errors; - __u64 rx_fifo_errors; - __u64 rx_missed_errors; + __u64 rx_over_errors; /* receiver ring buff overflow */ + __u64 rx_crc_errors; /* recved pkt with crc error */ + __u64 rx_frame_errors; /* recv'd frame alignment error */ + __u64 rx_fifo_errors; /* recv'r fifo overrun */ + __u64 rx_missed_errors; /* receiver missed packet */ /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -242,7 +71,8 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; - __u64 rx_nohandler; + + __u64 rx_nohandler; /* dropped, no handler found */ }; /* The struct should be in sync with struct ifmap */ @@ -340,30 +170,12 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, - IFLA_PROTO_DOWN_REASON, - - /* device (sysfs) name as parent, used instead - * of IFLA_LINK where there's no parent netdev - */ - IFLA_PARENT_DEV_NAME, - IFLA_PARENT_DEV_BUS_NAME, - IFLA_GRO_MAX_SIZE, - __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) -enum { - IFLA_PROTO_DOWN_REASON_UNSPEC, - IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ - IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ - - __IFLA_PROTO_DOWN_REASON_CNT, - IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 -}; - /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) @@ -481,7 +293,6 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, - IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; @@ -535,8 +346,6 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, - IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, - IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -624,7 +433,6 @@ enum macvlan_macaddr_mode { }; #define MACVLAN_FLAG_NOPROMISC 1 -#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ /* VRF section */ enum { @@ -789,18 +597,6 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; -/* Bareudp section */ -enum { - IFLA_BAREUDP_UNSPEC, - IFLA_BAREUDP_PORT, - IFLA_BAREUDP_ETHERTYPE, - IFLA_BAREUDP_SRCPORT_MIN, - IFLA_BAREUDP_MULTIPROTO_MODE, - __IFLA_BAREUDP_MAX -}; - -#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) - /* PPP section */ enum { IFLA_PPP_UNSPEC, @@ -859,7 +655,6 @@ enum { IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, - IFLA_BOND_MISSED_MAX, __IFLA_BOND_MAX, }; @@ -1104,14 +899,7 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR/PRP section, both uses same interface */ - -/* Different redundancy protocols for hsr device */ -enum { - HSR_PROTOCOL_HSR, - 
HSR_PROTOCOL_PRP, - HSR_PROTOCOL_MAX, -}; +/* HSR section */ enum { IFLA_HSR_UNSPEC, @@ -1121,9 +909,6 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ - IFLA_HSR_PROTOCOL, /* Indicate different protocol than - * HSR. For example PRP. - */ __IFLA_HSR_MAX, }; @@ -1248,8 +1033,6 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) -#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) -#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, @@ -1265,14 +1048,4 @@ struct ifla_rmnet_flags { __u32 mask; }; -/* MCTP section */ - -enum { - IFLA_MCTP_UNSPEC, - IFLA_MCTP_NET, - __IFLA_MCTP_MAX, -}; - -#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) - #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 507ee1f2aa..a067410ebe 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -269,7 +269,6 @@ struct kvm_xen_exit { #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 -#define KVM_EXIT_RISCV_SBI 35 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -398,23 +397,13 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). - * - * Space beyond the defined fields may be used to store arbitrary - * debug information relating to the emulation failure. It is - * accounted for in "ndata" but the format is unspecified and is - * not represented in "flags". Any such information is *not* ABI! */ struct { __u32 suberror; __u32 ndata; __u64 flags; - union { - struct { - __u8 insn_size; - __u8 insn_bytes[15]; - }; - }; - /* Arbitrary debug data may follow. */ + __u8 insn_size; + __u8 insn_bytes[15]; } emulation_failure; /* KVM_EXIT_OSI */ struct { @@ -480,13 +469,6 @@ struct kvm_run { } msr; /* KVM_EXIT_XEN */ struct kvm_xen_exit xen; - /* KVM_EXIT_RISCV_SBI */ - struct { - unsigned long extension_id; - unsigned long function_id; - unsigned long args[6]; - unsigned long ret[2]; - } riscv_sbi; /* Fix the size of the union. 
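For reference, a hedged sketch of how a userspace VMM would have consumed the KVM_EXIT_RISCV_SBI payload removed above; handle_sbi() is a hypothetical helper, and the error/value split across ret[0]/ret[1] is assumed from the SBI calling convention:

    switch (run->exit_reason) {
    case KVM_EXIT_RISCV_SBI:
            /* emulate the call, then report error and value back */
            run->riscv_sbi.ret[0] = handle_sbi(run->riscv_sbi.extension_id,
                                               run->riscv_sbi.function_id,
                                               run->riscv_sbi.args,
                                               &run->riscv_sbi.ret[1]);
            break;
    }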
*/ char padding[256]; }; @@ -1130,11 +1112,6 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 -#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 -#define KVM_CAP_VM_GPA_BITS 207 -#define KVM_CAP_XSAVE2 208 -#define KVM_CAP_SYS_ATTRIBUTES 209 -#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING @@ -1166,20 +1143,11 @@ struct kvm_irq_routing_hv_sint { __u32 sint; }; -struct kvm_irq_routing_xen_evtchn { - __u32 port; - __u32 vcpu; - __u32 priority; -}; - -#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) - /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 -#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 struct kvm_irq_routing_entry { __u32 gsi; @@ -1191,7 +1159,6 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_hv_sint hv_sint; - struct kvm_irq_routing_xen_evtchn xen_evtchn; __u32 pad[8]; } u; }; @@ -1222,7 +1189,6 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) -#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) struct kvm_xen_hvm_config { __u32 flags; @@ -1257,16 +1223,11 @@ struct kvm_irqfd { /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ #define KVM_CLOCK_TSC_STABLE 2 -#define KVM_CLOCK_REALTIME (1 << 2) -#define KVM_CLOCK_HOST_TSC (1 << 3) struct kvm_clock_data { __u64 clock; __u32 flags; - __u32 pad0; - __u64 realtime; - __u64 host_tsc; - __u32 pad[4]; + __u32 pad[9]; }; /* For KVM_CAP_SW_TLB */ @@ -2046,7 +2007,4 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 82858b697c..f92880a156 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -465,8 +465,6 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. - * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be - * truncated accordingly on 32 bit architectures. */ __u64 sig_data; }; @@ -1143,21 +1141,6 @@ enum perf_event_type { */ PERF_RECORD_TEXT_POKE = 20, - /* - * Data written to the AUX area by hardware due to aux_output, may need - * to be matched to the event by an architecture-specific hardware ID. - * This records the hardware ID, but requires sample_id to provide the - * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT - * records from multiple events. 
- * - * struct { - * struct perf_event_header header; - * u64 hw_id; - * struct sample_id sample_id; - * }; - */ - PERF_RECORD_AUX_OUTPUT_HW_ID = 21, - PERF_RECORD_MAX, /* non-ABI */ }; @@ -1227,16 +1210,14 @@ union perf_mem_data_src { mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + mem_rsvd:21; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ + __u64 mem_rsvd:21, mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ @@ -1260,13 +1241,7 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* - * PERF_MEM_LVL_* namespace being depricated to some extent in the - * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. - * - * memory hierarchy (memory level, hit or miss) - */ +/* memory hierarchy (memory level, hit or miss) */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ @@ -1332,14 +1307,6 @@ union perf_mem_data_src { #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_SHIFT 40 -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ -/* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 - #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index e998764f02..43bd7f713c 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -235,7 +235,7 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) /* MTE tag check fault modes */ -# define PR_MTE_TCF_NONE 0UL +# define PR_MTE_TCF_NONE 0 # define PR_MTE_TCF_SYNC (1UL << 1) # define PR_MTE_TCF_ASYNC (1UL << 2) # define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) @@ -268,11 +268,5 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ # define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ # define PR_SCHED_CORE_MAX 4 -# define PR_SCHED_CORE_SCOPE_THREAD 0 -# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 -# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 - -#define PR_SET_VMA 0x53564d41 -# define PR_SET_VMA_ANON_NAME 0 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 2d3e5df39a..93e40f91bd 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -204,11 +204,6 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ -/* - * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the - * available bit count in most significant bit. It's for the case of so-called 'left-justified' or - * `right-padding` sample which has less width than 32 bit. 
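A hedged sketch of consuming samples under the msbits convention described above, for a 24-valid-bit, left-justified sample carried in an S32 container (msbits would be 24):

    static int32_t sample_value(int32_t raw, unsigned int msbits)
    {
            /* arithmetic shift preserves the sign while dropping pad bits */
            return raw >> (32 - msbits);
    }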
- */ #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) @@ -307,7 +302,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ #define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ -#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */ + #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ @@ -1009,7 +1004,7 @@ typedef int __bitwise snd_ctl_elem_iface_t; #define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1) #define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE) #define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */ -/* (1 << 3) is unused. */ +// (1 << 3) is unused. #define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 0da84cb9e6..5d4cfac671 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +libbpf_version.h libbpf.pc libbpf.so.* TAGS diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f947b61b21..74c3b73a5f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -8,8 +8,7 @@ VERSION_SCRIPT := libbpf.map LIBBPF_VERSION := $(shell \ grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \ sort -rV | head -n1 | cut -d'_' -f2) -LIBBPF_MAJOR_VERSION := $(word 1,$(subst ., ,$(LIBBPF_VERSION))) -LIBBPF_MINOR_VERSION := $(word 2,$(subst ., ,$(LIBBPF_VERSION))) +LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION))) MAKEFLAGS += --no-print-directory @@ -60,8 +59,7 @@ ifndef VERBOSE VERBOSE = 0 endif -INCLUDES = -I$(if $(OUTPUT),$(OUTPUT),.) \ - -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi +INCLUDES = -I. 
-I$(srctree)/tools/include -I$(srctree)/tools/include/uapi export prefix libdir src obj @@ -84,13 +82,11 @@ else endif # Append required CFLAGS -override CFLAGS += -std=gnu89 override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC @@ -116,7 +112,6 @@ STATIC_OBJDIR := $(OUTPUT)staticobjs/ BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o BPF_HELPER_DEFS := $(OUTPUT)bpf_helper_defs.h -BPF_GENERATED := $(BPF_HELPER_DEFS) LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) @@ -141,19 +136,25 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force $(BPF_GENERATED) +$(BPF_IN_SHARED): force $(BPF_HELPER_DEFS) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \ + (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ + (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force $(BPF_GENERATED) +$(BPF_IN_STATIC): force $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -163,7 +164,7 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ + $(QUIET_LINK)$(CC) $(LDFLAGS) \ --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @@ -178,7 +179,7 @@ $(OUTPUT)libbpf.pc: -e "s|@VERSION@|$(LIBBPF_VERSION)|" \ < libbpf.pc.template > $@ -check: 
check_abi check_version +check: check_abi check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ @@ -204,21 +205,6 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) exit 1; \ fi -HDR_MAJ_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) -HDR_MIN_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) - -check_version: $(VERSION_SCRIPT) libbpf_version.h - @if [ "$(HDR_MAJ_VERSION)" != "$(LIBBPF_MAJOR_VERSION)" ]; then \ - echo "Error: libbpf major version mismatch detected: " \ - "'$(HDR_MAJ_VERSION)' != '$(LIBBPF_MAJOR_VERSION)'" >&2; \ - exit 1; \ - fi - @if [ "$(HDR_MIN_VERSION)" != "$(LIBBPF_MINOR_VERSION)" ]; then \ - echo "Error: libbpf minor version mismatch detected: " \ - "'$(HDR_MIN_VERSION)' != '$(LIBBPF_MINOR_VERSION)'" >&2; \ - exit 1; \ - fi - define do_install_mkdir if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ @@ -237,24 +223,14 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ - bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ - skel_internal.h libbpf_version.h -GEN_HDRS := $(BPF_GENERATED) +INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ + bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h \ + bpf_endian.h bpf_core_read.h skel_internal.h -INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf -INSTALL_SRC_HDRS := $(addprefix $(INSTALL_PFX)/, $(SRC_HDRS)) -INSTALL_GEN_HDRS := $(addprefix $(INSTALL_PFX)/, $(notdir $(GEN_HDRS))) - -$(INSTALL_SRC_HDRS): $(INSTALL_PFX)/%.h: %.h - $(call QUIET_INSTALL, $@) \ - $(call do_install,$<,$(prefix)/include/bpf,644) - -$(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h - $(call QUIET_INSTALL, $@) \ - $(call do_install,$<,$(prefix)/include/bpf,644) - -install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS) +install_headers: $(BPF_HELPER_DEFS) + $(call QUIET_INSTALL, headers) \ + $(foreach hdr,$(INSTALL_HEADERS), \ + $(call do_install,$(hdr),$(prefix)/include/bpf,644);) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ @@ -264,12 +240,12 @@ install: install_lib install_pkgconfig install_headers clean: $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ - *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_GENERATED) \ + *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ $(addprefix $(OUTPUT), \ *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) -PHONY += force cscope tags check check_abi check_version +PHONY += force cscope tags force: cscope: diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 550b4cbb6c..bfd1ce9fe2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -28,9 +28,6 @@ #include #include #include -#include -#include -#include #include "bpf.h" #include "libbpf.h" #include "libbpf_internal.h" @@ -52,12 +49,6 @@ # define __NR_bpf 351 # elif defined(__arc__) # define __NR_bpf 280 -# elif defined(__mips__) && defined(_ABIO32) -# define __NR_bpf 4355 -# elif defined(__mips__) && defined(_ABIN32) -# define __NR_bpf 6319 -# elif defined(__mips__) && defined(_ABI64) -# define __NR_bpf 5315 # else # error __NR_bpf not defined. libbpf does not support your arch. 
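The per-architecture __NR_bpf table above exists because libc offers no bpf(2) wrapper, so libbpf issues the raw syscall itself. A hedged standalone sketch of what the wrappers below ultimately perform, creating an array map much as bpf_create_map_xattr() assembles it:

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    static int create_array_map(unsigned int max_entries)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.map_type = BPF_MAP_TYPE_ARRAY;
            attr.key_size = sizeof(__u32);  /* array keys must be 4 bytes */
            attr.value_size = sizeof(__u64);
            attr.max_entries = max_entries;

            return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }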
# endif @@ -74,217 +65,133 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, - unsigned int size) -{ - int fd; - - fd = sys_bpf(cmd, attr, size); - return ensure_good_fd(fd); -} - -#define PROG_LOAD_ATTEMPTS 5 - -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) { + int retries = 5; int fd; do { - fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); - } while (fd < 0 && errno == EAGAIN && --attempts > 0); + fd = sys_bpf(BPF_PROG_LOAD, attr, size); + } while (fd < 0 && errno == EAGAIN && retries-- > 0); return fd; } -/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to - * memcg-based memory accounting for BPF maps and progs. This was done in [0]. - * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in - * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF. - * - * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ - * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") - */ -int probe_memcg_account(void) +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { - const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); - struct bpf_insn insns[] = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), - BPF_EXIT_INSN(), - }; - size_t insn_cnt = sizeof(insns) / sizeof(insns[0]); - union bpf_attr attr; - int prog_fd; - - /* attempt loading freplace trying to use custom BTF */ - memset(&attr, 0, prog_load_attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = insn_cnt; - attr.license = ptr_to_u64("GPL"); - - prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); - if (prog_fd >= 0) { - close(prog_fd); - return 1; - } - return 0; -} - -static bool memlock_bumped; -static rlim_t memlock_rlim = RLIM_INFINITY; - -int libbpf_set_memlock_rlim(size_t memlock_bytes) -{ - if (memlock_bumped) - return libbpf_err(-EBUSY); - - memlock_rlim = memlock_bytes; - return 0; -} - -int bump_rlimit_memlock(void) -{ - struct rlimit rlim; - - /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ - if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) - return 0; - - /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) - return 0; - - memlock_bumped = true; - - /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ - if (memlock_rlim == 0) - return 0; - - rlim.rlim_cur = rlim.rlim_max = memlock_rlim; - if (setrlimit(RLIMIT_MEMLOCK, &rlim)) - return -errno; - - return 0; -} - -int bpf_map_create(enum bpf_map_type map_type, - const char *map_name, - __u32 key_size, - __u32 value_size, - __u32 max_entries, - const struct bpf_map_create_opts *opts) -{ - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); union bpf_attr attr; int fd; - bump_rlimit_memlock(); + memset(&attr, '\0', sizeof(attr)); - memset(&attr, 0, attr_sz); - - if (!OPTS_VALID(opts, bpf_map_create_opts)) - return libbpf_err(-EINVAL); - - attr.map_type = map_type; - if (map_name) - libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); - attr.key_size = key_size; - attr.value_size = value_size; - attr.max_entries = max_entries; - - attr.btf_fd = OPTS_GET(opts, btf_fd, 0); - 
attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); - attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); - attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); - - attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); - attr.map_flags = OPTS_GET(opts, map_flags, 0); - attr.map_extra = OPTS_GET(opts, map_extra, 0); - attr.numa_node = OPTS_GET(opts, numa_node, 0); - attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); - - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); - return libbpf_err_errno(fd); -} - -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) -{ - LIBBPF_OPTS(bpf_map_create_opts, p); - - p.map_flags = create_attr->map_flags; - p.numa_node = create_attr->numa_node; - p.btf_fd = create_attr->btf_fd; - p.btf_key_type_id = create_attr->btf_key_type_id; - p.btf_value_type_id = create_attr->btf_value_type_id; - p.map_ifindex = create_attr->map_ifindex; - if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) - p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; + attr.map_type = create_attr->map_type; + attr.key_size = create_attr->key_size; + attr.value_size = create_attr->value_size; + attr.max_entries = create_attr->max_entries; + attr.map_flags = create_attr->map_flags; + if (create_attr->name) + memcpy(attr.map_name, create_attr->name, + min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = create_attr->numa_node; + attr.btf_fd = create_attr->btf_fd; + attr.btf_key_type_id = create_attr->btf_key_type_id; + attr.btf_value_type_id = create_attr->btf_value_type_id; + attr.map_ifindex = create_attr->map_ifindex; + if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) + attr.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; else - p.inner_map_fd = create_attr->inner_map_fd; + attr.inner_map_fd = create_attr->inner_map_fd; - return bpf_map_create(create_attr->map_type, create_attr->name, - create_attr->key_size, create_attr->value_size, - create_attr->max_entries, &p); + fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) { - LIBBPF_OPTS(bpf_map_create_opts, opts); + struct bpf_create_map_attr map_attr = {}; - opts.map_flags = map_flags; + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; if (node >= 0) { - opts.numa_node = node; - opts.map_flags |= BPF_F_NUMA_NODE; + map_attr.numa_node = node; + map_attr.map_flags |= BPF_F_NUMA_NODE; } - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); + return bpf_create_map_xattr(&map_attr); } int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); + struct bpf_create_map_attr map_attr = {}; - return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); } int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = 
map_flags); + struct bpf_create_map_attr map_attr = {}; - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); } int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { - LIBBPF_OPTS(bpf_map_create_opts, opts); + union bpf_attr attr; + int fd; + + memset(&attr, '\0', sizeof(attr)); + + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = 4; + attr.inner_map_fd = inner_map_fd; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + if (name) + memcpy(attr.map_name, name, + min(strlen(name), BPF_OBJ_NAME_LEN - 1)); - opts.inner_map_fd = inner_map_fd; - opts.map_flags = map_flags; if (node >= 0) { - opts.map_flags |= BPF_F_NUMA_NODE; - opts.numa_node = node; + attr.map_flags |= BPF_F_NUMA_NODE; + attr.numa_node = node; } - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); + fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags) { - LIBBPF_OPTS(bpf_map_create_opts, opts, - .inner_map_fd = inner_map_fd, - .map_flags = map_flags, - ); - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); + return bpf_create_map_in_map_node(map_type, name, key_size, + inner_map_fd, max_entries, map_flags, + -1); } static void * @@ -312,95 +219,57 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, return info; } -DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0) -int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, - const char *prog_name, const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts) +int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) { void *finfo = NULL, *linfo = NULL; - const char *func_info, *line_info; - __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; - __u32 func_info_rec_size, line_info_rec_size; - int fd, attempts; union bpf_attr attr; - char *log_buf; + int fd; - bump_rlimit_memlock(); - - if (!OPTS_VALID(opts, bpf_prog_load_opts)) + if (!load_attr->log_buf != !load_attr->log_buf_sz) return libbpf_err(-EINVAL); - attempts = OPTS_GET(opts, attempts, 0); - if (attempts < 0) + if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf)) return libbpf_err(-EINVAL); - if (attempts == 0) - attempts = PROG_LOAD_ATTEMPTS; memset(&attr, 0, sizeof(attr)); + attr.prog_type = load_attr->prog_type; + attr.expected_attach_type = load_attr->expected_attach_type; - attr.prog_type = prog_type; - attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); - - attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); - attr.prog_flags = OPTS_GET(opts, prog_flags, 0); - attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); - attr.kern_version = OPTS_GET(opts, kern_version, 0); - - if (prog_name) - libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); - attr.license = ptr_to_u64(license); - - if (insn_cnt > UINT_MAX) - return libbpf_err(-E2BIG); - - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)insn_cnt; - - attach_prog_fd = OPTS_GET(opts, 
attach_prog_fd, 0); - attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); - - if (attach_prog_fd && attach_btf_obj_fd) - return libbpf_err(-EINVAL); - - attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); - if (attach_prog_fd) - attr.attach_prog_fd = attach_prog_fd; + if (load_attr->attach_prog_fd) + attr.attach_prog_fd = load_attr->attach_prog_fd; else - attr.attach_btf_obj_fd = attach_btf_obj_fd; + attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd; + attr.attach_btf_id = load_attr->attach_btf_id; - log_buf = OPTS_GET(opts, log_buf, NULL); - log_size = OPTS_GET(opts, log_size, 0); - log_level = OPTS_GET(opts, log_level, 0); + attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = load_attr->kern_version; - if (!!log_buf != !!log_size) - return libbpf_err(-EINVAL); - if (log_level > (4 | 2 | 1)) - return libbpf_err(-EINVAL); - if (log_level && !log_buf) - return libbpf_err(-EINVAL); + attr.insn_cnt = (__u32)load_attr->insn_cnt; + attr.insns = ptr_to_u64(load_attr->insns); + attr.license = ptr_to_u64(load_attr->license); - func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); - func_info = OPTS_GET(opts, func_info, NULL); - attr.func_info_rec_size = func_info_rec_size; - attr.func_info = ptr_to_u64(func_info); - attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0); - - line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); - line_info = OPTS_GET(opts, line_info, NULL); - attr.line_info_rec_size = line_info_rec_size; - attr.line_info = ptr_to_u64(line_info); - attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); - - attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); - - if (log_level) { - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_size; - attr.log_level = log_level; + attr.log_level = load_attr->log_level; + if (attr.log_level) { + attr.log_buf = ptr_to_u64(load_attr->log_buf); + attr.log_size = load_attr->log_buf_sz; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + attr.prog_btf_fd = load_attr->prog_btf_fd; + attr.prog_flags = load_attr->prog_flags; + + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + attr.func_info = ptr_to_u64(load_attr->func_info); + + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + attr.line_info = ptr_to_u64(load_attr->line_info); + + if (load_attr->name) + memcpy(attr.prog_name, load_attr->name, + min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1)); + + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) return fd; @@ -410,11 +279,11 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, */ while (errno == E2BIG && (!finfo || !linfo)) { if (!finfo && attr.func_info_cnt && - attr.func_info_rec_size < func_info_rec_size) { + attr.func_info_rec_size < load_attr->func_info_rec_size) { /* try with corrected func info records */ - finfo = alloc_zero_tailing_info(func_info, - attr.func_info_cnt, - func_info_rec_size, + finfo = alloc_zero_tailing_info(load_attr->func_info, + load_attr->func_info_cnt, + load_attr->func_info_rec_size, attr.func_info_rec_size); if (!finfo) { errno = E2BIG; @@ -422,12 +291,13 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, } attr.func_info = ptr_to_u64(finfo); - attr.func_info_rec_size = func_info_rec_size; + attr.func_info_rec_size = load_attr->func_info_rec_size; } else if (!linfo && attr.line_info_cnt && - attr.line_info_rec_size < line_info_rec_size) { - linfo = alloc_zero_tailing_info(line_info, - attr.line_info_cnt, - 
line_info_rec_size, + attr.line_info_rec_size < + load_attr->line_info_rec_size) { + linfo = alloc_zero_tailing_info(load_attr->line_info, + load_attr->line_info_cnt, + load_attr->line_info_rec_size, attr.line_info_rec_size); if (!linfo) { errno = E2BIG; @@ -435,27 +305,26 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, } attr.line_info = ptr_to_u64(linfo); - attr.line_info_rec_size = line_info_rec_size; + attr.line_info_rec_size = load_attr->line_info_rec_size; } else { break; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) goto done; } - if (log_level == 0 && log_buf) { - /* log_level == 0 with non-NULL log_buf requires retrying on error - * with log_level == 1 and log_buf/log_buf_size set, to get details of - * failure - */ - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_size; - attr.log_level = 1; + if (load_attr->log_level || !load_attr->log_buf) + goto done; - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); - } + /* Try again with log */ + attr.log_buf = ptr_to_u64(load_attr->log_buf); + attr.log_size = load_attr->log_buf_sz; + attr.log_level = 1; + load_attr->log_buf[0] = 0; + + fd = sys_bpf_prog_load(&attr, sizeof(attr)); done: /* free() doesn't affect errno, so we don't need to restore it */ free(finfo); @@ -463,20 +332,17 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, return libbpf_err_errno(fd); } -__attribute__((alias("bpf_load_program_xattr2"))) int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); - -static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz) + char *log_buf, size_t log_buf_sz) { - LIBBPF_OPTS(bpf_prog_load_opts, p); + struct bpf_prog_load_params p = {}; if (!load_attr || !log_buf != !log_buf_sz) return libbpf_err(-EINVAL); + p.prog_type = load_attr->prog_type; p.expected_attach_type = load_attr->expected_attach_type; - switch (load_attr->prog_type) { + switch (p.prog_type) { case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_LSM: p.attach_btf_id = load_attr->attach_btf_id; @@ -490,9 +356,12 @@ static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr p.prog_ifindex = load_attr->prog_ifindex; p.kern_version = load_attr->kern_version; } + p.insn_cnt = load_attr->insns_cnt; + p.insns = load_attr->insns; + p.license = load_attr->license; p.log_level = load_attr->log_level; p.log_buf = log_buf; - p.log_size = log_buf_sz; + p.log_buf_sz = log_buf_sz; p.prog_btf_fd = load_attr->prog_btf_fd; p.func_info_rec_size = load_attr->func_info_rec_size; p.func_info_cnt = load_attr->func_info_cnt; @@ -500,10 +369,10 @@ static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr p.line_info_rec_size = load_attr->line_info_rec_size; p.line_info_cnt = load_attr->line_info_cnt; p.line_info = load_attr->line_info; + p.name = load_attr->name; p.prog_flags = load_attr->prog_flags; - return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license, - load_attr->insns, load_attr->insns_cnt, &p); + return libbpf__bpf_prog_load(&p); } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -522,7 +391,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, load_attr.license = license; load_attr.kern_version = kern_version; - return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz); + return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); } int 
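
The "Try again with log" fallback restored above encodes libbpf's long-standing convention: attempt the load quietly first, then repeat with log_level=1 so the verifier explains a failure. A hedged caller-side sketch of the same pattern — the program type, license, and buffer size are placeholder choices, not taken from the patch:

#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static char log_buf[1 << 20];	/* 1 MiB scratch; size is an arbitrary choice */

static int load_quiet_then_logged(const struct bpf_insn *insns, size_t cnt)
{
	/* first attempt: no log buffer, fast path on success */
	int fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, insns, cnt,
				  "GPL", 0, NULL, 0);

	if (fd < 0) {
		/* retry with a buffer so the verifier reports details */
		fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, insns, cnt,
				      "GPL", 0, log_buf, sizeof(log_buf));
		if (fd < 0)
			fprintf(stderr, "verifier log:\n%s\n", log_buf);
	}
	return fd;
}
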
bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -533,8 +402,6 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, union bpf_attr attr; int fd; - bump_rlimit_memlock(); - memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; @@ -547,7 +414,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.kern_version = kern_version; attr.prog_flags = prog_flags; - fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -691,11 +558,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, return libbpf_err_errno(ret); } -int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, +int bpf_map_delete_batch(int fd, void *keys, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, - NULL, (void *)keys, NULL, count, opts); + NULL, keys, NULL, count, opts); } int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, @@ -715,11 +582,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, count, opts); } -int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, +int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, - (void *)keys, (void *)values, count, opts); + keys, values, count, opts); } int bpf_obj_pin(int fd, const char *pathname) @@ -743,7 +610,7 @@ int bpf_obj_get(const char *pathname) memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); - fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -854,7 +721,7 @@ int bpf_link_create(int prog_fd, int target_fd, break; } proceed: - fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); + fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -897,7 +764,7 @@ int bpf_iter_create(int link_fd) memset(&attr, 0, sizeof(attr)); attr.iter_create.link_fd = link_fd; - fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1055,7 +922,7 @@ int bpf_prog_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.prog_id = id; - fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1067,7 +934,7 @@ int bpf_map_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.map_id = id; - fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1079,7 +946,7 @@ int bpf_btf_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.btf_id = id; - fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1091,7 +958,7 @@ int bpf_link_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.link_id = id; - fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1122,71 +989,28 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) attr.raw_tracepoint.name = 
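
With the const qualifiers dropped above, the batch helpers take caller-owned scratch buffers for keys and values again. A usage sketch; the map FD, element types, and counts are assumptions for illustration only:

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int fill_four_slots(int map_fd)
{
	__u32 keys[4] = { 0, 1, 2, 3 };
	__u64 vals[4] = { 10, 20, 30, 40 };
	__u32 count = 4;	/* in: elements to write; out: elements written */
	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
		.elem_flags = BPF_ANY,	/* create-or-update */
	);

	return bpf_map_update_batch(map_fd, keys, vals, &count, &opts);
}
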
ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; - fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); return libbpf_err_errno(fd); } -int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts) +int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); - union bpf_attr attr; - char *log_buf; - size_t log_size; - __u32 log_level; + union bpf_attr attr = {}; int fd; - bump_rlimit_memlock(); - - memset(&attr, 0, attr_sz); - - if (!OPTS_VALID(opts, bpf_btf_load_opts)) - return libbpf_err(-EINVAL); - - log_buf = OPTS_GET(opts, log_buf, NULL); - log_size = OPTS_GET(opts, log_size, 0); - log_level = OPTS_GET(opts, log_level, 0); - - if (log_size > UINT_MAX) - return libbpf_err(-EINVAL); - if (log_size && !log_buf) - return libbpf_err(-EINVAL); - - attr.btf = ptr_to_u64(btf_data); + attr.btf = ptr_to_u64(btf); attr.btf_size = btf_size; - /* log_level == 0 and log_buf != NULL means "try loading without - * log_buf, but retry with log_buf and log_level=1 on error", which is - * consistent across low-level and high-level BTF and program loading - * APIs within libbpf and provides a sensible behavior in practice - */ - if (log_level) { - attr.btf_log_buf = ptr_to_u64(log_buf); - attr.btf_log_size = (__u32)log_size; - attr.btf_log_level = log_level; - } - - fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); - if (fd < 0 && log_buf && log_level == 0) { - attr.btf_log_buf = ptr_to_u64(log_buf); - attr.btf_log_size = (__u32)log_size; - attr.btf_log_level = 1; - fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); - } - return libbpf_err_errno(fd); -} - -int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) -{ - LIBBPF_OPTS(bpf_btf_load_opts, opts); - int fd; retry: if (do_log && log_buf && log_buf_size) { - opts.log_buf = log_buf; - opts.log_size = log_buf_size; - opts.log_level = 1; + attr.btf_log_level = 1; + attr.btf_log_size = log_buf_size; + attr.btf_log_buf = ptr_to_u64(log_buf); } - fd = bpf_btf_load(btf, btf_size, &opts); + fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + if (fd < 0 && !do_log && log_buf && log_buf_size) { do_log = true; goto retry; @@ -1227,7 +1051,7 @@ int bpf_enable_stats(enum bpf_stats_type type) memset(&attr, 0, sizeof(attr)); attr.enable_stats.type = type; - fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 14e0d97ad2..6fffb3cdf3 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -29,38 +29,11 @@ #include #include "libbpf_common.h" -#include "libbpf_legacy.h" #ifdef __cplusplus extern "C" { #endif -int libbpf_set_memlock_rlim(size_t memlock_bytes); - -struct bpf_map_create_opts { - size_t sz; /* size of this struct for forward/backward compatibility */ - - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 btf_vmlinux_value_type_id; - - __u32 inner_map_fd; - __u32 map_flags; - __u64 map_extra; - - __u32 numa_node; - __u32 map_ifindex; -}; -#define bpf_map_create_opts__last_field map_ifindex - -LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, - const char *map_name, - __u32 key_size, - __u32 value_size, - __u32 max_entries, - const struct bpf_map_create_opts *opts); - struct bpf_create_map_attr { const char *name; enum 
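
Note the shape of the restored bpf_load_btf() above: the log buffer is wired into the attr only when do_log is set, and the function flips do_log on automatically after the first failed attempt. A caller-side sketch; the buffer size here is an arbitrary assumption:

#include <linux/bpf.h>
#include <bpf/bpf.h>

static char btf_log[1 << 16];

static int load_btf_blob(const void *raw, __u32 raw_size)
{
	/* do_log = false: a verifier log is requested only on the retry */
	return bpf_load_btf(raw, raw_size, btf_log, sizeof(btf_log), false);
}
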
bpf_map_type map_type; @@ -79,95 +52,25 @@ struct bpf_create_map_attr { }; }; -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") +LIBBPF_API int +bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags); -struct bpf_prog_load_opts { - size_t sz; /* size of this struct for forward/backward compatibility */ - - /* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns - * -EAGAIN. This field determines how many attempts libbpf has to - * make. If not specified, libbpf will use default value of 5. - */ - int attempts; - - enum bpf_attach_type expected_attach_type; - __u32 prog_btf_fd; - __u32 prog_flags; - __u32 prog_ifindex; - __u32 kern_version; - - __u32 attach_btf_id; - __u32 attach_prog_fd; - __u32 attach_btf_obj_fd; - - const int *fd_array; - - /* .BTF.ext func info data */ - const void *func_info; - __u32 func_info_cnt; - __u32 func_info_rec_size; - - /* .BTF.ext line info data */ - const void *line_info; - __u32 line_info_cnt; - __u32 line_info_rec_size; - - /* verifier log options */ - __u32 log_level; - __u32 log_size; - char *log_buf; -}; -#define bpf_prog_load_opts__last_field log_buf - -LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, - const char *prog_name, const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts); -/* this "specialization" should go away in libbpf 1.0 */ -LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, - const char *prog_name, const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts); - -/* This is an elaborate way to not conflict with deprecated bpf_prog_load() - * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone. - * With this approach, if someone is calling bpf_prog_load() with - * 4 arguments, they will use the deprecated API, which keeps backwards - * compatibility (both source code and binary). If bpf_prog_load() is called - * with 6 arguments, though, it gets redirected to __bpf_prog_load. - * So looking forward to libbpf 1.0 when this hack will be gone and - * __bpf_prog_load() will be called just bpf_prog_load(). - */ -#ifndef bpf_prog_load -#define bpf_prog_load(...) 
___libbpf_overload(___bpf_prog_load, __VA_ARGS__) -#define ___bpf_prog_load4(file, type, pobj, prog_fd) \ - bpf_prog_load_deprecated(file, type, pobj, prog_fd) -#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \ - bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts) -#endif /* bpf_prog_load */ - struct bpf_load_program_attr { enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; @@ -197,18 +100,15 @@ struct bpf_load_program_attr { /* Flags to direct loading requirements */ #define MAPS_RELAX_COMPAT 0x01 -/* Recommended log buffer size */ +/* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ - -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") -LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") +LIBBPF_API int +bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); LIBBPF_API int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, __u32 prog_flags, @@ -216,23 +116,6 @@ LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz, int log_level); -struct bpf_btf_load_opts { - size_t sz; /* size of this struct for forward/backward compatibility */ - - /* kernel log options */ - char *log_buf; - __u32 log_level; - __u32 log_size; -}; -#define bpf_btf_load_opts__last_field log_size - -LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, - const struct bpf_btf_load_opts *opts); - -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead") -LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, - __u32 log_buf_size, bool do_log); - LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); @@ -254,128 +137,17 @@ struct bpf_map_batch_opts { }; #define bpf_map_batch_opts__last_field flags - -/** - * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple - * elements in a BPF map. - * - * @param fd BPF map file descriptor - * @param keys pointer to an array of *count* keys - * @param count input and output parameter; on input **count** represents the - * number of elements in the map to delete in batch; - * on output if a non-EFAULT error is returned, **count** represents the number of deleted - * elements if the output **count** value is not equal to the input **count** value - * If EFAULT is returned, **count** should not be trusted to be correct. - * @param opts options for configuring the way the batch deletion works - * @return 0, on success; negative error code, otherwise (errno is also set to - * the error code) - */ -LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, +LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, __u32 *count, const struct bpf_map_batch_opts *opts); - -/** - * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. - * - * The parameter *in_batch* is the address of the first element in the batch to read. 
- * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent - * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate - * that the batched lookup starts from the beginning of the map. - * - * The *keys* and *values* are output parameters which must point to memory large enough to - * hold *count* items based on the key and value size of the map *map_fd*. The *keys* - * buffer must be of *key_size* * *count*. The *values* buffer must be of - * *value_size* * *count*. - * - * @param fd BPF map file descriptor - * @param in_batch address of the first element in batch to read, can pass NULL to - * indicate that the batched lookup starts from the beginning of the map. - * @param out_batch output parameter that should be passed to next call as *in_batch* - * @param keys pointer to an array large enough for *count* keys - * @param values pointer to an array large enough for *count* values - * @param count input and output parameter; on input it's the number of elements - * in the map to read in batch; on output it's the number of elements that were - * successfully read. - * If a non-EFAULT error is returned, count will be set as the number of elements - * that were read before the error occurred. - * If EFAULT is returned, **count** should not be trusted to be correct. - * @param opts options for configuring the way the batch lookup works - * @return 0, on success; negative error code, otherwise (errno is also set to - * the error code) - */ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); - -/** - * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion - * of BPF map elements where each element is deleted after being retrieved. - * - * @param fd BPF map file descriptor - * @param in_batch address of the first element in batch to read, can pass NULL to - * get address of the first element in *out_batch* - * @param out_batch output parameter that should be passed to next call as *in_batch* - * @param keys pointer to an array of *count* keys - * @param values pointer to an array large enough for *count* values - * @param count input and output parameter; on input it's the number of elements - * in the map to read and delete in batch; on output it represents the number of - * elements that were successfully read and deleted - * If a non-**EFAULT** error code is returned and if the output **count** value - * is not equal to the input **count** value, up to **count** elements may - * have been deleted. - * if **EFAULT** is returned up to *count* elements may have been deleted without - * being returned via the *keys* and *values* output parameters. - * @param opts options for configuring the way the batch lookup and delete works - * @return 0, on success; negative error code, otherwise (errno is also set to - * the error code) - */ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); - -/** - * @brief **bpf_map_update_batch()** updates multiple elements in a map - * by specifying keys and their corresponding values. - * - * The *keys* and *values* parameters must point to memory large enough - * to hold *count* items based on the key and value size of the map. 
- * - * The *opts* parameter can be used to control how *bpf_map_update_batch()* - * should handle keys that either do or do not already exist in the map. - * In particular the *flags* parameter of *bpf_map_batch_opts* can be - * one of the following: - * - * Note that *count* is an input and output parameter, where on output it - * represents how many elements were successfully updated. Also note that if - * **EFAULT** then *count* should not be trusted to be correct. - * - * **BPF_ANY** - * Create new elements or update existing. - * - * **BPF_NOEXIST** - * Create new elements only if they do not exist. - * - * **BPF_EXIST** - * Update existing elements. - * - * **BPF_F_LOCK** - * Update spin_lock-ed map elements. This must be - * specified if the map value contains a spinlock. - * - * @param fd BPF map file descriptor - * @param keys pointer to an array of *count* keys - * @param values pointer to an array of *count* values - * @param count input and output parameter; on input it's the number of elements - * in the map to update in batch; on output if a non-EFAULT error is returned, - * **count** represents the number of updated elements if the output **count** - * value is not equal to the input **count** value. - * If EFAULT is returned, **count** should not be trusted to be correct. - * @param opts options for configuring the way the batch update works - * @return 0, on success; negative error code, otherwise (errno is also set to - * the error code) - */ -LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values, +LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); @@ -471,6 +243,8 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); +LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, + __u32 log_buf_size, bool do_log); LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 223308931d..615400391e 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -3,27 +3,10 @@ #ifndef __BPF_GEN_INTERNAL_H #define __BPF_GEN_INTERNAL_H -#include "bpf.h" - struct ksym_relo_desc { const char *name; int kind; int insn_idx; - bool is_weak; - bool is_typeless; -}; - -struct ksym_desc { - const char *name; - int ref; - int kind; - union { - /* used for kfunc */ - int off; - /* used for typeless ksym */ - bool typeless; - }; - int insn; }; struct bpf_gen { @@ -39,34 +22,20 @@ struct bpf_gen { int error; struct ksym_relo_desc *relos; int relo_cnt; - struct bpf_core_relo *core_relos; - int core_relo_cnt; char attach_target[128]; int attach_kind; - struct ksym_desc *ksyms; - __u32 nr_ksyms; - int fd_array; - int nr_fd_array; }; -void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps); -int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps); +void bpf_gen__init(struct bpf_gen *gen, int log_level); +int bpf_gen__finish(struct bpf_gen *gen); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, - enum bpf_map_type map_type, const char *map_name, - 
__u32 key_size, __u32 value_size, __u32 max_entries, - struct bpf_map_create_opts *map_attr, int map_idx); -void bpf_gen__prog_load(struct bpf_gen *gen, - enum bpf_prog_type prog_type, const char *prog_name, - const char *license, struct bpf_insn *insns, size_t insn_cnt, - struct bpf_prog_load_opts *load_attr, int prog_idx); +void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx); +struct bpf_prog_load_params; +void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, - bool is_typeless, int kind, int insn_idx); -void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); -void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx); #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 963b1060d9..b9987c3efa 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -14,6 +14,14 @@ #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] +/* Helper macro to print out debug messages */ +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names @@ -216,47 +224,4 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) -#ifdef BPF_NO_GLOBAL_DATA -#define BPF_PRINTK_FMT_MOD -#else -#define BPF_PRINTK_FMT_MOD static const -#endif - -#define __bpf_printk(fmt, ...) \ -({ \ - BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - -/* - * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments - * instead of an array of u64. - */ -#define __bpf_vprintk(fmt, args...) \ -({ \ - static const char ___fmt[] = fmt; \ - unsigned long long ___param[___bpf_narg(args)]; \ - \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ - \ - bpf_trace_vprintk(___fmt, sizeof(___fmt), \ - ___param, sizeof(___param)); \ -}) - -/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args - * Otherwise use __bpf_vprintk - */ -#define ___bpf_pick_printk(...) \ - ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ - __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ - __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\ - __bpf_printk /*1*/, __bpf_printk /*0*/) - -/* Helper macro to print out debug messages */ -#define bpf_printk(fmt, args...) 
___bpf_pick_printk(args)(fmt, ##args) - #endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 90f56b0f58..d6bfbe0092 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -24,9 +24,6 @@ #elif defined(__TARGET_ARCH_sparc) #define bpf_target_sparc #define bpf_target_defined -#elif defined(__TARGET_ARCH_riscv) - #define bpf_target_riscv - #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -51,9 +48,6 @@ #elif defined(__sparc__) #define bpf_target_sparc #define bpf_target_defined -#elif defined(__riscv) && __riscv_xlen == 64 - #define bpf_target_riscv - #define bpf_target_defined #endif /* no compiler target */ #endif @@ -66,204 +60,251 @@ #if defined(__KERNEL__) || defined(__VMLINUX_H__) -#define __PT_PARM1_REG di -#define __PT_PARM2_REG si -#define __PT_PARM3_REG dx -#define __PT_PARM4_REG cx -#define __PT_PARM5_REG r8 -#define __PT_RET_REG sp -#define __PT_FP_REG bp -#define __PT_RC_REG ax -#define __PT_SP_REG sp -#define __PT_IP_REG ip +#define PT_REGS_PARM1(x) ((x)->di) +#define PT_REGS_PARM2(x) ((x)->si) +#define PT_REGS_PARM3(x) ((x)->dx) +#define PT_REGS_PARM4(x) ((x)->cx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->sp) +#define PT_REGS_FP(x) ((x)->bp) +#define PT_REGS_RC(x) ((x)->ax) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->ip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) #else #ifdef __i386__ - -#define __PT_PARM1_REG eax -#define __PT_PARM2_REG edx -#define __PT_PARM3_REG ecx /* i386 kernel is built with -mregparm=3 */ -#define __PT_PARM4_REG __unsupported__ -#define __PT_PARM5_REG __unsupported__ -#define __PT_RET_REG esp -#define __PT_FP_REG ebp -#define __PT_RC_REG eax -#define __PT_SP_REG esp -#define __PT_IP_REG eip +#define PT_REGS_PARM1(x) ((x)->eax) +#define PT_REGS_PARM2(x) ((x)->edx) +#define PT_REGS_PARM3(x) ((x)->ecx) +#define PT_REGS_PARM4(x) 0 +#define PT_REGS_PARM5(x) 0 +#define PT_REGS_RET(x) ((x)->esp) +#define PT_REGS_FP(x) ((x)->ebp) +#define PT_REGS_RC(x) ((x)->eax) +#define PT_REGS_SP(x) ((x)->esp) +#define PT_REGS_IP(x) ((x)->eip) -#else /* __i386__ */ +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) +#define PT_REGS_PARM4_CORE(x) 0 +#define PT_REGS_PARM5_CORE(x) 0 +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) -#define __PT_PARM1_REG rdi -#define __PT_PARM2_REG rsi -#define __PT_PARM3_REG rdx -#define __PT_PARM4_REG rcx -#define __PT_PARM5_REG r8 -#define __PT_RET_REG rsp -#define __PT_FP_REG rbp -#define __PT_RC_REG rax -#define __PT_SP_REG rsp -#define __PT_IP_REG rip +#else -#endif /* __i386__ */ +#define PT_REGS_PARM1(x) ((x)->rdi) +#define PT_REGS_PARM2(x) ((x)->rsi) +#define PT_REGS_PARM3(x) ((x)->rdx) +#define PT_REGS_PARM4(x) 
((x)->rcx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->rsp) +#define PT_REGS_FP(x) ((x)->rbp) +#define PT_REGS_RC(x) ((x)->rax) +#define PT_REGS_SP(x) ((x)->rsp) +#define PT_REGS_IP(x) ((x)->rip) -#endif /* __KERNEL__ || __VMLINUX_H__ */ +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) + +#endif +#endif #elif defined(bpf_target_s390) /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ -#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) -#define __PT_PARM1_REG gprs[2] -#define __PT_PARM2_REG gprs[3] -#define __PT_PARM3_REG gprs[4] -#define __PT_PARM4_REG gprs[5] -#define __PT_PARM5_REG gprs[6] -#define __PT_RET_REG grps[14] -#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ -#define __PT_RC_REG gprs[2] -#define __PT_SP_REG gprs[15] -#define __PT_IP_REG psw.addr +struct pt_regs; +#define PT_REGS_S390 const volatile user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) #elif defined(bpf_target_arm) -#define __PT_PARM1_REG uregs[0] -#define __PT_PARM2_REG uregs[1] -#define __PT_PARM3_REG uregs[2] -#define __PT_PARM4_REG uregs[3] -#define __PT_PARM5_REG uregs[4] -#define __PT_RET_REG uregs[14] -#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */ -#define __PT_RC_REG uregs[0] -#define __PT_SP_REG uregs[13] -#define __PT_IP_REG uregs[12] +#define PT_REGS_PARM1(x) ((x)->uregs[0]) +#define PT_REGS_PARM2(x) ((x)->uregs[1]) +#define PT_REGS_PARM3(x) ((x)->uregs[2]) +#define PT_REGS_PARM4(x) ((x)->uregs[3]) +#define PT_REGS_PARM5(x) ((x)->uregs[4]) +#define PT_REGS_RET(x) ((x)->uregs[14]) +#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->uregs[0]) +#define PT_REGS_SP(x) 
((x)->uregs[13]) +#define PT_REGS_IP(x) ((x)->uregs[12]) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) #elif defined(bpf_target_arm64) /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ -#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) -#define __PT_PARM1_REG regs[0] -#define __PT_PARM2_REG regs[1] -#define __PT_PARM3_REG regs[2] -#define __PT_PARM4_REG regs[3] -#define __PT_PARM5_REG regs[4] -#define __PT_RET_REG regs[30] -#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */ -#define __PT_RC_REG regs[0] -#define __PT_SP_REG sp -#define __PT_IP_REG pc +struct pt_regs; +#define PT_REGS_ARM64 const volatile struct user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) #elif defined(bpf_target_mips) -#define __PT_PARM1_REG regs[4] -#define __PT_PARM2_REG regs[5] -#define __PT_PARM3_REG regs[6] -#define __PT_PARM4_REG regs[7] -#define __PT_PARM5_REG regs[8] -#define __PT_RET_REG regs[31] -#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */ -#define __PT_RC_REG regs[2] -#define __PT_SP_REG regs[29] -#define __PT_IP_REG cp0_epc +#define PT_REGS_PARM1(x) ((x)->regs[4]) +#define PT_REGS_PARM2(x) ((x)->regs[5]) +#define PT_REGS_PARM3(x) ((x)->regs[6]) +#define PT_REGS_PARM4(x) ((x)->regs[7]) +#define PT_REGS_PARM5(x) ((x)->regs[8]) +#define PT_REGS_RET(x) ((x)->regs[31]) +#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->regs[2]) +#define PT_REGS_SP(x) ((x)->regs[29]) +#define PT_REGS_IP(x) ((x)->cp0_epc) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) +#define 
PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) #elif defined(bpf_target_powerpc) -#define __PT_PARM1_REG gpr[3] -#define __PT_PARM2_REG gpr[4] -#define __PT_PARM3_REG gpr[5] -#define __PT_PARM4_REG gpr[6] -#define __PT_PARM5_REG gpr[7] -#define __PT_RET_REG regs[31] -#define __PT_FP_REG __unsupported__ -#define __PT_RC_REG gpr[3] -#define __PT_SP_REG sp -#define __PT_IP_REG nip +#define PT_REGS_PARM1(x) ((x)->gpr[3]) +#define PT_REGS_PARM2(x) ((x)->gpr[4]) +#define PT_REGS_PARM3(x) ((x)->gpr[5]) +#define PT_REGS_PARM4(x) ((x)->gpr[6]) +#define PT_REGS_PARM5(x) ((x)->gpr[7]) +#define PT_REGS_RC(x) ((x)->gpr[3]) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->nip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) #elif defined(bpf_target_sparc) -#define __PT_PARM1_REG u_regs[UREG_I0] -#define __PT_PARM2_REG u_regs[UREG_I1] -#define __PT_PARM3_REG u_regs[UREG_I2] -#define __PT_PARM4_REG u_regs[UREG_I3] -#define __PT_PARM5_REG u_regs[UREG_I4] -#define __PT_RET_REG u_regs[UREG_I7] -#define __PT_FP_REG __unsupported__ -#define __PT_RC_REG u_regs[UREG_I0] -#define __PT_SP_REG u_regs[UREG_FP] +#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) + /* Should this also be a bpf_target check for the sparc case? 
*/ #if defined(__arch64__) -#define __PT_IP_REG tpc +#define PT_REGS_IP(x) ((x)->tpc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) #else -#define __PT_IP_REG pc +#define PT_REGS_IP(x) ((x)->pc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif -#elif defined(bpf_target_riscv) - -#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) -#define __PT_PARM1_REG a0 -#define __PT_PARM2_REG a1 -#define __PT_PARM3_REG a2 -#define __PT_PARM4_REG a3 -#define __PT_PARM5_REG a4 -#define __PT_RET_REG ra -#define __PT_FP_REG fp -#define __PT_RC_REG a5 -#define __PT_SP_REG sp -#define __PT_IP_REG epc - #endif -#if defined(bpf_target_defined) - -struct pt_regs; - -/* allow some architecutres to override `struct pt_regs` */ -#ifndef __PT_REGS_CAST -#define __PT_REGS_CAST(x) (x) -#endif - -#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG) -#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG) -#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG) -#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG) -#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG) -#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG) -#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG) -#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) -#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG) -#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG) - #if defined(bpf_target_powerpc) - #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP - #elif defined(bpf_target_sparc) - #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP - -#else - +#elif defined(bpf_target_defined) #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) - + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \ + (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif -#else /* defined(bpf_target_defined) */ +#if !defined(bpf_target_defined) #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -290,7 +331,7 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) -#endif /* defined(bpf_target_defined) */ +#endif /* !defined(bpf_target_defined) */ #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b @@ -302,23 +343,25 @@ struct pt_regs; #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, 
N, ...) N #endif #ifndef ___bpf_narg -#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_narg(...) \ + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #endif -#define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] -#define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) +#define ___bpf_ctx_cast(args...) \ + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) /* * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and @@ -351,13 +394,19 @@ ____##name(unsigned long long *ctx, ##args) struct pt_regs; -#define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) \ + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) \ + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) \ + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) \ + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) \ + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) 
\ + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) /* * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for @@ -383,9 +432,11 @@ typeof(name(0)) name(struct pt_regs *ctx) \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) -#define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) -#define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_args1(x) \ + ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +#define ___bpf_kretprobe_args(args...) \ + ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) /* * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9aa19c89f7..5f3d20ae66 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -57,7 +57,7 @@ struct btf { * representation is broken up into three independently allocated * memory regions to be able to modify them independently. * raw_data is nulled out at that point, but can be later allocated - * and cached again if user calls btf__raw_data(), at which point + * and cached again if user calls btf__get_raw_data(), at which point * raw_data will contain a contiguous copy of header, types, and * strings: * @@ -189,17 +189,12 @@ int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_ return 0; } -static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt) -{ - return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), - btf->nr_types, BTF_MAX_NR_TYPES, add_cnt); -} - static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) { __u32 *p; - p = btf_add_type_offs_mem(btf, 1); + p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), + btf->nr_types, BTF_MAX_NR_TYPES, 1); if (!p) return -ENOMEM; @@ -299,7 +294,6 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: - case BTF_KIND_TYPE_TAG: return base_size; case BTF_KIND_INT: return base_size + sizeof(__u32); @@ -316,8 +310,6 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); - case BTF_KIND_DECL_TAG: - return base_size + sizeof(struct btf_decl_tag); default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -350,7 +342,6 @@ static int btf_bswap_type_rest(struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: - case BTF_KIND_TYPE_TAG: return 0; case BTF_KIND_INT: *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); @@ -391,9 +382,6 @@ static int btf_bswap_type_rest(struct btf_type *t) v->size = bswap_32(v->size); } return 0; - case BTF_KIND_DECL_TAG: - btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx); - return 0; default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -443,18 +431,13 @@ __u32 btf__get_nr_types(const struct btf *btf) return btf->start_id + btf->nr_types - 1; } -__u32 btf__type_cnt(const struct btf *btf) -{ - return btf->start_id + btf->nr_types; -} - const struct btf *btf__base_btf(const struct btf *btf) { return btf->base_btf; } /* internal helper returning non-const pointer to a type */ -struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id) +struct btf_type 
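
Tying the restored bpf_helpers.h and bpf_tracing.h pieces together, a minimal BPF-side sketch: BPF_KPROBE expands its argument list into the per-architecture PT_REGS_PARMn() accessors defined above, and bpf_printk is back to a plain bpf_trace_printk wrapper (up to three format arguments). The traced symbol and argument types below are illustrative assumptions:

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("kprobe/do_unlinkat")
int BPF_KPROBE(trace_unlinkat, int dfd, const void *fname)
{
	/* dfd arrives via PT_REGS_PARM1(ctx) under the hood */
	bpf_printk("unlinkat dfd=%d\n", dfd);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
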
*btf_type_by_id(struct btf *btf, __u32 type_id) { if (type_id == 0) return &btf_void; @@ -479,8 +462,8 @@ static int determine_ptr_size(const struct btf *btf) if (btf->base_btf && btf->base_btf->ptr_sz > 0) return btf->base_btf->ptr_sz; - n = btf__type_cnt(btf); - for (i = 1; i < n; i++) { + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_int(t)) continue; @@ -540,9 +523,9 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) static bool is_host_big_endian(void) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN return false; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN return true; #else # error "Unrecognized __BYTE_ORDER__" @@ -609,8 +592,6 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -652,7 +633,6 @@ int btf__align_of(const struct btf *btf, __u32 id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - case BTF_KIND_TYPE_TAG: return btf__align_of(btf, t->type); case BTF_KIND_ARRAY: return btf__align_of(btf, btf_array(t)->type); @@ -699,12 +679,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) __s32 btf__find_by_name(const struct btf *btf, const char *type_name) { - __u32 i, nr_types = btf__type_cnt(btf); + __u32 i, nr_types = btf__get_nr_types(btf); if (!strcmp(type_name, "void")) return 0; - for (i = 1; i < nr_types; i++) { + for (i = 1; i <= nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name = btf__name_by_offset(btf, t->name_off); @@ -715,15 +695,15 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return libbpf_err(-ENOENT); } -static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, - const char *type_name, __u32 kind) +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) { - __u32 i, nr_types = btf__type_cnt(btf); + __u32 i, nr_types = btf__get_nr_types(btf); if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) return 0; - for (i = start_id; i < nr_types; i++) { + for (i = 1; i <= nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; @@ -737,18 +717,6 @@ static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, return libbpf_err(-ENOENT); } -__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, - __u32 kind) -{ - return btf_find_by_name_kind(btf, btf->start_id, type_name, kind); -} - -__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, - __u32 kind) -{ - return btf_find_by_name_kind(btf, 1, type_name, kind); -} - static bool btf_is_modifiable(const struct btf *btf) { return (void *)btf->hdr != btf->raw_data; @@ -796,7 +764,7 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__type_cnt(base_btf); + btf->start_id = btf__get_nr_types(base_btf) + 1; btf->start_str_off = base_btf->hdr->str_len; } @@ -846,7 +814,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__type_cnt(base_btf); + btf->start_id = btf__get_nr_types(base_btf) + 1; btf->start_str_off = base_btf->hdr->str_len; } @@ -901,7 +869,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, 
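
Note the iteration convention these btf.c hunks revert to: btf__get_nr_types() names the highest valid type ID, so loops run from 1 through the returned value inclusive. A small sketch under that assumption:

#include <bpf/btf.h>

static int count_structs(const struct btf *btf)
{
	__u32 i, n = btf__get_nr_types(btf);
	int nr_structs = 0;

	for (i = 1; i <= n; i++) {	/* type ID 0 is reserved for void */
		const struct btf_type *t = btf__type_by_id(btf, i);

		if (btf_is_struct(t))
			nr_structs++;
	}
	return nr_structs;
}
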
return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } - fd = open(path, O_RDONLY | O_CLOEXEC); + fd = open(path, O_RDONLY); if (fd < 0) { err = -errno; pr_warn("failed to open %s: %s\n", path, strerror(errno)); @@ -1122,88 +1090,149 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse(path, base_btf, NULL)); } +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + + return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = btf_vlen(t); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + const struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + /* .extern datasec size and var offsets were set correctly during + * extern collection step, so just skip straight to sorting variables + */ + if (t->size) + goto sort_vars; + + ret = bpf_object__section_size(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = btf_var(t_var); + + if (!btf_is_var(t_var)) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = bpf_object__variable_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", + name); + return -ENOENT; + } + + vsi->offset = off; + } + +sort_vars: + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i; + + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = btf_type_by_id(btf, i); + + /* Loader needs to fix up some of the things the compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose.
+ */ + if (btf_is_datasec(t)) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return libbpf_err(err); +} + static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf__load_into_kernel(struct btf *btf) { - LIBBPF_OPTS(bpf_btf_load_opts, opts); - __u32 buf_sz = 0, raw_size; - char *buf = NULL, *tmp; + __u32 log_buf_size = 0, raw_size; + char *log_buf = NULL; void *raw_data; int err = 0; if (btf->fd >= 0) return libbpf_err(-EEXIST); - if (log_sz && !log_buf) - return libbpf_err(-EINVAL); - /* cache native raw data representation */ +retry_load: + if (log_buf_size) { + log_buf = malloc(log_buf_size); + if (!log_buf) + return libbpf_err(-ENOMEM); + + *log_buf = 0; + } + raw_data = btf_get_raw_data(btf, &raw_size, false); if (!raw_data) { err = -ENOMEM; goto done; } + /* cache native raw data representation */ btf->raw_size = raw_size; btf->raw_data = raw_data; -retry_load: - /* if log_level is 0, we won't provide log_buf/log_size to the kernel, - * initially. Only if BTF loading fails, we bump log_level to 1 and - * retry, using either auto-allocated or custom log_buf. This way - * non-NULL custom log_buf provides a buffer just in case, but hopes - * for successful load and no need for log_buf. - */ - if (log_level) { - /* if caller didn't provide custom log_buf, we'll keep - * allocating our own progressively bigger buffers for BTF - * verification log - */ - if (!log_buf) { - buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2); - tmp = realloc(buf, buf_sz); - if (!tmp) { - err = -ENOMEM; - goto done; - } - buf = tmp; - buf[0] = '\0'; - } - - opts.log_buf = log_buf ? log_buf : buf; - opts.log_size = log_buf ? 
log_sz : buf_sz; - opts.log_level = log_level; - } - - btf->fd = bpf_btf_load(raw_data, raw_size, &opts); + btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false); if (btf->fd < 0) { - /* time to turn on verbose mode and try again */ - if (log_level == 0) { - log_level = 1; + if (!log_buf || errno == ENOSPC) { + log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, + log_buf_size << 1); + free(log_buf); goto retry_load; } - /* only retry if caller didn't provide custom log_buf, but - * make sure we can never overflow buf_sz - */ - if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2) - goto retry_load; err = -errno; - pr_warn("BTF loading error: %d\n", err); - /* don't print out contents of custom log_buf */ - if (!log_buf && buf[0]) - pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); + pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (*log_buf) + pr_warn("%s\n", log_buf); + goto done; } done: - free(buf); + free(log_buf); return libbpf_err(err); } - -int btf__load_into_kernel(struct btf *btf) -{ - return btf_load_into_kernel(btf, NULL, 0, 0); -} - int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); int btf__fd(const struct btf *btf) @@ -1271,7 +1300,7 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi return NULL; } -const void *btf__raw_data(const struct btf *btf_ro, __u32 *size) +const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) { struct btf *btf = (struct btf *)btf_ro; __u32 data_sz; @@ -1279,7 +1308,7 @@ const void *btf__raw_data(const struct btf *btf_ro, __u32 *size) data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); if (!data) - return errno = ENOMEM, NULL; + return errno = -ENOMEM, NULL; btf->raw_size = data_sz; if (btf->swapped_endian) @@ -1290,9 +1319,6 @@ const void *btf__raw_data(const struct btf *btf_ro, __u32 *size) return data; } -__attribute__((alias("btf__raw_data"))) -const void *btf__get_raw_data(const struct btf *btf, __u32 *size); - const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->start_str_off) @@ -1665,111 +1691,6 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return btf_commit_type(btf, sz); } -static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) -{ - struct btf *btf = ctx; - - if (!*type_id) /* nothing to do for VOID references */ - return 0; - - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; - return 0; -} - -int btf__add_btf(struct btf *btf, const struct btf *src_btf) -{ - struct btf_pipe p = { .src = src_btf, .dst = btf }; - int data_sz, sz, cnt, i, err, old_strs_len; - __u32 *off; - void *t; - - /* appending split BTF isn't supported yet */ - if (src_btf->base_btf) - return libbpf_err(-ENOTSUP); - - /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); - - /* remember original strings section size if we have to roll back - * partial strings section changes - */ - old_strs_len = btf->hdr->str_len; - - data_sz = src_btf->hdr->type_len; - cnt = btf__type_cnt(src_btf) - 1; - - /* pre-allocate enough memory for new types */ - t = btf_add_type_mem(btf, data_sz); - if (!t) - return libbpf_err(-ENOMEM); - - /* pre-allocate enough memory for type offset index for new types */ - off = btf_add_type_offs_mem(btf, cnt); - if 
(!off) - return libbpf_err(-ENOMEM); - - /* bulk copy types data for all types from src_btf */ - memcpy(t, src_btf->types_data, data_sz); - - for (i = 0; i < cnt; i++) { - sz = btf_type_size(t); - if (sz < 0) { - /* unlikely, has to be corrupted src_btf */ - err = sz; - goto err_out; - } - - /* fill out type ID to type offset mapping for lookups by type ID */ - *off = t - btf->types_data; - - /* add, dedup, and remap strings referenced by this BTF type */ - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); - if (err) - goto err_out; - - /* remap all type IDs referenced from this BTF type */ - err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); - if (err) - goto err_out; - - /* go to next type data and type offset index entry */ - t += sz; - off++; - } - - /* Up until now any of the copied type data was effectively invisible, - * so if we exited early before this point due to error, BTF would be - * effectively unmodified. There would be extra internal memory - * pre-allocated, but it would not be available for querying. But now - * that we've copied and rewritten all the data successfully, we can - * update type count and various internal offsets and sizes to - * "commit" the changes and make them visible to the outside world. - */ - btf->hdr->type_len += data_sz; - btf->hdr->str_off += data_sz; - btf->nr_types += cnt; - - /* return type ID of the first added BTF type */ - return btf->start_id + btf->nr_types - cnt; -err_out: - /* zero out preallocated memory as if it was just allocated with - * libbpf_add_mem() - */ - memset(btf->types_data + btf->hdr->type_len, 0, data_sz); - memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); - - /* and now restore original strings section size; types data size - * wasn't modified, so doesn't need restoring, see big comment above */ - btf->hdr->str_len = old_strs_len; - - return libbpf_err(err); -} - /* * Append new BTF_KIND_INT type with: * - *name* - non-empty, non-NULL type name; @@ -2018,7 +1939,7 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) static struct btf_type *btf_last_type(struct btf *btf) { - return btf_type_by_id(btf, btf__type_cnt(btf) - 1); + return btf_type_by_id(btf, btf__get_nr_types(btf)); } /* @@ -2271,22 +2192,6 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id); } -/* - * Append new BTF_KIND_TYPE_TAG type with: - * - *value*, non-empty/non-NULL tag value; - * - *ref_type_id* - referenced type ID, it might not exist yet; - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error. - */ -int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) -{ - if (!value || !value[0]) - return libbpf_err(-EINVAL); - - return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); -} - /* * Append new BTF_KIND_FUNC type with: * - *name*, non-empty/non-NULL name; @@ -2541,48 +2446,6 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ return 0; } -/* - * Append new BTF_KIND_DECL_TAG type with: - * - *value* - non-empty/non-NULL string; - * - *ref_type_id* - referenced type ID, it might not exist yet; - * - *component_idx* - -1 for tagging reference type, otherwise struct/union - * member or function argument index; - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error.
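
/* A minimal caller-side sketch (not part of this patch) of the
 * btf__add_decl_tag() API removed below; the struct type ID (42) and the
 * tag strings are illustrative assumptions:
 */
#include "btf.h"

static int ex_tag_struct(struct btf *btf)
{
	int id;

	/* tag member #1 of the struct with BTF type ID 42 */
	id = btf__add_decl_tag(btf, "user", 42, 1);
	if (id < 0)
		return id;

	/* component_idx == -1 tags the struct type itself */
	return btf__add_decl_tag(btf, "kptr", 42, -1);
}
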
- */ -int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, - int component_idx) -{ - struct btf_type *t; - int sz, value_off; - - if (!value || !value[0] || component_idx < -1) - return libbpf_err(-EINVAL); - - if (validate_type_id(ref_type_id)) - return libbpf_err(-EINVAL); - - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); - - sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag); - t = btf_add_type_mem(btf, sz); - if (!t) - return libbpf_err(-ENOMEM); - - value_off = btf__add_str(btf, value); - if (value_off < 0) - return value_off; - - t->name_off = value_off; - t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); - t->type = ref_type_id; - btf_decl_tag(t)->component_idx = component_idx; - - return btf_commit_type(btf, sz); -} - struct btf_ext_sec_setup_param { __u32 off; __u32 len; @@ -2898,7 +2761,8 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) struct btf_dedup; -static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); static void btf_dedup_free(struct btf_dedup *d); static int btf_dedup_prep(struct btf_dedup *d); static int btf_dedup_strings(struct btf_dedup *d); @@ -3045,17 +2909,12 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * deduplicating structs/unions is described in greater details in comments for * `btf_dedup_is_equiv` function. */ - -DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0) -int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) +int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) { - struct btf_dedup *d; + struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); int err; - if (!OPTS_VALID(opts, btf_dedup_opts)) - return libbpf_err(-EINVAL); - - d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); return libbpf_err(-EINVAL); @@ -3107,19 +2966,6 @@ int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) return libbpf_err(err); } -COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) -int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) -{ - LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); - - if (unused_opts) { - pr_warn("please use new version of btf__dedup() that supports options\n"); - return libbpf_err(-ENOTSUP); - } - - return btf__dedup(btf, &opts); -} - #define BTF_UNPROCESSED_ID ((__u32)-1) #define BTF_IN_PROGRESS_ID ((__u32)-2) @@ -3232,7 +3078,8 @@ static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) return k1 == k2; } -static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts) +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) { struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; @@ -3241,11 +3088,13 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_o if (!d) return ERR_PTR(-ENOMEM); - if (OPTS_GET(opts, force_collisions, false)) + d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; + /* dedup_table_size is now used only to force collisions in tests */ + if (opts && opts->dedup_table_size == 1) hash_fn = btf_dedup_collision_hash_fn; d->btf = btf; - d->btf_ext = OPTS_GET(opts, btf_ext, NULL); + d->btf_ext = btf_ext; 
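
/* A minimal caller-side sketch (not part of this patch) of the
 * three-argument btf__dedup() this revert restores; the opts values are
 * illustrative, and btf_ext may be NULL when there is no .BTF.ext:
 */
#include <stdbool.h>
#include "btf.h"

static int ex_dedup(struct btf *btf, struct btf_ext *btf_ext)
{
	struct btf_dedup_opts opts = {
		/* keep FWDs distinct instead of mapping them to full types */
		.dont_resolve_fwds = true,
	};

	return btf__dedup(btf, btf_ext, &opts);
}
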
d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); if (IS_ERR(d->dedup_table)) { @@ -3254,7 +3103,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_o goto done; } - type_cnt = btf__type_cnt(btf); + type_cnt = btf__get_nr_types(btf) + 1; d->map = malloc(sizeof(__u32) * type_cnt); if (!d->map) { err = -ENOMEM; @@ -3415,8 +3264,8 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } -/* Calculate type signature hash of INT or TAG. */ -static long btf_hash_int_decl_tag(struct btf_type *t) +/* Calculate type signature hash of INT. */ +static long btf_hash_int(struct btf_type *t) { __u32 info = *(__u32 *)(t + 1); long h; @@ -3426,8 +3275,8 @@ static long btf_hash_int_decl_tag(struct btf_type *t) return h; } -/* Check structural equality of two INTs or TAGs. */ -static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2) +/* Check structural equality of two INTs. */ +static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) { __u32 info1, info2; @@ -3691,12 +3540,10 @@ static int btf_dedup_prep(struct btf_dedup *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: - case BTF_KIND_TYPE_TAG: h = btf_hash_common(t); break; case BTF_KIND_INT: - case BTF_KIND_DECL_TAG: - h = btf_hash_int_decl_tag(t); + h = btf_hash_int(t); break; case BTF_KIND_ENUM: h = btf_hash_enum(t); @@ -3751,16 +3598,14 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FUNC_PROTO: case BTF_KIND_VAR: case BTF_KIND_DATASEC: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: return 0; case BTF_KIND_INT: - h = btf_hash_int_decl_tag(t); + h = btf_hash_int(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); - if (btf_equal_int_tag(t, cand)) { + if (btf_equal_int(t, cand)) { new_id = cand_id; break; } @@ -3776,6 +3621,8 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) new_id = cand_id; break; } + if (d->opts.dont_resolve_fwds) + continue; if (btf_compat_enum(t, cand)) { if (btf_is_enum_fwd(t)) { /* resolve fwd to full enum */ @@ -4055,7 +3902,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return 0; /* FWD <--> STRUCT/UNION equivalence check, if enabled */ - if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + if (!d->opts.dont_resolve_fwds + && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) && cand_kind != canon_kind) { __u16 real_kind; __u16 fwd_kind; @@ -4078,10 +3926,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, switch (cand_kind) { case BTF_KIND_INT: - return btf_equal_int_tag(cand_type, canon_type); + return btf_equal_int(cand_type, canon_type); case BTF_KIND_ENUM: - return btf_compat_enum(cand_type, canon_type); + if (d->opts.dont_resolve_fwds) + return btf_equal_enum(cand_type, canon_type); + else + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: case BTF_KIND_FLOAT: @@ -4093,7 +3944,6 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: - case BTF_KIND_TYPE_TAG: if (cand_type->info != canon_type->info) return 0; return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); @@ -4389,7 +4239,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: - case BTF_KIND_TYPE_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); if (ref_type_id 
< 0) return ref_type_id; @@ -4406,23 +4255,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) } break; - case BTF_KIND_DECL_TAG: - ref_type_id = btf_dedup_ref_type(d, t->type); - if (ref_type_id < 0) - return ref_type_id; - t->type = ref_type_id; - - h = btf_hash_int_decl_tag(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = btf_type_by_id(d->btf, cand_id); - if (btf_equal_int_tag(t, cand)) { - new_id = cand_id; - break; - } - } - break; - case BTF_KIND_ARRAY: { struct btf_array *info = btf_array(t); @@ -4695,8 +4527,6 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: return visit(&t->type, ctx); case BTF_KIND_ARRAY: { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 061839f045..b0ee338a0c 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (c) 2018 Facebook */ -/*! \file */ #ifndef __LIBBPF_BTF_H #define __LIBBPF_BTF_H @@ -31,80 +30,11 @@ enum btf_endianness { BTF_BIG_ENDIAN = 1, }; -/** - * @brief **btf__free()** frees all data of a BTF object - * @param btf BTF object to free - */ LIBBPF_API void btf__free(struct btf *btf); -/** - * @brief **btf__new()** creates a new instance of a BTF object from the raw - * bytes of an ELF's BTF section - * @param data raw bytes - * @param size number of bytes passed in `data` - * @return new BTF object instance which has to be eventually freed with - * **btf__free()** - * - * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract - * error code from such a pointer `libbpf_get_error()` should be used. If - * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is - * returned on error instead. In both cases thread-local `errno` variable is - * always set to error code as well. - */ LIBBPF_API struct btf *btf__new(const void *data, __u32 size); - -/** - * @brief **btf__new_split()** create a new instance of a BTF object from the - * provided raw data bytes. It takes another BTF instance, **base_btf**, which - * serves as a base BTF, which is extended by types in a newly created BTF - * instance - * @param data raw bytes - * @param size length of raw bytes - * @param base_btf the base BTF object - * @return new BTF object instance which has to be eventually freed with - * **btf__free()** - * - * If *base_btf* is NULL, `btf__new_split()` is equivalent to `btf__new()` and - * creates non-split BTF. - * - * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract - * error code from such a pointer `libbpf_get_error()` should be used. If - * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is - * returned on error instead. In both cases thread-local `errno` variable is - * always set to error code as well. - */ LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf); - -/** - * @brief **btf__new_empty()** creates an empty BTF object. Use - * `btf__add_*()` to populate such BTF object. - * @return new BTF object instance which has to be eventually freed with - * **btf__free()** - * - * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract - * error code from such a pointer `libbpf_get_error()` should be used. 
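
/* A usage sketch (not part of this patch) of the error convention just
 * described; raw_data/raw_size are assumed inputs and the fprintf()
 * reporting is illustrative:
 */
#include <stdio.h>
#include <string.h>
#include "btf.h"
#include "libbpf.h"

static struct btf *ex_parse(const void *raw_data, __u32 raw_size)
{
	struct btf *btf = btf__new(raw_data, raw_size);
	long err = libbpf_get_error(btf);

	if (err) {
		/* btf encodes the error; it is not a usable pointer here */
		fprintf(stderr, "btf__new: %s\n", strerror(-err));
		return NULL;
	}
	return btf;
}
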
If - * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is - * returned on error instead. In both cases thread-local `errno` variable is - * always set to error code as well. - */ LIBBPF_API struct btf *btf__new_empty(void); - -/** - * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an - * ELF BTF section except with a base BTF on top of which split BTF should be - * based - * @return new BTF object instance which has to be eventually freed with - * **btf__free()** - * - * If *base_btf* is NULL, `btf__new_empty_split()` is equivalent to - * `btf__new_empty()` and creates non-split BTF. - * - * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract - * error code from such a pointer `libbpf_get_error()` should be used. If - * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is - * returned on error instead. In both cases thread-local `errno` variable is - * always set to error code as well. - */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); @@ -120,21 +50,16 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead") LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only") LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead") LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind); -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1") LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); -LIBBPF_API __u32 btf__type_cnt(const struct btf *btf); LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); @@ -147,9 +72,7 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead") LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); -LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, @@ -178,28 +101,6 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type); -/** - * @brief **btf__add_btf()** appends all the BTF types from *src_btf* into *btf* - * @param btf BTF object which all the BTF types and strings are added to - * @param src_btf BTF object which all BTF types and 
referenced strings are copied from - * @return BTF type ID of the first appended BTF type, or negative error code - * - * **btf__add_btf()** can be used to simply and efficiently append the entire - * contents of one BTF object to another one. All the BTF type data is copied - * over, all referenced type IDs are adjusted by adding a necessary ID offset. - * Only strings referenced from BTF types are copied over and deduplicated, so - * if there were some unused strings in *src_btf*, those won't be copied over, - * which is consistent with the general string deduplication semantics of BTF - * writing APIs. - * - * If any error is encountered during this process, the contents of *btf* is - * left intact, which means that **btf__add_btf()** follows the transactional - * semantics and the operation as a whole is all-or-nothing. - * - * *src_btf* has to be non-split BTF, as of now copying types from split BTF - * is not supported and will result in -ENOTSUP error code returned. - */ -LIBBPF_API int btf__add_btf(struct btf *btf, const struct btf *src_btf); LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz); @@ -227,7 +128,6 @@ LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); -LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); /* func and func_proto construction APIs */ LIBBPF_API int btf__add_func(struct btf *btf, const char *name, @@ -241,91 +141,26 @@ LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz); -/* tag construction API */ -LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, - int component_idx); - struct btf_dedup_opts { - size_t sz; - /* optional .BTF.ext info to dedup along the main BTF info */ - struct btf_ext *btf_ext; - /* force hash collisions (used for testing) */ - bool force_collisions; - size_t :0; + unsigned int dedup_table_size; + bool dont_resolve_fwds; }; -#define btf_dedup_opts__last_field force_collisions -LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); - -LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts); - -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead") -LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts); -#define btf__dedup(...) 
___libbpf_overload(___btf_dedup, __VA_ARGS__) -#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts) -#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts) +LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); struct btf_dump; struct btf_dump_opts { - union { - size_t sz; - void *ctx; /* DEPRECATED: will be gone in v1.0 */ - }; + void *ctx; }; typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, - btf_dump_printf_fn_t printf_fn, - void *ctx, - const struct btf_dump_opts *opts); - -LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, - btf_dump_printf_fn_t printf_fn, - void *ctx, - const struct btf_dump_opts *opts); - -LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn); - -/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the - * type of 4th argument. If it's btf_dump's print callback, use deprecated - * API; otherwise, choose the new btf_dump__new(). ___libbpf_override() - * doesn't work here because both variants have 4 input arguments. - * - * (void *) casts are necessary to avoid compilation warnings about type - * mismatches, because even though __builtin_choose_expr() only ever evaluates - * one side the other side still has to satisfy type constraints (this is - * a compiler implementation limitation which might be lifted eventually, - * according to the documentation). So passing struct btf_ext in place of - * btf_dump_printf_fn_t would generate a compilation warning. Casting to - * void * avoids this issue. - * - * Also, two type compatibility checks for a function and function pointer are - * required because passing function reference into btf_dump__new() as - * btf_dump__new(..., my_callback, ...) and as btf_dump__new(..., - * &my_callback, ...) (note explicit ampersand in the latter case) actually - * differs as far as __builtin_types_compatible_p() is concerned. Thus two - * checks are combined to detect callback argument. - * - * The rest works just like in case of ___libbpf_override() usage with symbol - * versioning. - * - * C++ compilers don't support __builtin_types_compatible_p(), so at least - * don't screw up compilation for them and let C++ users pick btf_dump__new - * vs btf_dump__new_deprecated explicitly.
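
/* A sketch (not part of this patch) of the two call shapes the removed
 * compatibility macro had to tell apart; my_printf_fn is a hypothetical
 * callback, and after this revert only the 4-argument shape remains:
 */
#include <stdarg.h>
#include <stdio.h>
#include "btf.h"

static void my_printf_fn(void *ctx, const char *fmt, va_list args)
{
	vprintf(fmt, args);
}

static struct btf_dump *ex_new_dump(const struct btf *btf)
{
	/* post-0.6 shape: btf_dump__new(btf, printf_fn, ctx, opts);
	 * deprecated (and now only) shape:
	 * btf_dump__new(btf, btf_ext, opts, printf_fn)
	 */
	return btf_dump__new(btf, NULL, NULL, my_printf_fn);
}
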
- */ -#ifndef __cplusplus -#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \ - __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \ - btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \ - btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) -#endif - + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn); LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); @@ -465,8 +300,7 @@ static inline bool btf_is_mod(const struct btf_type *t) return kind == BTF_KIND_VOLATILE || kind == BTF_KIND_CONST || - kind == BTF_KIND_RESTRICT || - kind == BTF_KIND_TYPE_TAG; + kind == BTF_KIND_RESTRICT; } static inline bool btf_is_func(const struct btf_type *t) @@ -494,16 +328,6 @@ static inline bool btf_is_float(const struct btf_type *t) return btf_kind(t) == BTF_KIND_FLOAT; } -static inline bool btf_is_decl_tag(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_DECL_TAG; -} - -static inline bool btf_is_type_tag(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_TYPE_TAG; -} - static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); @@ -572,12 +396,6 @@ btf_var_secinfos(const struct btf_type *t) return (struct btf_var_secinfo *)(t + 1); } -struct btf_decl_tag; -static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) -{ - return (struct btf_decl_tag *)(t + 1); -} - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b9a3260c83..8c93258027 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -77,8 +77,9 @@ struct btf_dump_data { struct btf_dump { const struct btf *btf; + const struct btf_ext *btf_ext; btf_dump_printf_fn_t printf_fn; - void *cb_ctx; + struct btf_dump_opts opts; int ptr_sz; bool strip_mods; bool skip_anon_defs; @@ -137,32 +138,29 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) va_list args; va_start(args, fmt); - d->printf_fn(d->cb_ctx, fmt, args); + d->printf_fn(d->opts.ctx, fmt, args); va_end(args); } static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_resize(struct btf_dump *d); -DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0) -struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, - btf_dump_printf_fn_t printf_fn, - void *ctx, - const struct btf_dump_opts *opts) +struct btf_dump *btf_dump__new(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn) { struct btf_dump *d; int err; - if (!printf_fn) - return libbpf_err_ptr(-EINVAL); - d = calloc(1, sizeof(struct btf_dump)); if (!d) return libbpf_err_ptr(-ENOMEM); d->btf = btf; + d->btf_ext = btf_ext; d->printf_fn = printf_fn; - d->cb_ctx = ctx; + d->opts.ctx = opts ? opts->ctx : NULL; d->ptr_sz = btf__pointer_size(btf) ? 
: sizeof(void *); d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); @@ -188,20 +186,9 @@ struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, return libbpf_err_ptr(err); } -COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4) -struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn) -{ - if (!printf_fn) - return libbpf_err_ptr(-EINVAL); - return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts); -} - static int btf_dump_resize(struct btf_dump *d) { - int err, last_id = btf__type_cnt(d->btf) - 1; + int err, last_id = btf__get_nr_types(d->btf); if (last_id <= d->last_id) return 0; @@ -275,7 +262,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) { int err, i; - if (id >= btf__type_cnt(d->btf)) + if (id > btf__get_nr_types(d->btf)) return libbpf_err(-EINVAL); err = btf_dump_resize(d); @@ -307,11 +294,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) */ static int btf_dump_mark_referenced(struct btf_dump *d) { - int i, j, n = btf__type_cnt(d->btf); + int i, j, n = btf__get_nr_types(d->btf); const struct btf_type *t; __u16 vlen; - for (i = d->last_id + 1; i < n; i++) { + for (i = d->last_id + 1; i <= n; i++) { t = btf__type_by_id(d->btf, i); vlen = btf_vlen(t); @@ -329,8 +316,6 @@ static int btf_dump_mark_referenced(struct btf_dump *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: d->type_states[t->type].referenced = 1; break; @@ -574,7 +559,6 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - case BTF_KIND_TYPE_TAG: return btf_dump_order_type(d, t->type, through_ptr); case BTF_KIND_FUNC_PROTO: { @@ -599,7 +583,6 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DATASEC: - case BTF_KIND_DECL_TAG: d->type_states[id].order_state = ORDERED; return 0; @@ -749,7 +732,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - case BTF_KIND_TYPE_TAG: btf_dump_emit_type(d, t->type, cont_id); break; case BTF_KIND_ARRAY: @@ -1170,7 +1152,6 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_FUNC_PROTO: - case BTF_KIND_TYPE_TAG: id = t->type; break; case BTF_KIND_ARRAY: @@ -1339,11 +1320,6 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_RESTRICT: btf_dump_printf(d, " restrict"); break; - case BTF_KIND_TYPE_TAG: - btf_dump_emit_mods(d, decls); - name = btf_name_of(d, t->name_off); - btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); - break; case BTF_KIND_ARRAY: { const struct btf_array *a = btf_array(t); const struct btf_type *next_t; @@ -1584,28 +1560,29 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d, __u64 *value) { __u16 left_shift_bits, right_shift_bits; + __u8 nr_copy_bits, nr_copy_bytes; const __u8 *bytes = data; - __u8 nr_copy_bits; + int sz = t->size; __u64 num = 0; int i; /* Maximum supported bitfield size is 64 bits */ - if (t->size > 8) { - pr_warn("unexpected bitfield size %d\n", t->size); + if (sz > 8) { + pr_warn("unexpected bitfield size %d\n", sz); return -EINVAL; } /* Bitfield value retrieval is done in two steps; first relevant bytes 
are * stored in num, then we left/right shift num to eliminate irrelevant bits. */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - for (i = t->size - 1; i >= 0; i--) - num = num * 256 + bytes[i]; nr_copy_bits = bit_sz + bits_offset; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (i = 0; i < t->size; i++) + nr_copy_bytes = t->size; +#if __BYTE_ORDER == __LITTLE_ENDIAN + for (i = nr_copy_bytes - 1; i >= 0; i--) + num = num * 256 + bytes[i]; +#elif __BYTE_ORDER == __BIG_ENDIAN + for (i = 0; i < nr_copy_bytes; i++) num = num * 256 + bytes[i]; - nr_copy_bits = t->size * 8 - bits_offset; #else # error "Unrecognized __BYTE_ORDER__" #endif @@ -1679,15 +1656,9 @@ static int btf_dump_base_type_check_zero(struct btf_dump *d, return 0; } -static bool ptr_is_aligned(const struct btf *btf, __u32 type_id, - const void *data) +static bool ptr_is_aligned(const void *data, int data_sz) { - int alignment = btf__align_of(btf, type_id); - - if (alignment == 0) - return false; - - return ((uintptr_t)data) % alignment == 0; + return ((uintptr_t)data) % data_sz == 0; } static int btf_dump_int_data(struct btf_dump *d, @@ -1698,10 +1669,9 @@ static int btf_dump_int_data(struct btf_dump *d, { __u8 encoding = btf_int_encoding(t); bool sign = encoding & BTF_INT_SIGNED; - char buf[16] __attribute__((aligned(16))); int sz = t->size; - if (sz == 0 || sz > sizeof(buf)) { + if (sz == 0) { pr_warn("unexpected size %d for id [%u]\n", sz, type_id); return -EINVAL; } @@ -1709,10 +1679,8 @@ static int btf_dump_int_data(struct btf_dump *d, /* handle packed int data - accesses of integers not aligned on * int boundaries can cause problems on some platforms. */ - if (!ptr_is_aligned(d->btf, type_id, data)) { - memcpy(buf, data, sz); - data = buf; - } + if (!ptr_is_aligned(data, sz)) + return btf_dump_bitfield_data(d, t, data, 0, 0); switch (sz) { case 16: { @@ -1722,10 +1690,10 @@ static int btf_dump_int_data(struct btf_dump *d, /* avoid use of __int128 as some 32-bit platforms do not * support it. 
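
/* A sketch (not part of this patch) of the lsi/msi split used above: a
 * 16-byte integer is printed from two 64-bit halves so 32-bit targets
 * without __int128 still work; which half is "low" follows host byte order.
 */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

static void ex_print_int128(const void *data)
{
	const uint64_t *ints = data;
	uint64_t lsi, msi;

#if __BYTE_ORDER == __LITTLE_ENDIAN
	lsi = ints[0];
	msi = ints[1];
#else
	lsi = ints[1];
	msi = ints[0];
#endif
	if (msi)
		printf("0x%llx%016llx", (unsigned long long)msi,
		       (unsigned long long)lsi);
	else
		printf("0x%llx", (unsigned long long)lsi);
}
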
*/ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN lsi = ints[0]; msi = ints[1]; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN lsi = ints[1]; msi = ints[0]; #else @@ -1798,7 +1766,7 @@ static int btf_dump_float_data(struct btf_dump *d, int sz = t->size; /* handle unaligned data; copy to local union */ - if (!ptr_is_aligned(d->btf, type_id, data)) { + if (!ptr_is_aligned(data, sz)) { memcpy(&fl, data, sz); flp = &fl; } @@ -1961,7 +1929,7 @@ static int btf_dump_ptr_data(struct btf_dump *d, __u32 id, const void *data) { - if (ptr_is_aligned(d->btf, id, data) && d->ptr_sz == sizeof(void *)) { + if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { btf_dump_type_values(d, "%p", *(void **)data); } else { union ptr_data pt; @@ -1981,8 +1949,10 @@ static int btf_dump_get_enum_value(struct btf_dump *d, __u32 id, __s64 *value) { + int sz = t->size; + /* handle unaligned enum value */ - if (!ptr_is_aligned(d->btf, id, data)) { + if (!ptr_is_aligned(data, sz)) { __u64 val; int err; @@ -2245,7 +2215,6 @@ static int btf_dump_dump_type_data(struct btf_dump *d, case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: - case BTF_KIND_DECL_TAG: err = btf_dump_unsupported_data(d, t, id); break; case BTF_KIND_INT: @@ -2321,8 +2290,8 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, if (!opts->indent_str) d->typed_dump->indent_str[0] = '\t'; else - libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, - sizeof(d->typed_dump->indent_str)); + strncat(d->typed_dump->indent_str, opts->indent_str, + sizeof(d->typed_dump->indent_str) - 1); d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 8ecef1088b..33c19590ee 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -5,7 +5,6 @@ #include #include #include -#include #include "btf.h" #include "bpf.h" #include "libbpf.h" @@ -13,12 +12,9 @@ #include "hashmap.h" #include "bpf_gen_internal.h" #include "skel_internal.h" -#include -#define MAX_USED_MAPS 64 -#define MAX_USED_PROGS 32 -#define MAX_KFUNC_DESCS 256 -#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS) +#define MAX_USED_MAPS 64 +#define MAX_USED_PROGS 32 /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. 
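
/* A sketch (not part of this patch) of how the loader addresses this
 * stack: stack_off() (its body is elided by the hunk boundary above)
 * yields a negative byte offset from the BPF frame pointer R10. The
 * mirror struct below just illustrates the arithmetic for the reverted
 * layout:
 */
#include <stddef.h>

struct ex_loader_stack {
	unsigned int btf_fd;
	unsigned int map_fd[64];	/* MAX_USED_MAPS */
	unsigned int prog_fd[32];	/* MAX_USED_PROGS */
	unsigned int inner_map_fd;
};

/* offset of map_fd[idx] relative to R10, i.e. stack_off(map_fd[idx]) */
static int ex_map_fd_off(int idx)
{
	return -(int)sizeof(struct ex_loader_stack) +
	       (int)(offsetof(struct ex_loader_stack, map_fd) +
		     idx * sizeof(unsigned int));
}
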
@@ -33,8 +29,9 @@ */ struct loader_stack { __u32 btf_fd; - __u32 inner_map_fd; + __u32 map_fd[MAX_USED_MAPS]; __u32 prog_fd[MAX_USED_PROGS]; + __u32 inner_map_fd; }; #define stack_off(field) \ @@ -42,11 +39,6 @@ struct loader_stack { #define attr_field(attr, field) (attr + offsetof(union bpf_attr, field)) -static int blob_fd_array_off(struct bpf_gen *gen, int index) -{ - return gen->fd_array + index * sizeof(int); -} - static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) { size_t off = gen->insn_cur - gen->insn_start; @@ -107,15 +99,11 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in emit(gen, insn2); } -static int add_data(struct bpf_gen *gen, const void *data, __u32 size); -static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off); - -void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps) +void bpf_gen__init(struct bpf_gen *gen, int log_level) { - size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz; + size_t stack_sz = sizeof(struct loader_stack); int i; - gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); gen->log_level = log_level; /* save ctx pointer into R6 */ emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); @@ -127,27 +115,19 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); - /* amount of stack actually used, only used to calculate iterations, not stack offset */ - nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]); /* jump over cleanup code */ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, - /* size of cleanup code below (including map fd cleanup) */ - (nr_progs_sz / 4) * 3 + 2 + - /* 6 insns for emit_sys_close_blob, - * 6 insns for debug_regs in emit_sys_close_blob - */ - nr_maps * (6 + (gen->log_level ? 6 : 0)))); + /* size of cleanup code below */ + (stack_sz / 4) * 3 + 2)); /* remember the label where all error branches will jump to */ gen->cleanup_label = gen->insn_cur - gen->insn_start; /* emit cleanup code: close all temp FDs */ - for (i = 0; i < nr_progs_sz; i += 4) { + for (i = 0; i < stack_sz; i += 4) { emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i)); emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); } - for (i = 0; i < nr_maps; i++) - emit_sys_close_blob(gen, blob_fd_array_off(gen, i)); /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); emit(gen, BPF_EXIT_INSN()); @@ -155,47 +135,16 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps static int add_data(struct bpf_gen *gen, const void *data, __u32 size) { - __u32 size8 = roundup(size, 8); - __u64 zero = 0; void *prev; - if (realloc_data_buf(gen, size8)) + if (realloc_data_buf(gen, size)) return 0; prev = gen->data_cur; - if (data) { - memcpy(gen->data_cur, data, size); - memcpy(gen->data_cur + size, &zero, size8 - size); - } else { - memset(gen->data_cur, 0, size8); - } - gen->data_cur += size8; + memcpy(gen->data_cur, data, size); + gen->data_cur += size; return prev - gen->data_start; } -/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative - * to start of fd_array. Caller can decide if it is usable or not. 
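
/* A sketch (not part of this patch) of the fd_array layout whose helpers
 * are removed below: map FDs occupy the first MAX_USED_MAPS slots, kfunc
 * module BTF FDs follow, and a slot index maps to a byte offset inside
 * the loader's data blob. Constants mirror the removed code and
 * fd_array_base stands in for gen->fd_array:
 */
#define EX_MAX_USED_MAPS	64
#define EX_MAX_KFUNC_DESCS	256

static int ex_blob_fd_array_off(int fd_array_base, int index)
{
	/* each slot holds one 4-byte FD */
	return fd_array_base + index * (int)sizeof(int);
}

static int ex_kfunc_slot(int nr_fd_array)
{
	/* kfunc BTF FDs start right after the reserved map slots */
	return EX_MAX_USED_MAPS + nr_fd_array;
}
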
- */ -static int add_map_fd(struct bpf_gen *gen) -{ - if (gen->nr_maps == MAX_USED_MAPS) { - pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); - gen->error = -E2BIG; - return 0; - } - return gen->nr_maps++; -} - -static int add_kfunc_btf_fd(struct bpf_gen *gen) -{ - int cur; - - if (gen->nr_fd_array == MAX_KFUNC_DESCS) { - cur = add_data(gen, NULL, sizeof(int)); - return (cur - gen->fd_array) / sizeof(int); - } - return MAX_USED_MAPS + gen->nr_fd_array++; -} - static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -217,22 +166,14 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data) emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); } -static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off) +/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */ +static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off) { - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, blob_off)); - emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0)); + emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10)); + emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off)); - emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); -} - -static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off) -{ - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, blob_off)); - emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0)); - emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); + emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); } static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, @@ -367,16 +308,10 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) __emit_sys_close(gen); } -int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) +int bpf_gen__finish(struct bpf_gen *gen) { int i; - if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) { - pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n", - nr_progs, gen->nr_progs, nr_maps, gen->nr_maps); - gen->error = -EFAULT; - return gen->error; - } emit_sys_close_stack(gen, stack_off(btf_fd)); for (i = 0; i < gen->nr_progs; i++) move_stack2ctx(gen, @@ -386,11 +321,11 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) offsetof(struct bpf_prog_desc, prog_fd), 4, stack_off(prog_fd[i])); for (i = 0; i < gen->nr_maps; i++) - move_blob2ctx(gen, - sizeof(struct bpf_loader_ctx) + - sizeof(struct bpf_map_desc) * i + - offsetof(struct bpf_map_desc, map_fd), 4, - blob_fd_array_off(gen, i)); + move_stack2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * i + + offsetof(struct bpf_map_desc, map_fd), 4, + stack_off(map_fd[i])); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); pr_debug("gen: finish %d\n", gen->error); @@ -446,32 +381,46 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, } void bpf_gen__map_create(struct bpf_gen *gen, - enum bpf_map_type map_type, - const char *map_name, - __u32 key_size, __u32 value_size, __u32 max_entries, - struct bpf_map_create_opts *map_attr, int map_idx) + struct bpf_create_map_attr *map_attr, int map_idx) { - int attr_size = offsetofend(union bpf_attr, map_extra); + int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); bool close_inner_map_fd = false; - int 
map_create_attr, idx; + int map_create_attr; union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; + attr.map_type = map_attr->map_type; + attr.key_size = map_attr->key_size; + attr.value_size = map_attr->value_size; attr.map_flags = map_attr->map_flags; - attr.map_extra = map_attr->map_extra; - if (map_name) - libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); + memcpy(attr.map_name, map_attr->name, + min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = map_attr->numa_node; attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = max_entries; - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; + attr.max_entries = map_attr->max_entries; + switch (attr.map_type) { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_RINGBUF: + break; + default: + attr.btf_key_type_id = map_attr->btf_key_type_id; + attr.btf_value_type_id = map_attr->btf_value_type_id; + } pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_type, attr.btf_value_type_id); + attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); if (attr.btf_value_type_id) @@ -498,7 +447,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_type, value_size, + attr.map_name, map_idx, map_attr->map_type, attr.value_size, attr.btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ @@ -513,11 +462,9 @@ void bpf_gen__map_create(struct bpf_gen *gen, gen->error = -EDOM; /* internal bug */ return; } else { - /* add_map_fd does gen->nr_maps++ */ - idx = add_map_fd(gen); - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, blob_fd_array_off(gen, idx))); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, + stack_off(map_fd[map_idx]))); + gen->nr_maps++; } if (close_inner_map_fd) emit_sys_close_stack(gen, stack_off(inner_map_fd)); @@ -559,8 +506,8 @@ static void emit_find_attach_target(struct bpf_gen *gen) */ } -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, - bool is_typeless, int kind, int insn_idx) +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, + int insn_idx) { struct ksym_relo_desc *relo; @@ -572,313 +519,38 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, gen->relos = relo; relo += gen->relo_cnt; relo->name = name; - relo->is_weak = is_weak; - relo->is_typeless = is_typeless; relo->kind = kind; relo->insn_idx = insn_idx; gen->relo_cnt++; } -/* returns existing ksym_desc with ref incremented, or inserts a new one */ -static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) +static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int 
insns) { - struct ksym_desc *kdesc; - int i; + int name, insn, len = strlen(relo->name) + 1; - for (i = 0; i < gen->nr_ksyms; i++) { - if (!strcmp(gen->ksyms[i].name, relo->name)) { - gen->ksyms[i].ref++; - return &gen->ksyms[i]; - } - } - kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); - if (!kdesc) { - gen->error = -ENOMEM; - return NULL; - } - gen->ksyms = kdesc; - kdesc = &gen->ksyms[gen->nr_ksyms++]; - kdesc->name = relo->name; - kdesc->kind = relo->kind; - kdesc->ref = 1; - kdesc->off = 0; - kdesc->insn = 0; - return kdesc; -} + pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx); + name = add_data(gen, relo->name, len); -/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} - * Returns result in BPF_REG_7 - */ -static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo) -{ - int name_off, len = strlen(relo->name) + 1; - - name_off = add_data(gen, relo->name, len); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, name_off)); + 0, 0, 0, name)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); -} - -/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} - * Returns result in BPF_REG_7 - * Returns u64 symbol addr in BPF_REG_9 - */ -static void emit_bpf_kallsyms_lookup_name(struct bpf_gen *gen, struct ksym_relo_desc *relo) -{ - int name_off, len = strlen(relo->name) + 1, res_off; - - name_off = add_data(gen, relo->name, len); - res_off = add_data(gen, NULL, 8); /* res is u64 */ - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, name_off)); - emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); - emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_4, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, res_off)); - emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_4)); - emit(gen, BPF_EMIT_CALL(BPF_FUNC_kallsyms_lookup_name)); - emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0)); - emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); - debug_ret(gen, "kallsyms_lookup_name(%s,%d)", relo->name, relo->kind); -} - -/* Expects: - * BPF_REG_8 - pointer to instruction - * - * We need to reuse BTF fd for same symbol otherwise each relocation takes a new - * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols, - * this would mean a new BTF fd index for each entry. By pairing symbol name - * with index, we get the insn->imm, insn->off pairing that kernel uses for - * kfunc_tab, which becomes the effective limit even though all of them may - * share same index in fd_array (such that kfunc_btf_tab has 1 element). 
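
/* A sketch (not part of this patch) of the (imm, off) pairing described
 * above: for a kfunc call the loader writes the kfunc's BTF type ID into
 * insn->imm and, for module BTF, the fd_array slot index into insn->off
 * (off 0 is understood as vmlinux BTF). The helper and values are
 * illustrative:
 */
#include <linux/bpf.h>

static void ex_patch_kfunc_call(struct bpf_insn *insn, __s32 btf_id, __s16 fd_idx)
{
	insn->imm = btf_id;	/* kfunc BTF type ID */
	insn->off = fd_idx;	/* fd_array slot holding the module BTF FD */
}
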
- */ -static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) -{ - struct ksym_desc *kdesc; - int btf_fd_idx; - - kdesc = get_ksym_desc(gen, relo); - if (!kdesc) - return; - /* try to copy from existing bpf_insn */ - if (kdesc->ref > 1) { - move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, - kdesc->insn + offsetof(struct bpf_insn, imm)); - move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2, - kdesc->insn + offsetof(struct bpf_insn, off)); - goto log; - } - /* remember insn offset, so we can copy BTF ID and FD later */ - kdesc->insn = insn; - emit_bpf_find_by_name_kind(gen, relo); - if (!relo->is_weak) - emit_check_err(gen); - /* get index in fd_array to store BTF FD at */ - btf_fd_idx = add_kfunc_btf_fd(gen); - if (btf_fd_idx > INT16_MAX) { - pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n", - btf_fd_idx, relo->name); - gen->error = -E2BIG; - return; - } - kdesc->off = btf_fd_idx; - /* jump to success case */ - emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); - /* set value for imm, off as 0 */ - emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); - /* skip success case for ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10)); - /* store btf_id into insn[insn_idx].imm */ - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); - /* obtain fd in BPF_REG_9 */ - emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); - /* jump to fd_array store if fd denotes module BTF */ - emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); - /* set the default value for off */ - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); - /* skip BTF fd store for vmlinux BTF */ - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); - /* load fd_array slot pointer */ - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); - /* store BTF fd in slot */ - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); - /* store index into insn[insn_idx].off */ - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); -log: - if (!gen->log_level) - return; - emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, - offsetof(struct bpf_insn, imm))); - emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, - offsetof(struct bpf_insn, off))); - debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d", - relo->name, kdesc->ref); - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, blob_fd_array_off(gen, kdesc->off))); - emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0)); - debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd", - relo->name, kdesc->ref); -} - -static void emit_ksym_relo_log(struct bpf_gen *gen, struct ksym_relo_desc *relo, - int ref) -{ - if (!gen->log_level) - return; - emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, - offsetof(struct bpf_insn, imm))); - emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + - offsetof(struct bpf_insn, imm))); - debug_regs(gen, BPF_REG_7, BPF_REG_9, " var t=%d w=%d (%s:count=%d): imm[0]: %%d, imm[1]: %%d", - relo->is_typeless, relo->is_weak, relo->name, ref); - emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); - debug_regs(gen, BPF_REG_9, -1, " var t=%d w=%d (%s:count=%d): insn.reg", - 
relo->is_typeless, relo->is_weak, relo->name, ref); -} - -/* Expects: - * BPF_REG_8 - pointer to instruction - */ -static void emit_relo_ksym_typeless(struct bpf_gen *gen, - struct ksym_relo_desc *relo, int insn) -{ - struct ksym_desc *kdesc; - - kdesc = get_ksym_desc(gen, relo); - if (!kdesc) - return; - /* try to copy from existing ldimm64 insn */ - if (kdesc->ref > 1) { - move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, - kdesc->insn + offsetof(struct bpf_insn, imm)); - move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, - kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); - goto log; - } - /* remember insn offset, so we can copy ksym addr later */ - kdesc->insn = insn; - /* skip typeless ksym_desc in fd closing loop in cleanup_relos */ - kdesc->typeless = true; - emit_bpf_kallsyms_lookup_name(gen, relo); - emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_7, -ENOENT, 1)); emit_check_err(gen); - /* store lower half of addr into insn[insn_idx].imm */ - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, offsetof(struct bpf_insn, imm))); - /* store upper half of addr into insn[insn_idx + 1].imm */ - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, - sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); -log: - emit_ksym_relo_log(gen, relo, kdesc->ref); -} - -static __u32 src_reg_mask(void) -{ -#if defined(__LITTLE_ENDIAN_BITFIELD) - return 0x0f; /* src_reg,dst_reg,... */ -#elif defined(__BIG_ENDIAN_BITFIELD) - return 0xf0; /* dst_reg,src_reg,... */ -#else -#error "Unsupported bit endianness, cannot proceed" -#endif -} - -/* Expects: - * BPF_REG_8 - pointer to instruction - */ -static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) -{ - struct ksym_desc *kdesc; - __u32 reg_mask; - - kdesc = get_ksym_desc(gen, relo); - if (!kdesc) - return; - /* try to copy from existing ldimm64 insn */ - if (kdesc->ref > 1) { - move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, - kdesc->insn + offsetof(struct bpf_insn, imm)); - move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, - kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); - /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ - emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3)); - goto clear_src_reg; - } - /* remember insn offset, so we can copy BTF ID and FD later */ - kdesc->insn = insn; - emit_bpf_find_by_name_kind(gen, relo); - if (!relo->is_weak) - emit_check_err(gen); - /* jump to success case */ - emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); - /* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */ - emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); - emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); - /* skip success case for ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* store btf_id into insn[insn_idx].imm */ - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); - /* store btf_obj_fd into insn[insn_idx + 1].imm */ - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, - sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); - /* skip src_reg adjustment */ - emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); -clear_src_reg: - /* clear bpf_object__relocate_data's src_reg assignment, otherwise we 
get a verifier failure */ - reg_mask = src_reg_mask(); - emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); - emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); - emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); - - emit_ksym_relo_log(gen, relo, kdesc->ref); -} - -void bpf_gen__record_relo_core(struct bpf_gen *gen, - const struct bpf_core_relo *core_relo) -{ - struct bpf_core_relo *relos; - - relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos)); - if (!relos) { - gen->error = -ENOMEM; - return; - } - gen->core_relos = relos; - relos += gen->core_relo_cnt; - memcpy(relos, core_relo, sizeof(*relos)); - gen->core_relo_cnt++; -} - -static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) -{ - int insn; - - pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); - insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); - switch (relo->kind) { - case BTF_KIND_VAR: - if (relo->is_typeless) - emit_relo_ksym_typeless(gen, relo, insn); - else - emit_relo_ksym_btf(gen, relo, insn); - break; - case BTF_KIND_FUNC: - emit_relo_kfunc_btf(gen, relo, insn); - break; - default: - pr_warn("Unknown relocation kind '%d'\n", relo->kind); - gen->error = -EDOM; - return; + insn = insns + sizeof(struct bpf_insn) * relo->insn_idx + + offsetof(struct bpf_insn, imm); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, insn)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0)); + if (relo->kind == BTF_KIND_VAR) { + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + sizeof(struct bpf_insn))); } } @@ -890,68 +562,48 @@ static void emit_relos(struct bpf_gen *gen, int insns) emit_relo(gen, gen->relos + i, insns); } -static void cleanup_core_relo(struct bpf_gen *gen) -{ - if (!gen->core_relo_cnt) - return; - free(gen->core_relos); - gen->core_relo_cnt = 0; - gen->core_relos = NULL; -} - static void cleanup_relos(struct bpf_gen *gen, int insns) { int i, insn; - for (i = 0; i < gen->nr_ksyms; i++) { - /* only close fds for typed ksyms and kfuncs */ - if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) { - /* close fd recorded in insn[insn_idx + 1].imm */ - insn = gen->ksyms[i].insn; - insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); - emit_sys_close_blob(gen, insn); - } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) { - emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); - if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) - gen->nr_fd_array--; - } - } - if (gen->nr_ksyms) { - free(gen->ksyms); - gen->nr_ksyms = 0; - gen->ksyms = NULL; + for (i = 0; i < gen->relo_cnt; i++) { + if (gen->relos[i].kind != BTF_KIND_VAR) + continue; + /* close fd recorded in insn[insn_idx + 1].imm */ + insn = insns + + sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) + + offsetof(struct bpf_insn, imm); + emit_sys_close_blob(gen, insn); } if (gen->relo_cnt) { free(gen->relos); gen->relo_cnt = 0; gen->relos = NULL; } - cleanup_core_relo(gen); } void bpf_gen__prog_load(struct bpf_gen *gen, - enum bpf_prog_type prog_type, const char *prog_name, - const char *license, struct bpf_insn *insns, size_t insn_cnt, - struct bpf_prog_load_opts *load_attr, int prog_idx) + struct bpf_prog_load_params *load_attr, int 
prog_idx) { - int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; - int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); + int attr_size = offsetofend(union bpf_attr, fd_array); + int prog_load_attr, license, insns, func_info, line_info; union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", - prog_type, insn_cnt, prog_idx); + pr_debug("gen: prog_load: type %d insns_cnt %zd\n", + load_attr->prog_type, load_attr->insn_cnt); /* add license string to blob of bytes */ - license_off = add_data(gen, license, strlen(license) + 1); + license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1); /* add insns to blob of bytes */ - insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + insns = add_data(gen, load_attr->insns, + load_attr->insn_cnt * sizeof(struct bpf_insn)); - attr.prog_type = prog_type; + attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; attr.attach_btf_id = load_attr->attach_btf_id; attr.prog_ifindex = load_attr->prog_ifindex; attr.kern_version = 0; - attr.insn_cnt = (__u32)insn_cnt; + attr.insn_cnt = (__u32)load_attr->insn_cnt; attr.prog_flags = load_attr->prog_flags; attr.func_info_rec_size = load_attr->func_info_rec_size; @@ -964,19 +616,15 @@ void bpf_gen__prog_load(struct bpf_gen *gen, line_info = add_data(gen, load_attr->line_info, attr.line_info_cnt * attr.line_info_rec_size); - attr.core_relo_rec_size = sizeof(struct bpf_core_relo); - attr.core_relo_cnt = gen->core_relo_cnt; - core_relos = add_data(gen, gen->core_relos, - attr.core_relo_cnt * attr.core_relo_rec_size); - - libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); + memcpy(attr.prog_name, load_attr->name, + min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); prog_load_attr = add_data(gen, &attr, attr_size); /* populate union bpf_attr with a pointer to license */ - emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); + emit_rel_store(gen, attr_field(prog_load_attr, license), license); /* populate union bpf_attr with a pointer to instructions */ - emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off); + emit_rel_store(gen, attr_field(prog_load_attr, insns), insns); /* populate union bpf_attr with a pointer to func_info */ emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); @@ -984,11 +632,9 @@ void bpf_gen__prog_load(struct bpf_gen *gen, /* populate union bpf_attr with a pointer to line_info */ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); - /* populate union bpf_attr with a pointer to core_relos */ - emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos); - - /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ - emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); + /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */ + emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array), + stack_off(map_fd[0])); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, @@ -1011,12 +657,12 @@ void bpf_gen__prog_load(struct bpf_gen *gen, emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(union bpf_attr, attach_btf_obj_fd))); } - emit_relos(gen, insns_off); + emit_relos(gen, insns); /* emit PROG_LOAD command */ emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size); 
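For orientation, the sys_bpf(BPF_PROG_LOAD, ...) command emitted here is the ordinary bpf(2) syscall with a blob-resident union bpf_attr whose pointer fields (license, insns, fd_array, ...) were patched in by the emit_rel_store()/move_*() calls above. A minimal direct-syscall sketch of the same end result, assuming a 64-bit host and a reasonably current linux/bpf.h; this is plain illustration, not libbpf code:

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Load a trivial "r0 = 0; exit" socket filter the way the
	 * generated loader ultimately does: fill union bpf_attr and
	 * invoke the bpf() syscall directly.
	 */
	static int load_ret0_prog(void)
	{
		struct bpf_insn insns[] = {
			{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .imm = 0 },
			{ .code = BPF_JMP | BPF_EXIT },
		};
		union bpf_attr attr = {};

		attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
		attr.insns = (__u64)(unsigned long)insns;
		attr.insn_cnt = 2;
		attr.license = (__u64)(unsigned long)"GPL";

		return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	}

The gen_loader difference is only in when the pointers become valid: they reference offsets inside the loader blob, so they cannot be absolute at generation time and are relocated at skeleton-load time instead.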
debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ - cleanup_relos(gen, insns_off); + cleanup_relos(gen, insns); if (gen->attach_kind) { emit_sys_close_blob(gen, attr_field(prog_load_attr, attach_btf_obj_fd)); @@ -1057,8 +703,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); map_update_attr = add_data(gen, &attr, attr_size); - move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, - blob_fd_array_off(gen, map_idx)); + move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4, + stack_off(map_fd[map_idx])); emit_rel_store(gen, attr_field(map_update_attr, key), key); emit_rel_store(gen, attr_field(map_update_attr, value), value); /* emit MAP_UPDATE_ELEM command */ @@ -1067,33 +713,6 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit_check_err(gen); } -void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot, - int inner_map_idx) -{ - int attr_size = offsetofend(union bpf_attr, flags); - int map_update_attr, key; - union bpf_attr attr; - - memset(&attr, 0, attr_size); - pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", - outer_map_idx, slot, inner_map_idx); - - key = add_data(gen, &slot, sizeof(slot)); - - map_update_attr = add_data(gen, &attr, attr_size); - move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, - blob_fd_array_off(gen, outer_map_idx)); - emit_rel_store(gen, attr_field(map_update_attr, key), key); - emit_rel_store(gen, attr_field(map_update_attr, value), - blob_fd_array_off(gen, inner_map_idx)); - - /* emit MAP_UPDATE_ELEM command */ - emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size); - debug_ret(gen, "populate_outer_map outer %d key %d inner %d", - outer_map_idx, slot, inner_map_idx); - emit_check_err(gen); -} - void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) { int attr_size = offsetofend(union bpf_attr, map_fd); @@ -1103,8 +722,8 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) memset(&attr, 0, attr_size); pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); - move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, - blob_fd_array_off(gen, map_idx)); + move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4, + stack_off(map_fd[map_idx])); /* emit MAP_FREEZE command */ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size); debug_ret(gen, "map_freeze"); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a..0ad29203cb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -168,24 +168,37 @@ int libbpf_set_strict_mode(enum libbpf_strict_mode mode) return 0; } -__u32 libbpf_major_version(void) -{ - return LIBBPF_MAJOR_VERSION; -} +enum kern_feature_id { + /* v4.14: kernel support for program & map names. */ + FEAT_PROG_NAME, + /* v5.2: kernel support for global data sections. 
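The FEAT_* entries being (re)introduced in this hunk are probed lazily by kernel_supports(): attempt a minimal operation and interpret acceptance or rejection as the feature bit. A hedged sketch of how FEAT_PROG_NAME can be probed — modeled on the general probe pattern, not copied from libbpf, with per-object result caching omitted:

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Kernels before v4.14 reject a non-zero prog_name (unknown
	 * trailing attr bytes must be zero), so a tiny named program
	 * load doubles as the feature probe.
	 */
	static int probe_prog_name(void)
	{
		struct bpf_insn insns[] = {
			{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .imm = 0 },
			{ .code = BPF_JMP | BPF_EXIT },
		};
		union bpf_attr attr = {};
		int fd;

		attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
		attr.insns = (__u64)(unsigned long)insns;
		attr.insn_cnt = 2;
		attr.license = (__u64)(unsigned long)"GPL";
		memcpy(attr.prog_name, "probe", sizeof("probe"));

		fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
		if (fd < 0)
			return 0;	/* old kernel: name rejected */
		close(fd);
		return 1;
	}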
*/ + FEAT_GLOBAL_DATA, + /* BTF support */ + FEAT_BTF, + /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ + FEAT_BTF_FUNC, + /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ + FEAT_BTF_DATASEC, + /* BTF_FUNC_GLOBAL is supported */ + FEAT_BTF_GLOBAL_FUNC, + /* BPF_F_MMAPABLE is supported for arrays */ + FEAT_ARRAY_MMAP, + /* kernel support for expected_attach_type in BPF_PROG_LOAD */ + FEAT_EXP_ATTACH_TYPE, + /* bpf_probe_read_{kernel,user}[_str] helpers */ + FEAT_PROBE_READ_KERN, + /* BPF_PROG_BIND_MAP is supported */ + FEAT_PROG_BIND_MAP, + /* Kernel support for module BTFs */ + FEAT_MODULE_BTF, + /* BTF_KIND_FLOAT support */ + FEAT_BTF_FLOAT, + /* BPF perf link support */ + FEAT_PERF_LINK, + __FEAT_CNT, +}; -__u32 libbpf_minor_version(void) -{ - return LIBBPF_MINOR_VERSION; -} - -const char *libbpf_version_string(void) -{ -#define __S(X) #X -#define _S(X) __S(X) - return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION); -#undef _S -#undef __S -} +static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); enum reloc_type { RELO_LD64, @@ -194,57 +207,29 @@ enum reloc_type { RELO_EXTERN_VAR, RELO_EXTERN_FUNC, RELO_SUBPROG_ADDR, - RELO_CORE, }; struct reloc_desc { enum reloc_type type; int insn_idx; - union { - const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ - struct { - int map_idx; - int sym_off; - }; - }; + int map_idx; + int sym_off; }; struct bpf_sec_def; -typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); -typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie); -typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); - -/* stored as sec_def->cookie for all libbpf-supported SEC()s */ -enum sec_def_flags { - SEC_NONE = 0, - /* expected_attach_type is optional, if kernel doesn't support that */ - SEC_EXP_ATTACH_OPT = 1, - /* legacy, only used by libbpf_get_type_names() and - * libbpf_attach_type_by_name(), not used by libbpf itself at all. - * This used to be associated with cgroup (and few other) BPF programs - * that were attachable through BPF_PROG_ATTACH command. Pretty - * meaningless nowadays, though. 
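The bpf_sec_def shape restored just below (with an explicit len field) drives prefix matching of ELF section names like "kprobe/..." onto program and attach types. A self-contained miniature of that lookup; the two table entries are illustrative, not the full libbpf list:

	#include <string.h>

	struct sec_def {
		const char *sec;
		size_t len;	/* prefix length to compare */
		int prog_type;	/* enum bpf_prog_type in real code */
	};

	static const struct sec_def defs[] = {
		{ "kprobe/",     sizeof("kprobe/") - 1,     2 /* BPF_PROG_TYPE_KPROBE */ },
		{ "tracepoint/", sizeof("tracepoint/") - 1, 5 /* BPF_PROG_TYPE_TRACEPOINT */ },
	};

	static const struct sec_def *find_sec_def(const char *sec_name)
	{
		size_t i;

		for (i = 0; i < sizeof(defs) / sizeof(defs[0]); i++)
			if (!strncmp(sec_name, defs[i].sec, defs[i].len))
				return &defs[i];
		return NULL;
	}

First match wins, so more specific prefixes have to be ordered before shorter ones in the real table.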
- */ - SEC_ATTACHABLE = 2, - SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, - /* attachment target is specified through BTF ID in either kernel or - * other BPF program's BTF object */ - SEC_ATTACH_BTF = 4, - /* BPF program type allows sleeping/blocking in kernel */ - SEC_SLEEPABLE = 8, - /* allow non-strict prefix matching */ - SEC_SLOPPY_PFX = 16, -}; +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, + struct bpf_program *prog); struct bpf_sec_def { const char *sec; + size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; - long cookie; - - init_fn_t init_fn; - preload_fn_t preload_fn; + bool is_exp_attach_type_optional; + bool is_attachable; + bool is_attach_btf; + bool is_sleepable; attach_fn_t attach_fn; }; @@ -276,7 +261,7 @@ struct bpf_program { size_t sub_insn_off; char *name; - /* name with / replaced by _; makes recursive pinning + /* sec_name with / replaced by _; makes recursive pinning * in bpf_object__pin_programs easier */ char *pin_name; @@ -295,11 +280,7 @@ struct bpf_program { struct reloc_desc *reloc_desc; int nr_reloc; - - /* BPF verifier log settings */ - char *log_buf; - size_t log_size; - __u32 log_level; + int log_level; struct { int nr; @@ -365,14 +346,15 @@ enum libbpf_map_type { LIBBPF_MAP_KCONFIG, }; +static const char * const libbpf_type_to_btf_name[] = { + [LIBBPF_MAP_DATA] = DATA_SEC, + [LIBBPF_MAP_BSS] = BSS_SEC, + [LIBBPF_MAP_RODATA] = RODATA_SEC, + [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, +}; + struct bpf_map { char *name; - /* real_name is defined for special internal maps (.rodata*, - * .data*, .bss, .kconfig) and preserves their original ELF section - * name. This is important to be be able to find corresponding BTF - * DATASEC information. - */ - char *real_name; int fd; int sec_idx; size_t sec_offset; @@ -395,8 +377,6 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; - bool skipped; - __u64 map_extra; }; enum extern_type { @@ -439,11 +419,6 @@ struct extern_desc { /* local btf_id of the ksym extern's type. */ __u32 type_id; - /* BTF fd index to be patched in for insn->off, this is - * 0 for vmlinux BTF, index in obj->fd_array for module - * BTF - */ - __s16 btf_fd_idx; } ksym; }; }; @@ -455,41 +430,6 @@ struct module_btf { char *name; __u32 id; int fd; - int fd_array_idx; -}; - -enum sec_type { - SEC_UNUSED = 0, - SEC_RELO, - SEC_BSS, - SEC_DATA, - SEC_RODATA, -}; - -struct elf_sec_desc { - enum sec_type sec_type; - Elf64_Shdr *shdr; - Elf_Data *data; -}; - -struct elf_state { - int fd; - const void *obj_buf; - size_t obj_buf_sz; - Elf *elf; - Elf64_Ehdr *ehdr; - Elf_Data *symbols; - Elf_Data *st_ops_data; - size_t shstrndx; /* section index for section name strings */ - size_t strtabidx; - struct elf_sec_desc *secs; - int sec_cnt; - int maps_shndx; - int btf_maps_shndx; - __u32 btf_maps_sec_btf_id; - int text_shndx; - int symbols_shndx; - int st_ops_shndx; }; struct bpf_object { @@ -507,17 +447,47 @@ struct bpf_object { struct extern_desc *externs; int nr_extern; int kconfig_map_idx; + int rodata_map_idx; bool loaded; bool has_subcalls; - bool has_rodata; struct bpf_gen *gen_loader; - /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ - struct elf_state efile; /* - * All loaded bpf_object are linked in a list, which is + * Information when doing elf related work. Only valid if fd + * is valid. 
+ */ + struct { + int fd; + const void *obj_buf; + size_t obj_buf_sz; + Elf *elf; + GElf_Ehdr ehdr; + Elf_Data *symbols; + Elf_Data *data; + Elf_Data *rodata; + Elf_Data *bss; + Elf_Data *st_ops_data; + size_t shstrndx; /* section index for section name strings */ + size_t strtabidx; + struct { + GElf_Shdr shdr; + Elf_Data *data; + } *reloc_sects; + int nr_reloc_sects; + int maps_shndx; + int btf_maps_shndx; + __u32 btf_maps_sec_btf_id; + int text_shndx; + int symbols_shndx; + int data_shndx; + int rodata_shndx; + int bss_shndx; + int st_ops_shndx; + } efile; + /* + * All loaded bpf_object is linked in a list, which is * hidden to caller. bpf_objects__ handlers deal with * all objects. */ @@ -542,30 +512,20 @@ struct bpf_object { size_t btf_module_cnt; size_t btf_module_cap; - /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ - char *log_buf; - size_t log_size; - __u32 log_level; - void *priv; bpf_object_clear_priv_t clear_priv; - int *fd_array; - size_t fd_array_cap; - size_t fd_array_cnt; - char path[]; }; +#define obj_elf_valid(o) ((o)->efile.elf) static const char *elf_sym_str(const struct bpf_object *obj, size_t off); static const char *elf_sec_str(const struct bpf_object *obj, size_t off); static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); -static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); +static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); -static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); -static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); void bpf_program__unload(struct bpf_program *prog) { @@ -620,16 +580,7 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) { char *name, *p; - if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) - name = strdup(prog->name); - else - name = strdup(prog->sec_name); - - if (!name) - return NULL; - - p = name; - + name = p = strdup(prog->sec_name); while ((p = strchr(p, '/'))) *p = '_'; @@ -682,9 +633,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->instances.fds = NULL; prog->instances.nr = -1; - /* inherit object's log_level */ - prog->log_level = obj->log_level; - prog->sec_name = strdup(sec_name); if (!prog->sec_name) goto errout; @@ -719,25 +667,25 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; int nr_progs, err, i; const char *name; - Elf64_Sym *sym; + GElf_Sym sym; progs = obj->programs; nr_progs = obj->nr_programs; - nr_syms = symbols->d_size / sizeof(Elf64_Sym); + nr_syms = symbols->d_size / sizeof(GElf_Sym); sec_off = 0; for (i = 0; i < nr_syms; i++) { - sym = elf_sym_by_idx(obj, i); - - if (sym->st_shndx != sec_idx) + if (!gelf_getsym(symbols, i, &sym)) continue; - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) + if (sym.st_shndx != sec_idx) + continue; + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) continue; - prog_sz = sym->st_size; - sec_off = sym->st_value; + prog_sz = sym.st_size; + sec_off = sym.st_value; - name = elf_sym_str(obj, sym->st_name); + name = elf_sym_str(obj, sym.st_name); if (!name) { pr_warn("sec '%s': failed to get symbol name for offset %zu\n", sec_name, sec_off); @@ -750,7 +698,7 @@ bpf_object__add_programs(struct bpf_object 
*obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { + if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) { pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); return -ENOTSUP; } @@ -783,9 +731,9 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL - && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN - || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) + if (GELF_ST_BIND(sym.st_info) != STB_LOCAL + && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN + || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -795,36 +743,11 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } -__u32 get_kernel_version(void) +static __u32 get_kernel_version(void) { - /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, - * but Ubuntu provides /proc/version_signature file, as described at - * https://ubuntu.com/kernel, with an example contents below, which we - * can use to get a proper LINUX_VERSION_CODE. - * - * Ubuntu 5.4.0-12.15-generic 5.4.8 - * - * In the above, 5.4.8 is what kernel is actually expecting, while - * uname() call will return 5.4.0 in info.release. - */ - const char *ubuntu_kver_file = "/proc/version_signature"; __u32 major, minor, patch; struct utsname info; - if (access(ubuntu_kver_file, R_OK) == 0) { - FILE *f; - - f = fopen(ubuntu_kver_file, "r"); - if (f) { - if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { - fclose(f); - return KERNEL_VERSION(major, minor, patch); - } - fclose(f); - } - /* something went wrong, fall back to uname() approach */ - } - uname(&info); if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) return 0; @@ -1178,7 +1101,6 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz, const char *obj_name) { - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); struct bpf_object *obj; char *end; @@ -1190,10 +1112,12 @@ static struct bpf_object *bpf_object__new(const char *path, strcpy(obj->path, path); if (obj_name) { - libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name)); + strncpy(obj->name, obj_name, sizeof(obj->name) - 1); + obj->name[sizeof(obj->name) - 1] = 0; } else { /* Using basename() GNU version which doesn't modify arg. 
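That basename() comment is worth a second look: with _GNU_SOURCE and <string.h>, glibc declares a variant that never writes to its argument, while the POSIX one from <libgen.h> may modify the string in place — so the (void *) cast on a const path is safe only under GNU semantics. A tiny standalone demo (glibc assumed):

	#define _GNU_SOURCE	/* before any include, to get GNU basename() */
	#include <string.h>	/* GNU basename() is declared here, not libgen.h */
	#include <stdio.h>

	int main(void)
	{
		const char *path = "/sys/fs/bpf/obj.o";

		printf("%s\n", basename(path));	/* prints "obj.o", path untouched */
		return 0;
	}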
*/ - libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name)); + strncpy(obj->name, basename((void *)path), + sizeof(obj->name) - 1); end = strchr(obj->name, '.'); if (end) *end = 0; @@ -1210,21 +1134,24 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; + obj->efile.data_shndx = -1; + obj->efile.rodata_shndx = -1; + obj->efile.bss_shndx = -1; obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; + obj->rodata_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; INIT_LIST_HEAD(&obj->list); - if (!strict) - list_add(&obj->list, &bpf_objects_list); + list_add(&obj->list, &bpf_objects_list); return obj; } static void bpf_object__elf_finish(struct bpf_object *obj) { - if (!obj->efile.elf) + if (!obj_elf_valid(obj)) return; if (obj->efile.elf) { @@ -1232,10 +1159,13 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; } obj->efile.symbols = NULL; + obj->efile.data = NULL; + obj->efile.rodata = NULL; + obj->efile.bss = NULL; obj->efile.st_ops_data = NULL; - zfree(&obj->efile.secs); - obj->efile.sec_cnt = 0; + zfree(&obj->efile.reloc_sects); + obj->efile.nr_reloc_sects = 0; zclose(obj->efile.fd); obj->efile.obj_buf = NULL; obj->efile.obj_buf_sz = 0; @@ -1243,11 +1173,10 @@ static void bpf_object__elf_finish(struct bpf_object *obj) static int bpf_object__elf_init(struct bpf_object *obj) { - Elf64_Ehdr *ehdr; int err = 0; - Elf *elf; + GElf_Ehdr *ep; - if (obj->efile.elf) { + if (obj_elf_valid(obj)) { pr_warn("elf: init internal error\n"); return -LIBBPF_ERRNO__LIBELF; } @@ -1257,9 +1186,10 @@ static int bpf_object__elf_init(struct bpf_object *obj) * obj_buf should have been validated by * bpf_object__open_buffer(). */ - elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); + obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, + obj->efile.obj_buf_sz); } else { - obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); + obj->efile.fd = open(obj->path, O_RDONLY); if (obj->efile.fd < 0) { char errmsg[STRERR_BUFSIZE], *cp; @@ -1269,37 +1199,23 @@ static int bpf_object__elf_init(struct bpf_object *obj) return err; } - elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); + obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); } - if (!elf) { + if (!obj->efile.elf) { pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__LIBELF; goto errout; } - obj->efile.elf = elf; - - if (elf_kind(elf) != ELF_K_ELF) { - err = -LIBBPF_ERRNO__FORMAT; - pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); - goto errout; - } - - if (gelf_getclass(elf) != ELFCLASS64) { - err = -LIBBPF_ERRNO__FORMAT; - pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); - goto errout; - } - - obj->efile.ehdr = ehdr = elf64_getehdr(elf); - if (!obj->efile.ehdr) { + if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; goto errout; } + ep = &obj->efile.ehdr; - if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { + if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1307,7 +1223,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ - if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { + if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1315,7 +1231,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Old LLVM set e_machine to EM_NONE */ - if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { + if (ep->e_type != ET_REL || + (ep->e_machine && ep->e_machine != EM_BPF)) { pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; @@ -1329,11 +1246,11 @@ static int bpf_object__elf_init(struct bpf_object *obj) static int bpf_object__check_endianness(struct bpf_object *obj) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) +#if __BYTE_ORDER == __LITTLE_ENDIAN + if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB) return 0; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) +#elif __BYTE_ORDER == __BIG_ENDIAN + if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB) return 0; #else # error "Unrecognized __BYTE_ORDER__" @@ -1345,10 +1262,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj) static int bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) { - /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't - * go over allowed ELF data section buffer - */ - libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); + memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); pr_debug("license of %s is %s\n", obj->path, obj->license); return 0; } @@ -1376,27 +1290,41 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } -static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size) { int ret = -ENOENT; - Elf_Data *data; - Elf_Scn *scn; *size = 0; - if (!name) + if (!name) { return -EINVAL; + } else if (!strcmp(name, DATA_SEC)) { + if (obj->efile.data) + *size = obj->efile.data->d_size; + } else if (!strcmp(name, BSS_SEC)) { + if (obj->efile.bss) + *size = obj->efile.bss->d_size; + } else if (!strcmp(name, RODATA_SEC)) { + if (obj->efile.rodata) + *size = obj->efile.rodata->d_size; + } else if (!strcmp(name, STRUCT_OPS_SEC)) { + if (obj->efile.st_ops_data) + *size = obj->efile.st_ops_data->d_size; + } else { + Elf_Scn *scn = elf_sec_by_name(obj, name); + Elf_Data *data = elf_sec_data(obj, scn); - scn = elf_sec_by_name(obj, name); - data = elf_sec_data(obj, scn); - if (data) { - ret = 0; /* found it */ - *size = data->d_size; + if (data) { + ret = 0; /* found it */ + *size = data->d_size; + } } return *size ? 
0 : ret; } -static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off) { Elf_Data *symbols = obj->efile.symbols; const char *sname; @@ -1405,20 +1333,23 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ if (!name || !off) return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, si); + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { + GElf_Sym sym; - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || - ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + GELF_ST_TYPE(sym.st_info) != STT_OBJECT) continue; - sname = elf_sym_str(obj, sym->st_name); + sname = elf_sym_str(obj, sym.st_name); if (!sname) { - pr_warn("failed to get sym name string for var %s\n", name); + pr_warn("failed to get sym name string for var %s\n", + name); return -EIO; } if (strcmp(name, sname) == 0) { - *off = sym->st_value; + *off = sym.st_value; return 0; } } @@ -1470,55 +1401,17 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map) return map_sz; } -static char *internal_map_name(struct bpf_object *obj, const char *real_name) +static char *internal_map_name(struct bpf_object *obj, + enum libbpf_map_type type) { char map_name[BPF_OBJ_NAME_LEN], *p; - int pfx_len, sfx_len = max((size_t)7, strlen(real_name)); - - /* This is one of the more confusing parts of libbpf for various - * reasons, some of which are historical. The original idea for naming - * internal names was to include as much of BPF object name prefix as - * possible, so that it can be distinguished from similar internal - * maps of a different BPF object. - * As an example, let's say we have bpf_object named 'my_object_name' - * and internal map corresponding to '.rodata' ELF section. The final - * map name advertised to user and to the kernel will be - * 'my_objec.rodata', taking first 8 characters of object name and - * entire 7 characters of '.rodata'. - * Somewhat confusingly, if internal map ELF section name is shorter - * than 7 characters, e.g., '.bss', we still reserve 7 characters - * for the suffix, even though we only have 4 actual characters, and - * resulting map will be called 'my_objec.bss', not even using all 15 - * characters allowed by the kernel. Oh well, at least the truncated - * object name is somewhat consistent in this case. But if the map - * name is '.kconfig', we'll still have entirety of '.kconfig' added - * (8 chars) and thus will be left with only first 7 characters of the - * object name ('my_obje'). Happy guessing, user, that the final map - * name will be "my_obje.kconfig". - * Now, with libbpf starting to support arbitrarily named .rodata.* - * and .data.* data sections, it's possible that ELF section name is - * longer than allowed 15 chars, so we now need to be careful to take - * only up to 15 first characters of ELF name, taking no BPF object - * name characters at all. So '.rodata.abracadabra' will result in - * '.rodata.abracad' kernel and user-visible name. - * We need to keep this convoluted logic intact for .data, .bss and - * .rodata maps, but for new custom .data.custom and .rodata.custom - * maps we use their ELF names as is, not prepending bpf_object name - * in front. We still need to truncate them to 15 characters for the - * kernel. 
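The 'my_objec.rodata' arithmetic spelled out in this comment is compact enough to demonstrate: reserve at least 7 characters for the section suffix, then spend whatever remains of the 15-character kernel budget on the object-name prefix. A standalone sketch replicating just that classic-section rule (the helper name map_name is illustrative):

	#include <stdio.h>
	#include <string.h>

	#define BPF_OBJ_NAME_LEN 16U	/* kernel limit: 15 chars + NUL */

	static void map_name(char *out, const char *obj, const char *sec)
	{
		size_t sfx_len = strlen(sec) > 7 ? strlen(sec) : 7;
		size_t pfx_len = strlen(obj);

		if (sfx_len >= BPF_OBJ_NAME_LEN)
			sfx_len = BPF_OBJ_NAME_LEN - 1;
		if (pfx_len > BPF_OBJ_NAME_LEN - sfx_len - 1)
			pfx_len = BPF_OBJ_NAME_LEN - sfx_len - 1;
		snprintf(out, BPF_OBJ_NAME_LEN, "%.*s%.*s",
			 (int)pfx_len, obj, (int)sfx_len, sec);
	}

	int main(void)
	{
		char buf[BPF_OBJ_NAME_LEN];

		map_name(buf, "my_object_name", ".rodata");
		printf("%s\n", buf);	/* "my_objec.rodata" */
		return 0;
	}

Running it for ".kconfig" yields "my_obje.kconfig", matching the comment's worked example.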
Full name can be recovered for such maps by using DATASEC - * BTF type associated with such map's value type, though. - */ - if (sfx_len >= BPF_OBJ_NAME_LEN) - sfx_len = BPF_OBJ_NAME_LEN - 1; - - /* if there are two or more dots in map name, it's a custom dot map */ - if (strchr(real_name + 1, '.') != NULL) - pfx_len = 0; - else - pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name)); + const char *sfx = libbpf_type_to_btf_name[type]; + int sfx_len = max((size_t)7, strlen(sfx)); + int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, + strlen(obj->name)); snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, - sfx_len, real_name); + sfx_len, libbpf_type_to_btf_name[type]); /* sanitise map name to characters allowed by kernel */ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) @@ -1530,7 +1423,7 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - const char *real_name, int sec_idx, void *data, size_t data_sz) + int sec_idx, void *data, size_t data_sz) { struct bpf_map_def *def; struct bpf_map *map; @@ -1543,11 +1436,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - map->real_name = strdup(real_name); - map->name = internal_map_name(obj, real_name); - if (!map->real_name || !map->name) { - zfree(&map->real_name); - zfree(&map->name); + map->name = internal_map_name(obj, type); + if (!map->name) { + pr_warn("failed to alloc map name\n"); return -ENOMEM; } @@ -1570,7 +1461,6 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->mmaped = NULL; pr_warn("failed to alloc map '%s' content buffer: %d\n", map->name, err); - zfree(&map->real_name); zfree(&map->name); return err; } @@ -1584,43 +1474,34 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, static int bpf_object__init_global_data_maps(struct bpf_object *obj) { - struct elf_sec_desc *sec_desc; - const char *sec_name; - int err = 0, sec_idx; + int err; /* * Populate obj->maps with libbpf internal maps. 
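Either version of this initialization ends with the same kernel object: each global-data section becomes a single-entry BPF_MAP_TYPE_ARRAY whose value spans the whole section image, mmap-able where FEAT_ARRAY_MMAP holds. A rough direct-syscall equivalent of one such creation — illustrative, not the libbpf path, and assuming headers new enough for BPF_F_MMAPABLE:

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int create_data_map(size_t sec_size)
	{
		union bpf_attr attr = {};

		attr.map_type = BPF_MAP_TYPE_ARRAY;
		attr.key_size = 4;		/* arrays require u32 keys */
		attr.value_size = (__u32)sec_size;
		attr.max_entries = 1;		/* one value = whole section */
		attr.map_flags = BPF_F_MMAPABLE;
		memcpy(attr.map_name, ".data", sizeof(".data"));

		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	}

For .data/.rodata the section contents then become the initial value; .bss has no file image (SHT_NOBITS), which is why it is created with a NULL data pointer below.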
*/ - for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { - sec_desc = &obj->efile.secs[sec_idx]; + if (obj->efile.data_shndx >= 0) { + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, + obj->efile.data_shndx, + obj->efile.data->d_buf, + obj->efile.data->d_size); + if (err) + return err; + } + if (obj->efile.rodata_shndx >= 0) { + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, + obj->efile.rodata_shndx, + obj->efile.rodata->d_buf, + obj->efile.rodata->d_size); + if (err) + return err; - switch (sec_desc->sec_type) { - case SEC_DATA: - sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, - sec_name, sec_idx, - sec_desc->data->d_buf, - sec_desc->data->d_size); - break; - case SEC_RODATA: - obj->has_rodata = true; - sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, - sec_name, sec_idx, - sec_desc->data->d_buf, - sec_desc->data->d_size); - break; - case SEC_BSS: - sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, - sec_name, sec_idx, - NULL, - sec_desc->data->d_size); - break; - default: - /* skip */ - break; - } + obj->rodata_map_idx = obj->nr_maps - 1; + } + if (obj->efile.bss_shndx >= 0) { + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, + obj->efile.bss_shndx, + NULL, + obj->efile.bss->d_size); if (err) return err; } @@ -1783,7 +1664,7 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, void *ext_val; __u64 num; - if (!str_has_pfx(buf, "CONFIG_")) + if (strncmp(buf, "CONFIG_", 7)) return 0; sep = strchr(buf, '='); @@ -1917,7 +1798,7 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj) map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, - ".kconfig", obj->efile.symbols_shndx, + obj->efile.symbols_shndx, NULL, map_sz); if (err) return err; @@ -1955,13 +1836,13 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) * * TODO: Detect array of map and report error. */ - nr_syms = symbols->d_size / sizeof(Elf64_Sym); + nr_syms = symbols->d_size / sizeof(GElf_Sym); for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); + GElf_Sym sym; - if (sym->st_shndx != obj->efile.maps_shndx) + if (!gelf_getsym(symbols, i, &sym)) continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) + if (sym.st_shndx != obj->efile.maps_shndx) continue; nr_maps++; } @@ -1978,38 +1859,39 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) /* Fill obj->maps using data in "maps" section. 
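For contrast with the BTF-defined maps handled elsewhere, the legacy path walked here expects each map to be a plain struct bpf_map_def object emitted into the "maps" ELF section and discovered via the symbol table. What that looks like on the BPF-program side (classic pre-BTF style, compiled with clang -target bpf; illustrative only):

	#include <linux/bpf.h>

	/* Classic fixed layout; the loader copies however much of the
	 * section entry fits into its own notion of bpf_map_def.
	 */
	struct bpf_map_def {
		unsigned int type;
		unsigned int key_size;
		unsigned int value_size;
		unsigned int max_entries;
		unsigned int map_flags;
	};

	/* Must be a global symbol: as the code here warns, static legacy
	 * maps (STB_LOCAL) are rejected.
	 */
	struct bpf_map_def my_counts __attribute__((section("maps"), used)) = {
		.type		= BPF_MAP_TYPE_ARRAY,
		.key_size	= sizeof(unsigned int),
		.value_size	= sizeof(unsigned long long),
		.max_entries	= 64,
	};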
*/ for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); + GElf_Sym sym; const char *map_name; struct bpf_map_def *def; struct bpf_map *map; - if (sym->st_shndx != obj->efile.maps_shndx) + if (!gelf_getsym(symbols, i, &sym)) continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) + if (sym.st_shndx != obj->efile.maps_shndx) continue; map = bpf_object__add_map(obj); if (IS_ERR(map)) return PTR_ERR(map); - map_name = elf_sym_str(obj, sym->st_name); + map_name = elf_sym_str(obj, sym.st_name); if (!map_name) { pr_warn("failed to get map #%d name sym string for obj %s\n", i, obj->path); return -LIBBPF_ERRNO__FORMAT; } - if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION + || GELF_ST_BIND(sym.st_info) == STB_LOCAL) { pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); return -ENOTSUP; } map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym->st_shndx; - map->sec_offset = sym->st_value; + map->sec_idx = sym.st_shndx; + map->sec_offset = sym.st_value; pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - if (sym->st_value + map_def_sz > data->d_size) { + if (sym.st_value + map_def_sz > data->d_size) { pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", obj->path, map_name); return -EINVAL; @@ -2017,11 +1899,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) map->name = strdup(map_name); if (!map->name) { - pr_warn("map '%s': failed to alloc map name\n", map_name); + pr_warn("failed to alloc map name\n"); return -ENOMEM; } pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym->st_value); + def = (struct bpf_map_def *)(data->d_buf + sym.st_value); /* * If the definition of the map in the object file fits in * bpf_map_def, copy it. Any extra fields in our version @@ -2105,8 +1987,6 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_VAR: return "var"; case BTF_KIND_DATASEC: return "datasec"; case BTF_KIND_FLOAT: return "float"; - case BTF_KIND_DECL_TAG: return "decl_tag"; - case BTF_KIND_TYPE_TAG: return "type_tag"; default: return "unknown"; } } @@ -2286,9 +2166,6 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; } else if (strcmp(name, "values") == 0) { - bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); - bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; - const char *desc = is_map_in_map ? 
"map-in-map inner" : "prog-array value"; char inner_map_name[128]; int err; @@ -2302,8 +2179,8 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_name, name); return -EINVAL; } - if (!is_map_in_map && !is_prog_array) { - pr_warn("map '%s': should be map-in-map or prog-array.\n", + if (!bpf_map_type__is_map_in_map(map_def->map_type)) { + pr_warn("map '%s': should be map-in-map.\n", map_name); return -ENOTSUP; } @@ -2315,30 +2192,22 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->value_size = 4; t = btf__type_by_id(btf, m->type); if (!t) { - pr_warn("map '%s': %s type [%d] not found.\n", - map_name, desc, m->type); + pr_warn("map '%s': map-in-map inner type [%d] not found.\n", + map_name, m->type); return -EINVAL; } if (!btf_is_array(t) || btf_array(t)->nelems) { - pr_warn("map '%s': %s spec is not a zero-sized array.\n", - map_name, desc); + pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n", + map_name); return -EINVAL; } t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); if (!btf_is_ptr(t)) { - pr_warn("map '%s': %s def is of unexpected kind %s.\n", - map_name, desc, btf_kind_str(t)); + pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", + map_name, btf_kind_str(t)); return -EINVAL; } t = skip_mods_and_typedefs(btf, t->type, NULL); - if (is_prog_array) { - if (!btf_is_func_proto(t)) { - pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", - map_name, btf_kind_str(t)); - return -EINVAL; - } - continue; - } if (!btf_is_struct(t)) { pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", map_name, btf_kind_str(t)); @@ -2367,13 +2236,6 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, } map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; - } else if (strcmp(name, "map_extra") == 0) { - __u32 map_extra; - - if (!get_map_field_int(map_name, btf, m, &map_extra)) - return -EINVAL; - map_def->map_extra = map_extra; - map_def->parts |= MAP_DEF_MAP_EXTRA; } else { if (strict) { pr_warn("map '%s': unknown field '%s'.\n", map_name, name); @@ -2398,7 +2260,6 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->def.value_size = def->value_size; map->def.max_entries = def->max_entries; map->def.map_flags = def->map_flags; - map->map_extra = def->map_extra; map->numa_node = def->numa_node; map->btf_key_type_id = def->key_type_id; @@ -2422,10 +2283,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def if (def->parts & MAP_DEF_MAX_ENTRIES) pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); if (def->parts & MAP_DEF_MAP_FLAGS) - pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); - if (def->parts & MAP_DEF_MAP_EXTRA) - pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, - (unsigned long long)def->map_extra); + pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags); if (def->parts & MAP_DEF_PINNING) pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); if (def->parts & MAP_DEF_NUMA_NODE) @@ -2562,8 +2420,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return -EINVAL; } - nr_types = btf__type_cnt(obj->btf); - for (i = 1; i < nr_types; i++) { + nr_types = btf__get_nr_types(obj->btf); + for (i = 1; i <= nr_types; i++) { t = btf__type_by_id(obj->btf, i); if (!btf_is_datasec(t)) continue; @@ -2614,13 +2472,12 @@ static int bpf_object__init_maps(struct bpf_object *obj, static bool 
section_have_execinstr(struct bpf_object *obj, int idx) { - Elf64_Shdr *sh; + GElf_Shdr sh; - sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); - if (!sh) + if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh)) return false; - return sh->sh_flags & SHF_EXECINSTR; + return sh.sh_flags & SHF_EXECINSTR; } static bool btf_needs_sanitization(struct bpf_object *obj) @@ -2629,11 +2486,8 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); - bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); - bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); - return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag; + return !has_func || !has_datasec || !has_func_global || !has_float; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2642,16 +2496,14 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); - bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); - bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); struct btf_type *t; int i, j, vlen; - for (i = 1; i < btf__type_cnt(btf); i++) { + for (i = 1; i <= btf__get_nr_types(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { - /* replace VAR/DECL_TAG with INT */ + if (!has_datasec && btf_is_var(t)) { + /* replace VAR with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* * using size = 1 is the safest choice, 4 will be too @@ -2702,10 +2554,6 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) */ t->name_off = 0; t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); - } else if (!has_type_tag && btf_is_type_tag(t)) { - /* replace TYPE_TAG with a CONST */ - t->name_off = 0; - t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); } } } @@ -2762,103 +2610,6 @@ static int bpf_object__init_btf(struct bpf_object *obj, return 0; } -static int compare_vsi_off(const void *_a, const void *_b) -{ - const struct btf_var_secinfo *a = _a; - const struct btf_var_secinfo *b = _b; - - return a->offset - b->offset; -} - -static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, - struct btf_type *t) -{ - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; - struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; - - if (!name) { - pr_debug("No name found in string section for DATASEC kind.\n"); - return -ENOENT; - } - - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables - */ - if (t->size) - goto sort_vars; - - ret = find_elf_sec_sz(obj, name, &size); - if (ret || !size || (t->size && t->size != size)) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } - - t->size = size; - - for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { - t_var = btf__type_by_id(btf, vsi->type); - if (!t_var || !btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); - return -EINVAL; - } - - var = btf_var(t_var); - if (var->linkage == BTF_VAR_STATIC) - 
continue; - - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); - return -ENOENT; - } - - ret = find_elf_var_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); - return -ENOENT; - } - - vsi->offset = off; - } - -sort_vars: - qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); - return 0; -} - -static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) -{ - int err = 0; - __u32 i, n = btf__type_cnt(btf); - - for (i = 1; i < n; i++) { - struct btf_type *t = btf_type_by_id(btf, i); - - /* Loader needs to fix up some of the things compiler - * couldn't get its hands on while emitting BTF. This - * is section size and global variable offset. We use - * the info from the ELF itself for this purpose. - */ - if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); - if (err) - break; - } - } - - return libbpf_err(err); -} - -int btf__finalize_data(struct bpf_object *obj, struct btf *btf) -{ - return btf_finalize_data(obj, btf); -} - static int bpf_object__finalize_btf(struct bpf_object *obj) { int err; @@ -2866,7 +2617,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) if (!obj->btf) return 0; - err = btf_finalize_data(obj, obj->btf); + err = btf__finalize_data(obj, obj->btf); if (err) { pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); return err; @@ -2976,8 +2727,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) continue; - n = btf__type_cnt(obj->btf); - for (j = 1; j < n; j++) { + n = btf__get_nr_types(obj->btf); + for (j = 1; j <= n; j++) { t = btf_type_by_id(obj->btf, j); if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) continue; @@ -2997,7 +2748,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) __u32 sz; /* clone BTF to sanitize a copy and leave the original intact */ - raw_data = btf__raw_data(obj->btf, &sz); + raw_data = btf__get_raw_data(obj->btf, &sz); kern_btf = btf__new(raw_data, sz); err = libbpf_get_error(kern_btf); if (err) @@ -3010,7 +2761,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (obj->gen_loader) { __u32 raw_size = 0; - const void *raw_data = btf__raw_data(kern_btf, &raw_size); + const void *raw_data = btf__get_raw_data(kern_btf, &raw_size); if (!raw_data) return -ENOMEM; @@ -3020,9 +2771,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) */ btf__set_fd(kern_btf, 0); } else { - /* currently BPF_BTF_LOAD only supports log_level 1 */ - err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 
1 : 0); + err = btf__load_into_kernel(kern_btf); } if (sanitize) { if (!err) { @@ -3104,36 +2853,32 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) return NULL; } -static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) +static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr) { - Elf64_Shdr *shdr; - if (!scn) - return NULL; + return -EINVAL; - shdr = elf64_getshdr(scn); - if (!shdr) { + if (gelf_getshdr(scn, hdr) != hdr) { pr_warn("elf: failed to get section(%zu) header from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); - return NULL; + return -EINVAL; } - return shdr; + return 0; } static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) { const char *name; - Elf64_Shdr *sh; + GElf_Shdr sh; if (!scn) return NULL; - sh = elf_sec_hdr(obj, scn); - if (!sh) + if (elf_sec_hdr(obj, scn, &sh)) return NULL; - name = elf_sec_str(obj, sh->sh_name); + name = elf_sec_str(obj, sh.sh_name); if (!name) { pr_warn("elf: failed to get section(%zu) name from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); @@ -3161,29 +2906,13 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } -static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) -{ - if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) - return NULL; - - return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; -} - -static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) -{ - if (idx >= data->d_size / sizeof(Elf64_Rel)) - return NULL; - - return (Elf64_Rel *)data->d_buf + idx; -} - static bool is_sec_name_dwarf(const char *name) { /* approximation, but the actual list is too long */ - return str_has_pfx(name, ".debug_"); + return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0; } -static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name) +static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) { /* no special handling of .strtab */ if (hdr->sh_type == SHT_STRTAB) @@ -3202,7 +2931,7 @@ static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name) if (is_sec_name_dwarf(name)) return true; - if (str_has_pfx(name, ".rel")) { + if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) { name += sizeof(".rel") - 1; /* DWARF section relocations */ if (is_sec_name_dwarf(name)) @@ -3231,7 +2960,6 @@ static int cmp_progs(const void *_a, const void *_b) static int bpf_object__elf_collect(struct bpf_object *obj) { - struct elf_sec_desc *sec_desc; Elf *elf = obj->efile.elf; Elf_Data *btf_ext_data = NULL; Elf_Data *btf_data = NULL; @@ -3239,27 +2967,17 @@ static int bpf_object__elf_collect(struct bpf_object *obj) const char *name; Elf_Data *data; Elf_Scn *scn; - Elf64_Shdr *sh; - - /* ELF section indices are 0-based, but sec #0 is special "invalid" - * section. e_shnum does include sec #0, so e_shnum is the necessary - * size of an array to keep all the sections. 
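The collection passes below are plain libelf iteration; a minimal standalone walk using the same GElf calls as the restored code (link with -lelf). Note elf_nextscn() never returns the reserved null section 0, so an e_shnum-sized array indexed by elf_ndxscn() needs no off-by-one adjustment — exactly the sizing argument the removed comment makes:

	#include <fcntl.h>
	#include <gelf.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		Elf_Scn *scn = NULL;
		GElf_Shdr sh;
		Elf *elf;
		int fd;

		if (argc < 2 || elf_version(EV_CURRENT) == EV_NONE)
			return 1;
		fd = open(argv[1], O_RDONLY);
		if (fd < 0)
			return 1;
		elf = elf_begin(fd, ELF_C_READ, NULL);
		if (!elf)
			return 1;
		while ((scn = elf_nextscn(elf, scn)) != NULL) {
			if (gelf_getshdr(scn, &sh) != &sh)
				continue;	/* mirrors elf_sec_hdr() */
			printf("section %zu: type %u size %llu\n",
			       elf_ndxscn(scn), (unsigned int)sh.sh_type,
			       (unsigned long long)sh.sh_size);
		}
		elf_end(elf);
		close(fd);
		return 0;
	}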
- */ - obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; - obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); - if (!obj->efile.secs) - return -ENOMEM; + GElf_Shdr sh; /* a bunch of ELF parsing functionality depends on processing symbols, * so do the first pass and find the symbol table */ scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - sh = elf_sec_hdr(obj, scn); - if (!sh) + if (elf_sec_hdr(obj, scn, &sh)) return -LIBBPF_ERRNO__FORMAT; - if (sh->sh_type == SHT_SYMTAB) { + if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warn("elf: multiple symbol tables in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; @@ -3269,11 +2987,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!data) return -LIBBPF_ERRNO__FORMAT; - idx = elf_ndxscn(scn); - obj->efile.symbols = data; - obj->efile.symbols_shndx = idx; - obj->efile.strtabidx = sh->sh_link; + obj->efile.symbols_shndx = elf_ndxscn(scn); + obj->efile.strtabidx = sh.sh_link; } } @@ -3285,18 +3001,16 @@ static int bpf_object__elf_collect(struct bpf_object *obj) scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - idx = elf_ndxscn(scn); - sec_desc = &obj->efile.secs[idx]; + idx++; - sh = elf_sec_hdr(obj, scn); - if (!sh) + if (elf_sec_hdr(obj, scn, &sh)) return -LIBBPF_ERRNO__FORMAT; - name = elf_sec_str(obj, sh->sh_name); + name = elf_sec_str(obj, sh.sh_name); if (!name) return -LIBBPF_ERRNO__FORMAT; - if (ignore_elf_section(sh, name)) + if (ignore_elf_section(&sh, name)) continue; data = elf_sec_data(obj, scn); @@ -3305,8 +3019,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", idx, name, (unsigned long)data->d_size, - (int)sh->sh_link, (unsigned long)sh->sh_flags, - (int)sh->sh_type); + (int)sh.sh_link, (unsigned long)sh.sh_flags, + (int)sh.sh_type); if (strcmp(name, "license") == 0) { err = bpf_object__init_license(obj, data->d_buf, data->d_size); @@ -3321,32 +3035,24 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, MAPS_ELF_SEC) == 0) { obj->efile.btf_maps_shndx = idx; } else if (strcmp(name, BTF_ELF_SEC) == 0) { - if (sh->sh_type != SHT_PROGBITS) - return -LIBBPF_ERRNO__FORMAT; btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { - if (sh->sh_type != SHT_PROGBITS) - return -LIBBPF_ERRNO__FORMAT; btf_ext_data = data; - } else if (sh->sh_type == SHT_SYMTAB) { + } else if (sh.sh_type == SHT_SYMTAB) { /* already processed during the first pass above */ - } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { - if (sh->sh_flags & SHF_EXECINSTR) { + } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh.sh_flags & SHF_EXECINSTR) { if (strcmp(name, ".text") == 0) obj->efile.text_shndx = idx; err = bpf_object__add_programs(obj, data, name, idx); if (err) return err; - } else if (strcmp(name, DATA_SEC) == 0 || - str_has_pfx(name, DATA_SEC ".")) { - sec_desc->sec_type = SEC_DATA; - sec_desc->shdr = sh; - sec_desc->data = data; - } else if (strcmp(name, RODATA_SEC) == 0 || - str_has_pfx(name, RODATA_SEC ".")) { - sec_desc->sec_type = SEC_RODATA; - sec_desc->shdr = sh; - sec_desc->data = data; + } else if (strcmp(name, DATA_SEC) == 0) { + obj->efile.data = data; + obj->efile.data_shndx = idx; + } else if (strcmp(name, RODATA_SEC) == 0) { + obj->efile.rodata = data; + obj->efile.rodata_shndx = idx; } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; @@ -3354,33 +3060,37 @@ 
static int bpf_object__elf_collect(struct bpf_object *obj) pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); } - } else if (sh->sh_type == SHT_REL) { - int targ_sec_idx = sh->sh_info; /* points to other section */ - - if (sh->sh_entsize != sizeof(Elf64_Rel) || - targ_sec_idx >= obj->efile.sec_cnt) - return -LIBBPF_ERRNO__FORMAT; + } else if (sh.sh_type == SHT_REL) { + int nr_sects = obj->efile.nr_reloc_sects; + void *sects = obj->efile.reloc_sects; + int sec = sh.sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, targ_sec_idx) && + if (!section_have_execinstr(obj, sec) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", - idx, name, targ_sec_idx, - elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: ""); + idx, name, sec, + elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); continue; } - sec_desc->sec_type = SEC_RELO; - sec_desc->shdr = sh; - sec_desc->data = data; - } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { - sec_desc->sec_type = SEC_BSS; - sec_desc->shdr = sh; - sec_desc->data = data; + sects = libbpf_reallocarray(sects, nr_sects + 1, + sizeof(*obj->efile.reloc_sects)); + if (!sects) + return -ENOMEM; + + obj->efile.reloc_sects = sects; + obj->efile.nr_reloc_sects++; + + obj->efile.reloc_sects[nr_sects].shdr = sh; + obj->efile.reloc_sects[nr_sects].data = data; + } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + obj->efile.bss = data; + obj->efile.bss_shndx = idx; } else { pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, - (size_t)sh->sh_size); + (size_t)sh.sh_size); } } @@ -3391,25 +3101,24 @@ static int bpf_object__elf_collect(struct bpf_object *obj) /* sort BPF programs by section name and in-section instruction offset * for faster search */ - if (obj->nr_programs) - qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); + qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); return bpf_object__init_btf(obj, btf_data, btf_ext_data); } -static bool sym_is_extern(const Elf64_Sym *sym) +static bool sym_is_extern(const GElf_Sym *sym) { - int bind = ELF64_ST_BIND(sym->st_info); + int bind = GELF_ST_BIND(sym->st_info); /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ return sym->st_shndx == SHN_UNDEF && (bind == STB_GLOBAL || bind == STB_WEAK) && - ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; + GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; } -static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) +static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx) { - int bind = ELF64_ST_BIND(sym->st_info); - int type = ELF64_ST_TYPE(sym->st_info); + int bind = GELF_ST_BIND(sym->st_info); + int type = GELF_ST_TYPE(sym->st_info); /* in .text section */ if (sym->st_shndx != text_shndx) @@ -3432,8 +3141,8 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name) if (!btf) return -ESRCH; - n = btf__type_cnt(btf); - for (i = 1; i < n; i++) { + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_var(t) && !btf_is_func(t)) @@ -3464,8 +3173,8 @@ static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { if (!btf) return -ESRCH; - n = btf__type_cnt(btf); - for (i = 1; i < n; i++) { + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_datasec(t)) @@ -3549,8 
+3258,8 @@ static int find_int_btf_id(const struct btf *btf) const struct btf_type *t; int i, n; - n = btf__type_cnt(btf); - for (i = 1; i < n; i++) { + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { t = btf__type_by_id(btf, i); if (btf_is_int(t) && btf_int_bits(t) == 32) @@ -3607,31 +3316,30 @@ static int bpf_object__collect_externs(struct bpf_object *obj) int i, n, off, dummy_var_btf_id; const char *ext_name, *sec_name; Elf_Scn *scn; - Elf64_Shdr *sh; + GElf_Shdr sh; if (!obj->efile.symbols) return 0; scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); - sh = elf_sec_hdr(obj, scn); - if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) + if (elf_sec_hdr(obj, scn, &sh)) return -LIBBPF_ERRNO__FORMAT; dummy_var_btf_id = add_dummy_ksym_var(obj->btf); if (dummy_var_btf_id < 0) return dummy_var_btf_id; - n = sh->sh_size / sh->sh_entsize; + n = sh.sh_size / sh.sh_entsize; pr_debug("looking for externs among %d symbols...\n", n); for (i = 0; i < n; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); + GElf_Sym sym; - if (!sym) + if (!gelf_getsym(obj->efile.symbols, i, &sym)) return -LIBBPF_ERRNO__FORMAT; - if (!sym_is_extern(sym)) + if (!sym_is_extern(&sym)) continue; - ext_name = elf_sym_str(obj, sym->st_name); + ext_name = elf_sym_str(obj, sym.st_name); if (!ext_name || !ext_name[0]) continue; @@ -3653,7 +3361,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) t = btf__type_by_id(obj->btf, ext->btf_id); ext->name = btf__name_by_offset(obj->btf, t->name_off); ext->sym_idx = i; - ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; + ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); if (ext->sec_btf_id <= 0) { @@ -3691,6 +3399,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { + if (btf_is_func(t) && ext->is_weak) { + pr_warn("extern weak function %s is unsupported\n", + ext->name); + return -ENOTSUP; + } ksym_sec = sec; ext->type = EXT_KSYM; skip_mods_and_typedefs(obj->btf, t->type, @@ -3858,14 +3571,9 @@ bpf_object__find_program_by_name(const struct bpf_object *obj, static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { - switch (obj->efile.secs[shndx].sec_type) { - case SEC_BSS: - case SEC_DATA: - case SEC_RODATA: - return true; - default: - return false; - } + return shndx == obj->efile.data_shndx || + shndx == obj->efile.bss_shndx || + shndx == obj->efile.rodata_shndx; } static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, @@ -3878,25 +3586,22 @@ static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, static enum libbpf_map_type bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) { - if (shndx == obj->efile.symbols_shndx) - return LIBBPF_MAP_KCONFIG; - - switch (obj->efile.secs[shndx].sec_type) { - case SEC_BSS: - return LIBBPF_MAP_BSS; - case SEC_DATA: + if (shndx == obj->efile.data_shndx) return LIBBPF_MAP_DATA; - case SEC_RODATA: + else if (shndx == obj->efile.bss_shndx) + return LIBBPF_MAP_BSS; + else if (shndx == obj->efile.rodata_shndx) return LIBBPF_MAP_RODATA; - default: + else if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; + else return LIBBPF_MAP_UNSPEC; - } } static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, __u32 insn_idx, const char *sym_name, - const Elf64_Sym *sym, const Elf64_Rel *rel) + const GElf_Sym *sym, const GElf_Rel *rel) { struct bpf_insn *insn = 
&prog->insns[insn_idx]; size_t map_idx, nr_maps = prog->obj->nr_maps; @@ -3913,7 +3618,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (sym_is_extern(sym)) { - int sym_idx = ELF64_R_SYM(rel->r_info); + int sym_idx = GELF_R_SYM(rel->r_info); int i, n = obj->nr_extern; struct extern_desc *ext; @@ -4026,7 +3731,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; - if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) + if (map->libbpf_type != type) continue; pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", prog->name, map_idx, map->name, map->sec_idx, @@ -4078,10 +3783,11 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, } static int -bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) +bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data) { + Elf_Data *symbols = obj->efile.symbols; const char *relo_sec_name, *sec_name; - size_t sec_idx = shdr->sh_info, sym_idx; + size_t sec_idx = shdr->sh_info; struct bpf_program *prog; struct reloc_desc *relos; int err, i, nrels; @@ -4089,11 +3795,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat __u32 insn_idx; Elf_Scn *scn; Elf_Data *scn_data; - Elf64_Sym *sym; - Elf64_Rel *rel; - - if (sec_idx >= obj->efile.sec_cnt) - return -EINVAL; + GElf_Sym sym; + GElf_Rel rel; scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); @@ -4108,43 +3811,33 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - rel = elf_rel_by_idx(data, i); - if (!rel) { + if (!gelf_getrel(data, i, &rel)) { pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); return -LIBBPF_ERRNO__FORMAT; } - - sym_idx = ELF64_R_SYM(rel->r_info); - sym = elf_sym_by_idx(obj, sym_idx); - if (!sym) { - pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", - relo_sec_name, sym_idx, i); + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", + relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; } - if (sym->st_shndx >= obj->efile.sec_cnt) { - pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", - relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); - return -LIBBPF_ERRNO__FORMAT; - } - - if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { + if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) { pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", - relo_sec_name, (size_t)rel->r_offset, i); + relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = rel->r_offset / BPF_INSN_SZ; + insn_idx = rel.r_offset / BPF_INSN_SZ; /* relocations against static functions are recorded as * relocations against the section that contains a function; * in such case, symbol will be STT_SECTION and sym.st_name * will point to empty string (0), so fetch section name * instead */ - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) - sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0) + sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx)); else - sym_name = elf_sym_str(obj, sym->st_name); + 
sym_name = elf_sym_str(obj, sym.st_name); sym_name = sym_name ?: "<?"; [...] insn_idx -= prog->sec_insn_off; err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], - insn_idx, sym_name, sym, rel); + insn_idx, sym_name, &sym, &rel); if (err) return err; @@ -4198,7 +3891,8 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) * LLVM annotates global data differently in BTF, that is, * only as '.data', '.bss' or '.rodata'. */ - ret = btf__find_by_name(obj->btf, map->real_name); + ret = btf__find_by_name(obj->btf, + libbpf_type_to_btf_name[map->libbpf_type]); } if (ret < 0) return ret; @@ -4291,7 +3985,6 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) map->btf_key_type_id = info.btf_key_type_id; map->btf_value_type_id = info.btf_value_type_id; map->reused = true; - map->map_extra = info.map_extra; return 0; @@ -4334,24 +4027,30 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries) static int bpf_object__probe_loading(struct bpf_object *obj) { + struct bpf_load_program_attr attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, insn_cnt = ARRAY_SIZE(insns); + int ret; if (obj->gen_loader) return 0; - ret = bump_rlimit_memlock(); - if (ret) - pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); - /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret < 0) { + attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; + ret = bpf_load_program_xattr(&attr, NULL, 0); + } if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4375,19 +4074,29 @@ static int probe_fd(int fd) static int probe_kern_prog_name(void) { + struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, insn_cnt = ARRAY_SIZE(insns); + int ret; /* make sure loading with name works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + attr.name = "test"; + ret = bpf_load_program_xattr(&attr, NULL, 0); return probe_fd(ret); } static int probe_kern_global_data(void) { + struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), @@ -4395,9 +4104,15 @@ static int probe_kern_global_data(void) BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); + int ret, map; - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; + map_attr.key_size = sizeof(int); + map_attr.value_size = 32; + map_attr.max_entries = 1; + + map = bpf_create_map_xattr(&map_attr); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4408,7 +4123,13 @@ static int probe_kern_global_data(void) insns[0].imm = map; - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
insns, insn_cnt, NULL); + memset(&prg_attr, 0, sizeof(prg_attr)); + prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + prg_attr.insns = insns; + prg_attr.insns_cnt = ARRAY_SIZE(insns); + prg_attr.license = "GPL"; + + ret = bpf_load_program_xattr(&prg_attr, NULL, 0); close(map); return probe_fd(ret); } @@ -4492,68 +4213,45 @@ static int probe_kern_btf_float(void) strs, sizeof(strs))); } -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - static int probe_kern_array_mmap(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; + struct bpf_create_map_attr attr = { + .map_type = BPF_MAP_TYPE_ARRAY, + .map_flags = BPF_F_MMAPABLE, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, + }; - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); + return probe_fd(bpf_create_map_xattr(&attr)); } static int probe_kern_exp_attach_type(void) { - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int fd, insn_cnt = ARRAY_SIZE(insns); + memset(&attr, 0, sizeof(attr)); /* use any valid combination of program type and (optional) * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) * to see if kernel supports expected_attach_type field for * BPF_PROG_LOAD command */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); + attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); } static int probe_kern_probe_read_kernel(void) { + struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ @@ -4562,22 +4260,34 @@ static int probe_kern_probe_read_kernel(void) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }; - int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_KPROBE; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); } static int probe_prog_bind_map(void) { + struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 
0), BPF_EXIT_INSN(), }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + int ret, map, prog; - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; + map_attr.key_size = sizeof(int); + map_attr.value_size = 32; + map_attr.max_entries = 1; + + map = bpf_create_map_xattr(&map_attr); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4586,7 +4296,13 @@ static int probe_prog_bind_map(void) return ret; } - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + memset(&prg_attr, 0, sizeof(prg_attr)); + prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + prg_attr.insns = insns; + prg_attr.insns_cnt = ARRAY_SIZE(insns); + prg_attr.license = "GPL"; + + prog = bpf_load_program_xattr(&prg_attr, NULL, 0); if (prog < 0) { close(map); return 0; @@ -4631,14 +4347,19 @@ static int probe_module_btf(void) static int probe_perf_link(void) { + struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; int prog_fd, link_fd, err; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + prog_fd = bpf_load_program_xattr(&attr, NULL, 0); if (prog_fd < 0) return -errno; @@ -4708,23 +4429,14 @@ static struct kern_feature_desc { [FEAT_PERF_LINK] = { "BPF perf link support", probe_perf_link, }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, }; -bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; int ret; - if (obj && obj->gen_loader) + if (obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. 
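
The feature table above is consulted lazily: each probe runs at most once per process and its verdict is cached. A minimal sketch of that scheme, with hypothetical names (libbpf itself keeps this state inside struct kern_feature_desc):

    #include <stdbool.h>

    enum probe_res { PROBE_UNKNOWN = 0, PROBE_YES, PROBE_NO };

    static bool feature_supported(enum probe_res *cache, int (*probe_fn)(void))
    {
        if (*cache == PROBE_UNKNOWN)
            /* probe_fn() returns 1 if the running kernel supports
             * the feature, 0 or negative otherwise */
            *cache = probe_fn() == 1 ? PROBE_YES : PROBE_NO;
        return *cache == PROBE_YES;
    }
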
*/ @@ -4767,8 +4479,7 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && map_info.max_entries == map->def.max_entries && - map_info.map_flags == map->def.map_flags && - map_info.map_extra == map->map_extra); + map_info.map_flags == map->def.map_flags); } static int @@ -4851,18 +4562,20 @@ static void bpf_map__destroy(struct bpf_map *map); static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { - LIBBPF_OPTS(bpf_map_create_opts, create_attr); + struct bpf_create_map_attr create_attr; struct bpf_map_def *def = &map->def; - const char *map_name = NULL; - __u32 max_entries; int err = 0; + memset(&create_attr, 0, sizeof(create_attr)); + if (kernel_supports(obj, FEAT_PROG_NAME)) - map_name = map->name; + create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; + create_attr.map_type = def->type; create_attr.map_flags = def->map_flags; + create_attr.key_size = def->key_size; + create_attr.value_size = def->value_size; create_attr.numa_node = map->numa_node; - create_attr.map_extra = map->map_extra; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -4874,14 +4587,18 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return nr_cpus; } pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); - max_entries = nr_cpus; + create_attr.max_entries = nr_cpus; } else { - max_entries = def->max_entries; + create_attr.max_entries = def->max_entries; } if (bpf_map__is_struct_ops(map)) - create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + create_attr.btf_vmlinux_value_type_id = + map->btf_vmlinux_value_type_id; + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) { create_attr.btf_fd = btf__fd(obj->btf); create_attr.btf_key_type_id = map->btf_key_type_id; @@ -4902,42 +4619,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.inner_map_fd = map->inner_map_fd; } - switch (def->type) { - case BPF_MAP_TYPE_PERF_EVENT_ARRAY: - case BPF_MAP_TYPE_CGROUP_ARRAY: - case BPF_MAP_TYPE_STACK_TRACE: - case BPF_MAP_TYPE_ARRAY_OF_MAPS: - case BPF_MAP_TYPE_HASH_OF_MAPS: - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - case BPF_MAP_TYPE_CPUMAP: - case BPF_MAP_TYPE_XSKMAP: - case BPF_MAP_TYPE_SOCKMAP: - case BPF_MAP_TYPE_SOCKHASH: - case BPF_MAP_TYPE_QUEUE: - case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: - create_attr.btf_fd = 0; - create_attr.btf_key_type_id = 0; - create_attr.btf_value_type_id = 0; - map->btf_key_type_id = 0; - map->btf_value_type_id = 0; - default: - break; - } - if (obj->gen_loader) { - bpf_gen__map_create(obj->gen_loader, def->type, map_name, - def->key_size, def->value_size, max_entries, - &create_attr, is_inner ? -1 : map - obj->maps); + bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); /* Pretend to have valid FD to pass various fd >= 0 checks. * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 
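
bpf_create_map_xattr(), which the map-creation path above switches back to, takes a zero-initialized attribute struct instead of a long positional argument list. A standalone sketch (the map parameters are made up for illustration):

    #include <errno.h>
    #include <bpf/bpf.h>

    static int create_example_map(void)
    {
        struct bpf_create_map_attr attr = {};
        int fd;

        attr.name = "example_map";
        attr.map_type = BPF_MAP_TYPE_ARRAY;
        attr.key_size = sizeof(int);
        attr.value_size = sizeof(long);
        attr.max_entries = 16;

        fd = bpf_create_map_xattr(&attr);
        /* the kernel's error is reported via errno */
        return fd < 0 ? -errno : fd;
    }

Zeroed fields (btf_fd, numa_node, and so on) simply fall back to kernel defaults, which is why the code above can memset() once and fill in only what each map needs.
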
*/ map->fd = 0; } else { - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - max_entries, &create_attr); + map->fd = bpf_create_map_xattr(&create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4952,9 +4641,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - max_entries, &create_attr); + map->fd = bpf_create_map_xattr(&create_attr); } err = map->fd < 0 ? -errno : 0; @@ -4969,7 +4656,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return err; } -static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) +static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) { const struct bpf_map *targ_map; unsigned int i; @@ -4981,18 +4668,18 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) targ_map = map->init_slots[i]; fd = bpf_map__fd(targ_map); - if (obj->gen_loader) { - bpf_gen__populate_outer_map(obj->gen_loader, - map - obj->maps, i, - targ_map - obj->maps); + pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n", + map - obj->maps, i, targ_map - obj->maps); + return -ENOTSUP; } else { err = bpf_map_update_elem(map->fd, &i, &fd, 0); } if (err) { err = -errno; pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, fd, err); + map->name, i, targ_map->name, + fd, err); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -5005,59 +4692,6 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) return 0; } -static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) -{ - const struct bpf_program *targ_prog; - unsigned int i; - int fd, err; - - if (obj->gen_loader) - return -ENOTSUP; - - for (i = 0; i < map->init_slots_sz; i++) { - if (!map->init_slots[i]) - continue; - - targ_prog = map->init_slots[i]; - fd = bpf_program__fd(targ_prog); - - err = bpf_map_update_elem(map->fd, &i, &fd, 0); - if (err) { - err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", - map->name, i, targ_prog->name, fd, err); - return err; - } - pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", - map->name, i, targ_prog->name, fd); - } - - zfree(&map->init_slots); - map->init_slots_sz = 0; - - return 0; -} - -static int bpf_object_init_prog_arrays(struct bpf_object *obj) -{ - struct bpf_map *map; - int i, err; - - for (i = 0; i < obj->nr_maps; i++) { - map = &obj->maps[i]; - - if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) - continue; - - err = init_prog_array_slots(obj, map); - if (err < 0) { - zclose(map->fd); - return err; - } - } - return 0; -} - static int bpf_object__create_maps(struct bpf_object *obj) { @@ -5070,26 +4704,6 @@ bpf_object__create_maps(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) { map = &obj->maps[i]; - /* To support old kernels, we skip creating global data maps - * (.rodata, .data, .kconfig, etc); later on, during program - * loading, if we detect that at least one of the to-be-loaded - * programs is referencing any global data map, we'll error - * out with program name and relocation index logged. 
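
init_map_slots() earlier in this hunk reduces to one syscall per slot: for the *_OF_MAPS map types the value written into the outer map is the inner map's file descriptor. A freestanding sketch, assuming both FDs are valid:

    #include <errno.h>
    #include <bpf/bpf.h>

    static int set_inner_map(int outer_fd, __u32 slot, int inner_fd)
    {
        /* the kernel resolves the FD to the inner map at update time */
        int err = bpf_map_update_elem(outer_fd, &slot, &inner_fd, 0);

        return err ? -errno : 0;
    }
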
- * This approach allows to accommodate Clang emitting - * unnecessary .rodata.str1.1 sections for string literals, - * but also it allows to have CO-RE applications that use - * global variables in some of BPF programs, but not others. - * If those global variable-using programs are not loaded at - * runtime due to bpf_program__set_autoload(prog, false), - * bpf_object loading will succeed just fine even on old - * kernels. - */ - if (bpf_map__is_internal(map) && - !kernel_supports(obj, FEAT_GLOBAL_DATA)) { - map->skipped = true; - continue; - } - retried = false; retry: if (map->pin_path) { @@ -5126,8 +4740,8 @@ bpf_object__create_maps(struct bpf_object *obj) } } - if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { - err = init_map_in_map_slots(obj, map); + if (map->init_slots_sz) { + err = init_map_slots(obj, map); if (err < 0) { zclose(map->fd); goto err_out; @@ -5199,18 +4813,15 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, struct bpf_core_cand_list *cands) { struct bpf_core_cand *new_cands, *cand; - const struct btf_type *t, *local_t; - const char *targ_name, *local_name; + const struct btf_type *t; + const char *targ_name; size_t targ_essent_len; int n, i; - local_t = btf__type_by_id(local_cand->btf, local_cand->id); - local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); - - n = btf__type_cnt(targ_btf); - for (i = targ_start_id; i < n; i++) { + n = btf__get_nr_types(targ_btf); + for (i = targ_start_id; i <= n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_t)) + if (btf_kind(t) != btf_kind(local_cand->t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5221,12 +4832,12 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, if (targ_essent_len != local_essent_len) continue; - if (strncmp(local_name, targ_name, local_essent_len) != 0) + if (strncmp(local_cand->name, targ_name, local_essent_len) != 0) continue; pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", - local_cand->id, btf_kind_str(local_t), - local_name, i, btf_kind_str(t), targ_name, + local_cand->id, btf_kind_str(local_cand->t), + local_cand->name, i, btf_kind_str(t), targ_name, targ_btf_name); new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, sizeof(*cands->cands)); @@ -5235,6 +4846,8 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, cand = &new_cands[cands->len]; cand->btf = targ_btf; + cand->t = t; + cand->name = targ_name; cand->id = i; cands->cands = new_cands; @@ -5341,21 +4954,18 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l struct bpf_core_cand local_cand = {}; struct bpf_core_cand_list *cands; const struct btf *main_btf; - const struct btf_type *local_t; - const char *local_name; size_t local_essent_len; int err, i; local_cand.btf = local_btf; - local_cand.id = local_type_id; - local_t = btf__type_by_id(local_btf, local_type_id); - if (!local_t) + local_cand.t = btf__type_by_id(local_btf, local_type_id); + if (!local_cand.t) return ERR_PTR(-EINVAL); - local_name = btf__name_by_offset(local_btf, local_t->name_off); - if (str_is_empty(local_name)) + local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off); + if (str_is_empty(local_cand.name)) return ERR_PTR(-EINVAL); - local_essent_len = bpf_core_essential_name_len(local_name); + local_essent_len = bpf_core_essential_name_len(local_cand.name); cands = calloc(1, sizeof(*cands)); if (!cands) @@ -5384,7 +4994,7 @@ 
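
The candidate matching in bpf_core_add_cands() above keys on the "essential" part of a type name: everything before the first "___" separator, so that a flavored local type still matches its plain target. A sketch of what bpf_core_essential_name_len() computes under that reading:

    #include <string.h>

    static size_t essential_name_len(const char *name)
    {
        const char *flavor = strstr(name, "___");

        /* "task_struct___old" -> 11, i.e. the same essential name
         * (and length) as plain "task_struct" */
        return flavor ? (size_t)(flavor - name) : strlen(name);
    }
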
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l err = bpf_core_add_cands(&local_cand, local_essent_len, obj->btf_modules[i].btf, obj->btf_modules[i].name, - btf__type_cnt(obj->btf_vmlinux), + btf__get_nr_types(obj->btf_vmlinux) + 1, cands); if (err) goto err_out; @@ -5505,31 +5115,12 @@ static void *u32_as_hash_key(__u32 x) return (void *)(uintptr_t)x; } -static int record_relo_core(struct bpf_program *prog, - const struct bpf_core_relo *core_relo, int insn_idx) -{ - struct reloc_desc *relos, *relo; - - relos = libbpf_reallocarray(prog->reloc_desc, - prog->nr_reloc + 1, sizeof(*relos)); - if (!relos) - return -ENOMEM; - relo = &relos[prog->nr_reloc]; - relo->type = RELO_CORE; - relo->insn_idx = insn_idx; - relo->core_relo = core_relo; - prog->reloc_desc = relos; - prog->nr_reloc++; - return 0; -} - static int bpf_core_apply_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, struct hashmap *cand_cache) { - struct bpf_core_spec specs_scratch[3] = {}; const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_cand_list *cands = NULL; const char *prog_name = prog->name; @@ -5560,15 +5151,13 @@ static int bpf_core_apply_relo(struct bpf_program *prog, return -EINVAL; if (prog->obj->gen_loader) { - const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - - pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", + pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", prog - prog->obj->programs, relo->insn_off / 8, - btf_kind_str(local_type), local_name, spec_str, insn_idx); - return record_relo_core(prog, relo, insn_idx); + local_name, relo->kind); + return -ENOTSUP; } - if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && + if (relo->kind != BPF_TYPE_ID_LOCAL && !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { @@ -5584,8 +5173,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, } } - return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, - relo_idx, local_btf, cands, specs_scratch); + return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); } static int @@ -5715,13 +5303,6 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; } else { - const struct bpf_map *map = &obj->maps[relo->map_idx]; - - if (map->skipped) { - pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", - prog->name, i); - return -ENOTSUP; - } insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[0].imm = obj->maps[relo->map_idx].fd; } @@ -5751,13 +5332,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_EXTERN_FUNC: ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; - if (ext->is_set) { - insn[0].imm = ext->ksym.kernel_btf_id; - insn[0].off = ext->ksym.btf_fd_idx; - } else { /* unresolved weak kfunc */ - insn[0].imm = 0; - insn[0].off = 0; - } + insn[0].imm = ext->ksym.kernel_btf_id; break; case RELO_SUBPROG_ADDR: if (insn[0].src_reg != BPF_PSEUDO_FUNC) { @@ -5770,9 +5345,6 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CALL: /* handled already */ break; - case RELO_CORE: - /* will be handled by bpf_program_record_relos() */ - break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -5936,8 +5508,6 
@@ static int cmp_relo_by_insn_idx(const void *key, const void *elem) static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) { - if (!prog->nr_reloc) - return NULL; return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); } @@ -5953,9 +5523,8 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); if (!relos) return -ENOMEM; - if (subprog->nr_reloc) - memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, - sizeof(*relos) * subprog->nr_reloc); + memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, + sizeof(*relos) * subprog->nr_reloc); for (i = main_prog->nr_reloc; i < new_cnt; i++) relos[i].insn_idx += subprog->sub_insn_off; @@ -6213,35 +5782,6 @@ bpf_object__free_relocs(struct bpf_object *obj) } } -static int cmp_relocs(const void *_a, const void *_b) -{ - const struct reloc_desc *a = _a; - const struct reloc_desc *b = _b; - - if (a->insn_idx != b->insn_idx) - return a->insn_idx < b->insn_idx ? -1 : 1; - - /* no two relocations should have the same insn_idx, but ... */ - if (a->type != b->type) - return a->type < b->type ? -1 : 1; - - return 0; -} - -static void bpf_object__sort_relos(struct bpf_object *obj) -{ - int i; - - for (i = 0; i < obj->nr_programs; i++) { - struct bpf_program *p = &obj->programs[i]; - - if (!p->nr_reloc) - continue; - - qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); - } -} - static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { @@ -6256,8 +5796,6 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err); return err; } - if (obj->gen_loader) - bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6293,8 +5831,6 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) - continue; err = bpf_object__relocate_calls(obj, prog); if (err) { @@ -6308,8 +5844,6 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) - continue; err = bpf_object__relocate_data(obj, prog); if (err) { pr_warn("prog '%s': failed to relocate data references: %d\n", @@ -6323,23 +5857,22 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) } static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - Elf64_Shdr *shdr, Elf_Data *data); + GElf_Shdr *shdr, Elf_Data *data); static int bpf_object__collect_map_relos(struct bpf_object *obj, - Elf64_Shdr *shdr, Elf_Data *data) + GElf_Shdr *shdr, Elf_Data *data) { const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); int i, j, nrels, new_sz; const struct btf_var_secinfo *vi = NULL; const struct btf_type *sec, *var, *def; - struct bpf_map *map = NULL, *targ_map = NULL; - struct bpf_program *targ_prog = NULL; - bool is_prog_array, is_map_in_map; + struct bpf_map *map = NULL, *targ_map; const struct btf_member *member; - const char *name, *mname, *type; + const char *name, *mname; + Elf_Data *symbols; unsigned int moff; - Elf64_Sym *sym; - Elf64_Rel *rel; + GElf_Sym sym; + GElf_Rel rel; void *tmp; if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) @@ -6348,25 +5881,28 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, if (!sec) return -EINVAL; + symbols = obj->efile.symbols; nrels = shdr->sh_size / 
shdr->sh_entsize; for (i = 0; i < nrels; i++) { - rel = elf_rel_by_idx(data, i); - if (!rel) { + if (!gelf_getrel(data, i, &rel)) { pr_warn(".maps relo #%d: failed to get ELF relo\n", i); return -LIBBPF_ERRNO__FORMAT; } - - sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); - if (!sym) { + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { pr_warn(".maps relo #%d: symbol %zx not found\n", - i, (size_t)ELF64_R_SYM(rel->r_info)); + i, (size_t)GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym->st_name) ?: ""; + name = elf_sym_str(obj, sym.st_name) ?: ""; + if (sym.st_shndx != obj->efile.btf_maps_shndx) { + pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", + i, name); + return -LIBBPF_ERRNO__RELOC; + } - pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", - i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, - (size_t)rel->r_offset, sym->st_name, name); + pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n", + i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value, + (size_t)rel.r_offset, sym.st_name, name); for (j = 0; j < obj->nr_maps; j++) { map = &obj->maps[j]; @@ -6374,55 +5910,29 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, continue; vi = btf_var_secinfos(sec) + map->btf_var_idx; - if (vi->offset <= rel->r_offset && - rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) + if (vi->offset <= rel.r_offset && + rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size) break; } if (j == obj->nr_maps) { - pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", - i, name, (size_t)rel->r_offset); + pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n", + i, name, (size_t)rel.r_offset); return -EINVAL; } - is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); - is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; - type = is_map_in_map ? 
"map" : "prog"; - if (is_map_in_map) { - if (sym->st_shndx != obj->efile.btf_maps_shndx) { - pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", - i, name); - return -LIBBPF_ERRNO__RELOC; - } - if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && - map->def.key_size != sizeof(int)) { - pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", - i, map->name, sizeof(int)); - return -EINVAL; - } - targ_map = bpf_object__find_map_by_name(obj, name); - if (!targ_map) { - pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", - i, name); - return -ESRCH; - } - } else if (is_prog_array) { - targ_prog = bpf_object__find_program_by_name(obj, name); - if (!targ_prog) { - pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", - i, name); - return -ESRCH; - } - if (targ_prog->sec_idx != sym->st_shndx || - targ_prog->sec_insn_off * 8 != sym->st_value || - prog_is_subprog(obj, targ_prog)) { - pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", - i, name); - return -LIBBPF_ERRNO__RELOC; - } - } else { + if (!bpf_map_type__is_map_in_map(map->def.type)) + return -EINVAL; + if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && + map->def.key_size != sizeof(int)) { + pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", + i, map->name, sizeof(int)); return -EINVAL; } + targ_map = bpf_object__find_map_by_name(obj, name); + if (!targ_map) + return -ESRCH; + var = btf__type_by_id(obj->btf, vi->type); def = skip_mods_and_typedefs(obj->btf, var->type, NULL); if (btf_vlen(def) == 0) @@ -6433,10 +5943,10 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -EINVAL; moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; - if (rel->r_offset - vi->offset < moff) + if (rel.r_offset - vi->offset < moff) return -EINVAL; - moff = rel->r_offset - vi->offset - moff; + moff = rel.r_offset - vi->offset - moff; /* here we use BPF pointer size, which is always 64 bit, as we * are parsing ELF that was built for BPF target */ @@ -6453,31 +5963,38 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, (new_sz - map->init_slots_sz) * host_ptr_sz); map->init_slots_sz = new_sz; } - map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog; + map->init_slots[moff] = targ_map; - pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", - i, map->name, moff, type, name); + pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n", + i, map->name, moff, name); } return 0; } +static int cmp_relocs(const void *_a, const void *_b) +{ + const struct reloc_desc *a = _a; + const struct reloc_desc *b = _b; + + if (a->insn_idx != b->insn_idx) + return a->insn_idx < b->insn_idx ? -1 : 1; + + /* no two relocations should have the same insn_idx, but ... */ + if (a->type != b->type) + return a->type < b->type ? 
-1 : 1; + + return 0; +} + static int bpf_object__collect_relos(struct bpf_object *obj) { int i, err; - for (i = 0; i < obj->efile.sec_cnt; i++) { - struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; - Elf64_Shdr *shdr; - Elf_Data *data; - int idx; - - if (sec_desc->sec_type != SEC_RELO) - continue; - - shdr = sec_desc->shdr; - data = sec_desc->data; - idx = shdr->sh_info; + for (i = 0; i < obj->efile.nr_reloc_sects; i++) { + GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; + Elf_Data *data = obj->efile.reloc_sects[i].data; + int idx = shdr->sh_info; if (shdr->sh_type != SHT_REL) { pr_warn("internal error at %d\n", __LINE__); @@ -6494,7 +6011,14 @@ static int bpf_object__collect_relos(struct bpf_object *obj) return err; } - bpf_object__sort_relos(obj); + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *p = &obj->programs[i]; + + if (!p->nr_reloc) + continue; + + qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); + } return 0; } @@ -6546,61 +6070,15 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program return 0; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, - int *btf_obj_fd, int *btf_type_id); - -/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ -static int libbpf_preload_prog(struct bpf_program *prog, - struct bpf_prog_load_opts *opts, long cookie) +static int +load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, + char *license, __u32 kern_version, int *pfd) { - enum sec_def_flags def = cookie; - - /* old kernels might not support specifying expected_attach_type */ - if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) - opts->expected_attach_type = 0; - - if (def & SEC_SLEEPABLE) - opts->prog_flags |= BPF_F_SLEEPABLE; - - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { - int btf_obj_fd = 0, btf_type_id = 0, err; - const char *attach_name; - - attach_name = strchr(prog->sec_name, '/') + 1; - err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); - if (err) - return err; - - /* cache resolved BTF FD and BTF type ID in the prog */ - prog->attach_btf_obj_fd = btf_obj_fd; - prog->attach_btf_id = btf_type_id; - - /* but by now libbpf common logic is not utilizing - * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because - * this callback is called after opts were populated by - * libbpf, so this callback has to update opts explicitly here - */ - opts->attach_btf_obj_fd = btf_obj_fd; - opts->attach_btf_id = btf_type_id; - } - return 0; -} - -static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, - struct bpf_insn *insns, int insns_cnt, - const char *license, __u32 kern_version, - int *prog_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_attr); - const char *prog_name = NULL; + struct bpf_prog_load_params load_attr = {}; char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; - char *log_buf = NULL, *tmp; - int btf_fd, ret, err; - bool own_log_buf = true; - __u32 log_level = prog->log_level; + char *log_buf = NULL; + int btf_fd, ret; if (prog->type == BPF_PROG_TYPE_UNSPEC) { /* @@ -6615,18 +6093,30 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog if (!insns || !insns_cnt) return -EINVAL; - load_attr.expected_attach_type = prog->expected_attach_type; - if (kernel_supports(obj, FEAT_PROG_NAME)) - prog_name = 
prog->name; - load_attr.attach_prog_fd = prog->attach_prog_fd; - load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; + load_attr.prog_type = prog->type; + /* old kernels might not support specifying expected_attach_type */ + if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def && + prog->sec_def->is_exp_attach_type_optional) + load_attr.expected_attach_type = 0; + else + load_attr.expected_attach_type = prog->expected_attach_type; + if (kernel_supports(prog->obj, FEAT_PROG_NAME)) + load_attr.name = prog->name; + load_attr.insns = insns; + load_attr.insn_cnt = insns_cnt; + load_attr.license = license; + load_attr.attach_btf_id = prog->attach_btf_id; + if (prog->attach_prog_fd) + load_attr.attach_prog_fd = prog->attach_prog_fd; + else + load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(obj); - if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { + btf_fd = bpf_object__btf_fd(prog->obj); + if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -6635,125 +6125,93 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog load_attr.line_info_rec_size = prog->line_info_rec_size; load_attr.line_info_cnt = prog->line_info_cnt; } - load_attr.log_level = log_level; + load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; - load_attr.fd_array = obj->fd_array; - /* adjust load_attr if sec_def provides custom preload callback */ - if (prog->sec_def && prog->sec_def->preload_fn) { - err = prog->sec_def->preload_fn(prog, &load_attr, prog->sec_def->cookie); - if (err < 0) { - pr_warn("prog '%s': failed to prepare load attributes: %d\n", - prog->name, err); - return err; - } - } - - if (obj->gen_loader) { - bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, - license, insns, insns_cnt, &load_attr, - prog - obj->programs); - *prog_fd = -1; + if (prog->obj->gen_loader) { + bpf_gen__prog_load(prog->obj->gen_loader, &load_attr, + prog - prog->obj->programs); + *pfd = -1; return 0; } - retry_load: - /* if log_level is zero, we don't request logs initiallly even if - * custom log_buf is specified; if the program load fails, then we'll - * bump log_level to 1 and use either custom log_buf or we'll allocate - * our own and retry the load to get details on what failed - */ - if (log_level) { - if (prog->log_buf) { - log_buf = prog->log_buf; - log_buf_size = prog->log_size; - own_log_buf = false; - } else if (obj->log_buf) { - log_buf = obj->log_buf; - log_buf_size = obj->log_size; - own_log_buf = false; - } else { - log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); - tmp = realloc(log_buf, log_buf_size); - if (!tmp) { - ret = -ENOMEM; - goto out; - } - log_buf = tmp; - log_buf[0] = '\0'; - own_log_buf = true; - } + if (log_buf_size) { + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; } load_attr.log_buf = log_buf; - load_attr.log_size = log_buf_size; - load_attr.log_level = log_level; + load_attr.log_buf_sz = log_buf_size; + ret = libbpf__bpf_prog_load(&load_attr); - ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); if (ret >= 0) { - if (log_level && own_log_buf) { - pr_debug("prog '%s': 
-- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", - prog->name, log_buf); - } + if (log_buf && load_attr.log_level) + pr_debug("verifier log:\n%s", log_buf); - if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { - struct bpf_map *map; - int i; + if (prog->obj->rodata_map_idx >= 0 && + kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) { + struct bpf_map *rodata_map = + &prog->obj->maps[prog->obj->rodata_map_idx]; - for (i = 0; i < obj->nr_maps; i++) { - map = &prog->obj->maps[i]; - if (map->libbpf_type != LIBBPF_MAP_RODATA) - continue; - - if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to bind map '%s': %s\n", - prog->name, map->real_name, cp); - /* Don't fail hard if can't bind rodata. */ - } + if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("prog '%s': failed to bind .rodata map: %s\n", + prog->name, cp); + /* Don't fail hard if can't bind rodata. */ } } - *prog_fd = ret; + *pfd = ret; ret = 0; goto out; } - if (log_level == 0) { - log_level = 1; + if (!log_buf || errno == ENOSPC) { + log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, + log_buf_size << 1); + + free(log_buf); goto retry_load; } - /* On ENOSPC, increase log buffer size and retry, unless custom - * log_buf is specified. - * Be careful to not overflow u32, though. Kernel's log buf size limit - * isn't part of UAPI so it can always be bumped to full 4GB. So don't - * multiply by 2 unless we are sure we'll fit within 32 bits. - * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). - */ - if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) - goto retry_load; - - ret = -errno; + ret = errno ? -errno : -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + pr_warn("load bpf program failed: %s\n", cp); pr_perm_msg(ret); - if (own_log_buf && log_buf && log_buf[0] != '\0') { - pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", - prog->name, log_buf); - } - if (insns_cnt >= BPF_MAXINSNS) { - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", - prog->name, insns_cnt, BPF_MAXINSNS); + if (log_buf && log_buf[0] != '\0') { + ret = -LIBBPF_ERRNO__VERIFY; + pr_warn("-- BEGIN DUMP LOG ---\n"); + pr_warn("\n%s\n", log_buf); + pr_warn("-- END LOG --\n"); + } else if (load_attr.insn_cnt >= BPF_MAXINSNS) { + pr_warn("Program too large (%zu insns), at most %d insns\n", + load_attr.insn_cnt, BPF_MAXINSNS); + ret = -LIBBPF_ERRNO__PROG2BIG; + } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { + /* Wrong program type? 
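
The retry_load path above grows the verifier log geometrically once the kernel reports truncation. Stripped of libbpf specifics, the loop is roughly the following, where loader() is a hypothetical stand-in for the actual program-load call:

    static int load_with_growing_log(void)
    {
        size_t sz = 0;
        char *buf = NULL;
        int fd;

        for (;;) {
            fd = loader(buf, sz);
            if (fd >= 0 || (buf && errno != ENOSPC))
                break;
            /* first failure (no log yet) or truncated log: retry bigger */
            sz = sz ? sz << 1 : BPF_LOG_BUF_SIZE;
            free(buf);
            buf = malloc(sz);
            if (!buf)
                return -ENOMEM;
            buf[0] = '\0';
        }
        /* on failure the verifier log, if any, is in buf */
        free(buf);
        return fd;
    }

Doubling keeps the number of extra load attempts logarithmic in the final log size.
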
*/ + int fd; + + load_attr.prog_type = BPF_PROG_TYPE_KPROBE; + load_attr.expected_attach_type = 0; + load_attr.log_buf = NULL; + load_attr.log_buf_sz = 0; + fd = libbpf__bpf_prog_load(&load_attr); + if (fd >= 0) { + close(fd); + ret = -LIBBPF_ERRNO__PROGTYPE; + goto out; + } } out: - if (own_log_buf) - free(log_buf); + free(log_buf); return ret; } -static int bpf_program_record_relos(struct bpf_program *prog) +static int bpf_program__record_externs(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; int i; @@ -6766,26 +6224,18 @@ static int bpf_program_record_relos(struct bpf_program *prog) case RELO_EXTERN_VAR: if (ext->type != EXT_KSYM) continue; - bpf_gen__record_extern(obj->gen_loader, ext->name, - ext->is_weak, !ext->ksym.type_id, - BTF_KIND_VAR, relo->insn_idx); - break; - case RELO_EXTERN_FUNC: - bpf_gen__record_extern(obj->gen_loader, ext->name, - ext->is_weak, false, BTF_KIND_FUNC, + if (!ext->ksym.type_id) { + pr_warn("typeless ksym %s is not supported yet\n", + ext->name); + return -ENOTSUP; + } + bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR, relo->insn_idx); break; - case RELO_CORE: { - struct bpf_core_relo cr = { - .insn_off = relo->insn_idx * 8, - .type_id = relo->core_relo->type_id, - .access_str_off = relo->core_relo->access_str_off, - .kind = relo->core_relo->kind, - }; - - bpf_gen__record_relo_core(obj->gen_loader, &cr); + case RELO_EXTERN_FUNC: + bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC, + relo->insn_idx); break; - } default: continue; } @@ -6793,16 +6243,30 @@ static int bpf_program_record_relos(struct bpf_program *prog) return 0; } -static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, - const char *license, __u32 kern_ver) +static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); + +int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i; - if (obj->loaded) { + if (prog->obj->loaded) { pr_warn("prog '%s': can't load after object was loaded\n", prog->name); return libbpf_err(-EINVAL); } + if ((prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_LSM || + prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + int btf_obj_fd = 0, btf_type_id = 0; + + err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id); + if (err) + return libbpf_err(err); + + prog->attach_btf_obj_fd = btf_obj_fd; + prog->attach_btf_id = btf_type_id; + } + if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warn("Internal error: can't load program '%s'\n", @@ -6824,11 +6288,10 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog pr_warn("prog '%s': inconsistent nr(%d) != 1\n", prog->name, prog->instances.nr); } - if (obj->gen_loader) - bpf_program_record_relos(prog); - err = bpf_object_load_prog_instance(obj, prog, - prog->insns, prog->insns_cnt, - license, kern_ver, &fd); + if (prog->obj->gen_loader) + bpf_program__record_externs(prog); + err = load_program(prog, prog->insns, prog->insns_cnt, + license, kern_ver, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -6856,9 +6319,8 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog continue; } - err = bpf_object_load_prog_instance(obj, prog, - result.new_insn_ptr, result.new_insn_cnt, - license, kern_ver, &fd); + err = load_program(prog, result.new_insn_ptr, + result.new_insn_cnt, license, kern_ver, &fd); if (err) { pr_warn("Loading the %dth instance of 
program '%s' failed\n", i, prog->name); @@ -6872,14 +6334,11 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog out: if (err) pr_warn("failed to load program '%s'\n", prog->name); + zfree(&prog->insns); + prog->insns_cnt = 0; return libbpf_err(err); } -int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) -{ - return bpf_object_load_prog(prog->obj, prog, license, kern_ver); -} - static int bpf_object__load_progs(struct bpf_object *obj, int log_level) { @@ -6903,7 +6362,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) continue; } prog->log_level |= log_level; - err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version); + err = bpf_program__load(prog, obj->license, obj->kern_version); if (err) return err; } @@ -6914,56 +6373,15 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) static const struct bpf_sec_def *find_sec_def(const char *sec_name); -static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) -{ - struct bpf_program *prog; - int err; - - bpf_object__for_each_program(prog, obj) { - prog->sec_def = find_sec_def(prog->sec_name); - if (!prog->sec_def) { - /* couldn't guess, but user might manually specify */ - pr_debug("prog '%s': unrecognized ELF section name '%s'\n", - prog->name, prog->sec_name); - continue; - } - - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || - prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) - prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); -#pragma GCC diagnostic pop - - /* sec_def can have custom callback which should be called - * after bpf_program is initialized to adjust its properties - */ - if (prog->sec_def->init_fn) { - err = prog->sec_def->init_fn(prog, prog->sec_def->cookie); - if (err < 0) { - pr_warn("prog '%s': failed to initialize: %d\n", - prog->name, err); - return err; - } - } - } - - return 0; -} - -static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, - const struct bpf_object_open_opts *opts) +static struct bpf_object * +__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, + const struct bpf_object_open_opts *opts) { const char *obj_name, *kconfig, *btf_tmp_path; + struct bpf_program *prog; struct bpf_object *obj; char tmp_name[64]; int err; - char *log_buf; - size_t log_size; - __u32 log_level; if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", @@ -6986,22 +6404,10 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, pr_debug("loading object '%s' from buffer\n", obj_name); } - log_buf = OPTS_GET(opts, kernel_log_buf, NULL); - log_size = OPTS_GET(opts, kernel_log_size, 0); - log_level = OPTS_GET(opts, kernel_log_level, 0); - if (log_size > UINT_MAX) - return ERR_PTR(-EINVAL); - if (log_size && !log_buf) - return ERR_PTR(-EINVAL); - obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; - obj->log_buf = log_buf; - obj->log_size = log_size; - obj->log_level = log_level; - btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7030,13 +6436,31 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, 
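
One easy-to-miss prerequisite in the open path above: libelf refuses all work until the process has performed its version handshake, which is why the check sits at the very top of the function. In isolation:

    #include <libelf.h>

    /* must run once before elf_begin() and friends */
    if (elf_version(EV_CURRENT) == EV_NONE)
        return -EINVAL;  /* headers and runtime library disagree */
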
err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object__finalize_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); - err = err ? : bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); if (err) goto out; - bpf_object__elf_finish(obj); + bpf_object__for_each_program(prog, obj) { + prog->sec_def = find_sec_def(prog->sec_name); + if (!prog->sec_def) { + /* couldn't guess, but user might manually specify */ + pr_debug("prog '%s': unrecognized ELF section name '%s'\n", + prog->name, prog->sec_name); + continue; + } + + if (prog->sec_def->is_sleepable) + prog->prog_flags |= BPF_F_SLEEPABLE; + bpf_program__set_type(prog, prog->sec_def->prog_type); + bpf_program__set_expected_attach_type(prog, + prog->sec_def->expected_attach_type); + + if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || + prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); + } + return obj; out: bpf_object__close(obj); @@ -7055,7 +6479,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) return NULL; pr_debug("loading %s\n", attr->file); - return bpf_object_open(attr->file, NULL, 0, &opts); + return __bpf_object__open(attr->file, NULL, 0, &opts); } struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -7081,7 +6505,7 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) pr_debug("loading %s\n", path); - return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); + return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts)); } struct bpf_object * @@ -7091,7 +6515,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, if (!obj_buf || obj_buf_sz == 0) return libbpf_err_ptr(-EINVAL); - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); + return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts)); } struct bpf_object * @@ -7108,10 +6532,10 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, if (!obj_buf || obj_buf_sz == 0) return errno = EINVAL, NULL; - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts)); + return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); } -static int bpf_object_unload(struct bpf_object *obj) +int bpf_object__unload(struct bpf_object *obj) { size_t i; @@ -7130,8 +6554,6 @@ static int bpf_object_unload(struct bpf_object *obj) return 0; } -int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); - static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -7139,6 +6561,10 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) bpf_object__for_each_map(m, obj) { if (!bpf_map__is_internal(m)) continue; + if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) { + pr_warn("kernel doesn't support global data\n"); + return -ENOTSUP; + } if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) m->def.map_flags ^= BPF_F_MMAPABLE; } @@ -7201,14 +6627,13 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj) static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, __u16 kind, struct btf **res_btf, - struct module_btf **res_mod_btf) + int *res_btf_fd) { - struct module_btf *mod_btf; + int i, id, btf_fd, err; struct btf *btf; - int i, id, err; btf = obj->btf_vmlinux; - mod_btf = NULL; + btf_fd = 0; id = btf__find_by_name_kind(btf, ksym_name, kind); if (id == -ENOENT) { @@ -7217,10 +6642,10 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return 
err; for (i = 0; i < obj->btf_module_cnt; i++) { - /* we assume module_btf's BTF FD is always >0 */ - mod_btf = &obj->btf_modules[i]; - btf = mod_btf->btf; - id = btf__find_by_name_kind_own(btf, ksym_name, kind); + btf = obj->btf_modules[i].btf; + /* we assume module BTF FD is always >0 */ + btf_fd = obj->btf_modules[i].fd; + id = btf__find_by_name_kind(btf, ksym_name, kind); if (id != -ENOENT) break; } @@ -7229,7 +6654,7 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return -ESRCH; *res_btf = btf; - *res_mod_btf = mod_btf; + *res_btf_fd = btf_fd; return id; } @@ -7238,15 +6663,14 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, { const struct btf_type *targ_var, *targ_type; __u32 targ_type_id, local_type_id; - struct module_btf *mod_btf = NULL; const char *targ_var_name; + int id, btf_fd = 0, err; struct btf *btf = NULL; - int id, err; - id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); - if (id < 0) { - if (id == -ESRCH && ext->is_weak) - return 0; + id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd); + if (id == -ESRCH && ext->is_weak) { + return 0; + } else if (id < 0) { pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", ext->name); return id; @@ -7278,7 +6702,7 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, } ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; + ext->ksym.kernel_btf_obj_fd = btf_fd; ext->ksym.kernel_btf_id = id; pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); @@ -7290,22 +6714,26 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, struct extern_desc *ext) { int local_func_proto_id, kfunc_proto_id, kfunc_id; - struct module_btf *mod_btf = NULL; const struct btf_type *kern_func; struct btf *kern_btf = NULL; - int ret; + int ret, kern_btf_fd = 0; local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); + kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, + &kern_btf, &kern_btf_fd); if (kfunc_id < 0) { - if (kfunc_id == -ESRCH && ext->is_weak) - return 0; - pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", + pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", ext->name); return kfunc_id; } + if (kern_btf != obj->btf_vmlinux) { + pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n", + ext->name); + return -ENOTSUP; + } + kern_func = btf__type_by_id(kern_btf, kfunc_id); kfunc_proto_id = kern_func->type; @@ -7317,30 +6745,9 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, return -EINVAL; } - /* set index for module BTF fd in fd_array, if unset */ - if (mod_btf && !mod_btf->fd_array_idx) { - /* insn->off is s16 */ - if (obj->fd_array_cnt == INT16_MAX) { - pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", - ext->name, mod_btf->fd_array_idx); - return -E2BIG; - } - /* Cannot use index 0 for module BTF fd */ - if (!obj->fd_array_cnt) - obj->fd_array_cnt = 1; - - ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), - obj->fd_array_cnt + 1); - if (ret) - return ret; - mod_btf->fd_array_idx = obj->fd_array_cnt; - /* we assume module BTF FD is always >0 */ - obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; - } - ext->is_set = true; + ext->ksym.kernel_btf_obj_fd = kern_btf_fd; ext->ksym.kernel_btf_id = 
kfunc_id; - ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ext->name, kfunc_id); @@ -7406,7 +6813,8 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, if (err) return err; pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); - } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { + } else if (ext->type == EXT_KCFG && + strncmp(ext->name, "CONFIG_", 7) == 0) { need_config = true; } else if (ext->type == EXT_KSYM) { if (ext->ksym.type_id) @@ -7461,10 +6869,14 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, return 0; } -static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) +int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { + struct bpf_object *obj; int err, i; + if (!attr) + return libbpf_err(-EINVAL); + obj = attr->obj; if (!obj) return libbpf_err(-EINVAL); @@ -7474,7 +6886,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch } if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + bpf_gen__init(obj->gen_loader, attr->log_level); err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); @@ -7483,9 +6895,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); - err = err ? : bpf_object__load_progs(obj, extra_log_level); - err = err ? : bpf_object_init_prog_arrays(obj); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); + err = err ? 
: bpf_object__load_progs(obj, attr->log_level); if (obj->gen_loader) { /* reset FDs */ @@ -7494,12 +6905,9 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) - err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); + err = bpf_gen__finish(obj->gen_loader); } - /* clean up fd_array */ - zfree(&obj->fd_array); - /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); @@ -7524,19 +6932,18 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->maps[i].pinned && !obj->maps[i].reused) bpf_map__unpin(&obj->maps[i], NULL); - bpf_object_unload(obj); + bpf_object__unload(obj); pr_warn("failed to load object '%s'\n", obj->path); return libbpf_err(err); } -int bpf_object__load_xattr(struct bpf_object_load_attr *attr) -{ - return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); -} - int bpf_object__load(struct bpf_object *obj) { - return bpf_object_load(obj, 0, NULL); + struct bpf_object_load_attr attr = { + .obj = obj, + }; + + return bpf_object__load_xattr(&attr); } static int make_parent_dir(const char *path) @@ -7591,7 +6998,8 @@ static int check_path(const char *path) return err; } -static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__pin_instance(struct bpf_program *prog, const char *path, + int instance) { char *cp, errmsg[STRERR_BUFSIZE]; int err; @@ -7626,7 +7034,8 @@ static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, return 0; } -static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, + int instance) { int err; @@ -7654,12 +7063,6 @@ static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path return 0; } -__attribute__((alias("bpf_program_pin_instance"))) -int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance); - -__attribute__((alias("bpf_program_unpin_instance"))) -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); - int bpf_program__pin(struct bpf_program *prog, const char *path) { int i, err; @@ -7684,7 +7087,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't create subdirs when pinning single instance */ - return bpf_program_pin_instance(prog, path, 0); + return bpf_program__pin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7700,7 +7103,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) goto err_unpin; } - err = bpf_program_pin_instance(prog, buf, i); + err = bpf_program__pin_instance(prog, buf, i); if (err) goto err_unpin; } @@ -7718,7 +7121,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) else if (len >= PATH_MAX) continue; - bpf_program_unpin_instance(prog, buf, i); + bpf_program__unpin_instance(prog, buf, i); } rmdir(path); @@ -7746,7 +7149,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't create subdirs when pinning single instance */ - return bpf_program_unpin_instance(prog, path, 0); + return bpf_program__unpin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7759,7 +7162,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) else if (len >= 
PATH_MAX) return libbpf_err(-ENAMETOOLONG); - err = bpf_program_unpin_instance(prog, buf, i); + err = bpf_program__unpin_instance(prog, buf, i); if (err) return err; } @@ -7925,9 +7328,6 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) - continue; - if (path) { int len; @@ -7954,7 +7354,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) return 0; err_unpin_maps: - while ((map = bpf_object__prev_map(obj, map))) { + while ((map = bpf_map__prev(map, obj))) { if (!map->pin_path) continue; @@ -8034,7 +7434,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) return 0; err_unpin_programs: - while ((prog = bpf_object__prev_program(obj, prog))) { + while ((prog = bpf_program__prev(prog, obj))) { char buf[PATH_MAX]; int len; @@ -8123,7 +7523,6 @@ static void bpf_map__destroy(struct bpf_map *map) } zfree(&map->name); - zfree(&map->real_name); zfree(&map->pin_path); if (map->fd >= 0) @@ -8142,7 +7541,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); - bpf_object_unload(obj); + bpf_object__unload(obj); btf__free(obj->btf); btf_ext__free(obj->btf_ext); @@ -8171,10 +7570,6 @@ struct bpf_object * bpf_object__next(struct bpf_object *prev) { struct bpf_object *next; - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); - - if (strict) - return NULL; if (!prev) next = list_first_entry(&bpf_objects_list, @@ -8280,12 +7675,6 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, struct bpf_program * bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) -{ - return bpf_object__next_program(obj, prev); -} - -struct bpf_program * -bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { struct bpf_program *prog = prev; @@ -8298,12 +7687,6 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) struct bpf_program * bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) -{ - return bpf_object__prev_program(obj, next); -} - -struct bpf_program * -bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) { struct bpf_program *prog = next; @@ -8375,11 +7758,9 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) return 0; } -static int bpf_program_nth_fd(const struct bpf_program *prog, int n); - int bpf_program__fd(const struct bpf_program *prog) { - return bpf_program_nth_fd(prog, 0); + return bpf_program__nth_fd(prog, 0); } size_t bpf_program__size(const struct bpf_program *prog) @@ -8387,16 +7768,6 @@ size_t bpf_program__size(const struct bpf_program *prog) return prog->insns_cnt * BPF_INSN_SZ; } -const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) -{ - return prog->insns; -} - -size_t bpf_program__insn_cnt(const struct bpf_program *prog) -{ - return prog->insns_cnt; -} - int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { @@ -8425,10 +7796,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, return 0; } -__attribute__((alias("bpf_program_nth_fd"))) -int bpf_program__nth_fd(const struct bpf_program *prog, int n); - -static int bpf_program_nth_fd(const struct bpf_program *prog, int n) +int bpf_program__nth_fd(const struct bpf_program *prog, int n) { int fd; @@ -8507,193 +7875,223 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, 
prog->expected_attach_type = type; } -__u32 bpf_program__flags(const struct bpf_program *prog) -{ - return prog->prog_flags; -} +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \ + attachable, attach_btf) \ + { \ + .sec = string, \ + .len = sizeof(string) - 1, \ + .prog_type = ptype, \ + .expected_attach_type = eatype, \ + .is_exp_attach_type_optional = eatype_optional, \ + .is_attachable = attachable, \ + .is_attach_btf = attach_btf, \ + } -int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) -{ - if (prog->obj->loaded) - return libbpf_err(-EBUSY); +/* Programs that can NOT be attached. */ +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) - prog->prog_flags = flags; - return 0; -} +/* Programs that can be attached. */ +#define BPF_APROG_SEC(string, ptype, atype) \ + BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0) -__u32 bpf_program__log_level(const struct bpf_program *prog) -{ - return prog->log_level; -} +/* Programs that must specify expected attach type at load time. */ +#define BPF_EAPROG_SEC(string, ptype, eatype) \ + BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0) -int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) -{ - if (prog->obj->loaded) - return libbpf_err(-EBUSY); +/* Programs that use BTF to identify attach point */ +#define BPF_PROG_BTF(string, ptype, eatype) \ + BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1) - prog->log_level = log_level; - return 0; -} +/* Programs that can be attached but attach type can't be identified by section + * name. Kept for backward compatibility. + */ +#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) -const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) -{ - *log_size = prog->log_size; - return prog->log_buf; -} - -int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) -{ - if (log_size && !log_buf) - return -EINVAL; - if (prog->log_size > UINT_MAX) - return -EINVAL; - if (prog->obj->loaded) - return -EBUSY; - - prog->log_buf = log_buf; - prog->log_size = log_size; - return 0; -} - -#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ +#define SEC_DEF(sec_pfx, ptype, ...) 
{ \ .sec = sec_pfx, \ + .len = sizeof(sec_pfx) - 1, \ .prog_type = BPF_PROG_TYPE_##ptype, \ - .expected_attach_type = atype, \ - .cookie = (long)(flags), \ - .preload_fn = libbpf_preload_prog, \ __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, + struct bpf_program *prog); static const struct bpf_sec_def section_defs[] = { - SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), - SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), - SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), - SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), - SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), - SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), - SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | 
SEC_SLOPPY_PFX), - SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), - 
SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_EAPROG_SEC("sk_reuseport/migrate", BPF_PROG_TYPE_SK_REUSEPORT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE), + BPF_EAPROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT, + BPF_SK_REUSEPORT_SELECT), + SEC_DEF("kprobe/", KPROBE, + .attach_fn = attach_kprobe), + BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), + SEC_DEF("kretprobe/", KPROBE, + .attach_fn = attach_kprobe), + BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), + SEC_DEF("tracepoint/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("tp/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("raw_tp/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("tp_btf/", TRACING, + .expected_attach_type = BPF_TRACE_RAW_TP, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fentry/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fmod_ret/", TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fexit/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fentry.s/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), + SEC_DEF("fmod_ret.s/", TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), + SEC_DEF("fexit.s/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), + SEC_DEF("freplace/", EXT, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("lsm/", LSM, + .is_attach_btf = true, + .expected_attach_type = BPF_LSM_MAC, + .attach_fn = attach_lsm), + SEC_DEF("lsm.s/", LSM, + .is_attach_btf = true, + .is_sleepable = true, + .expected_attach_type = BPF_LSM_MAC, + .attach_fn = attach_lsm), + SEC_DEF("iter/", TRACING, + .expected_attach_type = BPF_TRACE_ITER, + .is_attach_btf = true, + .attach_fn = attach_iter), + SEC_DEF("syscall", SYSCALL, + .is_sleepable = true), + BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, + BPF_XDP_DEVMAP), + BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, + BPF_XDP_CPUMAP), + BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, + BPF_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), + BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), + BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), + BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), + BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB, + BPF_CGROUP_INET_INGRESS), + BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB, + BPF_CGROUP_INET_EGRESS), + BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET_SOCK_CREATE), + BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET_SOCK_RELEASE), + BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET_SOCK_CREATE), + BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET4_POST_BIND), + BPF_EAPROG_SEC("cgroup/post_bind6", 
BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET6_POST_BIND), + BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_CGROUP_DEVICE), + BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS, + BPF_CGROUP_SOCK_OPS), + BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB, + BPF_SK_SKB_STREAM_PARSER), + BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB, + BPF_SK_SKB_STREAM_VERDICT), + BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB), + BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG, + BPF_SK_MSG_VERDICT), + BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2, + BPF_LIRC_MODE2), + BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_FLOW_DISSECTOR), + BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_BIND), + BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_BIND), + BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_CONNECT), + BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_CONNECT), + BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP4_SENDMSG), + BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP6_SENDMSG), + BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP4_RECVMSG), + BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP6_RECVMSG), + BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_GETPEERNAME), + BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_GETPEERNAME), + BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_GETSOCKNAME), + BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_GETSOCKNAME), + BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_CGROUP_SYSCTL), + BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_CGROUP_GETSOCKOPT), + BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_CGROUP_SETSOCKOPT), + BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), + BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP, + BPF_SK_LOOKUP), }; +#undef BPF_PROG_SEC_IMPL +#undef BPF_PROG_SEC +#undef BPF_APROG_SEC +#undef BPF_EAPROG_SEC +#undef BPF_APROG_COMPAT +#undef SEC_DEF + #define MAX_TYPE_NAME_SIZE 32 static const struct bpf_sec_def *find_sec_def(const char *sec_name) { - const struct bpf_sec_def *sec_def; - enum sec_def_flags sec_flags; - int i, n = ARRAY_SIZE(section_defs), len; - bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME; + int i, n = ARRAY_SIZE(section_defs); for (i = 0; i < n; i++) { - sec_def = &section_defs[i]; - sec_flags = sec_def->cookie; - len = strlen(sec_def->sec); - - /* "type/" always has to have proper SEC("type/extras") form */ - if (sec_def->sec[len - 1] == '/') { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; + if (strncmp(sec_name, + section_defs[i].sec, section_defs[i].len)) continue; - } - - /* "type+" means it can be either exact SEC("type") or - * well-formed SEC("type/extras") with proper '/' separator - */ - if (sec_def->sec[len - 1] == '+') { - len--; - /* not even a prefix */ - if (strncmp(sec_name, sec_def->sec, len) != 0) - continue; - /* exact match or has '/' separator */ - if (sec_name[len] == '\0' || sec_name[len] == '/') - return sec_def; - continue; - } - - /* SEC_SLOPPY_PFX definitions are allowed to be just prefix - * matches, unless strict
section name mode - * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the - * match has to be exact. - */ - if ((sec_flags & SEC_SLOPPY_PFX) && !strict) { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; - continue; - } - - /* Definitions not marked SEC_SLOPPY_PFX (e.g., - * SEC("syscall")) are exact matches in both modes. - */ - if (strcmp(sec_name, sec_def->sec) == 0) - return sec_def; + return &section_defs[i]; } return NULL; } @@ -8710,15 +8108,8 @@ static char *libbpf_get_type_names(bool attach_type) buf[0] = '\0'; /* Forge string buf with all available names */ for (i = 0; i < ARRAY_SIZE(section_defs); i++) { - const struct bpf_sec_def *sec_def = &section_defs[i]; - - if (attach_type) { - if (sec_def->preload_fn != libbpf_preload_prog) - continue; - - if (!(sec_def->cookie & SEC_ATTACHABLE)) - continue; - } + if (attach_type && !section_defs[i].is_attachable) + continue; if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { free(buf); @@ -8777,7 +8168,7 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, /* Collect the reloc from ELF and populate the st_ops->progs[] */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - Elf64_Shdr *shdr, Elf_Data *data) + GElf_Shdr *shdr, Elf_Data *data) { const struct btf_member *member; struct bpf_struct_ops *st_ops; @@ -8785,58 +8176,58 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, unsigned int shdr_idx; const struct btf *btf; struct bpf_map *map; + Elf_Data *symbols; unsigned int moff, insn_idx; const char *name; __u32 member_idx; - Elf64_Sym *sym; - Elf64_Rel *rel; + GElf_Sym sym; + GElf_Rel rel; int i, nrels; + symbols = obj->efile.symbols; btf = obj->btf; nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - rel = elf_rel_by_idx(data, i); - if (!rel) { + if (!gelf_getrel(data, i, &rel)) { pr_warn("struct_ops reloc: failed to get %d reloc\n", i); return -LIBBPF_ERRNO__FORMAT; } - sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); - if (!sym) { + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { pr_warn("struct_ops reloc: symbol %zx not found\n", - (size_t)ELF64_R_SYM(rel->r_info)); + (size_t)GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym->st_name) ?: ""; - map = find_struct_ops_map_by_offset(obj, rel->r_offset); + name = elf_sym_str(obj, sym.st_name) ?: ""; + map = find_struct_ops_map_by_offset(obj, rel.r_offset); if (!map) { - pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", - (size_t)rel->r_offset); + pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", + (size_t)rel.r_offset); return -EINVAL; } - moff = rel->r_offset - map->sec_offset; - shdr_idx = sym->st_shndx; + moff = rel.r_offset - map->sec_offset; + shdr_idx = sym.st_shndx; st_ops = map->st_ops; - pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", map->name, - (long long)(rel->r_info >> 32), - (long long)sym->st_value, - shdr_idx, (size_t)rel->r_offset, - map->sec_offset, sym->st_name, name); + (long long)(rel.r_info >> 32), + (long long)sym.st_value, + shdr_idx, (size_t)rel.r_offset, + map->sec_offset, sym.st_name, name); if (shdr_idx >= SHN_LORESERVE) { - pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", - map->name, (size_t)rel->r_offset, shdr_idx); +
pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", + map->name, (size_t)rel.r_offset, shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (sym->st_value % BPF_INSN_SZ) { + if (sym.st_value % BPF_INSN_SZ) { pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", - map->name, (unsigned long long)sym->st_value); + map->name, (unsigned long long)sym.st_value); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = sym->st_value / BPF_INSN_SZ; + insn_idx = sym.st_value / BPF_INSN_SZ; member = find_member_by_offset(st_ops->type, moff * 8); if (!member) { @@ -8860,37 +8251,35 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - /* prevent the use of BPF prog with invalid type */ - if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { - pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", - map->name, prog->name); - return -EINVAL; - } + if (prog->type == BPF_PROG_TYPE_UNSPEC) { + const struct bpf_sec_def *sec_def; - /* if we haven't yet processed this BPF program, record proper - * attach_btf_id and member_idx - */ - if (!prog->attach_btf_id) { + sec_def = find_sec_def(prog->sec_name); + if (sec_def && + sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { + /* for pr_warn */ + prog->type = sec_def->prog_type; + goto invalid_prog; + } + + prog->type = BPF_PROG_TYPE_STRUCT_OPS; prog->attach_btf_id = st_ops->type_id; prog->expected_attach_type = member_idx; + } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || + prog->attach_btf_id != st_ops->type_id || + prog->expected_attach_type != member_idx) { + goto invalid_prog; } - - /* struct_ops BPF prog can be re-used between multiple - * .struct_ops as long as it's the same struct_ops struct - * definition and the same function pointer field - */ - if (prog->attach_btf_id != st_ops->type_id || - prog->expected_attach_type != member_idx) { - pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->sec_name, prog->type, - prog->attach_btf_id, prog->expected_attach_type, name); - return -EINVAL; - } - st_ops->progs[member_idx] = prog; } return 0; + +invalid_prog: + pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", + map->name, prog->name, prog->sec_name, prog->type, + prog->attach_btf_id, prog->expected_attach_type, name); + return -EINVAL; } #define BTF_TRACE_PREFIX "btf_trace_" @@ -8970,27 +8359,28 @@ int libbpf_find_vmlinux_btf_id(const char *name, static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info *info; struct btf *btf; int err; - err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); + info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); + err = libbpf_get_error(info_linear); if (err) { - pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", - attach_prog_fd, err); + pr_warn("failed get_prog_info_linear for FD %d\n", + attach_prog_fd); return err; } err = -EINVAL; - if (!info.btf_id) { + info = &info_linear->info; + if (!info->btf_id) { pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info.btf_id); - err = libbpf_get_error(btf); - if (err) { - pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); + btf = 
btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { + pr_warn("Failed to get BTF of the program\n"); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -9000,6 +8390,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) goto out; } out: + free(info_linear); return err; } @@ -9040,12 +8431,32 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, return -ESRCH; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, - int *btf_obj_fd, int *btf_type_id) +static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id) { enum bpf_attach_type attach_type = prog->expected_attach_type; __u32 attach_prog_fd = prog->attach_prog_fd; - int err = 0; + const char *name = prog->sec_name, *attach_name; + const struct bpf_sec_def *sec = NULL; + int i, err = 0; + + if (!name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (!section_defs[i].is_attach_btf) + continue; + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) + continue; + + sec = &section_defs[i]; + break; + } + + if (!sec) { + pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name); + return -ESRCH; + } + attach_name = name + sec->len; /* BPF program's BTF ID */ if (attach_prog_fd) { @@ -9079,30 +8490,27 @@ int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type) { char *type_names; - const struct bpf_sec_def *sec_def; + int i; if (!name) return libbpf_err(-EINVAL); - sec_def = find_sec_def(name); - if (!sec_def) { - pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); - type_names = libbpf_get_type_names(true); - if (type_names != NULL) { - pr_debug("attachable section(type) names are:%s\n", type_names); - free(type_names); - } - - return libbpf_err(-EINVAL); + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) + continue; + if (!section_defs[i].is_attachable) + return libbpf_err(-EINVAL); + *attach_type = section_defs[i].expected_attach_type; + return 0; + } + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(true); + if (type_names != NULL) { + pr_debug("attachable section(type) names are:%s\n", type_names); + free(type_names); } - if (sec_def->preload_fn != libbpf_preload_prog) - return libbpf_err(-EINVAL); - if (!(sec_def->cookie & SEC_ATTACHABLE)) - return libbpf_err(-EINVAL); - - *attach_type = sec_def->expected_attach_type; - return 0; + return libbpf_err(-EINVAL); } int bpf_map__fd(const struct bpf_map *map) @@ -9115,30 +8523,9 @@ const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) { return map ? &map->def : libbpf_err_ptr(-EINVAL); } -static bool map_uses_real_name(const struct bpf_map *map) -{ - /* Since libbpf started to support custom .data.* and .rodata.* maps, - * their user-visible name differs from kernel-visible name. Users see - * such map's corresponding ELF section name as a map name. - * This check distinguishes .data/.rodata from .data.* and .rodata.* - * maps to know which name has to be returned to the user.
- */ - if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) - return true; - if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) - return true; - return false; -} - const char *bpf_map__name(const struct bpf_map *map) { - if (!map) - return NULL; - - if (map_uses_real_name(map)) - return map->real_name; - - return map->name; + return map ? map->name : NULL; } enum bpf_map_type bpf_map__type(const struct bpf_map *map) @@ -9167,19 +8554,6 @@ int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) return 0; } -__u64 bpf_map__map_extra(const struct bpf_map *map) -{ - return map->map_extra; -} - -int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) -{ - if (map->fd >= 0) - return libbpf_err(-EBUSY); - map->map_extra = map_extra; - return 0; -} - __u32 bpf_map__numa_node(const struct bpf_map *map) { return map->numa_node; @@ -9336,12 +8710,6 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) -{ - return bpf_object__next_map(obj, prev); -} - -struct bpf_map * -bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { if (prev == NULL) return obj->maps; @@ -9351,12 +8719,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) struct bpf_map * bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) -{ - return bpf_object__prev_map(obj, next); -} - -struct bpf_map * -bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { if (next == NULL) { if (!obj->nr_maps) @@ -9373,22 +8735,7 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) struct bpf_map *pos; bpf_object__for_each_map(pos, obj) { - /* if it's a special internal map name (which always starts - * with dot) then check if that special name matches the - * real map name (ELF section name) - */ - if (name[0] == '.') { - if (pos->real_name && strcmp(pos->real_name, name) == 0) - return pos; - continue; - } - /* otherwise map name has to be an exact match */ - if (map_uses_real_name(pos)) { - if (strcmp(pos->real_name, name) == 0) - return pos; - continue; - } - if (strcmp(pos->name, name) == 0) + if (pos->name && !strcmp(pos->name, name)) return pos; } return errno = ENOENT, NULL; @@ -9422,12 +8769,21 @@ long libbpf_get_error(const void *ptr) return -errno; } -__attribute__((alias("bpf_prog_load_xattr2"))) -int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); +int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_prog_load_attr attr; -static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd) + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = type; + attr.expected_attach_type = 0; + + return bpf_prog_load_xattr(&attr, pobj, prog_fd); +} + +int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd) { struct bpf_object_open_attr open_attr = {}; struct bpf_program *prog, *first_prog = NULL; @@ -9498,20 +8854,6 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, return 0; } -COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) -int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - 
struct bpf_prog_load_attr attr; - - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; - - return bpf_prog_load_xattr2(&attr, pobj, prog_fd); -} - struct bpf_link { int (*detach)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); @@ -9658,15 +9000,8 @@ int bpf_link__unpin(struct bpf_link *link) struct bpf_link_perf { struct bpf_link link; int perf_event_fd; - /* legacy kprobe support: keep track of probe identifier and type */ - char *legacy_probe_name; - bool legacy_is_kprobe; - bool legacy_is_retprobe; }; -static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); -static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); - static int bpf_link_perf_detach(struct bpf_link *link) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -9679,29 +9014,17 @@ static int bpf_link_perf_detach(struct bpf_link *link) close(perf_link->perf_event_fd); close(link->fd); - /* legacy uprobe/kprobe needs to be removed after perf event fd closure */ - if (perf_link->legacy_probe_name) { - if (perf_link->legacy_is_kprobe) { - err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, - perf_link->legacy_is_retprobe); - } else { - err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, - perf_link->legacy_is_retprobe); - } - } - - return err; + return libbpf_err(err); } static void bpf_link_perf_dealloc(struct bpf_link *link) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); - free(perf_link->legacy_probe_name); free(perf_link); } -struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, +struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { char errmsg[STRERR_BUFSIZE]; @@ -9776,7 +9099,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p return libbpf_err_ptr(err); } -struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) +struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) { return bpf_program__attach_perf_event_opts(prog, pfd, NULL); } @@ -9893,113 +9216,16 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, return pfd; } -static int append_to_file(const char *file, const char *fmt, ...) -{ - int fd, n, err = 0; - va_list ap; - - fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); - if (fd < 0) - return -errno; - - va_start(ap, fmt); - n = vdprintf(fd, fmt, ap); - va_end(ap); - - if (n < 0) - err = -errno; - - close(fd); - return err; -} - -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) -{ - static int index = 0; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); -} - -static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, - const char *kfunc_name, size_t offset) -{ - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "%c:%s/%s %s+0x%zx", - retprobe ? 'r' : 'p', - retprobe ? "kretprobes" : "kprobes", - probe_name, kfunc_name, offset); -} - -static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) -{ - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? 
"kretprobes" : "kprobes", probe_name); -} - -static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) -{ - char file[256]; - - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "kretprobes" : "kprobes", probe_name); - - return parse_uint_from_file(file, "%d\n"); -} - -static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, - const char *kfunc_name, size_t offset, int pid) -{ - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; - int type, pfd, err; - - err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); - if (err < 0) { - pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", - kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - type = determine_kprobe_perf_type_legacy(probe_name, retprobe); - if (type < 0) { - pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", - kfunc_name, offset, - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); - return type; - } - attr.size = sizeof(attr); - attr.config = type; - attr.type = PERF_TYPE_TRACEPOINT; - - pfd = syscall(__NR_perf_event_open, &attr, - pid < 0 ? -1 : pid, /* pid */ - pid == -1 ? 0 : -1, /* cpu */ - -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("legacy kprobe perf_event_open() failed: %s\n", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - return pfd; -} - struct bpf_link * -bpf_program__attach_kprobe_opts(const struct bpf_program *prog, +bpf_program__attach_kprobe_opts(struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE]; - char *legacy_probe = NULL; struct bpf_link *link; - size_t offset; - bool retprobe, legacy; + unsigned long offset; + bool retprobe; int pfd, err; if (!OPTS_VALID(opts, bpf_kprobe_opts)) @@ -10009,57 +9235,27 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, offset = OPTS_GET(opts, offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - legacy = determine_kprobe_perf_type() < 0; - if (!legacy) { - pfd = perf_event_open_probe(false /* uprobe */, retprobe, - func_name, offset, - -1 /* pid */, 0 /* ref_ctr_off */); - } else { - char probe_name[256]; - - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); - - legacy_probe = strdup(probe_name); - if (!legacy_probe) - return libbpf_err_ptr(-ENOMEM); - - pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, - offset, -1 /* pid */); - } + pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, + offset, -1 /* pid */, 0 /* ref_ctr_off */); if (pfd < 0) { - err = -errno; - pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", - func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", + prog->name, retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); if (err) { close(pfd); - pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", - func_name, offset, + pr_warn("prog '%s': failed to attach to %s '%s': %s\n", + prog->name, retprobe ? 
"kretprobe" : "kprobe", func_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + return libbpf_err_ptr(err); } - if (legacy) { - struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); - - perf_link->legacy_probe_name = legacy_probe; - perf_link->legacy_is_kprobe = true; - perf_link->legacy_is_retprobe = retprobe; - } - return link; -err_out: - free(legacy_probe); - return libbpf_err_ptr(err); } -struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, const char *func_name) { @@ -10070,7 +9266,8 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie) +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; @@ -10079,11 +9276,8 @@ static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cooki char *func; int n, err; - opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); - if (opts.retprobe) - func_name = prog->sec_name + sizeof("kretprobe/") - 1; - else - func_name = prog->sec_name + sizeof("kprobe/") - 1; + func_name = prog->sec_name + sec->len; + opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { @@ -10104,96 +9298,17 @@ static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cooki return link; } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) -{ - int i; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; - } -} - -static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, - const char *binary_path, size_t offset) -{ - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "%c:%s/%s %s:0x%zx", - retprobe ? 'r' : 'p', - retprobe ? "uretprobes" : "uprobes", - probe_name, binary_path, offset); -} - -static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) -{ - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name); -} - -static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) -{ - char file[512]; - - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? 
"uretprobes" : "uprobes", probe_name); - - return parse_uint_from_file(file, "%d\n"); -} - -static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, - const char *binary_path, size_t offset, int pid) -{ - struct perf_event_attr attr; - int type, pfd, err; - - err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); - if (err < 0) { - pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", - binary_path, (size_t)offset, err); - return err; - } - type = determine_uprobe_perf_type_legacy(probe_name, retprobe); - if (type < 0) { - pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", - binary_path, offset, err); - return type; - } - - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(attr); - attr.config = type; - attr.type = PERF_TYPE_TRACEPOINT; - - pfd = syscall(__NR_perf_event_open, &attr, - pid < 0 ? -1 : pid, /* pid */ - pid == -1 ? 0 : -1, /* cpu */ - -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); - return err; - } - return pfd; -} - LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, +bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; - bool retprobe, legacy; + bool retprobe; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -10202,35 +9317,15 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - legacy = determine_uprobe_perf_type() < 0; - if (!legacy) { - pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, - func_offset, pid, ref_ctr_off); - } else { - char probe_name[512]; - - if (ref_ctr_off) - return libbpf_err_ptr(-EINVAL); - - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); - - legacy_probe = strdup(probe_name); - if (!legacy_probe) - return libbpf_err_ptr(-ENOMEM); - - pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, - binary_path, func_offset, pid); - } + pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, + func_offset, pid, ref_ctr_off); if (pfd < 0) { - err = -errno; pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return libbpf_err_ptr(pfd); } - link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); if (err) { @@ -10239,23 +9334,12 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, prog->name, retprobe ? 
"uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; - } - if (legacy) { - struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); - - perf_link->legacy_probe_name = legacy_probe; - perf_link->legacy_is_kprobe = false; - perf_link->legacy_is_retprobe = retprobe; + return libbpf_err_ptr(err); } return link; -err_out: - free(legacy_probe); - return libbpf_err_ptr(err); - } -struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, +struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset) @@ -10315,7 +9399,7 @@ static int perf_event_open_tracepoint(const char *tp_category, return pfd; } -struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, +struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog, const char *tp_category, const char *tp_name, const struct bpf_tracepoint_opts *opts) @@ -10349,14 +9433,15 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p return link; } -struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, +struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, const char *tp_category, const char *tp_name) { return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) { char *sec_name, *tp_cat, *tp_name; struct bpf_link *link; @@ -10365,11 +9450,8 @@ static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) if (!sec_name) return libbpf_err_ptr(-ENOMEM); - /* extract "tp//" or "tracepoint//" */ - if (str_has_pfx(prog->sec_name, "tp/")) - tp_cat = sec_name + sizeof("tp/") - 1; - else - tp_cat = sec_name + sizeof("tracepoint/") - 1; + /* extract "tp//" */ + tp_cat = sec_name + sec->len; tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); @@ -10383,7 +9465,7 @@ static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) return link; } -struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, +struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name) { char errmsg[STRERR_BUFSIZE]; @@ -10413,34 +9495,16 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr return link; } -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) { - static const char *const prefixes[] = { - "raw_tp/", - "raw_tracepoint/", - "raw_tp.w/", - "raw_tracepoint.w/", - }; - size_t i; - const char *tp_name = NULL; - - for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - if (str_has_pfx(prog->sec_name, prefixes[i])) { - tp_name = prog->sec_name + strlen(prefixes[i]); - break; - } - } - if (!tp_name) { - pr_warn("prog '%s': invalid section name '%s'\n", - prog->name, prog->sec_name); - return libbpf_err_ptr(-EINVAL); - } + const char *tp_name = prog->sec_name + sec->len; return bpf_program__attach_raw_tracepoint(prog, tp_name); } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(struct 
bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; @@ -10469,28 +9533,30 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro return (struct bpf_link *)link; } -struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) +struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) { return bpf_program__attach_btf_id(prog); } -struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) +struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog) { return bpf_program__attach_btf_id(prog); } -static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie) +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog) { return bpf_program__attach_trace(prog); } -static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie) +static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, + struct bpf_program *prog) { return bpf_program__attach_lsm(prog); } static struct bpf_link * -bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id, +bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, const char *target_name) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, @@ -10526,24 +9592,24 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id } struct bpf_link * -bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) +bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) { return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup"); } struct bpf_link * -bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) +bpf_program__attach_netns(struct bpf_program *prog, int netns_fd) { return bpf_program__attach_fd(prog, netns_fd, 0, "netns"); } -struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) +struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex) { /* target_fd/target_ifindex use the same field in LINK_CREATE */ return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); } -struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, +struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, int target_fd, const char *attach_func_name) { @@ -10576,7 +9642,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, } struct bpf_link * -bpf_program__attach_iter(const struct bpf_program *prog, +bpf_program__attach_iter(struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); @@ -10615,17 +9681,21 @@ bpf_program__attach_iter(const struct bpf_program *prog, return link; } -static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie) +static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, + struct bpf_program *prog) { return bpf_program__attach_iter(prog, NULL); } -struct bpf_link *bpf_program__attach(const struct bpf_program *prog) +struct bpf_link *bpf_program__attach(struct bpf_program *prog) { - if (!prog->sec_def || !prog->sec_def->attach_fn) + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(prog->sec_name); + if (!sec_def || !sec_def->attach_fn) return libbpf_err_ptr(-ESRCH); - return prog->sec_def->attach_fn(prog, prog->sec_def->cookie); + return sec_def->attach_fn(sec_def, prog); } static int bpf_link__detach_struct_ops(struct bpf_link *link) @@ -10638,7 +9708,7 @@ static int 
bpf_link__detach_struct_ops(struct bpf_link *link) return 0; } -struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) +struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) { struct bpf_struct_ops *st_ops; struct bpf_link *link; @@ -10679,10 +9749,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) return link; } -static enum bpf_perf_event_ret -perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data) +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) { struct perf_event_mmap_page *header = mmap_mem; __u64 data_head = ring_buffer_read_head(header); @@ -10727,12 +9797,6 @@ perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, return libbpf_err(ret); } -__attribute__((alias("perf_event_read_simple"))) -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data); - struct perf_buffer; struct perf_buffer_params { @@ -10866,18 +9930,11 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p); -DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, - perf_buffer_lost_fn lost_cb, - void *ctx, - const struct perf_buffer_opts *opts) +struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, + const struct perf_buffer_opts *opts) { struct perf_buffer_params p = {}; - struct perf_event_attr attr = {}; - - if (!OPTS_VALID(opts, perf_buffer_opts)) - return libbpf_err_ptr(-EINVAL); + struct perf_event_attr attr = { 0, }; attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; @@ -10886,62 +9943,29 @@ struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, attr.wakeup_events = 1; p.attr = &attr; - p.sample_cb = sample_cb; - p.lost_cb = lost_cb; - p.ctx = ctx; + p.sample_cb = opts ? opts->sample_cb : NULL; + p.lost_cb = opts ? opts->lost_cb : NULL; + p.ctx = opts ? opts->ctx : NULL; return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts) -{ - return perf_buffer__new_v0_6_0(map_fd, page_cnt, - opts ? opts->sample_cb : NULL, - opts ? opts->lost_cb : NULL, - opts ? 
opts->ctx : NULL, - NULL); -} - -DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, - struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, - const struct perf_buffer_raw_opts *opts) +struct perf_buffer * +perf_buffer__new_raw(int map_fd, size_t page_cnt, + const struct perf_buffer_raw_opts *opts) { struct perf_buffer_params p = {}; - if (page_cnt == 0 || !attr) - return libbpf_err_ptr(-EINVAL); - - if (!OPTS_VALID(opts, perf_buffer_raw_opts)) - return libbpf_err_ptr(-EINVAL); - - p.attr = attr; - p.event_cb = event_cb; - p.ctx = ctx; - p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); - p.cpus = OPTS_GET(opts, cpus, NULL); - p.map_keys = OPTS_GET(opts, map_keys, NULL); + p.attr = opts->attr; + p.event_cb = opts->event_cb; + p.ctx = opts->ctx; + p.cpu_cnt = opts->cpu_cnt; + p.cpus = opts->cpus; + p.map_keys = opts->map_keys; return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts) -{ - LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, - .cpu_cnt = opts->cpu_cnt, - .cpus = opts->cpus, - .map_keys = opts->map_keys, - ); - - return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, - opts->event_cb, opts->ctx, &inner_opts); -} - static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { @@ -11141,10 +10165,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb, { enum bpf_perf_event_ret ret; - ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, - pb->page_size, &cpu_buf->buf, - &cpu_buf->buf_size, - perf_buffer__process_record, cpu_buf); + ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, + pb->page_size, &cpu_buf->buf, + &cpu_buf->buf_size, + perf_buffer__process_record, cpu_buf); if (ret != LIBBPF_PERF_EVENT_CONT) return ret; return 0; @@ -11497,29 +10521,18 @@ int bpf_program__set_attach_target(struct bpf_program *prog, { int btf_obj_fd = 0, btf_id = 0, err; - if (!prog || attach_prog_fd < 0) + if (!prog || attach_prog_fd < 0 || !attach_func_name) return libbpf_err(-EINVAL); if (prog->obj->loaded) return libbpf_err(-EINVAL); - if (attach_prog_fd && !attach_func_name) { - /* remember attach_prog_fd and let bpf_program__load() find - * BTF ID during the program load - */ - prog->attach_prog_fd = attach_prog_fd; - return 0; - } - if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); if (btf_id < 0) return libbpf_err(btf_id); } else { - if (!attach_func_name) - return libbpf_err(-EINVAL); - /* load btf_vmlinux, if not yet */ err = bpf_object__load_vmlinux_btf(prog->obj, true); if (err) @@ -11592,7 +10605,7 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) int fd, err = 0, len; char buf[128]; - fd = open(fcpu, O_RDONLY | O_CLOEXEC); + fd = open(fcpu, O_RDONLY); if (fd < 0) { err = -errno; pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); @@ -11761,15 +10774,16 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->prog_cnt; i++) { struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; + const struct bpf_sec_def *sec_def; if (!prog->load) continue; - /* auto-attaching not supported for this program */ - if (!prog->sec_def || !prog->sec_def->attach_fn) + 
sec_def = find_sec_def(prog->sec_name); + if (!sec_def || !sec_def->attach_fn) continue; - *link = bpf_program__attach(prog); + *link = sec_def->attach_fn(sec_def, prog); err = libbpf_get_error(*link); if (err) { pr_warn("failed to auto-attach program '%s': %d\n", diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8b9bc5e90c..f177d897c5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -24,10 +24,6 @@ extern "C" { #endif -LIBBPF_API __u32 libbpf_major_version(void); -LIBBPF_API __u32 libbpf_minor_version(void); -LIBBPF_API const char *libbpf_version_string(void); - enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -87,15 +83,12 @@ struct bpf_object_open_opts { * Non-relocatable instructions are replaced with invalid ones to * prevent accidental errors. * */ - LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect") bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be * auto-pinned to that path on load; defaults to "/sys/fs/bpf". */ const char *pin_root_path; - - LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program") __u32 attach_prog_fd; /* Additional kernel config content that augments and overrides * system Kconfig for CONFIG_xxx externs. @@ -108,73 +101,12 @@ struct bpf_object_open_opts { * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. */ const char *btf_custom_path; - /* Pointer to a buffer for storing kernel logs for applicable BPF - * commands. Valid kernel_log_size has to be specified as well and are - * passed-through to bpf() syscall. Keep in mind that kernel might - * fail operation with -ENOSPC error if provided buffer is too small - * to contain entire log output. - * See the comment below for kernel_log_level for interaction between - * log_buf and log_level settings. - * - * If specified, this log buffer will be passed for: - * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden - * with bpf_program__set_log() on per-program level, to get - * BPF verifier log output. - * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get - * BTF sanity checking log. - * - * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite - * previous contents, so if you need more fine-grained control, set - * per-program buffer with bpf_program__set_log_buf() to preserve each - * individual program's verification log. Keep using kernel_log_buf - * for BTF verification log, if necessary. - */ - char *kernel_log_buf; - size_t kernel_log_size; - /* - * Log level can be set independently from log buffer. Log_level=0 - * means that libbpf will attempt loading BTF or program without any - * logging requested, but will retry with either its own or custom log - * buffer, if provided, and log_level=1 on any error. - * And vice versa, setting log_level>0 will request BTF or prog - * loading with verbose log from the first attempt (and as such also - * for successfully loaded BTF or program), and the actual log buffer - * could be either libbpf's own auto-allocated log buffer, if - * kernel_log_buffer is NULL, or user-provided custom kernel_log_buf. - * If user didn't provide custom log buffer, libbpf will emit captured - * logs through its print callback. 
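For context, the kernel_log_* fields whose documentation is removed above were used roughly as follows. A minimal sketch against the pre-revert API; the object path and buffer size are arbitrary and error handling is trimmed:

	#include <stdio.h>
	#include <bpf/libbpf.h>

	static char log_buf[64 * 1024];

	static int load_with_log(const char *path)
	{
		LIBBPF_OPTS(bpf_object_open_opts, opts,
			.kernel_log_buf = log_buf,
			.kernel_log_size = sizeof(log_buf),
			.kernel_log_level = 1,	/* verbose log from the first attempt */
		);
		struct bpf_object *obj;
		int err;

		obj = bpf_object__open_file(path, &opts);
		err = libbpf_get_error(obj);
		if (err)
			return err;
		/* BPF_BTF_LOAD and BPF_PROG_LOAD happen here, filling log_buf */
		err = bpf_object__load(obj);
		if (err)
			fprintf(stderr, "load failed (%d), kernel log:\n%s\n", err, log_buf);
		bpf_object__close(obj);
		return err;
	}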
- */ - __u32 kernel_log_level; - - size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field btf_custom_path LIBBPF_API struct bpf_object *bpf_object__open(const char *path); - -/** - * @brief **bpf_object__open_file()** creates a bpf_object by opening - * the BPF ELF object file pointed to by the passed path and loading it - * into memory. - * @param path BPF object file path - * @param opts options for how to load the bpf object, this parameter is - * optional and can be set to NULL - * @return pointer to the new bpf_object; or NULL is returned on error, - * error code is stored in errno - */ LIBBPF_API struct bpf_object * bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); - -/** - * @brief **bpf_object__open_mem()** creates a bpf_object by reading - * the BPF objects raw bytes from a memory buffer containing a valid - * BPF ELF object file. - * @param obj_buf pointer to the buffer containing ELF file bytes - * @param obj_buf_sz number of bytes in the buffer - * @param opts options for how to load the bpf object - * @return pointer to the new bpf_object; or NULL is returned on error, - * error code is stored in errno - */ LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); @@ -214,9 +146,7 @@ struct bpf_object_load_attr { /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead") LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); -LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); @@ -227,7 +157,6 @@ struct btf; LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead") LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(const struct bpf_object *obj, const char *title); @@ -235,8 +164,7 @@ LIBBPF_API struct bpf_program * bpf_object__find_program_by_name(const struct bpf_object *obj, const char *name); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead") -struct bpf_object *bpf_object__next(struct bpf_object *prev); +LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ (tmp) = bpf_object__next(pos); \ @@ -258,22 +186,16 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, /* Accessors of bpf_program */ struct bpf_program; -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead") -struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); -LIBBPF_API struct bpf_program * -bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); +LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, + const struct bpf_object *obj); -#define bpf_object__for_each_program(pos, obj) \ - for ((pos) = bpf_object__next_program((obj), NULL); \ - (pos) != NULL; \ - (pos) = bpf_object__next_program((obj), (pos))) +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_program__next(NULL, (obj)); \ + (pos) != 
NULL; \ + (pos) = bpf_program__next((pos), (obj))) -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead") -struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); -LIBBPF_API struct bpf_program * -bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); +LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, + const struct bpf_object *obj); typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -292,79 +214,18 @@ LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); /* returns program size in bytes */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead") LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); -struct bpf_insn; - -/** - * @brief **bpf_program__insns()** gives read-only access to BPF program's - * underlying BPF instructions. - * @param prog BPF program for which to return instructions - * @return a pointer to an array of BPF instructions that belong to the - * specified BPF program - * - * Returned pointer is always valid and not NULL. Number of `struct bpf_insn` - * pointed to can be fetched using **bpf_program__insn_cnt()** API. - * - * Keep in mind, libbpf can modify and append/delete BPF program's - * instructions as it processes BPF object file and prepares everything for - * uploading into the kernel. So depending on the point in BPF object - * lifetime, **bpf_program__insns()** can return different sets of - * instructions. As an example, during BPF object load phase BPF program - * instructions will be CO-RE-relocated, BPF subprograms instructions will be - * appended, ldimm64 instructions will have FDs embedded, etc. So instructions - * returned before **bpf_object__load()** and after it might be quite - * different. - */ -LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); -/** - * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s - * that form specified BPF program. - * @param prog BPF program for which to return number of BPF instructions - * - * See **bpf_program__insns()** documentation for notes on how libbpf can - * change instructions and their count during different phases of - * **bpf_object** lifetime. - */ -LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); - -LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead") -LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version); +LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, + __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); - -/** - * @brief **bpf_program__pin()** pins the BPF program to a file - * in the BPF FS specified by a path. This increments the programs - * reference count, allowing it to stay loaded after the process - * which loaded it has exited. 
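A short usage sketch for the pin/unpin pair whose doc comments are removed above; the BPFFS path is hypothetical, and the program must already be loaded:

	#include <bpf/libbpf.h>

	static int pin_then_unpin(struct bpf_program *prog)
	{
		const char *path = "/sys/fs/bpf/my_prog";	/* hypothetical pin path */
		int err;

		err = bpf_program__pin(prog, path);	/* bumps the prog's refcount */
		if (err)
			return err;	/* e.g. -EEXIST if already pinned there */
		/* ... the program now stays loaded after process exit, until: */
		return bpf_program__unpin(prog, path);	/* drops the refcount */
	}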
- * - * @param prog BPF program to pin, must already be loaded - * @param path file path in a BPF file system - * @return 0, on success; negative error code, otherwise - */ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); - -/** - * @brief **bpf_program__unpin()** unpins the BPF program from a file - * in the BPFFS specified by a path. This decrements the programs - * reference count. - * - * The file pinning the BPF program can also be unlinked by a different - * process in which case this function will return an error. - * - * @param prog BPF program to unpin - * @param path file path to the pin in a BPF file system - * @return 0, on success; negative error code, otherwise - */ LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); @@ -382,7 +243,7 @@ LIBBPF_API int bpf_link__detach(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * -bpf_program__attach(const struct bpf_program *prog); +bpf_program__attach(struct bpf_program *prog); struct bpf_perf_event_opts { /* size of this struct, for forward/backward compatiblity */ @@ -393,10 +254,10 @@ struct bpf_perf_event_opts { #define bpf_perf_event_opts__last_field bpf_cookie LIBBPF_API struct bpf_link * -bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); +bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); LIBBPF_API struct bpf_link * -bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, +bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts); struct bpf_kprobe_opts { @@ -405,7 +266,7 @@ struct bpf_kprobe_opts { /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; /* function's offset to install kprobe to */ - size_t offset; + unsigned long offset; /* kprobe is return probe */ bool retprobe; size_t :0; @@ -413,10 +274,10 @@ struct bpf_kprobe_opts { #define bpf_kprobe_opts__last_field retprobe LIBBPF_API struct bpf_link * -bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, +bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, const char *func_name); LIBBPF_API struct bpf_link * -bpf_program__attach_kprobe_opts(const struct bpf_program *prog, +bpf_program__attach_kprobe_opts(struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts); @@ -435,43 +296,12 @@ struct bpf_uprobe_opts { }; #define bpf_uprobe_opts__last_field retprobe -/** - * @brief **bpf_program__attach_uprobe()** attaches a BPF program - * to the userspace function which is found by binary path and - * offset. You can optionally specify a particular proccess to attach - * to. You can also optionally attach the program to the function - * exit instead of entry. 
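To make the pid convention from the removed comment concrete (0 = own process, -1 = all processes), a sketch with an illustrative binary path and symbol offset; the returned pointer is error-encoded and should be checked with libbpf_get_error():

	#include <bpf/libbpf.h>

	static struct bpf_link *attach_entry_uprobe(struct bpf_program *prog)
	{
		/* path and offset are placeholders; real code resolves the
		 * function's offset from the target binary's symbol table
		 */
		return bpf_program__attach_uprobe(prog, false /* entry, not exit */,
						  -1 /* all processes */,
						  "/usr/lib/libc.so.6", 0x9d10);
	}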
- * - * @param prog BPF program to attach - * @param retprobe Attach to function exit - * @param pid Process ID to attach the uprobe to, 0 for self (own process), - * -1 for all processes - * @param binary_path Path to binary that contains the function symbol - * @param func_offset Offset within the binary of the function symbol - * @return Reference to the newly created BPF link; or NULL is returned on error, - * error code is stored in errno - */ LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, +bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); - -/** - * @brief **bpf_program__attach_uprobe_opts()** is just like - * bpf_program__attach_uprobe() except with a options struct - * for various configurations. - * - * @param prog BPF program to attach - * @param pid Process ID to attach the uprobe to, 0 for self (own process), - * -1 for all processes - * @param binary_path Path to binary that contains the function symbol - * @param func_offset Offset within the binary of the function symbol - * @param opts Options for altering program attachment - * @return Reference to the newly created BPF link; or NULL is returned on error, - * error code is stored in errno - */ LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, +bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts); @@ -484,35 +314,35 @@ struct bpf_tracepoint_opts { #define bpf_tracepoint_opts__last_field bpf_cookie LIBBPF_API struct bpf_link * -bpf_program__attach_tracepoint(const struct bpf_program *prog, +bpf_program__attach_tracepoint(struct bpf_program *prog, const char *tp_category, const char *tp_name); LIBBPF_API struct bpf_link * -bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, +bpf_program__attach_tracepoint_opts(struct bpf_program *prog, const char *tp_category, const char *tp_name, const struct bpf_tracepoint_opts *opts); LIBBPF_API struct bpf_link * -bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, +bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name); LIBBPF_API struct bpf_link * -bpf_program__attach_trace(const struct bpf_program *prog); +bpf_program__attach_trace(struct bpf_program *prog); LIBBPF_API struct bpf_link * -bpf_program__attach_lsm(const struct bpf_program *prog); +bpf_program__attach_lsm(struct bpf_program *prog); LIBBPF_API struct bpf_link * -bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); +bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd); LIBBPF_API struct bpf_link * -bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd); +bpf_program__attach_netns(struct bpf_program *prog, int netns_fd); LIBBPF_API struct bpf_link * -bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex); +bpf_program__attach_xdp(struct bpf_program *prog, int ifindex); LIBBPF_API struct bpf_link * -bpf_program__attach_freplace(const struct bpf_program *prog, +bpf_program__attach_freplace(struct bpf_program *prog, int target_fd, const char *attach_func_name); struct bpf_map; -LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); +LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward 
compatibility */ @@ -522,9 +352,11 @@ struct bpf_iter_attach_opts { #define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * -bpf_program__attach_iter(const struct bpf_program *prog, +bpf_program__attach_iter(struct bpf_program *prog, const struct bpf_iter_attach_opts *opts); +struct bpf_insn; + /* * Libbpf allows callers to adjust BPF programs before being loaded * into kernel. One program in an object file can be transformed into @@ -553,6 +385,7 @@ bpf_program__attach_iter(const struct bpf_program *prog, * one instance. In this case bpf_program__fd(prog) is equal to * bpf_program__nth_fd(prog, 0). */ + struct bpf_prog_prep_result { /* * If not NULL, load new instruction array. @@ -581,11 +414,9 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, struct bpf_insn *insns, int insns_cnt, struct bpf_prog_prep_result *res); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, bpf_program_prep_t prep); -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); /* @@ -615,18 +446,6 @@ LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); -LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); -LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); - -/* Per-program log level and log buffer getters/setters. - * See bpf_object_open_opts comments regarding log_level and log_buf - * interactions. - */ -LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog); -LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level); -LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); -LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); - LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); @@ -659,13 +478,9 @@ struct bpf_map_def { unsigned int map_flags; }; -/** - * @brief **bpf_object__find_map_by_name()** returns BPF map of - * the given name, if it exists within the passed BPF object - * @param obj BPF object - * @param name name of the BPF map - * @return BPF map instance, if such map exists within the BPF object; - * or NULL otherwise. +/* + * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, + * so no need to worry about a name clash. */ LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); @@ -677,32 +492,21 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. 
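As a usage note for bpf_object__find_map_by_name() above, a minimal sketch; the map name "events" is a placeholder:

	#include <errno.h>
	#include <bpf/libbpf.h>

	static int events_map_fd(struct bpf_object *obj)
	{
		struct bpf_map *map;

		map = bpf_object__find_map_by_name(obj, "events");
		if (!map)
			return -ENOENT;
		return bpf_map__fd(map);	/* valid once the object is loaded */
	}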
*/ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead") -struct bpf_map * +LIBBPF_API struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") -struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); - +bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); #define bpf_object__for_each_map(pos, obj) \ - for ((pos) = bpf_object__next_map((obj), NULL); \ + for ((pos) = bpf_map__next(NULL, (obj)); \ (pos) != NULL; \ - (pos) = bpf_object__next_map((obj), (pos))) + (pos) = bpf_map__next((pos), (obj))) #define bpf_map__for_each bpf_object__for_each_map -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead") -struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); +bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); -/** - * @brief **bpf_map__fd()** gets the file descriptor of the passed - * BPF map - * @param map the BPF map instance - * @return the file descriptor; or -EINVAL in case of an error - */ +/* get/set map FD */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); /* get map definition */ @@ -734,9 +538,6 @@ LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); /* get/set map if_index */ LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); -/* get/set map map_extra flags */ -LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); -LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, @@ -745,16 +546,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead") LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); - -/** - * @brief **bpf_map__is_internal()** tells the caller whether or not the - * passed map is a special map created by libbpf automatically for things like - * global variables, __ksym externs, Kconfig values, etc - * @param map the bpf_map - * @return true, if the map is an internal map; false, otherwise - */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); @@ -766,38 +558,6 @@ LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); -/** - * @brief **libbpf_get_error()** extracts the error code from the passed - * pointer - * @param ptr pointer returned from libbpf API function - * @return error code; or 0 if no error occured - * - * Many libbpf API functions which return pointers have logic to encode 
error - * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()** - * should be used on the return value from these functions immediately after - * calling the API function, with no intervening calls that could clobber the - * `errno` variable. Consult the individual functions documentation to verify - * if this logic applies should be used. - * - * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` - * is enabled, NULL is returned on error instead. - * - * If ptr is NULL, then errno should be already set by the failing - * API, because libbpf never returns NULL on success and it now always - * sets errno on error. - * - * Example usage: - * - * struct perf_buffer *pb; - * - * pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts); - * err = libbpf_get_error(pb); - * if (err) { - * pb = NULL; - * fprintf(stderr, "failed to open perf buffer: %d\n", err); - * goto cleanup; - * } - */ LIBBPF_API long libbpf_get_error(const void *ptr); struct bpf_prog_load_attr { @@ -809,12 +569,10 @@ struct bpf_prog_load_attr { int prog_flags; }; -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead") LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_object **pobj, int *prog_fd); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") -LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); +LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); /* XDP related API */ struct xdp_link_info { @@ -912,52 +670,18 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); /* common use perf buffer options */ struct perf_buffer_opts { - union { - size_t sz; - struct { /* DEPRECATED: will be removed in v1.0 */ - /* if specified, sample_cb is called for each sample */ - perf_buffer_sample_fn sample_cb; - /* if specified, lost_cb is called for each batch of lost samples */ - perf_buffer_lost_fn lost_cb; - /* ctx is provided to sample_cb and lost_cb */ - void *ctx; - }; - }; + /* if specified, sample_cb is called for each sample */ + perf_buffer_sample_fn sample_cb; + /* if specified, lost_cb is called for each batch of lost samples */ + perf_buffer_lost_fn lost_cb; + /* ctx is provided to sample_cb and lost_cb */ + void *ctx; }; -#define perf_buffer_opts__last_field sz -/** - * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified - * BPF_PERF_EVENT_ARRAY map - * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF - * code to send data over to user-space - * @param page_cnt number of memory pages allocated for each per-CPU buffer - * @param sample_cb function called on each received data record - * @param lost_cb function called when record loss has occurred - * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* - * @return a new instance of struct perf_buffer on success, NULL on error with - * *errno* containing an error code - */ LIBBPF_API struct perf_buffer * perf_buffer__new(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, const struct perf_buffer_opts *opts); -LIBBPF_API struct perf_buffer * -perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, - const struct perf_buffer_opts *opts); - -LIBBPF_API 
LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead") -struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts); - -#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__) -#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \ - perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) -#define ___perf_buffer_new3(map_fd, page_cnt, opts) \ - perf_buffer__new_deprecated(map_fd, page_cnt, opts) - enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, LIBBPF_PERF_EVENT_ERROR = -1, @@ -971,21 +695,12 @@ typedef enum bpf_perf_event_ret /* raw perf buffer options, giving most power and control */ struct perf_buffer_raw_opts { - union { - struct { - size_t sz; - long :0; - long :0; - }; - struct { /* DEPRECATED: will be removed in v1.0 */ - /* perf event attrs passed directly into perf_event_open() */ - struct perf_event_attr *attr; - /* raw event callback */ - perf_buffer_event_fn event_cb; - /* ctx is provided to event_cb */ - void *ctx; - }; - }; + /* perf event attrs passed directly into perf_event_open() */ + struct perf_event_attr *attr; + /* raw event callback */ + perf_buffer_event_fn event_cb; + /* ctx is provided to event_cb */ + void *ctx; /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of * max_entries of given PERF_EVENT_ARRAY map) */ @@ -995,28 +710,11 @@ struct perf_buffer_raw_opts { /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */ int *map_keys; }; -#define perf_buffer_raw_opts__last_field map_keys LIBBPF_API struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, +perf_buffer__new_raw(int map_fd, size_t page_cnt, const struct perf_buffer_raw_opts *opts); -LIBBPF_API struct perf_buffer * -perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, - const struct perf_buffer_raw_opts *opts); - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead") -struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts); - -#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__) -#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \ - perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts) -#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \ - perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts) - LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); @@ -1028,7 +726,6 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, void *private_data); -LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead") LIBBPF_API enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -1055,57 +752,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, * user, causing subsequent probes to fail. In this case, the caller may want * to adjust that limit with setrlimit(). 
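The comment above notes that probing can exhaust the locked-memory limit on kernels without memcg-based accounting; the mitigation it alludes to is the usual setrlimit() dance:

	#include <sys/resource.h>

	static int bump_memlock_rlimit(void)
	{
		struct rlimit rlim = {
			.rlim_cur = RLIM_INFINITY,
			.rlim_max = RLIM_INFINITY,
		};

		/* lift RLIMIT_MEMLOCK before loading or probing BPF programs */
		return setrlimit(RLIMIT_MEMLOCK, &rlim);
	}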
*/ -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead") -LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead") +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, + __u32 ifindex); LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead") -LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection") +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, + enum bpf_prog_type prog_type, __u32 ifindex); LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); -/** - * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports - * BPF programs of a given type. - * @param prog_type BPF program type to detect kernel support for - * @param opts reserved for future extensibility, should be NULL - * @return 1, if given program type is supported; 0, if given program type is - * not supported; negative error code if feature detection failed or can't be - * performed - * - * Make sure the process has required set of CAP_* permissions (or runs as - * root) when performing feature checking. - */ -LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts); -/** - * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports - * BPF maps of a given type. - * @param map_type BPF map type to detect kernel support for - * @param opts reserved for future extensibility, should be NULL - * @return 1, if given map type is supported; 0, if given map type is - * not supported; negative error code if feature detection failed or can't be - * performed - * - * Make sure the process has required set of CAP_* permissions (or runs as - * root) when performing feature checking. - */ -LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts); -/** - * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the - * use of a given BPF helper from specified BPF program type. - * @param prog_type BPF program type used to check the support of BPF helper - * @param helper_id BPF helper ID (enum bpf_func_id) to check support for - * @param opts reserved for future extensibility, should be NULL - * @return 1, if given combination of program type and helper is supported; 0, - * if the combination is not supported; negative error code if feature - * detection for provided input arguments failed or can't be performed - * - * Make sure the process has required set of CAP_* permissions (or runs as - * root) when performing feature checking. 
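Following the return convention spelled out in these removed doc comments (1 = supported, 0 = not supported, negative = the probe itself failed), usage of the libbpf_probe_* APIs being dropped here looked like:

	#include <stdio.h>
	#include <bpf/libbpf.h>

	static void check_kprobe_support(void)
	{
		int ret = libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_KPROBE, NULL);

		if (ret < 0)
			fprintf(stderr, "probe failed: %d\n", ret);
		else
			printf("kprobe programs %ssupported\n", ret ? "" : "not ");
	}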
- */ -LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, - enum bpf_func_id helper_id, const void *opts); - /* * Get bpf_prog_info in continuous memory * @@ -1160,22 +813,18 @@ struct bpf_prog_info_linear { __u8 data[]; }; -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API struct bpf_prog_info_linear * bpf_program__get_prog_info_linear(int fd, __u64 arrays); -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); -/** - * @brief **libbpf_num_possible_cpus()** is a helper function to get the - * number of possible CPUs that the host kernel supports and expects. - * @return number of possible CPUs; or error code on failure +/* + * A helper function to get the number of possible CPUs before looking up + * per-CPU maps. Negative errno is returned on failure. * * Example usage: * @@ -1185,6 +834,7 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); * } * long values[ncpus]; * bpf_map_lookup_elem(per_cpu_map_fd, key, values); + * */ LIBBPF_API int libbpf_num_possible_cpus(void); @@ -1204,17 +854,17 @@ struct bpf_object_skeleton { size_t sz; /* size of this struct, for forward/backward compatibility */ const char *name; - const void *data; + void *data; size_t data_sz; struct bpf_object **obj; int map_cnt; - int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ + int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ struct bpf_map_skeleton *maps; int prog_cnt; - int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ + int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ struct bpf_prog_skeleton *progs; }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 5297839677..bbc53bb25f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -386,49 +386,3 @@ LIBBPF_0.5.0 { btf_dump__dump_type_data; libbpf_set_strict_mode; } LIBBPF_0.4.0; - -LIBBPF_0.6.0 { - global: - bpf_map__map_extra; - bpf_map__set_map_extra; - bpf_map_create; - bpf_object__next_map; - bpf_object__next_program; - bpf_object__prev_map; - bpf_object__prev_program; - bpf_prog_load_deprecated; - bpf_prog_load; - bpf_program__flags; - bpf_program__insn_cnt; - bpf_program__insns; - bpf_program__set_flags; - btf__add_btf; - btf__add_decl_tag; - btf__add_type_tag; - btf__dedup; - btf__dedup_deprecated; - btf__raw_data; - btf__type_cnt; - btf_dump__new; - btf_dump__new_deprecated; - libbpf_major_version; - libbpf_minor_version; - libbpf_version_string; - perf_buffer__new; - perf_buffer__new_deprecated; - perf_buffer__new_raw; - perf_buffer__new_raw_deprecated; -} LIBBPF_0.5.0; - -LIBBPF_0.7.0 { - global: - bpf_btf_load; - bpf_program__log_buf; - bpf_program__log_level; - bpf_program__set_log_buf; - bpf_program__set_log_level; - libbpf_probe_bpf_helper; - libbpf_probe_bpf_map_type; - libbpf_probe_bpf_prog_type; - libbpf_set_memlock_rlim_max; -}; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index 000e37798f..947d8bd8a7 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -10,7 +10,6 @@ #define __LIBBPF_LIBBPF_COMMON_H #include -#include "libbpf_version.h" #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) @@ -18,46 +17,6 @@ #define LIBBPF_DEPRECATED(msg) 
__attribute__((deprecated(msg))) -/* Mark a symbol as deprecated when libbpf version is >= {major}.{minor} */ -#define LIBBPF_DEPRECATED_SINCE(major, minor, msg) \ - __LIBBPF_MARK_DEPRECATED_ ## major ## _ ## minor \ - (LIBBPF_DEPRECATED("libbpf v" # major "." # minor "+: " msg)) - -#define __LIBBPF_CURRENT_VERSION_GEQ(major, minor) \ - (LIBBPF_MAJOR_VERSION > (major) || \ - (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor))) - -/* Add checks for other versions below when planning deprecation of API symbols - * with the LIBBPF_DEPRECATED_SINCE macro. - */ -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6) -#define __LIBBPF_MARK_DEPRECATED_0_6(X) X -#else -#define __LIBBPF_MARK_DEPRECATED_0_6(X) -#endif -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7) -#define __LIBBPF_MARK_DEPRECATED_0_7(X) X -#else -#define __LIBBPF_MARK_DEPRECATED_0_7(X) -#endif -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8) -#define __LIBBPF_MARK_DEPRECATED_0_8(X) X -#else -#define __LIBBPF_MARK_DEPRECATED_0_8(X) -#endif - -/* This set of internal macros allows to do "function overloading" based on - * number of arguments provided by used in backwards-compatible way during the - * transition to libbpf 1.0 - * It's ugly but necessary evil that will be cleaned up when we get to 1.0. - * See bpf_prog_load() overload for example. - */ -#define ___libbpf_cat(A, B) A ## B -#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM) -#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N -#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1) -#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__) - /* Helper macro to declare and initialize libbpf options struct * * This dance with uninitialized declaration, followed by memset to zero, @@ -71,7 +30,7 @@ * including any extra padding, it with memset() and then assigns initial * values provided by users in struct initializer-syntax as varargs. */ -#define LIBBPF_OPTS(TYPE, NAME, ...) \ +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ struct TYPE NAME = ({ \ memset(&NAME, 0, sizeof(struct TYPE)); \ (struct TYPE) { \ diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 1565679eb4..533b0211f4 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -13,8 +13,6 @@ #include #include #include -#include -#include #include "libbpf_legacy.h" #include "relo_core.h" @@ -54,8 +52,8 @@ #endif /* Older libelf all end up in this expression, for both 32 and 64 bit */ -#ifndef ELF64_ST_VISIBILITY -#define ELF64_ST_VISIBILITY(o) ((o) & 0x03) +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(o) ((o) & 0x03) #endif #define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@ -71,10 +69,6 @@ #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) -#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) -#define BTF_TYPE_TYPE_TAG_ENC(value, type) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) @@ -93,40 +87,20 @@ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif -/* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is - * a string literal known at compilation time or char * pointer known only at - * runtime. 
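To illustrate the options-declaration macro this hunk renames back (LIBBPF_OPTS to DECLARE_LIBBPF_OPTS): per the comment, it zeroes the whole struct, padding included, before applying the designated initializers, so every unset field reads as zero and opts.sz is filled in automatically. A sketch:

	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts,
		.retprobe = true,	/* every field not named here stays 0 */
	);

	/* pass &opts to the corresponding *_opts() API, e.g.
	 * bpf_program__attach_uprobe_opts(prog, pid, path, off, &opts)
	 */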
- */ -#define str_has_pfx(str, pfx) \ - (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) - /* Symbol versioning is different between static and shared library. * Properly versioned symbols are needed for shared library, but * only the symbol of the new version is needed for static library. - * Starting with GNU C 10, use symver attribute instead of .symver assembler - * directive, which works better with GCC LTO builds. */ -#if defined(SHARED) && defined(__GNUC__) && __GNUC__ >= 10 - -#define DEFAULT_VERSION(internal_name, api_name, version) \ - __attribute__((symver(#api_name "@@" #version))) -#define COMPAT_VERSION(internal_name, api_name, version) \ - __attribute__((symver(#api_name "@" #version))) - -#elif defined(SHARED) - -#define COMPAT_VERSION(internal_name, api_name, version) \ +#ifdef SHARED +# define COMPAT_VERSION(internal_name, api_name, version) \ asm(".symver " #internal_name "," #api_name "@" #version); -#define DEFAULT_VERSION(internal_name, api_name, version) \ +# define DEFAULT_VERSION(internal_name, api_name, version) \ asm(".symver " #internal_name "," #api_name "@@" #version); - -#else /* !SHARED */ - -#define COMPAT_VERSION(internal_name, api_name, version) -#define DEFAULT_VERSION(internal_name, api_name, version) \ +#else +# define COMPAT_VERSION(internal_name, api_name, version) +# define DEFAULT_VERSION(internal_name, api_name, version) \ extern typeof(internal_name) api_name \ __attribute__((alias(#internal_name))); - #endif extern void libbpf_print(enum libbpf_print_level level, @@ -169,31 +143,10 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) return realloc(ptr, total); } -/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst - * is zero-terminated string no matter what (unless sz == 0, in which case - * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs - * in what is returned. Given this is internal helper, it's trivial to extend - * this, when necessary. Use this instead of strncpy inside libbpf source code. - */ -static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) -{ - size_t i; - - if (sz == 0) - return; - - sz--; - for (i = 0; i < sz && src[i]; i++) - dst[i] = src[i]; - dst[i] = '\0'; -} - -__u32 get_kernel_version(void); - struct btf; struct btf_type; -struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); +struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id); const char *btf_kind_str(const struct btf_type *t); const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); @@ -218,9 +171,8 @@ enum map_def_parts { MAP_DEF_NUMA_NODE = 0x080, MAP_DEF_PINNING = 0x100, MAP_DEF_INNER_MAP = 0x200, - MAP_DEF_MAP_EXTRA = 0x400, - MAP_DEF_ALL = 0x7ff, /* combination of all above */ + MAP_DEF_ALL = 0x3ff, /* combination of all above */ }; struct btf_map_def { @@ -234,7 +186,6 @@ struct btf_map_def { __u32 map_flags; __u32 numa_node; __u32 pinning; - __u64 map_extra; }; int parse_btf_map_def(const char *map_name, struct btf *btf, @@ -293,52 +244,46 @@ static inline bool libbpf_validate_opts(const char *opts, (opts)->sz - __off); \ }) -enum kern_feature_id { - /* v4.14: kernel support for program & map names. */ - FEAT_PROG_NAME, - /* v5.2: kernel support for global data sections. 
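A quick demonstration of the copy semantics of the libbpf_strlcpy() helper removed in this hunk (an internal API, shown only to document its behavior):

	char dst[8];

	libbpf_strlcpy(dst, "performance", sizeof(dst));
	/* dst now holds "perform": at most sz - 1 bytes are copied and the
	 * result is always NUL-terminated, unlike strncpy()
	 */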
*/ - FEAT_GLOBAL_DATA, - /* BTF support */ - FEAT_BTF, - /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ - FEAT_BTF_FUNC, - /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ - FEAT_BTF_DATASEC, - /* BTF_FUNC_GLOBAL is supported */ - FEAT_BTF_GLOBAL_FUNC, - /* BPF_F_MMAPABLE is supported for arrays */ - FEAT_ARRAY_MMAP, - /* kernel support for expected_attach_type in BPF_PROG_LOAD */ - FEAT_EXP_ATTACH_TYPE, - /* bpf_probe_read_{kernel,user}[_str] helpers */ - FEAT_PROBE_READ_KERN, - /* BPF_PROG_BIND_MAP is supported */ - FEAT_PROG_BIND_MAP, - /* Kernel support for module BTFs */ - FEAT_MODULE_BTF, - /* BTF_KIND_FLOAT support */ - FEAT_BTF_FLOAT, - /* BPF perf link support */ - FEAT_PERF_LINK, - /* BTF_KIND_DECL_TAG support */ - FEAT_BTF_DECL_TAG, - /* BTF_KIND_TYPE_TAG support */ - FEAT_BTF_TYPE_TAG, - /* memcg-based accounting for BPF maps and progs */ - FEAT_MEMCG_ACCOUNT, - __FEAT_CNT, -}; - -int probe_memcg_account(void); -bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); -int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); +struct bpf_prog_load_params { + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + const char *name; + const struct bpf_insn *insns; + size_t insn_cnt; + const char *license; + __u32 kern_version; + __u32 attach_prog_fd; + __u32 attach_btf_obj_fd; + __u32 attach_btf_id; + __u32 prog_ifindex; + __u32 prog_btf_fd; + __u32 prog_flags; + + __u32 func_info_rec_size; + const void *func_info; + __u32 func_info_cnt; + + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; + + __u32 log_level; + char *log_buf; + size_t log_buf_sz; +}; + +int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); + +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, const char **prefix, int *kind); @@ -441,8 +386,6 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); -__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, - __u32 kind); extern enum libbpf_strict_mode libbpf_mode; @@ -504,26 +447,4 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } -/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 - * Takes ownership of the fd passed in, and closes it if calling - * fcntl(fd, F_DUPFD_CLOEXEC, 3). 
- */
-static inline int ensure_good_fd(int fd)
-{
- int old_fd = fd, saved_errno;
-
- if (fd < 0)
- return fd;
- if (fd < 3) {
- fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
- saved_errno = errno;
- close(old_fd);
- if (fd < 0) {
- pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno);
- errno = saved_errno;
- }
- }
- return fd;
-}
-
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 79131f761a..df0d03dcff 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -45,41 +45,12 @@ enum libbpf_strict_mode {
 * (positive) error code.
 */
 LIBBPF_STRICT_DIRECT_ERRS = 0x02,
- /*
- * Enforce strict BPF program section (SEC()) names.
- * E.g., while previously SEC("xdp_whatever") or SEC("perf_event_blah") were
- * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become
- * unrecognized by libbpf and would have to be just SEC("xdp") and
- * SEC("perf_event").
- *
- * Note, in this mode the program pin path will be based on the
- * function name instead of section name.
- */
- LIBBPF_STRICT_SEC_NAME = 0x04,
- /*
- * Disable the global 'bpf_objects_list'. Maintaining this list adds
- * a race condition to bpf_object__open() and bpf_object__close().
- * Clients can maintain it on their own if it is valuable for them.
- */
- LIBBPF_STRICT_NO_OBJECT_LIST = 0x08,
- /*
- * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the
- * first BPF program or map creation operation. This is done only if
- * kernel is too old to support memcg-based memory accounting for BPF
- * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
- * but it can be overridden with libbpf_set_memlock_rlim_max() API.
- * Note that libbpf_set_memlock_rlim_max() needs to be called before
- * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
- * operation.
- */
- LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
 __LIBBPF_STRICT_LAST,
};
 LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
-#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
 #ifdef __cplusplus
} /* extern "C" */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 97b06cede5..cd8c703dde 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -33,7 +33,7 @@ static int get_vendor_id(int ifindex)
 snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor",
 ifname);
- fd = open(path, O_RDONLY | O_CLOEXEC);
+ fd = open(path, O_RDONLY);
 if (fd < 0)
 return -1;
@@ -48,65 +48,41 @@ static int get_vendor_id(int ifindex)
 return strtol(buf, NULL, 0);
}
-static int probe_prog_load(enum bpf_prog_type prog_type,
- const struct bpf_insn *insns, size_t insns_cnt,
- char *log_buf, size_t log_buf_sz,
- __u32 ifindex)
+static int get_kernel_version(void)
{
- LIBBPF_OPTS(bpf_prog_load_opts, opts,
- .log_buf = log_buf,
- .log_size = log_buf_sz,
- .log_level = log_buf ?
1 : 0, - .prog_ifindex = ifindex, - ); - int fd, err, exp_err = 0; - const char *exp_msg = NULL; - char buf[4096]; + int version, subversion, patchlevel; + struct utsname utsn; + + /* Return 0 on failure, and attempt to probe with empty kversion */ + if (uname(&utsn)) + return 0; + + if (sscanf(utsn.release, "%d.%d.%d", + &version, &subversion, &patchlevel) != 3) + return 0; + + return (version << 16) + (subversion << 8) + patchlevel; +} + +static void +probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, + size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) +{ + struct bpf_load_program_attr xattr = {}; + int fd; switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; break; case BPF_PROG_TYPE_CGROUP_SOCKOPT: - opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT; + xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT; break; case BPF_PROG_TYPE_SK_LOOKUP: - opts.expected_attach_type = BPF_SK_LOOKUP; + xattr.expected_attach_type = BPF_SK_LOOKUP; break; case BPF_PROG_TYPE_KPROBE: - opts.kern_version = get_kernel_version(); - break; - case BPF_PROG_TYPE_LIRC_MODE2: - opts.expected_attach_type = BPF_LIRC_MODE2; - break; - case BPF_PROG_TYPE_TRACING: - case BPF_PROG_TYPE_LSM: - opts.log_buf = buf; - opts.log_size = sizeof(buf); - opts.log_level = 1; - if (prog_type == BPF_PROG_TYPE_TRACING) - opts.expected_attach_type = BPF_TRACE_FENTRY; - else - opts.expected_attach_type = BPF_MODIFY_RETURN; - opts.attach_btf_id = 1; - - exp_err = -EINVAL; - exp_msg = "attach_btf_id 1 is not a function"; - break; - case BPF_PROG_TYPE_EXT: - opts.log_buf = buf; - opts.log_size = sizeof(buf); - opts.log_level = 1; - opts.attach_btf_id = 1; - - exp_err = -EINVAL; - exp_msg = "Cannot replace kernel functions"; - break; - case BPF_PROG_TYPE_SYSCALL: - opts.prog_flags = BPF_F_SLEEPABLE; - break; - case BPF_PROG_TYPE_STRUCT_OPS: - exp_err = -524; /* -ENOTSUPP */ + xattr.kern_version = get_kernel_version(); break; case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_SOCKET_FILTER: @@ -127,42 +103,27 @@ static int probe_prog_load(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: + case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: - break; + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_LSM: default: - return -EOPNOTSUPP; + break; } - fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); - err = -errno; + xattr.prog_type = prog_type; + xattr.insns = insns; + xattr.insns_cnt = insns_cnt; + xattr.license = "GPL"; + xattr.prog_ifindex = ifindex; + + fd = bpf_load_program_xattr(&xattr, buf, buf_len); if (fd >= 0) close(fd); - if (exp_err) { - if (fd >= 0 || err != exp_err) - return 0; - if (exp_msg && !strstr(buf, exp_msg)) - return 0; - return 1; - } - return fd >= 0 ? 
1 : 0; -} - -int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN() - }; - const size_t insn_cnt = ARRAY_SIZE(insns); - int ret; - - if (opts) - return libbpf_err(-EINVAL); - - ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0); - return libbpf_err(ret); } bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) @@ -172,16 +133,12 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) BPF_EXIT_INSN() }; - /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */ - if (ifindex == 0) - return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1; - if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) /* nfp returns -EINVAL on exit(0) with TC offload */ insns[0].imm = 2; errno = 0; - probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); return errno != EINVAL && errno != EOPNOTSUPP; } @@ -209,7 +166,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); free(raw_btf); return btf_fd; @@ -242,18 +199,17 @@ static int load_local_storage_btf(void) strs, sizeof(strs)); } -static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) { - LIBBPF_OPTS(bpf_map_create_opts, opts); - int key_size, value_size, max_entries; + int key_size, value_size, max_entries, map_flags; __u32 btf_key_type_id = 0, btf_value_type_id = 0; - int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; - - opts.map_ifindex = ifindex; + struct bpf_create_map_attr attr = {}; + int fd = -1, btf_fd = -1, fd_inner; key_size = sizeof(__u32); value_size = sizeof(__u32); max_entries = 1; + map_flags = 0; switch (map_type) { case BPF_MAP_TYPE_STACK_TRACE: @@ -262,7 +218,7 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_LPM_TRIE: key_size = sizeof(__u64); value_size = sizeof(__u64); - opts.map_flags = BPF_F_NO_PREALLOC; + map_flags = BPF_F_NO_PREALLOC; break; case BPF_MAP_TYPE_CGROUP_STORAGE: case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: @@ -281,25 +237,17 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) btf_value_type_id = 3; value_size = 8; max_entries = 0; - opts.map_flags = BPF_F_NO_PREALLOC; + map_flags = BPF_F_NO_PREALLOC; btf_fd = load_local_storage_btf(); if (btf_fd < 0) - return btf_fd; + return false; break; case BPF_MAP_TYPE_RINGBUF: key_size = 0; value_size = 0; max_entries = 4096; break; - case BPF_MAP_TYPE_STRUCT_OPS: - /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ - opts.btf_vmlinux_value_type_id = 1; - exp_err = -524; /* -ENOTSUPP */ - break; - case BPF_MAP_TYPE_BLOOM_FILTER: - key_size = 0; - max_entries = 1; - break; + case BPF_MAP_TYPE_UNSPEC: case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PROG_ARRAY: @@ -318,10 +266,9 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: - break; - case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_STRUCT_OPS: default: - return -EOPNOTSUPP; + break; } if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || @@ -330,102 +277,37 
@@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) * map-in-map for offload */ if (ifindex) - goto cleanup; + return false; - fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, - sizeof(__u32), sizeof(__u32), 1, NULL); + fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(__u32), sizeof(__u32), 1, 0); if (fd_inner < 0) - goto cleanup; + return false; + fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), + fd_inner, 1, 0); + close(fd_inner); + } else { + /* Note: No other restriction on map type probes for offload */ + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + attr.map_ifindex = ifindex; + if (btf_fd >= 0) { + attr.btf_fd = btf_fd; + attr.btf_key_type_id = btf_key_type_id; + attr.btf_value_type_id = btf_value_type_id; + } - opts.inner_map_fd = fd_inner; + fd = bpf_create_map_xattr(&attr); } - - if (btf_fd >= 0) { - opts.btf_fd = btf_fd; - opts.btf_key_type_id = btf_key_type_id; - opts.btf_value_type_id = btf_value_type_id; - } - - fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); - err = -errno; - -cleanup: if (fd >= 0) close(fd); - if (fd_inner >= 0) - close(fd_inner); if (btf_fd >= 0) close(btf_fd); - if (exp_err) - return fd < 0 && err == exp_err ? 1 : 0; - else - return fd >= 0 ? 1 : 0; -} - -int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) -{ - int ret; - - if (opts) - return libbpf_err(-EINVAL); - - ret = probe_map_create(map_type, 0); - return libbpf_err(ret); -} - -bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) -{ - return probe_map_create(map_type, ifindex) == 1; -} - -int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, - const void *opts) -{ - struct bpf_insn insns[] = { - BPF_EMIT_CALL((__u32)helper_id), - BPF_EXIT_INSN(), - }; - const size_t insn_cnt = ARRAY_SIZE(insns); - char buf[4096]; - int ret; - - if (opts) - return libbpf_err(-EINVAL); - - /* we can't successfully load all prog types to check for BPF helper - * support, so bail out with -EOPNOTSUPP error - */ - switch (prog_type) { - case BPF_PROG_TYPE_TRACING: - case BPF_PROG_TYPE_EXT: - case BPF_PROG_TYPE_LSM: - case BPF_PROG_TYPE_STRUCT_OPS: - return -EOPNOTSUPP; - default: - break; - } - - buf[0] = '\0'; - ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0); - if (ret < 0) - return libbpf_err(ret); - - /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) - * at all, it will emit something like "invalid func unknown#181". - * If BPF verifier recognizes BPF helper but it's not supported for - * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". - * In both cases, provided combination of BPF program type and BPF - * helper is not supported by the kernel. - * In all other cases, probe_prog_load() above will either succeed (e.g., - * because BPF helper happens to accept no input arguments or it - * accepts one input argument and initial PTR_TO_CTX is fine for - * that), or we'll get some more specific BPF verifier error about - * some unsatisfied conditions. 
- */
- if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func ")))
- return 0;
- return 1; /* assume supported */
+ return fd >= 0;
}
 bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
@@ -438,7 +320,8 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
 char buf[4096] = {};
 bool res;
- probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);
+ probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf),
+ ifindex);
 res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
 if (ifindex) {
@@ -470,8 +353,8 @@ bool bpf_probe_large_insn_limit(__u32 ifindex)
 insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
 errno = 0;
- probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
- ifindex);
+ probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
+ ifindex);
 return errno != E2BIG && errno != EINVAL;
}
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 9aa016fb55..6b2f59ddb6 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -15,6 +15,7 @@
 #include <linux/btf.h>
 #include <elf.h>
 #include <libelf.h>
+#include <gelf.h>
 #include <fcntl.h>
 #include "libbpf.h"
 #include "btf.h"
@@ -302,7 +303,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
 if (!linker->filename)
 return -ENOMEM;
- linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
+ linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 if (linker->fd < 0) {
 err = -errno;
 pr_warn("failed to create '%s': %d\n", file, err);
@@ -324,12 +325,12 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
 linker->elf_hdr->e_machine = EM_BPF;
 linker->elf_hdr->e_type = ET_REL;
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#elif __BYTE_ORDER == __BIG_ENDIAN
 linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB;
 #else
-#error "Unknown __BYTE_ORDER__"
+#error "Unknown __BYTE_ORDER"
 #endif
 /* STRTAB */
@@ -539,12 +540,12 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 const struct bpf_linker_file_opts *opts,
 struct src_obj *obj)
{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 const int host_endianness = ELFDATA2LSB;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#elif __BYTE_ORDER == __BIG_ENDIAN
 const int host_endianness = ELFDATA2MSB;
 #else
-#error "Unknown __BYTE_ORDER__"
+#error "Unknown __BYTE_ORDER"
 #endif
 int err = 0;
 Elf_Scn *scn;
@@ -557,7 +558,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 obj->filename = filename;
- obj->fd = open(filename, O_RDONLY | O_CLOEXEC);
+ obj->fd = open(filename, O_RDONLY);
 if (obj->fd < 0) {
 err = -errno;
 pr_warn("failed to open file '%s': %d\n", filename, err);
@@ -921,7 +922,7 @@ static int check_btf_type_id(__u32 *type_id, void *ctx)
{
 struct btf *btf = ctx;
- if (*type_id >= btf__type_cnt(btf))
+ if (*type_id > btf__get_nr_types(btf))
 return -EINVAL;
 return 0;
@@ -948,8 +949,8 @@ static int linker_sanity_check_btf(struct src_obj *obj)
 if (!obj->btf)
 return 0;
- n = btf__type_cnt(obj->btf);
- for (i = 1; i < n; i++) {
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
 t = btf_type_by_id(obj->btf, i);
 err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf);
@@ -1659,8 +1660,8 @@ static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sy
 return -EINVAL;
 }
- n = btf__type_cnt(obj->btf);
- for (i = 1; i < n; i++) {
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
 t = btf__type_by_id(obj->btf, i);
 /* some global and extern FUNCs and VARs might not be associated with any
@@ -2134,8 +2135,8 @@ static int linker_fixup_btf(struct src_obj *obj)
 if (!obj->btf)
 return 0;
- n = btf__type_cnt(obj->btf);
- for (i = 1; i < n; i++) {
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
 struct btf_var_secinfo *vi;
 struct btf_type *t;
@@ -2238,14 +2239,14 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 if (!obj->btf)
 return 0;
- start_id = btf__type_cnt(linker->btf);
- n = btf__type_cnt(obj->btf);
+ start_id = btf__get_nr_types(linker->btf) + 1;
+ n = btf__get_nr_types(obj->btf);
 obj->btf_type_map = calloc(n + 1, sizeof(int));
 if (!obj->btf_type_map)
 return -ENOMEM;
- for (i = 1; i < n; i++) {
+ for (i = 1; i <= n; i++) {
 struct glob_sym *glob_sym = NULL;
 t = btf__type_by_id(obj->btf, i);
@@ -2300,8 +2301,8 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 }
 /* remap all the types except DATASECs */
- n = btf__type_cnt(linker->btf);
- for (i = start_id; i < n; i++) {
+ n = btf__get_nr_types(linker->btf);
+ for (i = start_id; i <= n; i++) {
 struct btf_type *dst_t = btf_type_by_id(linker->btf, i);
 if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map))
@@ -2654,14 +2655,13 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,
 static int finalize_btf(struct bpf_linker *linker)
{
- LIBBPF_OPTS(btf_dedup_opts, opts);
 struct btf *btf = linker->btf;
 const void *raw_data;
 int i, j, id, err;
 __u32 raw_sz;
 /* bail out if no BTF data was produced */
- if (btf__type_cnt(linker->btf) == 1)
+ if (btf__get_nr_types(linker->btf) == 0)
 return 0;
 for (i = 1; i < linker->sec_cnt; i++) {
@@ -2691,15 +2691,14 @@ static int finalize_btf(struct bpf_linker *linker)
 return err;
 }
- opts.btf_ext = linker->btf_ext;
- err = btf__dedup(linker->btf, &opts);
+ err = btf__dedup(linker->btf, linker->btf_ext, NULL);
 if (err) {
 pr_warn("BTF dedup failed: %d\n", err);
 return err;
 }
 /* Emit .BTF section */
- raw_data = btf__raw_data(linker->btf, &raw_sz);
+ raw_data = btf__get_raw_data(linker->btf, &raw_sz);
 if (!raw_data)
 return -ENOMEM;
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 910865e29e..4016ed492d 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -1,60 +1,6 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2019 Facebook */
-#ifdef __KERNEL__
-#include <linux/bpf.h>
-#include <linux/btf.h>
-#include <linux/string.h>
-#include <linux/bpf_verifier.h>
-#include "relo_core.h"
-
-static const char *btf_kind_str(const struct btf_type *t)
-{
- return btf_type_str(t);
-}
-
-static bool is_ldimm64_insn(struct bpf_insn *insn)
-{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
-static const struct btf_type *
-skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
-{
- return btf_type_skip_modifiers(btf, id, res_id);
-}
-
-static const char *btf__name_by_offset(const struct btf *btf, u32 offset)
-{
- return btf_name_by_offset(btf, offset);
-}
-
-static s64 btf__resolve_size(const struct btf *btf, u32 type_id)
-{
- const struct btf_type *t;
- int size;
-
- t = btf_type_by_id(btf, type_id);
- t = btf_resolve_size(btf, t, &size);
- if (IS_ERR(t))
- return PTR_ERR(t);
- return size;
-}
-
-enum libbpf_print_level {
- LIBBPF_WARN,
- LIBBPF_INFO,
- LIBBPF_DEBUG,
-};
-
-#undef pr_warn
-#undef pr_info
-#undef pr_debug
-#define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
-#define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
-#define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
-#define libbpf_print(level, fmt, ...) bpf_log((void *)prog_name, fmt, ##__VA_ARGS__)
-#else
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
@@ -66,7 +12,33 @@ enum libbpf_print_level {
 #include "btf.h"
 #include "str_error.h"
 #include "libbpf_internal.h"
-#endif
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
 static bool is_flex_arr(const struct btf *btf,
 const struct bpf_core_accessor *acc,
@@ -79,25 +51,25 @@ static bool is_flex_arr(const struct btf *btf,
 return false;
 /* has to be the last member of enclosing struct */
- t = btf_type_by_id(btf, acc->type_id);
+ t = btf__type_by_id(btf, acc->type_id);
 return acc->idx == btf_vlen(t) - 1;
}
 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
{
 switch (kind) {
- case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off";
- case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz";
- case BPF_CORE_FIELD_EXISTS: return "field_exists";
- case BPF_CORE_FIELD_SIGNED: return "signed";
- case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64";
- case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64";
- case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
- case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
- case BPF_CORE_TYPE_EXISTS: return "type_exists";
- case BPF_CORE_TYPE_SIZE: return "type_size";
- case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
- case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_FIELD_EXISTS: return "field_exists";
+ case BPF_FIELD_SIGNED: return "signed";
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_TYPE_EXISTS: return "type_exists";
+ case BPF_TYPE_SIZE: return "type_size";
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
 default: return "unknown";
 }
}
@@ -105,12 +77,12 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
{
 switch (kind) {
- case BPF_CORE_FIELD_BYTE_OFFSET:
- case BPF_CORE_FIELD_BYTE_SIZE:
- case BPF_CORE_FIELD_EXISTS:
- case BPF_CORE_FIELD_SIGNED:
- case BPF_CORE_FIELD_LSHIFT_U64:
- case BPF_CORE_FIELD_RSHIFT_U64:
+ case BPF_FIELD_BYTE_OFFSET:
+ case BPF_FIELD_BYTE_SIZE:
+ case BPF_FIELD_EXISTS:
+ case BPF_FIELD_SIGNED:
+ case BPF_FIELD_LSHIFT_U64:
+ case BPF_FIELD_RSHIFT_U64:
 return true;
 default:
 return
false; @@ -120,10 +92,10 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_CORE_TYPE_ID_LOCAL: - case BPF_CORE_TYPE_ID_TARGET: - case BPF_CORE_TYPE_EXISTS: - case BPF_CORE_TYPE_SIZE: + case BPF_TYPE_ID_LOCAL: + case BPF_TYPE_ID_TARGET: + case BPF_TYPE_EXISTS: + case BPF_TYPE_SIZE: return true; default: return false; @@ -133,8 +105,8 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_CORE_ENUMVAL_EXISTS: - case BPF_CORE_ENUMVAL_VALUE: + case BPF_ENUMVAL_EXISTS: + case BPF_ENUMVAL_VALUE: return true; default: return false; @@ -178,7 +150,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access * string to specify enumerator's value index that need to be relocated. */ -static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, +static int bpf_core_parse_spec(const struct btf *btf, __u32 type_id, const char *spec_str, enum bpf_core_relo_kind relo_kind, @@ -300,8 +272,8 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, return sz; spec->bit_offset += access_idx * sz * 8; } else { - pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - prog_name, type_id, spec_str, i, id, btf_kind_str(t)); + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", + type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -374,6 +346,8 @@ static int bpf_core_fields_are_compat(const struct btf *local_btf, targ_id = btf_array(targ_type)->type; goto recur; default: + pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); return 0; } } @@ -414,7 +388,7 @@ static int bpf_core_match_member(const struct btf *local_btf, return 0; local_id = local_acc->type_id; - local_type = btf_type_by_id(local_btf, local_id); + local_type = btf__type_by_id(local_btf, local_id); local_member = btf_members(local_type) + local_acc->idx; local_name = btf__name_by_offset(local_btf, local_member->name_off); @@ -597,7 +571,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *field_sz = 0; - if (relo->kind == BPF_CORE_FIELD_EXISTS) { + if (relo->kind == BPF_FIELD_EXISTS) { *val = spec ? 
1 : 0; return 0; } @@ -606,11 +580,11 @@ static int bpf_core_calc_field_relo(const char *prog_name, return -EUCLEAN; /* request instruction poisoning */ acc = &spec->spec[spec->len - 1]; - t = btf_type_by_id(spec->btf, acc->type_id); + t = btf__type_by_id(spec->btf, acc->type_id); /* a[n] accessor needs special handling */ if (!acc->name) { - if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { + if (relo->kind == BPF_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; /* remember field size for load/store mem size */ sz = btf__resolve_size(spec->btf, acc->type_id); @@ -618,7 +592,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, return -EINVAL; *field_sz = sz; *type_id = acc->type_id; - } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) { + } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { sz = btf__resolve_size(spec->btf, acc->type_id); if (sz < 0) return -EINVAL; @@ -670,36 +644,36 @@ static int bpf_core_calc_field_relo(const char *prog_name, *validate = !bitfield; switch (relo->kind) { - case BPF_CORE_FIELD_BYTE_OFFSET: + case BPF_FIELD_BYTE_OFFSET: *val = byte_off; if (!bitfield) { *field_sz = byte_sz; *type_id = field_type_id; } break; - case BPF_CORE_FIELD_BYTE_SIZE: + case BPF_FIELD_BYTE_SIZE: *val = byte_sz; break; - case BPF_CORE_FIELD_SIGNED: + case BPF_FIELD_SIGNED: /* enums will be assumed unsigned */ *val = btf_is_enum(mt) || (btf_int_encoding(mt) & BTF_INT_SIGNED); if (validate) *validate = true; /* signedness is never ambiguous */ break; - case BPF_CORE_FIELD_LSHIFT_U64: -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + case BPF_FIELD_LSHIFT_U64: +#if __BYTE_ORDER == __LITTLE_ENDIAN *val = 64 - (bit_off + bit_sz - byte_off * 8); #else *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); #endif break; - case BPF_CORE_FIELD_RSHIFT_U64: + case BPF_FIELD_RSHIFT_U64: *val = 64 - bit_sz; if (validate) *validate = true; /* right shift is never ambiguous */ break; - case BPF_CORE_FIELD_EXISTS: + case BPF_FIELD_EXISTS: default: return -EOPNOTSUPP; } @@ -709,14 +683,10 @@ static int bpf_core_calc_field_relo(const char *prog_name, static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, bool *validate) + __u32 *val) { __s64 sz; - /* by default, always check expected value in bpf_insn */ - if (validate) - *validate = true; - /* type-based relos return zero when target type is not found */ if (!spec) { *val = 0; @@ -724,25 +694,20 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, } switch (relo->kind) { - case BPF_CORE_TYPE_ID_TARGET: + case BPF_TYPE_ID_TARGET: *val = spec->root_type_id; - /* type ID, embedded in bpf_insn, might change during linking, - * so enforcing it is pointless - */ - if (validate) - *validate = false; break; - case BPF_CORE_TYPE_EXISTS: + case BPF_TYPE_EXISTS: *val = 1; break; - case BPF_CORE_TYPE_SIZE: + case BPF_TYPE_SIZE: sz = btf__resolve_size(spec->btf, spec->root_type_id); if (sz < 0) return -EINVAL; *val = sz; break; - case BPF_CORE_TYPE_ID_LOCAL: - /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + case BPF_TYPE_ID_LOCAL: + /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ default: return -EOPNOTSUPP; } @@ -758,13 +723,13 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, const struct btf_enum *e; switch (relo->kind) { - case BPF_CORE_ENUMVAL_EXISTS: + case BPF_ENUMVAL_EXISTS: *val = spec ? 
1 : 0; break; - case BPF_CORE_ENUMVAL_VALUE: + case BPF_ENUMVAL_VALUE: if (!spec) return -EUCLEAN; /* request instruction poisoning */ - t = btf_type_by_id(spec->btf, spec->spec[0].type_id); + t = btf__type_by_id(spec->btf, spec->spec[0].type_id); e = btf_enum(t) + spec->spec[0].idx; *val = e->val; break; @@ -840,8 +805,8 @@ static int bpf_core_calc_relo(const char *prog_name, if (res->orig_sz != res->new_sz) { const struct btf_type *orig_t, *new_t; - orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id); - new_t = btf_type_by_id(targ_spec->btf, res->new_type_id); + orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); + new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); /* There are two use cases in which it's safe to * adjust load/store's mem size: @@ -870,8 +835,8 @@ static int bpf_core_calc_relo(const char *prog_name, res->fail_memsz_adjust = true; } } else if (core_relo_is_type_based(relo->kind)) { - err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate); - err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL); + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); } else if (core_relo_is_enumval_based(relo->kind)) { err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); @@ -1080,7 +1045,7 @@ static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, * [] () + => @, * where is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; @@ -1089,7 +1054,7 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp int i; type_id = spec->root_type_id; - t = btf_type_by_id(spec->btf, type_id); + t = btf__type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"" : s); @@ -1182,12 +1147,9 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, - struct bpf_core_cand_list *cands, - struct bpf_core_spec *specs_scratch) + struct bpf_core_cand_list *cands) { - struct bpf_core_spec *local_spec = &specs_scratch[0]; - struct bpf_core_spec *cand_spec = &specs_scratch[1]; - struct bpf_core_spec *targ_spec = &specs_scratch[2]; + struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; const char *local_name; @@ -1196,7 +1158,10 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, int i, j, err; local_id = relo->type_id; - local_type = btf_type_by_id(local_btf, local_id); + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + local_name = btf__name_by_offset(local_btf, local_type->name_off); if (!local_name) return -EINVAL; @@ -1205,8 +1170,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, if (str_is_empty(spec_str)) return -EINVAL; - err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, - relo->kind, local_spec); + err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), @@ -1217,17 +1181,15 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ - if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { - /* bpf_insn's imm value could get out of sync during linking */ - memset(&targ_res, 0, sizeof(targ_res)); - targ_res.validate = false; + if (relo->kind == BPF_TYPE_ID_LOCAL) { + targ_res.validate = true; targ_res.poison = false; - targ_res.orig_val = local_spec->root_type_id; - targ_res.new_val = local_spec->root_type_id; + targ_res.orig_val = local_spec.root_type_id; + targ_res.new_val = local_spec.root_type_id; goto patch_insn; } @@ -1238,39 +1200,40 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, return -EOPNOTSUPP; } + for (i = 0, j = 0; i < cands->len; i++) { - err = bpf_core_spec_match(local_spec, cands->cands[i].btf, - cands->cands[i].id, cand_spec); + err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, + cands->cands[i].id, &cand_spec); if (err < 0) { pr_warn("prog '%s': relo #%d: error matching candidate #%d ", prog_name, relo_idx, i); - bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); + bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); libbpf_print(LIBBPF_WARN, ": %d\n", err); return err; } pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, relo_idx, err == 0 ? 
"non-matching" : "matching", i); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); libbpf_print(LIBBPF_DEBUG, "\n"); if (err == 0) continue; - err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); if (err) return err; if (j == 0) { targ_res = cand_res; - *targ_spec = *cand_spec; - } else if (cand_spec->bit_offset != targ_spec->bit_offset) { + targ_spec = cand_spec; + } else if (cand_spec.bit_offset != targ_spec.bit_offset) { /* if there are many field relo candidates, they * should all resolve to the same bit offset */ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", - prog_name, relo_idx, cand_spec->bit_offset, - targ_spec->bit_offset); + prog_name, relo_idx, cand_spec.bit_offset, + targ_spec.bit_offset); return -EINVAL; } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { /* all candidates should result in the same relocation @@ -1288,7 +1251,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, } /* - * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field + * For BPF_FIELD_EXISTS relo or when used BPF program has field * existence checks or kernel version/config checks, it's expected * that we might not find any candidates. In this case, if field * wasn't found in any candidate, the list of candidates shouldn't @@ -1314,7 +1277,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx); /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); if (err) return err; } diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 17799819ad..3b9f8f1834 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -4,10 +4,81 @@ #ifndef __RELO_CORE_H #define __RELO_CORE_H -#include +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. + */ +enum bpf_core_relo_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_TYPE_SIZE = 9, /* type size in bytes */ + BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + +/* The minimum bpf_core_relo checked by the loader + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. 
String
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
+ * arguments for identifying offset to a field.
+ * - for type-based relocations, string is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
+ *
+ * Example to provide a better feel.
+ *
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int x = &s->a; // encoded as "0:0" (a is field #0)
+ * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_core_relo_kind kind;
+};
 struct bpf_core_cand {
 const struct btf *btf;
+ const struct btf_type *t;
+ const char *name;
 __u32 id;
};
@@ -17,39 +88,11 @@ struct bpf_core_cand_list {
 int len;
};
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
- __u32 type_id; /* struct/union type or array element type */
- __u32 idx; /* field index or array index */
- const char *name; /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
- const struct btf *btf;
- /* high-level spec: named fields and array indices only */
- struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
- /* original unresolved (no skip_mods_or_typedefs) root type ID */
- __u32 root_type_id;
- /* CO-RE relocation kind */
- enum bpf_core_relo_kind relo_kind;
- /* high-level spec length */
- int len;
- /* raw, low-level spec: 1-to-1 with accessor spec string */
- int raw_spec[BPF_CORE_SPEC_MAX_LEN];
- /* raw spec length */
- int raw_len;
- /* field bit offset represented by spec */
- __u32 bit_offset;
-};
-
 int bpf_core_apply_relo_insn(const char *prog_name,
 struct bpf_insn *insn, int insn_idx,
 const struct bpf_core_relo *relo, int relo_idx,
 const struct btf *local_btf,
- struct bpf_core_cand_list *cands,
- struct bpf_core_spec *specs_scratch);
+ struct bpf_core_cand_list *cands);
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 const struct btf *targ_btf, __u32 targ_id);
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 0b84d8e6b7..9cf66702fa 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -7,16 +7,6 @@
 #include <sys/syscall.h>
 #include <sys/mman.h>
-#ifndef __NR_bpf
-# if defined(__mips__) && defined(_ABIO32)
-# define __NR_bpf 4355
-# elif defined(__mips__) && defined(_ABIN32)
-# define __NR_bpf 6319
-# elif defined(__mips__) && defined(_ABI64)
-# define __NR_bpf 5315
-# endif
-#endif
-
 /* This file is a base header for
auto-generated *.lskel.h files. * Its contents will change and may become part of auto-generation in the future. * @@ -75,7 +65,8 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); + map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, + opts->data_sz, 1, 0); if (map_fd < 0) { opts->errstr = "failed to create loader map"; err = -errno; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index edafe56664..e9b619aa0c 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -35,11 +35,6 @@ #include "libbpf_internal.h" #include "xsk.h" -/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal - * uses of deprecated APIs - */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -286,7 +281,6 @@ static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, return err; } -DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -305,7 +299,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, if (!umem) return -ENOMEM; - umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); + umem->fd = socket(AF_XDP, SOCK_RAW, 0); if (umem->fd < 0) { err = -errno; goto out_umem_alloc; @@ -351,7 +345,6 @@ struct xsk_umem_config_v1 { __u32 frame_headroom; }; -COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -365,10 +358,14 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, &config); } +COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) +DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static enum xsk_prog get_xsk_prog(void) { enum xsk_prog detected = XSK_PROG_FALLBACK; + struct bpf_load_program_attr prog_attr; + struct bpf_create_map_attr map_attr; __u32 size_out, retval, duration; char data_in = 0, data_out; struct bpf_insn insns[] = { @@ -378,15 +375,27 @@ static enum xsk_prog get_xsk_prog(void) BPF_EMIT_CALL(BPF_FUNC_redirect_map), BPF_EXIT_INSN(), }; - int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); + int prog_fd, map_fd, ret; - map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_XSKMAP; + map_attr.key_size = sizeof(int); + map_attr.value_size = sizeof(int); + map_attr.max_entries = 1; + + map_fd = bpf_create_map_xattr(&map_attr); if (map_fd < 0) return detected; insns[0].imm = map_fd; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + memset(&prog_attr, 0, sizeof(prog_attr)); + prog_attr.prog_type = BPF_PROG_TYPE_XDP; + prog_attr.insns = insns; + prog_attr.insns_cnt = ARRAY_SIZE(insns); + prog_attr.license = "GPL"; + + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); if (prog_fd < 0) { close(map_fd); return detected; @@ -486,13 +495,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) }; struct bpf_insn *progs[] = {prog, prog_redirect_flags}; enum xsk_prog option = get_xsk_prog(); - 
LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = log_buf, - .log_size = log_buf_size, - ); - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", - progs[option], insns_cnt[option], &opts); + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], + "LGPL-2.1 or BSD-2-Clause", 0, log_buf, + log_buf_size); if (prog_fd < 0) { pr_warn("BPF log buffer:\n%s", log_buf); return prog_fd; @@ -543,12 +549,13 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) struct ifreq ifr = {}; int fd, err, ret; - fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); + fd = socket(AF_LOCAL, SOCK_DGRAM, 0); if (fd < 0) return -errno; ifr.ifr_data = (void *)&channels; - libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); + memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { ret = -errno; @@ -583,8 +590,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk) if (max_queues < 0) return max_queues; - fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", - sizeof(int), sizeof(int), max_queues, NULL); + fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, 0); if (fd < 0) return fd; @@ -718,12 +725,14 @@ static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) static bool xsk_probe_bpf_link(void) { - LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, + .flags = XDP_FLAGS_SKB_MODE); + struct bpf_load_program_attr prog_attr; struct bpf_insn insns[2] = { BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), BPF_EXIT_INSN() }; - int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); + int prog_fd, link_fd = -1; int ifindex_lo = 1; bool ret = false; int err; @@ -735,7 +744,13 @@ static bool xsk_probe_bpf_link(void) if (link_fd >= 0) return true; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + memset(&prog_attr, 0, sizeof(prog_attr)); + prog_attr.prog_type = BPF_PROG_TYPE_XDP; + prog_attr.insns = insns; + prog_attr.insns_cnt = ARRAY_SIZE(insns); + prog_attr.license = "GPL"; + + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); if (prog_fd < 0) return ret; @@ -767,7 +782,8 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) } ctx->ifindex = ifindex; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + memcpy(ctx->ifname, ifname, IFNAMSIZ -1); + ctx->ifname[IFNAMSIZ - 1] = 0; xsk->ctx = ctx; xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); @@ -949,7 +965,8 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->refcount = 1; ctx->umem = umem; ctx->queue_id = queue_id; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); + ctx->ifname[IFNAMSIZ - 1] = '\0'; ctx->fill = fill; ctx->comp = comp; @@ -1029,7 +1046,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, } if (umem->refcount++ > 0) { - xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); + xsk->fd = socket(AF_XDP, SOCK_RAW, 0); if (xsk->fd < 0) { err = -errno; goto out_xsk_alloc; diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 64e9c57fd7..01c12dca9c 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -23,12 +23,6 @@ extern "C" { #endif -/* This whole API has been deprecated and moved to libxdp that can be found at - * https://github.com/xdp-project/xdp-tools. 
The APIs are exactly the same so - * it should just be linking with libxdp instead of libbpf for this set of - * functionality. If not, please submit a bug report on the aforementioned page. - */ - /* Load-Acquire Store-Release barriers used by the XDP socket * library. The following macros should *NOT* be considered part of * the xsk.h API, and is subject to change anytime. @@ -251,10 +245,8 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); } -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__fd(const struct xsk_socket *xsk); +LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 @@ -271,10 +263,10 @@ struct xsk_umem_config { __u32 flags; }; -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); +LIBBPF_API int xsk_setup_xdp_prog(int ifindex, + int *xsks_map_fd); +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, + int xsks_map_fd); /* Flags for the libbpf_flags field. */ #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) @@ -288,46 +280,40 @@ struct xsk_socket_config { }; /* Set config to NULL to get the default configuration. */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, - struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - const struct xsk_socket_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *config); +LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct 
xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); +LIBBPF_API int +xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); /* Returns 0 for success and -EBUSY if the umem is still in use. */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -void xsk_socket__delete(struct xsk_socket *xsk); +LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index ba4b8d94e0..109aa7ffcf 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -96,26 +96,6 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) } #endif -#ifndef find_first_and_bit -/* - * Find the first set bit in two memory regions. - */ -unsigned long _find_first_and_bit(const unsigned long *addr1, - const unsigned long *addr2, - unsigned long size) -{ - unsigned long idx, val; - - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - val = addr1[idx] & addr2[idx]; - if (val) - return min(idx * BITS_PER_LONG + __ffs(val), size); - } - - return size; -} -#endif - #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. 
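For context on the _find_first_and_bit() removal just above: that helper returns the index of the first bit set in both of two bitmaps, without materializing their AND into a scratch bitmap first. A minimal self-contained C sketch of the equivalent logic follows; the function name first_common_bit() and the open-coded lowest-bit scan (standing in for __ffs()) are illustrative assumptions, not part of this patch:

	#define BITS_PER_LONG (8 * sizeof(unsigned long))

	/* Return the index of the first bit set in both mask_a and mask_b,
	 * or size if they share no set bit. Mirrors the removed
	 * _find_first_and_bit(): AND each pair of words on the fly and
	 * scan the first non-zero result for its lowest set bit.
	 */
	static unsigned long first_common_bit(const unsigned long *mask_a,
					      const unsigned long *mask_b,
					      unsigned long size)
	{
		unsigned long idx, val, bit;

		for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
			val = mask_a[idx] & mask_b[idx];
			if (!val)
				continue;
			for (bit = 0; bit < BITS_PER_LONG; bit++) {
				if (val & (1UL << bit)) {
					bit += idx * BITS_PER_LONG;
					/* clamp like min(..., size) in the original */
					return bit < size ? bit : size;
				}
			}
		}
		return size;
	}
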
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 32c5051c24..63ae5e0195 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -48,7 +48,6 @@ SYNOPSIS int perf_cpu_map__nr(const struct perf_cpu_map *cpus); bool perf_cpu_map__empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); - bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) -- @@ -136,16 +135,16 @@ SYNOPSIS int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel); - void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); + void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); int perf_evsel__mmap(struct perf_evsel *evsel, int pages); void perf_evsel__munmap(struct perf_evsel *evsel); - void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); - int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, + void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); + int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); int perf_evsel__enable(struct perf_evsel *evsel); - int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); + int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); int perf_evsel__disable(struct perf_evsel *evsel); - int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); + int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index ee66760f1e..6d8e521c59 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -10,24 +10,15 @@ #include #include -static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) -{ - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); - - if (cpus != NULL) { - cpus->nr = nr_cpus; - refcount_set(&cpus->refcnt, 1); - - } - return cpus; -} - struct perf_cpu_map *perf_cpu_map__dummy_new(void) { - struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); - if (cpus) - cpus->map[0].cpu = -1; + if (cpus != NULL) { + cpus->nr = 1; + cpus->map[0] = -1; + refcount_set(&cpus->refcnt, 1); + } return cpus; } @@ -63,12 +54,15 @@ static struct perf_cpu_map *cpu_map__default_new(void) if (nr_cpus < 0) return NULL; - cpus = perf_cpu_map__alloc(nr_cpus); + cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int)); if (cpus != NULL) { int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i].cpu = i; + cpus->map[i] = i; + + cpus->nr = nr_cpus; + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -79,32 +73,31 @@ struct perf_cpu_map *perf_cpu_map__default_new(void) return cpu_map__default_new(); } - -static int cmp_cpu(const void *a, const void *b) +static int cmp_int(const void *a, const void *b) { - const struct perf_cpu *cpu_a = a, *cpu_b = b; - - return cpu_a->cpu - cpu_b->cpu; + return *(const int *)a - *(const int*)b; } -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) +static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int 
*tmp_cpus) { - size_t payload_size = nr_cpus * sizeof(struct perf_cpu); - struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus); + size_t payload_size = nr_cpus * sizeof(int); + struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size); int i, j; if (cpus != NULL) { memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); + qsort(cpus->map, nr_cpus, sizeof(int), cmp_int); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) - cpus->map[j++].cpu = cpus->map[i].cpu; + if (i == 0 || cpus->map[i] != cpus->map[i - 1]) + cpus->map[j++] = cpus->map[i]; } cpus->nr = j; assert(j <= nr_cpus); + refcount_set(&cpus->refcnt, 1); } + return cpus; } @@ -112,7 +105,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) { struct perf_cpu_map *cpus = NULL; int nr_cpus = 0; - struct perf_cpu *tmp_cpus = NULL, *tmp; + int *tmp_cpus = NULL, *tmp; int max_entries = 0; int n, cpu, prev; char sep; @@ -131,24 +124,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (new_max >= max_entries) { max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); + tmp = realloc(tmp_cpus, max_entries * sizeof(int)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } while (++prev < cpu) - tmp_cpus[nr_cpus++].cpu = prev; + tmp_cpus[nr_cpus++] = prev; } if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); + tmp = realloc(tmp_cpus, max_entries * sizeof(int)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++].cpu = cpu; + tmp_cpus[nr_cpus++] = cpu; if (n == 2 && sep == '-') prev = cpu; else @@ -186,7 +179,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) unsigned long start_cpu, end_cpu = 0; char *p = NULL; int i, nr_cpus = 0; - struct perf_cpu *tmp_cpus = NULL, *tmp; + int *tmp_cpus = NULL, *tmp; int max_entries = 0; if (!cpu_list) @@ -227,17 +220,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i].cpu == (int)start_cpu) + if (tmp_cpus[i] == (int)start_cpu) goto invalid; if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); + tmp = realloc(tmp_cpus, max_entries * sizeof(int)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++].cpu = (int)start_cpu; + tmp_cpus[nr_cpus++] = (int)start_cpu; } if (*p) ++p; @@ -257,16 +250,12 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) return cpus; } -struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { - struct perf_cpu result = { - .cpu = -1 - }; - if (cpus && idx < cpus->nr) return cpus->map[idx]; - return result; + return -1; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) @@ -276,47 +265,25 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? map->map[0].cpu == -1 : true; + return map ? 
map->map[0] == -1 : true; } -int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) +int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) { - int low, high; + int i; - if (!cpus) - return -1; - - low = 0; - high = cpus->nr; - while (low < high) { - int idx = (low + high) / 2; - struct perf_cpu cpu_at_idx = cpus->map[idx]; - - if (cpu_at_idx.cpu == cpu.cpu) - return idx; - - if (cpu_at_idx.cpu > cpu.cpu) - high = idx; - else - low = idx + 1; + for (i = 0; i < cpus->nr; ++i) { + if (cpus->map[i] == cpu) + return i; } return -1; } -bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) +int perf_cpu_map__max(struct perf_cpu_map *map) { - return perf_cpu_map__idx(cpus, cpu) != -1; -} - -struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) -{ - struct perf_cpu result = { - .cpu = -1 - }; - // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : result; + return map->nr > 0 ? map->map[map->nr - 1] : -1; } /* @@ -330,7 +297,7 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other) { - struct perf_cpu *tmp_cpus; + int *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; @@ -344,19 +311,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, if (!other) return orig; if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu))) + !memcmp(orig->map, other->map, orig->nr * sizeof(int))) return orig; tmp_len = orig->nr + other->nr; - tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); + tmp_cpus = malloc(tmp_len * sizeof(int)); if (!tmp_cpus) return NULL; /* Standard merge algorithm from wikipedia */ i = j = k = 0; while (i < orig->nr && j < other->nr) { - if (orig->map[i].cpu <= other->map[j].cpu) { - if (orig->map[i].cpu == other->map[j].cpu) + if (orig->map[i] <= other->map[j]) { + if (orig->map[i] == other->map[j]) j++; tmp_cpus[k++] = orig->map[i++]; } else diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 9a770bfdc8..e37dfad313 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int output, struct perf_cpu cpu) + int output, int cpu) { return perf_mmap__mmap(map, mp, output, cpu); } @@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { - struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); struct perf_evsel *evsel; int revent; @@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, return overwrite ? 
evlist->mmap_ovw_first : evlist->mmap_first; } -void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader) +void __perf_evlist__set_leader(struct list_head *list) { - struct perf_evsel *first, *last, *evsel; + struct perf_evsel *evsel, *leader; - first = list_first_entry(list, struct perf_evsel, node); - last = list_last_entry(list, struct perf_evsel, node); + leader = list_entry(list->next, struct perf_evsel, node); + evsel = list_entry(list->prev, struct perf_evsel, node); - leader->nr_members = last->idx - first->idx + 1; + leader->nr_members = evsel->idx - leader->idx + 1; __perf_evlist__for_each_entry(list, evsel) evsel->leader = leader; @@ -659,10 +659,7 @@ void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader void perf_evlist__set_leader(struct perf_evlist *evlist) { if (evlist->nr_entries) { - struct perf_evsel *first = list_entry(evlist->entries.next, - struct perf_evsel, node); - evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; - __perf_evlist__set_leader(&evlist->entries, first); + __perf_evlist__set_leader(&evlist->entries); } } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 210ea7c06c..8441e3e1aa 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,22 +43,18 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(_evsel, _cpu_map_idx, _thread) \ - ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread)) -#define MMAP(_evsel, _cpu_map_idx, _thread) \ - (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \ - : NULL) +#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) +#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { - int idx, thread; - - for (idx = 0; idx < ncpus; idx++) { + int cpu, thread; + for (cpu = 0; cpu < ncpus; cpu++) { for (thread = 0; thread < nthreads; thread++) { - int *fd = FD(evsel, idx, thread); + int *fd = FD(evsel, cpu, thread); if (fd) *fd = -1; @@ -78,13 +74,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre static int sys_perf_event_open(struct perf_event_attr *attr, - pid_t pid, struct perf_cpu cpu, int group_fd, + pid_t pid, int cpu, int group_fd, unsigned long flags) { - return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags); + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } -static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd) +static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; int *fd; @@ -101,7 +97,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, i if (!leader->fd) return -ENOTCONN; - fd = FD(leader, cpu_map_idx, thread); + fd = FD(leader, cpu, thread); if (fd == NULL || *fd == -1) return -EBADF; @@ -113,8 +109,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, i int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - struct perf_cpu cpu; - int idx, thread, err = 0; + int cpu, thread, err = 0; if (cpus == NULL) { static struct perf_cpu_map *empty_cpu_map; @@ -141,24 +136,24 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, } if (evsel->fd == NULL && - 
perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) return -ENOMEM; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { int fd, group_fd, *evsel_fd; - evsel_fd = FD(evsel, idx, thread); + evsel_fd = FD(evsel, cpu, thread); if (evsel_fd == NULL) return -EINVAL; - err = get_group_fd(evsel, idx, thread, &group_fd); + err = get_group_fd(evsel, cpu, thread, &group_fd); if (err < 0) return err; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, - cpu, group_fd, 0); + cpus->map[cpu], group_fd, 0); if (fd < 0) return -errno; @@ -170,12 +165,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, return err; } -static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx) +static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - int *fd = FD(evsel, cpu_map_idx, thread); + int *fd = FD(evsel, cpu, thread); if (fd && *fd >= 0) { close(*fd); @@ -186,8 +181,10 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx) void perf_evsel__close_fd(struct perf_evsel *evsel) { - for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++) - perf_evsel__close_fd_cpu(evsel, idx); + int cpu; + + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) + perf_evsel__close_fd_cpu(evsel, cpu); } void perf_evsel__free_fd(struct perf_evsel *evsel) @@ -205,29 +202,29 @@ void perf_evsel__close(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); } -void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx) +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) { if (evsel->fd == NULL) return; - perf_evsel__close_fd_cpu(evsel, cpu_map_idx); + perf_evsel__close_fd_cpu(evsel, cpu); } void perf_evsel__munmap(struct perf_evsel *evsel) { - int idx, thread; + int cpu, thread; if (evsel->fd == NULL || evsel->mmap == NULL) return; - for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, idx, thread); + int *fd = FD(evsel, cpu, thread); if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(MMAP(evsel, idx, thread)); + perf_mmap__munmap(MMAP(evsel, cpu, thread)); } } @@ -237,7 +234,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel) int perf_evsel__mmap(struct perf_evsel *evsel, int pages) { - int ret, idx, thread; + int ret, cpu, thread; struct perf_mmap_param mp = { .prot = PROT_READ | PROT_WRITE, .mask = (pages * page_size) - 1, @@ -249,16 +246,15 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0) return -ENOMEM; - for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, idx, thread); + int *fd = FD(evsel, cpu, thread); struct perf_mmap *map; - struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx); if (fd == NULL || *fd < 0) continue; - map = MMAP(evsel, idx, thread); + map = MMAP(evsel, cpu, thread); perf_mmap__init(map, NULL, false, NULL); ret = perf_mmap__mmap(map, &mp, *fd, cpu); @@ -272,14 +268,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) return 0; } -void 
*perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread) +void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) { - int *fd = FD(evsel, cpu_map_idx, thread); + int *fd = FD(evsel, cpu, thread); - if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL) + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) return NULL; - return MMAP(evsel, cpu_map_idx, thread)->base; + return MMAP(evsel, cpu, thread)->base; } int perf_evsel__read_size(struct perf_evsel *evsel) @@ -307,19 +303,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel) return size; } -int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); - int *fd = FD(evsel, cpu_map_idx, thread); + int *fd = FD(evsel, cpu, thread); memset(count, 0, sizeof(*count)); if (fd == NULL || *fd < 0) return -EINVAL; - if (MMAP(evsel, cpu_map_idx, thread) && - !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count)) + if (MMAP(evsel, cpu, thread) && + !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) return 0; if (readn(*fd, count->values, size) <= 0) @@ -330,13 +326,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, - int cpu_map_idx) + int cpu) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { int err; - int *fd = FD(evsel, cpu_map_idx, thread); + int *fd = FD(evsel, cpu, thread); if (fd == NULL || *fd < 0) return -1; @@ -350,9 +346,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, return 0; } -int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) +int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu); } int perf_evsel__enable(struct perf_evsel *evsel) @@ -365,9 +361,9 @@ int perf_evsel__enable(struct perf_evsel *evsel) return err; } -int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx) +int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu); } int perf_evsel__disable(struct perf_evsel *evsel) @@ -384,7 +380,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter) { int err = 0, i; - for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++) + for (i = 0; i < evsel->cpus->nr && !err; i++) err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_SET_FILTER, (void *)filter, i); @@ -435,22 +431,3 @@ void perf_evsel__free_id(struct perf_evsel *evsel) zfree(&evsel->id); evsel->ids = 0; } - -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, __s8 *pscaled) -{ - s8 scaled = 0; - - if (scale) { - if (count->run == 0) { - scaled = -1; - count->val = 0; - } else if (count->run < count->ena) { - scaled = 1; - count->val = (u64)((double)count->val * count->ena / count->run); - } - } - - if (pscaled) - *pscaled = scaled; -} diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 1973a18c09..840d403258 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -3,27 +3,17 @@ 
#define __LIBPERF_INTERNAL_CPUMAP_H #include -#include -/** - * A sized, reference counted, sorted array of integers representing CPU - * numbers. This is commonly used to capture which CPUs a PMU is associated - * with. The indices into the cpumap are frequently used as they avoid having - * gaps if CPU numbers were used. For events associated with a pid, rather than - * a CPU, a single dummy map with an entry of -1 is used. - */ struct perf_cpu_map { refcount_t refcnt; - /** Length of the map array. */ int nr; - /** The CPU values. */ - struct perf_cpu map[]; + int map[]; }; #ifndef MAX_NR_CPUS #define MAX_NR_CPUS 2048 #endif -int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); +int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 4cefade540..f366dbad6a 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -4,7 +4,6 @@ #include #include -#include #include #define PERF_EVLIST__HLIST_BITS 8 @@ -37,7 +36,7 @@ typedef void typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int -(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu); +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); struct perf_evlist_mmap_ops { perf_evlist_mmap__cb_idx_t idx; @@ -128,5 +127,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, void perf_evlist__reset_id_hash(struct perf_evlist *evlist); -void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader); +void __perf_evlist__set_leader(struct list_head *list); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index cfc9ebd796..1f3eacbad2 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -6,8 +6,8 @@ #include #include #include -#include +struct perf_cpu_map; struct perf_thread_map; struct xyarray; @@ -27,7 +27,7 @@ struct perf_sample_id { * queue number. */ int idx; - struct perf_cpu cpu; + int cpu; pid_t tid; /* Holds total ID period value for PERF_SAMPLE_READ processing. 
*/ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5a062af8e9..5e3422f40e 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -6,7 +6,6 @@ #include #include #include -#include /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) @@ -25,7 +24,7 @@ struct perf_mmap { void *base; int mask; int fd; - struct perf_cpu cpu; + int cpu; refcount_t refcnt; u64 prev; u64 start; @@ -47,7 +46,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, struct perf_cpu cpu); + int fd, int cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 4a2edbdb5e..7c27766ea0 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,14 +3,10 @@ #define __LIBPERF_CPUMAP_H #include -#include #include #include -/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ -struct perf_cpu { - int cpu; -}; +struct perf_cpu_map; LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); @@ -20,11 +16,10 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map); -LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); +LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 75ee385fb0..4d0c02ba3f 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -289,11 +289,6 @@ struct perf_record_itrace_start { __u32 tid; }; -struct perf_record_aux_output_hw_id { - struct perf_event_header header; - __u64 hw_id; -}; - struct perf_record_thread_map_entry { __u64 pid; char comm[16]; @@ -419,7 +414,6 @@ union perf_event { struct perf_record_auxtrace_error auxtrace_error; struct perf_record_aux aux; struct perf_record_itrace_start itrace_start; - struct perf_record_aux_output_hw_id aux_output_hw_id; struct perf_record_switch context_switch; struct perf_record_thread_map thread_map; struct perf_record_cpu_map cpu_map; diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 2a9516b42d..60eae25076 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -4,8 +4,6 @@ #include #include -#include -#include struct perf_evsel; struct perf_event_attr; @@ -28,20 +26,18 @@ LIBPERF_API void perf_evsel__delete(struct 
perf_evsel *evsel); LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); -LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); +LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages); LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel); -LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); -LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, +LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); +LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); +LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); +LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); -LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, __s8 *pscaled); #endif /* __LIBPERF_EVSEL_H */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 6fa0d65157..71468606e8 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -2,7 +2,6 @@ LIBPERF_0.0.1 { global: libperf_init; perf_cpu_map__dummy_new; - perf_cpu_map__default_new; perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; @@ -11,7 +10,6 @@ LIBPERF_0.0.1 { perf_cpu_map__cpu; perf_cpu_map__empty; perf_cpu_map__max; - perf_cpu_map__has; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; @@ -52,7 +50,6 @@ LIBPERF_0.0.1 { perf_mmap__read_init; perf_mmap__read_done; perf_mmap__read_event; - perf_counts_values__scale; local: *; }; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 0d1634cedf..c89dfa5f67 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -13,7 +13,6 @@ #include #include #include -#include #include "internal.h" void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, @@ -33,7 +32,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, struct perf_cpu cpu) + int fd, int cpu) { map->prev = 0; map->mask = mp->mask; @@ -295,103 +294,6 @@ static u64 read_timestamp(void) return low | ((u64)high) << 32; } -#elif defined(__aarch64__) -#define read_sysreg(r) ({ \ - u64 __val; \ - asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ - __val; \ -}) - -static u64 read_pmccntr(void) -{ - return read_sysreg(pmccntr_el0); -} - -#define PMEVCNTR_READ(idx) \ - static u64 read_pmevcntr_##idx(void) { \ - return read_sysreg(pmevcntr##idx##_el0); \ - } - -PMEVCNTR_READ(0); -PMEVCNTR_READ(1); -PMEVCNTR_READ(2); -PMEVCNTR_READ(3); -PMEVCNTR_READ(4); -PMEVCNTR_READ(5); -PMEVCNTR_READ(6); -PMEVCNTR_READ(7); -PMEVCNTR_READ(8); -PMEVCNTR_READ(9); -PMEVCNTR_READ(10); 
-PMEVCNTR_READ(11); -PMEVCNTR_READ(12); -PMEVCNTR_READ(13); -PMEVCNTR_READ(14); -PMEVCNTR_READ(15); -PMEVCNTR_READ(16); -PMEVCNTR_READ(17); -PMEVCNTR_READ(18); -PMEVCNTR_READ(19); -PMEVCNTR_READ(20); -PMEVCNTR_READ(21); -PMEVCNTR_READ(22); -PMEVCNTR_READ(23); -PMEVCNTR_READ(24); -PMEVCNTR_READ(25); -PMEVCNTR_READ(26); -PMEVCNTR_READ(27); -PMEVCNTR_READ(28); -PMEVCNTR_READ(29); -PMEVCNTR_READ(30); - -/* - * Read a value direct from PMEVCNTR - */ -static u64 read_perf_counter(unsigned int counter) -{ - static u64 (* const read_f[])(void) = { - read_pmevcntr_0, - read_pmevcntr_1, - read_pmevcntr_2, - read_pmevcntr_3, - read_pmevcntr_4, - read_pmevcntr_5, - read_pmevcntr_6, - read_pmevcntr_7, - read_pmevcntr_8, - read_pmevcntr_9, - read_pmevcntr_10, - read_pmevcntr_11, - read_pmevcntr_12, - read_pmevcntr_13, - read_pmevcntr_14, - read_pmevcntr_15, - read_pmevcntr_16, - read_pmevcntr_17, - read_pmevcntr_18, - read_pmevcntr_19, - read_pmevcntr_20, - read_pmevcntr_21, - read_pmevcntr_22, - read_pmevcntr_23, - read_pmevcntr_24, - read_pmevcntr_25, - read_pmevcntr_26, - read_pmevcntr_27, - read_pmevcntr_28, - read_pmevcntr_29, - read_pmevcntr_30, - read_pmccntr - }; - - if (counter < ARRAY_SIZE(read_f)) - return (read_f[counter])(); - - return 0; -} - -static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } - #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } @@ -451,6 +353,8 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count count->ena += delta; if (idx) count->run += delta; + + cnt = mul_u64_u64_div64(cnt, count->ena, count->run); + } count->val = cnt; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index 87b0510a55..d39378eaf8 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -14,8 +14,6 @@ static int libperf_print(enum libperf_print_level level, int test_cpumap(int argc, char **argv) { struct perf_cpu_map *cpus; - struct perf_cpu cpu; - int idx; __T_START; @@ -29,15 +27,6 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); - cpus = perf_cpu_map__default_new(); - if (!cpus) - return -1; - - perf_cpu_map__for_each_cpu(cpu, idx, cpus) - __T("wrong cpu number", cpu.cpu != -1); - - perf_cpu_map__put(cpus); - __T_END; return tests_failed == 0 ?
0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index fa854c83b7..ce91a582f0 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET) -#include #include #include #include @@ -22,9 +21,6 @@ #include "tests.h" #include -#define EVENT_NUM 15 -#define WAIT_COUNT 100000000UL - static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { @@ -335,8 +331,7 @@ static int test_mmap_cpus(void) }; cpu_set_t saved_mask; char path[PATH_MAX]; - int id, err, tmp; - struct perf_cpu cpu; + int id, err, cpu, tmp; union perf_event *event; int count = 0; @@ -379,7 +374,7 @@ static int test_mmap_cpus(void) cpu_set_t mask; CPU_ZERO(&mask); - CPU_SET(cpu.cpu, &mask); + CPU_SET(cpu, &mask); err = sched_setaffinity(0, sizeof(mask), &mask); __T("sched_setaffinity failed", err == 0); @@ -418,159 +413,6 @@ static int test_mmap_cpus(void) return 0; } -static double display_error(long long average, - long long high, - long long low, - long long expected) -{ - double error; - - error = (((double)average - expected) / expected) * 100.0; - - __T_VERBOSE(" Expected: %lld\n", expected); - __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n", - high, low, average); - - __T_VERBOSE(" Average Error = %.2f%%\n", error); - - return error; -} - -static int test_stat_multiplexing(void) -{ - struct perf_counts_values expected_counts = { .val = 0 }; - struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },}; - struct perf_thread_map *threads; - struct perf_evlist *evlist; - struct perf_evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_INSTRUCTIONS, - .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING, - .disabled = 1, - }; - int err, i, nonzero = 0; - unsigned long count; - long long max = 0, min = 0, avg = 0; - double error = 0.0; - s8 scaled = 0; - - /* read for non-multiplexing event count */ - threads = perf_thread_map__new_dummy(); - __T("failed to create threads", threads); - - perf_thread_map__set_pid(threads, 0, 0); - - evsel = perf_evsel__new(&attr); - __T("failed to create evsel", evsel); - - err = perf_evsel__open(evsel, NULL, threads); - __T("failed to open evsel", err == 0); - - err = perf_evsel__enable(evsel); - __T("failed to enable evsel", err == 0); - - /* wait loop */ - count = WAIT_COUNT; - while (count--) - ; - - perf_evsel__read(evsel, 0, 0, &expected_counts); - __T("failed to read value for evsel", expected_counts.val != 0); - __T("failed to read non-multiplexing event count", - expected_counts.ena == expected_counts.run); - - err = perf_evsel__disable(evsel); - __T("failed to disable evsel", err == 0); - - perf_evsel__close(evsel); - perf_evsel__delete(evsel); - - perf_thread_map__put(threads); - - /* read for multiplexing event count */ - threads = perf_thread_map__new_dummy(); - __T("failed to create threads", threads); - - perf_thread_map__set_pid(threads, 0, 0); - - evlist = perf_evlist__new(); - __T("failed to create evlist", evlist); - - for (i = 0; i < EVENT_NUM; i++) { - evsel = perf_evsel__new(&attr); - __T("failed to create evsel", evsel); - - perf_evlist__add(evlist, evsel); - } - perf_evlist__set_maps(evlist, NULL, threads); - - err = perf_evlist__open(evlist); - __T("failed to open evlist", err == 0); - - perf_evlist__enable(evlist); - - /* wait loop */ - count =
WAIT_COUNT; - while (count--) - ; - - i = 0; - perf_evlist__for_each_evsel(evlist, evsel) { - perf_evsel__read(evsel, 0, 0, &counts[i]); - __T("failed to read value for evsel", counts[i].val != 0); - i++; - } - - perf_evlist__disable(evlist); - - min = counts[0].val; - for (i = 0; i < EVENT_NUM; i++) { - __T_VERBOSE("Event %2d -- Raw count = %" PRIu64 ", run = %" PRIu64 ", enable = %" PRIu64 "\n", - i, counts[i].val, counts[i].run, counts[i].ena); - - perf_counts_values__scale(&counts[i], true, &scaled); - if (scaled == 1) { - __T_VERBOSE("\t Scaled count = %" PRIu64 " (%.2lf%%, %" PRIu64 "/%" PRIu64 ")\n", - counts[i].val, - (double)counts[i].run / (double)counts[i].ena * 100.0, - counts[i].run, counts[i].ena); - } else if (scaled == -1) { - __T_VERBOSE("\t Not Running\n"); - } else { - __T_VERBOSE("\t Not Scaling\n"); - } - - if (counts[i].val > max) - max = counts[i].val; - - if (counts[i].val < min) - min = counts[i].val; - - avg += counts[i].val; - - if (counts[i].val != 0) - nonzero++; - } - - if (nonzero != 0) - avg = avg / nonzero; - else - avg = 0; - - error = display_error(avg, max, min, expected_counts.val); - - __T("Error out of range!", ((error <= 1.0) && (error >= -1.0))); - - perf_evlist__close(evlist); - perf_evlist__delete(evlist); - - perf_thread_map__put(threads); - - return 0; -} - int test_evlist(int argc, char **argv) { __T_START; @@ -582,7 +424,6 @@ int test_evlist(int argc, char **argv) test_stat_thread_enable(); test_mmap_thread(); test_mmap_cpus(); - test_stat_multiplexing(); __T_END; return tests_failed == 0 ? 0 : -1; diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 89be89afb2..33ae933486 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -130,9 +130,6 @@ static int test_stat_user_read(int event) struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = event, -#ifdef __aarch64__ - .config1 = 0x2, /* Request user access */ -#endif }; int err, i; @@ -153,7 +150,7 @@ static int test_stat_user_read(int event) pc = perf_evsel__mmap_base(evsel, 0, 0); __T("failed to get mmapped address", pc); -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +#if defined(__i386__) || defined(__x86_64__) __T("userspace counter access not supported", pc->cap_user_rdpmc); __T("userspace counter access not enabled", pc->index); __T("userspace counter width not set", pc->pmc_width >= 32); diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 8e24c4c78c..fe58843d04 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1367,14 +1367,6 @@ static int field_is_dynamic(struct tep_format_field *field) return 0; } -static int field_is_relative_dynamic(struct tep_format_field *field) -{ - if (strncmp(field->type, "__rel_loc", 9) == 0) - return 1; - - return 0; -} - static int field_is_long(struct tep_format_field *field) { /* includes long long */ @@ -1630,8 +1622,6 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->flags |= TEP_FIELD_IS_STRING; if (field_is_dynamic(field)) field->flags |= TEP_FIELD_IS_DYNAMIC; - if (field_is_relative_dynamic(field)) - field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; if (field_is_long(field)) field->flags |= TEP_FIELD_IS_LONG; @@ -2938,7 +2928,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, arg->type = TEP_PRINT_STRING; arg->string.string = token; - arg->string.field = NULL; + 
arg->string.offset = -1; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -2967,7 +2957,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar arg->type = TEP_PRINT_BITMASK; arg->bitmask.bitmask = token; - arg->bitmask.field = NULL; + arg->bitmask.offset = -1; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -3133,23 +3123,19 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, free_token(token); return process_int_array(event, arg, tok); } - if (strcmp(token, "__get_str") == 0 || - strcmp(token, "__get_rel_str") == 0) { + if (strcmp(token, "__get_str") == 0) { free_token(token); return process_str(event, arg, tok); } - if (strcmp(token, "__get_bitmask") == 0 || - strcmp(token, "__get_rel_bitmask") == 0) { + if (strcmp(token, "__get_bitmask") == 0) { free_token(token); return process_bitmask(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array") == 0 || - strcmp(token, "__get_rel_dynamic_array") == 0) { + if (strcmp(token, "__get_dynamic_array") == 0) { free_token(token); return process_dynamic_array(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array_len") == 0 || - strcmp(token, "__get_rel_dynamic_array_len") == 0) { + if (strcmp(token, "__get_dynamic_array_len") == 0) { free_token(token); return process_dynamic_array_len(event, arg, tok); } @@ -4177,16 +4163,14 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case TEP_PRINT_STRING: { int str_offset; - if (!arg->string.field) - arg->string.field = tep_find_any_field(event, arg->string.string); - if (!arg->string.field) - break; + if (arg->string.offset == -1) { + struct tep_format_field *f; - str_offset = data2host4(tep, - *(unsigned int *)(data + arg->string.field->offset)); + f = tep_find_any_field(event, arg->string.string); + arg->string.offset = f->offset; + } + str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); str_offset &= 0xffff; - if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) - str_offset += arg->string.field->offset + arg->string.field->size; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; } @@ -4197,16 +4181,15 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, int bitmask_offset; int bitmask_size; - if (!arg->bitmask.field) - arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); - if (!arg->bitmask.field) - break; - bitmask_offset = data2host4(tep, - *(unsigned int *)(data + arg->bitmask.field->offset)); + if (arg->bitmask.offset == -1) { + struct tep_format_field *f; + + f = tep_find_any_field(event, arg->bitmask.bitmask); + arg->bitmask.offset = f->offset; + } + bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; - if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) - bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; print_bitmask_to_seq(tep, s, format, len_arg, data + bitmask_offset, bitmask_size); break; @@ -5126,8 +5109,6 @@ void tep_print_field(struct trace_seq *s, void *data, offset = val; len = offset >> 16; offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -7006,8 +6987,6 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, data + offset, field->size); *len = offset >> 16; offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) 
- offset += field->offset + field->size; } else *len = field->size; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 41d4f9f6a8..a67ad9a5b8 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -125,7 +125,6 @@ enum tep_format_flags { TEP_FIELD_IS_LONG = 32, TEP_FIELD_IS_FLAG = 64, TEP_FIELD_IS_SYMBOLIC = 128, - TEP_FIELD_IS_RELATIVE = 256, }; struct tep_format_field { @@ -154,12 +153,12 @@ struct tep_print_arg_atom { struct tep_print_arg_string { char *string; - struct tep_format_field *field; + int offset; }; struct tep_print_arg_bitmask { char *bitmask; - struct tep_format_field *field; + int offset; }; struct tep_print_arg_field { diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 5df177070d..368826bb5a 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1712,11 +1712,8 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) { addr = *(unsigned int *)val; + val = record->data + (addr & 0xffff); size = addr >> 16; - addr &= 0xffff; - if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE) - addr += arg->str.field->offset + arg->str.field->size; - val = record->data + addr; } /* diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 394ee57d58..5d72f3112e 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -1813,16 +1813,15 @@ spin_trylock() -- we can call these things lock-releases and lock-acquires -- have two properties beyond those of ordinary releases and acquires. -First, when a lock-acquire reads from or is po-after a lock-release, -the LKMM requires that every instruction po-before the lock-release -must execute before any instruction po-after the lock-acquire. This -would naturally hold if the release and acquire operations were on -different CPUs and accessed the same lock variable, but the LKMM says -it also holds when they are on the same CPU, even if they access -different lock variables. For example: +First, when a lock-acquire reads from a lock-release, the LKMM +requires that every instruction po-before the lock-release must +execute before any instruction po-after the lock-acquire. This would +naturally hold if the release and acquire operations were on different +CPUs, but the LKMM says it holds even when they are on the same CPU. +For example: int x, y; - spinlock_t s, t; + spinlock_t s; P0() { @@ -1831,9 +1830,9 @@ different lock variables. For example: spin_lock(&s); r1 = READ_ONCE(x); spin_unlock(&s); - spin_lock(&t); + spin_lock(&s); r2 = READ_ONCE(y); - spin_unlock(&t); + spin_unlock(&s); } P1() @@ -1843,10 +1842,10 @@ different lock variables. For example: WRITE_ONCE(x, 1); } -Here the second spin_lock() is po-after the first spin_unlock(), and -therefore the load of x must execute before the load of y, even though -the two locking operations use different locks. Thus we cannot have -r1 = 1 and r2 = 0 at the end (this is an instance of the MP pattern). +Here the second spin_lock() reads from the first spin_unlock(), and +therefore the load of x must execute before the load of y. Thus we +cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the +MP pattern). This requirement does not apply to ordinary release and acquire fences, only to lock-related operations. 
For instance, suppose P0() @@ -1873,13 +1872,13 @@ instructions in the following order: and thus it could load y before x, obtaining r2 = 0 and r1 = 1. -Second, when a lock-acquire reads from or is po-after a lock-release, -and some other stores W and W' occur po-before the lock-release and -po-after the lock-acquire respectively, the LKMM requires that W must -propagate to each CPU before W' does. For example, consider: +Second, when a lock-acquire reads from a lock-release, and some other +stores W and W' occur po-before the lock-release and po-after the +lock-acquire respectively, the LKMM requires that W must propagate to +each CPU before W' does. For example, consider: int x, y; spinlock_t s; P0() { @@ -1909,12 +1908,7 @@ propagate to each CPU before W' does. For example, consider: If r1 = 1 at the end then the spin_lock() in P1 must have read from the spin_unlock() in P0. Hence the store to x must propagate to P2 -before the store to y does, so we cannot have r2 = 1 and r3 = 0. But -if P1 had used a lock variable different from s, the writes could have -propagated in either order. (On the other hand, if the code in P0 and -P1 had all executed on a single CPU, as in the example before this -one, then the writes would have propagated in order even if the two -critical sections used different lock variables.) +before the store to y does, so we cannot have r2 = 1 and r3 = 0. These two special requirements for lock-release and lock-acquire do not arise from the operational model. Nevertheless, kernel developers diff --git a/tools/memory-model/README b/tools/memory-model/README index 9edd402704..9a84c45504 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -195,18 +195,6 @@ litmus-tests are listed in litmus-tests/README. A great many more litmus tests are available at https://github.com/paulmckrcu/litmus. - By "representative", we mean that each litmus test in the litmus-tests - directory is: - - 1) simple, the number of threads should be relatively - small and each thread function should be relatively - simple. - 2) orthogonal, there should be no two litmus tests - describing the same aspect of the memory model. - 3) textbook, developers can easily copy-paste-modify - the litmus tests to use the patterns in their own - code. - lock.cat Provides a front-end analysis of lock acquisition and release, for example, associating a lock acquisition with the preceding diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index d70315fdde..2a9b4fe4a8 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -27,7 +27,7 @@ include "lock.cat" (* Release Acquire *) let acq-po = [Acquire] ; po ; [M] let po-rel = [M] ; po ; [Release] -let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po +let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po (* Fences *) let R4rmb = R \ Noreturn (* Reads for which rmb works *) @@ -70,12 +70,12 @@ let rwdep = (dep | ctrl) ; [W] let overwrite = co | fr let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb) let to-r = addr | (dep ; [Marked] ; rfi) -let ppo = to-r | to-w | fence | (po-unlock-lock-po & int) +let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int) (* Propagation: Ordering from release operations and strong fences. *) let A-cumul(r) = (rfe ; [Marked])? ; r let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb | - po-unlock-lock-po) ; [Marked] + po-unlock-rf-lock-po) ; [Marked] let prop = [Marked] ; (overwrite & ext)?
; cumul-fence* ; [Marked] ; rfe? ; [Marked] diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README index d311a0ff1a..681f9067fa 100644 --- a/tools/memory-model/litmus-tests/README +++ b/tools/memory-model/litmus-tests/README @@ -63,10 +63,6 @@ LB+poonceonces.litmus As above, but with store-release replaced with WRITE_ONCE() and load-acquire replaced with READ_ONCE(). -LB+unlocklockonceonce+poacquireonce.litmus - Does an unlock+lock pair provide an ordering guarantee between a - load and a store? - MP+onceassign+derefonce.litmus As below, but with rcu_assign_pointer() and an rcu_dereference(). @@ -94,10 +90,6 @@ MP+porevlocks.litmus As below, but with the first access of the writer process and the second access of the reader process protected by a lock. -MP+unlocklockonceonce+fencermbonceonce.litmus - Does an unlock+lock pair provide an ordering guarantee between a - store and another store? - MP+fencewmbonceonce+fencermbonceonce.litmus Does an smp_wmb() (between the stores) and an smp_rmb() (between the loads) suffice for the message-passing litmus test, where one diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus index 10a2aa04cd..415248fb66 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus @@ -1,11 +1,12 @@ -C Z6.0+pooncelock+pooncelock+pombonce +C Z6.0+pooncelock+poonceLock+pombonce (* - * Result: Sometimes + * Result: Never * - * This example demonstrates that a pair of accesses made by different - * processes each while holding a given lock will not necessarily be - * seen as ordered by a third process not holding that lock. + * This litmus test demonstrates how smp_mb__after_spinlock() may be + * used to ensure that accesses in different critical sections for a + * given lock running on different CPUs are nevertheless seen in order + * by CPUs not holding that lock.
*) {} @@ -23,6 +24,7 @@ P1(int *y, int *z, spinlock_t *mylock) int r0; spin_lock(mylock); + smp_mb__after_spinlock(); r0 = READ_ONCE(*y); WRITE_ONCE(*z, 1); spin_unlock(mylock); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index c10ef78df0..77b51600e3 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -20,7 +20,6 @@ #include #include #include -#include #include static int is_x86_64(const struct elf *elf) @@ -103,13 +102,12 @@ unsigned long arch_jump_destination(struct instruction *insn) #define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) #define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg)) -int arch_decode_instruction(struct objtool_file *file, const struct section *sec, +int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, unsigned long *immediate, struct list_head *ops_list) { - const struct elf *elf = file->elf; struct insn insn; int x86_64, ret; unsigned char op1, op2, @@ -531,11 +529,6 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } break; - case 0xcc: - /* int3 */ - *type = INSN_TRAP; - break; - case 0xe3: /* jecxz/jrcxz */ *type = INSN_JUMP_CONDITIONAL; @@ -551,36 +544,6 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec *type = INSN_RETURN; break; - case 0xc7: /* mov imm, r/m */ - if (!noinstr) - break; - - if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) { - struct reloc *immr, *disp; - struct symbol *func; - int idx; - - immr = find_reloc_by_dest(elf, (void *)sec, offset+3); - disp = find_reloc_by_dest(elf, (void *)sec, offset+7); - - if (!immr || strcmp(immr->sym->name, "pv_ops")) - break; - - idx = (immr->addend + 8) / sizeof(void *); - - func = disp->sym; - if (disp->sym->type == STT_SECTION) - func = find_symbol_by_offset(disp->sym->sec, disp->addend); - if (!func) { - WARN("no func for pv_ops[]"); - return -1; - } - - objtool_pv_add(file, idx, func); - } - - break; - case 0xcf: /* iret */ /* * Handle sync_core(), which has an IRET to self. @@ -702,10 +665,10 @@ const char *arch_ret_insn(int len) { static const char ret[5][5] = { { BYTE_RET }, - { BYTE_RET, 0xcc }, - { BYTE_RET, 0xcc, BYTES_NOP1 }, - { BYTE_RET, 0xcc, BYTES_NOP2 }, - { BYTE_RET, 0xcc, BYTES_NOP3 }, + { BYTE_RET, BYTES_NOP1 }, + { BYTE_RET, BYTES_NOP2 }, + { BYTE_RET, BYTES_NOP3 }, + { BYTE_RET, BYTES_NOP4 }, }; if (len < 1 || len > 5) { @@ -716,32 +679,154 @@ const char *arch_ret_insn(int len) return ret[len-1]; } -int arch_decode_hint_reg(u8 sp_reg, int *base) +/* asm/alternative.h ? 
*/ + +#define ALTINSTR_FLAG_INV (1 << 15) +#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +static int elf_add_alternative(struct elf *elf, + struct instruction *orig, struct symbol *sym, + int cpuid, u8 orig_len, u8 repl_len) { + const int size = sizeof(struct alt_instr); + struct alt_instr *alt; + struct section *sec; + Elf_Scn *s; + + sec = find_section_by_name(elf, ".altinstructions"); + if (!sec) { + sec = elf_create_section(elf, ".altinstructions", + SHF_ALLOC, 0, 0); + + if (!sec) { + WARN_ELF("elf_create_section"); + return -1; + } + } + + s = elf_getscn(elf->elf, sec->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + sec->data = elf_newdata(s); + if (!sec->data) { + WARN_ELF("elf_newdata"); + return -1; + } + + sec->data->d_size = size; + sec->data->d_align = 1; + + alt = sec->data->d_buf = malloc(size); + if (!sec->data->d_buf) { + perror("malloc"); + return -1; + } + memset(sec->data->d_buf, 0, size); + + if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size, + R_X86_64_PC32, orig->sec, orig->offset)) { + WARN("elf_create_reloc: alt_instr::instr_offset"); + return -1; + } + + if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4, + R_X86_64_PC32, sym, 0)) { + WARN("elf_create_reloc: alt_instr::repl_offset"); + return -1; + } + + alt->cpuid = bswap_if_needed(cpuid); + alt->instrlen = orig_len; + alt->replacementlen = repl_len; + + sec->sh.sh_size += size; + sec->changed = true; + + return 0; +} + +#define X86_FEATURE_RETPOLINE ( 7*32+12) + +int arch_rewrite_retpolines(struct objtool_file *file) +{ + struct instruction *insn; + struct reloc *reloc; + struct symbol *sym; + char name[32] = ""; + + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC) + continue; + + if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) + continue; + + reloc = insn->reloc; + + sprintf(name, "__x86_indirect_alt_%s_%s", + insn->type == INSN_JUMP_DYNAMIC ? 
"jmp" : "call", + reloc->sym->name + 21); + + sym = find_symbol_by_name(file->elf, name); + if (!sym) { + sym = elf_create_undef_symbol(file->elf, name); + if (!sym) { + WARN("elf_create_undef_symbol"); + return -1; + } + } + + if (elf_add_alternative(file->elf, insn, sym, + ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) { + WARN("elf_add_alternative"); + return -1; + } + } + + return 0; +} + +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) +{ + struct cfi_reg *cfa = &insn->cfi.cfa; + switch (sp_reg) { case ORC_REG_UNDEFINED: - *base = CFI_UNDEFINED; + cfa->base = CFI_UNDEFINED; break; case ORC_REG_SP: - *base = CFI_SP; + cfa->base = CFI_SP; break; case ORC_REG_BP: - *base = CFI_BP; + cfa->base = CFI_BP; break; case ORC_REG_SP_INDIRECT: - *base = CFI_SP_INDIRECT; + cfa->base = CFI_SP_INDIRECT; break; case ORC_REG_R10: - *base = CFI_R10; + cfa->base = CFI_R10; break; case ORC_REG_R13: - *base = CFI_R13; + cfa->base = CFI_R13; break; case ORC_REG_DI: - *base = CFI_DI; + cfa->base = CFI_DI; break; case ORC_REG_DX: - *base = CFI_DX; + cfa->base = CFI_DX; break; default: return -1; diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 38070f2610..8b38b5d6fe 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,7 @@ #include bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls; + validate_dup, vmlinux, mcount, noinstr, backup; static const char * const check_usage[] = { "objtool check [] file.o", @@ -45,7 +45,6 @@ const struct option check_options[] = { OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), - OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), OPT_END(), }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7c33ec67c4..81982948f9 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -27,11 +26,7 @@ struct alternative { bool skip_orig; }; -static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache; - -static struct cfi_init_state initial_func_cfi; -static struct cfi_state init_cfi; -static struct cfi_state func_cfi; +struct cfi_init_state initial_func_cfi; struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset) @@ -168,16 +163,14 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "panic", "do_exit", "do_task_dead", - "kthread_exit", - "make_task_dead", - "__module_put_and_kthread_exit", - "kthread_complete_and_exit", + "__module_put_and_exit", + "complete_and_exit", "__reiserfs_panic", "lbug_with_loc", "fortify_panic", "usercopy_abort", "machine_real_restart", - "rewind_stack_and_make_dead", + "rewind_stack_do_exit", "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", @@ -273,78 +266,6 @@ static void init_insn_state(struct insn_state *state, struct section *sec) state->noinstr = sec->noinstr; } -static struct cfi_state *cfi_alloc(void) -{ - struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); - if (!cfi) { - WARN("calloc failed"); - exit(1); - } - nr_cfi++; - return cfi; -} - -static int cfi_bits; -static struct hlist_head *cfi_hash; - -static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2) -{ - return memcmp((void *)cfi1 + sizeof(cfi1->hash), - (void *)cfi2 + 
sizeof(cfi2->hash), - sizeof(struct cfi_state) - sizeof(struct hlist_node)); -} - -static inline u32 cfi_key(struct cfi_state *cfi) -{ - return jhash((void *)cfi + sizeof(cfi->hash), - sizeof(*cfi) - sizeof(cfi->hash), 0); -} - -static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi) -{ - struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; - struct cfi_state *obj; - - hlist_for_each_entry(obj, head, hash) { - if (!cficmp(cfi, obj)) { - nr_cfi_cache++; - return obj; - } - } - - obj = cfi_alloc(); - *obj = *cfi; - hlist_add_head(&obj->hash, head); - - return obj; -} - -static void cfi_hash_add(struct cfi_state *cfi) -{ - struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; - - hlist_add_head(&cfi->hash, head); -} - -static void *cfi_hash_alloc(unsigned long size) -{ - cfi_bits = max(10, ilog2(size)); - cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits, - PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANON, -1, 0); - if (cfi_hash == (void *)-1L) { - WARN("mmap fail cfi_hash"); - cfi_hash = NULL; - } else if (stats) { - printf("cfi_bits: %d\n", cfi_bits); - } - - return cfi_hash; -} - -static unsigned long nr_insns; -static unsigned long nr_insns_visited; - /* * Call the arch-specific instruction decoder for all the instructions and add * them to the global instruction list. @@ -355,6 +276,7 @@ static int decode_instructions(struct objtool_file *file) struct symbol *func; unsigned long offset; struct instruction *insn; + unsigned long nr_insns = 0; int ret; for_each_sec(file, sec) { @@ -380,11 +302,12 @@ static int decode_instructions(struct objtool_file *file) memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); INIT_LIST_HEAD(&insn->stack_ops); + init_cfi_state(&insn->cfi); insn->sec = sec; insn->offset = offset; - ret = arch_decode_instruction(file, sec, offset, + ret = arch_decode_instruction(file->elf, sec, offset, sec->sh.sh_size - offset, &insn->len, &insn->type, &insn->immediate, @@ -422,82 +345,6 @@ static int decode_instructions(struct objtool_file *file) return ret; } -/* - * Read the pv_ops[] .data table to find the static initialized values. - */ -static int add_pv_ops(struct objtool_file *file, const char *symname) -{ - struct symbol *sym, *func; - unsigned long off, end; - struct reloc *rel; - int idx; - - sym = find_symbol_by_name(file->elf, symname); - if (!sym) - return 0; - - off = sym->offset; - end = off + sym->len; - for (;;) { - rel = find_reloc_by_dest_range(file->elf, sym->sec, off, end - off); - if (!rel) - break; - - func = rel->sym; - if (func->type == STT_SECTION) - func = find_symbol_by_offset(rel->sym->sec, rel->addend); - - idx = (rel->offset - sym->offset) / sizeof(unsigned long); - - objtool_pv_add(file, idx, func); - - off = rel->offset + 1; - if (off > end) - break; - } - - return 0; -} - -/* - * Allocate and initialize file->pv_ops[]. 
- */ -static int init_pv_ops(struct objtool_file *file) -{ - static const char *pv_ops_tables[] = { - "pv_ops", - "xen_cpu_ops", - "xen_irq_ops", - "xen_mmu_ops", - NULL, - }; - const char *pv_ops; - struct symbol *sym; - int idx, nr; - - if (!noinstr) - return 0; - - file->pv_ops = NULL; - - sym = find_symbol_by_name(file->elf, "pv_ops"); - if (!sym) - return 0; - - nr = sym->len / sizeof(unsigned long); - file->pv_ops = calloc(sizeof(struct pv_state), nr); - if (!file->pv_ops) - return -1; - - for (idx = 0; idx < nr; idx++) - INIT_LIST_HEAD(&file->pv_ops[idx].targets); - - for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) - add_pv_ops(file, pv_ops); - - return 0; -} - static struct instruction *find_last_insn(struct objtool_file *file, struct section *sec) { @@ -685,52 +532,6 @@ static int create_static_call_sections(struct objtool_file *file) return 0; } -static int create_retpoline_sites_sections(struct objtool_file *file) -{ - struct instruction *insn; - struct section *sec; - int idx; - - sec = find_section_by_name(file->elf, ".retpoline_sites"); - if (sec) { - WARN("file already has .retpoline_sites, skipping"); - return 0; - } - - idx = 0; - list_for_each_entry(insn, &file->retpoline_call_list, call_node) - idx++; - - if (!idx) - return 0; - - sec = elf_create_section(file->elf, ".retpoline_sites", 0, - sizeof(int), idx); - if (!sec) { - WARN("elf_create_section: .retpoline_sites"); - return -1; - } - - idx = 0; - list_for_each_entry(insn, &file->retpoline_call_list, call_node) { - - int *site = (int *)sec->data->d_buf + idx; - *site = 0; - - if (elf_add_reloc_to_insn(file->elf, sec, - idx * sizeof(int), - R_X86_64_PC32, - insn->sec, insn->offset)) { - WARN("elf_add_reloc_to_insn: .retpoline_sites"); - return -1; - } - - idx++; - } - - return 0; -} - static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -749,7 +550,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return 0; idx = 0; - list_for_each_entry(insn, &file->mcount_loc_list, call_node) + list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) idx++; sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); @@ -757,7 +558,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return -1; idx = 0; - list_for_each_entry(insn, &file->mcount_loc_list, call_node) { + list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned long)); @@ -851,10 +652,6 @@ static const char *uaccess_safe_builtin[] = { "__asan_report_store16_noabort", /* KCSAN */ "__kcsan_check_access", - "__kcsan_mb", - "__kcsan_wmb", - "__kcsan_rmb", - "__kcsan_release", "kcsan_found_watchpoint", "kcsan_setup_watchpoint", "kcsan_check_scoped_accesses", @@ -1021,9 +818,6 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i return NULL; if (!insn->reloc) { - if (!file) - return NULL; - insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len); if (!insn->reloc) { @@ -1045,40 +839,27 @@ static void remove_insn_ops(struct instruction *insn) } } -static void annotate_call_site(struct objtool_file *file, - struct instruction *insn, bool sibling) +static void add_call_dest(struct objtool_file *file, struct instruction *insn, + struct symbol *dest, bool sibling) { struct reloc *reloc = insn_reloc(file, insn); - struct symbol *sym = insn->call_dest; - if (!sym) - sym = reloc->sym; - - /* - * Alternative replacement code 
is just template code which is - * sometimes copied to the original instruction. For now, don't - * annotate it. (In the future we might consider annotating the - * original instruction if/when it ever makes sense to do so.) - */ - if (!strcmp(insn->sec->name, ".altinstr_replacement")) + insn->call_dest = dest; + if (!dest) return; - if (sym->static_call_tramp) { - list_add_tail(&insn->call_node, &file->static_call_list); - return; - } - - if (sym->retpoline_thunk) { - list_add_tail(&insn->call_node, &file->retpoline_call_list); - return; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->call_node, + &file->static_call_list); } /* - * Many compilers cannot disable KCOV or sanitizer calls with a function - * attribute so they need a little help, NOP out any such calls from - * noinstr text. + * Many compilers cannot disable KCOV with a function attribute + * so they need a little help, NOP out any KCOV calls from noinstr + * text. */ - if (insn->sec->noinstr && sym->profiling_func) { + if (insn->sec->noinstr && + !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { if (reloc) { reloc->type = R_NONE; elf_write_reloc(file->elf, reloc); @@ -1090,10 +871,9 @@ static void annotate_call_site(struct objtool_file *file, : arch_nop_insn(insn->len)); insn->type = sibling ? INSN_RETURN : INSN_NOP; - return; } - if (mcount && sym->fentry) { + if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) { if (sibling) WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); @@ -1108,17 +888,9 @@ static void annotate_call_site(struct objtool_file *file, insn->type = INSN_NOP; - list_add_tail(&insn->call_node, &file->mcount_loc_list); - return; + list_add_tail(&insn->mcount_loc_node, + &file->mcount_loc_list); } -} - -static void add_call_dest(struct objtool_file *file, struct instruction *insn, - struct symbol *dest, bool sibling) -{ - insn->call_dest = dest; - if (!dest) - return; /* * Whatever stack impact regular CALLs have, should be undone @@ -1128,43 +900,8 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, * are converted to JUMP, see read_intra_function_calls(). */ remove_insn_ops(insn); - - annotate_call_site(file, insn, sibling); } -static void add_retpoline_call(struct objtool_file *file, struct instruction *insn) -{ - /* - * Retpoline calls/jumps are really dynamic calls/jumps in disguise, - * so convert them accordingly. - */ - switch (insn->type) { - case INSN_CALL: - insn->type = INSN_CALL_DYNAMIC; - break; - case INSN_JUMP_UNCONDITIONAL: - insn->type = INSN_JUMP_DYNAMIC; - break; - case INSN_JUMP_CONDITIONAL: - insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; - break; - default: - return; - } - - insn->retpoline_safe = true; - - /* - * Whatever stack impact regular CALLs have, should be undone - * by the RETURN of the called function. - * - * Annotated intra-function calls retain the stack_ops but - * are converted to JUMP, see read_intra_function_calls(). - */ - remove_insn_ops(insn); - - annotate_call_site(file, insn, false); -} /* * Find the destination instructions for all jumps. */ @@ -1186,8 +923,20 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (reloc->sym->retpoline_thunk) { - add_retpoline_call(file, insn); + } else if (arch_is_retpoline(reloc->sym)) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. 
+ */ + if (insn->type == INSN_JUMP_UNCONDITIONAL) + insn->type = INSN_JUMP_DYNAMIC; + else + insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + + list_add_tail(&insn->call_node, + &file->retpoline_call_list); + + insn->retpoline_safe = true; continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ @@ -1315,8 +1064,19 @@ static int add_call_destinations(struct objtool_file *file) add_call_dest(file, insn, dest, false); - } else if (reloc->sym->retpoline_thunk) { - add_retpoline_call(file, insn); + } else if (arch_is_retpoline(reloc->sym)) { + /* + * Retpoline calls are really dynamic calls in + * disguise, so convert them accordingly. + */ + insn->type = INSN_CALL_DYNAMIC; + insn->retpoline_safe = true; + + list_add_tail(&insn->call_node, + &file->retpoline_call_list); + + remove_insn_ops(insn); + continue; } else add_call_dest(file, insn, reloc->sym, false); @@ -1387,6 +1147,7 @@ static int handle_group_alt(struct objtool_file *file, memset(nop, 0, sizeof(*nop)); INIT_LIST_HEAD(&nop->alts); INIT_LIST_HEAD(&nop->stack_ops); + init_cfi_state(&nop->cfi); nop->sec = special_alt->new_sec; nop->offset = special_alt->new_off + special_alt->new_len; @@ -1795,11 +1556,10 @@ static void set_func_state(struct cfi_state *state) static int read_unwind_hints(struct objtool_file *file) { - struct cfi_state cfi = init_cfi; struct section *sec, *relocsec; + struct reloc *reloc; struct unwind_hint *hint; struct instruction *insn; - struct reloc *reloc; int i; sec = find_section_by_name(file->elf, ".discard.unwind_hints"); @@ -1837,24 +1597,19 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; if (hint->type == UNWIND_HINT_TYPE_FUNC) { - insn->cfi = &func_cfi; + set_func_state(&insn->cfi); continue; } - if (insn->cfi) - cfi = *(insn->cfi); - - if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) { + if (arch_decode_hint_reg(insn, hint->sp_reg)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; } - cfi.cfa.offset = bswap_if_needed(hint->sp_offset); - cfi.type = hint->type; - cfi.end = hint->end; - - insn->cfi = cfi_hash_find_or_add(&cfi); + insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset); + insn->cfi.type = hint->type; + insn->cfi.end = hint->end; } return 0; @@ -1993,53 +1748,17 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } -/* - * Return true if name matches an instrumentation function, where calls to that - * function from noinstr code can safely be removed, but compilers won't do so. - */ -static bool is_profiling_func(const char *name) -{ - /* - * Many compilers cannot disable KCOV with a function attribute. - */ - if (!strncmp(name, "__sanitizer_cov_", 16)) - return true; - - /* - * Some compilers currently do not remove __tsan_func_entry/exit nor - * __tsan_atomic_signal_fence (used for barrier instrumentation) with - * the __no_sanitize_thread attribute, remove them. Once the kernel's - * minimum Clang version is 14.0, this can be removed. 
- */ - if (!strncmp(name, "__tsan_func_", 12) || - !strcmp(name, "__tsan_atomic_signal_fence")) - return true; - - return false; -} - -static int classify_symbols(struct objtool_file *file) +static int read_static_call_tramps(struct objtool_file *file) { struct section *sec; struct symbol *func; for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->bind != STB_GLOBAL) - continue; - - if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + if (func->bind == STB_GLOBAL && + !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, strlen(STATIC_CALL_TRAMP_PREFIX_STR))) func->static_call_tramp = true; - - if (arch_is_retpoline(func)) - func->retpoline_thunk = true; - - if (!strcmp(func->name, "__fentry__")) - func->fentry = true; - - if (is_profiling_func(func->name)) - func->profiling_func = true; } } @@ -2072,16 +1791,17 @@ static void mark_rodata(struct objtool_file *file) file->rodata = found; } +__weak int arch_rewrite_retpolines(struct objtool_file *file) +{ + return 0; +} + static int decode_sections(struct objtool_file *file) { int ret; mark_rodata(file); - ret = init_pv_ops(file); - if (ret) - return ret; - ret = decode_instructions(file); if (ret) return ret; @@ -2100,7 +1820,7 @@ static int decode_sections(struct objtool_file *file) /* * Must be before add_{jump_call}_destination. */ - ret = classify_symbols(file); + ret = read_static_call_tramps(file); if (ret) return ret; @@ -2144,14 +1864,23 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be after add_special_section_alts(), since this will emit + * alternatives. Must be after add_{jump,call}_destination(), since + * those create the call insn lists. + */ + ret = arch_rewrite_retpolines(file); + if (ret) + return ret; + return 0; } static bool is_fentry_call(struct instruction *insn) { - if (insn->type == INSN_CALL && - insn->call_dest && - insn->call_dest->fentry) + if (insn->type == INSN_CALL && insn->call_dest && + insn->call_dest->type == STT_NOTYPE && + !strcmp(insn->call_dest->name, "__fentry__")) return true; return false; @@ -2734,18 +2463,13 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn if (!insn->alt_group) return 0; - if (!insn->cfi) { - WARN("CFI missing"); - return -1; - } - alt_cfi = insn->alt_group->cfi; group_off = insn->offset - insn->alt_group->first_insn->offset; if (!alt_cfi[group_off]) { - alt_cfi[group_off] = insn->cfi; + alt_cfi[group_off] = &insn->cfi; } else { - if (cficmp(alt_cfi[group_off], insn->cfi)) { + if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { WARN_FUNC("stack layout conflict in alternatives", insn->sec, insn->offset); return -1; @@ -2796,14 +2520,9 @@ static int handle_insn_ops(struct instruction *insn, static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) { - struct cfi_state *cfi1 = insn->cfi; + struct cfi_state *cfi1 = &insn->cfi; int i; - if (!cfi1) { - WARN("CFI missing"); - return false; - } - if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", @@ -2854,64 +2573,20 @@ static inline bool func_uaccess_safe(struct symbol *func) static inline const char *call_dest_name(struct instruction *insn) { - static char pvname[19]; - struct reloc *rel; - int idx; - if (insn->call_dest) return insn->call_dest->name; - rel = insn_reloc(NULL, insn); - if (rel && !strcmp(rel->sym->name, "pv_ops")) { - idx = (rel->addend / sizeof(void *)); - snprintf(pvname, sizeof(pvname), 
"pv_ops[%d]", idx); - return pvname; - } - return "{dynamic}"; } -static bool pv_call_dest(struct objtool_file *file, struct instruction *insn) -{ - struct symbol *target; - struct reloc *rel; - int idx; - - rel = insn_reloc(file, insn); - if (!rel || strcmp(rel->sym->name, "pv_ops")) - return false; - - idx = (arch_dest_reloc_offset(rel->addend) / sizeof(void *)); - - if (file->pv_ops[idx].clean) - return true; - - file->pv_ops[idx].clean = true; - - list_for_each_entry(target, &file->pv_ops[idx].targets, pv_target) { - if (!target->sec->noinstr) { - WARN("pv_ops[%d]: %s", idx, target->name); - file->pv_ops[idx].clean = false; - } - } - - return file->pv_ops[idx].clean; -} - -static inline bool noinstr_call_dest(struct objtool_file *file, - struct instruction *insn, - struct symbol *func) +static inline bool noinstr_call_dest(struct symbol *func) { /* * We can't deal with indirect function calls at present; * assume they're instrumented. */ - if (!func) { - if (file->pv_ops) - return pv_call_dest(file, insn); - + if (!func) return false; - } /* * If the symbol is from a noinstr section; we good. @@ -2930,12 +2605,10 @@ static inline bool noinstr_call_dest(struct objtool_file *file, return false; } -static int validate_call(struct objtool_file *file, - struct instruction *insn, - struct insn_state *state) +static int validate_call(struct instruction *insn, struct insn_state *state) { if (state->noinstr && state->instr <= 0 && - !noinstr_call_dest(file, insn, insn->call_dest)) { + !noinstr_call_dest(insn->call_dest)) { WARN_FUNC("call to %s() leaves .noinstr.text section", insn->sec, insn->offset, call_dest_name(insn)); return 1; @@ -2956,9 +2629,7 @@ static int validate_call(struct objtool_file *file, return 0; } -static int validate_sibling_call(struct objtool_file *file, - struct instruction *insn, - struct insn_state *state) +static int validate_sibling_call(struct instruction *insn, struct insn_state *state) { if (has_modified_stack_frame(insn, state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", @@ -2966,7 +2637,7 @@ static int validate_sibling_call(struct objtool_file *file, return 1; } - return validate_call(file, insn, state); + return validate_call(insn, state); } static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state) @@ -3036,7 +2707,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state) { struct alternative *alt; - struct instruction *next_insn, *prev_insn = NULL; + struct instruction *next_insn; struct section *sec; u8 visited; int ret; @@ -3065,25 +2736,15 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (insn->visited & visited) return 0; - } else { - nr_insns_visited++; } if (state.noinstr) state.instr += insn->instr; - if (insn->hint) { - state.cfi = *insn->cfi; - } else { - /* XXX track if we actually changed state.cfi */ - - if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) { - insn->cfi = prev_insn->cfi; - nr_cfi_reused++; - } else { - insn->cfi = cfi_hash_find_or_add(&state.cfi); - } - } + if (insn->hint) + state.cfi = insn->cfi; + else + insn->cfi = state.cfi; insn->visited |= visited; @@ -3115,17 +2776,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { - WARN_FUNC("missing int3 after ret", 
- insn->sec, insn->offset); - } return validate_return(func, insn, &state); case INSN_CALL: case INSN_CALL_DYNAMIC: - ret = validate_call(file, insn, &state); + ret = validate_call(insn, &state); if (ret) return ret; @@ -3144,7 +2799,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: if (is_sibling_call(insn)) { - ret = validate_sibling_call(file, insn, &state); + ret = validate_sibling_call(insn, &state); if (ret) return ret; @@ -3164,17 +2819,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { - WARN_FUNC("missing int3 after indirect jump", - insn->sec, insn->offset); - } - - /* fallthrough */ case INSN_JUMP_DYNAMIC_CONDITIONAL: if (is_sibling_call(insn)) { - ret = validate_sibling_call(file, insn, &state); + ret = validate_sibling_call(insn, &state); if (ret) return ret; } @@ -3247,7 +2894,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - prev_insn = insn; insn = next_insn; } @@ -3274,7 +2920,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) } while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { - if (insn->hint && !insn->visited && !insn->ignore) { + if (insn->hint && !insn->visited) { ret = validate_branch(file, insn->func, insn, state); if (ret && backtrace) BT_FUNC("<=== (hint)", insn); @@ -3341,19 +2987,20 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return true; /* - * Ignore alternative replacement instructions. This can happen + * Ignore any unused exceptions. This can happen when a whitelisted + * function has an exception table entry. + * + * Also ignore alternative replacement instructions. This can happen * when a whitelisted function uses one of the ALTERNATIVE macros. */ - if (!strcmp(insn->sec->name, ".altinstr_replacement") || + if (!strcmp(insn->sec->name, ".fixup") || + !strcmp(insn->sec->name, ".altinstr_replacement") || !strcmp(insn->sec->name, ".altinstr_aux")) return true; if (!insn->func) return false; - if (insn->func->static_call_tramp) - return true; - /* * CONFIG_UBSAN_TRAP inserts a UD2 when it sees * __builtin_unreachable(). The BUG() macro has an unreachable() after @@ -3502,20 +3149,10 @@ int check(struct objtool_file *file) int ret, warnings = 0; arch_initial_func_cfi_state(&initial_func_cfi); - init_cfi_state(&init_cfi); - init_cfi_state(&func_cfi); - set_func_state(&func_cfi); - - if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) - goto out; - - cfi_hash_add(&init_cfi); - cfi_hash_add(&func_cfi); ret = decode_sections(file); if (ret < 0) goto out; - warnings += ret; if (list_empty(&file->insn_list)) @@ -3559,13 +3196,6 @@ int check(struct objtool_file *file) goto out; warnings += ret; - if (retpoline) { - ret = create_retpoline_sites_sections(file); - if (ret < 0) - goto out; - warnings += ret; - } - if (mcount) { ret = create_mcount_loc_sections(file); if (ret < 0) @@ -3573,13 +3203,6 @@ int check(struct objtool_file *file) warnings += ret; } - if (stats) { - printf("nr_insns_visited: %ld\n", nr_insns_visited); - printf("nr_cfi: %ld\n", nr_cfi); - printf("nr_cfi_reused: %ld\n", nr_cfi_reused); - printf("nr_cfi_cache: %ld\n", nr_cfi_cache); - } - out: /* * For now, don't fail the kernel build on fatal warnings. 
These diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4b384c9070..fee03b744a 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -375,7 +375,6 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); - INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; sym->idx = i; @@ -742,6 +741,90 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) return len; } +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) +{ + struct section *symtab, *symtab_shndx; + struct symbol *sym; + Elf_Data *data; + Elf_Scn *s; + + sym = malloc(sizeof(*sym)); + if (!sym) { + perror("malloc"); + return NULL; + } + memset(sym, 0, sizeof(*sym)); + + sym->name = strdup(name); + + sym->sym.st_name = elf_add_string(elf, NULL, sym->name); + if (sym->sym.st_name == -1) + return NULL; + + sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); + // st_other 0 + // st_shndx 0 + // st_value 0 + // st_size 0 + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + WARN("can't find .symtab"); + return NULL; + } + + s = elf_getscn(elf->elf, symtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return NULL; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return NULL; + } + + data->d_buf = &sym->sym; + data->d_size = sizeof(sym->sym); + data->d_align = 1; + data->d_type = ELF_T_SYM; + + sym->idx = symtab->sh.sh_size / sizeof(sym->sym); + + symtab->sh.sh_size += data->d_size; + symtab->changed = true; + + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + if (symtab_shndx) { + s = elf_getscn(elf->elf, symtab_shndx->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return NULL; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return NULL; + } + + data->d_buf = &sym->sym.st_size; /* conveniently 0 */ + data->d_size = sizeof(Elf32_Word); + data->d_align = 4; + data->d_type = ELF_T_WORD; + + symtab_shndx->sh.sh_size += 4; + symtab_shndx->changed = true; + } + + sym->sec = find_section_by_index(elf, 0); + + elf_add_symbol(elf, sym); + + return sym; +} + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 76bae30782..478e054fcd 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -26,7 +26,6 @@ enum insn_type { INSN_CLAC, INSN_STD, INSN_CLD, - INSN_TRAP, INSN_OTHER, }; @@ -70,7 +69,7 @@ struct instruction; void arch_initial_func_cfi_state(struct cfi_init_state *state); -int arch_decode_instruction(struct objtool_file *file, const struct section *sec, +int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, unsigned long *immediate, @@ -85,7 +84,7 @@ unsigned long arch_dest_reloc_offset(int addend); const char *arch_nop_insn(int len); const char *arch_ret_insn(int len); -int arch_decode_hint_reg(u8 sp_reg, int *base); +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); bool arch_is_retpoline(struct symbol *sym); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 89ba869ed0..15ac0b7d3d 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, 
uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls; + validate_dup, vmlinux, mcount, noinstr, backup; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); diff --git a/tools/objtool/include/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h index f11d1ac1da..fd5cb0bed9 100644 --- a/tools/objtool/include/objtool/cfi.h +++ b/tools/objtool/include/objtool/cfi.h @@ -7,7 +7,6 @@ #define _OBJTOOL_CFI_H #include -#include #define CFI_UNDEFINED -1 #define CFI_CFA -2 @@ -25,7 +24,6 @@ struct cfi_init_state { }; struct cfi_state { - struct hlist_node hash; /* must be first, cficmp() */ struct cfi_reg regs[CFI_NUM_REGS]; struct cfi_reg vals[CFI_NUM_REGS]; struct cfi_reg cfa; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 6cfff07889..56d50bc50c 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -40,6 +40,7 @@ struct instruction { struct list_head list; struct hlist_node hash; struct list_head call_node; + struct list_head mcount_loc_node; struct section *sec; unsigned long offset; unsigned int len; @@ -59,7 +60,7 @@ struct instruction { struct list_head alts; struct symbol *func; struct list_head stack_ops; - struct cfi_state *cfi; + struct cfi_state cfi; }; static inline bool is_static_jump(struct instruction *insn) diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index d223367814..075d8291b8 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -54,12 +54,8 @@ struct symbol { unsigned long offset; unsigned int len; struct symbol *pfunc, *cfunc, *alias; - u8 uaccess_safe : 1; - u8 static_call_tramp : 1; - u8 retpoline_thunk : 1; - u8 fentry : 1; - u8 profiling_func : 1; - struct list_head pv_target; + bool uaccess_safe; + bool static_call_tramp; }; struct reloc { @@ -144,6 +140,7 @@ int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); int elf_write_reloc(struct elf *elf, struct reloc *reloc); +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); int elf_write(struct elf *elf); void elf_close(struct elf *elf); diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index f99fbc6078..24fa83634d 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -14,11 +14,6 @@ #define __weak __attribute__((weak)) -struct pv_state { - bool clean; - struct list_head targets; -}; - struct objtool_file { struct elf *elf; struct list_head insn_list; @@ -30,14 +25,10 @@ struct objtool_file { unsigned long jl_short, jl_long; unsigned long jl_nop_short, jl_nop_long; - - struct pv_state *pv_ops; }; struct objtool_file *objtool_open_read(const char *_objname); -void objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func); - int check(struct objtool_file *file); int orc_dump(const char *objname); int orc_create(struct objtool_file *file); diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index bdf699f655..e21db8bce4 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -135,32 +135,6 @@ struct objtool_file *objtool_open_read(const char *_objname) return &file; } -void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) -{ - if (!noinstr) - return; - - if (!f->pv_ops) { - WARN("paravirt confusion"); - return; - } - - /* - * These functions will be patched into native code, 
- * see paravirt_patch(). - */ - if (!strcmp(func->name, "_paravirt_nop") || - !strcmp(func->name, "_paravirt_ident_64")) - return; - - /* already added this function */ - if (!list_empty(&func->pv_target)) - return; - - list_add(&func->pv_target, &f->pv_ops[idx].targets); - f->pv_ops[idx].clean = false; -} - static void cmd_usage(void) { unsigned int i, longest = 0; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index dd3c64af9d..b5865e2450 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -13,19 +13,13 @@ #include #include -static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, - struct instruction *insn) +static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) { + struct instruction *insn = container_of(cfi, struct instruction, cfi); struct cfi_reg *bp = &cfi->regs[CFI_BP]; memset(orc, 0, sizeof(*orc)); - if (!cfi) { - orc->end = 0; - orc->sp_reg = ORC_REG_UNDEFINED; - return 0; - } - orc->end = cfi->end; if (cfi->cfa.base == CFI_UNDEFINED) { @@ -168,7 +162,7 @@ int orc_create(struct objtool_file *file) int i; if (!alt_group) { - if (init_orc_entry(&orc, insn->cfi, insn)) + if (init_orc_entry(&orc, &insn->cfi)) return -1; if (!memcmp(&prev_orc, &orc, sizeof(orc))) continue; @@ -192,8 +186,7 @@ int orc_create(struct objtool_file *file) struct cfi_state *cfi = alt_group->cfi[i]; if (!cfi) continue; - /* errors are reported on the original insn */ - if (init_orc_entry(&orc, cfi, insn)) + if (init_orc_entry(&orc, cfi)) return -1; if (!memcmp(&prev_orc, &orc, sizeof(orc))) continue; diff --git a/tools/objtool/special.c b/tools/objtool/special.c index e2223dd91c..06c3eacab3 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -109,6 +109,14 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, return -1; } + /* + * Skip retpoline .altinstr_replacement... we already rewrite the + * instructions for retpolines anyway, see arch_is_retpoline() + * usage in add_{call,jump}_destinations(). + */ + if (arch_is_retpoline(new_reloc->sym)) + return 1; + reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); /* _ASM_EXTABLE_EX hack */ diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 20b8ab984d..8e0163b7ef 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -37,7 +37,6 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents feature/ -libbpf/ fixdep libtraceevent-dynamic-list Documentation/doc.dep diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index c52755481e..2d586fe5e4 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -20,7 +20,6 @@ L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding - A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) The default is all events i.e. the same as --itrace=ibxwpe, @@ -62,6 +61,5 @@ debug messages will or will not be logged. Each flag must be preceded by either '+' or '-'. The flags are: a all perf events - o output to stdout If supported, the 'q' option may be repeated to increase the effect. 
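Before the perf documentation changes below, a note on the objtool retpoline machinery restored above: both the special.c hunk (which skips retpoline .altinstr_replacement entries) and the rewritten arch/x86/decode.c rely on symbol-name conventions. arch_is_retpoline() recognizes compiler-emitted thunk symbols, and arch_rewrite_retpolines() derives the matching alternative symbol from the thunk name; that is what reloc->sym->name + 21 does, skipping the 21-character "__x86_indirect_thunk_" prefix. A minimal standalone sketch of that naming step follows; the prefix test is an assumption modeled on the x86 arch_is_retpoline() of this era, and the hard-coded "jmp" stands in for the jmp-or-call choice the real code makes from insn->type:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Assumed prefix test, modeled on x86 arch_is_retpoline(). */
    static bool is_retpoline_name(const char *name)
    {
        return !strncmp(name, "__x86_indirect_", 15);
    }

    int main(void)
    {
        const char *thunk = "__x86_indirect_thunk_rax"; /* illustrative */
        char alt[32];

        if (is_retpoline_name(thunk)) {
            /* +21 skips "__x86_indirect_thunk_", keeping the register name,
             * mirroring reloc->sym->name + 21 in arch_rewrite_retpolines() */
            snprintf(alt, sizeof(alt), "__x86_indirect_alt_%s_%s",
                     "jmp", thunk + 21);
            printf("%s -> %s\n", thunk, alt); /* ..._alt_jmp_rax */
        }
        return 0;
    }

elf_add_alternative() then pairs the original indirect jump/call with that alternative symbol in an .altinstructions entry gated on ALT_NOT(X86_FEATURE_RETPOLINE); since ALT_NOT() inverts the feature test, the plain indirect instruction can be patched back in where retpolines are not required.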
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 7e44b419d3..cd8ce6e8ec 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -74,15 +74,12 @@ OPTIONS used when creating a uprobe for a process that resides in a different mount namespace from the perf(1) utility. ---debuginfod[=URLs]:: +--debuginfod=URLs:: Specify debuginfod URL to be used when retrieving perf.data binaries, it follows the same syntax as the DEBUGINFOD_URLS variable, like: buildid-cache.debuginfod=http://192.168.122.174:8002 - If the URLs is not specified, the value of DEBUGINFOD_URLS - system environment variable is used. - SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 0420e71698..3bb75c1f25 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -587,15 +587,6 @@ record.*:: Use 'n' control blocks in asynchronous (Posix AIO) trace writing mode ('n' default: 1, max: 4). - record.debuginfod:: - Specify debuginfod URL to be used when cacheing perf.data binaries, - it follows the same syntax as the DEBUGINFOD_URLS variable, like: - - http://192.168.122.174:8002 - - If the URLs is 'system', the value of DEBUGINFOD_URLS system environment - variable is used. - diff.*:: diff.order:: This option sets the number of columns to sort the result. diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0570a1ccd3..91108fe3ad 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -45,13 +45,6 @@ OPTIONS tasks slept. sched_switch contains a callchain where a task slept and sched_stat contains a timeslice how long a task slept. --k:: ---vmlinux=<file>:: - vmlinux pathname ---ignore-vmlinux:: - Ignore vmlinux files. - --kallsyms=<file>:: kallsyms pathname diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index cbb920f5d0..db465fa7ee 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -157,17 +157,6 @@ of instructions and number of cycles since the last update, and thus represent the average IPC since the last IPC for that event type. Note IPC for "branches" events is calculated separately from IPC for "instructions" events. -Even with the 'cyc' config term, it is possible to produce IPC information for -every change of timestamp, but at the expense of accuracy. That is selected by -specifying the itrace 'A' option. Due to the granularity of timestamps, the -actual number of cycles increases even though the cycles reported does not. -The number of instructions is known, but if IPC is reported, cycles can be too -low and so IPC is too high. Note that inaccuracy decreases as the period of -sampling increases i.e. if the number of cycles is too low by a small amount, -that becomes less significant if the number of cycles is large. It may also be -useful to use the 'A' option in conjunction with dlfilter-show-cycles.so to -provide higher granularity cycle information. - Also note that the IPC instruction count may or may not include the current instruction. If the cycle count is associated with an asynchronous branch (e.g. 
page fault or interrupt), then the instruction count does not include the @@ -884,7 +873,6 @@ The letters are: L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding - A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) "Instructions" events look like they were recorded by "perf record -e @@ -953,7 +941,6 @@ by flags which affect what debug messages will or will not be logged. Each flag must be preceded by either '+' or '-'. The flags supported by Intel PT are: -a Suppress logging of perf events +a Log all perf events - +o Output to stdout instead of "intel_pt.log" By default, logged perf events are filtered by any specified time ranges, but flag +a overrides that. @@ -1085,21 +1072,6 @@ The Z option is equivalent to having recorded a trace without TSC decoding a trace of a virtual machine. -dlfilter-show-cycles.so -~~~~~~~~~~~~~~~~~~~~~~~ - -Cycles can be displayed using dlfilter-show-cycles.so in which case the itrace A -option can be useful to provide higher granularity cycle information: - - perf script --itrace=A --call-trace --dlfilter dlfilter-show-cycles.so - -To see a list of dlfilters: - - perf script -v --list-dlfilters - -See also linkperf:perf-dlfilters[1] - - dump option ~~~~~~~~~~~ @@ -1172,12 +1144,7 @@ Recording is selected by using the aux-output config term e.g. perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname -Originally, software only supported redirecting at most one PEBS event because it -was not able to differentiate one event from another. To overcome that, more recent -kernels and perf tools add support for the PERF_RECORD_AUX_OUTPUT_HW_ID side-band event. -To check for the presence of that event in a PEBS-via-PT trace: - - perf script -D --no-itrace | grep PERF_RECORD_AUX_OUTPUT_HW_ID +Note that currently, software only supports redirecting at most one PEBS event. To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index f378ac5935..85b8ac695c 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -8,25 +8,22 @@ perf-kmem - Tool to trace/measure kernel memory properties SYNOPSIS -------- [verse] -'perf kmem' [<options>] {record|stat} +'perf kmem' {record|stat} [<options>] DESCRIPTION ----------- There are two variants of perf kmem: - 'perf kmem [<options>] record [<perf-record-options>] <command>' to - record the kmem events of an arbitrary workload. Additional 'perf - record' options may be specified after record, such as '-o' to - change the output file name. + 'perf kmem record <command>' to record the kmem events + of an arbitrary workload. - 'perf kmem [<options>] stat' to report kernel memory statistics. + 'perf kmem stat' to report kernel memory statistics. OPTIONS ------- -i <file>:: --input=<file>:: - For stat, select the input file (default: perf.data unless stdin is a - fifo) + Select the input file (default: perf.data unless stdin is a fifo) -f:: --force:: diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 57384a97c0..4c7db1da8f 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -39,10 +39,6 @@ any extra expressions computed by perf stat. --deprecated:: Print deprecated events. By default the deprecated events are hidden. ---cputype:: -Print events applying cpu with this type for hybrid platform -(e.g. 
--cputype core or --cputype atom) - [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- @@ -81,11 +77,7 @@ On AMD systems it is implemented using IBS (up to precise-level 2). The precise modifier works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 (micro-ops retired). Both events map to IBS execution sampling (IBS op) with the IBS Op Counter Control bit -(IbsOpCntCtl) set respectively (see the -Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) -section of the [AMD Processor Programming Reference (PPR)] relevant to the -family, model and stepping of the processor being used). - +(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s Manual Volume 2: System Programming, 13.3 Instruction-Based Sampling). Examples to use IBS: @@ -98,12 +90,10 @@ RAW HARDWARE EVENT DESCRIPTOR Even when an event is not available in a symbolic form within perf right now, it can be encoded in a per processor specific way. -For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the +For instance, for x86 CPUs NNN represents the raw register encoding with the layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout -of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the -Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the -[AMD Processor Programming Reference (PPR)] relevant to the family, model -and stepping of the processor being used). +of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, +Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). Note: Only the following bit fields can be set in x86 counter registers: event, umask, edge, inv, cmask. Esp. guest/host only and @@ -132,38 +122,6 @@ It's also possible to use pmu syntax: perf record -e cpu/r1a8/ ... perf record -e cpu/r0x1a8/ ... -Some processors, like those from AMD, support event codes and unit masks -larger than a byte. In such cases, the bits corresponding to the event -configuration parameters can be seen with: - - cat /sys/bus/event_source/devices/<pmu>/format/<config> - -Example: - -If the AMD docs for an EPYC 7713 processor describe an event as: - - Event Umask Event Mask - Num. Value Mnemonic Description - - 28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag - hit events. - -raw encoding of 0x0328F cannot be used since the upper nibble of the -EventSelect bits have to be specified via bits 32-35 as can be seen with: - - cat /sys/bus/event_source/devices/cpu/format/event - -raw encoding of 0x20000038F should be used instead: - - perf stat -e r20000038f -a sleep 1 - perf record -e r20000038f ... - -It's also possible to use pmu syntax: - - perf record -e r20000038f -a sleep 1 - perf record -e cpu/r20000038f/ ... - perf record -e cpu/r0x20000038f/ ... - You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. 
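As a concrete illustration of the rNNN raw event descriptor (eventsel+umask) that the perf-list.txt hunk above restores: per the IA32_PERFEVTSELx layout the text cites, the low byte of the raw value is the event select and the next byte the unit mask. A minimal sketch of the packing; reading the doc's r1a8 example as unit mask 0x01 plus event select 0xa8 is an illustrative decomposition, and the remaining PERFEVTSELx fields (edge, inv, cmask, ...) are left clear:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pack event select and unit mask into the low bytes of an
     * IA32_PERFEVTSELx-style raw event value. */
    static uint64_t raw_event(uint8_t event_sel, uint8_t umask)
    {
        return ((uint64_t)umask << 8) | event_sel;
    }

    int main(void)
    {
        /* prints "r1a8", matching the perf record -e r1a8 example above */
        printf("r%" PRIx64 "\n", raw_event(0xa8, 0x01));
        return 0;
    }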
@@ -354,4 +312,4 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], -https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)] +http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9ccc75935b..f1079ee7f2 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -30,10 +30,8 @@ OPTIONS - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event in the form of rN where N is a hexadecimal value - that represents the raw register encoding with the layout of the - event control registers as described by entries in - /sys/bus/event_sources/devices/cpu/format/*. + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the @@ -471,7 +469,7 @@ This option sets the time out limit. The default value is 500 ms. --switch-events:: Record context switch events i.e. events of type PERF_RECORD_SWITCH or -PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE) +PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight) switch events will be enabled automatically, which can be suppressed by by the option --no-switch-events. @@ -598,22 +596,6 @@ options. 'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj in config file is set to true. ---synth=TYPE:: -Collect and synthesize given type of events (comma separated). Note that -this option controls the synthesis from the /proc filesystem which represent -task status for pre-existing threads. - -Kernel (and some other) events are recorded regardless of the -choice in this option. For example, --synth=no would have MMAP events for -kernel and modules. - -Available types are: - 'task' - synthesize FORK and COMM events for each task - 'mmap' - synthesize MMAP events for each process (implies 'task') - 'cgroup' - synthesize CGROUP events for each cgroup - 'all' - synthesize all events (default) - 'no' - do not synthesize any of the above events - --tail-synthesize:: Instead of collecting non-sample events (for example, fork, comm, mmap) at the beginning of record, collect them during finalizing an output file. @@ -715,15 +697,6 @@ measurements: include::intel-hybrid.txt[] ---debuginfod[=URLs]:: - Specify debuginfod URL to be used when cacheing perf.data binaries, - it follows the same syntax as the DEBUGINFOD_URLS variable, like: - - http://192.168.122.174:8002 - - If the URLs is not specified, the value of DEBUGINFOD_URLS - system environment variable is used. 
- SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index b007071878..c805152435 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -130,7 +130,7 @@ OPTIONS comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, - metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat. + metric, misc, srccode, ipc, data_page_size, code_page_size. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c06c341e72..7e6fb7cbc0 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -36,10 +36,8 @@ report:: - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event in the form of rN where N is a hexadecimal value - that represents the raw register encoding with the layout of the - event control registers as described by entries in - /sys/bus/event_sources/devices/cpu/format/*. + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the @@ -495,10 +493,6 @@ This option can be enabled in perf config by setting the variable $ perf config stat.no-csv-summary=true ---cputype:: -Only enable events on applying cpu with this type for hybrid platform -(e.g. core or atom)" EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index cac3dfbee7..9898a32b8d 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -38,10 +38,9 @@ Default is to monitor all CPUS. -e <event>:: --event=<event>:: Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU event in the form - of rN where N is a hexadecimal value that represents the raw register - encoding with the layout of the event control registers as described - by entries in /sys/bus/event_sources/devices/cpu/format/*. + (use 'perf list' to list all events) or a raw PMU + event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. -E <entries>:: --entries=<entries>:: diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index f56d0e0fbf..e6ff8c898a 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -346,7 +346,7 @@ to special needs. HEADER_BPF_PROG_INFO = 25, -struct perf_bpil, which contains detailed information about +struct bpf_prog_info_linear, which contains detailed information about a BPF program, including type, id, tag, jited/xlated instructions, etc. 
HEADER_BPF_BTF = 26, diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f5d72f936a..f05c4d48fd 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -17,11 +17,7 @@ tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/find_bit.c tools/lib/bitmap.c -tools/lib/list_sort.c tools/lib/str_error_r.c tools/lib/vsprintf.c tools/lib/zalloc.c scripts/bpf_doc.py -tools/bpf/bpftool -kernel/bpf/disasm.c -kernel/bpf/disasm.h diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 96ad944ca6..71772b20ea 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -17,7 +17,6 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) -HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) include $(srctree)/tools/scripts/Makefile.arch @@ -215,7 +214,6 @@ endif ifneq ($(WERROR),0) CORE_CFLAGS += -Werror CXXFLAGS += -Werror - HOSTCFLAGS += -Werror endif ifndef DEBUG @@ -288,16 +286,13 @@ CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra CORE_CFLAGS += -std=gnu99 -CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti +CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti CXXFLAGS += -Wall CXXFLAGS += -fno-omit-frame-pointer CXXFLAGS += -ggdb3 CXXFLAGS += -funwind-tables CXXFLAGS += -Wno-strict-aliasing -HOSTCFLAGS += -Wall -HOSTCFLAGS += -Wextra - # Enforce a non-executable stack, as we may regress (again) in the future by # adding assembler files missing the .GNU-stack linker note. LDFLAGS += -Wl,-z,noexecstack @@ -1016,9 +1011,6 @@ ifndef NO_AUXTRACE ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT - ifeq ($(feature-reallocarray), 0) - CFLAGS += -DCOMPAT_NEED_REALLOCARRAY - endif endif endif @@ -1102,32 +1094,11 @@ ifdef LIBTRACEEVENT_DYNAMIC $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) EXTLIBS += -ltraceevent - LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent) - LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION))) - LIBTRACEEVENT_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEEVENT_VERSION))) - LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION))) - LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3)) - CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP) else dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel); endif endif -ifdef LIBTRACEFS_DYNAMIC - $(call feature_check,libtracefs) - ifeq ($(feature-libtracefs), 1) - EXTLIBS += -ltracefs - LIBTRACEFS_VERSION := $(shell $(PKG_CONFIG) --modversion libtracefs) - LIBTRACEFS_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEFS_VERSION))) - LIBTRACEFS_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEFS_VERSION))) - LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION))) - LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3)) - CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP) - else - dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev); - endif -endif - # Among the variables below, these: # perfexecdir # perf_include_dir diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ac861e42c8..b856afa6eb 100644 --- 
a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -130,8 +130,6 @@ include ../scripts/utilities.mak # # Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking # -# Define LIBTRACEFS_DYNAMIC to enable libtracefs dynamic linking -# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -226,7 +224,7 @@ else endif export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK -export HOSTCC HOSTLD HOSTAR HOSTCFLAGS +export HOSTCC HOSTLD HOSTAR include $(srctree)/tools/build/Makefile.include @@ -243,7 +241,7 @@ else # force_fixdep LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ -LIBBPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -295,6 +293,7 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) + BPF_PATH=$(OUTPUT) SUBCMD_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) ifneq ($(subdir),) @@ -306,6 +305,7 @@ else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ API_PATH=$(LIB_DIR) + BPF_PATH=$(BPF_DIR) SUBCMD_PATH=$(SUBCMD_DIR) LIBPERF_PATH=$(LIBPERF_DIR) endif @@ -324,14 +324,7 @@ LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DY LIBAPI = $(API_PATH)libapi.a export LIBAPI -ifneq ($(OUTPUT),) - LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf -else - LIBBPF_OUTPUT = $(CURDIR)/libbpf -endif -LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include -LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a +LIBBPF = $(BPF_PATH)libbpf.a LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a @@ -367,7 +360,7 @@ ifndef NO_JVMTI PROGRAMS += $(OUTPUT)$(LIBJVMTI) endif -DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so +DLFILTERS := dlfilter-test-api-v0.so DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS)) # what 'all' will build and 'install' will install, in perfexecdir @@ -516,17 +509,17 @@ kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh $(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl) $(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@ -socket_arrays := $(beauty_outdir)/socket.c +socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c +socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh + +$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl) + $(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@ + +socket_arrays := $(beauty_outdir)/socket_arrays.c socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh -$(socket_arrays): $(linux_uapi_dir)/in.h $(beauty_linux_dir)/socket.h $(socket_tbl) - $(Q)$(SHELL) '$(socket_tbl)' $(linux_uapi_dir) $(beauty_linux_dir) > $@ - -sockaddr_arrays := $(beauty_outdir)/sockaddr.c -sockaddr_tbl := $(srctree)/tools/perf/trace/beauty/sockaddr.sh - -$(sockaddr_arrays): $(beauty_linux_dir)/socket.h $(sockaddr_tbl) - $(Q)$(SHELL) '$(sockaddr_tbl)' $(beauty_linux_dir) > $@ +$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl) + $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@ vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux @@ -736,8 +729,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(sndrv_ctl_ioctl_array) \ $(kcmp_type_array) \ $(kvm_ioctl_array) \ + $(socket_ipproto_array) \ $(socket_arrays) \ - $(sockaddr_arrays) \ 
$(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(mmap_flags_array) \ @@ -836,14 +829,12 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) - $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ - O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ - $@ install_headers +$(LIBBPF): FORCE + $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) - $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) + $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null $(LIBPERF): FORCE $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a @@ -1041,17 +1032,18 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h -SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h - -$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): - $(Q)$(MKDIR) -p $@ +SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool -BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) +LIBBPF_SRC := $(abspath ../lib/bpf) +BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(BPF_PATH) -I$(LIBBPF_SRC)/.. + +$(SKEL_TMP_OUT): + $(Q)$(MKDIR) -p $@ $(BPFTOOL): | $(SKEL_TMP_OUT) - $(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \ + CFLAGS= $(MAKE) -C ../bpf/bpftool \ OUTPUT=$(SKEL_TMP_OUT)/ bootstrap VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ @@ -1113,8 +1105,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ $(OUTPUT)$(kcmp_type_array) \ + $(OUTPUT)$(socket_ipproto_array) \ $(OUTPUT)$(socket_arrays) \ - $(OUTPUT)$(sockaddr_arrays) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) \ $(OUTPUT)$(prctl_option_array) \ diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h index 452b3d9045..c625380524 100644 --- a/tools/perf/arch/arm/include/arch-tests.h +++ b/tools/perf/arch/arm/include/arch-tests.h @@ -2,6 +2,6 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -extern struct test_suite *arch_tests[]; +extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h index 99a06550e2..4085419283 100644 --- a/tools/perf/arch/arm/include/perf_regs.h +++ b/tools/perf/arch/arm/include/perf_regs.h @@ -15,4 +15,46 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM_PC #define PERF_REG_SP PERF_REG_ARM_SP +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_ARM_R0: + return "r0"; + case PERF_REG_ARM_R1: + return "r1"; + case PERF_REG_ARM_R2: + return "r2"; + case PERF_REG_ARM_R3: + return "r3"; + case PERF_REG_ARM_R4: + return "r4"; + case PERF_REG_ARM_R5: + return "r5"; + case PERF_REG_ARM_R6: + return "r6"; + case PERF_REG_ARM_R7: + return "r7"; + case PERF_REG_ARM_R8: + return "r8"; + case PERF_REG_ARM_R9: + return "r9"; + case PERF_REG_ARM_R10: + return "r10"; + case PERF_REG_ARM_FP: + return "fp"; + case PERF_REG_ARM_IP: + return "ip"; + case PERF_REG_ARM_SP: + return "sp"; + case PERF_REG_ARM_LR: + return "lr"; + case PERF_REG_ARM_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + #endif /* ARCH_PERF_REGS_H */ diff --git 
a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c index 69561111cc..6848101a85 100644 --- a/tools/perf/arch/arm/tests/arch-tests.c +++ b/tools/perf/arch/arm/tests/arch-tests.c @@ -3,10 +3,18 @@ #include "tests/tests.h" #include "arch-tests.h" -struct test_suite *arch_tests[] = { +struct test arch_tests[] = { #ifdef HAVE_DWARF_UNWIND_SUPPORT - &suite__dwarf_unwind, + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, #endif - &suite__vectors_page, - NULL, + { + .desc = "Vectors page", + .func = test__vectors_page, + }, + { + .func = NULL, + }, }; diff --git a/tools/perf/arch/arm/tests/vectors-page.c b/tools/perf/arch/arm/tests/vectors-page.c index 55a8358374..7ffdd79971 100644 --- a/tools/perf/arch/arm/tests/vectors-page.c +++ b/tools/perf/arch/arm/tests/vectors-page.c @@ -9,7 +9,8 @@ #define VECTORS__MAP_NAME "[vectors]" -static int test__vectors_page(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__vectors_page(struct test *test __maybe_unused, + int subtest __maybe_unused) { void *start, *end; @@ -21,5 +22,3 @@ static int test__vectors_page(struct test_suite *test __maybe_unused, int subtes return TEST_OK; } - -DEFINE_SUITE("Vectors page", vectors_page); diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2e8b2c4365..293a23bf8b 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -203,11 +203,9 @@ static int cs_etm_set_option(struct auxtrace_record *itr, struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); /* Set option of each CPU we have */ - for (i = 0; i < cpu__max_cpu().cpu; i++) { - struct perf_cpu cpu = { .cpu = i, }; - - if (!perf_cpu_map__has(event_cpus, cpu) || - !perf_cpu_map__has(online_cpus, cpu)) + for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(event_cpus, i) || + !cpu_map__has(online_cpus, i)) continue; if (option & BIT(ETM_OPT_CTXTID)) { @@ -409,6 +407,25 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, } + /* Validate auxtrace_mmap_pages provided by user */ + if (opts->auxtrace_mmap_pages) { + unsigned int max_page = (KiB(128) / page_size); + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + + if (!privileged && + opts->auxtrace_mmap_pages > max_page) { + opts->auxtrace_mmap_pages = max_page; + pr_err("auxtrace too big, truncating to %d\n", + max_page); + } + + if (!is_power_of_2(sz)) { + pr_err("Invalid mmap size for %s: must be a power of 2\n", + CORESIGHT_ETM_PMU_NAME); + return -EINVAL; + } + } + if (opts->auxtrace_snapshot_mode) pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); @@ -524,11 +541,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, /* cpu map is not empty, we have specific CPUs to work with */ if (!perf_cpu_map__empty(event_cpus)) { - for (i = 0; i < cpu__max_cpu().cpu; i++) { - struct perf_cpu cpu = { .cpu = i, }; - - if (!perf_cpu_map__has(event_cpus, cpu) || - !perf_cpu_map__has(online_cpus, cpu)) + for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(event_cpus, i) || + !cpu_map__has(online_cpus, i)) continue; if (cs_etm_is_ete(itr, i)) @@ -540,10 +555,8 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } } else { /* get configuration for all CPUs in the system */ - for (i = 0; i < cpu__max_cpu().cpu; i++) { - struct perf_cpu cpu = { .cpu = i, }; - - if (!perf_cpu_map__has(online_cpus, cpu)) + for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(online_cpus, i)) 
continue; if (cs_etm_is_ete(itr, i)) @@ -728,10 +741,8 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, } else { /* Make sure all specified CPUs are online */ for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) { - struct perf_cpu cpu = { .cpu = i, }; - - if (perf_cpu_map__has(event_cpus, cpu) && - !perf_cpu_map__has(online_cpus, cpu)) + if (cpu_map__has(event_cpus, i) && + !cpu_map__has(online_cpus, i)) return -EINVAL; } @@ -751,12 +762,9 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, offset = CS_ETM_SNAPSHOT + 1; - for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) { - struct perf_cpu cpu = { .cpu = i, }; - - if (perf_cpu_map__has(cpu_map, cpu)) + for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++) + if (cpu_map__has(cpu_map, i)) cs_etm_get_metadata(i, &offset, itr, info); - } perf_cpu_map__put(online_cpus); diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h index 452b3d9045..c625380524 100644 --- a/tools/perf/arch/arm64/include/arch-tests.h +++ b/tools/perf/arch/arm64/include/arch-tests.h @@ -2,6 +2,6 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -extern struct test_suite *arch_tests[]; +extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index 35a3cc775b..fa3e07459f 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -4,9 +4,7 @@ #include #include -#define perf_event_arm_regs perf_event_arm64_regs #include -#undef perf_event_arm_regs void perf_regs_load(u64 *regs); @@ -17,4 +15,80 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM64_PC #define PERF_REG_SP PERF_REG_ARM64_SP +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_ARM64_X0: + return "x0"; + case PERF_REG_ARM64_X1: + return "x1"; + case PERF_REG_ARM64_X2: + return "x2"; + case PERF_REG_ARM64_X3: + return "x3"; + case PERF_REG_ARM64_X4: + return "x4"; + case PERF_REG_ARM64_X5: + return "x5"; + case PERF_REG_ARM64_X6: + return "x6"; + case PERF_REG_ARM64_X7: + return "x7"; + case PERF_REG_ARM64_X8: + return "x8"; + case PERF_REG_ARM64_X9: + return "x9"; + case PERF_REG_ARM64_X10: + return "x10"; + case PERF_REG_ARM64_X11: + return "x11"; + case PERF_REG_ARM64_X12: + return "x12"; + case PERF_REG_ARM64_X13: + return "x13"; + case PERF_REG_ARM64_X14: + return "x14"; + case PERF_REG_ARM64_X15: + return "x15"; + case PERF_REG_ARM64_X16: + return "x16"; + case PERF_REG_ARM64_X17: + return "x17"; + case PERF_REG_ARM64_X18: + return "x18"; + case PERF_REG_ARM64_X19: + return "x19"; + case PERF_REG_ARM64_X20: + return "x20"; + case PERF_REG_ARM64_X21: + return "x21"; + case PERF_REG_ARM64_X22: + return "x22"; + case PERF_REG_ARM64_X23: + return "x23"; + case PERF_REG_ARM64_X24: + return "x24"; + case PERF_REG_ARM64_X25: + return "x25"; + case PERF_REG_ARM64_X26: + return "x26"; + case PERF_REG_ARM64_X27: + return "x27"; + case PERF_REG_ARM64_X28: + return "x28"; + case PERF_REG_ARM64_X29: + return "x29"; + case PERF_REG_ARM64_SP: + return "sp"; + case PERF_REG_ARM64_LR: + return "lr"; + case PERF_REG_ARM64_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/arm64/tests/arch-tests.c b/tools/perf/arch/arm64/tests/arch-tests.c index ad16b4f8f6..5b1543c980 100644 --- a/tools/perf/arch/arm64/tests/arch-tests.c +++ b/tools/perf/arch/arm64/tests/arch-tests.c @@ -3,9 +3,14 @@ #include 
"tests/tests.h" #include "arch-tests.h" -struct test_suite *arch_tests[] = { +struct test arch_tests[] = { #ifdef HAVE_DWARF_UNWIND_SUPPORT - &suite__dwarf_unwind, + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, #endif - NULL, + { + .func = NULL, + }, }; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 2100d46ccf..a4420d4df5 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -23,7 +23,6 @@ #include "../../../util/auxtrace.h" #include "../../../util/record.h" #include "../../../util/arm-spe.h" -#include // reallocarray #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -32,8 +31,6 @@ struct arm_spe_recording { struct auxtrace_record itr; struct perf_pmu *arm_spe_pmu; struct evlist *evlist; - int wrapped_cnt; - bool *wrapped; }; static void arm_spe_set_timestamp(struct auxtrace_record *itr, @@ -87,55 +84,6 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, return 0; } -static void -arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts, - bool privileged) -{ - /* - * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor - * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for - * unprivileged users. - * - * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for - * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages - * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the - * user is likely to get an error as they exceed their mlock limmit. - */ - - /* - * No size were given to '-S' or '-m,', so go with the default - */ - if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { - if (privileged) { - opts->auxtrace_mmap_pages = MiB(4) / page_size; - } else { - opts->auxtrace_mmap_pages = KiB(128) / page_size; - if (opts->mmap_pages == UINT_MAX) - opts->mmap_pages = KiB(256) / page_size; - } - } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) { - opts->mmap_pages = KiB(256) / page_size; - } - - /* - * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the - * auxtrace mmap area. - */ - if (!opts->auxtrace_snapshot_size) - opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size; - - /* - * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big - * enough to fit the requested snapshot size. - */ - if (!opts->auxtrace_mmap_pages) { - size_t sz = opts->auxtrace_snapshot_size; - - sz = round_up(sz, page_size) / page_size; - opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); - } -} - static int arm_spe_recording_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts) @@ -167,36 +115,6 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; - /* - * we are in snapshot mode. - */ - if (opts->auxtrace_snapshot_mode) { - /* - * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with - * default values. - */ - if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) - arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged); - - /* - * Snapshot size can't be bigger than the auxtrace area. 
- */ - if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { - pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", - opts->auxtrace_snapshot_size, - opts->auxtrace_mmap_pages * (size_t)page_size); - return -EINVAL; - } - - /* - * Something went wrong somewhere - this shouldn't happen. - */ - if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { - pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); - return -EINVAL; - } - } - /* We are in full trace mode but '-m,xyz' wasn't specified */ if (!opts->auxtrace_mmap_pages) { if (privileged) { @@ -220,9 +138,6 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, } } - if (opts->auxtrace_snapshot_mode) - pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME, - opts->auxtrace_snapshot_size); /* * To obtain the auxtrace buffer file descriptor, the auxtrace event @@ -251,199 +166,8 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__empty(cpus)) evsel__set_sample_bit(tracking_evsel, TIME); - evsel__set_sample_bit(tracking_evsel, CPU); - - /* also track task context switch */ - if (!record_opts__no_switch_events(opts)) - tracking_evsel->core.attr.context_switch = 1; - } - - return 0; -} - -static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, - struct record_opts *opts, - const char *str) -{ - unsigned long long snapshot_size = 0; - char *endptr; - - if (str) { - snapshot_size = strtoull(str, &endptr, 0); - if (*endptr || snapshot_size > SIZE_MAX) - return -1; - } - - opts->auxtrace_snapshot_mode = true; - opts->auxtrace_snapshot_size = snapshot_size; - - return 0; -} - -static int arm_spe_snapshot_start(struct auxtrace_record *itr) -{ - struct arm_spe_recording *ptr = - container_of(itr, struct arm_spe_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->arm_spe_pmu->type) - return evsel__disable(evsel); - } - return -EINVAL; -} - -static int arm_spe_snapshot_finish(struct auxtrace_record *itr) -{ - struct arm_spe_recording *ptr = - container_of(itr, struct arm_spe_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->arm_spe_pmu->type) - return evsel__enable(evsel); - } - return -EINVAL; -} - -static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx) -{ - bool *wrapped; - int cnt = ptr->wrapped_cnt, new_cnt, i; - - /* - * No need to allocate, so return early. - */ - if (idx < cnt) - return 0; - - /* - * Make ptr->wrapped as big as idx. - */ - new_cnt = idx + 1; - - /* - * Free'ed in arm_spe_recording_free(). - */ - wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool)); - if (!wrapped) - return -ENOMEM; - - /* - * init new allocated values. 
- */ - for (i = cnt; i < new_cnt; i++) - wrapped[i] = false; - - ptr->wrapped_cnt = new_cnt; - ptr->wrapped = wrapped; - - return 0; -} - -static bool arm_spe_buffer_has_wrapped(unsigned char *buffer, - size_t buffer_size, u64 head) -{ - u64 i, watermark; - u64 *buf = (u64 *)buffer; - size_t buf_size = buffer_size; - - /* - * Defensively handle the case where head might be continually increasing - if its value is - * equal or greater than the size of the ring buffer, then we can safely determine it has - * wrapped around. Otherwise, continue to detect if head might have wrapped. - */ - if (head >= buffer_size) - return true; - - /* - * We want to look the very last 512 byte (chosen arbitrarily) in the ring buffer. - */ - watermark = buf_size - 512; - - /* - * The value of head is somewhere within the size of the ring buffer. This can be that there - * hasn't been enough data to fill the ring buffer yet or the trace time was so long that - * head has numerically wrapped around. To find we need to check if we have data at the - * very end of the ring buffer. We can reliably do this because mmap'ed pages are zeroed - * out and there is a fresh mapping with every new session. - */ - - /* - * head is less than 512 byte from the end of the ring buffer. - */ - if (head > watermark) - watermark = head; - - /* - * Speed things up by using 64 bit transactions (see "u64 *buf" above) - */ - watermark /= sizeof(u64); - buf_size /= sizeof(u64); - - /* - * If we find trace data at the end of the ring buffer, head has been there and has - * numerically wrapped around at least once. - */ - for (i = watermark; i < buf_size; i++) - if (buf[i]) - return true; - - return false; -} - -static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx, - struct auxtrace_mmap *mm, unsigned char *data, - u64 *head, u64 *old) -{ - int err; - bool wrapped; - struct arm_spe_recording *ptr = - container_of(itr, struct arm_spe_recording, itr); - - /* - * Allocate memory to keep track of wrapping if this is the first - * time we deal with this *mm. - */ - if (idx >= ptr->wrapped_cnt) { - err = arm_spe_alloc_wrapped_array(ptr, idx); - if (err) - return err; - } - - /* - * Check to see if *head has wrapped around. If it hasn't only the - * amount of data between *head and *old is snapshot'ed to avoid - * bloating the perf.data file with zeros. But as soon as *head has - * wrapped around the entire size of the AUX ring buffer it taken. - */ - wrapped = ptr->wrapped[idx]; - if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) { - wrapped = true; - ptr->wrapped[idx] = true; - } - - pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", - __func__, idx, (size_t)*old, (size_t)*head, mm->len); - - /* - * No wrap has occurred, we can just use *head and *old. - */ - if (!wrapped) - return 0; - - /* - * *head has wrapped around - adjust *head and *old to pickup the - * entire content of the AUX buffer. 
- */ - if (*head >= mm->len) { - *old = *head - mm->len; - } else { - *head += mm->len; - *old = *head - mm->len; - } return 0; } @@ -462,7 +186,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr) struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); - free(sper->wrapped); free(sper); } @@ -484,10 +207,6 @@ struct auxtrace_record *arm_spe_recording_init(int *err, sper->arm_spe_pmu = arm_spe_pmu; sper->itr.pmu = arm_spe_pmu; - sper->itr.snapshot_start = arm_spe_snapshot_start; - sper->itr.snapshot_finish = arm_spe_snapshot_finish; - sper->itr.find_snapshot = arm_spe_find_snapshot; - sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options; sper->itr.recording_options = arm_spe_recording_options; sper->itr.info_priv_size = arm_spe_info_priv_size; sper->itr.info_fill = arm_spe_info_fill; diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index d2ce31e28c..7e7714290a 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -5,8 +5,6 @@ #include #include "debug.h" #include "symbol.h" -#include "callchain.h" -#include "record.h" /* On arm64, kernel text segment starts at high memory address, * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory @@ -28,8 +26,3 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) p->end = c->start; pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); } - -void arch__add_leaf_frame_record_opts(struct record_opts *opts) -{ - opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; -} diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 79124bba71..2234fbd0a9 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -3,7 +3,7 @@ #include "../../../util/cpumap.h" #include "../../../util/pmu.h" -const struct pmu_events_map *pmu_events_map__find(void) +struct pmu_events_map *pmu_events_map__find(void) { struct perf_pmu *pmu = NULL; @@ -15,7 +15,7 @@ const struct pmu_events_map *pmu_events_map__find(void) * The cpumap should cover all CPUs. Otherwise, some CPUs may * not support some events or have different event IDs. 
*/ - if (pmu->cpus->nr != cpu__max_cpu().cpu) + if (pmu->cpus->nr != cpu__max_cpu()) return NULL; return perf_pmu__find_map(pmu); diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h index 1afcc0e916..25ac3bdcb9 100644 --- a/tools/perf/arch/csky/include/perf_regs.h +++ b/tools/perf/arch/csky/include/perf_regs.h @@ -15,4 +15,86 @@ #define PERF_REG_IP PERF_REG_CSKY_PC #define PERF_REG_SP PERF_REG_CSKY_SP +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_CSKY_A0: + return "a0"; + case PERF_REG_CSKY_A1: + return "a1"; + case PERF_REG_CSKY_A2: + return "a2"; + case PERF_REG_CSKY_A3: + return "a3"; + case PERF_REG_CSKY_REGS0: + return "regs0"; + case PERF_REG_CSKY_REGS1: + return "regs1"; + case PERF_REG_CSKY_REGS2: + return "regs2"; + case PERF_REG_CSKY_REGS3: + return "regs3"; + case PERF_REG_CSKY_REGS4: + return "regs4"; + case PERF_REG_CSKY_REGS5: + return "regs5"; + case PERF_REG_CSKY_REGS6: + return "regs6"; + case PERF_REG_CSKY_REGS7: + return "regs7"; + case PERF_REG_CSKY_REGS8: + return "regs8"; + case PERF_REG_CSKY_REGS9: + return "regs9"; + case PERF_REG_CSKY_SP: + return "sp"; + case PERF_REG_CSKY_LR: + return "lr"; + case PERF_REG_CSKY_PC: + return "pc"; +#if defined(__CSKYABIV2__) + case PERF_REG_CSKY_EXREGS0: + return "exregs0"; + case PERF_REG_CSKY_EXREGS1: + return "exregs1"; + case PERF_REG_CSKY_EXREGS2: + return "exregs2"; + case PERF_REG_CSKY_EXREGS3: + return "exregs3"; + case PERF_REG_CSKY_EXREGS4: + return "exregs4"; + case PERF_REG_CSKY_EXREGS5: + return "exregs5"; + case PERF_REG_CSKY_EXREGS6: + return "exregs6"; + case PERF_REG_CSKY_EXREGS7: + return "exregs7"; + case PERF_REG_CSKY_EXREGS8: + return "exregs8"; + case PERF_REG_CSKY_EXREGS9: + return "exregs9"; + case PERF_REG_CSKY_EXREGS10: + return "exregs10"; + case PERF_REG_CSKY_EXREGS11: + return "exregs11"; + case PERF_REG_CSKY_EXREGS12: + return "exregs12"; + case PERF_REG_CSKY_EXREGS13: + return "exregs13"; + case PERF_REG_CSKY_EXREGS14: + return "exregs14"; + case PERF_REG_CSKY_TLS: + return "tls"; + case PERF_REG_CSKY_HI: + return "hi"; + case PERF_REG_CSKY_LO: + return "lo"; +#endif + default: + return NULL; + } + + return NULL; +} + #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 3f1886ad9d..1ca7bc3379 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -363,5 +363,3 @@ 446 n64 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease -449 n64 futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h index b8cd8bbb37..ee73b36a14 100644 --- a/tools/perf/arch/mips/include/perf_regs.h +++ b/tools/perf/arch/mips/include/perf_regs.h @@ -12,4 +12,73 @@ #define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + return "$8"; + case PERF_REG_MIPS_R9: + return 
"$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 2600b42372..7bef917cc8 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -528,5 +528,3 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv -450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h index 452b3d9045..c625380524 100644 --- a/tools/perf/arch/powerpc/include/arch-tests.h +++ b/tools/perf/arch/powerpc/include/arch-tests.h @@ -2,6 +2,6 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -extern struct test_suite *arch_tests[]; +extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 9bb17c3f37..04e5dc07e9 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -19,4 +19,68 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_POWERPC_NIP #define PERF_REG_SP PERF_REG_POWERPC_R1 +static const char *reg_names[] = { + [PERF_REG_POWERPC_R0] = "r0", + [PERF_REG_POWERPC_R1] = "r1", + [PERF_REG_POWERPC_R2] = "r2", + [PERF_REG_POWERPC_R3] = "r3", + [PERF_REG_POWERPC_R4] = "r4", + [PERF_REG_POWERPC_R5] = "r5", + [PERF_REG_POWERPC_R6] = "r6", + [PERF_REG_POWERPC_R7] = "r7", + [PERF_REG_POWERPC_R8] = "r8", + [PERF_REG_POWERPC_R9] = "r9", + [PERF_REG_POWERPC_R10] = "r10", + [PERF_REG_POWERPC_R11] = "r11", + [PERF_REG_POWERPC_R12] = "r12", + [PERF_REG_POWERPC_R13] = "r13", + [PERF_REG_POWERPC_R14] = "r14", + [PERF_REG_POWERPC_R15] = "r15", + [PERF_REG_POWERPC_R16] = "r16", + [PERF_REG_POWERPC_R17] = "r17", + [PERF_REG_POWERPC_R18] = "r18", + [PERF_REG_POWERPC_R19] = "r19", + [PERF_REG_POWERPC_R20] = "r20", + [PERF_REG_POWERPC_R21] = "r21", + [PERF_REG_POWERPC_R22] = "r22", + [PERF_REG_POWERPC_R23] = "r23", + [PERF_REG_POWERPC_R24] = "r24", + [PERF_REG_POWERPC_R25] = "r25", + [PERF_REG_POWERPC_R26] = "r26", + [PERF_REG_POWERPC_R27] = "r27", + [PERF_REG_POWERPC_R28] = "r28", + [PERF_REG_POWERPC_R29] = "r29", + [PERF_REG_POWERPC_R30] = "r30", + [PERF_REG_POWERPC_R31] = "r31", + [PERF_REG_POWERPC_NIP] = "nip", + [PERF_REG_POWERPC_MSR] = "msr", + [PERF_REG_POWERPC_ORIG_R3] = "orig_r3", + [PERF_REG_POWERPC_CTR] = "ctr", + [PERF_REG_POWERPC_LINK] = "link", + [PERF_REG_POWERPC_XER] = "xer", + [PERF_REG_POWERPC_CCR] = 
"ccr", + [PERF_REG_POWERPC_SOFTE] = "softe", + [PERF_REG_POWERPC_TRAP] = "trap", + [PERF_REG_POWERPC_DAR] = "dar", + [PERF_REG_POWERPC_DSISR] = "dsisr", + [PERF_REG_POWERPC_SIER] = "sier", + [PERF_REG_POWERPC_MMCRA] = "mmcra", + [PERF_REG_POWERPC_MMCR0] = "mmcr0", + [PERF_REG_POWERPC_MMCR1] = "mmcr1", + [PERF_REG_POWERPC_MMCR2] = "mmcr2", + [PERF_REG_POWERPC_MMCR3] = "mmcr3", + [PERF_REG_POWERPC_SIER2] = "sier2", + [PERF_REG_POWERPC_SIER3] = "sier3", + [PERF_REG_POWERPC_PMC1] = "pmc1", + [PERF_REG_POWERPC_PMC2] = "pmc2", + [PERF_REG_POWERPC_PMC3] = "pmc3", + [PERF_REG_POWERPC_PMC4] = "pmc4", + [PERF_REG_POWERPC_PMC5] = "pmc5", + [PERF_REG_POWERPC_PMC6] = "pmc6", +}; + +static inline const char *__perf_reg_name(int id) +{ + return reg_names[id]; +} #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c index eb98c57b5a..8c3fbd4af8 100644 --- a/tools/perf/arch/powerpc/tests/arch-tests.c +++ b/tools/perf/arch/powerpc/tests/arch-tests.c @@ -3,10 +3,14 @@ #include "tests/tests.h" #include "arch-tests.h" - -struct test_suite *arch_tests[] = { +struct test arch_tests[] = { #ifdef HAVE_DWARF_UNWIND_SUPPORT - &suite__dwarf_unwind, + { + .desc = "Test dwarf unwind", + .func = test__dwarf_unwind, + }, #endif - NULL, + { + .func = NULL, + }, }; diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index cf430a4c55..3bf4412574 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -40,12 +40,8 @@ const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local INSTR Latency")) return "Finish Cyc"; - else if (!strcmp(se_header, "INSTR Latency")) - return "Global Finish_cyc"; - else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) - return "Dispatch Cyc"; else if (!strcmp(se_header, "Pipeline Stage Cycle")) - return "Global Dispatch_cyc"; + return "Dispatch Cyc"; return se_header; } @@ -53,7 +49,5 @@ int arch_support_sort_key(const char *sort_key) { if (!strcmp(sort_key, "p_stage_cyc")) return 1; - if (!strcmp(sort_key, "local_p_stage_cyc")) - return 1; return 0; } diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index e8fe36b10d..58b2d610aa 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -40,7 +40,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } -int arch_get_runtimeparam(const struct pmu_event *pe) +int arch_get_runtimeparam(struct pmu_event *pe) { int count; char path[PATH_MAX] = "/devices/hv_24x7/interface/"; diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 1a9b40ea92..16510686c1 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -113,11 +113,10 @@ static int is_tracepoint_available(const char *str, struct evlist *evlist) struct parse_events_error err; int ret; - parse_events_error__init(&err); + bzero(&err, sizeof(err)); ret = parse_events(evlist, str, &err); if (err.str) - parse_events_error__print(&err, "tracepoint"); - parse_events_error__exit(&err); + parse_events_print_error(&err, "tracepoint"); return ret; } diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 8d07a78e74..8116a253f9 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -74,8 +74,6 @@ const struct sample_reg sample_reg_masks[] = { 
SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4), SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5), SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6), - SMPL_REG(sdar, PERF_REG_POWERPC_SDAR), - SMPL_REG(siar, PERF_REG_POWERPC_SIAR), SMPL_REG_END }; diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h index 6944bf0de5..6b02a767c9 100644 --- a/tools/perf/arch/riscv/include/perf_regs.h +++ b/tools/perf/arch/riscv/include/perf_regs.h @@ -19,4 +19,78 @@ #define PERF_REG_IP PERF_REG_RISCV_PC #define PERF_REG_SP PERF_REG_RISCV_SP +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_RISCV_PC: + return "pc"; + case PERF_REG_RISCV_RA: + return "ra"; + case PERF_REG_RISCV_SP: + return "sp"; + case PERF_REG_RISCV_GP: + return "gp"; + case PERF_REG_RISCV_TP: + return "tp"; + case PERF_REG_RISCV_T0: + return "t0"; + case PERF_REG_RISCV_T1: + return "t1"; + case PERF_REG_RISCV_T2: + return "t2"; + case PERF_REG_RISCV_S0: + return "s0"; + case PERF_REG_RISCV_S1: + return "s1"; + case PERF_REG_RISCV_A0: + return "a0"; + case PERF_REG_RISCV_A1: + return "a1"; + case PERF_REG_RISCV_A2: + return "a2"; + case PERF_REG_RISCV_A3: + return "a3"; + case PERF_REG_RISCV_A4: + return "a4"; + case PERF_REG_RISCV_A5: + return "a5"; + case PERF_REG_RISCV_A6: + return "a6"; + case PERF_REG_RISCV_A7: + return "a7"; + case PERF_REG_RISCV_S2: + return "s2"; + case PERF_REG_RISCV_S3: + return "s3"; + case PERF_REG_RISCV_S4: + return "s4"; + case PERF_REG_RISCV_S5: + return "s5"; + case PERF_REG_RISCV_S6: + return "s6"; + case PERF_REG_RISCV_S7: + return "s7"; + case PERF_REG_RISCV_S8: + return "s8"; + case PERF_REG_RISCV_S9: + return "s9"; + case PERF_REG_RISCV_S10: + return "s10"; + case PERF_REG_RISCV_S11: + return "s11"; + case PERF_REG_RISCV_T3: + return "t3"; + case PERF_REG_RISCV_T4: + return "t4"; + case PERF_REG_RISCV_T5: + return "t5"; + case PERF_REG_RISCV_T6: + return "t6"; + default: + return NULL; + } + + return NULL; +} + #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 799147658d..df5261e5cf 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -451,5 +451,3 @@ 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index 52fcc0891d..ce30315266 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -14,4 +14,82 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_S390_PC #define PERF_REG_SP PERF_REG_S390_R15 +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_S390_R0: + return "R0"; + case PERF_REG_S390_R1: + return "R1"; + case PERF_REG_S390_R2: + return "R2"; + case PERF_REG_S390_R3: + return "R3"; + case PERF_REG_S390_R4: + return "R4"; + case PERF_REG_S390_R5: + return "R5"; + case PERF_REG_S390_R6: + return "R6"; + case PERF_REG_S390_R7: + return "R7"; + case PERF_REG_S390_R8: + return "R8"; + case PERF_REG_S390_R9: + return "R9"; + case PERF_REG_S390_R10: + return "R10"; + case PERF_REG_S390_R11: + return "R11"; + case PERF_REG_S390_R12: 
+ return "R12"; + case PERF_REG_S390_R13: + return "R13"; + case PERF_REG_S390_R14: + return "R14"; + case PERF_REG_S390_R15: + return "R15"; + case PERF_REG_S390_FP0: + return "FP0"; + case PERF_REG_S390_FP1: + return "FP1"; + case PERF_REG_S390_FP2: + return "FP2"; + case PERF_REG_S390_FP3: + return "FP3"; + case PERF_REG_S390_FP4: + return "FP4"; + case PERF_REG_S390_FP5: + return "FP5"; + case PERF_REG_S390_FP6: + return "FP6"; + case PERF_REG_S390_FP7: + return "FP7"; + case PERF_REG_S390_FP8: + return "FP8"; + case PERF_REG_S390_FP9: + return "FP9"; + case PERF_REG_S390_FP10: + return "FP10"; + case PERF_REG_S390_FP11: + return "FP11"; + case PERF_REG_S390_FP12: + return "FP12"; + case PERF_REG_S390_FP13: + return "FP13"; + case PERF_REG_S390_FP14: + return "FP14"; + case PERF_REG_S390_FP15: + return "FP15"; + case PERF_REG_S390_MASK: + return "MASK"; + case PERF_REG_S390_PC: + return "PC"; + default: + return NULL; + } + + return NULL; +} + #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 305872692b..24ea12ec7e 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -144,31 +144,8 @@ static struct ins x86__instructions[] = { { .name = "xorps", .ops = &mov_ops, }, }; -static bool amd__ins_is_fused(struct arch *arch, const char *ins1, +static bool x86__ins_is_fused(struct arch *arch, const char *ins1, const char *ins2) -{ - if (strstr(ins2, "jmp")) - return false; - - /* Family >= 15h supports cmp/test + branch fusion */ - if (arch->family >= 0x15 && (strstarts(ins1, "test") || - (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) { - return true; - } - - /* Family >= 19h supports some ALU + branch fusion */ - if (arch->family >= 0x19 && (strstarts(ins1, "add") || - strstarts(ins1, "sub") || strstarts(ins1, "and") || - strstarts(ins1, "inc") || strstarts(ins1, "dec") || - strstarts(ins1, "or") || strstarts(ins1, "xor"))) { - return true; - } - - return false; -} - -static bool intel__ins_is_fused(struct arch *arch, const char *ins1, - const char *ins2) { if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp")) return false; @@ -207,9 +184,6 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid) if (ret == 3) { arch->family = family; arch->model = model; - arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ? 
- amd__ins_is_fused : - intel__ins_is_fused; return 0; } diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index c84d12608c..18b5500ea8 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -370,8 +370,6 @@ 446 common landlock_restrict_self sys_landlock_restrict_self 447 common memfd_secret sys_memfd_secret 448 common process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 6a1a1b3c08..9599e7a3f1 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -2,15 +2,15 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -struct test_suite; +struct test; /* Tests */ -int test__rdpmc(struct test_suite *test, int subtest); -int test__insn_x86(struct test_suite *test, int subtest); -int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest); -int test__bp_modify(struct test_suite *test, int subtest); -int test__x86_sample_parsing(struct test_suite *test, int subtest); +int test__rdpmc(struct test *test, int subtest); +int test__insn_x86(struct test *test, int subtest); +int test__intel_pt_pkt_decoder(struct test *test, int subtest); +int test__bp_modify(struct test *test, int subtest); +int test__x86_sample_parsing(struct test *test, int subtest); -extern struct test_suite *arch_tests[]; +extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index 16e23b7220..cddc4cdc0d 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -23,4 +23,86 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_X86_IP #define PERF_REG_SP PERF_REG_X86_SP +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case PERF_REG_X86_GS: + return "GS"; +#ifdef HAVE_ARCH_X86_64_SUPPORT + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; +#endif /* HAVE_ARCH_X86_64_SUPPORT */ + +#define XMM(x) \ + case PERF_REG_X86_XMM ## x: \ + case PERF_REG_X86_XMM ## x + 1: \ + return "XMM" #x; + XMM(0) + XMM(1) + XMM(2) + XMM(3) + XMM(4) + XMM(5) + XMM(6) + XMM(7) + XMM(8) + XMM(9) + XMM(10) + XMM(11) + XMM(12) + XMM(13) + XMM(14) + XMM(15) +#undef XMM + default: + return NULL; + } + + return NULL; +} + #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/tests/arch-tests.c 
b/tools/perf/arch/x86/tests/arch-tests.c index 64fb73d14d..71aa67367a 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -3,28 +3,39 @@ #include "tests/tests.h" #include "arch-tests.h" -DEFINE_SUITE("x86 rdpmc", rdpmc); -#ifdef HAVE_AUXTRACE_SUPPORT -DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86); -DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder); -#endif -#if defined(__x86_64__) -DEFINE_SUITE("x86 bp modify", bp_modify); -#endif -DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing); - -struct test_suite *arch_tests[] = { - &suite__rdpmc, +struct test arch_tests[] = { + { + .desc = "x86 rdpmc", + .func = test__rdpmc, + }, #ifdef HAVE_DWARF_UNWIND_SUPPORT - &suite__dwarf_unwind, + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, #endif #ifdef HAVE_AUXTRACE_SUPPORT - &suite__insn_x86, - &suite__intel_pt_pkt_decoder, + { + .desc = "x86 instruction decoder - new instructions", + .func = test__insn_x86, + }, + { + .desc = "Intel PT packet decoder", + .func = test__intel_pt_pkt_decoder, + }, #endif #if defined(__x86_64__) - &suite__bp_modify, + { + .desc = "x86 bp modify", + .func = test__bp_modify, + }, #endif - &suite__x86_sample_parsing, - NULL, + { + .desc = "x86 Sample parsing", + .func = test__x86_sample_parsing, + }, + { + .func = NULL, + }, + }; diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c index 0924ccd9e3..dffcf9b521 100644 --- a/tools/perf/arch/x86/tests/bp-modify.c +++ b/tools/perf/arch/x86/tests/bp-modify.c @@ -204,7 +204,7 @@ static int bp_modify2(void) return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL; } -int test__bp_modify(struct test_suite *test __maybe_unused, +int test__bp_modify(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1()); diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index 94b490c434..0262b0d8cc 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -173,7 +173,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64) * verbose (-v) option to see all the instructions and whether or not they * decoded successfully. */ -int test__insn_x86(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret = 0; diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index cb5b2c6c3b..27dd8cf9e0 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -37,7 +37,7 @@ static pid_t spawn(void) * the last read counter value to avoid triggering a WARN_ON_ONCE() in * smp_call_function_many() caused by sending IPIs from NMI context. 
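 *
 * (Reading a counter last programmed on another CPU would require an
 * IPI, and smp_call_function_many() contains a WARN_ON_ONCE() for
 * exactly this calling context, since interrupts are effectively
 * disabled inside an NMI handler; hence the cached value is returned.)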
*/ -int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused) { struct evlist *evlist = NULL; struct evsel *evsel = NULL; diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c index 2fc882ab24..c933e3dcd0 100644 --- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c @@ -289,7 +289,7 @@ static int test_one(struct test_data *d) * This test feeds byte sequences to the Intel PT packet decoder and checks the * results. Changes to the packet context are also checked. */ -int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused) { struct test_data *d = data; int ret; diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 498413ad9c..1ea916656a 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -157,7 +157,7 @@ static int __test__rdpmc(void) return 0; } -int test__rdpmc(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused) { int status = 0; int wret = 0; diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c index bfbd3662b6..c92db87e44 100644 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -115,7 +115,7 @@ static int do_test(u64 sample_type) * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type. * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type. */ -int test__x86_sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) { return do_test(PERF_SAMPLE_WEIGHT_STRUCT); } diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index f924246eff..0b0951030a 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -17,20 +17,3 @@ int arch_evlist__add_default_attrs(struct evlist *evlist) else return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); } - -struct evsel *arch_evlist__leader(struct list_head *list) -{ - struct evsel *evsel, *first; - - first = list_first_entry(list, struct evsel, core.node); - - if (!pmu_have_event("cpu", "slots")) - return first; - - __evlist__for_each_entry(list, evsel) { - if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && - evsel->name && strstr(evsel->name, "slots")) - return evsel; - } - return first; -} diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index ac2899a25b..2f733cdc8d 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -1,31 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include "util/evsel.h" -#include "util/env.h" -#include "linux/string.h" void arch_evsel__set_sample_weight(struct evsel *evsel) { evsel__set_sample_bit(evsel, WEIGHT_STRUCT); } - -void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) -{ - struct perf_env env = { .total_mem = 0, } ; - - if (!perf_env__cpuid(&env)) - return; - - /* - * On AMD, precise cycles event sampling internally uses IBS pmu. 
- * But IBS does not have filtering capabilities and perf by default - * sets exclude_guest = 1. This makes IBS pmu event init fail and - * thus perf ends up doing non-precise sampling. Avoid it by clearing - * exclude_guest. - */ - if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) - attr->exclude_guest = 0; - - free(env.cpuid); -} diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 740ae76453..ddaca75c3b 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -253,7 +253,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) @@ -333,7 +333,7 @@ int bench_epoll_ctl(int argc, const char **argv) /* default to the number of CPUs */ if (!nthreads) - nthreads = perf_cpu_map__nr(cpu); + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 37de970c97..79d13dbc0a 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -342,7 +342,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) @@ -452,7 +452,7 @@ int bench_epoll_wait(int argc, const char **argv) /* default to the number of CPUs and leave one for the writer pthread */ if (!nthreads) - nthreads = perf_cpu_map__nr(cpu) - 1; + nthreads = cpu->nr - 1; worker = calloc(nthreads, sizeof(*worker)); if (!worker) { diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index de56601f69..83e9897c64 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -25,11 +25,6 @@ static int iterations = 100; static int nr_events = 1; static const char *event_string = "dummy"; -static inline u64 timeval2usec(struct timeval *tv) -{ - return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; -} - static struct record_opts opts = { .sample_time = true, .mmap_pages = UINT_MAX, @@ -71,14 +66,14 @@ static int evlist__count_evsel_fds(struct evlist *evlist) int cnt = 0; evlist__for_each_entry(evlist, evsel) - cnt += evsel->core.threads->nr * perf_cpu_map__nr(evsel->core.cpus); + cnt += evsel->core.threads->nr * evsel->core.cpus->nr; return cnt; } static struct evlist *bench__create_evlist(char *evstr) { - struct parse_events_error err; + struct parse_events_error err = { .idx = 0, }; struct evlist *evlist = evlist__new(); int ret; @@ -87,16 +82,14 @@ static struct evlist *bench__create_evlist(char *evstr) return NULL; } - parse_events_error__init(&err); ret = parse_events(evlist, evstr, &err); if (ret) { - parse_events_error__print(&err, evstr); - parse_events_error__exit(&err); + parse_events_print_error(&err, evstr); pr_err("Run 'perf list' for a list of valid events\n"); ret = 1; goto out_delete_evlist; } - parse_events_error__exit(&err); + ret = evlist__create_maps(evlist, &opts.target); if (ret < 0) { pr_err("Not enough memory to create thread/cpu maps\n"); @@ -151,7 +144,7 @@ static int bench_evlist_open_close__run(char *evstr) init_stats(&time_stats); - printf(" Number of cpus:\t%d\n", 
perf_cpu_map__nr(evlist->core.cpus)); + printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr); printf(" Number of threads:\t%d\n", evlist->core.threads->nr); printf(" Number of events:\t%d (%d fds)\n", evlist->core.nr_entries, evlist__count_evsel_fds(evlist)); @@ -174,7 +167,7 @@ static int bench_evlist_open_close__run(char *evstr) gettimeofday(&end, NULL); timersub(&end, &start, &diff); - runtime_us = timeval2usec(&diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; update_stats(&time_stats, runtime_us); evlist__delete(evlist); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index dbcecec4ee..fcdea3e449 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -150,7 +150,7 @@ int bench_futex_hash(int argc, const char **argv) } if (!params.nthreads) /* default to the number of CPUs */ - params.nthreads = perf_cpu_map__nr(cpu); + params.nthreads = cpu->nr; worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) @@ -177,7 +177,7 @@ int bench_futex_hash(int argc, const char **argv) goto errmem; CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 6fc9a3d55c..137890f78e 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -136,7 +136,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, worker[i].futex = &global_futex; CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -173,7 +173,7 @@ int bench_futex_lock_pi(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = perf_cpu_map__nr(cpu); + params.nthreads = cpu->nr; worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2f59d5d1c5..f7a5ffebb9 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -131,7 +131,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -175,7 +175,7 @@ int bench_futex_requeue(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = perf_cpu_map__nr(cpu); + params.nthreads = cpu->nr; worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 861deb9347..0983f40b4b 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -152,7 +152,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) 
err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -252,7 +252,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "calloc"); if (!params.nthreads) - params.nthreads = perf_cpu_map__nr(cpu); + params.nthreads = cpu->nr; /* some sanity checks */ if (params.nwakes > params.nthreads || diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index cfda48bef1..2226a475e7 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -105,7 +105,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -151,7 +151,7 @@ int bench_futex_wake(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = perf_cpu_map__nr(cpu); + params.nthreads = cpu->nr; worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index ebdc2b032a..b3853aac30 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -28,7 +28,7 @@ struct bench_futex_parameters { }; /** - * futex_syscall() - SYS_futex syscall wrapper + * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex * @op: futex op code * @val: typically expected value of uaddr, but varies by op @@ -38,26 +38,17 @@ struct bench_futex_parameters { * @val3: varies by op * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG * - * futex_syscall() is used by all the following futex op wrappers. It can also be + * futex() is used by all the following futex op wrappers. It can also be * used for misuse and abuse testing. Generally, the specific op wrappers - * should be used instead. + * should be used instead. It is a macro instead of an static inline function as + * some of the types over overloaded (timeout is used for nr_requeue for + * example). * * These argument descriptions are the defaults for all * like-named arguments in the following wrappers except where noted below. 
*/ -static inline int -futex_syscall(volatile u_int32_t *uaddr, int op, u_int32_t val, struct timespec *timeout, - volatile u_int32_t *uaddr2, int val3, int opflags) -{ - return syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3); -} - -static inline int -futex_syscall_nr_requeue(volatile u_int32_t *uaddr, int op, u_int32_t val, int nr_requeue, - volatile u_int32_t *uaddr2, int val3, int opflags) -{ - return syscall(SYS_futex, uaddr, op | opflags, val, nr_requeue, uaddr2, val3); -} +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) /** * futex_wait() - block on uaddr with optional timeout @@ -66,7 +57,7 @@ futex_syscall_nr_requeue(volatile u_int32_t *uaddr, int op, u_int32_t val, int n static inline int futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags) { - return futex_syscall(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); } /** @@ -76,7 +67,7 @@ futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflag static inline int futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) { - return futex_syscall(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); } /** @@ -85,7 +76,7 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) static inline int futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) { - return futex_syscall(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); + return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); } /** @@ -94,7 +85,7 @@ futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) static inline int futex_unlock_pi(u_int32_t *uaddr, int opflags) { - return futex_syscall(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); } /** @@ -106,8 +97,8 @@ static inline int futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake, int nr_requeue, int opflags) { - return futex_syscall_nr_requeue(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, - val, opflags); + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); } /** @@ -122,8 +113,8 @@ static inline int futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, struct timespec *timeout, int opflags) { - return futex_syscall(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, - opflags); + return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); } /** @@ -139,8 +130,8 @@ static inline int futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_requeue, int opflags) { - return futex_syscall_nr_requeue(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2, - val, opflags); + return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2, + val, opflags); } #endif /* _FUTEX_H */ diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c index 7401ebbac1..05f7c923c7 100644 --- a/tools/perf/bench/synthesize.c +++ b/tools/perf/bench/synthesize.c @@ -80,7 +80,7 @@ static int do_run_single_threaded(struct perf_session *session, NULL, target, threads, process_synthesized_event, - true, data_mmap, + data_mmap, nr_threads_synthesize); if (err) return err; @@ -171,7 +171,7 @@ static int do_run_multi_threaded(struct target *target, NULL, target, NULL, process_synthesized_event, - true, false, + 
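The futex.h hunk above restores the macro form precisely because the fourth syscall slot is overloaded. A minimal illustration (outside the patch) of the same macro taking a struct timespec pointer for FUTEX_WAIT but a plain integer nr_requeue for FUTEX_CMP_REQUEUE, mirroring the wrappers in the patch:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
	syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)

static unsigned int f1, f2;

static void demo(void)
{
	struct timespec ts = { .tv_sec = 1 };

	/* slot 4 is a timeout pointer here ... */
	futex(&f1, FUTEX_WAIT, 0, &ts, NULL, 0, FUTEX_PRIVATE_FLAG);
	/* ... and a bare count (nr_requeue) here; a single static inline
	 * function could not type both, the macro needs no signature. */
	futex(&f1, FUTEX_CMP_REQUEUE, 1, 8, &f2, 0, FUTEX_PRIVATE_FLAG);
}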
false, nr_threads_synthesize); if (err) { perf_session__delete(session); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 490bb9b8cf..05eb098cb0 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -591,10 +591,6 @@ int cmd_annotate(int argc, const char **argv) return ret; } - ret = symbol__validate_sym_arguments(); - if (ret) - return ret; - if (quiet) perf_quiet_option(); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index d291f3a8af..d0895162c2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -226,6 +226,7 @@ static void run_collection(struct collection *coll) if (!bench->fn) break; printf("# Running %s/%s benchmark...\n", coll->name, bench->name); + fflush(stdout); argv[1] = bench->name; run_bench(coll->name, bench->name, bench->fn, 1, argv); @@ -246,9 +247,6 @@ int cmd_bench(int argc, const char **argv) struct collection *coll; int ret = 0; - /* Unbuffered output */ - setvbuf(stdout, NULL, _IONBF, 0); - if (argc < 2) { /* No collection specified. */ print_usage(); @@ -302,6 +300,7 @@ int cmd_bench(int argc, const char **argv) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); + fflush(stdout); ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index cd38169365..0db3cfc04c 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -351,14 +351,10 @@ static int build_id_cache__show_all(void) static int perf_buildid_cache_config(const char *var, const char *value, void *cb) { - struct perf_debuginfod *di = cb; + const char **debuginfod = cb; - if (!strcmp(var, "buildid-cache.debuginfod")) { - di->urls = strdup(value); - if (!di->urls) - return -ENOMEM; - di->set = true; - } + if (!strcmp(var, "buildid-cache.debuginfod")) + *debuginfod = strdup(value); return 0; } @@ -377,8 +373,8 @@ int cmd_buildid_cache(int argc, const char **argv) *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename = NULL; - struct perf_debuginfod debuginfod = { }; + *kcore_filename = NULL, + *debuginfod = NULL; char sbuf[STRERR_BUFSIZE]; struct perf_data data = { @@ -403,10 +399,8 @@ int cmd_buildid_cache(int argc, const char **argv) OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_STRING('u', "update", &update_name_list_str, "file list", "file(s) to update"), - OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls, - &debuginfod.set, "debuginfod urls", - "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", - "system"), + OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url", + "set debuginfod url"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"), OPT_END() @@ -431,7 +425,10 @@ int cmd_buildid_cache(int argc, const char **argv) if (argc || !(list_files || opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); - perf_debuginfod_setup(&debuginfod); + if (debuginfod) { + pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod); + setenv("DEBUGINFOD_URLS", debuginfod, 1); + } /* -l is exclusive. It can not be used with other options. 
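The buildid-cache hunk above goes back to exporting DEBUGINFOD_URLS directly instead of calling perf_debuginfod_setup(). The mechanism, reduced to a sketch (the helper name is illustrative, and the caller supplies the URL string; the patch does not mandate one):

#include <stdio.h>
#include <stdlib.h>

/* debuginfod-aware libraries read DEBUGINFOD_URLS at init time, so
 * exporting it before they run is all the "setup" needed here. */
static void enable_debuginfod(const char *urls)
{
	if (!urls)
		return;
	fprintf(stderr, "DEBUGINFOD_URLS=%s\n", urls);
	setenv("DEBUGINFOD_URLS", urls, 1);
}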
*/ if (list_files && opts_flag) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 77dd4afacc..a192014fa5 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2015,8 +2015,7 @@ static int setup_nodes(struct perf_session *session) { struct numa_node *n; unsigned long **nodes; - int node, idx; - struct perf_cpu cpu; + int node, cpu; int *cpu2node; if (c2c.node_info > 2) @@ -2039,8 +2038,8 @@ static int setup_nodes(struct perf_session *session) if (!cpu2node) return -ENOMEM; - for (idx = 0; idx < c2c.cpus_cnt; idx++) - cpu2node[idx] = -1; + for (cpu = 0; cpu < c2c.cpus_cnt; cpu++) + cpu2node[cpu] = -1; c2c.cpu2node = cpu2node; @@ -2058,13 +2057,13 @@ static int setup_nodes(struct perf_session *session) if (perf_cpu_map__empty(map)) continue; - perf_cpu_map__for_each_cpu(cpu, idx, map) { - set_bit(cpu.cpu, set); + for (cpu = 0; cpu < map->nr; cpu++) { + set_bit(map->map[cpu], set); - if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug")) + if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) return -EINVAL; - cpu2node[cpu.cpu] = node; + cpu2node[map->map[cpu]] = node; } } @@ -2769,10 +2768,6 @@ static int perf_c2c__report(int argc, const char **argv) if (c2c.stats_only) c2c.use_stdio = true; - err = symbol__validate_sym_arguments(); - if (err) - goto out; - if (!input_name || !strlen(input_name)) input_name = "perf.data"; diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 6cb3f6cc36..61929f63a0 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1121,6 +1121,8 @@ static int setup_config(struct daemon *daemon) #ifndef F_TLOCK #define F_TLOCK 2 +#include + static int lockf(int fd, int cmd, off_t len) { if (cmd != F_TLOCK || len != 0) @@ -1401,10 +1403,8 @@ static int send_cmd(struct daemon *daemon, union cmd *cmd) static int send_cmd_list(struct daemon *daemon) { - union cmd cmd; + union cmd cmd = { .cmd = CMD_LIST, }; - memset(&cmd, 0, sizeof(cmd)); - cmd.list.cmd = CMD_LIST; cmd.list.verbose = verbose; cmd.list.csv_sep = daemon->csv_sep ? 
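setup_nodes() in the c2c hunk above drops the struct perf_cpu indirection but keeps the same construction: every cpu listed in a node's map is assigned that node id, with a guard against a cpu appearing under two nodes. The idea as a standalone sketch (sizes and types simplified from the perf originals; callers pre-fill cpu2node with -1, as the hunk does):

#include <stdbool.h>
#include <stdio.h>

#define MAX_CPUS 64

static int cpu2node[MAX_CPUS];   /* node owning each cpu, -1 = unset */
static bool seen[MAX_CPUS];

static int assign_node(int node, const int *cpus, int nr)
{
	for (int i = 0; i < nr; i++) {
		int c = cpus[i];

		if (seen[c]) {
			/* mirrors the WARN_ONCE("node/cpu topology bug") above */
			fprintf(stderr, "cpu %d claimed by two nodes\n", c);
			return -1;
		}
		seen[c] = true;
		cpu2node[c] = node;
	}
	return 0;
}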
*daemon->csv_sep : 0; @@ -1432,7 +1432,6 @@ static int __cmd_signal(struct daemon *daemon, struct option parent_options[], return -1; } - memset(&cmd, 0, sizeof(cmd)); cmd.signal.cmd = CMD_SIGNAL, cmd.signal.sig = SIGUSR2; strncpy(cmd.signal.name, name, sizeof(cmd.signal.name) - 1); @@ -1447,7 +1446,7 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[], OPT_PARENT(parent_options), OPT_END() }; - union cmd cmd; + union cmd cmd = { .cmd = CMD_STOP, }; argc = parse_options(argc, argv, start_options, daemon_usage, 0); if (argc) @@ -1458,8 +1457,6 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[], return -1; } - memset(&cmd, 0, sizeof(cmd)); - cmd.cmd = CMD_STOP; return send_cmd(daemon, &cmd); } @@ -1473,7 +1470,7 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[], OPT_PARENT(parent_options), OPT_END() }; - union cmd cmd; + union cmd cmd = { .cmd = CMD_PING, }; argc = parse_options(argc, argv, ping_options, daemon_usage, 0); if (argc) @@ -1484,8 +1481,6 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[], return -1; } - memset(&cmd, 0, sizeof(cmd)); - cmd.cmd = CMD_PING; scnprintf(cmd.ping.name, sizeof(cmd.ping.name), "%s", name); return send_cmd(daemon, &cmd); } diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index a8785dec5c..87cb11a7a3 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -13,9 +13,7 @@ #include #include #include -#include #include -#include #include #include @@ -30,12 +28,36 @@ #include "strfilter.h" #include "util/cap.h" #include "util/config.h" -#include "util/ftrace.h" #include "util/units.h" #include "util/parse-sublevel-options.h" #define DEFAULT_TRACER "function_graph" +struct perf_ftrace { + struct evlist *evlist; + struct target target; + const char *tracer; + struct list_head filters; + struct list_head notrace; + struct list_head graph_funcs; + struct list_head nograph_funcs; + int graph_depth; + unsigned long percpu_buffer_size; + bool inherit; + int func_stack_trace; + int func_irq_info; + int graph_nosleep_time; + int graph_noirqs; + int graph_verbose; + int graph_thresh; + unsigned int initial_delay; +}; + +struct filter_entry { + struct list_head list; + char name[]; +}; + static volatile int workload_exec_errno; static bool done; @@ -281,7 +303,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap) int ret; int last_cpu; - last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu; + last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ @@ -543,24 +565,7 @@ static int set_tracing_options(struct perf_ftrace *ftrace) return 0; } -static void select_tracer(struct perf_ftrace *ftrace) -{ - bool graph = !list_empty(&ftrace->graph_funcs) || - !list_empty(&ftrace->nograph_funcs); - bool func = !list_empty(&ftrace->filters) || - !list_empty(&ftrace->notrace); - - /* The function_graph has priority over function tracer. */ - if (graph) - ftrace->tracer = "function_graph"; - else if (func) - ftrace->tracer = "function"; - /* Otherwise, the default tracer is used. 
*/ - - pr_debug("%s tracer is used\n", ftrace->tracer); -} - -static int __cmd_ftrace(struct perf_ftrace *ftrace) +static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; int trace_fd; @@ -581,7 +586,10 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace) return -1; } - select_tracer(ftrace); + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); @@ -592,6 +600,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace) if (write_tracing_file("trace", "0") < 0) goto out; + if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false, + ftrace__workload_exec_failed_signal) < 0) { + goto out; + } + if (set_tracing_options(ftrace) < 0) goto out_reset; @@ -680,270 +693,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace) return (done && !workload_exec_errno) ? 0 : -1; } -static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf) -{ - char *p, *q; - char *unit; - double num; - int i; - - /* ensure NUL termination */ - buf[len] = '\0'; - - /* handle data line by line */ - for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) { - *q = '\0'; - /* move it to the line buffer */ - strcat(linebuf, p); - - /* - * parse trace output to get function duration like in - * - * # tracer: function_graph - * # - * # CPU DURATION FUNCTION CALLS - * # | | | | | | | - * 1) + 10.291 us | do_filp_open(); - * 1) 4.889 us | do_filp_open(); - * 1) 6.086 us | do_filp_open(); - * - */ - if (linebuf[0] == '#') - goto next; - - /* ignore CPU */ - p = strchr(linebuf, ')'); - if (p == NULL) - p = linebuf; - - while (*p && !isdigit(*p) && (*p != '|')) - p++; - - /* no duration */ - if (*p == '\0' || *p == '|') - goto next; - - num = strtod(p, &unit); - if (!unit || strncmp(unit, " us", 3)) - goto next; - - i = log2(num); - if (i < 0) - i = 0; - if (i >= NUM_BUCKET) - i = NUM_BUCKET - 1; - - buckets[i]++; - -next: - /* empty the line buffer for the next output */ - linebuf[0] = '\0'; - } - - /* preserve any remaining output (before newline) */ - strcat(linebuf, p); -} - -static void display_histogram(int buckets[]) -{ - int i; - int total = 0; - int bar_total = 46; /* to fit in 80 column */ - char bar[] = "###############################################"; - int bar_len; - - for (i = 0; i < NUM_BUCKET; i++) - total += buckets[i]; - - if (total == 0) { - printf("No data found\n"); - return; - } - - printf("# %14s | %10s | %-*s |\n", - " DURATION ", "COUNT", bar_total, "GRAPH"); - - bar_len = buckets[0] * bar_total / total; - printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", - 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); - - for (i = 1; i < NUM_BUCKET - 1; i++) { - int start = (1 << (i - 1)); - int stop = 1 << i; - const char *unit = "us"; - - if (start >= 1024) { - start >>= 10; - stop >>= 10; - unit = "ms"; - } - bar_len = buckets[i] * bar_total / total; - printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", - start, stop, unit, buckets[i], bar_len, bar, - bar_total - bar_len, ""); - } - - bar_len = buckets[NUM_BUCKET - 1] * bar_total / total; - printf(" %4d - %-4s %s | %10d | %.*s%*s |\n", - 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar, - bar_total - bar_len, ""); - -} - -static int prepare_func_latency(struct perf_ftrace *ftrace) -{ - char *trace_file; - int fd; - - if (ftrace->target.use_bpf) - return perf_ftrace__latency_prepare_bpf(ftrace); - - if 
(reset_tracing_files(ftrace) < 0) { - pr_err("failed to reset ftrace\n"); - return -1; - } - - /* reset ftrace buffer */ - if (write_tracing_file("trace", "0") < 0) - return -1; - - if (set_tracing_options(ftrace) < 0) - return -1; - - /* force to use the function_graph tracer to track duration */ - if (write_tracing_file("current_tracer", "function_graph") < 0) { - pr_err("failed to set current_tracer to function_graph\n"); - return -1; - } - - trace_file = get_tracing_file("trace_pipe"); - if (!trace_file) { - pr_err("failed to open trace_pipe\n"); - return -1; - } - - fd = open(trace_file, O_RDONLY); - if (fd < 0) - pr_err("failed to open trace_pipe\n"); - - put_tracing_file(trace_file); - return fd; -} - -static int start_func_latency(struct perf_ftrace *ftrace) -{ - if (ftrace->target.use_bpf) - return perf_ftrace__latency_start_bpf(ftrace); - - if (write_tracing_file("tracing_on", "1") < 0) { - pr_err("can't enable tracing\n"); - return -1; - } - - return 0; -} - -static int stop_func_latency(struct perf_ftrace *ftrace) -{ - if (ftrace->target.use_bpf) - return perf_ftrace__latency_stop_bpf(ftrace); - - write_tracing_file("tracing_on", "0"); - return 0; -} - -static int read_func_latency(struct perf_ftrace *ftrace, int buckets[]) -{ - if (ftrace->target.use_bpf) - return perf_ftrace__latency_read_bpf(ftrace, buckets); - - return 0; -} - -static int cleanup_func_latency(struct perf_ftrace *ftrace) -{ - if (ftrace->target.use_bpf) - return perf_ftrace__latency_cleanup_bpf(ftrace); - - reset_tracing_files(ftrace); - return 0; -} - -static int __cmd_latency(struct perf_ftrace *ftrace) -{ - int trace_fd; - char buf[4096]; - char line[256]; - struct pollfd pollfd = { - .events = POLLIN, - }; - int buckets[NUM_BUCKET] = { }; - - if (!(perf_cap__capable(CAP_PERFMON) || - perf_cap__capable(CAP_SYS_ADMIN))) { - pr_err("ftrace only works for %s!\n", -#ifdef HAVE_LIBCAP_SUPPORT - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" -#else - "root" -#endif - ); - return -1; - } - - trace_fd = prepare_func_latency(ftrace); - if (trace_fd < 0) - goto out; - - fcntl(trace_fd, F_SETFL, O_NONBLOCK); - pollfd.fd = trace_fd; - - if (start_func_latency(ftrace) < 0) - goto out; - - evlist__start_workload(ftrace->evlist); - - line[0] = '\0'; - while (!done) { - if (poll(&pollfd, 1, -1) < 0) - break; - - if (pollfd.revents & POLLIN) { - int n = read(trace_fd, buf, sizeof(buf) - 1); - if (n < 0) - break; - - make_histogram(buckets, buf, n, line); - } - } - - stop_func_latency(ftrace); - - if (workload_exec_errno) { - const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); - pr_err("workload failed: %s\n", emsg); - goto out; - } - - /* read remaining buffer contents */ - while (!ftrace->target.use_bpf) { - int n = read(trace_fd, buf, sizeof(buf) - 1); - if (n <= 0) - break; - make_histogram(buckets, buf, n, line); - } - - read_func_latency(ftrace, buckets); - - display_histogram(buckets); - -out: - close(trace_fd); - cleanup_func_latency(ftrace); - - return (done && !workload_exec_errno) ? 
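The make_histogram()/display_histogram() pair removed above bins each parsed function duration by log2 into power-of-two buckets. Just the bucketing rule, as a self-contained sketch: durations are assumed already parsed to microseconds, the NUM_BUCKET value is an assumption, and the clamping mirrors the deleted code.

#include <math.h>

#define NUM_BUCKET 22

/* bucket index is floor(log2(us)), clamped to the table: sub-microsecond
 * samples land in bucket 0, oversized ones in the last bucket. */
static void account(int buckets[NUM_BUCKET], double us)
{
	int i = log2(us);

	if (i < 0)
		i = 0;
	if (i >= NUM_BUCKET)
		i = NUM_BUCKET - 1;
	buckets[i]++;
}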
0 : -1; -} - static int perf_ftrace_config(const char *var, const char *value, void *cb) { struct perf_ftrace *ftrace = cb; @@ -1106,21 +855,41 @@ static int parse_graph_tracer_opts(const struct option *opt, return 0; } -enum perf_ftrace_subcommand { - PERF_FTRACE_NONE, - PERF_FTRACE_TRACE, - PERF_FTRACE_LATENCY, -}; +static void select_tracer(struct perf_ftrace *ftrace) +{ + bool graph = !list_empty(&ftrace->graph_funcs) || + !list_empty(&ftrace->nograph_funcs); + bool func = !list_empty(&ftrace->filters) || + !list_empty(&ftrace->notrace); + + /* The function_graph has priority over function tracer. */ + if (graph) + ftrace->tracer = "function_graph"; + else if (func) + ftrace->tracer = "function"; + /* Otherwise, the default tracer is used. */ + + pr_debug("%s tracer is used\n", ftrace->tracer); +} int cmd_ftrace(int argc, const char **argv) { int ret; - int (*cmd_func)(struct perf_ftrace *) = NULL; struct perf_ftrace ftrace = { .tracer = DEFAULT_TRACER, .target = { .uid = UINT_MAX, }, }; - const struct option common_options[] = { + const char * const ftrace_usage[] = { + "perf ftrace [<options>] [<command>]", + "perf ftrace [<options>] -- <command> [<command-options>]", + NULL + }; + const struct option ftrace_options[] = { + OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", + "Tracer to use: function_graph(default) or function"), + OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", + "Show available functions to filter", + opt_list_avail_functions, "*"), OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "Trace on existing process id"), /* TODO: Add short option -t after -t/--tracer can be removed. */ @@ -1132,14 +901,6 @@ int cmd_ftrace(int argc, const char **argv) "System-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", "List of cpus to monitor"), - OPT_END() - }; - const struct option ftrace_options[] = { - OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", - "Tracer to use: function_graph(default) or function"), - OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", - "Show available functions to filter", - opt_list_avail_functions, "*"), OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", "Trace given functions using function tracer", parse_filter_func), @@ -1162,87 +923,24 @@ int cmd_ftrace(int argc, const char **argv) "Trace children processes"), OPT_UINTEGER('D', "delay", &ftrace.initial_delay, "Number of milliseconds to wait before starting tracing after program start"), - OPT_PARENT(common_options), + OPT_END() }; - const struct option latency_options[] = { - OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", - "Show latency of given function", parse_filter_func), -#ifdef HAVE_BPF_SKEL - OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, - "Use BPF to measure function latency"), -#endif - OPT_PARENT(common_options), - }; - const struct option *options = ftrace_options; - - const char * const ftrace_usage[] = { - "perf ftrace [<options>] [<command>]", - "perf ftrace [<options>] -- [<command>] [<command-options>]", - "perf ftrace {trace|latency} [<options>] [<command>]", - "perf ftrace {trace|latency} [<options>] -- [<command>] [<command-options>]", - NULL - }; - enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE; INIT_LIST_HEAD(&ftrace.filters); INIT_LIST_HEAD(&ftrace.notrace); INIT_LIST_HEAD(&ftrace.graph_funcs); INIT_LIST_HEAD(&ftrace.nograph_funcs); - signal(SIGINT, sig_handler); - signal(SIGUSR1, sig_handler); - signal(SIGCHLD, sig_handler); - signal(SIGPIPE, sig_handler); - ret = perf_config(perf_ftrace_config, &ftrace); if (ret < 0) return -1; - if (argc > 1) { - if (!strcmp(argv[1], "trace")) { - subcmd = PERF_FTRACE_TRACE; - } else if (!strcmp(argv[1], "latency")) {
- subcmd = PERF_FTRACE_LATENCY; - options = latency_options; - } - - if (subcmd != PERF_FTRACE_NONE) { - argc--; - argv++; - } - } - /* for backward compatibility */ - if (subcmd == PERF_FTRACE_NONE) - subcmd = PERF_FTRACE_TRACE; - - argc = parse_options(argc, argv, options, ftrace_usage, + argc = parse_options(argc, argv, ftrace_options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (argc < 0) { - ret = -EINVAL; - goto out_delete_filters; - } + if (!argc && target__none(&ftrace.target)) + ftrace.target.system_wide = true; - switch (subcmd) { - case PERF_FTRACE_TRACE: - if (!argc && target__none(&ftrace.target)) - ftrace.target.system_wide = true; - cmd_func = __cmd_ftrace; - break; - case PERF_FTRACE_LATENCY: - if (list_empty(&ftrace.filters)) { - pr_err("Should provide a function to measure\n"); - parse_options_usage(ftrace_usage, options, "T", 1); - ret = -EINVAL; - goto out_delete_filters; - } - cmd_func = __cmd_latency; - break; - case PERF_FTRACE_NONE: - default: - pr_err("Invalid subcommand\n"); - ret = -EINVAL; - goto out_delete_filters; - } + select_tracer(&ftrace); ret = target__validate(&ftrace.target); if (ret) { @@ -1263,15 +961,7 @@ int cmd_ftrace(int argc, const char **argv) if (ret < 0) goto out_delete_evlist; - if (argc) { - ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target, - argv, false, - ftrace__workload_exec_failed_signal); - if (ret < 0) - goto out_delete_evlist; - } - - ret = cmd_func(&ftrace); + ret = __cmd_ftrace(&ftrace, argc, argv); out_delete_evlist: evlist__delete(ftrace.evlist); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index fbf43a454c..50c2e6892b 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -535,9 +535,12 @@ static int perf_event__repipe_exit(struct perf_tool *tool, static int perf_event__repipe_tracing_data(struct perf_session *session, union perf_event *event) { - perf_event__repipe_synth(session->tool, event); + int err; - return perf_event__process_tracing_data(session, event); + perf_event__repipe_synth(session->tool, event); + err = perf_event__process_tracing_data(session, event); + + return err; } static int dso__read_build_id(struct dso *dso) @@ -816,8 +819,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.auxtrace_info = perf_event__process_auxtrace_info; inject->tool.auxtrace = perf_event__process_auxtrace; inject->tool.aux = perf_event__drop_aux; - inject->tool.itrace_start = perf_event__drop_aux; - inject->tool.aux_output_hw_id = perf_event__drop_aux; + inject->tool.itrace_start = perf_event__drop_aux, inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ @@ -884,7 +886,6 @@ int cmd_inject(int argc, const char **argv) .lost_samples = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, - .aux_output_hw_id = perf_event__repipe, .context_switch = perf_event__repipe, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, @@ -941,10 +942,6 @@ int cmd_inject(int argc, const char **argv) #endif OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), - OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, - "file", "vmlinux pathname"), - OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, - "don't load vmlinux even if found"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), @@ -979,9 +976,6 @@ int 
cmd_inject(int argc, const char **argv) return -1; } - if (symbol__validate_sym_arguments()) - return -1; - if (inject.in_place_update) { if (!strcmp(inject.input_name, "-")) { pr_err("Input file name required for in-place updating\n"); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 99d7ff9a8e..da03a341c6 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp int ret = evsel__process_alloc_event(evsel, sample); if (!ret) { - int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), + int node1 = cpu__get_node(sample->cpu), node2 = evsel__intval(evsel, sample, "node"); if (node1 != node2) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index c6f352ee57..aa1b127ffb 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1456,7 +1456,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, perf_session__set_id_hdr_size(kvm->session); ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, - kvm->evlist->core.threads, true, false, 1); + kvm->evlist->core.threads, false, 1); err = kvm_live_open_events(kvm); if (err) goto out; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 468958154e..10ab5e40a3 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -12,7 +12,6 @@ #include "util/parse-events.h" #include "util/pmu.h" -#include "util/pmu-hybrid.h" #include "util/debug.h" #include "util/metricgroup.h" #include @@ -21,15 +20,13 @@ static bool desc_flag = true; static bool details_flag; -static const char *hybrid_type; int cmd_list(int argc, const char **argv) { - int i, ret = 0; + int i; bool raw_dump = false; bool long_desc_flag = false; bool deprecated = false; - char *pmu_name = NULL; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", &desc_flag, @@ -40,9 +37,6 @@ int cmd_list(int argc, const char **argv) "Print information on the perf event names and expressions used internally by events."), OPT_BOOLEAN(0, "deprecated", &deprecated, "Print deprecated events."), - OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type", - "Print events applying cpu with this type for hybrid platform " - "(e.g. 
core or atom)"), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -62,16 +56,10 @@ int cmd_list(int argc, const char **argv) if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e):\n\n"); - if (hybrid_type) { - pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type); - if (!pmu_name) - pr_warning("WARNING: hybrid cputype is not supported!\n"); - } - if (argc == 0) { print_events(NULL, raw_dump, !desc_flag, long_desc_flag, - details_flag, deprecated, pmu_name); - goto out; + details_flag, deprecated); + return 0; } for (i = 0; i < argc; ++i) { @@ -94,27 +82,25 @@ int cmd_list(int argc, const char **argv) else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump, !desc_flag, long_desc_flag, details_flag, - deprecated, pmu_name); + deprecated); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) - metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name); + metricgroup__print(true, false, NULL, raw_dump, details_flag); else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) - metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name); + metricgroup__print(false, true, NULL, raw_dump, details_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; sep_idx = sep - argv[i]; s = strdup(argv[i]); - if (s == NULL) { - ret = -1; - goto out; - } + if (s == NULL) + return -1; s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); + metricgroup__print(true, true, s, raw_dump, details_flag); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -130,16 +116,12 @@ int cmd_list(int argc, const char **argv) print_pmu_events(s, raw_dump, !desc_flag, long_desc_flag, details_flag, - deprecated, - pmu_name); + deprecated); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); + metricgroup__print(true, true, s, raw_dump, details_flag); free(s); } } - -out: - free(pmu_name); - return ret; + return 0; } diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index c31627af75..e1dd51f287 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -21,7 +21,6 @@ #include "util/build-id.h" #include "util/strlist.h" #include "util/strfilter.h" -#include "util/symbol.h" #include "util/symbol_conf.h" #include "util/debug.h" #include @@ -630,10 +629,6 @@ __cmd_probe(int argc, const char **argv) params.command = 'a'; } - ret = symbol__validate_sym_arguments(); - if (ret) - return ret; - if (params.quiet) { if (verbose != 0) { pr_err(" Error: -v and -q are exclusive.\n"); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bb716c953d..b3509d9d20 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -111,7 +111,6 @@ struct record { unsigned long long samples; struct mmap_cpu_mask affinity_mask; unsigned long output_max_size; /* = 0: unlimited */ - struct perf_debuginfod debuginfod; }; static volatile int done; @@ -1256,7 +1255,6 @@ static int record__synthesize_workload(struct record *rec, bool tail) { int err; struct perf_thread_map *thread_map; - bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; if (rec->opts.tail_synthesize != tail) return 0; @@ -1268,7 +1266,6 @@ 
static int record__synthesize_workload(struct record *rec, bool tail) err = perf_event__synthesize_thread_map(&rec->tool, thread_map, process_synthesized_event, &rec->session->machines.host, - needs_mmap, rec->opts.sample_address); perf_thread_map__put(thread_map); return err; @@ -1412,7 +1409,7 @@ static int record__synthesize(struct record *rec, bool tail) goto out; /* Synthesize id_index before auxtrace_info */ - if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) { + if (rec->opts.auxtrace_sample_mode) { err = perf_event__synthesize_id_index(tool, process_synthesized_event, session->evlist, machine); @@ -1473,26 +1470,19 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); - if (rec->opts.synth & PERF_SYNTH_CGROUP) { - err = perf_event__synthesize_cgroups(tool, process_synthesized_event, - machine); - if (err < 0) - pr_warning("Couldn't synthesize cgroup events.\n"); - } + err = perf_event__synthesize_cgroups(tool, process_synthesized_event, + machine); + if (err < 0) + pr_warning("Couldn't synthesize cgroup events.\n"); if (rec->opts.nr_threads_synthesize > 1) { perf_set_multithreaded(); f = process_locked_synthesized_event; } - if (rec->opts.synth & PERF_SYNTH_TASK) { - bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; - - err = __machine__synthesize_threads(machine, tool, &opts->target, - rec->evlist->core.threads, - f, needs_mmap, opts->sample_address, - rec->opts.nr_threads_synthesize); - } + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, + f, opts->sample_address, + rec->opts.nr_threads_synthesize); if (rec->opts.nr_threads_synthesize > 1) perf_set_singlethreaded(); @@ -2178,12 +2168,6 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->opts.nr_cblocks = nr_cblocks_default; } #endif - if (!strcmp(var, "record.debuginfod")) { - rec->debuginfod.urls = strdup(value); - if (!rec->debuginfod.urls) - return -ENOMEM; - rec->debuginfod.set = true; - } return 0; } @@ -2274,10 +2258,6 @@ static int record__parse_mmap_pages(const struct option *opt, return ret; } -void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) -{ -} - static int parse_control_option(const struct option *opt, const char *str, int unset __maybe_unused) @@ -2411,26 +2391,6 @@ static int process_timestamp_boundary(struct perf_tool *tool, return 0; } -static int parse_record_synth_option(const struct option *opt, - const char *str, - int unset __maybe_unused) -{ - struct record_opts *opts = opt->value; - char *p = strdup(str); - - if (p == NULL) - return -1; - - opts->synth = parse_synth_opt(p); - free(p); - - if (opts->synth < 0) { - pr_err("Invalid synth option: %s\n", str); - return -1; - } - return 0; -} - /* * XXX Ideally would be local to cmd_record() and passed to a record__new * because we need to have access to it in record__exit, that is called @@ -2456,7 +2416,6 @@ static struct record record = { .nr_threads_synthesize = 1, .ctl_fd = -1, .ctl_fd_ack = -1, - .synth = PERF_SYNTH_ALL, }, .tool = { .sample = process_sample_event, @@ -2672,12 +2631,6 @@ static struct option __record_options[] = { "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), - OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", - "Fine-tune event synthesis: default=all", 
parse_record_synth_option), - OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, - &record.debuginfod.set, "debuginfod urls", - "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", - "system"), OPT_END() }; @@ -2727,12 +2680,6 @@ int cmd_record(int argc, const char **argv) if (quiet) perf_quiet_option(); - err = symbol__validate_sym_arguments(); - if (err) - return err; - - perf_debuginfod_setup(&record.debuginfod); - /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) rec->opts.target.system_wide = true; @@ -2809,7 +2756,7 @@ int cmd_record(int argc, const char **argv) symbol__init(NULL); if (rec->opts.affinity != PERF_AFFINITY_SYS) { - rec->affinity_mask.nbits = cpu__max_cpu().cpu; + rec->affinity_mask.nbits = cpu__max_cpu(); rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits); if (!rec->affinity_mask.bits) { pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); @@ -2915,10 +2862,6 @@ int cmd_record(int argc, const char **argv) } rec->opts.target.hybrid = perf_pmu__has_hybrid(); - - if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) - arch__add_leaf_frame_record_opts(&rec->opts); - err = -ENOMEM; if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1dd92d8c92..997e0a4b09 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep) } } - callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env)); + callchain_param_setup(sample_type); if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" @@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused, * on events sample_type. */ sample_type = evlist__combined_sample_type(*pevlist); - callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); + callchain_param_setup(sample_type); return 0; } @@ -1381,9 +1381,18 @@ int cmd_report(int argc, const char **argv) if (quiet) perf_quiet_option(); - ret = symbol__validate_sym_arguments(); - if (ret) + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + ret = -EINVAL; goto exit; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + ret = -EINVAL; + goto exit; + } if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 72d446de9c..635a6b5a9e 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -167,7 +167,7 @@ struct trace_sched_handler { struct perf_sched_map { DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); - struct perf_cpu *comp_cpus; + int *comp_cpus; bool comp; struct perf_thread_map *color_pids; const char *color_pids_str; @@ -191,7 +191,7 @@ struct perf_sched { * Track the current task - that way we can know whether there's any * weird events, such as a task being switched away that is not current. 
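The sched hunks that follow are representative of the whole revert: struct perf_cpu values go back to bare ints. For contrast, a sketch of the wrapper idiom being removed; the one-field struct is the upstream shape, the helper is illustrative only:

/* Same size as an int, but a distinct type: a map index can no longer
 * be passed where a logical cpu number is expected without a compile
 * error, which is what the wrapper bought before this revert. */
struct perf_cpu {
	int cpu;
};

static struct perf_cpu map_get(const struct perf_cpu *map, int idx)
{
	return map[idx];
}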
*/ - struct perf_cpu max_cpu; + int max_cpu; u32 curr_pid[MAX_CPUS]; struct thread *curr_thread[MAX_CPUS]; char next_shortname1; @@ -1535,31 +1535,28 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, int new_shortname; u64 timestamp0, timestamp = sample->time; s64 delta; - int i; - struct perf_cpu this_cpu = { - .cpu = sample->cpu, - }; + int i, this_cpu = sample->cpu; int cpus_nr; bool new_cpu = false; const char *color = PERF_COLOR_NORMAL; char stimestamp[32]; - BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); + BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); - if (this_cpu.cpu > sched->max_cpu.cpu) + if (this_cpu > sched->max_cpu) sched->max_cpu = this_cpu; if (sched->map.comp) { cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); - if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { + if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) { sched->map.comp_cpus[cpus_nr++] = this_cpu; new_cpu = true; } } else - cpus_nr = sched->max_cpu.cpu; + cpus_nr = sched->max_cpu; - timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; - sched->cpu_last_switched[this_cpu.cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[this_cpu]; + sched->cpu_last_switched[this_cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else @@ -1580,7 +1577,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, return -1; } - sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); + sched->curr_thread[this_cpu] = thread__get(sched_in); printf(" "); @@ -1611,10 +1608,8 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, } for (i = 0; i < cpus_nr; i++) { - struct perf_cpu cpu = { - .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i, - }; - struct thread *curr_thread = sched->curr_thread[cpu.cpu]; + int cpu = sched->map.comp ? 
sched->map.comp_cpus[i] : i; + struct thread *curr_thread = sched->curr_thread[cpu]; struct thread_runtime *curr_tr; const char *pid_color = color; const char *cpu_color = color; @@ -1622,19 +1617,19 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (curr_thread && thread__has_color(curr_thread)) pid_color = COLOR_PIDS; - if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu)) + if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) continue; - if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) + if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) cpu_color = COLOR_CPUS; - if (cpu.cpu != this_cpu.cpu) + if (cpu != this_cpu) color_fprintf(stdout, color, " "); else color_fprintf(stdout, cpu_color, "*"); - if (sched->curr_thread[cpu.cpu]) { - curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); + if (sched->curr_thread[cpu]) { + curr_tr = thread__get_runtime(sched->curr_thread[cpu]); if (curr_tr == NULL) { thread__put(sched_in); return -1; @@ -1644,7 +1639,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, color_fprintf(stdout, color, " "); } - if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) + if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) goto out; timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); @@ -1934,7 +1929,7 @@ static char *timehist_get_commstr(struct thread *thread) static void timehist_header(struct perf_sched *sched) { - u32 ncpus = sched->max_cpu.cpu + 1; + u32 ncpus = sched->max_cpu + 1; u32 i, j; printf("%15s %6s ", "time", "cpu"); @@ -2013,7 +2008,7 @@ static void timehist_print_sample(struct perf_sched *sched, struct thread_runtime *tr = thread__priv(thread); const char *next_comm = evsel__strval(evsel, sample, "next_comm"); const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); - u32 max_cpus = sched->max_cpu.cpu + 1; + u32 max_cpus = sched->max_cpu + 1; char tstr[64]; char nstr[30]; u64 wait_time; @@ -2394,7 +2389,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); printf("%15s [%04d] ", tstr, sample->cpu); if (sched->show_cpu_visual) - printf(" %*s ", sched->max_cpu.cpu + 1, ""); + printf(" %*s ", sched->max_cpu + 1, ""); printf(" %-*s ", comm_width, timehist_get_commstr(thread)); @@ -2454,13 +2449,13 @@ static void timehist_print_migration_event(struct perf_sched *sched, { struct thread *thread; char tstr[64]; - u32 max_cpus; + u32 max_cpus = sched->max_cpu + 1; u32 ocpu, dcpu; if (sched->summary_only) return; - max_cpus = sched->max_cpu.cpu + 1; + max_cpus = sched->max_cpu + 1; ocpu = evsel__intval(evsel, sample, "orig_cpu"); dcpu = evsel__intval(evsel, sample, "dest_cpu"); @@ -2923,7 +2918,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" Total scheduling time (msec): "); print_sched_time(hist_time, 2); - printf(" (x %d)\n", sched->max_cpu.cpu); + printf(" (x %d)\n", sched->max_cpu); } typedef int (*sched_handler)(struct perf_tool *tool, @@ -2940,11 +2935,9 @@ static int perf_timehist__process_sample(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int err = 0; - struct perf_cpu this_cpu = { - .cpu = sample->cpu, - }; + int this_cpu = sample->cpu; - if (this_cpu.cpu > sched->max_cpu.cpu) + if (this_cpu > sched->max_cpu) sched->max_cpu = this_cpu; if (evsel->handler != NULL) { @@ -3061,10 +3054,10 @@ static int 
perf_sched__timehist(struct perf_sched *sched) goto out; /* pre-allocate struct for per-CPU idle stats */ - sched->max_cpu.cpu = session->header.env.nr_cpus_online; - if (sched->max_cpu.cpu == 0) - sched->max_cpu.cpu = 4; - if (init_idle_threads(sched->max_cpu.cpu)) + sched->max_cpu = session->header.env.nr_cpus_online; + if (sched->max_cpu == 0) + sched->max_cpu = 4; + if (init_idle_threads(sched->max_cpu)) goto out; /* summary_only implies summary option, but don't overwrite summary if set */ @@ -3216,10 +3209,10 @@ static int setup_map_cpus(struct perf_sched *sched) { struct perf_cpu_map *map; - sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF); + sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); if (sched->map.comp) { - sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int)); + sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); if (!sched->map.comp_cpus) return -1; } @@ -3545,7 +3538,6 @@ int cmd_sched(int argc, const char **argv) .fork_event = replay_fork_event, }; unsigned int i; - int ret; for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) sched.curr_pid[i] = -1; @@ -3606,9 +3598,6 @@ int cmd_sched(int argc, const char **argv) parse_options_usage(NULL, timehist_options, "n", true); return -EINVAL; } - ret = symbol__validate_sym_arguments(); - if (ret) - return ret; return perf_sched__timehist(&sched); } else { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fa478ddcd1..18b56256bb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -15,7 +15,6 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" -#include "util/env.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/evsel_fprintf.h" @@ -123,7 +122,6 @@ enum perf_output_field { PERF_OUTPUT_TOD = 1ULL << 32, PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, - PERF_OUTPUT_INS_LAT = 1ULL << 35, }; struct perf_script { @@ -190,7 +188,6 @@ struct output_option { {.str = "tod", .field = PERF_OUTPUT_TOD}, {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, - {.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT}, }; enum { @@ -265,8 +262,7 @@ static struct { PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | - PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE | - PERF_OUTPUT_INS_LAT, + PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -463,7 +459,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - evsel__do_check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT, allow_user_set)) + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT)) return -EINVAL; if (PRINT_FIELD(SYM) && @@ -515,7 +511,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(PHYS_ADDR) && - evsel__do_check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR, allow_user_set)) + evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) return -EINVAL; if (PRINT_FIELD(DATA_PAGE_SIZE) && @@ -526,10 +522,6 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", 
PERF_OUTPUT_CODE_PAGE_SIZE)) return -EINVAL; - if (PRINT_FIELD(INS_LAT) && - evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT)) - return -EINVAL; - return 0; } @@ -649,7 +641,7 @@ static int perf_session__check_output_opt(struct perf_session *session) return 0; } -static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch, +static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, FILE *fp) { unsigned i = 0, r; @@ -662,7 +654,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, cons for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; - printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val); + printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); } return printed; @@ -719,17 +711,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen, } static int perf_sample__fprintf_iregs(struct perf_sample *sample, - struct perf_event_attr *attr, const char *arch, FILE *fp) + struct perf_event_attr *attr, FILE *fp) { return perf_sample__fprintf_regs(&sample->intr_regs, - attr->sample_regs_intr, arch, fp); + attr->sample_regs_intr, fp); } static int perf_sample__fprintf_uregs(struct perf_sample *sample, - struct perf_event_attr *attr, const char *arch, FILE *fp) + struct perf_event_attr *attr, FILE *fp) { return perf_sample__fprintf_regs(&sample->user_regs, - attr->sample_regs_user, arch, fp); + attr->sample_regs_user, fp); } static int perf_sample__fprintf_start(struct perf_script *script, @@ -2001,7 +1993,6 @@ static void process_event(struct perf_script *script, struct evsel_script *es = evsel->priv; FILE *fp = es->fp; char str[PAGE_SIZE_NAME_LEN]; - const char *arch = perf_env__arch(machine->env); if (output[type].fields == 0) return; @@ -2048,9 +2039,6 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(WEIGHT)) fprintf(fp, "%16" PRIu64, sample->weight); - if (PRINT_FIELD(INS_LAT)) - fprintf(fp, "%16" PRIu16, sample->ins_lat); - if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; @@ -2068,10 +2056,10 @@ static void process_event(struct perf_script *script, } if (PRINT_FIELD(IREGS)) - perf_sample__fprintf_iregs(sample, attr, arch, fp); + perf_sample__fprintf_iregs(sample, attr, fp); if (PRINT_FIELD(UREGS)) - perf_sample__fprintf_uregs(sample, attr, arch, fp); + perf_sample__fprintf_uregs(sample, attr, fp); if (PRINT_FIELD(BRSTACK)) perf_sample__fprintf_brstack(sample, thread, attr, fp); @@ -2115,8 +2103,8 @@ static struct scripting_ops *scripting_ops; static void __process_stat(struct evsel *counter, u64 tstamp) { int nthreads = perf_thread_map__nr(counter->core.threads); - int idx, thread; - struct perf_cpu cpu; + int ncpus = evsel__nr_cpus(counter); + int cpu, thread; static int header_printed; if (counter->core.system_wide) @@ -2129,13 +2117,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp) } for (thread = 0; thread < nthreads; thread++) { - perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { + for (cpu = 0; cpu < ncpus; cpu++) { struct perf_counts_values *counts; - counts = perf_counts(counter->counts, idx, thread); + counts = perf_counts(counter->counts, cpu, thread); printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", - cpu.cpu, + counter->core.cpus->map[cpu], perf_thread_map__pid(counter->core.threads, thread), counts->val, counts->ena, @@ -2318,7 +2306,7 @@ static int process_attr(struct perf_tool *tool, 
union perf_event *event, * on events sample_type. */ sample_type = evlist__combined_sample_type(evlist); - callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); + callchain_param_setup(sample_type); /* Enable fields for callchain entries */ if (symbol_conf.use_callchain && @@ -3468,7 +3456,16 @@ static void script__setup_sample_type(struct perf_script *script) struct perf_session *session = script->session; u64 sample_type = evlist__combined_sample_type(session->evlist); - callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env)); + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { + if ((sample_type & PERF_SAMPLE_REGS_USER) && + (sample_type & PERF_SAMPLE_STACK_USER)) { + callchain_param.record_mode = CALLCHAIN_DWARF; + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; + else + callchain_param.record_mode = CALLCHAIN_FP; + } if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" @@ -3718,7 +3715,7 @@ int cmd_script(int argc, const char **argv) "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - "data_page_size,code_page_size,ins_lat", + "data_page_size,code_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), @@ -3839,9 +3836,6 @@ int cmd_script(int argc, const char **argv) data.path = input_name; data.force = symbol_conf.force; - if (symbol__validate_sym_arguments()) - return -1; - if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); if (!rec_script_path) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3f98689dd6..f0ecfda34e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -230,12 +230,11 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b) if (!a->core.cpus || !b->core.cpus) return false; - if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus)) + if (a->core.cpus->nr != b->core.cpus->nr) return false; - for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) { - if (perf_cpu_map__cpu(a->core.cpus, i).cpu != - perf_cpu_map__cpu(b->core.cpus, i).cpu) + for (int i = 0; i < a->core.cpus->nr; i++) { + if (a->core.cpus->map[i] != b->core.cpus->map[i]) return false; } @@ -328,35 +327,34 @@ static int write_stat_round_event(u64 tm, u64 type) #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) -static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread, +static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, struct perf_counts_values *count) { - struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread); - struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx); + struct perf_sample_id *sid = SID(counter, cpu, thread); return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, process_synthesized_event, NULL); } -static int read_single_counter(struct evsel *counter, int cpu_map_idx, +static int read_single_counter(struct evsel *counter, int cpu, int thread, struct timespec *rs) { if (counter->tool_event == PERF_TOOL_DURATION_TIME) { u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; struct perf_counts_values *count = - perf_counts(counter->counts, cpu_map_idx, thread); 
+ perf_counts(counter->counts, cpu, thread); count->ena = count->run = val; count->val = val; return 0; } - return evsel__read_counter(counter, cpu_map_idx, thread); + return evsel__read_counter(counter, cpu, thread); } /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx) +static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) { int nthreads = perf_thread_map__nr(evsel_list->core.threads); int thread; @@ -370,24 +368,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ for (thread = 0; thread < nthreads; thread++) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu_map_idx, thread); + count = perf_counts(counter->counts, cpu, thread); /* * The leader's group read loads data into its group members * (via evsel__read_counter()) and sets their count->loaded. */ - if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) && - read_single_counter(counter, cpu_map_idx, thread, rs)) { + if (!perf_counts__is_loaded(counter->counts, cpu, thread) && + read_single_counter(counter, cpu, thread, rs)) { counter->counts->scaled = -1; - perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0; - perf_counts(counter->counts, cpu_map_idx, thread)->run = 0; + perf_counts(counter->counts, cpu, thread)->ena = 0; + perf_counts(counter->counts, cpu, thread)->run = 0; return -1; } - perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false); + perf_counts__set_loaded(counter->counts, cpu, thread, false); if (STAT_RECORD) { - if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) { + if (evsel__write_stat_event(counter, cpu, thread, count)) { pr_err("failed to write stat event\n"); return -1; } @@ -397,8 +395,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ fprintf(stat_config.output, "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", evsel__name(counter), - perf_cpu_map__cpu(evsel__cpus(counter), - cpu_map_idx).cpu, + cpu, count->val, count->ena, count->run); } } @@ -408,33 +405,36 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ static int read_affinity_counters(struct timespec *rs) { - struct evlist_cpu_iterator evlist_cpu_itr; - struct affinity saved_affinity, *affinity; + struct evsel *counter; + struct affinity affinity; + int i, ncpus, cpu; if (all_counters_use_bpf) return 0; - if (!target__has_cpu(&target) || target__has_per_thread(&target)) - affinity = NULL; - else if (affinity__setup(&saved_affinity) < 0) + if (affinity__setup(&affinity) < 0) return -1; - else - affinity = &saved_affinity; - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { - struct evsel *counter = evlist_cpu_itr.evsel; + ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); + if (!target__has_cpu(&target) || target__has_per_thread(&target)) + ncpus = 1; + evlist__for_each_cpu(evsel_list, i, cpu) { + if (i >= ncpus) + break; + affinity__set(&affinity, cpu); - if (evsel__is_bpf(counter)) - continue; - - if (!counter->err) { - counter->err = read_counter_cpu(counter, rs, - evlist_cpu_itr.cpu_map_idx); + evlist__for_each_entry(evsel_list, counter) { + if (evsel__cpu_iter_skip(counter, cpu)) + continue; + if (evsel__is_bpf(counter)) + continue; + if (!counter->err) { + counter->err = read_counter_cpu(counter, rs, + counter->cpu_iter - 1); + } } } - if (affinity) - affinity__cleanup(&saved_affinity); - + 
affinity__cleanup(&affinity); return 0; } @@ -788,9 +788,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; - struct evlist_cpu_iterator evlist_cpu_itr; - struct affinity saved_affinity, *affinity = NULL; - int err; + struct affinity affinity; + int i, cpu, err; bool second_pass = false; if (forks) { @@ -804,11 +803,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (group) evlist__set_leader(evsel_list); - if (!cpu_map__is_dummy(evsel_list->core.cpus)) { - if (affinity__setup(&saved_affinity) < 0) - return -1; - affinity = &saved_affinity; - } + if (affinity__setup(&affinity) < 0) + return -1; evlist__for_each_entry(evsel_list, counter) { if (bpf_counter__load(counter, &target)) @@ -817,53 +813,56 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) all_counters_use_bpf = false; } - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { - counter = evlist_cpu_itr.evsel; - + evlist__for_each_cpu (evsel_list, i, cpu) { /* * bperf calls evsel__open_per_cpu() in bperf__load(), so * no need to call it again here. */ if (target.use_bpf) break; + affinity__set(&affinity, cpu); - if (counter->reset_group || counter->errored) - continue; - if (evsel__is_bpf(counter)) - continue; + evlist__for_each_entry(evsel_list, counter) { + if (evsel__cpu_iter_skip(counter, cpu)) + continue; + if (counter->reset_group || counter->errored) + continue; + if (evsel__is_bpf(counter)) + continue; try_again: - if (create_perf_stat_counter(counter, &stat_config, &target, - evlist_cpu_itr.cpu_map_idx) < 0) { + if (create_perf_stat_counter(counter, &stat_config, &target, + counter->cpu_iter - 1) < 0) { + + /* + * Weak group failed. We cannot just undo this here + * because earlier CPUs might be in group mode, and the kernel + * doesn't support mixing group and non group reads. Defer + * it to later. + * Don't close here because we're in the wrong affinity. + */ + if ((errno == EINVAL || errno == EBADF) && + evsel__leader(counter) != counter && + counter->weak_group) { + evlist__reset_weak_group(evsel_list, counter, false); + assert(counter->reset_group); + second_pass = true; + continue; + } + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again; + case COUNTER_SKIP: + continue; + default: + break; + } - /* - * Weak group failed. We cannot just undo this here - * because earlier CPUs might be in group mode, and the kernel - * doesn't support mixing group and non group reads. Defer - * it to later. - * Don't close here because we're in the wrong affinity. - */ - if ((errno == EINVAL || errno == EBADF) && - evsel__leader(counter) != counter && - counter->weak_group) { - evlist__reset_weak_group(evsel_list, counter, false); - assert(counter->reset_group); - second_pass = true; - continue; } - - switch (stat_handle_error(counter)) { - case COUNTER_FATAL: - return -1; - case COUNTER_RETRY: - goto try_again; - case COUNTER_SKIP: - continue; - default: - break; - } - + counter->supported = true; } - counter->supported = true; } if (second_pass) { @@ -872,43 +871,45 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) * and also close errored counters. 
*/ - /* First close errored or weak retry */ - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { - counter = evlist_cpu_itr.evsel; - - if (!counter->reset_group && !counter->errored) - continue; - - perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx); - } - /* Now reopen weak */ - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { - counter = evlist_cpu_itr.evsel; - - if (!counter->reset_group && !counter->errored) - continue; - if (!counter->reset_group) - continue; -try_again_reset: - pr_debug2("reopening weak %s\n", evsel__name(counter)); - if (create_perf_stat_counter(counter, &stat_config, &target, - evlist_cpu_itr.cpu_map_idx) < 0) { - - switch (stat_handle_error(counter)) { - case COUNTER_FATAL: - return -1; - case COUNTER_RETRY: - goto try_again_reset; - case COUNTER_SKIP: + evlist__for_each_cpu(evsel_list, i, cpu) { + affinity__set(&affinity, cpu); + /* First close errored or weak retry */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->reset_group && !counter->errored) continue; - default: - break; - } + if (evsel__cpu_iter_skip_no_inc(counter, cpu)) + continue; + perf_evsel__close_cpu(&counter->core, counter->cpu_iter); + } + /* Now reopen weak */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->reset_group && !counter->errored) + continue; + if (evsel__cpu_iter_skip(counter, cpu)) + continue; + if (!counter->reset_group) + continue; +try_again_reset: + pr_debug2("reopening weak %s\n", evsel__name(counter)); + if (create_perf_stat_counter(counter, &stat_config, &target, + counter->cpu_iter - 1) < 0) { + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again_reset; + case COUNTER_SKIP: + continue; + default: + break; + } + } + counter->supported = true; } - counter->supported = true; } } - affinity__cleanup(affinity); + affinity__cleanup(&affinity); evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) { @@ -1167,26 +1168,6 @@ static int parse_stat_cgroups(const struct option *opt, return parse_cgroups(opt, str, unset); } -static int parse_hybrid_type(const struct option *opt, - const char *str, - int unset __maybe_unused) -{ - struct evlist *evlist = *(struct evlist **)opt->value; - - if (!list_empty(&evlist->core.entries)) { - fprintf(stderr, "Must define cputype before events/metrics\n"); - return -1; - } - - evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str); - if (!evlist->hybrid_pmu_name) { - fprintf(stderr, "--cputype %s is not supported!\n", str); - return -1; - } - - return 0; -} - static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1301,10 +1282,6 @@ static struct option stat_options[] = { "don't print 'summary' for CSV summary output"), OPT_BOOLEAN(0, "quiet", &stat_config.quiet, "don't print output (useful with record)"), - OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", - "Only enable events on applying cpu with this type " - "for hybrid platform (e.g. core or atom)", - parse_hybrid_type), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. 
use 'perf list' to list available events", @@ -1321,75 +1298,70 @@ static struct option stat_options[] = { OPT_END() }; -static const char *const aggr_mode__string[] = { - [AGGR_CORE] = "core", - [AGGR_DIE] = "die", - [AGGR_GLOBAL] = "global", - [AGGR_NODE] = "node", - [AGGR_NONE] = "none", - [AGGR_SOCKET] = "socket", - [AGGR_THREAD] = "thread", - [AGGR_UNSET] = "unset", -}; - static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) + struct perf_cpu_map *map, int cpu) { - return aggr_cpu_id__socket(cpu, /*data=*/NULL); + return cpu_map__get_socket(map, cpu, NULL); } static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) + struct perf_cpu_map *map, int cpu) { - return aggr_cpu_id__die(cpu, /*data=*/NULL); + return cpu_map__get_die(map, cpu, NULL); } static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) + struct perf_cpu_map *map, int cpu) { - return aggr_cpu_id__core(cpu, /*data=*/NULL); + return cpu_map__get_core(map, cpu, NULL); } static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) + struct perf_cpu_map *map, int cpu) { - return aggr_cpu_id__node(cpu, /*data=*/NULL); + return cpu_map__get_node(map, cpu, NULL); } static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, - aggr_get_id_t get_id, struct perf_cpu cpu) + aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) - config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); + if (idx >= map->nr) + return id; - id = config->cpus_aggr_map->map[cpu.cpu]; + cpu = map->map[idx]; + + if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) + config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); + + id = config->cpus_aggr_map->map[cpu]; return id; } static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, - struct perf_cpu cpu) + struct perf_cpu_map *map, int idx) { - return perf_stat__get_aggr(config, perf_stat__get_socket, cpu); + return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, - struct perf_cpu cpu) + struct perf_cpu_map *map, int idx) { - return perf_stat__get_aggr(config, perf_stat__get_die, cpu); + return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); } static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, - struct perf_cpu cpu) + struct perf_cpu_map *map, int idx) { - return perf_stat__get_aggr(config, perf_stat__get_core, cpu); + return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, - struct perf_cpu cpu) + struct perf_cpu_map *map, int idx) { - return perf_stat__get_aggr(config, perf_stat__get_node, cpu); + return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); } static bool term_percore_set(void) @@ -1404,67 +1376,54 @@ static bool term_percore_set(void) return false; } -static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) -{ - switch (aggr_mode) { - case AGGR_SOCKET: - return aggr_cpu_id__socket; - case AGGR_DIE: - return aggr_cpu_id__die; - case 
AGGR_CORE: - return aggr_cpu_id__core; - case AGGR_NODE: - return aggr_cpu_id__node; - case AGGR_NONE: - if (term_percore_set()) - return aggr_cpu_id__core; - - return NULL; - case AGGR_GLOBAL: - case AGGR_THREAD: - case AGGR_UNSET: - default: - return NULL; - } -} - -static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) -{ - switch (aggr_mode) { - case AGGR_SOCKET: - return perf_stat__get_socket_cached; - case AGGR_DIE: - return perf_stat__get_die_cached; - case AGGR_CORE: - return perf_stat__get_core_cached; - case AGGR_NODE: - return perf_stat__get_node_cached; - case AGGR_NONE: - if (term_percore_set()) { - return perf_stat__get_core_cached; - } - return NULL; - case AGGR_GLOBAL: - case AGGR_THREAD: - case AGGR_UNSET: - default: - return NULL; - } -} - static int perf_stat_init_aggr_mode(void) { int nr; - aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); - if (get_id) { - stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, - get_id, /*data=*/NULL); - if (!stat_config.aggr_map) { - pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); + switch (stat_config.aggr_mode) { + case AGGR_SOCKET: + if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build socket map"); return -1; } - stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); + stat_config.aggr_get_id = perf_stat__get_socket_cached; + break; + case AGGR_DIE: + if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_cached; + break; + case AGGR_CORE: + if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build core map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_core_cached; + break; + case AGGR_NODE: + if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build node map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_node_cached; + break; + case AGGR_NONE: + if (term_percore_set()) { + if (cpu_map__build_core_map(evsel_list->core.cpus, + &stat_config.aggr_map)) { + perror("cannot build core map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_core_cached; + } + break; + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + break; } /* @@ -1472,7 +1431,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - nr = perf_cpu_map__max(evsel_list->core.cpus).cpu; + nr = perf_cpu_map__max(evsel_list->core.cpus); stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ?
0 : -ENOMEM; } @@ -1500,139 +1459,169 @@ static void perf_stat__exit_aggr_mode(void) stat_config.cpus_aggr_map = NULL; } -static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data) +static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx) +{ + int cpu; + + if (idx > map->nr) + return -1; + + cpu = map->map[idx]; + + if (cpu >= env->nr_cpus_avail) + return -1; + + return cpu; +} + +static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = aggr_cpu_id__empty(); + int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - if (cpu.cpu != -1) - id.socket = env->cpu[cpu.cpu].socket_id; + if (cpu != -1) + id.socket = env->cpu[cpu].socket_id; return id; } -static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data) +static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = aggr_cpu_id__empty(); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); - if (cpu.cpu != -1) { + if (cpu != -1) { /* * die_id is relative to socket, so start * with the socket ID and then add die to * make a unique ID. */ - id.socket = env->cpu[cpu.cpu].socket_id; - id.die = env->cpu[cpu.cpu].die_id; + id.socket = env->cpu[cpu].socket_id; + id.die = env->cpu[cpu].die_id; } return id; } -static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data) +static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = aggr_cpu_id__empty(); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); - if (cpu.cpu != -1) { + if (cpu != -1) { /* * core_id is relative to socket and die, * we need a global id. 
So we set * socket, die id and core id */ - id.socket = env->cpu[cpu.cpu].socket_id; - id.die = env->cpu[cpu.cpu].die_id; - id.core = env->cpu[cpu.cpu].core_id; + id.socket = env->cpu[cpu].socket_id; + id.die = env->cpu[cpu].die_id; + id.core = env->cpu[cpu].core_id; } return id; } -static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) +static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + int cpu = perf_env__get_cpu(data, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); id.node = perf_env__numa_node(data, cpu); return id; } -static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) +static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct cpu_aggr_map **sockp) { - return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); +} + +static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct cpu_aggr_map **diep) +{ + return cpu_map__build_map(cpus, diep, perf_env__get_die, env); +} + +static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct cpu_aggr_map **corep) +{ + return cpu_map__build_map(cpus, corep, perf_env__get_core, env); +} + +static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct cpu_aggr_map **nodep) +{ + return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); +} + +static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu_map *map, int idx) +{ + return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) + struct perf_cpu_map *map, int idx) { - return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_die(map, idx, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) + struct perf_cpu_map *map, int idx) { - return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_core(map, idx, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu cpu) + struct perf_cpu_map *map, int idx) { - return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); -} - -static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) -{ - switch (aggr_mode) { - case AGGR_SOCKET: - return perf_env__get_socket_aggr_by_cpu; - case AGGR_DIE: - return perf_env__get_die_aggr_by_cpu; - case AGGR_CORE: - return perf_env__get_core_aggr_by_cpu; - case AGGR_NODE: - return perf_env__get_node_aggr_by_cpu; - case AGGR_NONE: - case AGGR_GLOBAL: - case AGGR_THREAD: - case AGGR_UNSET: - default: - return NULL; - } -} - -static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) -{ - switch (aggr_mode) { - case AGGR_SOCKET: - return perf_stat__get_socket_file; - case AGGR_DIE: - return perf_stat__get_die_file; - case AGGR_CORE: - return perf_stat__get_core_file; - case AGGR_NODE: - return perf_stat__get_node_file; - case AGGR_NONE: - case AGGR_GLOBAL: - case AGGR_THREAD: - 
case AGGR_UNSET: - default: - return NULL; - } + return perf_env__get_node(map, idx, &perf_stat.session->header.env); } static int perf_stat_init_aggr_mode_file(struct perf_stat *st) { struct perf_env *env = &st->session->header.env; - aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); - if (!get_id) - return 0; - - stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env); - if (!stat_config.aggr_map) { - pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); - return -1; + switch (stat_config.aggr_mode) { + case AGGR_SOCKET: + if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build socket map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_socket_file; + break; + case AGGR_DIE: + if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_file; + break; + case AGGR_CORE: + if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build core map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_core_file; + break; + case AGGR_NODE: + if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build node map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_node_file; + break; + case AGGR_NONE: + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + break; } - stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode); + return 0; } @@ -1761,12 +1750,14 @@ static int add_default_attributes(void) (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; + struct parse_events_error errinfo; + /* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) return 0; + bzero(&errinfo, sizeof(errinfo)); if (transaction_run) { - struct parse_events_error errinfo; /* Handle -T as -M transaction. Once platform specific metrics * support has been added to the json files, all architectures * will use this approach. To determine transaction support * on an architecture test for the existence of the metric group */ if (metricgroup__has_metric("transaction")) { struct option opt = { .value = &evsel_list }; return metricgroup__parse_groups(&opt, "transaction", stat_config.metric_no_group, stat_config.metric_no_merge, &stat_config.metric_events); } - parse_events_error__init(&errinfo); if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, &errinfo); else err = parse_events(evsel_list, transaction_limited_attrs, &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); - parse_events_error__print(&errinfo, transaction_attrs); + parse_events_print_error(&errinfo, transaction_attrs); + return -1; } - parse_events_error__exit(&errinfo); - return err ?
-1 : 0; + return 0; } if (smi_cost) { - struct parse_events_error errinfo; int smi; if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { @@ -1815,23 +1804,23 @@ static int add_default_attributes(void) smi_reset = true; } - if (!pmu_have_event("msr", "aperf") || - !pmu_have_event("msr", "smi")) { + if (pmu_have_event("msr", "aperf") && + pmu_have_event("msr", "smi")) { + if (!force_metric_only) + stat_config.metric_only = true; + err = parse_events(evsel_list, smi_cost_attrs, &errinfo); + } else { fprintf(stderr, "To measure SMI cost, it needs " "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); + parse_events_print_error(&errinfo, smi_cost_attrs); return -1; } - if (!force_metric_only) - stat_config.metric_only = true; - - parse_events_error__init(&errinfo); - err = parse_events(evsel_list, smi_cost_attrs, &errinfo); if (err) { - parse_events_error__print(&errinfo, smi_cost_attrs); + parse_events_print_error(&errinfo, smi_cost_attrs); fprintf(stderr, "Cannot set up SMI cost events\n"); + return -1; } - parse_events_error__exit(&errinfo); - return err ? -1 : 0; + return 0; } if (topdown_run) { @@ -1886,22 +1875,18 @@ static int add_default_attributes(void) return -1; } if (topdown_attrs[0] && str) { - struct parse_events_error errinfo; if (warn) arch_topdown_group_warn(); setup_metrics: - parse_events_error__init(&errinfo); err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); - parse_events_error__print(&errinfo, str); - parse_events_error__exit(&errinfo); + parse_events_print_error(&errinfo, str); free(str); return -1; } - parse_events_error__exit(&errinfo); } else { fprintf(stderr, "System does not support topdown\n"); return -1; @@ -1911,7 +1896,6 @@ static int add_default_attributes(void) if (!evsel_list->core.nr_entries) { if (perf_pmu__has_hybrid()) { - struct parse_events_error errinfo; const char *hybrid_str = "cycles,instructions,branches,branch-misses"; if (target__has_cpu(&target)) @@ -1922,16 +1906,15 @@ static int add_default_attributes(void) return -1; } - parse_events_error__init(&errinfo); err = parse_events(evsel_list, hybrid_str, &errinfo); if (err) { fprintf(stderr, "Cannot set up hybrid events %s: %d\n", hybrid_str, err); - parse_events_error__print(&errinfo, hybrid_str); + parse_events_print_error(&errinfo, hybrid_str); + return -1; } - parse_events_error__exit(&errinfo); - return err ? 
-1 : 0; + return err; } if (target__has_cpu(&target)) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1fc390f136..a3ae9176a8 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1271,7 +1271,7 @@ static int __cmd_top(struct perf_top *top) pr_debug("Couldn't synthesize cgroup events.\n"); machine__synthesize_threads(&top->session->machines.host, &opts->target, - top->evlist->core.threads, true, false, + top->evlist->core.threads, false, top->nr_threads_synthesize); if (top->nr_threads_synthesize > 1) @@ -1618,10 +1618,6 @@ int cmd_top(int argc, const char **argv) if (argc) usage_with_options(top_usage, options); - status = symbol__validate_sym_arguments(); - if (status) - goto out_delete_evlist; - if (annotate_check_args(&top.annotation_opts) < 0) goto out_delete_evlist; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 52b137a184..2bf21194c7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -979,8 +979,6 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, { .name = "getrlimit", .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, - { .name = "getsockopt", - .arg = { [1] = STRARRAY(level, socket_level), }, }, { .name = "gettid", .errpid = true, }, { .name = "ioctl", .arg = { @@ -1123,8 +1121,6 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [0] = STRARRAY(which, itimers), }, }, { .name = "setrlimit", .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, - { .name = "setsockopt", - .arg = { [1] = STRARRAY(level, socket_level), }, }, { .name = "socket", .arg = { [0] = STRARRAY(family, socket_families), [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, @@ -1536,20 +1532,13 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) return fprintf(fp, " ? 
"); } -static pid_t workload_pid = -1; static bool done = false; static bool interrupted = false; -static void sighandler_interrupt(int sig __maybe_unused) +static void sig_handler(int sig) { - done = interrupted = true; -} - -static void sighandler_chld(int sig __maybe_unused, siginfo_t *info, - void *context __maybe_unused) -{ - if (info->si_pid == workload_pid) - done = true; + done = true; + interrupted = sig == SIGINT; } static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp) @@ -1639,8 +1628,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist) goto out; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, - evlist->core.threads, trace__tool_process, - true, false, 1); + evlist->core.threads, trace__tool_process, false, + 1); out: if (err) symbol__exit(); @@ -2733,8 +2722,6 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, offset = format_field__intval(field, sample, evsel->needs_swap); syscall_arg.len = offset >> 16; offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; } val = (uintptr_t)(sample->raw_data + offset); @@ -3076,11 +3063,15 @@ static bool evlist__add_vfs_getname(struct evlist *evlist) struct parse_events_error err; int ret; - parse_events_error__init(&err); + bzero(&err, sizeof(err)); ret = parse_events(evlist, "probe:vfs_getname*", &err); - parse_events_error__exit(&err); - if (ret) + if (ret) { + free(err.str); + free(err.help); + free(err.first_str); + free(err.first_help); return false; + } evlist__for_each_entry_safe(evlist, evsel, tmp) { if (!strstarts(evsel__name(evsel), "probe:vfs_getname")) @@ -3266,21 +3257,10 @@ static void trace__set_bpf_map_syscalls(struct trace *trace) static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { - struct bpf_program *pos, *prog = NULL; - const char *sec_name; - if (trace->bpf_obj == NULL) return NULL; - bpf_object__for_each_program(pos, trace->bpf_obj) { - sec_name = bpf_program__section_name(pos); - if (sec_name && !strcmp(sec_name, name)) { - prog = pos; - break; - } - } - - return prog; + return bpf_object__find_program_by_title(trace->bpf_obj, name); } static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc, @@ -3970,9 +3950,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__add(evlist, pgfault_min); } - /* Enable ignoring missing threads when -u/-p option is defined. 
*/ - trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid; - if (trace->sched && evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; @@ -4024,7 +4001,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) fprintf(trace->output, "Couldn't run the workload!\n"); goto out_delete_evlist; } - workload_pid = evlist->workload.pid; } err = evlist__open(evlist); @@ -4894,16 +4870,11 @@ int cmd_trace(int argc, const char **argv) const char * const trace_subcommands[] = { "record", NULL }; int err = -1; char bf[BUFSIZ]; - struct sigaction sigchld_act; signal(SIGSEGV, sighandler_dump_stack); signal(SIGFPE, sighandler_dump_stack); - signal(SIGINT, sighandler_interrupt); - - memset(&sigchld_act, 0, sizeof(sigchld_act)); - sigchld_act.sa_flags = SA_SIGINFO; - sigchld_act.sa_sigaction = sighandler_chld; - sigaction(SIGCHLD, &sigchld_act, NULL); + signal(SIGCHLD, sig_handler); + signal(SIGINT, sig_handler); trace.evlist = evlist__new(); trace.sctbl = syscalltbl__new(); @@ -4954,13 +4925,12 @@ int cmd_trace(int argc, const char **argv) if (trace.perfconfig_events != NULL) { struct parse_events_error parse_err; - parse_events_error__init(&parse_err); + bzero(&parse_err, sizeof(parse_err)); err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err); - if (err) - parse_events_error__print(&parse_err, trace.perfconfig_events); - parse_events_error__exit(&parse_err); - if (err) + if (err) { + parse_events_print_error(&parse_err, trace.perfconfig_events); goto out; + } } if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 30ecf3a0f6..f1e46277e8 100644 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -26,7 +26,6 @@ include/vdso/bits.h include/linux/const.h include/vdso/const.h include/linux/hash.h -include/linux/list-sort.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h arch/x86/include/asm/required-features.h @@ -151,7 +150,6 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include " -B' -check lib/list_sort.c '-I "^#include "' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/tools/perf/design.txt b/tools/perf/design.txt index aa8cfeabb7..a42fab308f 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -106,9 +106,6 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, - PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, - PERF_COUNT_HW_REF_CPU_CYCLES = 9, }; These are standardized types of events that work relatively uniformly diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index b17eb52a06..7565a1852c 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -308,6 +308,8 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) { + struct filter_data *d = data; + pr_debug("%s API\n", __func__); return do_checks(data, 
sample, ctx, false); diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json index cf48d0dfc7..9bea1ba1c4 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json @@ -18,6 +18,6 @@ "ArchStdEvent": "BUS_ACCESS_PERIPH" }, { - "ArchStdEvent": "BUS_ACCESS" + "ArchStdEvent": "BUS_ACCESS", } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json index 4cc50b7da5..1e25f2ae4a 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json @@ -39,31 +39,31 @@ "ArchStdEvent": "L2D_CACHE_INVAL" }, { - "ArchStdEvent": "L1I_CACHE_REFILL" + "ArchStdEvent": "L1I_CACHE_REFILL", }, { - "ArchStdEvent": "L1I_TLB_REFILL" + "ArchStdEvent": "L1I_TLB_REFILL", }, { - "ArchStdEvent": "L1D_CACHE_REFILL" + "ArchStdEvent": "L1D_CACHE_REFILL", }, { - "ArchStdEvent": "L1D_CACHE" + "ArchStdEvent": "L1D_CACHE", }, { - "ArchStdEvent": "L1D_TLB_REFILL" + "ArchStdEvent": "L1D_TLB_REFILL", }, { - "ArchStdEvent": "L1I_CACHE" + "ArchStdEvent": "L1I_CACHE", }, { - "ArchStdEvent": "L2D_CACHE" + "ArchStdEvent": "L2D_CACHE", }, { - "ArchStdEvent": "L2D_CACHE_REFILL" + "ArchStdEvent": "L2D_CACHE_REFILL", }, { - "ArchStdEvent": "L2D_CACHE_WB" + "ArchStdEvent": "L2D_CACHE_WB", }, { "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB", @@ -72,7 +72,7 @@ }, { "PublicDescription": "This event counts any instruction fetch which accesses the instruction L1 TLB", - "ArchStdEvent": "L1I_TLB" + "ArchStdEvent": "L1I_TLB", }, { "PublicDescription": "Level 2 access to data TLB that caused a page table walk. 
This event counts on any data access which causes L2D_TLB_REFILL to count", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json index 927a6f629a..9076ca2daf 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json @@ -1,7 +1,7 @@ [ { "PublicDescription": "The number of core clock cycles", - "ArchStdEvent": "CPU_CYCLES" + "ArchStdEvent": "CPU_CYCLES", }, { "PublicDescription": "FSU clocking gated off cycle", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json index ada052e196..9761433ad3 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json @@ -36,9 +36,9 @@ "ArchStdEvent": "EXC_TRAP_FIQ" }, { - "ArchStdEvent": "EXC_TAKEN" + "ArchStdEvent": "EXC_TAKEN", }, { - "ArchStdEvent": "EXC_RETURN" + "ArchStdEvent": "EXC_RETURN", } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json index 62f6276e30..482aa3f19e 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json @@ -44,25 +44,25 @@ "BriefDescription": "Software increment" }, { - "ArchStdEvent": "INST_RETIRED" + "ArchStdEvent": "INST_RETIRED", }, { "ArchStdEvent": "CID_WRITE_RETIRED", "BriefDescription": "Write to CONTEXTIDR" }, { - "ArchStdEvent": "INST_SPEC" + "ArchStdEvent": "INST_SPEC", }, { - "ArchStdEvent": "TTBR_WRITE_RETIRED" + "ArchStdEvent": "TTBR_WRITE_RETIRED", }, { "PublicDescription": "This event counts all branches, taken or not. 
This excludes exception entries, debug entries and CCFAIL branches", - "ArchStdEvent": "BR_RETIRED" + "ArchStdEvent": "BR_RETIRED", }, { "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush", - "ArchStdEvent": "BR_MIS_PRED_RETIRED" + "ArchStdEvent": "BR_MIS_PRED_RETIRED", }, { "PublicDescription": "Operation speculatively executed, NOP", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json index 50157e8c20..2e7555696c 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json @@ -15,10 +15,10 @@ "ArchStdEvent": "UNALIGNED_LDST_SPEC" }, { - "ArchStdEvent": "MEM_ACCESS" + "ArchStdEvent": "MEM_ACCESS", }, { "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "ArchStdEvent": "MEMORY_ERROR" + "ArchStdEvent": "MEMORY_ERROR", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json index db68de1883..ec0dc92288 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json @@ -1,10 +1,10 @@ [ { "PublicDescription": "This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken", - "ArchStdEvent": "BR_MIS_PRED" + "ArchStdEvent": "BR_MIS_PRED", }, { "PublicDescription": "This event counts all predictable branches.", - "ArchStdEvent": "BR_PRED" + "ArchStdEvent": "BR_PRED", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json index e0875d3a68..6263929efc 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json @@ -1,21 +1,21 @@ [ { - "PublicDescription": "The number of core clock cycles", + "PublicDescription": "The number of core clock cycles" "ArchStdEvent": "CPU_CYCLES", "BriefDescription": "The number of core clock cycles." }, { "PublicDescription": "This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.", - "ArchStdEvent": "BUS_ACCESS" + "ArchStdEvent": "BUS_ACCESS", }, { - "PublicDescription": "This event duplicates CPU_CYCLES.", - "ArchStdEvent": "BUS_CYCLES" + "PublicDescription": "This event duplicates CPU_CYCLES." 
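A note on the arm64 JSON hunks above and below: the reintroduced trailing commas (and the stray "{," tokens) are not valid strict JSON, but the jevents build tool parses these files with the lenient jsmn tokenizer, which does not reject them, so both sides of each hunk still build. A rough, self-contained checker for the trailing-comma pattern, purely illustrative and not part of perf, might look like:

#include <ctype.h>
#include <stdio.h>

/* Flag ",}" / ",]" sequences that strict JSON forbids but lenient
 * tokenizers such as jsmn accept. Naive on purpose: it does not skip
 * string literals, so a ',}' inside a quoted string would be a false
 * positive. */
static int report_trailing_commas(const char *buf)
{
	int hits = 0;
	const char *p, *q;

	for (p = buf; *p; p++) {
		if (*p != ',')
			continue;
		for (q = p + 1; *q && isspace((unsigned char)*q); q++)
			;
		if (*q == '}' || *q == ']') {
			printf("trailing comma before '%c'\n", *q);
			hits++;
		}
	}
	return hits;
}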
+ "ArchStdEvent": "BUS_CYCLES", }, { - "ArchStdEvent": "BUS_ACCESS_RD" + "ArchStdEvent": "BUS_ACCESS_RD", }, { - "ArchStdEvent": "BUS_ACCESS_WR" + "ArchStdEvent": "BUS_ACCESS_WR", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json index fc448c2d5e..cd67bb9df1 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json @@ -1,47 +1,47 @@ [ { "PublicDescription": "This event counts any instruction fetch which misses in the cache.", - "ArchStdEvent": "L1I_CACHE_REFILL" + "ArchStdEvent": "L1I_CACHE_REFILL", }, { "PublicDescription": "This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", - "ArchStdEvent": "L1I_TLB_REFILL" + "ArchStdEvent": "L1I_TLB_REFILL", }, { "PublicDescription": "This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", - "ArchStdEvent": "L1D_CACHE_REFILL" + "ArchStdEvent": "L1D_CACHE_REFILL", }, { "PublicDescription": "This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.", - "ArchStdEvent": "L1D_CACHE" + "ArchStdEvent": "L1D_CACHE", }, { "PublicDescription": "This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", - "ArchStdEvent": "L1D_TLB_REFILL" + "ArchStdEvent": "L1D_TLB_REFILL", }, - { + {, "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.", - "ArchStdEvent": "L1I_CACHE" + "ArchStdEvent": "L1I_CACHE", }, { "PublicDescription": "This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.", - "ArchStdEvent": "L1D_CACHE_WB" + "ArchStdEvent": "L1D_CACHE_WB", }, { "PublicDescription": "This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.", - "ArchStdEvent": "L2D_CACHE" + "ArchStdEvent": "L2D_CACHE", }, { "PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted", - "ArchStdEvent": "L2D_CACHE_REFILL" + "ArchStdEvent": "L2D_CACHE_REFILL", }, { "PublicDescription": "This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. 
Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted", - "ArchStdEvent": "L2D_CACHE_WB" + "ArchStdEvent": "L2D_CACHE_WB", }, { "PublicDescription": "This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.", - "ArchStdEvent": "L2D_CACHE_ALLOCATE" + "ArchStdEvent": "L2D_CACHE_ALLOCATE", }, { "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.", @@ -75,21 +75,21 @@ }, { "PublicDescription": "This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.", - "ArchStdEvent": "L2D_TLB" + "ArchStdEvent": "L2D_TLB", }, { "PublicDescription": "This event counts on any data access which causes L2D_TLB_REFILL to count.", - "ArchStdEvent": "DTLB_WALK" + "ArchStdEvent": "DTLB_WALK", }, { "PublicDescription": "This event counts on any instruction access which causes L2D_TLB_REFILL to count.", - "ArchStdEvent": "ITLB_WALK" + "ArchStdEvent": "ITLB_WALK", }, { - "ArchStdEvent": "LL_CACHE_RD" + "ArchStdEvent": "LL_CACHE_RD", }, { - "ArchStdEvent": "LL_CACHE_MISS_RD" + "ArchStdEvent": "LL_CACHE_MISS_RD", }, { "ArchStdEvent": "L1D_CACHE_INVAL" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json index ce942324ee..ea4631db41 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json @@ -1,10 +1,10 @@ [ { - "ArchStdEvent": "EXC_TAKEN" + "ArchStdEvent": "EXC_TAKEN", }, { "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "ArchStdEvent": "MEMORY_ERROR" + "ArchStdEvent": "MEMORY_ERROR", }, { "ArchStdEvent": "EXC_DABORT" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json index b0b439a36a..8e59566cba 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json @@ -1,32 +1,32 @@ [ { - "ArchStdEvent": "SW_INCR" + "ArchStdEvent": "SW_INCR", }, { "PublicDescription": "This event counts all retired instructions, including those that fail their condition check.", - "ArchStdEvent": "INST_RETIRED" + "ArchStdEvent": "INST_RETIRED", }, { - "ArchStdEvent": "EXC_RETURN" + "ArchStdEvent": "EXC_RETURN", }, { "PublicDescription": "This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.", - "ArchStdEvent": "CID_WRITE_RETIRED" + "ArchStdEvent": "CID_WRITE_RETIRED", }, { - "ArchStdEvent": "INST_SPEC" + "ArchStdEvent": "INST_SPEC", }, { "PublicDescription": "This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.", - "ArchStdEvent": "TTBR_WRITE_RETIRED" + "ArchStdEvent": "TTBR_WRITE_RETIRED", }, - { + {, "PublicDescription": "This event counts all branches, taken or not. 
This excludes exception entries, debug entries and CCFAIL branches.", - "ArchStdEvent": "BR_RETIRED" + "ArchStdEvent": "BR_RETIRED", }, { "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.", - "ArchStdEvent": "BR_MIS_PRED_RETIRED" + "ArchStdEvent": "BR_MIS_PRED_RETIRED", }, { "ArchStdEvent": "ASE_SPEC" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json index 20a929e772..f06f399051 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json @@ -1,7 +1,7 @@ [ { "PublicDescription": "This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.", - "ArchStdEvent": "MEM_ACCESS" + "ArchStdEvent": "MEM_ACCESS", }, { "ArchStdEvent": "MEM_ACCESS_RD" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json index 20d8365756..c2ccbf6fbf 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json @@ -1,5 +1,5 @@ [ { - "ArchStdEvent": "REMOTE_ACCESS" + "ArchStdEvent": "REMOTE_ACCESS", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json index b4e96551d5..d79f0aeaf7 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json @@ -1,10 +1,10 @@ [ { "PublicDescription": "The counter counts on any cycle when there are no fetched instructions available to dispatch.", - "ArchStdEvent": "STALL_FRONTEND" + "ArchStdEvent": "STALL_FRONTEND", }, { "PublicDescription": "The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.", - "ArchStdEvent": "STALL_BACKEND" + "ArchStdEvent": "STALL_BACKEND", } ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index 6970203cb2..dda8e59149 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -229,5 +229,5 @@ "BriefDescription": "Store bound L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "store_bound" - } + }, ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json index 2b3cb55df2..61514d3860 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json @@ -1,56 +1,56 @@ [ { - "ConfigCode": "0x00", - "EventName": "flux_wr", + "EventCode": "0x00", + "EventName": "uncore_hisi_ddrc.flux_wr", "BriefDescription": "DDRC total write operations", "PublicDescription": "DDRC total write operations", "Unit": "hisi_sccl,ddrc" }, { - "ConfigCode": "0x01", - "EventName": "flux_rd", + "EventCode": "0x01", + "EventName": "uncore_hisi_ddrc.flux_rd", "BriefDescription": "DDRC total read operations", "PublicDescription": "DDRC total read operations", "Unit": "hisi_sccl,ddrc" }, { - "ConfigCode": "0x02", - "EventName": "flux_wcmd", + "EventCode": "0x02", + "EventName": "uncore_hisi_ddrc.flux_wcmd", "BriefDescription": 
"DDRC write commands", "PublicDescription": "DDRC write commands", "Unit": "hisi_sccl,ddrc" }, { - "ConfigCode": "0x03", - "EventName": "flux_rcmd", + "EventCode": "0x03", + "EventName": "uncore_hisi_ddrc.flux_rcmd", "BriefDescription": "DDRC read commands", "PublicDescription": "DDRC read commands", "Unit": "hisi_sccl,ddrc" }, { - "ConfigCode": "0x04", - "EventName": "pre_cmd", + "EventCode": "0x04", + "EventName": "uncore_hisi_ddrc.pre_cmd", "BriefDescription": "DDRC precharge commands", "PublicDescription": "DDRC precharge commands", "Unit": "hisi_sccl,ddrc" }, { - "ConfigCode": "0x05", - "EventName": "act_cmd", + "EventCode": "0x05", + "EventName": "uncore_hisi_ddrc.act_cmd", "BriefDescription": "DDRC active commands", "PublicDescription": "DDRC active commands", "Unit": "hisi_sccl,ddrc" }, { - "ConfigCode": "0x06", - "EventName": "rnk_chg", + "EventCode": "0x06", + "EventName": "uncore_hisi_ddrc.rnk_chg", "BriefDescription": "DDRC rank commands", "PublicDescription": "DDRC rank commands", "Unit": "hisi_sccl,ddrc" }, { - "ConfigCode": "0x07", - "EventName": "rw_chg", + "EventCode": "0x07", + "EventName": "uncore_hisi_ddrc.rw_chg", "BriefDescription": "DDRC read and write changes", "PublicDescription": "DDRC read and write changes", "Unit": "hisi_sccl,ddrc" diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json index 9a7ec7af20..ada8678293 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json @@ -1,152 +1,72 @@ [ { - "ConfigCode": "0x00", - "EventName": "rx_ops_num", + "EventCode": "0x00", + "EventName": "uncore_hisi_hha.rx_ops_num", "BriefDescription": "The number of all operations received by the HHA", "PublicDescription": "The number of all operations received by the HHA", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x01", - "EventName": "rx_outer", + "EventCode": "0x01", + "EventName": "uncore_hisi_hha.rx_outer", "BriefDescription": "The number of all operations received by the HHA from another socket", "PublicDescription": "The number of all operations received by the HHA from another socket", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x02", - "EventName": "rx_sccl", + "EventCode": "0x02", + "EventName": "uncore_hisi_hha.rx_sccl", "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket", "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x03", - "EventName": "rx_ccix", + "EventCode": "0x03", + "EventName": "uncore_hisi_hha.rx_ccix", "BriefDescription": "Count of the number of operations that HHA has received from CCIX", "PublicDescription": "Count of the number of operations that HHA has received from CCIX", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x4", - "EventName": "rx_wbi", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x5", - "EventName": "rx_wbip", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x11", - "EventName": "rx_wtistash", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x1c", - "EventName": "rd_ddr_64b", + "EventCode": "0x1c", + "EventName": "uncore_hisi_hha.rd_ddr_64b", "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64bytes", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x1d", - 
"EventName": "wr_ddr_64b", + "EventCode": "0x1d", + "EventName": "uncore_hisi_hha.wr_ddr_64b", "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x1e", - "EventName": "rd_ddr_128b", + "EventCode": "0x1e", + "EventName": "uncore_hisi_hha.rd_ddr_128b", "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x1f", - "EventName": "wr_ddr_128b", + "EventCode": "0x1f", + "EventName": "uncore_hisi_hha.wr_ddr_128b", "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x20", - "EventName": "spill_num", + "EventCode": "0x20", + "EventName": "uncore_hisi_hha.spill_num", "BriefDescription": "Count of the number of spill operations that the HHA has sent", "PublicDescription": "Count of the number of spill operations that the HHA has sent", "Unit": "hisi_sccl,hha" }, { - "ConfigCode": "0x21", - "EventName": "spill_success", + "EventCode": "0x21", + "EventName": "uncore_hisi_hha.spill_success", "BriefDescription": "Count of the number of successful spill operations that the HHA has sent", "PublicDescription": "Count of the number of successful spill operations that the HHA has sent", "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x23", - "EventName": "bi_num", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x32", - "EventName": "mediated_num", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x33", - "EventName": "tx_snp_num", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x34", - "EventName": "tx_snp_outer", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x35", - "EventName": "tx_snp_ccix", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x38", - "EventName": "rx_snprspdata", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x3c", - "EventName": "rx_snprsp_outer", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x40", - "EventName": "sdir-lookup", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x41", - "EventName": "edir-lookup", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x42", - "EventName": "sdir-hit", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x43", - "EventName": "edir-hit", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x4c", - "EventName": "sdir-home-migrate", - "Unit": "hisi_sccl,hha" - }, - { - "ConfigCode": "0x4d", - "EventName": "edir-home-migrate", - "Unit": "hisi_sccl,hha" } ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json index e3479b65be..67ab19e8cf 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json @@ -1,91 +1,91 @@ [ { - "ConfigCode": "0x00", - "EventName": "rd_cpipe", + "EventCode": "0x00", + "EventName": "uncore_hisi_l3c.rd_cpipe", "BriefDescription": "Total read accesses", "PublicDescription": "Total read accesses", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x01", - "EventName": "wr_cpipe", + "EventCode": "0x01", + "EventName": "uncore_hisi_l3c.wr_cpipe", 
"BriefDescription": "Total write accesses", "PublicDescription": "Total write accesses", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x02", - "EventName": "rd_hit_cpipe", + "EventCode": "0x02", + "EventName": "uncore_hisi_l3c.rd_hit_cpipe", "BriefDescription": "Total read hits", "PublicDescription": "Total read hits", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x03", - "EventName": "wr_hit_cpipe", + "EventCode": "0x03", + "EventName": "uncore_hisi_l3c.wr_hit_cpipe", "BriefDescription": "Total write hits", "PublicDescription": "Total write hits", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x04", - "EventName": "victim_num", + "EventCode": "0x04", + "EventName": "uncore_hisi_l3c.victim_num", "BriefDescription": "l3c precharge commands", "PublicDescription": "l3c precharge commands", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x20", - "EventName": "rd_spipe", + "EventCode": "0x20", + "EventName": "uncore_hisi_l3c.rd_spipe", "BriefDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", "PublicDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x21", - "EventName": "wr_spipe", + "EventCode": "0x21", + "EventName": "uncore_hisi_l3c.wr_spipe", "BriefDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", "PublicDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x22", - "EventName": "rd_hit_spipe", + "EventCode": "0x22", + "EventName": "uncore_hisi_l3c.rd_hit_spipe", "BriefDescription": "Count of the number of read lines that hits in spipe of this L3C", "PublicDescription": "Count of the number of read lines that hits in spipe of this L3C", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x23", - "EventName": "wr_hit_spipe", + "EventCode": "0x23", + "EventName": "uncore_hisi_l3c.wr_hit_spipe", "BriefDescription": "Count of the number of write lines that hits in spipe of this L3C", "PublicDescription": "Count of the number of write lines that hits in spipe of this L3C", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x29", - "EventName": "back_invalid", + "EventCode": "0x29", + "EventName": "uncore_hisi_l3c.back_invalid", "BriefDescription": "Count of the number of L3C back invalid operations", "PublicDescription": "Count of the number of L3C back invalid operations", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x40", - "EventName": "retry_cpu", + "EventCode": "0x40", + "EventName": "uncore_hisi_l3c.retry_cpu", "BriefDescription": "Count of the number of retry that L3C suppresses the CPU operations", "PublicDescription": "Count of the number of retry that L3C suppresses the CPU operations", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x41", - "EventName": "retry_ring", + "EventCode": "0x41", + "EventName": "uncore_hisi_l3c.retry_ring", "BriefDescription": "Count of the number of retry that L3C suppresses the ring operations", "PublicDescription": "Count of the number of retry that L3C suppresses the ring operations", "Unit": "hisi_sccl,l3c" }, { - "ConfigCode": "0x42", - "EventName": "prefetch_drop", + "EventCode": "0x42", + "EventName": "uncore_hisi_l3c.prefetch_drop", "BriefDescription": "Count of the number of prefetch drops from this L3C", "PublicDescription": "Count of the number of prefetch drops from this L3C", "Unit": "hisi_sccl,l3c" diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv 
b/tools/perf/pmu-events/arch/arm64/mapfile.csv index b899db48c1..c43591d831 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -18,8 +18,6 @@ 0x00000000410fd080,v1,arm/cortex-a57-a72,core 0x00000000410fd0b0,v1,arm/cortex-a76-n1,core 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core -0x00000000410fd400,v1,arm/neoverse-v1,core -0x00000000410fd490,v1,arm/neoverse-n2,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core diff --git a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json index 3e7ac409d8..5347350c36 100644 --- a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json +++ b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json @@ -286,5 +286,5 @@ "EventCode": "0x21e", "EventName": "pop25_inst", "BriefDescription": "V3 POP25 instructions" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json index 783de7f1ae..2dd8dafff2 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json index 3f28007d38..db286f19e7 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json index 86bd8ba939..b6b7f29ca8 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json @@ -124,5 +124,5 @@ "EventName": "L2C_STORES_SENT", "BriefDescription": "L2C Stores Sent", "PublicDescription": "Incremented by one for every store sent to Level-2 (L1.5) cache" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json index 783de7f1ae..2dd8dafff2 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json index 3f28007d38..db286f19e7 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json 
b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json index 1a5e4f89c5..5da8296b66 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json @@ -390,5 +390,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json index fc762e9f1d..17fb524192 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json @@ -54,5 +54,5 @@ "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json index 3f28007d38..db286f19e7 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json index 4942b20a1e..89e070727e 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json @@ -369,5 +369,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/basic.json b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json index fc762e9f1d..17fb524192 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json @@ -54,5 +54,5 @@ "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json index 3f28007d38..db286f19e7 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json index ad79189050..c998e4f1d1 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json @@ -26,5 +26,5 @@ "EventName": "ECC_BLOCKED_CYCLES_COUNT", "BriefDescription": "ECC Blocked Cycles Count", "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU." 
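The "}," immediately before "]" that these s390 counter files get back (the "- } + }, ]" pattern throughout) is not valid strict JSON; it is only accepted because the jsmn.c hunk below stops defining JSMN_STRICT, after which ',' and ':' are consumed exactly like whitespace. A toy, self-contained sketch of that tolerant scanning behaviour (not the real jsmn code, which also tokenises strings and primitives):

	#include <stdbool.h>
	#include <stdio.h>

	/* Toy bracket scanner: separators fall through to the whitespace
	 * cases, so a trailing ",]" or ",}" never raises an error. */
	static bool scan_ok(const char *js)
	{
		int depth = 0;

		for (; *js; js++) {
			switch (*js) {
			case '{': case '[':
				depth++;
				break;
			case '}': case ']':
				if (--depth < 0)
					return false;
				break;
			case '\t': case '\r': case '\n': case ' ':
			case ':': case ',':	/* skipped when !JSMN_STRICT */
				break;
			default:
				break;	/* strings/primitives not modelled */
			}
		}
		return depth == 0;
	}

	int main(void)
	{
		/* the shape re-introduced above: object ends "}," before "]" */
		printf("%s\n", scan_ok("[ { \"EventName\": \"x\" }, ]") ? "ok" : "reject");
		return 0;
	}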
- } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 8ac61f8f28..24c4ba2a9a 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -397,5 +397,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json index 783de7f1ae..2dd8dafff2 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json index 3f28007d38..db286f19e7 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json index 86b29fd181..b7b42a870b 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json @@ -166,5 +166,5 @@ "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json index 783de7f1ae..2dd8dafff2 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json index 3f28007d38..db286f19e7 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - } + }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json index f40cbed894..1622510372 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json @@ -243,5 +243,5 @@ "EventName": "TX_C_TABORT_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode using special completion 
logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" - } + }, ] diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json index 41bac1c6a0..788766f45d 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json @@ -18,22 +18,6 @@ "Invert": "0", "EdgeDetect": "0" }, - { - "Unit": "CBO", - "EventCode": "0xE0", - "UMask": "0x00", - "EventName": "event-hyphen", - "BriefDescription": "UNC_CBO_HYPHEN", - "PublicDescription": "UNC_CBO_HYPHEN" - }, - { - "Unit": "CBO", - "EventCode": "0xC0", - "UMask": "0x00", - "EventName": "event-two-hyph", - "BriefDescription": "UNC_CBO_TWO_HYPH", - "PublicDescription": "UNC_CBO_TWO_HYPH" - }, { "EventCode": "0x7", "EventName": "uncore_hisi_l3c.rd_hit_cpipe", @@ -54,5 +38,5 @@ "BriefDescription": "Total cache hits", "PublicDescription": "Total cache hits", "Unit": "imc" - } + }, ] diff --git a/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json index c7e7528db3..0f681a6e10 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json @@ -6,11 +6,4 @@ "Unit": "sys_ddr_pmu", "Compat": "v8" }, - { - "BriefDescription": "ccn read-cycles event", - "ConfigCode": "0x2c", - "EventName": "sys_ccn_pmu.read_cycles", - "Unit": "sys_ccn_pmu", - "Compat": "0x01" - } ] diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index 14b9a8ab15..57ddbb9f9b 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -311,5 +311,5 @@ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", "MetricName": "C6_Pkg_Residency" - } + }, ] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 1a57c3f81d..7c887d37b8 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -45,7 +45,6 @@ #include /* getrlimit */ #include #include -#include #include #include "jsmn.h" #include "json.h" @@ -71,7 +70,7 @@ struct json_event { char *metric_constraint; }; -static enum aggr_mode_class convert(const char *aggr_mode) +enum aggr_mode_class convert(const char *aggr_mode) { if (!strcmp(aggr_mode, "PerCore")) return PerCore; @@ -82,6 +81,8 @@ static enum aggr_mode_class convert(const char *aggr_mode) return -1; } +typedef int (*func)(void *data, struct json_event *je); + static LIST_HEAD(sys_event_tables); struct sys_event_table { @@ -360,7 +361,7 @@ static int close_table; static void print_events_table_prefix(FILE *fp, const char *tblname) { - fprintf(fp, "static const struct pmu_event %s[] = {\n", tblname); + fprintf(fp, "struct pmu_event %s[] = {\n", tblname); close_table = 1; } @@ -368,7 +369,7 @@ static int print_events_table_entry(void *data, struct json_event *je) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; - char *topic_local = pd->topic; + char *topic = pd->topic; /* * TODO: Remove formatting chars after debugging to reduce @@ -383,7 +384,7 @@ static int print_events_table_entry(void *data, struct json_event *je) fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); if (je->compat) fprintf(outfp, "\t.compat = \"%s\",\n", je->compat); - 
fprintf(outfp, "\t.topic = \"%s\",\n", topic_local); + fprintf(outfp, "\t.topic = \"%s\",\n", topic); if (je->long_desc && je->long_desc[0]) fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); if (je->pmu) @@ -469,7 +470,7 @@ static void free_arch_std_events(void) } } -static int save_arch_std_events(void *data __maybe_unused, struct json_event *je) +static int save_arch_std_events(void *data, struct json_event *je) { struct event_struct *es; @@ -574,12 +575,10 @@ static int json_events(const char *fn, struct json_event je = {}; char *arch_std = NULL; unsigned long long eventcode = 0; - unsigned long long configcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; jsmntok_t *obj = tok++; - bool configcode_present = false; EXPECT(obj->type == JSMN_OBJECT, obj, "expected object"); for (j = 0; j < obj->size; j += 2) { @@ -602,12 +601,6 @@ static int json_events(const char *fn, addfield(map, &code, "", "", val); eventcode |= strtoul(code, NULL, 0); free(code); - } else if (json_streq(map, field, "ConfigCode")) { - char *code = NULL; - addfield(map, &code, "", "", val); - configcode |= strtoul(code, NULL, 0); - free(code); - configcode_present = true; } else if (json_streq(map, field, "ExtSel")) { char *code = NULL; addfield(map, &code, "", "", val); @@ -672,6 +665,8 @@ static int json_events(const char *fn, addfield(map, &je.metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &je.metric_expr, "", "", val); + for (s = je.metric_expr; *s; s++) + *s = tolower(*s); } else if (json_streq(map, field, "ArchStdEvent")) { addfield(map, &arch_std, "", "", val); for (s = arch_std; *s; s++) @@ -687,10 +682,7 @@ static int json_events(const char *fn, addfield(map, &extra_desc, " ", "(Precise event)", NULL); } - if (configcode_present) - snprintf(buf, sizeof buf, "config=%#llx", configcode); - else - snprintf(buf, sizeof buf, "event=%#llx", eventcode); + snprintf(buf, sizeof buf, "event=%#llx", eventcode); addfield(map, &event, ",", buf, NULL); if (je.desc && extra_desc) addfield(map, &je.desc, " ", extra_desc, NULL); @@ -794,7 +786,7 @@ static bool is_sys_dir(char *fname) static void print_mapping_table_prefix(FILE *outfp) { - fprintf(outfp, "const struct pmu_events_map pmu_events_map[] = {\n"); + fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n"); } static void print_mapping_table_suffix(FILE *outfp) @@ -828,7 +820,7 @@ static void print_mapping_test_table(FILE *outfp) static void print_system_event_mapping_table_prefix(FILE *outfp) { - fprintf(outfp, "\nconst struct pmu_sys_events pmu_sys_event_tables[] = {"); + fprintf(outfp, "\nstruct pmu_sys_events pmu_sys_event_tables[] = {"); } static void print_system_event_mapping_table_suffix(FILE *outfp) @@ -1204,7 +1196,7 @@ int main(int argc, char *argv[]) const char *arch; const char *output_file; const char *start_dirname; - const char *err_string_ext = ""; + char *err_string_ext = ""; struct stat stbuf; prog = basename(argv[0]); diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c index 831dc44c45..11d1fa18bf 100644 --- a/tools/perf/pmu-events/jsmn.c +++ b/tools/perf/pmu-events/jsmn.c @@ -24,7 +24,6 @@ #include #include "jsmn.h" -#define JSMN_STRICT /* * Allocates a fresh unused token from the token pool. @@ -177,14 +176,6 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmnerr_t r; int i; jsmntok_t *token; -#ifdef JSMN_STRICT - /* - * Keeps track of whether a new object/list/primitive is expected. 
New items are only - * allowed after an opening brace, comma or colon. A closing brace after a comma is not - * valid JSON. - */ - int expecting_item = 1; -#endif for (; parser->pos < len; parser->pos++) { char c; @@ -194,10 +185,6 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, switch (c) { case '{': case '[': -#ifdef JSMN_STRICT - if (!expecting_item) - return JSMN_ERROR_INVAL; -#endif token = jsmn_alloc_token(parser, tokens, num_tokens); if (token == NULL) return JSMN_ERROR_NOMEM; @@ -209,10 +196,6 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, break; case '}': case ']': -#ifdef JSMN_STRICT - if (expecting_item) - return JSMN_ERROR_INVAL; -#endif type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); for (i = parser->toknext - 1; i >= 0; i--) { token = &tokens[i]; @@ -236,11 +219,6 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, } break; case '\"': -#ifdef JSMN_STRICT - if (!expecting_item) - return JSMN_ERROR_INVAL; - expecting_item = 0; -#endif r = jsmn_parse_string(parser, js, len, tokens, num_tokens); if (r < 0) @@ -251,15 +229,11 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, case '\t': case '\r': case '\n': + case ':': + case ',': case ' ': break; #ifdef JSMN_STRICT - case ':': - case ',': - if (expecting_item) - return JSMN_ERROR_INVAL; - expecting_item = 1; - break; /* * In strict mode primitives are: * numbers and booleans. @@ -279,9 +253,6 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, case 'f': case 'n': #else - case ':': - case ',': - break; /* * In non-strict mode every unquoted value * is a primitive. @@ -289,12 +260,6 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, /*FALL THROUGH */ default: #endif - -#ifdef JSMN_STRICT - if (!expecting_item) - return JSMN_ERROR_INVAL; - expecting_item = 0; -#endif r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); if (r < 0) @@ -317,11 +282,7 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, return JSMN_ERROR_PART; } -#ifdef JSMN_STRICT - return expecting_item ? JSMN_ERROR_INVAL : JSMN_SUCCESS; -#else return JSMN_SUCCESS; -#endif } /* diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 6efe739764..5c2bf7275c 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -41,19 +41,19 @@ struct pmu_events_map { const char *cpuid; const char *version; const char *type; /* core, uncore etc */ - const struct pmu_event *table; + struct pmu_event *table; }; struct pmu_sys_events { const char *name; - const struct pmu_event *table; + struct pmu_event *table; }; /* * Global table mapping each known CPU for the architecture to its * table of PMU events. 
*/ -extern const struct pmu_events_map pmu_events_map[]; -extern const struct pmu_sys_events pmu_sys_event_tables[]; +extern struct pmu_events_map pmu_events_map[]; +extern struct pmu_sys_events pmu_sys_event_tables[]; #endif diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index af2b37ef7c..803ca426f8 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -65,7 +65,6 @@ perf-y += pe-file-parsing.o perf-y += expand-cgroup.o perf-y += perf-time-to-tsc.o perf-y += dlfilter-test.o -perf-y += sigtrap.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c index e91cf2c127..2ada86ad60 100644 --- a/tools/perf/tests/api-io.c +++ b/tools/perf/tests/api-io.c @@ -289,8 +289,8 @@ static int test_get_dec(void) return ret; } -static int test__api_io(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__api_io(struct test *test __maybe_unused, + int subtest __maybe_unused) { int ret = 0; @@ -302,5 +302,3 @@ static int test__api_io(struct test_suite *test __maybe_unused, ret = TEST_FAIL; return ret; } - -DEFINE_SUITE("Test api io", api_io); diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 56fba08a30..9b40a25376 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -65,7 +65,7 @@ do { \ #define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) -static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, +static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags) { FILE *file; @@ -93,7 +93,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu /* syscall arguments */ __WRITE_ASS(fd, "d", fd); __WRITE_ASS(group_fd, "d", group_fd); - __WRITE_ASS(cpu, "d", cpu.cpu); + __WRITE_ASS(cpu, "d", cpu); __WRITE_ASS(pid, "d", pid); __WRITE_ASS(flags, "lu", flags); @@ -144,7 +144,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu return 0; } -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags) { int errno_saved = errno; @@ -178,7 +178,7 @@ static int run_dir(const char *d, const char *perf) return system(cmd) ? 
TEST_FAIL : TEST_OK; } -static int test__attr(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) { struct stat st; char path_perf[PATH_MAX]; @@ -207,5 +207,3 @@ static int test__attr(struct test_suite *test __maybe_unused, int subtest __mayb return TEST_SKIP; } - -DEFINE_SUITE("Setup struct perf_event_attr", attr); diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README index 1116fc6bf2..a36f49fb4d 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/attr/README @@ -45,10 +45,8 @@ Following tests are defined (with perf commands): perf record -d kill (test-record-data) perf record -F 100 kill (test-record-freq) perf record -g kill (test-record-graph-default) - perf record -g kill (test-record-graph-default-aarch64) perf record --call-graph dwarf kill (test-record-graph-dwarf) perf record --call-graph fp kill (test-record-graph-fp) - perf record --call-graph fp kill (test-record-graph-fp-aarch64) perf record --group -e cycles,instructions kill (test-record-group) perf record -e '{cycles,instructions}' kill (test-record-group1) perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2) diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default index f0a18b4ea4..5d8234d508 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/attr/test-record-graph-default @@ -2,8 +2,6 @@ command = record args = --no-bpf-event -g kill >/dev/null 2>&1 ret = 1 -# arm64 enables registers in the default mode (fp) -arch = !aarch64 [event:base-record] sample_type=295 diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp index a6e60e8392..5630521c0b 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/attr/test-record-graph-fp @@ -2,8 +2,6 @@ command = record args = --no-bpf-event --call-graph fp kill >/dev/null 2>&1 ret = 1 -# arm64 enables registers in fp mode -arch = !aarch64 [event:base-record] sample_type=295 diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 79a980b1e7..b4b9a9488d 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -82,7 +82,7 @@ static int do_test(struct evlist *evlist, int mmap_pages, } -static int test__backward_ring_buffer(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0; char pid[16], sbuf[STRERR_BUFSIZE]; @@ -115,13 +115,12 @@ static int test__backward_ring_buffer(struct test_suite *test __maybe_unused, in goto out_delete_evlist; } - parse_events_error__init(&parse_error); + bzero(&parse_error, sizeof(parse_error)); /* * Set backward bit, ring buffer should be writing from end. 
Record * it in aux evlist */ err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error); - parse_events_error__exit(&parse_error); if (err) { pr_debug("Failed to parse tracepoint event, try use root\n"); ret = TEST_SKIP; @@ -167,5 +166,3 @@ static int test__backward_ring_buffer(struct test_suite *test __maybe_unused, in evlist__delete(evlist); return ret; } - -DEFINE_SUITE("Read backward ring buffer", backward_ring_buffer); diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 4965dd6669..12b805efdc 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -17,8 +17,8 @@ static unsigned long *get_bitmap(const char *str, int nbits) bm = bitmap_zalloc(nbits); if (map && bm) { - for (i = 0; i < perf_cpu_map__nr(map); i++) - set_bit(perf_cpu_map__cpu(map, i).cpu, bm); + for (i = 0; i < map->nr; i++) + set_bit(map->map[i], bm); } if (map) @@ -40,7 +40,7 @@ static int test_bitmap(const char *str) return ret; } -static int test__bitmap_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("failed to convert map", test_bitmap("1")); TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5")); @@ -51,5 +51,3 @@ static int test__bitmap_print(struct test_suite *test __maybe_unused, int subtes TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40")); return 0; } - -DEFINE_SUITE("Print bitmap", bitmap_print); diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index d1ebb5561e..489b50604c 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -19,19 +19,6 @@ #include "../perf-sys.h" #include "cloexec.h" -/* - * PowerPC and S390 do not support creation of instruction breakpoints using the - * perf_event interface. - * - * Just disable the test for these architectures until these issues are - * resolved. - */ -#if defined(__powerpc__) || defined(__s390x__) -#define BP_ACCOUNT_IS_SUPPORTED 0 -#else -#define BP_ACCOUNT_IS_SUPPORTED 1 -#endif - static volatile long the_var; static noinline int test_function(void) @@ -186,18 +173,13 @@ static int detect_share(int wp_cnt, int bp_cnt) * we create another watchpoint to ensure * the slot accounting is correct */ -static int test__bp_accounting(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_unused) { int has_ioctl = detect_ioctl(); int wp_cnt = detect_cnt(false); int bp_cnt = detect_cnt(true); int share = detect_share(wp_cnt, bp_cnt); - if (!BP_ACCOUNT_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } - pr_debug("watchpoints count %d, breakpoints count %d, has_ioctl %d, share %d\n", wp_cnt, bp_cnt, has_ioctl, share); @@ -207,4 +189,18 @@ static int test__bp_accounting(struct test_suite *test __maybe_unused, int subte return bp_accounting(wp_cnt, share); } -DEFINE_SUITE("Breakpoint accounting", bp_accounting); +bool test__bp_account_is_supported(void) +{ + /* + * PowerPC and S390 do not support creation of instruction + * breakpoints using the perf_event interface. + * + * Just disable the test for these architectures until these + * issues are resolved. 
+ */ +#if defined(__powerpc__) || defined(__s390x__) + return false; +#else + return true; +#endif +} diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index 1f2908f023..ef37353636 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -161,16 +161,11 @@ static long long bp_count(int fd) return count; } -static int test__bp_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused) { struct sigaction sa; long long count1, count2, count3; - if (!BP_SIGNAL_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } - /* setup SIGIO signal handler */ memset(&sa, 0, sizeof(struct sigaction)); sa.sa_sigaction = (void *) sig_handler; @@ -290,4 +285,29 @@ static int test__bp_signal(struct test_suite *test __maybe_unused, int subtest _ TEST_OK : TEST_FAIL; } -DEFINE_SUITE("Breakpoint overflow signal handler", bp_signal); +bool test__bp_signal_is_supported(void) +{ + /* + * PowerPC and S390 do not support creation of instruction + * breakpoints using the perf_event interface. + * + * ARM requires explicit rounding down of the instruction + * pointer in Thumb mode, and then requires the single-step + * to be handled explicitly in the overflow handler to avoid + * stepping into the SIGIO handler and getting stuck on the + * breakpointed instruction. + * + * Since arm64 has the same issue with arm for the single-step + * handling, this case also gets stuck on the breakpointed + * instruction. + * + * Just disable the test for these architectures until these + * issues are resolved. + */ +#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || \ + defined(__aarch64__) + return false; +#else + return true; +#endif +} diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c index 4e897c2cf2..eb4dbbddf4 100644 --- a/tools/perf/tests/bp_signal_overflow.c +++ b/tools/perf/tests/bp_signal_overflow.c @@ -59,18 +59,13 @@ static long long bp_count(int fd) #define EXECUTIONS 10000 #define THRESHOLD 100 -static int test__bp_signal_overflow(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_event_attr pe; struct sigaction sa; long long count; int fd, i, fails = 0; - if (!BP_SIGNAL_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } - /* setup SIGIO signal handler */ memset(&sa, 0, sizeof(struct sigaction)); sa.sa_sigaction = (void *) sig_handler; @@ -138,5 +133,3 @@ static int test__bp_signal_overflow(struct test_suite *test __maybe_unused, int return fails ? 
TEST_FAIL : TEST_OK; } - -DEFINE_SUITE("Breakpoint overflow sampling", bp_signal_overflow); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 5734905301..fa03ff0dc0 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -62,6 +62,7 @@ static int llseek_loop(void) static struct { enum test_llvm__testcase prog_id; + const char *desc; const char *name; const char *msg_compile_fail; const char *msg_load_fail; @@ -71,6 +72,7 @@ static struct { } bpf_testcase_table[] = { { .prog_id = LLVM_TESTCASE_BASE, + .desc = "Basic BPF filtering", .name = "[basic_bpf_test]", .msg_compile_fail = "fix 'perf test LLVM' first", .msg_load_fail = "load bpf object failed", @@ -79,6 +81,7 @@ static struct { }, { .prog_id = LLVM_TESTCASE_BASE, + .desc = "BPF pinning", .name = "[bpf_pinning]", .msg_compile_fail = "fix kbuild first", .msg_load_fail = "check your vmlinux setting?", @@ -89,6 +92,7 @@ static struct { #ifdef HAVE_BPF_PROLOGUE { .prog_id = LLVM_TESTCASE_BPF_PROLOGUE, + .desc = "BPF prologue generation", .name = "[bpf_prologue_test]", .msg_compile_fail = "fix kbuild first", .msg_load_fail = "check your vmlinux setting?", @@ -119,13 +123,12 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct parse_events_state parse_state; struct parse_events_error parse_error; - parse_events_error__init(&parse_error); + bzero(&parse_error, sizeof(parse_error)); bzero(&parse_state, sizeof(parse_state)); parse_state.error = &parse_error; INIT_LIST_HEAD(&parse_state.list); err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL); - parse_events_error__exit(&parse_error); if (err || list_empty(&parse_state.list)) { pr_debug("Failed to add events selected by BPF\n"); return TEST_FAIL; @@ -279,6 +282,18 @@ static int __test__bpf(int idx) return ret; } +int test__bpf_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(bpf_testcase_table); +} + +const char *test__bpf_subtest_get_desc(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table)) + return NULL; + return bpf_testcase_table[i].desc; +} + static int check_env(void) { int err; @@ -296,13 +311,9 @@ static int check_env(void) return err; } -/* temporarily disable libbpf deprecation warnings */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, - ARRAY_SIZE(insns), + sizeof(insns) / sizeof(insns[0]), license, kver_int, NULL, 0); -#pragma GCC diagnostic pop if (err < 0) { pr_err("Missing basic BPF support, skip this test: %s\n", strerror(errno)); @@ -313,7 +324,7 @@ static int check_env(void) return 0; } -static int test__bpf(int i) +int test__bpf(struct test *test __maybe_unused, int i) { int err; @@ -331,60 +342,21 @@ static int test__bpf(int i) err = __test__bpf(i); return err; } -#endif -static int test__basic_bpf_test(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#ifdef HAVE_LIBBPF_SUPPORT - return test__bpf(0); #else - pr_debug("Skip BPF test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif +int test__bpf_subtest_get_nr(void) +{ + return 0; } -static int test__bpf_pinning(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +const char *test__bpf_subtest_get_desc(int i __maybe_unused) { -#ifdef HAVE_LIBBPF_SUPPORT - return test__bpf(1); -#else - pr_debug("Skip BPF test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif + return NULL; } -static int test__bpf_prologue_test(struct test_suite *test __maybe_unused, - int subtest 
__maybe_unused) +int test__bpf(struct test *test __maybe_unused, int i __maybe_unused) { -#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE) - return test__bpf(2); -#else pr_debug("Skip BPF test because BPF support is not compiled\n"); return TEST_SKIP; -#endif } - - -static struct test_case bpf_tests[] = { -#ifdef HAVE_LIBBPF_SUPPORT - TEST_CASE("Basic BPF filtering", basic_bpf_test), - TEST_CASE("BPF pinning", bpf_pinning), -#ifdef HAVE_BPF_PROLOGUE - TEST_CASE("BPF prologue generation", bpf_prologue_test), -#else - TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), #endif -#else - TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in"), - TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in"), - TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), -#endif - { .name = NULL, } -}; - -struct test_suite suite__bpf = { - .desc = "BPF filter", - .test_cases = bpf_tests, -}; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index fac3717d9b..da7dc5e45d 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -30,135 +30,350 @@ static bool dont_fork; -struct test_suite *__weak arch_tests[] = { - NULL, +struct test __weak arch_tests[] = { + { + .func = NULL, + }, }; -static struct test_suite *generic_tests[] = { - &suite__vmlinux_matches_kallsyms, - &suite__openat_syscall_event, - &suite__openat_syscall_event_on_all_cpus, - &suite__basic_mmap, - &suite__mem, - &suite__parse_events, - &suite__expr, - &suite__PERF_RECORD, - &suite__pmu, - &suite__pmu_events, - &suite__dso_data, - &suite__dso_data_cache, - &suite__dso_data_reopen, - &suite__perf_evsel__roundtrip_name_test, - &suite__perf_evsel__tp_sched_test, - &suite__syscall_openat_tp_fields, - &suite__attr, - &suite__hists_link, - &suite__python_use, - &suite__bp_signal, - &suite__bp_signal_overflow, - &suite__bp_accounting, - &suite__wp, - &suite__task_exit, - &suite__sw_clock_freq, - &suite__code_reading, - &suite__sample_parsing, - &suite__keep_tracking, - &suite__parse_no_sample_id_all, - &suite__hists_filter, - &suite__mmap_thread_lookup, - &suite__thread_maps_share, - &suite__hists_output, - &suite__hists_cumulate, - &suite__switch_tracking, - &suite__fdarray__filter, - &suite__fdarray__add, - &suite__kmod_path__parse, - &suite__thread_map, - &suite__llvm, - &suite__session_topology, - &suite__bpf, - &suite__thread_map_synthesize, - &suite__thread_map_remove, - &suite__cpu_map_synthesize, - &suite__synthesize_stat_config, - &suite__synthesize_stat, - &suite__synthesize_stat_round, - &suite__event_update, - &suite__event_times, - &suite__backward_ring_buffer, - &suite__cpu_map_print, - &suite__cpu_map_merge, - &suite__sdt_event, - &suite__is_printable_array, - &suite__bitmap_print, - &suite__perf_hooks, - &suite__clang, - &suite__unit_number__scnprint, - &suite__mem2node, - &suite__time_utils, - &suite__jit_write_elf, - &suite__pfm, - &suite__api_io, - &suite__maps__merge_in, - &suite__demangle_java, - &suite__demangle_ocaml, - &suite__parse_metric, - &suite__pe_file_parsing, - &suite__expand_cgroup_events, - &suite__perf_time_to_tsc, - &suite__dlfilter, - &suite__sigtrap, - NULL, +static struct test generic_tests[] = { + { + .desc = "vmlinux symtab matches kallsyms", + .func = test__vmlinux_matches_kallsyms, + }, + { + .desc = "Detect openat syscall event", + .func = test__openat_syscall_event, + }, + { + .desc = "Detect openat syscall event on all cpus", + .func = 
test__openat_syscall_event_on_all_cpus, + }, + { + .desc = "Read samples using the mmap interface", + .func = test__basic_mmap, + }, + { + .desc = "Test data source output", + .func = test__mem, + }, + { + .desc = "Parse event definition strings", + .func = test__parse_events, + }, + { + .desc = "Simple expression parser", + .func = test__expr, + }, + { + .desc = "PERF_RECORD_* events & perf_sample fields", + .func = test__PERF_RECORD, + }, + { + .desc = "Parse perf pmu format", + .func = test__pmu, + }, + { + .desc = "PMU events", + .func = test__pmu_events, + .subtest = { + .skip_if_fail = false, + .get_nr = test__pmu_events_subtest_get_nr, + .get_desc = test__pmu_events_subtest_get_desc, + .skip_reason = test__pmu_events_subtest_skip_reason, + }, + + }, + { + .desc = "DSO data read", + .func = test__dso_data, + }, + { + .desc = "DSO data cache", + .func = test__dso_data_cache, + }, + { + .desc = "DSO data reopen", + .func = test__dso_data_reopen, + }, + { + .desc = "Roundtrip evsel->name", + .func = test__perf_evsel__roundtrip_name_test, + }, + { + .desc = "Parse sched tracepoints fields", + .func = test__perf_evsel__tp_sched_test, + }, + { + .desc = "syscalls:sys_enter_openat event fields", + .func = test__syscall_openat_tp_fields, + }, + { + .desc = "Setup struct perf_event_attr", + .func = test__attr, + }, + { + .desc = "Match and link multiple hists", + .func = test__hists_link, + }, + { + .desc = "'import perf' in python", + .func = test__python_use, + }, + { + .desc = "Breakpoint overflow signal handler", + .func = test__bp_signal, + .is_supported = test__bp_signal_is_supported, + }, + { + .desc = "Breakpoint overflow sampling", + .func = test__bp_signal_overflow, + .is_supported = test__bp_signal_is_supported, + }, + { + .desc = "Breakpoint accounting", + .func = test__bp_accounting, + .is_supported = test__bp_account_is_supported, + }, + { + .desc = "Watchpoint", + .func = test__wp, + .is_supported = test__wp_is_supported, + .subtest = { + .skip_if_fail = false, + .get_nr = test__wp_subtest_get_nr, + .get_desc = test__wp_subtest_get_desc, + .skip_reason = test__wp_subtest_skip_reason, + }, + }, + { + .desc = "Number of exit events of a simple workload", + .func = test__task_exit, + }, + { + .desc = "Software clock events period values", + .func = test__sw_clock_freq, + }, + { + .desc = "Object code reading", + .func = test__code_reading, + }, + { + .desc = "Sample parsing", + .func = test__sample_parsing, + }, + { + .desc = "Use a dummy software event to keep tracking", + .func = test__keep_tracking, + }, + { + .desc = "Parse with no sample_id_all bit set", + .func = test__parse_no_sample_id_all, + }, + { + .desc = "Filter hist entries", + .func = test__hists_filter, + }, + { + .desc = "Lookup mmap thread", + .func = test__mmap_thread_lookup, + }, + { + .desc = "Share thread maps", + .func = test__thread_maps_share, + }, + { + .desc = "Sort output of hist entries", + .func = test__hists_output, + }, + { + .desc = "Cumulate child hist entries", + .func = test__hists_cumulate, + }, + { + .desc = "Track with sched_switch", + .func = test__switch_tracking, + }, + { + .desc = "Filter fds with revents mask in a fdarray", + .func = test__fdarray__filter, + }, + { + .desc = "Add fd to a fdarray, making it autogrow", + .func = test__fdarray__add, + }, + { + .desc = "kmod_path__parse", + .func = test__kmod_path__parse, + }, + { + .desc = "Thread map", + .func = test__thread_map, + }, + { + .desc = "LLVM search and compile", + .func = test__llvm, + .subtest = { + .skip_if_fail = true, + 
.get_nr = test__llvm_subtest_get_nr, + .get_desc = test__llvm_subtest_get_desc, + }, + }, + { + .desc = "Session topology", + .func = test__session_topology, + }, + { + .desc = "BPF filter", + .func = test__bpf, + .subtest = { + .skip_if_fail = true, + .get_nr = test__bpf_subtest_get_nr, + .get_desc = test__bpf_subtest_get_desc, + }, + }, + { + .desc = "Synthesize thread map", + .func = test__thread_map_synthesize, + }, + { + .desc = "Remove thread map", + .func = test__thread_map_remove, + }, + { + .desc = "Synthesize cpu map", + .func = test__cpu_map_synthesize, + }, + { + .desc = "Synthesize stat config", + .func = test__synthesize_stat_config, + }, + { + .desc = "Synthesize stat", + .func = test__synthesize_stat, + }, + { + .desc = "Synthesize stat round", + .func = test__synthesize_stat_round, + }, + { + .desc = "Synthesize attr update", + .func = test__event_update, + }, + { + .desc = "Event times", + .func = test__event_times, + }, + { + .desc = "Read backward ring buffer", + .func = test__backward_ring_buffer, + }, + { + .desc = "Print cpu map", + .func = test__cpu_map_print, + }, + { + .desc = "Merge cpu map", + .func = test__cpu_map_merge, + }, + + { + .desc = "Probe SDT events", + .func = test__sdt_event, + }, + { + .desc = "is_printable_array", + .func = test__is_printable_array, + }, + { + .desc = "Print bitmap", + .func = test__bitmap_print, + }, + { + .desc = "perf hooks", + .func = test__perf_hooks, + }, + { + .desc = "builtin clang support", + .func = test__clang, + .subtest = { + .skip_if_fail = true, + .get_nr = test__clang_subtest_get_nr, + .get_desc = test__clang_subtest_get_desc, + } + }, + { + .desc = "unit_number__scnprintf", + .func = test__unit_number__scnprint, + }, + { + .desc = "mem2node", + .func = test__mem2node, + }, + { + .desc = "time utils", + .func = test__time_utils, + }, + { + .desc = "Test jit_write_elf", + .func = test__jit_write_elf, + }, + { + .desc = "Test libpfm4 support", + .func = test__pfm, + .subtest = { + .skip_if_fail = true, + .get_nr = test__pfm_subtest_get_nr, + .get_desc = test__pfm_subtest_get_desc, + } + }, + { + .desc = "Test api io", + .func = test__api_io, + }, + { + .desc = "maps__merge_in", + .func = test__maps__merge_in, + }, + { + .desc = "Demangle Java", + .func = test__demangle_java, + }, + { + .desc = "Demangle OCaml", + .func = test__demangle_ocaml, + }, + { + .desc = "Parse and process metrics", + .func = test__parse_metric, + }, + { + .desc = "PE file support", + .func = test__pe_file_parsing, + }, + { + .desc = "Event expansion for cgroups", + .func = test__expand_cgroup_events, + }, + { + .desc = "Convert perf time to TSC", + .func = test__perf_time_to_tsc, + .is_supported = test__tsc_is_supported, + }, + { + .desc = "dlfilter C API", + .func = test__dlfilter, + }, + { + .func = NULL, + }, }; -static struct test_suite **tests[] = { +static struct test *tests[] = { generic_tests, arch_tests, }; -static int num_subtests(const struct test_suite *t) -{ - int num; - - if (!t->test_cases) - return 0; - - num = 0; - while (t->test_cases[num].name) - num++; - - return num; -} - -static bool has_subtests(const struct test_suite *t) -{ - return num_subtests(t) > 1; -} - -static const char *skip_reason(const struct test_suite *t, int subtest) -{ - if (t->test_cases && subtest >= 0) - return t->test_cases[subtest].skip_reason; - - return NULL; -} - -static const char *test_description(const struct test_suite *t, int subtest) -{ - if (t->test_cases && subtest >= 0) - return t->test_cases[subtest].desc; - - return t->desc; -} - 
-static test_fnptr test_function(const struct test_suite *t, int subtest) -{ - if (subtest <= 0) - return t->test_cases[0].run_case; - - return t->test_cases[subtest].run_case; -} - static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[]) { int i; @@ -183,7 +398,7 @@ static bool perf_test__matches(const char *desc, int curr, int argc, const char return false; } -static int run_test(struct test_suite *test, int subtest) +static int run_test(struct test *test, int subtest) { int status, err = -1, child = dont_fork ? 0 : fork(); char sbuf[STRERR_BUFSIZE]; @@ -215,7 +430,7 @@ static int run_test(struct test_suite *test, int subtest) } } - err = test_function(test, subtest)(test, subtest); + err = test->func(test, subtest); if (!dont_fork) exit(err); } @@ -235,19 +450,24 @@ static int run_test(struct test_suite *test, int subtest) return err; } -#define for_each_test(j, k, t) \ +#define for_each_test(j, t) \ for (j = 0; j < ARRAY_SIZE(tests); j++) \ - for (k = 0, t = tests[j][k]; tests[j][k]; k++, t = tests[j][k]) + for (t = &tests[j][0]; t->func; t++) -static int test_and_print(struct test_suite *t, int subtest) +static int test_and_print(struct test *t, bool force_skip, int subtest) { int err; - pr_debug("\n--- start ---\n"); - err = run_test(t, subtest); - pr_debug("---- end ----\n"); + if (!force_skip) { + pr_debug("\n--- start ---\n"); + err = run_test(t, subtest); + pr_debug("---- end ----\n"); + } else { + pr_debug("\n--- force skipped ---\n"); + err = TEST_SKIP; + } - if (!has_subtests(t)) + if (!t->subtest.get_nr) pr_debug("%s:", t->desc); else pr_debug("%s subtest %d:", t->desc, subtest + 1); @@ -257,10 +477,11 @@ static int test_and_print(struct test_suite *t, int subtest) pr_info(" Ok\n"); break; case TEST_SKIP: { - const char *reason = skip_reason(t, subtest); - - if (reason) - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", reason); + const char *skip_reason = NULL; + if (t->subtest.skip_reason) + skip_reason = t->subtest.skip_reason(subtest); + if (skip_reason) + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason); else color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); } @@ -359,7 +580,7 @@ struct shell_test { const char *file; }; -static int shell_test__run(struct test_suite *test, int subdir __maybe_unused) +static int shell_test__run(struct test *test, int subdir __maybe_unused) { int err; char script[PATH_MAX]; @@ -401,34 +622,24 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width, for_each_shell_test(entlist, n_dirs, st.dir, ent) { int curr = i++; char desc[256]; - struct test_case test_cases[] = { - { - .desc = shell_test__description(desc, - sizeof(desc), - st.dir, - ent->d_name), - .run_case = shell_test__run, - }, - { .name = NULL, } - }; - struct test_suite test_suite = { - .desc = test_cases[0].desc, - .test_cases = test_cases, + struct test test = { + .desc = shell_test__description(desc, sizeof(desc), st.dir, ent->d_name), + .func = shell_test__run, .priv = &st, }; - if (!perf_test__matches(test_suite.desc, curr, argc, argv)) + if (!perf_test__matches(test.desc, curr, argc, argv)) continue; st.file = ent->d_name; - pr_info("%3d: %-*s:", i, width, test_suite.desc); + pr_info("%2d: %-*s:", i, width, test.desc); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); continue; } - test_and_print(&test_suite, 0); + test_and_print(&test, false, -1); } for (e = 0; e < n_dirs; e++) @@ -439,31 +650,33 @@ static int run_shell_tests(int argc, 
const char *argv[], int i, int width, static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { - struct test_suite *t; - unsigned int j, k; + struct test *t; + unsigned int j; int i = 0; int width = shell_tests__max_desc_width(); - for_each_test(j, k, t) { - int len = strlen(test_description(t, -1)); + for_each_test(j, t) { + int len = strlen(t->desc); if (width < len) width = len; } - for_each_test(j, k, t) { - int curr = i++; + for_each_test(j, t) { + int curr = i++, err; int subi; - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) { + if (!perf_test__matches(t->desc, curr, argc, argv)) { bool skip = true; int subn; - subn = num_subtests(t); + if (!t->subtest.get_nr) + continue; + + subn = t->subtest.get_nr(); for (subi = 0; subi < subn; subi++) { - if (perf_test__matches(test_description(t, subi), - curr, argc, argv)) + if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) skip = false; } @@ -471,17 +684,22 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) continue; } - pr_info("%3d: %-*s:", i, width, test_description(t, -1)); + if (t->is_supported && !t->is_supported()) { + pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc); + continue; + } + + pr_info("%2d: %-*s:", i, width, t->desc); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); continue; } - if (!has_subtests(t)) { - test_and_print(t, -1); + if (!t->subtest.get_nr) { + test_and_print(t, false, -1); } else { - int subn = num_subtests(t); + int subn = t->subtest.get_nr(); /* * minus 2 to align with normal testcases. * For subtest we print additional '.x' in number. @@ -491,6 +709,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) * 35.1: Basic BPF llvm compiling test : Ok */ int subw = width > 2 ? 
width - 2 : width; + bool skip = false; if (subn <= 0) { color_fprintf(stderr, PERF_COLOR_YELLOW, @@ -500,20 +719,21 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) pr_info("\n"); for (subi = 0; subi < subn; subi++) { - int len = strlen(test_description(t, subi)); + int len = strlen(t->subtest.get_desc(subi)); if (subw < len) subw = len; } for (subi = 0; subi < subn; subi++) { - if (!perf_test__matches(test_description(t, subi), - curr, argc, argv)) + if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) continue; - pr_info("%3d.%1d: %-*s:", i, subi + 1, subw, - test_description(t, subi)); - test_and_print(t, subi); + pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, + t->subtest.get_desc(subi)); + err = test_and_print(t, skip, subi); + if (err != TEST_OK && t->subtest.skip_if_fail) + skip = true; } } } @@ -539,14 +759,14 @@ static int perf_test__list_shell(int argc, const char **argv, int i) for_each_shell_test(entlist, n_dirs, path, ent) { int curr = i++; char bf[256]; - struct test_suite t = { + struct test t = { .desc = shell_test__description(bf, sizeof(bf), path, ent->d_name), }; if (!perf_test__matches(t.desc, curr, argc, argv)) continue; - pr_info("%3d: %s\n", i, t.desc); + pr_info("%2d: %s\n", i, t.desc); } @@ -558,25 +778,26 @@ static int perf_test__list_shell(int argc, const char **argv, int i) static int perf_test__list(int argc, const char **argv) { - unsigned int j, k; - struct test_suite *t; + unsigned int j; + struct test *t; int i = 0; - for_each_test(j, k, t) { + for_each_test(j, t) { int curr = i++; - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) + if (!perf_test__matches(t->desc, curr, argc, argv) || + (t->is_supported && !t->is_supported())) continue; - pr_info("%3d: %s\n", i, test_description(t, -1)); + pr_info("%2d: %s\n", i, t->desc); - if (has_subtests(t)) { - int subn = num_subtests(t); + if (t->subtest.get_nr) { + int subn = t->subtest.get_nr(); int subi; for (subi = 0; subi < subn; subi++) - pr_info("%3d:%1d: %s\n", i, subi + 1, - test_description(t, subi)); + pr_info("%2d:%1d: %s\n", i, subi + 1, + t->subtest.get_desc(subi)); } } @@ -607,9 +828,6 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; - /* Unbuffered output */ - setvbuf(stdout, NULL, _IONBF, 0); - argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c index a7111005d5..2577d3ed15 100644 --- a/tools/perf/tests/clang.c +++ b/tools/perf/tests/clang.c @@ -3,30 +3,44 @@ #include "c++/clang-c.h" #include +static struct { + int (*func)(void); + const char *desc; +} clang_testcase_table[] = { +#ifdef HAVE_LIBCLANGLLVM_SUPPORT + { + .func = test__clang_to_IR, + .desc = "builtin clang compile C source to IR", + }, + { + .func = test__clang_to_obj, + .desc = "builtin clang compile C source to ELF object", + }, +#endif +}; + +int test__clang_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(clang_testcase_table); +} + +const char *test__clang_subtest_get_desc(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table)) + return NULL; + return clang_testcase_table[i].desc; +} + #ifndef HAVE_LIBCLANGLLVM_SUPPORT -static int test__clang_to_IR(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__clang(struct test *test __maybe_unused, int i __maybe_unused) { return TEST_SKIP; } - -static int 
test__clang_to_obj(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +#else +int test__clang(struct test *test __maybe_unused, int i) { - return TEST_SKIP; + if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table)) + return TEST_FAIL; + return clang_testcase_table[i].func(); } #endif - -static struct test_case clang_tests[] = { - TEST_CASE_REASON("builtin clang compile C source to IR", clang_to_IR, - "not compiled in"), - TEST_CASE_REASON("builtin clang compile C source to ELF object", - clang_to_obj, - "not compiled in"), - { .name = NULL, } -}; - -struct test_suite suite__clang = { - .desc = "builtin clang support", - .test_cases = clang_tests, -}; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 5610767b40..9b4a765e4b 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -606,8 +606,7 @@ static int do_test_code_reading(bool try_kcore) } ret = perf_event__synthesize_thread_map(NULL, threads, - perf_event__process, machine, - true, false); + perf_event__process, machine, false); if (ret < 0) { pr_debug("perf_event__synthesize_thread_map failed\n"); goto out_err; @@ -716,7 +715,7 @@ static int do_test_code_reading(bool try_kcore) return err; } -static int test__code_reading(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret; @@ -743,5 +742,3 @@ static int test__code_reading(struct test_suite *test __maybe_unused, int subtes return -1; }; } - -DEFINE_SUITE("Object code reading", code_reading); diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 84e87e31f1..0472b110fe 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 20); for (i = 0; i < 20; i++) { - TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i); + TEST_ASSERT_VAL("wrong cpu", map->map[i] == i); } perf_cpu_map__put(map); @@ -67,15 +67,15 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, map = cpu_map__new_data(data); TEST_ASSERT_VAL("wrong nr", map->nr == 2); - TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1); - TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256); + TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); + TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); perf_cpu_map__put(map); return 0; } -static int test__cpu_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__cpu_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_cpu_map *cpus; @@ -111,7 +111,7 @@ static int cpu_map_print(const char *str) return !strcmp(buf, str); } -static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1")); TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,5")); @@ -123,7 +123,7 @@ static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subte return 0; } -static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_cpu_map *a = perf_cpu_map__new("4,2,1"); struct 
perf_cpu_map *b = perf_cpu_map__new("4,5,7"); @@ -137,7 +137,3 @@ static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subte perf_cpu_map__put(c); return 0; } - -DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize); -DEFINE_SUITE("Print cpu map", cpu_map_print); -DEFINE_SUITE("Merge cpu map", cpu_map_merge); diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c index 44d1be303b..8f3b90832f 100644 --- a/tools/perf/tests/demangle-java-test.c +++ b/tools/perf/tests/demangle-java-test.c @@ -7,7 +7,7 @@ #include "debug.h" #include "demangle-java.h" -static int test__demangle_java(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret = TEST_OK; char *buf = NULL; @@ -40,5 +40,3 @@ static int test__demangle_java(struct test_suite *test __maybe_unused, int subte return ret; } - -DEFINE_SUITE("Demangle Java", demangle_java); diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c index 90a4285e2a..0043be8123 100644 --- a/tools/perf/tests/demangle-ocaml-test.c +++ b/tools/perf/tests/demangle-ocaml-test.c @@ -7,7 +7,7 @@ #include "debug.h" #include "demangle-ocaml.h" -static int test__demangle_ocaml(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret = TEST_OK; char *buf = NULL; @@ -41,5 +41,3 @@ static int test__demangle_ocaml(struct test_suite *test __maybe_unused, int subt return ret; } - -DEFINE_SUITE("Demangle OCaml", demangle_ocaml); diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c index 84352d5534..bc03b5df68 100644 --- a/tools/perf/tests/dlfilter-test.c +++ b/tools/perf/tests/dlfilter-test.c @@ -398,7 +398,7 @@ static void test_data__free(struct test_data *td) } } -static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__dlfilter(struct test *test __maybe_unused, int subtest __maybe_unused) { struct test_data td = {.fd = -1}; int pid = getpid(); @@ -414,5 +414,3 @@ static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __ test_data__free(&td); return err; } - -DEFINE_SUITE("dlfilter C API", dlfilter); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3419a4ab55..43e1b01e5a 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -113,7 +113,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine) return fd; } -static int test__dso_data(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__dso_data(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine machine; struct dso *dso; @@ -248,7 +248,7 @@ static int set_fd_limit(int n) return setrlimit(RLIMIT_NOFILE, &rlim); } -static int test__dso_data_cache(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine machine; long nr_end, nr = open_files_cnt(); @@ -318,7 +318,7 @@ static long new_limit(int count) return ret; } -static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine machine; long nr_end, nr = open_files_cnt(), lim = new_limit(3); @@ -393,7 +393,3 @@ 
static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub TEST_ASSERT_VAL("failed leaking files", nr == nr_end); return 0; } - -DEFINE_SUITE("DSO data read", dso_data); -DEFINE_SUITE("DSO data cache", dso_data_cache); -DEFINE_SUITE("DSO data reopen", dso_data_reopen); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2dab2d2620..c756284b3b 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -195,8 +195,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_1(struct thread *th return ret; } -static int test__dwarf_unwind(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine *machine; struct thread *thread; @@ -238,5 +237,3 @@ static int test__dwarf_unwind(struct test_suite *test __maybe_unused, machine__delete(machine); return err; } - -DEFINE_SUITE("Test dwarf unwind", dwarf_unwind); diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 7606eb3df9..04ce4401f7 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -216,7 +216,7 @@ static int test_times(int (attach)(struct evlist *), * and checks that enabled and running times * match. */ -static int test__event_times(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unused) { int err, ret = 0; @@ -239,5 +239,3 @@ static int test__event_times(struct test_suite *test __maybe_unused, int subtest #undef _T return ret; } - -DEFINE_SUITE("Event times", event_times); diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 78db4d704e..44a50527f9 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -75,19 +75,20 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS); - TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__nr(map) == 3); - TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 0).cpu == 1); - TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 1).cpu == 2); - TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 2).cpu == 3); + TEST_ASSERT_VAL("wrong cpus", map->nr == 3); + TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1); + TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2); + TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3); perf_cpu_map__put(map); return 0; } -static int test__event_update(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused) { struct evsel *evsel; struct event_name tmp; struct evlist *evlist = evlist__new_default(); + char *unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to get evlist", evlist); @@ -98,8 +99,7 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); - free((char *)evsel->unit); - evsel->unit = strdup("KRAVA"); + evsel->unit = unit; TEST_ASSERT_VAL("failed to synthesize attr update unit", !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); @@ -119,8 +119,7 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, 
evsel, process_event_cpus)); + free(unit); evlist__delete(evlist); return 0; } - -DEFINE_SUITE("Synthesize attr update", event_update); diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index fdbf17642e..4e09f0a312 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -99,8 +99,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names, #define perf_evsel__name_array_test(names, distance) \ __perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance) -static int test__perf_evsel__roundtrip_name_test(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = 0, ret = 0; @@ -121,5 +120,3 @@ static int test__perf_evsel__roundtrip_name_test(struct test_suite *test __maybe return ret; } - -DEFINE_SUITE("Roundtrip evsel->name", perf_evsel__roundtrip_name_test); diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index cf4da3d748..f9e34bd26c 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -32,8 +32,7 @@ static int evsel__test_field(struct evsel *evsel, const char *name, int size, bo return ret; } -static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused) { struct evsel *evsel = evsel__newtp("sched", "sched_switch"); int ret = 0; @@ -88,5 +87,3 @@ static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unuse evsel__delete(evsel); return ret; } - -DEFINE_SUITE("Parse sched tracepoints fields", perf_evsel__tp_sched_test); diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index dfefe5b60e..0e46aeb843 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -124,19 +124,17 @@ static int expand_group_events(void) evlist = evlist__new(); TEST_ASSERT_VAL("failed to get evlist", evlist); - parse_events_error__init(&err); ret = parse_events(evlist, event_str, &err); if (ret < 0) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", event_str, ret, err.str); - parse_events_error__print(&err, event_str); + parse_events_print_error(&err, event_str); goto out; } rblist__init(&metric_events); ret = test_expand_events(evlist, &metric_events); out: - parse_events_error__exit(&err); evlist__delete(evlist); return ret; } @@ -195,7 +193,7 @@ static int expand_metric_events(void) .metric_name = NULL, }, }; - const struct pmu_events_map ev_map = { + struct pmu_events_map ev_map = { .cpuid = "test", .version = "1", .type = "core", @@ -221,8 +219,8 @@ static int expand_metric_events(void) return ret; } -static int test__expand_cgroup_events(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__expand_cgroup_events(struct test *test __maybe_unused, + int subtest __maybe_unused) { int ret; @@ -240,5 +238,3 @@ static int test__expand_cgroup_events(struct test_suite *test __maybe_unused, return ret; } - -DEFINE_SUITE("Event expansion for cgroups", expand_cgroup_events); diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index d54c5371c6..4d01051951 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -1,193 +1,88 @@ // SPDX-License-Identifier: GPL-2.0 #include "util/debug.h" #include "util/expr.h" -#include 
"util/smt.h" #include "tests.h" #include #include #include -static int test_ids_union(void) -{ - struct hashmap *ids1, *ids2; - - /* Empty union. */ - ids1 = ids__new(); - TEST_ASSERT_VAL("ids__new", ids1); - ids2 = ids__new(); - TEST_ASSERT_VAL("ids__new", ids2); - - ids1 = ids__union(ids1, ids2); - TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 0); - - /* Union {foo, bar} against {}. */ - ids2 = ids__new(); - TEST_ASSERT_VAL("ids__new", ids2); - - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("foo")), 0); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("bar")), 0); - - ids1 = ids__union(ids1, ids2); - TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); - - /* Union {foo, bar} against {foo}. */ - ids2 = ids__new(); - TEST_ASSERT_VAL("ids__new", ids2); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("foo")), 0); - - ids1 = ids__union(ids1, ids2); - TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); - - /* Union {foo, bar} against {bar,baz}. */ - ids2 = ids__new(); - TEST_ASSERT_VAL("ids__new", ids2); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("bar")), 0); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("baz")), 0); - - ids1 = ids__union(ids1, ids2); - TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 3); - - ids__free(ids1); - - return 0; -} - static int test(struct expr_parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, e)) + if (expr__parse(&val, ctx, e, 1)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; } -static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_unused) +int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) { struct expr_id_data *val_ptr; const char *p; - double val, num_cpus, num_cores, num_dies, num_packages; + double val; int ret; - struct expr_parse_ctx *ctx; + struct expr_parse_ctx ctx; - TEST_ASSERT_EQUAL("ids_union", test_ids_union(), 0); + expr__ctx_init(&ctx); + expr__add_id_val(&ctx, strdup("FOO"), 1); + expr__add_id_val(&ctx, strdup("BAR"), 2); - ctx = expr__ctx_new(); - TEST_ASSERT_VAL("expr__ctx_new", ctx); - expr__add_id_val(ctx, strdup("FOO"), 1); - expr__add_id_val(ctx, strdup("BAR"), 2); + ret = test(&ctx, "1+1", 2); + ret |= test(&ctx, "FOO+BAR", 3); + ret |= test(&ctx, "(BAR/2)%2", 1); + ret |= test(&ctx, "1 - -4", 5); + ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); + ret |= test(&ctx, "1-1 | 1", 1); + ret |= test(&ctx, "1-1 & 1", 0); + ret |= test(&ctx, "min(1,2) + 1", 2); + ret |= test(&ctx, "max(1,2) + 1", 3); + ret |= test(&ctx, "1+1 if 3*4 else 0", 2); + ret |= test(&ctx, "1.1 + 2.1", 3.2); + ret |= test(&ctx, ".1 + 2.", 2.1); + ret |= test(&ctx, "d_ratio(1, 2)", 0.5); + ret |= test(&ctx, "d_ratio(2.5, 0)", 0); + ret |= test(&ctx, "1.1 < 2.2", 1); + ret |= test(&ctx, "2.2 > 1.1", 1); + ret |= test(&ctx, "1.1 < 1.1", 0); + ret |= test(&ctx, "2.2 > 2.2", 0); + ret |= test(&ctx, "2.2 < 1.1", 0); + ret |= test(&ctx, "1.1 > 2.2", 0); - ret = test(ctx, "1+1", 2); - ret |= test(ctx, "FOO+BAR", 3); - ret |= test(ctx, "(BAR/2)%2", 1); - ret |= test(ctx, "1 - -4", 5); - ret |= test(ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); - ret |= test(ctx, "1-1 | 1", 1); - ret |= test(ctx, "1-1 & 1", 0); - ret |= test(ctx, "min(1,2) + 1", 2); - ret |= test(ctx, "max(1,2) + 1", 3); - ret |= test(ctx, "1+1 if 3*4 else 0", 2); - ret |= test(ctx, "1.1 + 2.1", 3.2); - ret |= test(ctx, ".1 + 2.", 2.1); - ret |= test(ctx, "d_ratio(1, 2)", 0.5); - ret |= 
test(ctx, "d_ratio(2.5, 0)", 0); - ret |= test(ctx, "1.1 < 2.2", 1); - ret |= test(ctx, "2.2 > 1.1", 1); - ret |= test(ctx, "1.1 < 1.1", 0); - ret |= test(ctx, "2.2 > 2.2", 0); - ret |= test(ctx, "2.2 < 1.1", 0); - ret |= test(ctx, "1.1 > 2.2", 0); - - if (ret) { - expr__ctx_free(ctx); + if (ret) return ret; - } p = "FOO/0"; - ret = expr__parse(&val, ctx, p); + ret = expr__parse(&val, &ctx, p, 1); TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; - ret = expr__parse(&val, ctx, p); + ret = expr__parse(&val, &ctx, p, 1); TEST_ASSERT_VAL("missing operand", ret == -1); - expr__ctx_clear(ctx); - TEST_ASSERT_VAL("find ids", - expr__find_ids("FOO + BAR + BAZ + BOZO", "FOO", - ctx) == 0); - TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 3); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", + expr__ctx_clear(&ctx); + TEST_ASSERT_VAL("find other", + expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", + &ctx, 1) == 0); + TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3); + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR", (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ", (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO", (void **)&val_ptr)); - expr__ctx_clear(ctx); - ctx->runtime = 3; - TEST_ASSERT_VAL("find ids", - expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", - NULL, ctx) == 0); - TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", + expr__ctx_clear(&ctx); + TEST_ASSERT_VAL("find other", + expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", + NULL, &ctx, 3) == 0); + TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2); + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/", (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/", (void **)&val_ptr)); - expr__ctx_clear(ctx); - TEST_ASSERT_VAL("find ids", - expr__find_ids("dash\\-event1 - dash\\-event2", - NULL, ctx) == 0); - TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2", - (void **)&val_ptr)); - - /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */ - expr__ctx_clear(ctx); - TEST_ASSERT_VAL("find ids", - expr__find_ids("EVENT1 if #smt_on else EVENT2", - NULL, ctx) == 0); - TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, - smt_on() ? "EVENT1" : "EVENT2", - (void **)&val_ptr)); - - /* The expression is a constant 1.0 without needing to evaluate EVENT1. */ - expr__ctx_clear(ctx); - TEST_ASSERT_VAL("find ids", - expr__find_ids("1.0 if EVENT1 > 100.0 else 1.0", - NULL, ctx) == 0); - TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0); - - /* Test toplogy constants appear well ordered. 
*/ - expr__ctx_clear(ctx); - TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0); - TEST_ASSERT_VAL("#num_cores", expr__parse(&num_cores, ctx, "#num_cores") == 0); - TEST_ASSERT_VAL("#num_cpus >= #num_cores", num_cpus >= num_cores); - TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0); - TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies); - TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0); - - if (num_dies) // Some platforms do not have CPU die support, for example s390 - TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); - - /* - * Source count returns the number of events aggregating in a leader - * event including the leader. Check parsing yields an id. - */ - expr__ctx_clear(ctx); - TEST_ASSERT_VAL("source count", - expr__find_ids("source_count(EVENT1)", - NULL, ctx) == 0); - TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1); - TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", - (void **)&val_ptr)); - - expr__ctx_free(ctx); + expr__ctx_clear(&ctx); return 0; } - -DEFINE_SUITE("Simple expression parser", expr); diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index 40983c3574..d9eca8e86a 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -28,7 +28,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE return printed + fdarray__fprintf(fda, fp); } -static int test__fdarray__filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused) { int nr_fds, err = TEST_FAIL; struct fdarray *fda = fdarray__new(5, 5); @@ -89,7 +89,7 @@ static int test__fdarray__filter(struct test_suite *test __maybe_unused, int sub return err; } -static int test__fdarray__add(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct fdarray *fda = fdarray__new(2, 2); @@ -158,6 +158,3 @@ static int test__fdarray__add(struct test_suite *test __maybe_unused, int subtes out: return err; } - -DEFINE_SUITE("Filter fds with revents mask in a fdarray", fdarray__filter); -DEFINE_SUITE("Add fd to a fdarray, making it autogrow", fdarray__add); diff --git a/tools/perf/tests/genelf.c b/tools/perf/tests/genelf.c index 95f3be1b68..f797f9823e 100644 --- a/tools/perf/tests/genelf.c +++ b/tools/perf/tests/genelf.c @@ -16,8 +16,8 @@ #define TEMPL "/tmp/perf-test-XXXXXX" -static int test__jit_write_elf(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__jit_write_elf(struct test *test __maybe_unused, + int subtest __maybe_unused) { #ifdef HAVE_JITDUMP static unsigned char x86_code[] = { @@ -49,5 +49,3 @@ static int test__jit_write_elf(struct test_suite *test __maybe_unused, return TEST_SKIP; #endif } - -DEFINE_SUITE("Test jit_write_elf", jit_write_elf); diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 17f4fcd6bd..890cb1f5bf 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -689,7 +689,7 @@ static int test4(struct evsel *evsel, struct machine *machine) return err; } -static int test__hists_cumulate(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct 
machines machines; @@ -736,5 +736,3 @@ static int test__hists_cumulate(struct test_suite *test __maybe_unused, int subt return err; } - -DEFINE_SUITE("Cumulate child hist entries", hists_cumulate); diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 08cbeb9e39..ca6120cd1d 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -101,7 +101,7 @@ static int add_hist_entries(struct evlist *evlist, return TEST_FAIL; } -static int test__hists_filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; @@ -325,5 +325,3 @@ static int test__hists_filter(struct test_suite *test __maybe_unused, int subtes return err; } - -DEFINE_SUITE("Filter hist entries", hists_filter); diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index c575e13a85..a024d3f3a4 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -264,7 +264,7 @@ static int validate_link(struct hists *leader, struct hists *other) return __validate_link(leader, 0) || __validate_link(other, 1); } -static int test__hists_link(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; struct hists *hists, *first_hists; @@ -339,5 +339,3 @@ static int test__hists_link(struct test_suite *test __maybe_unused, int subtest return err; } - -DEFINE_SUITE("Match and link multiple hists", hists_link); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 0bde4a768c..8973f35df6 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -575,7 +575,7 @@ static int test5(struct evsel *evsel, struct machine *machine) return err; } -static int test__hists_output(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; @@ -623,5 +623,3 @@ static int test__hists_output(struct test_suite *test __maybe_unused, int subtes return err; } - -DEFINE_SUITE("Sort output of hist entries", hists_output); diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c index f72de2457f..9c7b3baca4 100644 --- a/tools/perf/tests/is_printable_array.c +++ b/tools/perf/tests/is_printable_array.c @@ -5,7 +5,7 @@ #include "debug.h" #include "print_binary.h" -static int test__is_printable_array(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__is_printable_array(struct test *test __maybe_unused, int subtest __maybe_unused) { char buf1[] = { 'k', 'r', 4, 'v', 'a', 0 }; char buf2[] = { 'k', 'r', 'a', 'v', 4, 0 }; @@ -36,5 +36,3 @@ static int test__is_printable_array(struct test_suite *test __maybe_unused, int return TEST_OK; } - -DEFINE_SUITE("is_printable_array", is_printable_array); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index dd20673124..a0438b0f08 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -61,7 +61,7 @@ static int find_comm(struct evlist *evlist, const char *comm) * when an event is disabled but a dummy software event is not disabled. If the * test passes %0 is returned, otherwise %-1 is returned. 
*/ -static int test__keep_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .mmap_pages = UINT_MAX, @@ -160,5 +160,3 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte return err; } - -DEFINE_SUITE("Use a dummy software event to keep tracking", keep_tracking); diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c index dfe1bd5dab..e483210b17 100644 --- a/tools/perf/tests/kmod-path.c +++ b/tools/perf/tests/kmod-path.c @@ -47,7 +47,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect) #define M(path, c, e) \ TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e)) -static int test__kmod_path__parse(struct test_suite *t __maybe_unused, int subtest __maybe_unused) +int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused) { /* path alloc_name kmod comp name */ T("/xxxx/xxxx/x-x.ko", true , true, 0 , "[x_x]"); @@ -159,5 +159,3 @@ static int test__kmod_path__parse(struct test_suite *t __maybe_unused, int subte return 0; } - -DEFINE_SUITE("kmod_path__parse", kmod_path__parse); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 8ac0a3a457..33e43cce90 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -124,7 +124,7 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, return ret; } -static int test__llvm(int subtest) +int test__llvm(struct test *test __maybe_unused, int subtest) { int ret; void *obj_buf = NULL; @@ -148,72 +148,32 @@ static int test__llvm(int subtest) return ret; } -#endif //HAVE_LIBBPF_SUPPORT -static int test__llvm__bpf_base_prog(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__llvm_subtest_get_nr(void) { -#ifdef HAVE_LIBBPF_SUPPORT - return test__llvm(LLVM_TESTCASE_BASE); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif + return __LLVM_TESTCASE_MAX; } -static int test__llvm__bpf_test_kbuild_prog(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +const char *test__llvm_subtest_get_desc(int subtest) +{ + if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) + return NULL; + + return bpf_source_table[subtest].desc; +} +#else //HAVE_LIBBPF_SUPPORT +int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused) { -#ifdef HAVE_LIBBPF_SUPPORT - return test__llvm(LLVM_TESTCASE_KBUILD); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); return TEST_SKIP; -#endif } -static int test__llvm__bpf_test_prologue_prog(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__llvm_subtest_get_nr(void) { -#ifdef HAVE_LIBBPF_SUPPORT - return test__llvm(LLVM_TESTCASE_BPF_PROLOGUE); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif + return 0; } -static int test__llvm__bpf_test_relocation(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +const char *test__llvm_subtest_get_desc(int subtest __maybe_unused) { -#ifdef HAVE_LIBBPF_SUPPORT - return test__llvm(LLVM_TESTCASE_BPF_RELOCATION); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif + return NULL; } - - -static struct test_case llvm_tests[] = { -#ifdef HAVE_LIBBPF_SUPPORT - TEST_CASE("Basic BPF llvm compile", llvm__bpf_base_prog), - TEST_CASE("kbuild searching", 
llvm__bpf_test_kbuild_prog), - TEST_CASE("Compile source for BPF prologue generation", - llvm__bpf_test_prologue_prog), - TEST_CASE("Compile source for BPF relocation", llvm__bpf_test_relocation), -#else - TEST_CASE_REASON("Basic BPF llvm compile", llvm__bpf_base_prog, "not compiled in"), - TEST_CASE_REASON("kbuild searching", llvm__bpf_test_kbuild_prog, "not compiled in"), - TEST_CASE_REASON("Compile source for BPF prologue generation", - llvm__bpf_test_prologue_prog, "not compiled in"), - TEST_CASE_REASON("Compile source for BPF relocation", - llvm__bpf_test_relocation, "not compiled in"), -#endif - { .name = NULL, } -}; - -struct test_suite suite__llvm = { - .desc = "LLVM search and compile", - .test_cases = llvm_tests, -}; +#endif // HAVE_LIBBPF_SUPPORT diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c index e308a3296c..1ac72919fa 100644 --- a/tools/perf/tests/maps.c +++ b/tools/perf/tests/maps.c @@ -33,7 +33,7 @@ static int check_maps(struct map_def *merged, unsigned int size, struct maps *ma return TEST_OK; } -static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest __maybe_unused) +int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused) { struct maps maps; unsigned int i; @@ -120,5 +120,3 @@ static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest maps__exit(&maps); return TEST_OK; } - -DEFINE_SUITE("maps__merge_in", maps__merge_in); diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c index 56014ec7d4..673a11a6cd 100644 --- a/tools/perf/tests/mem.c +++ b/tools/perf/tests/mem.c @@ -23,7 +23,7 @@ static int check(union perf_mem_data_src data_src, return 0; } -static int test__mem(struct test_suite *text __maybe_unused, int subtest __maybe_unused) +int test__mem(struct test *text __maybe_unused, int subtest __maybe_unused) { int ret = 0; union perf_mem_data_src src; @@ -56,5 +56,3 @@ static int test__mem(struct test_suite *text __maybe_unused, int subtest __maybe return ret; } - -DEFINE_SUITE("Test data source output", mem); diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 4c96829510..e4d0d58b97 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -25,15 +25,14 @@ static unsigned long *get_bitmap(const char *str, int nbits) { struct perf_cpu_map *map = perf_cpu_map__new(str); unsigned long *bm = NULL; + int i; bm = bitmap_zalloc(nbits); if (map && bm) { - struct perf_cpu cpu; - int i; - - perf_cpu_map__for_each_cpu(cpu, i, map) - set_bit(cpu.cpu, bm); + for (i = 0; i < map->nr; i++) { + set_bit(map->map[i], bm); + } } if (map) @@ -44,7 +43,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) return bm && map ? bm : NULL; } -static int test__mem2node(struct test_suite *t __maybe_unused, int subtest __maybe_unused) +int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused) { struct mem2node map; struct memory_node nodes[3]; @@ -78,5 +77,3 @@ static int test__mem2node(struct test_suite *t __maybe_unused, int subtest __may mem2node__exit(&map); return 0; } - -DEFINE_SUITE("mem2node", mem2node); diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index c3c17600f2..d38757db2d 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -29,7 +29,7 @@ * Then it checks if the number of syscalls reported as perf events by * the kernel corresponds to the number of syscalls made. 
*/ -static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; union perf_event *event; @@ -59,12 +59,11 @@ } CPU_ZERO(&cpu_set); - CPU_SET(perf_cpu_map__cpu(cpus, 0).cpu, &cpu_set); + CPU_SET(cpus->map[0], &cpu_set); sched_setaffinity(0, sizeof(cpu_set), &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - perf_cpu_map__cpu(cpus, 0).cpu, - str_error_r(errno, sbuf, sizeof(sbuf))); + cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf))); goto out_free_cpus; } @@ -165,5 +164,3 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest perf_thread_map__put(threads); return err; } - -DEFINE_SUITE("Read samples using the mmap interface", basic_mmap); diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index a4301fc7b7..8d9d4cbff7 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -135,7 +135,7 @@ static int synth_all(struct machine *machine) { return perf_event__synthesize_threads(NULL, perf_event__process, - machine, 1, 0, 1); + machine, 0, 1); } static int synth_process(struct machine *machine) @@ -147,7 +147,7 @@ static int synth_process(struct machine *machine) err = perf_event__synthesize_thread_map(NULL, map, perf_event__process, - machine, 1, 0); + machine, 0); perf_thread_map__put(map); return err; @@ -224,7 +224,7 @@ static int mmap_events(synth_cb synth) * * by using all thread objects. */ -static int test__mmap_thread_lookup(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __maybe_unused) { /* perf_event__synthesize_threads synthesize */ TEST_ASSERT_VAL("failed with synthesizing all", @@ -236,5 +236,3 @@ static int test__mmap_thread_lookup(struct test_suite *test __maybe_unused, int return 0; } - -DEFINE_SUITE("Lookup mmap thread", mmap_thread_lookup); diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 1ab362323d..f7dd6c463f 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -19,11 +19,9 @@ #include "stat.h" #include "util/counts.h" -static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int subtest __maybe_unused) { - int err = -1, fd, idx; - struct perf_cpu cpu; + int err = -1, fd, cpu; struct perf_cpu_map *cpus; struct evsel *evsel; unsigned int nr_openat_calls = 111, i; @@ -59,23 +57,23 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb goto out_evsel_delete; } - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - unsigned int ncalls = nr_openat_calls + idx; + for (cpu = 0; cpu < cpus->nr; ++cpu) { + unsigned int ncalls = nr_openat_calls + cpu; /* * XXX eventually lift this restriction in a way that * keeps perf building on older glibc installations * without CPU_ALLOC. 
1024 cpus in 2010 still seems * a reasonable upper limit tho :-) */ - if (cpu.cpu >= CPU_SETSIZE) { - pr_debug("Ignoring CPU %d\n", cpu.cpu); + if (cpus->map[cpu] >= CPU_SETSIZE) { + pr_debug("Ignoring CPU %d\n", cpus->map[cpu]); continue; } - CPU_SET(cpu.cpu, &cpu_set); + CPU_SET(cpus->map[cpu], &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpu.cpu, + cpus->map[cpu], str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_fd; } @@ -83,29 +81,37 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } - CPU_CLR(cpu.cpu, &cpu_set); + CPU_CLR(cpus->map[cpu], &cpu_set); } - evsel->core.cpus = perf_cpu_map__get(cpus); + /* + * Here we need to explicitly preallocate the counts, as if + * we use the auto allocation it will allocate just for 1 cpu, + * as we start by cpu 0. + */ + if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { + pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr); + goto out_close_fd; + } err = 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + for (cpu = 0; cpu < cpus->nr; ++cpu) { unsigned int expected; - if (cpu.cpu >= CPU_SETSIZE) + if (cpus->map[cpu] >= CPU_SETSIZE) continue; - if (evsel__read_on_cpu(evsel, idx, 0) < 0) { + if (evsel__read_on_cpu(evsel, cpu, 0) < 0) { pr_debug("evsel__read_on_cpu\n"); err = -1; break; } - expected = nr_openat_calls + idx; - if (perf_counts(evsel->counts, idx, 0)->val != expected) { + expected = nr_openat_calls + cpu; + if (perf_counts(evsel->counts, cpu, 0)->val != expected) { pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpu.cpu, perf_counts(evsel->counts, idx, 0)->val); + expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); err = -1; } } @@ -121,5 +127,3 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb perf_thread_map__put(threads); return err; } - -DEFINE_SUITE("Detect openat syscall event on all cpus", openat_syscall_event_on_all_cpus); diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index a7b2800652..5e4af2f0f1 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -22,8 +22,7 @@ #define AT_FDCWD -100 #endif -static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .target = { @@ -143,5 +142,3 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused out: return err; } - -DEFINE_SUITE("syscalls:sys_enter_openat event fields", syscall_openat_tp_fields); diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 7f4c13c4b1..85a8f0fe7a 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -13,8 +13,7 @@ #include "tests.h" #include "util/counts.h" -static int test__openat_syscall_event(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1, fd; struct evsel *evsel; @@ -67,5 +66,3 @@ static int test__openat_syscall_event(struct test_suite *test __maybe_unused, perf_thread_map__put(threads); return err; } - -DEFINE_SUITE("Detect openat syscall event", 
openat_syscall_event); diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index e71efadb24..fd3556cc9a 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -605,7 +605,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 10); - TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config")); + TEST_ASSERT_VAL("wrong config", !term->config); /* config1 */ term = list_entry(term->list.next, struct parse_events_term, list); @@ -614,7 +614,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 1); - TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config1")); + TEST_ASSERT_VAL("wrong config", !term->config); /* config2=3 */ term = list_entry(term->list.next, struct parse_events_term, list); @@ -623,7 +623,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 3); - TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config2")); + TEST_ASSERT_VAL("wrong config", !term->config); /* umask=1*/ term = list_entry(term->list.next, struct parse_events_term, list); @@ -661,7 +661,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 0xead); - TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config")); + TEST_ASSERT_VAL("wrong config", !term->config); return 0; } @@ -2045,6 +2045,7 @@ static int test_event(struct evlist_test *e) struct evlist *evlist; int ret; + bzero(&err, sizeof(err)); if (e->valid && !e->valid()) { pr_debug("... 
SKIP"); return 0; @@ -2054,41 +2055,15 @@ static int test_event(struct evlist_test *e) if (evlist == NULL) return -ENOMEM; - parse_events_error__init(&err); ret = parse_events(evlist, e->name, &err); if (ret) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", e->name, ret, err.str); - parse_events_error__print(&err, e->name); + parse_events_print_error(&err, e->name); } else { ret = e->check(evlist); } - parse_events_error__exit(&err); - evlist__delete(evlist); - return ret; -} - -static int test_event_fake_pmu(const char *str) -{ - struct parse_events_error err; - struct evlist *evlist; - int ret; - - evlist = evlist__new(); - if (!evlist) - return -ENOMEM; - - parse_events_error__init(&err); - perf_pmu__test_parse_init(); - ret = __parse_events(evlist, str, &err, &perf_pmu__fake); - if (ret) { - pr_debug("failed to parse event '%s', err %d, str '%s'\n", - str, ret, err.str); - parse_events_error__print(&err, str); - } - - parse_events_error__exit(&err); evlist__delete(evlist); return ret; @@ -2301,27 +2276,7 @@ static int test_pmu_events_alias(char *event, char *alias) return test_event(&e); } -static int test_pmu_events_alias2(void) -{ - static const char events[][30] = { - "event-hyphen", - "event-two-hyph", - }; - unsigned long i; - int ret = 0; - - for (i = 0; i < ARRAY_SIZE(events); i++) { - ret = test_event_fake_pmu(&events[i][0]); - if (ret) { - pr_err("check_parse_fake %s failed\n", &events[i][0]); - break; - } - } - - return ret; -} - -static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret1, ret2 = 0; char *event, *alias; @@ -2358,15 +2313,9 @@ do { \ return ret; } - ret1 = test_pmu_events_alias2(); - if (!ret2) - ret2 = ret1; - ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms)); if (!ret2) ret2 = ret1; return ret2; } - -DEFINE_SUITE("Parse event definition strings", parse_events); diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 07b6f4ec02..4f6f4904e8 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -79,7 +79,7 @@ static struct pmu_event pme_test[] = { } }; -static const struct pmu_events_map map = { +static struct pmu_events_map map = { .cpuid = "test", .version = "1", .type = "core", @@ -109,7 +109,6 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, struct evsel *evsel; u64 count; - perf_stat__reset_shadow_stats(); evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); perf_stat__update_shadow_stats(evsel, count, 0, st); @@ -370,7 +369,7 @@ static int test_metric_group(void) return 0; } -static int test__parse_metric(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); @@ -384,5 +383,3 @@ static int test__parse_metric(struct test_suite *test __maybe_unused, int subtes } return 0; } - -DEFINE_SUITE("Parse and process metrics", parse_metric); diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index d62e31595a..4712736767 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -67,8 +67,7 @@ struct test_attr_event { * * Return: %0 on success, %-1 if the test fails. 
*/ -static int test__parse_no_sample_id_all(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest __maybe_unused) { int err; @@ -104,5 +103,3 @@ static int test__parse_no_sample_id_all(struct test_suite *test __maybe_unused, return 0; } - -DEFINE_SUITE("Parse with no sample_id_all bit set", parse_no_sample_id_all); diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c index c09a9fae16..58b90c42eb 100644 --- a/tools/perf/tests/pe-file-parsing.c +++ b/tools/perf/tests/pe-file-parsing.c @@ -68,7 +68,7 @@ static int run_dir(const char *d) return TEST_OK; } -static int test__pe_file_parsing(struct test_suite *test __maybe_unused, +int test__pe_file_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) { struct stat st; @@ -89,12 +89,10 @@ static int test__pe_file_parsing(struct test_suite *test __maybe_unused, #else -static int test__pe_file_parsing(struct test_suite *test __maybe_unused, +int test__pe_file_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) { return TEST_SKIP; } #endif - -DEFINE_SUITE("PE file support", pe_file_parsing); diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index 78cdeb8964..dd865e0bea 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -26,7 +26,7 @@ static void the_hook(void *_hook_flags) raise(SIGSEGV); } -static int test__perf_hooks(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused) { int hook_flags = 0; @@ -45,5 +45,3 @@ static int test__perf_hooks(struct test_suite *test __maybe_unused, int subtest return TEST_FAIL; return TEST_OK; } - -DEFINE_SUITE("perf hooks", perf_hooks); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 6354465067..0df471bf15 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -41,7 +41,7 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) return cpu; } -static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .target = { @@ -332,5 +332,3 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest out: return (err < 0 || errs > 0) ? -1 : 0; } - -DEFINE_SUITE("PERF_RECORD_* events & perf_sample fields", PERF_RECORD); diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index d12d0ad818..7c56bc1f4c 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -23,16 +23,6 @@ #include "pmu.h" #include "pmu-hybrid.h" -/* - * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just - * enable the test for x86_64/i386 and Arm64 archs. - */ -#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) -#define TSC_IS_SUPPORTED 1 -#else -#define TSC_IS_SUPPORTED 0 -#endif - #define CHECK__(x) { \ while ((x) < 0) { \ pr_debug(#x " failed!\n"); \ @@ -55,7 +45,7 @@ * %0 is returned, otherwise %-1 is returned. If TSC conversion is not * supported then the test passes but " (not supported)" is printed. 
*/ -static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .mmap_pages = UINT_MAX, @@ -79,11 +69,6 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su u64 test_time, comm1_time = 0, comm2_time = 0; struct mmap *md; - if (!TSC_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } - threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); @@ -200,4 +185,15 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su return err; } -DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc); +bool test__tsc_is_supported(void) +{ + /* + * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. + * Just enable the test for x86_64/i386 and Arm64 archs. + */ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) + return true; +#else + return false; +#endif +} diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c index 71b76deb1f..e8fd0da076 100644 --- a/tools/perf/tests/pfm.c +++ b/tools/perf/tests/pfm.c @@ -11,6 +11,27 @@ #include +#ifdef HAVE_LIBPFM +static int test__pfm_events(void); +static int test__pfm_group(void); +#endif + +static const struct { + int (*func)(void); + const char *desc; +} pfm_testcase_table[] = { +#ifdef HAVE_LIBPFM + { + .func = test__pfm_events, + .desc = "test of individual --pfm-events", + }, + { + .func = test__pfm_group, + .desc = "test groups of --pfm-events", + }, +#endif +}; + #ifdef HAVE_LIBPFM static int count_pfm_events(struct perf_evlist *evlist) { @@ -23,8 +44,7 @@ static int count_pfm_events(struct perf_evlist *evlist) return count; } -static int test__pfm_events(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int test__pfm_events(void) { struct evlist *evlist; struct option opt; @@ -84,8 +104,7 @@ static int test__pfm_events(struct test_suite *test __maybe_unused, return 0; } -static int test__pfm_group(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int test__pfm_group(void) { struct evlist *evlist; struct option opt; @@ -168,27 +187,27 @@ static int test__pfm_group(struct test_suite *test __maybe_unused, } return 0; } -#else -static int test__pfm_events(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - return TEST_SKIP; -} - -static int test__pfm_group(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - return TEST_SKIP; -} #endif -static struct test_case pfm_tests[] = { - TEST_CASE_REASON("test of individual --pfm-events", pfm_events, "not compiled in"), - TEST_CASE_REASON("test groups of --pfm-events", pfm_group, "not compiled in"), - { .name = NULL, } -}; +const char *test__pfm_subtest_get_desc(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) + return NULL; + return pfm_testcase_table[i].desc; +} -struct test_suite suite__pfm = { - .desc = "Test libpfm4 support", - .test_cases = pfm_tests, -}; +int test__pfm_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(pfm_testcase_table); +} + +int test__pfm(struct test *test __maybe_unused, int i __maybe_unused) +{ +#ifdef HAVE_LIBPFM + if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) + return TEST_FAIL; + return pfm_testcase_table[i].func(); +#else + return TEST_SKIP; +#endif +} diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 
1c695fb5a7..43743cf719 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -67,7 +67,7 @@ static const struct perf_pmu_test_event segment_reg_loads_any = { .desc = "Number of segment register loads", .topic = "other", }, - .alias_str = "umask=0x80,period=0x30d40,event=0x6", + .alias_str = "umask=0x80,(null)=0x30d40,event=0x6", .alias_long_desc = "Number of segment register loads", }; @@ -78,7 +78,7 @@ static const struct perf_pmu_test_event dispatch_blocked_any = { .desc = "Memory cluster signals to block micro-op dispatch for any reason", .topic = "other", }, - .alias_str = "umask=0x20,period=0x30d40,event=0x9", + .alias_str = "umask=0x20,(null)=0x30d40,event=0x9", .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason", }; @@ -89,7 +89,7 @@ static const struct perf_pmu_test_event eist_trans = { .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", .topic = "other", }, - .alias_str = "umask=0,period=0x30d40,event=0x3a", + .alias_str = "umask=0,(null)=0x30d40,event=0x3a", .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }; @@ -143,38 +143,10 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = { .matching_pmu = "uncore_cbox_0", }; -static const struct perf_pmu_test_event uncore_hyphen = { - .event = { - .name = "event-hyphen", - .event = "umask=0x00,event=0xe0", - .desc = "Unit: uncore_cbox UNC_CBO_HYPHEN", - .topic = "uncore", - .long_desc = "UNC_CBO_HYPHEN", - .pmu = "uncore_cbox", - }, - .alias_str = "umask=0,event=0xe0", - .alias_long_desc = "UNC_CBO_HYPHEN", - .matching_pmu = "uncore_cbox_0", -}; - -static const struct perf_pmu_test_event uncore_two_hyph = { - .event = { - .name = "event-two-hyph", - .event = "umask=0x00,event=0xc0", - .desc = "Unit: uncore_cbox UNC_CBO_TWO_HYPH", - .topic = "uncore", - .long_desc = "UNC_CBO_TWO_HYPH", - .pmu = "uncore_cbox", - }, - .alias_str = "umask=0,event=0xc0", - .alias_long_desc = "UNC_CBO_TWO_HYPH", - .matching_pmu = "uncore_cbox_0", -}; - static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = { .event = { .name = "uncore_hisi_l3c.rd_hit_cpipe", - .event = "event=0x7", + .event = "event=0x2", .desc = "Total read hits. Unit: hisi_sccl,l3c ", .topic = "uncore", .long_desc = "Total read hits", @@ -216,8 +188,6 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = { static const struct perf_pmu_test_event *uncore_events[] = { &uncore_hisi_ddrc_flux_wcmd, &unc_cbo_xsnp_response_miss_eviction, - &uncore_hyphen, - &uncore_two_hyph, &uncore_hisi_l3c_rd_hit_cpipe, &uncore_imc_free_running_cache_miss, &uncore_imc_cache_hits, @@ -238,23 +208,8 @@ static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = { .matching_pmu = "uncore_sys_ddr_pmu", }; -static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = { - .event = { - .name = "sys_ccn_pmu.read_cycles", - .event = "config=0x2c", - .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ", - .topic = "uncore", - .pmu = "uncore_sys_ccn_pmu", - .compat = "0x01", - }, - .alias_str = "config=0x2c", - .alias_long_desc = "ccn read-cycles event. 
Unit: uncore_sys_ccn_pmu ", - .matching_pmu = "uncore_sys_ccn_pmu", -}; - static const struct perf_pmu_test_event *sys_events[] = { &sys_ddr_pmu_write_cycles, - &sys_ccn_pmu_read_cycles, NULL }; @@ -272,9 +227,9 @@ static bool is_same(const char *reference, const char *test) return !strcmp(reference, test); } -static const struct pmu_events_map *__test_pmu_get_events_map(void) +static struct pmu_events_map *__test_pmu_get_events_map(void) { - const struct pmu_events_map *map; + struct pmu_events_map *map; for (map = &pmu_events_map[0]; map->cpuid; map++) { if (!strcmp(map->cpuid, "testcpu")) @@ -286,9 +241,9 @@ static const struct pmu_events_map *__test_pmu_get_events_map(void) return NULL; } -static const struct pmu_event *__test_pmu_get_sys_events_table(void) +static struct pmu_event *__test_pmu_get_sys_events_table(void) { - const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; + struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; for ( ; tables->name; tables++) { if (!strcmp("pme_test_soc_sys", tables->name)) @@ -298,26 +253,8 @@ static const struct pmu_event *__test_pmu_get_sys_events_table(void) return NULL; } -static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event *e2) +static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) { - if (!is_same(e1->name, e2->name)) { - pr_debug2("testing event e1 %s: mismatched name string, %s vs %s\n", - e1->name, e1->name, e2->name); - return -1; - } - - if (!is_same(e1->compat, e2->compat)) { - pr_debug2("testing event e1 %s: mismatched compat string, %s vs %s\n", - e1->name, e1->compat, e2->compat); - return -1; - } - - if (!is_same(e1->event, e2->event)) { - pr_debug2("testing event e1 %s: mismatched event, %s vs %s\n", - e1->name, e1->event, e2->event); - return -1; - } - if (!is_same(e1->desc, e2->desc)) { pr_debug2("testing event e1 %s: mismatched desc, %s vs %s\n", e1->name, e1->desc, e2->desc); @@ -336,12 +273,6 @@ static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event return -1; } - if (!is_same(e1->pmu, e2->pmu)) { - pr_debug2("testing event e1 %s: mismatched pmu string, %s vs %s\n", - e1->name, e1->pmu, e2->pmu); - return -1; - } - if (!is_same(e1->unit, e2->unit)) { pr_debug2("testing event e1 %s: mismatched unit, %s vs %s\n", e1->name, e1->unit, e2->unit); @@ -354,12 +285,6 @@ static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event return -1; } - if (!is_same(e1->aggr_mode, e2->aggr_mode)) { - pr_debug2("testing event e1 %s: mismatched aggr_mode, %s vs %s\n", - e1->name, e1->aggr_mode, e2->aggr_mode); - return -1; - } - if (!is_same(e1->metric_expr, e2->metric_expr)) { pr_debug2("testing event e1 %s: mismatched metric_expr, %s vs %s\n", e1->name, e1->metric_expr, e2->metric_expr); @@ -372,21 +297,21 @@ static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event return -1; } - if (!is_same(e1->metric_group, e2->metric_group)) { - pr_debug2("testing event e1 %s: mismatched metric_group, %s vs %s\n", - e1->name, e1->metric_group, e2->metric_group); - return -1; - } - if (!is_same(e1->deprecated, e2->deprecated)) { pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n", e1->name, e1->deprecated, e2->deprecated); return -1; } - if (!is_same(e1->metric_constraint, e2->metric_constraint)) { - pr_debug2("testing event e1 %s: mismatched metric_constant, %s vs %s\n", - e1->name, e1->metric_constraint, e2->metric_constraint); + if (!is_same(e1->pmu, e2->pmu)) { + pr_debug2("testing event e1 %s: 
mismatched pmu string, %s vs %s\n", + e1->name, e1->pmu, e2->pmu); + return -1; + } + + if (!is_same(e1->compat, e2->compat)) { + pr_debug2("testing event e1 %s: mismatched compat string, %s vs %s\n", + e1->name, e1->compat, e2->compat); return -1; } @@ -448,12 +373,11 @@ static int compare_alias_to_test_event(struct perf_pmu_alias *alias, } /* Verify generated events from pmu-events.c are as expected */ -static int test__pmu_event_table(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int test_pmu_event_table(void) { - const struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table(); - const struct pmu_events_map *map = __test_pmu_get_events_map(); - const struct pmu_event *table; + struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table(); + struct pmu_events_map *map = __test_pmu_get_events_map(); + struct pmu_event *table; int map_events = 0, expected_events; /* ignore 3x sentinels */ @@ -549,7 +473,7 @@ static int __test_core_pmu_event_aliases(char *pmu_name, int *count) struct perf_pmu *pmu; LIST_HEAD(aliases); int res = 0; - const struct pmu_events_map *map = __test_pmu_get_events_map(); + struct pmu_events_map *map = __test_pmu_get_events_map(); struct perf_pmu_alias *a, *tmp; if (!map) @@ -602,7 +526,7 @@ static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu) struct perf_pmu *pmu = &test_pmu->pmu; const char *pmu_name = pmu->name; struct perf_pmu_alias *a, *tmp, *alias; - const struct pmu_events_map *map; + struct pmu_events_map *map; LIST_HEAD(aliases); int res = 0; @@ -684,8 +608,6 @@ static struct perf_pmu_test_pmu test_pmus[] = { }, .aliases = { &unc_cbo_xsnp_response_miss_eviction, - &uncore_hyphen, - &uncore_two_hyph, }, }, { @@ -725,21 +647,10 @@ static struct perf_pmu_test_pmu test_pmus[] = { &sys_ddr_pmu_write_cycles, }, }, - { - .pmu = { - .name = (char *)"uncore_sys_ccn_pmu4", - .is_uncore = 1, - .id = (char *)"0x01", - }, - .aliases = { - &sys_ccn_pmu_read_cycles, - }, - }, }; /* Test that aliases generated are as expected */ -static int test__aliases(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int test_aliases(void) { struct perf_pmu *pmu = NULL; unsigned long i; @@ -795,7 +706,6 @@ static int check_parse_id(const char *id, struct parse_events_error *error, { struct evlist *evlist; int ret; - char *dup, *cur; /* Numbers are always valid. 
*/ if (is_number(id)) @@ -804,28 +714,16 @@ static int check_parse_id(const char *id, struct parse_events_error *error, evlist = evlist__new(); if (!evlist) return -ENOMEM; - - dup = strdup(id); - if (!dup) - return -ENOMEM; - - for (cur = strchr(dup, '@') ; cur; cur = strchr(++cur, '@')) - *cur = '/'; - - ret = __parse_events(evlist, dup, error, fake_pmu); - free(dup); - + ret = __parse_events(evlist, id, error, fake_pmu); evlist__delete(evlist); return ret; } -static int check_parse_cpu(const char *id, bool same_cpu, const struct pmu_event *pe) +static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe) { - struct parse_events_error error; - int ret; + struct parse_events_error error = { .idx = 0, }; - parse_events_error__init(&error); - ret = check_parse_id(id, &error, NULL); + int ret = check_parse_id(id, &error, NULL); if (ret && same_cpu) { pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n", pe->metric_name, id, pe->metric_expr); @@ -836,18 +734,22 @@ static int check_parse_cpu(const char *id, bool same_cpu, const struct pmu_event id, pe->metric_name, pe->metric_expr); ret = 0; } - parse_events_error__exit(&error); + free(error.str); + free(error.help); + free(error.first_str); + free(error.first_help); return ret; } static int check_parse_fake(const char *id) { - struct parse_events_error error; - int ret; + struct parse_events_error error = { .idx = 0, }; + int ret = check_parse_id(id, &error, &perf_pmu__fake); - parse_events_error__init(&error); - ret = check_parse_id(id, &error, &perf_pmu__fake); - parse_events_error__exit(&error); + free(error.str); + free(error.help); + free(error.first_str); + free(error.first_help); return ret; } @@ -868,7 +770,7 @@ struct metric { static int resolve_metric_simple(struct expr_parse_ctx *pctx, struct list_head *compound_list, - const struct pmu_events_map *map, + struct pmu_events_map *map, const char *metric_name) { struct hashmap_entry *cur, *cur_tmp; @@ -879,9 +781,9 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, do { all = true; - hashmap__for_each_entry_safe(pctx->ids, cur, cur_tmp, bkt) { + hashmap__for_each_entry_safe((&pctx->ids), cur, cur_tmp, bkt) { struct metric_ref *ref; - const struct pmu_event *pe; + struct pmu_event *pe; pe = metricgroup__find_metric(cur->key, map); if (!pe) @@ -909,7 +811,7 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, ref->metric_expr = pe->metric_expr; list_add_tail(&metric->list, compound_list); - rc = expr__find_ids(pe->metric_expr, NULL, pctx); + rc = expr__find_other(pe->metric_expr, NULL, pctx, 0); if (rc) goto out_err; break; /* The hashmap has been modified, so restart */ @@ -926,22 +828,16 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, } -static int test__parsing(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int test_parsing(void) { - const struct pmu_events_map *cpus_map = pmu_events_map__find(); - const struct pmu_events_map *map; - const struct pmu_event *pe; + struct pmu_events_map *cpus_map = pmu_events_map__find(); + struct pmu_events_map *map; + struct pmu_event *pe; int i, j, k; int ret = 0; - struct expr_parse_ctx *ctx; + struct expr_parse_ctx ctx; double result; - ctx = expr__ctx_new(); - if (!ctx) { - pr_debug("expr__ctx_new failed"); - return TEST_FAIL; - } i = 0; for (;;) { map = &pmu_events_map[i++]; @@ -959,14 +855,15 @@ static int test__parsing(struct test_suite *test __maybe_unused, break; if (!pe->metric_expr) continue; - expr__ctx_clear(ctx); - if 
(expr__find_ids(pe->metric_expr, NULL, ctx) < 0) { - expr_failure("Parse find ids failed", map, pe); + expr__ctx_init(&ctx); + if (expr__find_other(pe->metric_expr, NULL, &ctx, 0) + < 0) { + expr_failure("Parse other failed", map, pe); ret++; continue; } - if (resolve_metric_simple(ctx, &compound_list, map, + if (resolve_metric_simple(&ctx, &compound_list, map, pe->metric_name)) { expr_failure("Could not resolve metrics", map, pe); ret++; @@ -979,27 +876,27 @@ static int test__parsing(struct test_suite *test __maybe_unused, * make them unique. */ k = 1; - hashmap__for_each_entry(ctx->ids, cur, bkt) - expr__add_id_val(ctx, strdup(cur->key), k++); + hashmap__for_each_entry((&ctx.ids), cur, bkt) + expr__add_id_val(&ctx, strdup(cur->key), k++); - hashmap__for_each_entry(ctx->ids, cur, bkt) { + hashmap__for_each_entry((&ctx.ids), cur, bkt) { if (check_parse_cpu(cur->key, map == cpus_map, pe)) ret++; } list_for_each_entry_safe(metric, tmp, &compound_list, list) { - expr__add_ref(ctx, &metric->metric_ref); + expr__add_ref(&ctx, &metric->metric_ref); free(metric); } - if (expr__parse(&result, ctx, pe->metric_expr)) { + if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { expr_failure("Parse failed", map, pe); ret++; } + expr__ctx_clear(&ctx); } } - expr__ctx_free(ctx); /* TODO: fail when not ok */ exit: return ret == 0 ? TEST_OK : TEST_SKIP; @@ -1019,7 +916,7 @@ static struct test_metric metrics[] = { static int metric_parse_fake(const char *str) { - struct expr_parse_ctx *ctx; + struct expr_parse_ctx ctx; struct hashmap_entry *cur; double result; int ret = -1; @@ -1028,13 +925,9 @@ static int metric_parse_fake(const char *str) pr_debug("parsing '%s'\n", str); - ctx = expr__ctx_new(); - if (!ctx) { - pr_debug("expr__ctx_new failed"); - return TEST_FAIL; - } - if (expr__find_ids(str, NULL, ctx) < 0) { - pr_err("expr__find_ids failed\n"); + expr__ctx_init(&ctx); + if (expr__find_other(str, NULL, &ctx, 0) < 0) { + pr_err("expr__find_other failed\n"); return -1; } @@ -1044,23 +937,23 @@ static int metric_parse_fake(const char *str) * make them unique. */ i = 1; - hashmap__for_each_entry(ctx->ids, cur, bkt) - expr__add_id_val(ctx, strdup(cur->key), i++); + hashmap__for_each_entry((&ctx.ids), cur, bkt) + expr__add_id_val(&ctx, strdup(cur->key), i++); - hashmap__for_each_entry(ctx->ids, cur, bkt) { + hashmap__for_each_entry((&ctx.ids), cur, bkt) { if (check_parse_fake(cur->key)) { pr_err("check_parse_fake failed\n"); goto out; } } - if (expr__parse(&result, ctx, str)) + if (expr__parse(&result, &ctx, str, 0)) pr_err("expr__parse failed\n"); else ret = 0; out: - expr__ctx_free(ctx); + expr__ctx_clear(&ctx); return ret; } @@ -1069,11 +962,10 @@ static int metric_parse_fake(const char *str) * or all defined cpus via the 'fake_pmu' * in parse_events. 
*/ -static int test__parsing_fake(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int test_parsing_fake(void) { - const struct pmu_events_map *map; - const struct pmu_event *pe; + struct pmu_events_map *map; + struct pmu_event *pe; unsigned int i, j; int err = 0; @@ -1104,16 +996,55 @@ static int test__parsing_fake(struct test_suite *test __maybe_unused, return 0; } -static struct test_case pmu_events_tests[] = { - TEST_CASE("PMU event table sanity", pmu_event_table), - TEST_CASE("PMU event map aliases", aliases), - TEST_CASE_REASON("Parsing of PMU event table metrics", parsing, - "some metrics failed"), - TEST_CASE("Parsing of PMU event table metrics with fake PMUs", parsing_fake), - { .name = NULL, } +static const struct { + int (*func)(void); + const char *desc; +} pmu_events_testcase_table[] = { + { + .func = test_pmu_event_table, + .desc = "PMU event table sanity", + }, + { + .func = test_aliases, + .desc = "PMU event map aliases", + }, + { + .func = test_parsing, + .desc = "Parsing of PMU event table metrics", + }, + { + .func = test_parsing_fake, + .desc = "Parsing of PMU event table metrics with fake PMUs", + }, }; -struct test_suite suite__pmu_events = { - .desc = "PMU events", - .test_cases = pmu_events_tests, -}; +const char *test__pmu_events_subtest_get_desc(int subtest) +{ + if (subtest < 0 || + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) + return NULL; + return pmu_events_testcase_table[subtest].desc; +} + +const char *test__pmu_events_subtest_skip_reason(int subtest) +{ + if (subtest < 0 || + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) + return NULL; + if (pmu_events_testcase_table[subtest].func != test_parsing) + return NULL; + return "some metrics failed"; +} + +int test__pmu_events_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(pmu_events_testcase_table); +} + +int test__pmu_events(struct test *test __maybe_unused, int subtest) +{ + if (subtest < 0 || + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) + return TEST_FAIL; + return pmu_events_testcase_table[subtest].func(); +} diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 8507bd615e..714e6830a7 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -137,7 +137,7 @@ static struct list_head *test_terms_list(void) return &terms; } -static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused) { char *format = test_format_dir_get(); LIST_HEAD(formats); @@ -177,5 +177,3 @@ static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe test_format_dir_put(format); return ret; } - -DEFINE_SUITE("Parse perf pmu format", pmu); diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index 6b990ee385..98c6d474aa 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -9,7 +9,7 @@ #include "tests.h" #include "util/debug.h" -static int test__python_use(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused) { char *cmd; int ret; @@ -23,5 +23,3 @@ static int test__python_use(struct test_suite *test __maybe_unused, int subtest free(cmd); return ret; } - -DEFINE_SUITE("'import perf' in python", python_use); diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 07f2411b0a..8fd8a4ef97 100644 --- a/tools/perf/tests/sample-parsing.c +++ 
b/tools/perf/tests/sample-parsing.c @@ -13,7 +13,6 @@ #include "evsel.h" #include "debug.h" #include "util/synthetic-events.h" -#include "util/trace-event.h" #include "tests.h" @@ -31,18 +30,9 @@ } \ } while (0) -/* - * Hardcode the expected values for branch_entry flags. - * These are based on the input value (213) specified - * in branch_stack variable. - */ -#define BS_EXPECTED_BE 0xa000d00000000000 -#define BS_EXPECTED_LE 0xd5000000 -#define FLAG(s) s->branch_stack->entries[i].flags - static bool samples_same(const struct perf_sample *s1, const struct perf_sample *s2, - u64 type, u64 read_format, bool needs_swap) + u64 type, u64 read_format) { size_t i; @@ -110,14 +100,8 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_BRANCH_STACK) { COMP(branch_stack->nr); COMP(branch_stack->hw_idx); - for (i = 0; i < s1->branch_stack->nr; i++) { - if (needs_swap) - return ((tep_is_bigendian()) ? - (FLAG(s2).value == BS_EXPECTED_BE) : - (FLAG(s2).value == BS_EXPECTED_LE)); - else - MCOMP(branch_stack->entries[i]); - } + for (i = 0; i < s1->branch_stack->nr; i++) + MCOMP(branch_stack->entries[i]); } if (type & PERF_SAMPLE_REGS_USER) { @@ -264,7 +248,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) }, }; struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; - struct perf_sample sample_out, sample_out_endian; + struct perf_sample sample_out; size_t i, sz, bufsz; int err, ret = -1; @@ -329,29 +313,12 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) goto out_free; } - if (!samples_same(&sample, &sample_out, sample_type, read_format, evsel.needs_swap)) { + if (!samples_same(&sample, &sample_out, sample_type, read_format)) { pr_debug("parsing failed for sample_type %#"PRIx64"\n", sample_type); goto out_free; } - if (sample_type == PERF_SAMPLE_BRANCH_STACK) { - evsel.needs_swap = true; - evsel.sample_size = __evsel__sample_size(sample_type); - err = evsel__parse_sample(&evsel, event, &sample_out_endian); - if (err) { - pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", - "evsel__parse_sample", sample_type, err); - goto out_free; - } - - if (!samples_same(&sample, &sample_out_endian, sample_type, read_format, evsel.needs_swap)) { - pr_debug("parsing failed for sample_type %#"PRIx64"\n", - sample_type); - goto out_free; - } - } - ret = 0; out_free: free(event); @@ -368,7 +335,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) * checks sample format bits separately and together. If the test passes %0 is * returned, otherwise %-1 is returned. 
*/ -static int test__sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) { const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15}; u64 sample_type; @@ -426,5 +393,3 @@ static int test__sample_parsing(struct test_suite *test __maybe_unused, int subt return 0; } - -DEFINE_SUITE("Sample parsing", sample_parsing); diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 9197128992..ed76c693f6 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -76,7 +76,7 @@ static int search_cached_probe(const char *target, return ret; } -static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused) +int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused) { int ret = TEST_FAIL; char __tempdir[] = "./test-buildid-XXXXXX"; @@ -114,11 +114,9 @@ static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests return ret; } #else -static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused) +int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused) { pr_debug("Skip SDT event test because SDT support is not compiled\n"); return TEST_SKIP; } #endif - -DEFINE_SUITE("Probe SDT events", sdt_event); diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh index 13473aeba4..2aed20dc22 100644 --- a/tools/perf/tests/shell/stat_bpf_counters.sh +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -23,7 +23,7 @@ compare_number() # skip if --bpf-counters is not supported if ! perf stat --bpf-counters true > /dev/null 2>&1; then - if [ "$1" = "-v" ]; then + if [ "$1" == "-v" ]; then echo "Skipping: --bpf-counters not supported" perf --no-pager stat --bpf-counters true || true fi diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 6de53b7ef5..c9eef0bba6 100644 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -9,6 +9,8 @@ # SPDX-License-Identifier: GPL-2.0 # Leo Yan , 2020 +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) glb_err=0 skip_if_no_cs_etm_event() { @@ -20,20 +22,13 @@ skip_if_no_cs_etm_event() { skip_if_no_cs_etm_event || exit 2 -perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -file=$(mktemp /tmp/temporary_file.XXXXX) - cleanup_files() { rm -f ${perfdata} rm -f ${file} - rm -f "${perfdata}.old" - trap - exit term int - kill -2 $$ - exit $glb_err } -trap cleanup_files exit term int +trap cleanup_files exit record_touch_file() { echo "Recording trace (only user mode) with path: CPU$2 => $1" diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index 500974040f..c1911501c3 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -47,8 +47,7 @@ static int process_stat_config_event(struct perf_tool *tool __maybe_unused, return 0; } -static int test__synthesize_stat_config(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__synthesize_stat_config(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_stat_config stat_config = { .aggr_mode = AGGR_CORE, @@ -78,7 +77,7 @@ static int process_stat_event(struct perf_tool *tool __maybe_unused, return 0; } -static int test__synthesize_stat(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__synthesize_stat(struct 
test *test __maybe_unused, int subtest __maybe_unused) { struct perf_counts_values count; @@ -87,8 +86,7 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub count.run = 300; TEST_ASSERT_VAL("failed to synthesize stat_config", - !perf_event__synthesize_stat(NULL, (struct perf_cpu){.cpu = 1}, 2, 3, - &count, process_stat_event, NULL)); + !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL)); return 0; } @@ -105,7 +103,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, return 0; } -static int test__synthesize_stat_round(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("failed to synthesize stat_config", !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL, @@ -113,7 +111,3 @@ static int test__synthesize_stat_round(struct test_suite *test __maybe_unused, i return 0; } - -DEFINE_SUITE("Synthesize stat config", synthesize_stat_config); -DEFINE_SUITE("Synthesize stat", synthesize_stat); -DEFINE_SUITE("Synthesize stat round", synthesize_stat_round); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 9cd6fec375..74988846be 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -133,7 +133,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) return err; } -static int test__sw_clock_freq(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret; @@ -143,5 +143,3 @@ static int test__sw_clock_freq(struct test_suite *test __maybe_unused, int subte return ret; } - -DEFINE_SUITE("Software clock events period values", sw_clock_freq); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 0c0c2328bf..62c0ec21aa 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -321,7 +321,7 @@ static int process_events(struct evlist *evlist, * evsel->core.system_wide and evsel->tracking flags (respectively) with other events * sometimes enabled or disabled. */ -static int test__switch_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) { const char *sched_switch = "sched:sched_switch"; struct switch_tracking switch_tracking = { .tids = NULL, }; @@ -588,5 +588,3 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub err = -1; goto out; } - -DEFINE_SUITE("Track with sched_switch", switch_tracking); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 25f075fa91..4c2969db59 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -39,7 +39,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, * if the number of exit event reported by the kernel is 1 or not * in order to check the kernel returns correct number of event. 
*/ -static int test__task_exit(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; union perf_event *event; @@ -151,5 +151,3 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _ evlist__delete(evlist); return err; } - -DEFINE_SUITE("Number of exit events of a simple workload", task_exit); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 5bbb8f6a48..fe1306f584 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -27,147 +27,112 @@ enum { TEST_SKIP = -2, }; -struct test_suite; - -typedef int (*test_fnptr)(struct test_suite *, int); - -struct test_case { - const char *name; +struct test { const char *desc; - const char *skip_reason; - test_fnptr run_case; -}; - -struct test_suite { - const char *desc; - struct test_case *test_cases; + int (*func)(struct test *test, int subtest); + struct { + bool skip_if_fail; + int (*get_nr)(void); + const char *(*get_desc)(int subtest); + const char *(*skip_reason)(int subtest); + } subtest; + bool (*is_supported)(void); void *priv; }; -#define DECLARE_SUITE(name) \ - extern struct test_suite suite__##name; - -#define TEST_CASE(description, _name) \ - { \ - .name = #_name, \ - .desc = description, \ - .run_case = test__##_name, \ - } - -#define TEST_CASE_REASON(description, _name, _reason) \ - { \ - .name = #_name, \ - .desc = description, \ - .run_case = test__##_name, \ - .skip_reason = _reason, \ - } - -#define DEFINE_SUITE(description, _name) \ - struct test_case tests__##_name[] = { \ - TEST_CASE(description, _name), \ - { .name = NULL, } \ - }; \ - struct test_suite suite__##_name = { \ - .desc = description, \ - .test_cases = tests__##_name, \ - } - /* Tests */ -DECLARE_SUITE(vmlinux_matches_kallsyms); -DECLARE_SUITE(openat_syscall_event); -DECLARE_SUITE(openat_syscall_event_on_all_cpus); -DECLARE_SUITE(basic_mmap); -DECLARE_SUITE(PERF_RECORD); -DECLARE_SUITE(perf_evsel__roundtrip_name_test); -DECLARE_SUITE(perf_evsel__tp_sched_test); -DECLARE_SUITE(syscall_openat_tp_fields); -DECLARE_SUITE(pmu); -DECLARE_SUITE(pmu_events); -DECLARE_SUITE(attr); -DECLARE_SUITE(dso_data); -DECLARE_SUITE(dso_data_cache); -DECLARE_SUITE(dso_data_reopen); -DECLARE_SUITE(parse_events); -DECLARE_SUITE(hists_link); -DECLARE_SUITE(python_use); -DECLARE_SUITE(bp_signal); -DECLARE_SUITE(bp_signal_overflow); -DECLARE_SUITE(bp_accounting); -DECLARE_SUITE(wp); -DECLARE_SUITE(task_exit); -DECLARE_SUITE(mem); -DECLARE_SUITE(sw_clock_freq); -DECLARE_SUITE(code_reading); -DECLARE_SUITE(sample_parsing); -DECLARE_SUITE(keep_tracking); -DECLARE_SUITE(parse_no_sample_id_all); -DECLARE_SUITE(dwarf_unwind); -DECLARE_SUITE(expr); -DECLARE_SUITE(hists_filter); -DECLARE_SUITE(mmap_thread_lookup); -DECLARE_SUITE(thread_maps_share); -DECLARE_SUITE(hists_output); -DECLARE_SUITE(hists_cumulate); -DECLARE_SUITE(switch_tracking); -DECLARE_SUITE(fdarray__filter); -DECLARE_SUITE(fdarray__add); -DECLARE_SUITE(kmod_path__parse); -DECLARE_SUITE(thread_map); -DECLARE_SUITE(llvm); -DECLARE_SUITE(bpf); -DECLARE_SUITE(session_topology); -DECLARE_SUITE(thread_map_synthesize); -DECLARE_SUITE(thread_map_remove); -DECLARE_SUITE(cpu_map_synthesize); -DECLARE_SUITE(synthesize_stat_config); -DECLARE_SUITE(synthesize_stat); -DECLARE_SUITE(synthesize_stat_round); -DECLARE_SUITE(event_update); -DECLARE_SUITE(event_times); -DECLARE_SUITE(backward_ring_buffer); -DECLARE_SUITE(cpu_map_print); -DECLARE_SUITE(cpu_map_merge); 
-DECLARE_SUITE(sdt_event); -DECLARE_SUITE(is_printable_array); -DECLARE_SUITE(bitmap_print); -DECLARE_SUITE(perf_hooks); -DECLARE_SUITE(clang); -DECLARE_SUITE(unit_number__scnprint); -DECLARE_SUITE(mem2node); -DECLARE_SUITE(maps__merge_in); -DECLARE_SUITE(time_utils); -DECLARE_SUITE(jit_write_elf); -DECLARE_SUITE(api_io); -DECLARE_SUITE(demangle_java); -DECLARE_SUITE(demangle_ocaml); -DECLARE_SUITE(pfm); -DECLARE_SUITE(parse_metric); -DECLARE_SUITE(pe_file_parsing); -DECLARE_SUITE(expand_cgroup_events); -DECLARE_SUITE(perf_time_to_tsc); -DECLARE_SUITE(dlfilter); -DECLARE_SUITE(sigtrap); +int test__vmlinux_matches_kallsyms(struct test *test, int subtest); +int test__openat_syscall_event(struct test *test, int subtest); +int test__openat_syscall_event_on_all_cpus(struct test *test, int subtest); +int test__basic_mmap(struct test *test, int subtest); +int test__PERF_RECORD(struct test *test, int subtest); +int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest); +int test__perf_evsel__tp_sched_test(struct test *test, int subtest); +int test__syscall_openat_tp_fields(struct test *test, int subtest); +int test__pmu(struct test *test, int subtest); +int test__pmu_events(struct test *test, int subtest); +const char *test__pmu_events_subtest_get_desc(int subtest); +const char *test__pmu_events_subtest_skip_reason(int subtest); +int test__pmu_events_subtest_get_nr(void); +int test__attr(struct test *test, int subtest); +int test__dso_data(struct test *test, int subtest); +int test__dso_data_cache(struct test *test, int subtest); +int test__dso_data_reopen(struct test *test, int subtest); +int test__parse_events(struct test *test, int subtest); +int test__hists_link(struct test *test, int subtest); +int test__python_use(struct test *test, int subtest); +int test__bp_signal(struct test *test, int subtest); +int test__bp_signal_overflow(struct test *test, int subtest); +int test__bp_accounting(struct test *test, int subtest); +int test__wp(struct test *test, int subtest); +const char *test__wp_subtest_get_desc(int subtest); +const char *test__wp_subtest_skip_reason(int subtest); +int test__wp_subtest_get_nr(void); +int test__task_exit(struct test *test, int subtest); +int test__mem(struct test *test, int subtest); +int test__sw_clock_freq(struct test *test, int subtest); +int test__code_reading(struct test *test, int subtest); +int test__sample_parsing(struct test *test, int subtest); +int test__keep_tracking(struct test *test, int subtest); +int test__parse_no_sample_id_all(struct test *test, int subtest); +int test__dwarf_unwind(struct test *test, int subtest); +int test__expr(struct test *test, int subtest); +int test__hists_filter(struct test *test, int subtest); +int test__mmap_thread_lookup(struct test *test, int subtest); +int test__thread_maps_share(struct test *test, int subtest); +int test__hists_output(struct test *test, int subtest); +int test__hists_cumulate(struct test *test, int subtest); +int test__switch_tracking(struct test *test, int subtest); +int test__fdarray__filter(struct test *test, int subtest); +int test__fdarray__add(struct test *test, int subtest); +int test__kmod_path__parse(struct test *test, int subtest); +int test__thread_map(struct test *test, int subtest); +int test__llvm(struct test *test, int subtest); +const char *test__llvm_subtest_get_desc(int subtest); +int test__llvm_subtest_get_nr(void); +int test__bpf(struct test *test, int subtest); +const char *test__bpf_subtest_get_desc(int subtest); +int test__bpf_subtest_get_nr(void); +int 
test__session_topology(struct test *test, int subtest); +int test__thread_map_synthesize(struct test *test, int subtest); +int test__thread_map_remove(struct test *test, int subtest); +int test__cpu_map_synthesize(struct test *test, int subtest); +int test__synthesize_stat_config(struct test *test, int subtest); +int test__synthesize_stat(struct test *test, int subtest); +int test__synthesize_stat_round(struct test *test, int subtest); +int test__event_update(struct test *test, int subtest); +int test__event_times(struct test *test, int subtest); +int test__backward_ring_buffer(struct test *test, int subtest); +int test__cpu_map_print(struct test *test, int subtest); +int test__cpu_map_merge(struct test *test, int subtest); +int test__sdt_event(struct test *test, int subtest); +int test__is_printable_array(struct test *test, int subtest); +int test__bitmap_print(struct test *test, int subtest); +int test__perf_hooks(struct test *test, int subtest); +int test__clang(struct test *test, int subtest); +const char *test__clang_subtest_get_desc(int subtest); +int test__clang_subtest_get_nr(void); +int test__unit_number__scnprint(struct test *test, int subtest); +int test__mem2node(struct test *t, int subtest); +int test__maps__merge_in(struct test *t, int subtest); +int test__time_utils(struct test *t, int subtest); +int test__jit_write_elf(struct test *test, int subtest); +int test__api_io(struct test *test, int subtest); +int test__demangle_java(struct test *test, int subtest); +int test__demangle_ocaml(struct test *test, int subtest); +int test__pfm(struct test *test, int subtest); +const char *test__pfm_subtest_get_desc(int subtest); +int test__pfm_subtest_get_nr(void); +int test__parse_metric(struct test *test, int subtest); +int test__pe_file_parsing(struct test *test, int subtest); +int test__expand_cgroup_events(struct test *test, int subtest); +int test__perf_time_to_tsc(struct test *test, int subtest); +int test__dlfilter(struct test *test, int subtest); -/* - * PowerPC and S390 do not support creation of instruction breakpoints using the - * perf_event interface. - * - * ARM requires explicit rounding down of the instruction pointer in Thumb mode, - * and then requires the single-step to be handled explicitly in the overflow - * handler to avoid stepping into the SIGIO handler and getting stuck on the - * breakpointed instruction. - * - * Since arm64 has the same issue with arm for the single-step handling, this - * case also gets stuck on the breakpointed instruction. - * - * Just disable the test for these architectures until these issues are - * resolved. 
- */ -#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || defined(__aarch64__) -#define BP_SIGNAL_IS_SUPPORTED 0 -#else -#define BP_SIGNAL_IS_SUPPORTED 1 -#endif +bool test__bp_signal_is_supported(void); +bool test__bp_account_is_supported(void); +bool test__wp_is_supported(void); +bool test__tsc_is_supported(void); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; @@ -177,7 +142,7 @@ int test__arch_unwind_sample(struct perf_sample *sample, #endif #if defined(__arm__) -DECLARE_SUITE(vectors_page); +int test__vectors_page(struct test *test, int subtest); #endif #endif /* TESTS_H */ diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index e413c1387f..d1e208b4a5 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -19,7 +19,7 @@ struct machine; #define NAME (const char *) "perf" #define NAMEUL (unsigned long) NAME -static int test__thread_map(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__thread_map(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_thread_map *map; @@ -86,7 +86,7 @@ static int process_event(struct perf_tool *tool __maybe_unused, return 0; } -static int test__thread_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_thread_map *threads; @@ -106,7 +106,7 @@ static int test__thread_map_synthesize(struct test_suite *test __maybe_unused, i return 0; } -static int test__thread_map_remove(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__thread_map_remove(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_thread_map *threads; char *str; @@ -145,7 +145,3 @@ static int test__thread_map_remove(struct test_suite *test __maybe_unused, int s perf_thread_map__put(threads); return 0; } - -DEFINE_SUITE("Thread map", thread_map); -DEFINE_SUITE("Synthesize thread map", thread_map_synthesize); -DEFINE_SUITE("Remove thread map", thread_map_remove); diff --git a/tools/perf/tests/thread-maps-share.c b/tools/perf/tests/thread-maps-share.c index 84edd82c51..9371484973 100644 --- a/tools/perf/tests/thread-maps-share.c +++ b/tools/perf/tests/thread-maps-share.c @@ -4,7 +4,7 @@ #include "thread.h" #include "debug.h" -static int test__thread_maps_share(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__thread_maps_share(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machines machines; struct machine *machine; @@ -96,5 +96,3 @@ static int test__thread_maps_share(struct test_suite *test __maybe_unused, int s machines__exit(&machines); return 0; } - -DEFINE_SUITE("Share thread maps", thread_maps_share); diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c index 38df10373c..fe57ca3b6e 100644 --- a/tools/perf/tests/time-utils-test.c +++ b/tools/perf/tests/time-utils-test.c @@ -131,7 +131,7 @@ static bool test__perf_time__parse_for_ranges(struct test_data *d) return pass; } -static int test__time_utils(struct test_suite *t __maybe_unused, int subtest __maybe_unused) +int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused) { bool pass = true; @@ -249,5 +249,3 @@ static int test__time_utils(struct test_suite *t __maybe_unused, int subtest __m return pass ? 
0 : TEST_FAIL; } - -DEFINE_SUITE("time utils", time_utils); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ee1e3dcbc0..b9028e304d 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -49,9 +49,7 @@ static int session_write_header(char *path) session->evlist = evlist__new(); TEST_ASSERT_VAL("can't get evlist", session->evlist); - parse_events_error__init(&err); parse_events(session->evlist, "cpu_core/cycles/", &err); - parse_events_error__exit(&err); } perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); @@ -112,88 +110,62 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { - struct perf_cpu cpu = { .cpu = i }; - - if (!perf_cpu_map__has(map, cpu)) + if (!cpu_map__has(map, i)) continue; pr_debug("CPU %d, core %d, socket %d\n", i, session->header.env.cpu[i].core_id, session->header.env.cpu[i].socket_id); } - // Test that CPU ID contains socket, die, core and CPU - for (i = 0; i < perf_cpu_map__nr(map); i++) { - id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL); - TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", - perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu); - - TEST_ASSERT_VAL("Cpu map - Core ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core); - TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == - id.socket); - - TEST_ASSERT_VAL("Cpu map - Die ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); - TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1); - TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1); - } - // Test that core ID contains socket, die and core - for (i = 0; i < perf_cpu_map__nr(map); i++) { - id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL); + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_core(map, i, NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core); + session->header.env.cpu[map->map[i]].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == - id.socket); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); + session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } // Test that die ID contains socket and die - for (i = 0; i < perf_cpu_map__nr(map); i++) { - id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL); + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_die(map, i, NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == - id.socket); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); + session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); - TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1); 
TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); } // Test that socket ID contains only socket - for (i = 0; i < perf_cpu_map__nr(map); i++) { - id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL); + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == - id.socket); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); - TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); } // Test that node ID contains only node - for (i = 0; i < perf_cpu_map__nr(map); i++) { - id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL); + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_node(map, i, NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", - cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node); + cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); - TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); } perf_session__delete(session); @@ -201,7 +173,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) return 0; } -static int test__session_topology(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +int test__session_topology(struct test *test __maybe_unused, int subtest __maybe_unused) { char path[PATH_MAX]; struct perf_cpu_map *map; @@ -227,5 +199,3 @@ static int test__session_topology(struct test_suite *test __maybe_unused, int su unlink(path); return ret; } - -DEFINE_SUITE("Session topology", session_topology); diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c index 88bcada1c7..3721757435 100644 --- a/tools/perf/tests/unit_number__scnprintf.c +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -7,7 +7,7 @@ #include "units.h" #include "debug.h" -static int test__unit_number__scnprint(struct test_suite *t __maybe_unused, int subtest __maybe_unused) +int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __maybe_unused) { struct { u64 n; @@ -38,5 +38,3 @@ static int test__unit_number__scnprint(struct test_suite *t __maybe_unused, int return TEST_OK; } - -DEFINE_SUITE("unit_number__scnprintf", unit_number__scnprint); diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index e80df13c04..193b7c91b4 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include "dso.h" #include "map.h" @@ -15,104 +14,7 @@ #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) -static bool is_ignored_symbol(const char *name, char type) -{ - /* Symbol names that exactly match to the following are ignored.*/ - static const char * const ignored_symbols[] = { - /* - * Symbols which vary between passes. Passes 1 and 2 must have - * identical symbol lists. 
The kallsyms_* symbols below are - * only added after pass 1, they would be included in pass 2 - * when --all-symbols is specified so exclude them to get a - * stable symbol list. - */ - "kallsyms_addresses", - "kallsyms_offsets", - "kallsyms_relative_base", - "kallsyms_num_syms", - "kallsyms_names", - "kallsyms_markers", - "kallsyms_token_table", - "kallsyms_token_index", - /* Exclude linker generated symbols which vary between passes */ - "_SDA_BASE_", /* ppc */ - "_SDA2_BASE_", /* ppc */ - NULL - }; - - /* Symbol names that begin with the following are ignored.*/ - static const char * const ignored_prefixes[] = { - "$", /* local symbols for ARM, MIPS, etc. */ - ".LASANPC", /* s390 kasan local symbols */ - "__crc_", /* modversions */ - "__efistub_", /* arm64 EFI stub namespace */ - "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ - "__AArch64ADRPThunk_", /* arm64 lld */ - "__ARMV5PILongThunk_", /* arm lld */ - "__ARMV7PILongThunk_", - "__ThumbV7PILongThunk_", - "__LA25Thunk_", /* mips lld */ - "__microLA25Thunk_", - NULL - }; - - /* Symbol names that end with the following are ignored.*/ - static const char * const ignored_suffixes[] = { - "_from_arm", /* arm */ - "_from_thumb", /* arm */ - "_veneer", /* arm */ - NULL - }; - - /* Symbol names that contain the following are ignored.*/ - static const char * const ignored_matches[] = { - ".long_branch.", /* ppc stub */ - ".plt_branch.", /* ppc stub */ - NULL - }; - - const char * const *p; - - for (p = ignored_symbols; *p; p++) - if (!strcmp(name, *p)) - return true; - - for (p = ignored_prefixes; *p; p++) - if (!strncmp(name, *p, strlen(*p))) - return true; - - for (p = ignored_suffixes; *p; p++) { - int l = strlen(name) - strlen(*p); - - if (l >= 0 && !strcmp(name + l, *p)) - return true; - } - - for (p = ignored_matches; *p; p++) { - if (strstr(name, *p)) - return true; - } - - if (type == 'U' || type == 'u') - return true; - /* exclude debugging symbols */ - if (type == 'N' || type == 'n') - return true; - - if (toupper(type) == 'A') { - /* Keep these useful absolute symbols */ - if (strcmp(name, "__kernel_syscall_via_break") && - strcmp(name, "__kernel_syscall_via_epc") && - strcmp(name, "__kernel_sigtramp") && - strcmp(name, "__gp")) - return true; - } - - return false; -} - -static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; struct rb_node *nd; @@ -267,11 +169,6 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * such as __indirect_thunk_end. 
*/ continue; - } else if (is_ignored_symbol(sym->name, sym->type)) { - /* - * Ignore hidden symbols, see scripts/kallsyms.c for the details - */ - continue; } else { pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", mem_start, sym->name); @@ -353,5 +250,3 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused machine__exit(&vmlinux); return err; } - -DEFINE_SUITE("vmlinux symtab matches kallsyms", vmlinux_matches_kallsyms); diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index 9d4c45184e..9387fa76fa 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -21,7 +21,6 @@ do { \ volatile u64 data1; volatile u8 data2[3]; -#ifndef __s390x__ static int wp_read(int fd, long long *count, int size) { int ret = read(fd, count, size); @@ -62,14 +61,9 @@ static int __event(int wp_type, void *wp_addr, unsigned long wp_len) return fd; } -#endif -static int test__wp_ro(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int wp_ro_test(void) { -#if defined(__s390x__) || defined(__x86_64__) || defined(__i386__) - return TEST_SKIP; -#else int fd; unsigned long tmp, tmp1 = rand(); @@ -85,15 +79,10 @@ static int test__wp_ro(struct test_suite *test __maybe_unused, close(fd); return 0; -#endif } -static int test__wp_wo(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int wp_wo_test(void) { -#if defined(__s390x__) - return TEST_SKIP; -#else int fd; unsigned long tmp, tmp1 = rand(); @@ -109,15 +98,10 @@ static int test__wp_wo(struct test_suite *test __maybe_unused, close(fd); return 0; -#endif } -static int test__wp_rw(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int wp_rw_test(void) { -#if defined(__s390x__) - return TEST_SKIP; -#else int fd; unsigned long tmp, tmp1 = rand(); @@ -134,15 +118,10 @@ static int test__wp_rw(struct test_suite *test __maybe_unused, close(fd); return 0; -#endif } -static int test__wp_modify(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +static int wp_modify_test(void) { -#if defined(__s390x__) - return TEST_SKIP; -#else int fd, ret; unsigned long tmp = rand(); struct perf_event_attr new_attr; @@ -184,18 +163,93 @@ static int test__wp_modify(struct test_suite *test __maybe_unused, close(fd); return 0; +} + +static bool wp_ro_supported(void) +{ +#if defined (__x86_64__) || defined (__i386__) + return false; +#else + return true; #endif } -static struct test_case wp_tests[] = { - TEST_CASE_REASON("Read Only Watchpoint", wp_ro, "missing hardware support"), - TEST_CASE_REASON("Write Only Watchpoint", wp_wo, "missing hardware support"), - TEST_CASE_REASON("Read / Write Watchpoint", wp_rw, "missing hardware support"), - TEST_CASE_REASON("Modify Watchpoint", wp_modify, "missing hardware support"), - { .name = NULL, } +static const char *wp_ro_skip_msg(void) +{ +#if defined (__x86_64__) || defined (__i386__) + return "missing hardware support"; +#else + return NULL; +#endif +} + +static struct { + const char *desc; + int (*target_func)(void); + bool (*is_supported)(void); + const char *(*skip_msg)(void); +} wp_testcase_table[] = { + { + .desc = "Read Only Watchpoint", + .target_func = &wp_ro_test, + .is_supported = &wp_ro_supported, + .skip_msg = &wp_ro_skip_msg, + }, + { + .desc = "Write Only Watchpoint", + .target_func = &wp_wo_test, + }, + { + .desc = "Read / Write Watchpoint", + .target_func = &wp_rw_test, + }, + { + .desc = "Modify Watchpoint", + .target_func = &wp_modify_test, + }, }; -struct test_suite suite__wp = { - .desc = 
"Watchpoint", - .test_cases = wp_tests, -}; +int test__wp_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(wp_testcase_table); +} + +const char *test__wp_subtest_get_desc(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return NULL; + return wp_testcase_table[i].desc; +} + +const char *test__wp_subtest_skip_reason(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return NULL; + if (!wp_testcase_table[i].skip_msg) + return NULL; + return wp_testcase_table[i].skip_msg(); +} + +int test__wp(struct test *test __maybe_unused, int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return TEST_FAIL; + + if (wp_testcase_table[i].is_supported && + !wp_testcase_table[i].is_supported()) + return TEST_SKIP; + + return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL; +} + +/* The s390 so far does not have support for + * instruction breakpoint using the perf_event_open() system call. + */ +bool test__wp_is_supported(void) +{ +#if defined(__s390x__) + return false; +#else + return true; +#endif +} diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index f527a46ab4..d6dfe68a76 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -62,8 +62,6 @@ size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_ extern struct strarray strarray__socket_families; -extern struct strarray strarray__socket_level; - /** * augmented_arg: extra payload for syscall pointer arguments @@ -232,9 +230,6 @@ size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg); #define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol -size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg); -#define SCA_SK_LEVEL syscall_arg__scnprintf_socket_level - size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 8ef26d89ef..041d6032a3 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -364,8 +364,6 @@ struct ucred { #define SOL_KCM 281 #define SOL_TLS 282 #define SOL_XDP 283 -#define SOL_MPTCP 284 -#define SOL_MCTP 285 /* IPX options */ #define IPX_TYPE 1 diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 3d278785fe..3109d7b05e 100644 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -4,7 +4,7 @@ [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" -regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' +regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*' egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ sed -r "s/$regex/\2 \1/g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c index 2e0e867c0c..cd110634ab 100644 --- a/tools/perf/trace/beauty/sockaddr.c +++ b/tools/perf/trace/beauty/sockaddr.c @@ -7,7 +7,7 @@ #include #include -#include "trace/beauty/generated/sockaddr.c" +#include "trace/beauty/generated/socket_arrays.c" DEFINE_STRARRAY(socket_families, "PF_"); static size_t 
af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size) diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c index b0870c7b48..f23a3dda29 100644 --- a/tools/perf/trace/beauty/socket.c +++ b/tools/perf/trace/beauty/socket.c @@ -9,10 +9,9 @@ #include #include -#include "trace/beauty/generated/socket.c" - static size_t socket__scnprintf_ipproto(int protocol, char *bf, size_t size, bool show_prefix) { +#include "trace/beauty/generated/socket_ipproto_array.c" static DEFINE_STRARRAY(socket_ipproto, "IPPROTO_"); return strarray__scnprintf(&strarray__socket_ipproto, bf, size, "%d", show_prefix, protocol); @@ -27,21 +26,3 @@ size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct sysc return syscall_arg__scnprintf_int(bf, size, arg); } - -static size_t socket__scnprintf_level(int level, char *bf, size_t size, bool show_prefix) -{ -#if defined(__alpha__) || defined(__hppa__) || defined(__mips__) || defined(__sparc__) - const int sol_socket = 0xffff; -#else - const int sol_socket = 1; -#endif - if (level == sol_socket) - return scnprintf(bf, size, "%sSOCKET", show_prefix ? "SOL_" : ""); - - return strarray__scnprintf(&strarray__socket_level, bf, size, "%d", show_prefix, level); -} - -size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg) -{ - return socket__scnprintf_level(arg->val, bf, size, arg->show_string_prefix); -} diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh index 76330acb27..3820e5c822 100644 --- a/tools/perf/trace/beauty/socket.sh +++ b/tools/perf/trace/beauty/socket.sh @@ -1,28 +1,24 @@ #!/bin/sh # SPDX-License-Identifier: LGPL-2.1 -if [ $# -gt 0 ] ; then - uapi_header_dir=$1 - beauty_header_dir=$2 -else - uapi_header_dir=tools/include/uapi/linux/ - beauty_header_dir=tools/perf/trace/beauty/include/linux/ -fi +# This one uses a copy from the kernel sources headers that is in a +# place used just for this tools/perf/beauty/ usage; we should not +# put it in tools/include/linux, otherwise it would be used in the +# normal compiler building process and would drag needless stuff from the +# kernel. -printf "static const char *socket_ipproto[] = {\n" -ipproto_regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*' -egrep $ipproto_regex ${uapi_header_dir}/in.h | \ - sed -r "s/$ipproto_regex/\2 \1/g" | \ - sort -n | xargs printf "\t[%s] = \"%s\",\n" -printf "};\n\n" +# When what these scripts need is already in tools/include/ then use it, +# otherwise grab and check the copy from the kernel sources just for these +# string table building scripts. -printf "static const char *socket_level[] = {\n" -socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?'
+printf "static const char *socket_families[] = {\n" +# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ +regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*' -egrep $socket_level_regex ${beauty_header_dir}/socket.h | \ - sed -r "s/$socket_level_regex/\2 \1/g" | \ - sort -n | xargs printf "\t[%s] = \"%s\",\n" -printf "};\n\n" - -printf 'DEFINE_STRARRAY(socket_level, "SOL_");\n' +egrep $regex ${header_dir}/socket.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s] = \"%s\",\n" | \ + egrep -v "\"(UNIX|MAX)\"" +printf "};\n" diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index b1be59b4e2..e9bfe856a5 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -170,11 +170,9 @@ void ui__exit(bool wait_for_ok) "Press any key...", 0); SLtt_set_cursor_visibility(1); - if (!pthread_mutex_trylock(&ui__lock)) { - SLsmg_refresh(); - SLsmg_reset_smg(); - pthread_mutex_unlock(&ui__lock); - } + SLsmg_refresh(); + SLsmg_reset_smg(); SLang_reset_tty(); + perf_error__unregister(&perf_tui_eops); } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2a403cefca..f2914d5bed 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,3 @@ -perf-y += arm64-frame-pointer-unwind-support.o perf-y += annotate.o perf-y += block-info.o perf-y += block-range.o @@ -139,13 +138,11 @@ perf-y += expr.o perf-y += branch.o perf-y += mem2node.o perf-y += clockid.o -perf-y += list_sort.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o @@ -204,7 +201,6 @@ endif perf-y += perf-hooks.o perf-$(CONFIG_LIBBPF) += bpf-event.o -perf-$(CONFIG_LIBBPF) += bpf-utils.o perf-$(CONFIG_CXX) += c++/ @@ -319,7 +315,3 @@ $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) - -$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE - $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 4d216c0dc4..7b12bd7a30 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -11,7 +11,7 @@ static int get_cpu_set_size(void) { - int sz = cpu__max_cpu().cpu + 8 - 1; + int sz = cpu__max_cpu() + 8 - 1; /* * sched_getaffinity doesn't like masks smaller than the kernel. * Hopefully that's big enough. 
@@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu) clear_bit(cpu, a->sched_cpus); } -static void __affinity__cleanup(struct affinity *a) +void affinity__cleanup(struct affinity *a) { int cpu_set_size = get_cpu_set_size(); @@ -71,9 +71,3 @@ static void __affinity__cleanup(struct affinity *a) zfree(&a->sched_cpus); zfree(&a->orig_cpus); } - -void affinity__cleanup(struct affinity *a) -{ - if (a != NULL) - __affinity__cleanup(a); -} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 8190a124b9..0bae061b2d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -28,7 +28,6 @@ #include "evsel.h" #include "evlist.h" #include "bpf-event.h" -#include "bpf-utils.h" #include "block-range.h" #include "string2.h" #include "util/event.h" @@ -152,7 +151,6 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/mips/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" -#include "arch/riscv64/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" #include "arch/sparc/annotate/instructions.c" @@ -185,6 +183,7 @@ static struct arch architectures[] = { .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), + .ins_is_fused = x86__ins_is_fused, .objdump = { .comment_char = '#', }, @@ -193,10 +192,6 @@ static struct arch architectures[] = { .name = "powerpc", .init = powerpc__annotate_init, }, - { - .name = "riscv64", - .init = riscv64__annotate_init, - }, { .name = "s390", .init = s390__annotate_init, @@ -1255,17 +1250,6 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); } -void annotation__init(struct annotation *notes) -{ - pthread_mutex_init(¬es->lock, NULL); -} - -void annotation__exit(struct annotation *notes) -{ - annotated_source__delete(notes->src); - pthread_mutex_destroy(¬es->lock); -} - static void annotation_line__add(struct annotation_line *al, struct list_head *head) { list_add_tail(&al->node, head); @@ -1716,12 +1700,12 @@ static int symbol__disassemble_bpf(struct symbol *sym, { struct annotation *notes = symbol__annotation(sym); struct annotation_options *opts = args->options; + struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; disassembler_ftype disassemble; struct map *map = args->ms.map; - struct perf_bpil *info_linear; struct disassemble_info info; struct dso *dso = map->dso; int pc = 0, count, sub_id; @@ -2036,7 +2020,6 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) memset(&objdump_process, 0, sizeof(objdump_process)); objdump_process.argv = objdump_argv; objdump_process.out = -1; - objdump_process.err = -1; if (start_command(&objdump_process)) { pr_err("Failure starting to run %s\n", command); err = -1; @@ -3144,7 +3127,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, notes->nr_events = nr_pcnt; annotation__update_column_widths(notes); - sym->annotate2 = 1; + sym->annotate2 = true; return 0; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 986f2bbe48..3757416bcf 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -299,9 +299,6 @@ struct annotation { struct annotated_source *src; }; -void annotation__init(struct annotation *notes); -void 
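The annotate hunks remove the paired annotation__init()/annotation__exit() helpers that wrapped pthread_mutex_init()/destroy(). A small sketch of that constructor/destructor pairing for a struct-embedded mutex, with illustrative names:

#include <pthread.h>
#include <stdio.h>

struct annotation_like {
        pthread_mutex_t lock;
        void *src;
};

/* Every init must be matched by an exit that destroys the mutex. */
static void ann_init(struct annotation_like *a)
{
        pthread_mutex_init(&a->lock, NULL);
        a->src = NULL;
}

static void ann_exit(struct annotation_like *a)
{
        pthread_mutex_destroy(&a->lock);
}

int main(void)
{
        struct annotation_like a;

        ann_init(&a);
        pthread_mutex_lock(&a.lock);
        puts("locked");
        pthread_mutex_unlock(&a.lock);
        ann_exit(&a);
        return 0;
}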
annotation__exit(struct annotation *notes); - static inline int annotation__cycles_width(struct annotation *notes) { if (notes->have_cycles && notes->options->show_minmax_cycle) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 5e390a1a79..32fe41835f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -151,7 +151,6 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) u64 payload, ip; memset(&decoder->record, 0x0, sizeof(decoder->record)); - decoder->record.context_id = (u64)-1; while (1) { err = arm_spe_get_next_packet(decoder); @@ -179,11 +178,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.phys_addr = ip; break; case ARM_SPE_COUNTER: - if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) - decoder->record.latency = payload; break; case ARM_SPE_CONTEXT: - decoder->record.context_id = payload; break; case ARM_SPE_OP_TYPE: if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 69b31084d6..59bdb73096 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -33,13 +33,11 @@ struct arm_spe_record { enum arm_spe_sample_type type; int err; u32 op; - u32 latency; u64 from_ip; u64 to_ip; u64 timestamp; u64 virt_addr; u64 phys_addr; - u64 context_id; }; struct arm_spe_insn; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 2f311189c6..2e5eff4f8f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,7 +13,7 @@ #include "arm-spe-pkt-decoder.h" -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index d2b64e3f58..7054f23150 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -58,8 +58,6 @@ struct arm_spe { u8 sample_branch; u8 sample_remote_access; u8 sample_memory; - u8 sample_instructions; - u64 instructions_sample_period; u64 l1d_miss_id; u64 l1d_access_id; @@ -70,12 +68,10 @@ struct arm_spe { u64 branch_miss_id; u64 remote_access_id; u64 memory_id; - u64 instructions_id; u64 kernel_start; unsigned long num_events; - u8 use_ctx_pkt_for_pid; }; struct arm_spe_queue { @@ -93,7 +89,6 @@ struct arm_spe_queue { u64 time; u64 timestamp; struct thread *thread; - u64 period_instructions; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -106,7 +101,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, const char *color = PERF_COLOR_BLUE; color_fprintf(stdout, color, - ". ... ARM SPE data: size %#zx bytes\n", + ". ... 
ARM SPE data: size %zu bytes\n", len); while (len) { @@ -206,7 +201,6 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, speq->pid = -1; speq->tid = -1; speq->cpu = -1; - speq->period_instructions = 0; /* params set */ params.get_trace = arm_spe_get_trace; @@ -233,44 +227,6 @@ static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) PERF_RECORD_MISC_USER; } -static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, - struct auxtrace_queue *queue) -{ - struct arm_spe_queue *speq = queue->priv; - pid_t tid; - - tid = machine__get_current_tid(spe->machine, speq->cpu); - if (tid != -1) { - speq->tid = tid; - thread__zput(speq->thread); - } else - speq->tid = queue->tid; - - if ((!speq->thread) && (speq->tid != -1)) { - speq->thread = machine__find_thread(spe->machine, -1, - speq->tid); - } - - if (speq->thread) { - speq->pid = speq->thread->pid_; - if (queue->cpu == -1) - speq->cpu = speq->thread->cpu; - } -} - -static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) -{ - struct arm_spe *spe = speq->spe; - int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); - - if (err) - return err; - - arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); - - return 0; -} - static void arm_spe_prep_sample(struct arm_spe *spe, struct arm_spe_queue *speq, union perf_event *event, @@ -335,7 +291,6 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, sample.addr = record->virt_addr; sample.phys_addr = record->phys_addr; sample.data_src = data_src; - sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -353,36 +308,6 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->to_ip; - sample.weight = record->latency; - - return arm_spe_deliver_synth_event(spe, speq, event, &sample); -} - -static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, - u64 spe_events_id, u64 data_src) -{ - struct arm_spe *spe = speq->spe; - struct arm_spe_record *record = &speq->decoder->record; - union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; - - /* - * Handles perf instruction sampling period. - */ - speq->period_instructions++; - if (speq->period_instructions < spe->instructions_sample_period) - return 0; - speq->period_instructions = 0; - - arm_spe_prep_sample(spe, speq, event, &sample); - - sample.id = spe_events_id; - sample.stream_id = spe_events_id; - sample.addr = record->virt_addr; - sample.phys_addr = record->phys_addr; - sample.data_src = data_src; - sample.period = spe->instructions_sample_period; - sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -516,12 +441,6 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } - if (spe->sample_instructions) { - err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src); - if (err) - return err; - } - return 0; } @@ -554,19 +473,6 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) * can correlate samples between Arm SPE trace data and other * perf events with correct time ordering. */ - - /* - * Update pid/tid info. 
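The removed arm_spe__synth_instruction_sample() implements a simple decimation counter: bump a per-queue count on every record and synthesize one sample each time it reaches the period. A self-contained sketch of that counting scheme (names are illustrative):

#include <stdio.h>

/* Emit one synthesized sample every 'period' records. */
struct decimator {
        unsigned long long count;
        unsigned long long period;
};

static int maybe_emit(struct decimator *d)
{
        if (++d->count < d->period)
                return 0;
        d->count = 0;
        return 1;       /* caller synthesizes one sample now */
}

int main(void)
{
        struct decimator d = { .count = 0, .period = 3 };

        for (int i = 1; i <= 10; i++)
                if (maybe_emit(&d))
                        printf("sample at record %d\n", i);
        return 0;       /* prints records 3, 6 and 9 */
}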
- */ - record = &speq->decoder->record; - if (!spe->timeless_decoding && record->context_id != (u64)-1) { - ret = arm_spe_set_tid(speq, record->context_id); - if (ret) - return ret; - - spe->use_ctx_pkt_for_pid = true; - } - ret = arm_spe_sample(speq); if (ret) return ret; @@ -693,6 +599,31 @@ static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) return timeless_decoding; } +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, + struct auxtrace_queue *queue) +{ + struct arm_spe_queue *speq = queue->priv; + pid_t tid; + + tid = machine__get_current_tid(spe->machine, speq->cpu); + if (tid != -1) { + speq->tid = tid; + thread__zput(speq->thread); + } else + speq->tid = queue->tid; + + if ((!speq->thread) && (speq->tid != -1)) { + speq->thread = machine__find_thread(spe->machine, -1, + speq->tid); + } + + if (speq->thread) { + speq->pid = speq->thread->pid_; + if (queue->cpu == -1) + speq->cpu = speq->thread->cpu; + } +} + static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) { unsigned int queue_nr; @@ -723,12 +654,7 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) ts = timestamp; } - /* - * A previous context-switch event has set pid/tid in the machine's context, so - * here we need to update the pid/tid in the thread and SPE queue. - */ - if (!spe->use_ctx_pkt_for_pid) - arm_spe_set_pid_tid_cpu(spe, queue); + arm_spe_set_pid_tid_cpu(spe, queue); ret = arm_spe_run_decoder(speq, &ts); if (ret < 0) { @@ -768,25 +694,6 @@ static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, return 0; } -static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, - struct perf_sample *sample) -{ - pid_t pid, tid; - int cpu; - - if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) - return 0; - - pid = event->context_switch.next_prev_pid; - tid = event->context_switch.next_prev_tid; - cpu = sample->cpu; - - if (tid == -1) - pr_warning("context_switch event has no tid\n"); - - return machine__set_current_tid(spe->machine, cpu, pid, tid); -} - static int arm_spe_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -824,13 +731,6 @@ static int arm_spe_process_event(struct perf_session *session, } } else if (timestamp) { err = arm_spe_process_queues(spe, timestamp); - if (err) - return err; - - if (!spe->use_ctx_pkt_for_pid && - (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || - event->header.type == PERF_RECORD_SWITCH)) - err = arm_spe_context_switch(spe, event, sample); } return err; @@ -896,15 +796,7 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return arm_spe_process_timeless_queues(spe, -1, MAX_TIMESTAMP - 1); - ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); - if (ret) - return ret; - - if (!spe->use_ctx_pkt_for_pid) - ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" - "Matching of TIDs to SPE events could be inaccurate.\n"); - - return 0; + return arm_spe_process_queues(spe, MAX_TIMESTAMP); } static void arm_spe_free_queue(void *priv) @@ -1035,8 +927,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.type = PERF_TYPE_HARDWARE; attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | - PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | - PERF_SAMPLE_WEIGHT; + PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC; if (spe->timeless_decoding) attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; else @@ -1150,30 +1041,8 @@ arm_spe_synth_events(struct 
arm_spe *spe, struct perf_session *session) return err; spe->memory_id = id; arm_spe_set_event_name(evlist, id, "memory"); - id += 1; } - if (spe->synth_opts.instructions) { - if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { - pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); - goto synth_instructions_out; - } - if (spe->synth_opts.period > 1) - pr_warning("Arm SPE has a hardware-based sample period.\n" - "Additional instruction events will be discarded by --itrace\n"); - - spe->sample_instructions = true; - attr.config = PERF_COUNT_HW_INSTRUCTIONS; - attr.sample_period = spe->synth_opts.period; - spe->instructions_sample_period = attr.sample_period; - err = arm_spe_synth_event(session, &attr, id); - if (err) - return err; - spe->instructions_id = id; - arm_spe_set_event_name(evlist, id, "instructions"); - } -synth_instructions_out: - return 0; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 825336304a..8d2865b9ad 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -123,7 +123,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, mm->prev = 0; mm->idx = mp->idx; mm->tid = mp->tid; - mm->cpu = mp->cpu.cpu; + mm->cpu = mp->cpu; if (!mp->len) { mm->base = NULL; @@ -174,13 +174,13 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, mp->idx = idx; if (per_cpu) { - mp->cpu = perf_cpu_map__cpu(evlist->core.cpus, idx); + mp->cpu = evlist->core.cpus->map[idx]; if (evlist->core.threads) mp->tid = perf_thread_map__pid(evlist->core.threads, 0); else mp->tid = -1; } else { - mp->cpu.cpu = -1; + mp->cpu = -1; mp->tid = perf_thread_map__pid(evlist->core.threads, idx); } } @@ -292,7 +292,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues, if (!queue->set) { queue->set = true; queue->tid = buffer->tid; - queue->cpu = buffer->cpu.cpu; + queue->cpu = buffer->cpu; } buffer->buffer_nr = queues->next_buffer_nr++; @@ -339,11 +339,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, return 0; } -static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu) +static bool filter_cpu(struct perf_session *session, int cpu) { unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; - return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap); + return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); } static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, @@ -399,7 +399,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct auxtrace_buffer buffer = { .pid = -1, .tid = event->auxtrace.tid, - .cpu = { event->auxtrace.cpu }, + .cpu = event->auxtrace.cpu, .data_offset = data_offset, .offset = event->auxtrace.offset, .reference = event->auxtrace.reference, @@ -1564,9 +1564,6 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'q': synth_opts->quick += 1; break; - case 'A': - synth_opts->approx_ipc = true; - break; case 'Z': synth_opts->timeless_decoding = true; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 19910b9011..5f383908ca 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -60,7 +59,6 @@ enum itrace_period_type { #define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a')) #define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a')) -#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a')) /** * struct 
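The removed context-switch path keeps a per-CPU "current tid" that later records are attributed to, in the style of machine__set_current_tid()/machine__get_current_tid(). A much-simplified sketch of that bookkeeping, with -1 as the unknown sentinel and a hypothetical fixed CPU count:

#include <stdio.h>

#define MAX_CPUS 8

static int current_tid[MAX_CPUS];

/* Called for each switch record: remember who runs on this CPU now. */
static void on_switch(int cpu, int next_tid)
{
        if (cpu >= 0 && cpu < MAX_CPUS)
                current_tid[cpu] = next_tid;
}

int main(void)
{
        for (int i = 0; i < MAX_CPUS; i++)
                current_tid[i] = -1;    /* -1: not yet known */

        on_switch(2, 4242);
        printf("cpu2 tid=%d cpu3 tid=%d\n", current_tid[2], current_tid[3]);
        return 0;
}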
itrace_synth_opts - AUX area tracing synthesis options. @@ -86,7 +84,6 @@ enum itrace_period_type { * @thread_stack: feed branches to the thread_stack * @last_branch: add branch context to 'instruction' events * @add_last_branch: add branch context to existing event records - * @approx_ipc: approximate IPC * @flc: whether to synthesize first level cache events * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events @@ -130,7 +127,6 @@ struct itrace_synth_opts { bool thread_stack; bool last_branch; bool add_last_branch; - bool approx_ipc; bool flc; bool llc; bool tlb; @@ -241,7 +237,7 @@ struct auxtrace_buffer { size_t size; pid_t pid; pid_t tid; - struct perf_cpu cpu; + int cpu; void *data; off_t data_offset; void *mmap_addr; @@ -351,7 +347,7 @@ struct auxtrace_mmap_params { int prot; int idx; pid_t tid; - struct perf_cpu cpu; + int cpu; }; /** @@ -643,7 +639,6 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " d[flags]: create a debug log\n" \ " each flag must be preceded by + or -\n" \ " log flags are: a (all perf events)\n" \ -" o (output to stdout)\n" \ " f: synthesize first level cache events\n" \ " m: synthesize last level cache events\n" \ " t: synthesize TLB events\n" \ @@ -654,8 +649,6 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " L[len]: synthesize last branch entries on existing event records\n" \ " sNUMBER: skip initial number of events\n" \ " q: quicker (less detailed) decoding\n" \ -" A: approximate IPC\n" \ -" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is ibxwpe or cewp\n" diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index a517eaa51e..16ad0e6e9e 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -10,7 +10,6 @@ #include #include #include "bpf-event.h" -#include "bpf-utils.h" #include "debug.h" #include "dso.h" #include "symbol.h" @@ -33,32 +32,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) return err ? 
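The itrace hunks drop single-letter option flags ('A', 'Z', 'o') from a string parser where each character toggles one bool in the synth options. A minimal sketch of that parsing style, with an illustrative two-flag subset:

#include <stdio.h>
#include <stdbool.h>

struct opts {
        bool quick;
        bool timeless;
};

static int parse(struct opts *o, const char *s)
{
        for (; *s; s++) {
                switch (*s) {
                case 'q': o->quick = true;    break;
                case 'Z': o->timeless = true; break;
                default:  return -1;    /* unknown flag */
                }
        }
        return 0;
}

int main(void)
{
        struct opts o = { 0 };

        if (parse(&o, "qZ") == 0)
                printf("quick=%d timeless=%d\n", o.quick, o.timeless);
        return 0;
}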
ERR_PTR(err) : btf; } -struct bpf_program * __weak -bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) -{ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return bpf_program__next(prev, obj); -#pragma GCC diagnostic pop -} - -struct bpf_map * __weak -bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) -{ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return bpf_map__next(prev, obj); -#pragma GCC diagnostic pop -} - -const void * __weak -btf__raw_data(const struct btf *btf_ro, __u32 *size) -{ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return btf__get_raw_data(btf_ro, size); -#pragma GCC diagnostic pop -} +#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) { @@ -74,9 +48,9 @@ static int machine__process_bpf_event_load(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = machine->env; - struct perf_bpil *info_linear; int id = event->bpf.id; unsigned int i; @@ -136,7 +110,7 @@ static int perf_env__fetch_btf(struct perf_env *env, u32 data_size; const void *data; - data = btf__raw_data(btf, &data_size); + data = btf__get_raw_data(btf, &data_size); node = malloc(data_size + sizeof(struct btf_node)); if (!node) @@ -205,9 +179,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, { struct perf_record_ksymbol *ksymbol_event = &event->ksymbol; struct perf_record_bpf_event *bpf_event = &event->bpf; + struct bpf_prog_info_linear *info_linear; struct perf_tool *tool = session->tool; struct bpf_prog_info_node *info_node; - struct perf_bpil *info_linear; struct bpf_prog_info *info; struct btf *btf = NULL; struct perf_env *env; @@ -221,15 +195,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, */ env = session->data ? &session->header.env : &perf_env; - arrays = 1UL << PERF_BPIL_JITED_KSYMS; - arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; - arrays |= 1UL << PERF_BPIL_FUNC_INFO; - arrays |= 1UL << PERF_BPIL_PROG_TAGS; - arrays |= 1UL << PERF_BPIL_JITED_INSNS; - arrays |= 1UL << PERF_BPIL_LINE_INFO; - arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; + arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - info_linear = get_bpf_prog_info_linear(fd, arrays); + info_linear = bpf_program__get_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { info_linear = NULL; pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); @@ -482,8 +456,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, static void perf_env__add_bpf_info(struct perf_env *env, u32 id) { + struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; - struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; @@ -493,15 +467,15 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) if (fd < 0) return; - arrays = 1UL << PERF_BPIL_JITED_KSYMS; - arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; - arrays |= 1UL << PERF_BPIL_FUNC_INFO; - arrays |= 1UL << PERF_BPIL_PROG_TAGS; - arrays |= 1UL << PERF_BPIL_JITED_INSNS; - arrays |= 1UL << PERF_BPIL_LINE_INFO; - arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; + arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - info_linear = get_bpf_prog_info_linear(fd, arrays); + info_linear = bpf_program__get_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. aborting\n", __func__); goto out; diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 144a8a24cc..68f315c3df 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -19,7 +19,7 @@ struct evlist; struct target; struct bpf_prog_info_node { - struct perf_bpil *info_linear; + struct bpf_prog_info_linear *info_linear; struct rb_node rb_node; }; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 16ec605a9f..71710a1da4 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -29,9 +29,6 @@ #include -/* temporarily disable libbpf deprecation warnings */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), const char *fmt, va_list args) { @@ -424,7 +421,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n, size_t prologue_cnt = 0; int i, err; - if (IS_ERR_OR_NULL(priv) || priv->is_tp) + if (IS_ERR(priv) || !priv || priv->is_tp) goto errout; pev = &priv->pev; @@ -573,7 +570,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) bool need_prologue = false; int err, i; - if (IS_ERR_OR_NULL(priv)) { + if (IS_ERR(priv) || !priv) { pr_debug("Internal error when hook preprocessor\n"); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -645,11 +642,8 @@ int bpf__probe(struct bpf_object *obj) goto out; priv = bpf_program__priv(prog); - if (IS_ERR_OR_NULL(priv)) { - if (!priv) - err = -BPF_LOADER_ERRNO__INTERNAL; - else - err = PTR_ERR(priv); + if (IS_ERR(priv) || !priv) { + err = PTR_ERR(priv); goto out; } @@ -699,7 +693,7 @@ int bpf__unprobe(struct bpf_object *obj) struct bpf_prog_priv *priv = bpf_program__priv(prog); int i; - if (IS_ERR_OR_NULL(priv) || priv->is_tp) + if (IS_ERR(priv) || !priv || priv->is_tp) continue; for (i = 0; i < priv->pev.ntevs; i++) { @@ -757,7 +751,7 @@ int bpf__foreach_event(struct bpf_object *obj, struct perf_probe_event *pev; int i, fd; - if (IS_ERR_OR_NULL(priv)) { + if (IS_ERR(priv) || !priv) { pr_debug("bpf: failed to get private field\n"); return -BPF_LOADER_ERRNO__INTERNAL; } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 3ce8d03cb7..ba0f208536 100644 --- a/tools/perf/util/bpf_counter.c +++ 
b/tools/perf/util/bpf_counter.c @@ -13,7 +13,6 @@ #include #include "bpf_counter.h" -#include "bpf-utils.h" #include "counts.h" #include "debug.h" #include "evsel.h" @@ -62,13 +61,14 @@ static int bpf_program_profiler__destroy(struct evsel *evsel) static char *bpf_target_prog_name(int tgt_fd) { + struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; - struct perf_bpil *info_linear; const struct btf_type *t; struct btf *btf = NULL; char *name = NULL; - info_linear = get_bpf_prog_info_linear(tgt_fd, 1UL << PERF_BPIL_FUNC_INFO); + info_linear = bpf_program__get_prog_info_linear( + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd); return NULL; @@ -127,9 +127,9 @@ static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) skel->rodata->num_cpu = evsel__nr_cpus(evsel); - bpf_map__set_max_entries(skel->maps.events, evsel__nr_cpus(evsel)); - bpf_map__set_max_entries(skel->maps.fentry_readings, 1); - bpf_map__set_max_entries(skel->maps.accum_readings, 1); + bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel)); + bpf_map__resize(skel->maps.fentry_readings, 1); + bpf_map__resize(skel->maps.accum_readings, 1); prog_name = bpf_target_prog_name(prog_fd); if (!prog_name) { @@ -265,7 +265,7 @@ static int bpf_program_profiler__read(struct evsel *evsel) return 0; } -static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx, +static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, int fd) { struct bpf_prog_profiler_bpf *skel; @@ -277,7 +277,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx assert(skel != NULL); ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu_map_idx, &fd, BPF_ANY); + &cpu, &fd, BPF_ANY); if (ret) return ret; } @@ -307,20 +307,6 @@ static bool bperf_attr_map_compatible(int attr_map_fd) (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); } -int __weak -bpf_map_create(enum bpf_map_type map_type, - const char *map_name __maybe_unused, - __u32 key_size, - __u32 value_size, - __u32 max_entries, - const struct bpf_map_create_opts *opts __maybe_unused) -{ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return bpf_create_map(map_type, key_size, value_size, max_entries, 0); -#pragma GCC diagnostic pop -} - static int bperf_lock_attr_map(struct target *target) { char path[PATH_MAX]; @@ -334,10 +320,10 @@ static int bperf_lock_attr_map(struct target *target) } if (access(path, F_OK)) { - map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(struct perf_event_attr), sizeof(struct perf_event_attr_map_entry), - ATTR_MAP_SIZE, NULL); + ATTR_MAP_SIZE, 0); if (map_fd < 0) return -1; @@ -413,7 +399,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, return -1; } - bpf_map__set_max_entries(skel->maps.events, libbpf_num_possible_cpus()); + bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); err = bperf_leader_bpf__load(skel); if (err) { pr_err("Failed to load leader skeleton\n"); @@ -554,7 +540,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) filter_type == BPERF_FILTER_TGID) key = evsel->core.threads->map[i].pid; else if (filter_type == BPERF_FILTER_CPU) - key = evsel->core.cpus->map[i].cpu; + key = evsel->core.cpus->map[i]; else break; @@ -580,12 +566,12 @@ static int bperf__load(struct evsel *evsel, struct target 
*target) return err; } -static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) +static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) { struct bperf_leader_bpf *skel = evsel->leader_skel; return bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu_map_idx, &fd, BPF_ANY); + &cpu, &fd, BPF_ANY); } /* @@ -598,7 +584,7 @@ static int bperf_sync_counters(struct evsel *evsel) num_cpu = all_cpu_map->nr; for (i = 0; i < num_cpu; i++) { - cpu = all_cpu_map->map[i].cpu; + cpu = all_cpu_map->map[i]; bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); } return 0; @@ -619,17 +605,15 @@ static int bperf__disable(struct evsel *evsel) static int bperf__read(struct evsel *evsel) { struct bperf_follower_bpf *skel = evsel->follower_skel; - __u32 num_cpu_bpf = cpu__max_cpu().cpu; + __u32 num_cpu_bpf = cpu__max_cpu(); struct bpf_perf_event_value values[num_cpu_bpf]; int reading_map_fd, err = 0; - __u32 i; - int j; + __u32 i, j, num_cpu; bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { - struct perf_cpu entry; __u32 cpu; err = bpf_map_lookup_elem(reading_map_fd, &i, values); @@ -639,15 +623,16 @@ static int bperf__read(struct evsel *evsel) case BPERF_FILTER_GLOBAL: assert(i == 0); - perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) { - cpu = entry.cpu; + num_cpu = all_cpu_map->nr; + for (j = 0; j < num_cpu; j++) { + cpu = all_cpu_map->map[j]; perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; } break; case BPERF_FILTER_CPU: - cpu = evsel->core.cpus->map[i].cpu; + cpu = evsel->core.cpus->map[i]; perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, i, 0)->run = values[cpu].running; @@ -772,11 +757,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel) evsel->follower_skel == NULL; } -int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) { if (bpf_counter_skip(evsel)) return 0; - return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd); + return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); } int bpf_counter__load(struct evsel *evsel, struct target *target) diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 4dbf26408b..65ebaa6694 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, struct target *target); typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, - int cpu_map_idx, + int cpu, int fd); struct bpf_counter_ops { @@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel); int bpf_counter__disable(struct evsel *evsel); int bpf_counter__read(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel); -int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd); +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); #else /* HAVE_BPF_SKEL */ diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index ac60c08e8e..89aa5e71db 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -48,7 +48,7 @@ static int 
bperf_load_program(struct evlist *evlist) struct cgroup *cgrp, *leader_cgrp; __u32 i, cpu; __u32 nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu().cpu; + int total_cpus = cpu__max_cpu(); int map_size, map_fd; int prog_fd, err; @@ -65,14 +65,14 @@ static int bperf_load_program(struct evlist *evlist) /* we need one copy of events per cpu for reading */ map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.events, map_size); - bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); + bpf_map__resize(skel->maps.events, map_size); + bpf_map__resize(skel->maps.cgrp_idx, nr_cgroups); /* previous result is saved in a per-cpu array */ map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.prev_readings, map_size); + bpf_map__resize(skel->maps.prev_readings, map_size); /* cgroup result needs all events (per-cpu) */ map_size = evlist->core.nr_entries; - bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); + bpf_map__resize(skel->maps.cgrp_readings, map_size); set_max_rlimit(); @@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist) for (cpu = 0; cpu < nr_cpus; cpu++) { int fd = FD(evsel, cpu); __u32 idx = evsel->core.idx * total_cpus + - evlist->core.all_cpus->map[cpu].cpu; + evlist->core.all_cpus->map[cpu]; err = bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); @@ -212,7 +212,7 @@ static int bperf_cgrp__sync_counters(struct evlist *evlist) int prog_fd = bpf_program__fd(skel->progs.trigger_read); for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i].cpu; + cpu = evlist->core.all_cpus->map[i]; bperf_trigger_reading(prog_fd, cpu); } @@ -245,7 +245,7 @@ static int bperf_cgrp__read(struct evsel *evsel) { struct evlist *evlist = evsel->evlist; int i, cpu, nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu().cpu; + int total_cpus = cpu__max_cpu(); struct perf_counts_values *counts; struct bpf_perf_event_value *values; int reading_map_fd, err = 0; @@ -266,13 +266,13 @@ static int bperf_cgrp__read(struct evsel *evsel) idx = evsel->core.idx; err = bpf_map_lookup_elem(reading_map_fd, &idx, values); if (err) { - pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n", + pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n", idx, evsel__name(evsel), evsel->cgrp->name); goto out; } for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i].cpu; + cpu = evlist->core.all_cpus->map[i]; counts = perf_counts(evsel->counts, i, 0); counts->val = values[cpu].counter; diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index f193998530..6d2ea67b16 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include "vmlinux.h" +#include +#include #include #include #include "bperf_u.h" diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c index e2a2d4cd77..d82e1633a2 100644 --- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include "vmlinux.h" +#include +#include #include #include diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index 97037d3b3d..ab12b4c4ec 100644 --- 
a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include "vmlinux.h" +#include #include #include diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h index d3731a876b..2df8a45bd0 100644 --- a/tools/perf/util/c++/clang-c.h +++ b/tools/perf/util/c++/clang-c.h @@ -12,9 +12,8 @@ extern "C" { extern void perf_clang__init(void); extern void perf_clang__cleanup(void); -struct test_suite; -extern int test__clang_to_IR(struct test_suite *test, int subtest); -extern int test__clang_to_obj(struct test_suite *test, int subtest); +extern int test__clang_to_IR(void); +extern int test__clang_to_obj(void); extern int perf_clang__compile_bpf(const char *filename, void **p_obj_buf, @@ -27,6 +26,9 @@ extern int perf_clang__compile_bpf(const char *filename, static inline void perf_clang__init(void) { } static inline void perf_clang__cleanup(void) { } +static inline int test__clang_to_IR(void) { return -1; } +static inline int test__clang_to_obj(void) { return -1;} + static inline int perf_clang__compile_bpf(const char *filename __maybe_unused, void **p_obj_buf __maybe_unused, diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp index a4683ca536..21b23605f7 100644 --- a/tools/perf/util/c++/clang-test.cpp +++ b/tools/perf/util/c++/clang-test.cpp @@ -35,8 +35,7 @@ __test__clang_to_IR(void) } extern "C" { -int test__clang_to_IR(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__clang_to_IR(void) { perf_clang_scope _scope; @@ -49,8 +48,7 @@ int test__clang_to_IR(struct test_suite *test __maybe_unused, return -1; } -int test__clang_to_obj(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +int test__clang_to_obj(void) { perf_clang_scope _scope; diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index df7b18fb6b..c8885dfa36 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -43,6 +43,8 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-cc1", "-triple", "bpf-pc-linux", "-fsyntax-only", + "-ferror-limit", "19", + "-fmessage-length", "127", "-O2", "-nostdsysteminc", "-nobuiltininc", @@ -53,11 +55,7 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-x", "c"}; CCArgs.append(CFlags.begin(), CFlags.end()); - CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs -#if CLANG_VERSION_MAJOR >= 11 - ,/*BinaryName=*/nullptr -#endif - ); + CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs); FrontendOptions& Opts = CI->getFrontendOpts(); Opts.Inputs.clear(); @@ -153,16 +151,13 @@ getBPFObjectFromModule(llvm::Module *Module) legacy::PassManager PM; bool NotAdded; - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream -#if CLANG_VERSION_MAJOR >= 7 - , /*DwoOut=*/nullptr -#endif -#if CLANG_VERSION_MAJOR < 10 - , TargetMachine::CGFT_ObjectFile +#if CLANG_VERSION_MAJOR < 7 + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, + TargetMachine::CGFT_ObjectFile); #else - , llvm::CGFT_ObjectFile + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, + TargetMachine::CGFT_ObjectFile); #endif - ); if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr>(nullptr); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 131207b91d..8e2777133b 100644 --- 
a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) map__zput(node->ms.map); } -void callchain_param_setup(u64 sample_type, const char *arch) +void callchain_param_setup(u64 sample_type) { if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && @@ -1612,18 +1612,6 @@ void callchain_param_setup(u64 sample_type, const char *arch) else callchain_param.record_mode = CALLCHAIN_FP; } - - /* - * It's necessary to use libunwind to reliably determine the caller of - * a leaf function on aarch64, as otherwise we cannot know whether to - * start from the LR or FP. - * - * Always starting from the LR can result in duplicate or entirely - * erroneous entries. Always skipping the LR and starting from the FP - * can result in missing entries. - */ - if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64")) - dwarf_callchain_users = true; } static bool chain_match(struct callchain_list *base_chain, diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index d95615daed..5824134f98 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -280,8 +280,6 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, } #endif -void arch__add_leaf_frame_record_opts(struct record_opts *opts); - char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); char *callchain_node__scnprintf_value(struct callchain_node *node, @@ -300,7 +298,7 @@ int callchain_branch_counts(struct callchain_root *root, u64 *branch_count, u64 *predicted_count, u64 *abort_count, u64 *cycles_count); -void callchain_param_setup(u64 sample_type, const char *arch); +void callchain_param_setup(u64 sample_type); bool callchain_cnode_matched(struct callchain_node *base_cnode, struct callchain_node *pair_cnode); diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 7a447d9184..582f3aeaf5 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -4,7 +4,6 @@ #include #include "evsel.h" #include "counts.h" -#include #include struct perf_counts *perf_counts__new(int ncpus, int nthreads) @@ -56,12 +55,9 @@ void evsel__reset_counts(struct evsel *evsel) perf_counts__reset(evsel->counts); } -int evsel__alloc_counts(struct evsel *evsel) +int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) { - struct perf_cpu_map *cpus = evsel__cpus(evsel); - int nthreads = perf_thread_map__nr(evsel->core.threads); - - evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads); + evsel->counts = perf_counts__new(ncpus, nthreads); return evsel->counts != NULL ? 
0 : -ENOMEM; } diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 5de275194f..7ff36bf6d6 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -18,21 +18,21 @@ struct perf_counts { static inline struct perf_counts_values* -perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread) +perf_counts(struct perf_counts *counts, int cpu, int thread) { - return xyarray__entry(counts->values, cpu_map_idx, thread); + return xyarray__entry(counts->values, cpu, thread); } static inline bool -perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread) +perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread) { - return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)); + return *((bool *) xyarray__entry(counts->loaded, cpu, thread)); } static inline void -perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded) +perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded) { - *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded; + *((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded; } struct perf_counts *perf_counts__new(int ncpus, int nthreads); @@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts); void perf_counts__reset(struct perf_counts *counts); void evsel__reset_counts(struct evsel *evsel); -int evsel__alloc_counts(struct evsel *evsel); +int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); void evsel__free_counts(struct evsel *evsel); #endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 12b2243222..87d3eca9b8 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -13,13 +13,9 @@ #include #include -static struct perf_cpu max_cpu_num; -static struct perf_cpu max_present_cpu_num; +static int max_cpu_num; +static int max_present_cpu_num; static int max_node_num; -/** - * The numa node X as read from /sys/devices/system/node/nodeX indexed by the - * CPU number. - */ static int *cpunode_map; static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) @@ -37,9 +33,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) * otherwise it would become 65535. 
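perf_counts() above is a thin accessor over a 2-D (cpu x thread) table backed by xyarray. A minimal sketch of that row-major indexing with hypothetical names, assuming the caller sizes the table from its cpu and thread maps:

#include <stdio.h>
#include <stdlib.h>

struct counts {
        int ncpus, nthreads;
        unsigned long long *val;        /* ncpus * nthreads entries */
};

static unsigned long long *counts_entry(struct counts *c, int cpu, int thread)
{
        /* Row-major layout: one row per cpu, one column per thread. */
        return &c->val[cpu * c->nthreads + thread];
}

int main(void)
{
        struct counts c = { .ncpus = 2, .nthreads = 3 };

        c.val = calloc(c.ncpus * c.nthreads, sizeof(*c.val));
        if (!c.val)
                return 1;
        *counts_entry(&c, 1, 2) = 100;
        printf("%llu\n", *counts_entry(&c, 1, 2));
        free(c.val);
        return 0;
}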
*/ if (cpus->cpu[i] == (u16) -1) - map->map[i].cpu = -1; + map->map[i] = -1; else - map->map[i].cpu = (int) cpus->cpu[i]; + map->map[i] = (int) cpus->cpu[i]; } } @@ -58,7 +54,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map int cpu, i = 0; for_each_set_bit(cpu, mask->mask, nbits) - map->map[i++].cpu = cpu; + map->map[i++] = cpu; } return map; @@ -91,7 +87,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i].cpu = -1; + cpus->map[i] = -1; refcount_set(&cpus->refcnt, 1); } @@ -108,7 +104,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = aggr_cpu_id__empty(); + cpus->map[i] = cpu_map__empty_aggr_cpu_id(); refcount_set(&cpus->refcnt, 1); } @@ -126,21 +122,28 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value) return sysfs__read_int(path, value); } -int cpu__get_socket_id(struct perf_cpu cpu) +int cpu_map__get_socket_id(int cpu) { - int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value); + int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value); return ret ?: value; } -struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, + void *data __maybe_unused) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - id.socket = cpu__get_socket_id(cpu); + if (idx > map->nr) + return id; + + cpu = map->map[idx]; + + id.socket = cpu_map__get_socket_id(cpu); return id; } -static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) { struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; @@ -157,64 +160,57 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) return a->thread - b->thread; } -struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, - aggr_cpu_id_get_t get_id, - void *data) +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), + void *data) { - int idx; - struct perf_cpu cpu; - struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr); + int nr = cpus->nr; + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr); + int cpu, s2; + struct aggr_cpu_id s1; if (!c) - return NULL; + return -1; /* Reset size as it may only be partially filled */ c->nr = 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - bool duplicate = false; - struct aggr_cpu_id cpu_id = get_id(cpu, data); - - for (int j = 0; j < c->nr; j++) { - if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) { - duplicate = true; + for (cpu = 0; cpu < nr; cpu++) { + s1 = f(cpus, cpu, data); + for (s2 = 0; s2 < c->nr; s2++) { + if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) break; - } } - if (!duplicate) { - c->map[c->nr] = cpu_id; + if (s2 == c->nr) { + c->map[c->nr] = s1; c->nr++; } } - /* Trim. 
*/ - if (c->nr != cpus->nr) { - struct cpu_aggr_map *trimmed_c = - realloc(c, - sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr); - - if (trimmed_c) - c = trimmed_c; - } /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); - - return c; + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id); + *res = c; + return 0; } -int cpu__get_die_id(struct perf_cpu cpu) +int cpu_map__get_die_id(int cpu) { - int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value); + int value, ret = cpu__get_topology_int(cpu, "die_id", &value); return ret ?: value; } -struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) { - struct aggr_cpu_id id; - int die; + int cpu, die; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - die = cpu__get_die_id(cpu); + if (idx > map->nr) + return id; + + cpu = map->map[idx]; + + die = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ if (die == -1) die = 0; @@ -224,59 +220,79 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) * with the socket ID and then add die to * make a unique ID. */ - id = aggr_cpu_id__socket(cpu, data); - if (aggr_cpu_id__is_empty(&id)) + id = cpu_map__get_socket(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) return id; id.die = die; return id; } -int cpu__get_core_id(struct perf_cpu cpu) +int cpu_map__get_core_id(int cpu) { - int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value); + int value, ret = cpu__get_topology_int(cpu, "core_id", &value); return ret ?: value; } -struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data) +int cpu_map__get_node_id(int cpu) { - struct aggr_cpu_id id; - int core = cpu__get_core_id(cpu); + return cpu__get_node(cpu); +} - /* aggr_cpu_id__die returns a struct with socket and die set. */ - id = aggr_cpu_id__die(cpu, data); - if (aggr_cpu_id__is_empty(&id)) +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +{ + int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + + if (idx > map->nr) + return id; + + cpu = map->map[idx]; + + cpu = cpu_map__get_core_id(cpu); + + /* cpu_map__get_die returns a struct with socket and die set*/ + id = cpu_map__get_die(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) return id; /* * core_id is relative to socket and die, we need a global id. * So we combine the result from cpu_map__get_die with the core id */ - id.core = core; + id.core = cpu; return id; - } -struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data) +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) { - struct aggr_cpu_id id; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - /* aggr_cpu_id__core returns a struct with socket, die and core set. 
*/ - id = aggr_cpu_id__core(cpu, data); - if (aggr_cpu_id__is_empty(&id)) + if (idx < 0 || idx >= map->nr) return id; - id.cpu = cpu; + id.node = cpu_map__get_node_id(map->map[idx]); return id; - } -struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused) +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); +} - id.node = cpu__get_node(cpu); - return id; +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) +{ + return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); +} + +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) +{ + return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); +} + +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) +{ + return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL); } /* setup simple routines to easily access node numbers given a cpu number */ @@ -319,8 +335,8 @@ static void set_max_cpu_num(void) int ret = -1; /* set up default */ - max_cpu_num.cpu = 4096; - max_present_cpu_num.cpu = 4096; + max_cpu_num = 4096; + max_present_cpu_num = 4096; mnt = sysfs__mountpoint(); if (!mnt) @@ -333,7 +349,7 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_cpu_num.cpu); + ret = get_max_num(path, &max_cpu_num); if (ret) goto out; @@ -344,11 +360,11 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_present_cpu_num.cpu); + ret = get_max_num(path, &max_present_cpu_num); out: if (ret) - pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); + pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num); } /* Determine highest possible node in the system for sparse allocation */ @@ -387,31 +403,31 @@ int cpu__max_node(void) return max_node_num; } -struct perf_cpu cpu__max_cpu(void) +int cpu__max_cpu(void) { - if (unlikely(!max_cpu_num.cpu)) + if (unlikely(!max_cpu_num)) set_max_cpu_num(); return max_cpu_num; } -struct perf_cpu cpu__max_present_cpu(void) +int cpu__max_present_cpu(void) { - if (unlikely(!max_present_cpu_num.cpu)) + if (unlikely(!max_present_cpu_num)) set_max_cpu_num(); return max_present_cpu_num; } -int cpu__get_node(struct perf_cpu cpu) +int cpu__get_node(int cpu) { if (unlikely(cpunode_map == NULL)) { pr_debug("cpu_map not initialized\n"); return -1; } - return cpunode_map[cpu.cpu]; + return cpunode_map[cpu]; } static int init_cpunode_map(void) @@ -421,13 +437,13 @@ static int init_cpunode_map(void) set_max_cpu_num(); set_max_node_num(); - cpunode_map = calloc(max_cpu_num.cpu, sizeof(int)); + cpunode_map = calloc(max_cpu_num, sizeof(int)); if (!cpunode_map) { pr_err("%s: calloc failed\n", __func__); return -1; } - for (i = 0; i < max_cpu_num.cpu; i++) + for (i = 0; i < max_cpu_num; i++) cpunode_map[i] = -1; return 0; @@ -486,39 +502,47 @@ int cpu__setup_cpunode_map(void) return 0; } +bool cpu_map__has(struct perf_cpu_map *cpus, int cpu) +{ + return perf_cpu_map__idx(cpus, cpu) != -1; +} + +int cpu_map__cpu(struct perf_cpu_map *cpus, int idx) +{ + return cpus->map[idx]; +} + size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) { - int i, start = -1; + int i, cpu, start = -1; bool first = true; size_t ret = 0; #define COMMA first ? 
"" : "," for (i = 0; i < map->nr + 1; i++) { - struct perf_cpu cpu = { .cpu = INT_MAX }; bool last = i == map->nr; - if (!last) - cpu = map->map[i]; + cpu = last ? INT_MAX : map->map[i]; if (start == -1) { start = i; if (last) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[i].cpu); + map->map[i]); } - } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) { + } else if (((i - start) != (cpu - map->map[start])) || last) { int end = i - 1; if (start == end) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[start].cpu); + map->map[start]); } else { ret += snprintf(buf + ret, size - ret, "%s%d-%d", COMMA, - map->map[start].cpu, map->map[end].cpu); + map->map[start], map->map[end]); } first = false; start = i; @@ -545,23 +569,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) int i, cpu; char *ptr = buf; unsigned char *bitmap; - struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1); + int last_cpu = cpu_map__cpu(map, map->nr - 1); if (buf == NULL) return 0; - bitmap = zalloc(last_cpu.cpu / 8 + 1); + bitmap = zalloc(last_cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; } for (i = 0; i < map->nr; i++) { - cpu = perf_cpu_map__cpu(map, i).cpu; + cpu = cpu_map__cpu(map, i); bitmap[cpu / 8] |= 1 << (cpu % 8); } - for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { + for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) { unsigned char bits = bitmap[cpu / 8]; if (cpu % 8) @@ -590,35 +614,32 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ return online; } -bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { - return a->thread == b->thread && - a->node == b->node && - a->socket == b->socket && - a->die == b->die && - a->core == b->core && - a->cpu.cpu == b->cpu.cpu; + return a.thread == b.thread && + a.node == b.node && + a.socket == b.socket && + a.die == b.die && + a.core == b.core; } -bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { - return a->thread == -1 && - a->node == -1 && - a->socket == -1 && - a->die == -1 && - a->core == -1 && - a->cpu.cpu == -1; + return a.thread == -1 && + a.node == -1 && + a.socket == -1 && + a.die == -1 && + a.core == -1; } -struct aggr_cpu_id aggr_cpu_id__empty(void) +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { .thread = -1, .node = -1, .socket = -1, .die = -1, - .core = -1, - .cpu = (struct perf_cpu){ .cpu = -1 }, + .core = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 703ae6d338..a27eeaf086 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -2,134 +2,71 @@ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H -#include #include +#include #include #include -/** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { - /** A value in the range 0 to number of threads. */ int thread; - /** The numa node X as read from /sys/devices/system/node/nodeX. */ int node; - /** - * The socket number as read from - * /sys/devices/system/cpu/cpuX/topology/physical_package_id. - */ int socket; - /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */ int die; - /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; - /** CPU aggregation, note there is one CPU for each SMT thread. 
*/ - struct perf_cpu cpu; }; -/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ struct cpu_aggr_map { refcount_t refcnt; - /** Number of valid entries. */ int nr; - /** The entries. */ struct aggr_cpu_id map[]; }; struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); +int cpu_map__get_socket_id(int cpu); +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); +int cpu_map__get_die_id(int cpu); +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); +int cpu_map__get_core_id(int cpu); +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); +int cpu_map__get_node_id(int cpu); +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ +static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) +{ + if (!sock || s >= sock->nr || s < 0) + return 0; + return sock->map[s]; +} + int cpu__setup_cpunode_map(void); int cpu__max_node(void); -struct perf_cpu cpu__max_cpu(void); -struct perf_cpu cpu__max_present_cpu(void); +int cpu__max_cpu(void); +int cpu__max_present_cpu(void); +int cpu__get_node(int cpu); -/** - * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1. - */ -static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus) -{ - return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1; -} +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), + void *data); -/** - * cpu__get_node - Returns the numa node X as read from - * /sys/devices/system/node/nodeX for the given CPU. - */ -int cpu__get_node(struct perf_cpu cpu); -/** - * cpu__get_socket_id - Returns the socket number as read from - * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU. - */ -int cpu__get_socket_id(struct perf_cpu cpu); -/** - * cpu__get_die_id - Returns the die id as read from - * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU. - */ -int cpu__get_die_id(struct perf_cpu cpu); -/** - * cpu__get_core_id - Returns the core id as read from - * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU. - */ -int cpu__get_core_id(struct perf_cpu cpu); +int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); +bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); -/** - * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry - * being empty.
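
Typical use of the build helpers declared above, as a hedged sketch (error handling condensed; example_aggr_by_socket is hypothetical): build the aggregation map once, then index it. cpu_map__build_map() leaves the entries sorted and de-duplicated, one per distinct key.

static int example_aggr_by_socket(struct perf_cpu_map *cpus)
{
	struct cpu_aggr_map *sockets = NULL;
	int i;

	if (cpu_map__build_socket_map(cpus, &sockets))
		return -1;

	for (i = 0; i < sockets->nr; i++)	/* one entry per socket */
		pr_debug("aggr %d: socket %d\n", i, sockets->map[i].socket);
	return 0;
}
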
- */ -struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); - -typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data); - -/** - * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in - * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value - * passed to it. The cpu_aggr_map is sorted with duplicate values removed. - */ -struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, - aggr_cpu_id_get_t get_id, - void *data); - -bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); -bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); -struct aggr_cpu_id aggr_cpu_id__empty(void); - - -/** - * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with - * the socket for cpu. The function signature is compatible with - * aggr_cpu_id_get_t. - */ -struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data); -/** - * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated - * with the die and socket for cpu. The function signature is compatible with - * aggr_cpu_id_get_t. - */ -struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data); -/** - * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket - * populated with the core, die and socket for cpu. The function signature is - * compatible with aggr_cpu_id_get_t. - */ -struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data); -/** - * aggr_cpu_id__core - Create an aggr_cpu_id with the cpu, core, die and socket - * populated with the cpu, core, die and socket for cpu. The function signature - * is compatible with aggr_cpu_id_get_t. - */ -struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data); -/** - * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for - * cpu. The function signature is compatible with aggr_cpu_id_get_t. 
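
Since cpu_map__build_map() is generic over its callback, groupings beyond socket/die/core/node need no extra infrastructure. A hypothetical callback that collapses every CPU into one bucket, shown only to illustrate the shape the function pointer expects:

static struct aggr_cpu_id example_get_global(struct perf_cpu_map *map __maybe_unused,
					     int idx __maybe_unused,
					     void *data __maybe_unused)
{
	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();

	id.socket = 0;	/* same key for every CPU => one aggregate entry */
	return id;
}

/* cpu_map__build_map(cpus, &res, example_get_global, NULL) yields res->nr == 1 */
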
- */ -struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data); +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b); +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index d275d843c1..ec77e2a7b3 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -14,16 +14,14 @@ #include "env.h" #include "pmu-hybrid.h" -#define PACKAGE_CPUS_FMT \ - "%s/devices/system/cpu/cpu%d/topology/package_cpus_list" -#define PACKAGE_CPUS_FMT_OLD \ +#define CORE_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" -#define DIE_CPUS_FMT \ +#define DIE_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/die_cpus_list" -#define CORE_CPUS_FMT \ - "%s/devices/system/cpu/cpu%d/topology/core_cpus_list" -#define CORE_CPUS_FMT_OLD \ +#define THRD_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" +#define THRD_SIB_FMT_NEW \ + "%s/devices/system/cpu/cpu%d/topology/core_cpus_list" #define NODE_ONLINE_FMT \ "%s/devices/system/node/online" #define NODE_MEMINFO_FMT \ @@ -41,12 +39,8 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) u32 i = 0; int ret = -1; - scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT, + scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT, sysfs__mountpoint(), cpu); - if (access(filename, F_OK) == -1) { - scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD, - sysfs__mountpoint(), cpu); - } fp = fopen(filename, "r"); if (!fp) goto try_dies; @@ -60,23 +54,23 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) if (p) *p = '\0'; - for (i = 0; i < tp->package_cpus_lists; i++) { - if (!strcmp(buf, tp->package_cpus_list[i])) + for (i = 0; i < tp->core_sib; i++) { + if (!strcmp(buf, tp->core_siblings[i])) break; } - if (i == tp->package_cpus_lists) { - tp->package_cpus_list[i] = buf; - tp->package_cpus_lists++; + if (i == tp->core_sib) { + tp->core_siblings[i] = buf; + tp->core_sib++; buf = NULL; len = 0; } ret = 0; try_dies: - if (!tp->die_cpus_list) + if (!tp->die_siblings) goto try_threads; - scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, sysfs__mountpoint(), cpu); fp = fopen(filename, "r"); if (!fp) @@ -91,23 +85,23 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) if (p) *p = '\0'; - for (i = 0; i < tp->die_cpus_lists; i++) { - if (!strcmp(buf, tp->die_cpus_list[i])) + for (i = 0; i < tp->die_sib; i++) { + if (!strcmp(buf, tp->die_siblings[i])) break; } - if (i == tp->die_cpus_lists) { - tp->die_cpus_list[i] = buf; - tp->die_cpus_lists++; + if (i == tp->die_sib) { + tp->die_siblings[i] = buf; + tp->die_sib++; buf = NULL; len = 0; } ret = 0; try_threads: - scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT, + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW, sysfs__mountpoint(), cpu); if (access(filename, F_OK) == -1) { - scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD, + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, sysfs__mountpoint(), cpu); } fp = fopen(filename, "r"); @@ -121,13 +115,13 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) if (p) *p = '\0'; - for (i = 0; i < tp->core_cpus_lists; i++) { - if (!strcmp(buf, tp->core_cpus_list[i])) + for (i = 0; i < tp->thread_sib; i++) { + if (!strcmp(buf, tp->thread_siblings[i])) break; } - if (i == tp->core_cpus_lists) { - tp->core_cpus_list[i] = buf; - tp->core_cpus_lists++; + if (i == tp->thread_sib) { + 
tp->thread_siblings[i] = buf; + tp->thread_sib++; buf = NULL; } ret = 0; @@ -145,14 +139,16 @@ void cpu_topology__delete(struct cpu_topology *tp) if (!tp) return; - for (i = 0 ; i < tp->package_cpus_lists; i++) - zfree(&tp->package_cpus_list[i]); + for (i = 0 ; i < tp->core_sib; i++) + zfree(&tp->core_siblings[i]); - for (i = 0 ; i < tp->die_cpus_lists; i++) - zfree(&tp->die_cpus_list[i]); + if (tp->die_sib) { + for (i = 0 ; i < tp->die_sib; i++) + zfree(&tp->die_siblings[i]); + } - for (i = 0 ; i < tp->core_cpus_lists; i++) - zfree(&tp->core_cpus_list[i]); + for (i = 0 ; i < tp->thread_sib; i++) + zfree(&tp->thread_siblings[i]); free(tp); } @@ -165,11 +161,10 @@ static bool has_die_topology(void) if (uname(&uts) < 0) return false; - if (strncmp(uts.machine, "x86_64", 6) && - strncmp(uts.machine, "s390x", 5)) + if (strncmp(uts.machine, "x86_64", 6)) return false; - scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, sysfs__mountpoint(), 0); if (access(filename, F_OK) == -1) return false; @@ -188,7 +183,7 @@ struct cpu_topology *cpu_topology__new(void) struct perf_cpu_map *map; bool has_die = has_die_topology(); - ncpus = cpu__max_present_cpu().cpu; + ncpus = cpu__max_present_cpu(); /* build online CPU map */ map = perf_cpu_map__new(NULL); @@ -210,16 +205,16 @@ struct cpu_topology *cpu_topology__new(void) tp = addr; addr += sizeof(*tp); - tp->package_cpus_list = addr; + tp->core_siblings = addr; addr += sz; if (has_die) { - tp->die_cpus_list = addr; + tp->die_siblings = addr; addr += sz; } - tp->core_cpus_list = addr; + tp->thread_siblings = addr; for (i = 0; i < nr; i++) { - if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i })) + if (!cpu_map__has(map, i)) continue; ret = build_cpu_topology(tp, i); @@ -325,7 +320,7 @@ struct numa_topology *numa_topology__new(void) if (!node_map) goto out; - nr = (u32) perf_cpu_map__nr(node_map); + nr = (u32) node_map->nr; tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr); if (!tp) @@ -334,7 +329,7 @@ struct numa_topology *numa_topology__new(void) tp->nr = nr; for (i = 0; i < nr; i++) { - if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) { + if (load_numa_node(&tp->nodes[i], node_map->map[i])) { numa_topology__delete(tp); tp = NULL; break; diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 854e18f904..d9af971770 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -5,33 +5,12 @@ #include struct cpu_topology { - /* The number of unique package_cpus_lists below. */ - u32 package_cpus_lists; - /* The number of unique die_cpu_lists below. */ - u32 die_cpus_lists; - /* The number of unique core_cpu_lists below. */ - u32 core_cpus_lists; - /* - * An array of strings where each string is unique and read from - * /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI - * each of these is a human-readable list of CPUs sharing the same - * physical_package_id. The format is like 0-3, 8-11, 14,17. - */ - const char **package_cpus_list; - /* - * An array of string where each string is unique and from - * /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI - * each of these is a human-readable list of CPUs within the same die. - * The format is like 0-3, 8-11, 14,17. - */ - const char **die_cpus_list; - /* - * An array of string where each string is unique and from - * /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI - * each of these is a human-readable list of CPUs within the same - * core. 
The format is like 0-3, 8-11, 14,17. - */ - const char **core_cpus_list; + u32 core_sib; + u32 die_sib; + u32 thread_sib; + char **core_siblings; + char **die_siblings; + char **thread_siblings; }; struct numa_topology_node { diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 8b95fb3c4d..f323adb1af 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -50,6 +50,8 @@ struct cs_etm_auxtrace { u8 timeless_decoding; u8 snapshot_mode; u8 data_queued; + u8 sample_branches; + u8 sample_instructions; int num_cpu; u64 latest_kernel_timestamp; @@ -408,8 +410,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, { struct cs_etm_packet *tmp; - if (etm->synth_opts.branches || etm->synth_opts.last_branch || - etm->synth_opts.instructions) { + if (etm->sample_branches || etm->synth_opts.last_branch || + etm->sample_instructions) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. @@ -535,7 +537,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq, fprintf(stdout, "\n"); color_fprintf(stdout, color, - ". ... CoreSight %s Trace data: size %#zx bytes\n", + ". ... CoreSight %s Trace data: size %zu bytes\n", cs_etm_decoder__get_name(etmq->decoder), buffer->size); do { @@ -1363,6 +1365,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; + etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; id += 1; @@ -1386,6 +1389,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; + etm->sample_instructions = true; etm->instructions_sample_type = attr.sample_type; etm->instructions_id = id; id += 1; @@ -1416,7 +1420,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, tidq->prev_packet->last_instr_taken_branch) cs_etm__update_last_branch_rb(etmq, tidq); - if (etm->synth_opts.instructions && + if (etm->sample_instructions && tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically @@ -1499,7 +1503,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, } } - if (etm->synth_opts.branches) { + if (etm->sample_branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ @@ -1553,7 +1557,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, goto swap_packet; if (etmq->etm->synth_opts.last_branch && - etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; @@ -1579,7 +1582,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } - if (etm->synth_opts.branches && + if (etm->sample_branches && tidq->prev_packet->sample_type == CS_ETM_RANGE) { err = cs_etm__synth_branch_sample(etmq, tidq); if (err) @@ -1611,7 +1614,6 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, * the trace. 
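
The role of the two new u8 fields in cs_etm_auxtrace is easiest to see side by side: each is set exactly where the corresponding synthetic event is created, so the per-packet paths test a cached byte instead of re-deriving the decision from synth_opts. Condensed shape of the pairing (not the literal hunks):

	/* setup, in cs_etm__synth_events(): */
	if (etm->synth_opts.branches) {
		/* ... synthesize the branch event ... */
		etm->sample_branches = true;
	}

	/* decode hot path, in cs_etm__sample()/cs_etm__flush(): */
	if (etm->sample_branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE)
		err = cs_etm__synth_branch_sample(etmq, tidq);
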
*/ if (etmq->etm->synth_opts.last_branch && - etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 9e0aee276d..aa862a26d9 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -318,8 +318,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; - if (flags & TEP_FIELD_IS_RELATIVE) - offset += fmtf->offset + fmtf->size; } if (flags & TEP_FIELD_IS_ARRAY) { @@ -1439,7 +1437,7 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if __BYTE_ORDER == __BIG_ENDIAN bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); #else bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 9cc8a1772b..9ed9a5676d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -14,7 +14,6 @@ #ifdef HAVE_LIBBPF_SUPPORT #include #include "bpf-event.h" -#include "bpf-utils.h" #endif #include "compress.h" #include "env.h" diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 011da3924f..83723ba11d 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -193,7 +193,7 @@ struct dso { int fd; int status; u32 status_seen; - u64 file_size; + size_t file_size; struct list_head open_entry; u64 debug_frame_offset; u64 eh_frame_hdr_offset; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 579e44c599..5b24eb0103 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -16,7 +16,6 @@ struct perf_env perf_env; #ifdef HAVE_LIBBPF_SUPPORT #include "bpf-event.h" -#include "bpf-utils.h" #include void perf_env__insert_bpf_prog_info(struct perf_env *env, @@ -285,13 +284,13 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) int perf_env__read_cpu_topology_map(struct perf_env *env) { - int idx, nr_cpus; + int cpu, nr_cpus; if (env->cpu != NULL) return 0; if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu().cpu; + env->nr_cpus_avail = cpu__max_present_cpu(); nr_cpus = env->nr_cpus_avail; if (nr_cpus == -1) @@ -301,12 +300,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) if (env->cpu == NULL) return -ENOMEM; - for (idx = 0; idx < nr_cpus; ++idx) { - struct perf_cpu cpu = { .cpu = idx }; - - env->cpu[idx].core_id = cpu__get_core_id(cpu); - env->cpu[idx].socket_id = cpu__get_socket_id(cpu); - env->cpu[idx].die_id = cpu__get_die_id(cpu); + for (cpu = 0; cpu < nr_cpus; ++cpu) { + env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); + env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); + env->cpu[cpu].die_id = cpu_map__get_die_id(cpu); } env->nr_cpus_avail = nr_cpus; @@ -383,7 +380,7 @@ static int perf_env__read_arch(struct perf_env *env) static int perf_env__read_nr_cpus_avail(struct perf_env *env) { if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu().cpu; + env->nr_cpus_avail = cpu__max_present_cpu(); return env->nr_cpus_avail ? 
0 : -ENOENT; } @@ -489,7 +486,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env) return env->pmu_mappings; } -int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) +int perf_env__numa_node(struct perf_env *env, int cpu) { if (!env->nr_numa_map) { struct numa_node *nn; @@ -497,7 +494,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) for (i = 0; i < env->nr_numa_nodes; i++) { nn = &env->numa_nodes[i]; - nr = max(nr, perf_cpu_map__max(nn->map).cpu); + nr = max(nr, perf_cpu_map__max(nn->map)); } nr++; @@ -516,14 +513,13 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) env->nr_numa_map = nr; for (i = 0; i < env->nr_numa_nodes; i++) { - struct perf_cpu tmp; - int j; + int tmp, j; nn = &env->numa_nodes[i]; - perf_cpu_map__for_each_cpu(tmp, j, nn->map) - env->numa_map[tmp.cpu] = i; + perf_cpu_map__for_each_cpu(j, tmp, nn->map) + env->numa_map[j] = i; } } - return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; + return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1; } diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index a3541f98e1..163e5ec503 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -4,7 +4,6 @@ #include #include -#include "cpumap.h" #include "rwsem.h" struct perf_cpu_map; @@ -171,5 +170,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); -int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); +int perf_env__numa_node(struct perf_env *env, int cpu); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fe24801f8e..ac706304af 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -57,7 +57,6 @@ static const char *perf_event__names[] = { [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", - [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -238,14 +237,6 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, return machine__process_itrace_start_event(machine, event); } -int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine) -{ - return machine__process_aux_output_hw_id_event(machine, event); -} - int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -416,12 +407,6 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } -size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp) -{ - return fprintf(fp, " hw_id: %#"PRI_lx64"\n", - event->aux_output_hw_id.hw_id); -} - size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) { bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; @@ -549,9 +534,6 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL case PERF_RECORD_TEXT_POKE: ret += perf_event__fprintf_text_poke(event, machine, fp); break; - case PERF_RECORD_AUX_OUTPUT_HW_ID: - ret += perf_event__fprintf_aux_output_hw_id(event, fp); - break; default: ret += 
fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c59331eea1..19ad64f2bd 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -44,16 +44,13 @@ struct perf_event_attr; /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) -/* number of register is bound by the number of bits in regs_dump::mask (64) */ -#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) - struct regs_dump { u64 abi; u64 mask; u64 *regs; /* Cached values/mask filled by first register access. */ - u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; + u64 cache_regs[PERF_REGS_MAX]; u64 cache_mask; }; @@ -333,10 +330,6 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_aux_output_hw_id(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct machine *machine); int perf_event__process_switch(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -404,7 +397,6 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); -size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp); size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c index 57f02beef0..f39c8ffc5a 100644 --- a/tools/perf/util/evlist-hybrid.c +++ b/tools/perf/util/evlist-hybrid.c @@ -124,23 +124,22 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list) events_nr++; - if (perf_cpu_map__nr(matched_cpus) > 0 && - (perf_cpu_map__nr(unmatched_cpus) > 0 || - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) || - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) { + if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 || + matched_cpus->nr < cpus->nr || + matched_cpus->nr < pmu->cpus->nr)) { perf_cpu_map__put(evsel->core.cpus); perf_cpu_map__put(evsel->core.own_cpus); evsel->core.cpus = perf_cpu_map__get(matched_cpus); evsel->core.own_cpus = perf_cpu_map__get(matched_cpus); - if (perf_cpu_map__nr(unmatched_cpus) > 0) { + if (unmatched_cpus->nr > 0) { cpu_map__snprint(matched_cpus, buf1, sizeof(buf1)); pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n", buf1, pmu->name, evsel->name); } } - if (perf_cpu_map__nr(matched_cpus) == 0) { + if (matched_cpus->nr == 0) { evlist__remove(evlist, evsel); evsel__delete(evsel); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index eaad04e167..5f92319ce2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -342,65 +342,36 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) return perf_thread_map__nr(evlist->core.threads); } -struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity) +void evlist__cpu_iter_start(struct evlist *evlist) { - struct evlist_cpu_iterator itr = { - .container = evlist, - .evsel = evlist__first(evlist), - .cpu_map_idx = 0, - .evlist_cpu_map_idx = 0, - .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus), - .cpu = (struct 
perf_cpu){ .cpu = -1}, - .affinity = affinity, - }; + struct evsel *pos; - if (itr.affinity) { - itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); - affinity__set(itr.affinity, itr.cpu.cpu); - itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); - /* - * If this CPU isn't in the evsel's cpu map then advance through - * the list. - */ - if (itr.cpu_map_idx == -1) - evlist_cpu_iterator__next(&itr); - } - return itr; + /* + * Reset the per evsel cpu_iter. This is needed because + * each evsel's cpumap may have a different index space, + * and some operations need the index to modify + * the FD xyarray (e.g. open, close) + */ + evlist__for_each_entry(evlist, pos) + pos->cpu_iter = 0; } -void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr) +bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu) { - while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) { - evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel); - evlist_cpu_itr->cpu_map_idx = - perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, - evlist_cpu_itr->cpu); - if (evlist_cpu_itr->cpu_map_idx != -1) - return; - } - evlist_cpu_itr->evlist_cpu_map_idx++; - if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) { - evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container); - evlist_cpu_itr->cpu = - perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus, - evlist_cpu_itr->evlist_cpu_map_idx); - if (evlist_cpu_itr->affinity) - affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu); - evlist_cpu_itr->cpu_map_idx = - perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, - evlist_cpu_itr->cpu); - /* - * If this CPU isn't in the evsel's cpu map then advance through - * the list. - */ - if (evlist_cpu_itr->cpu_map_idx == -1) - evlist_cpu_iterator__next(evlist_cpu_itr); - } + if (ev->cpu_iter >= ev->core.cpus->nr) + return true; + if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu) + return true; + return false; } -bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr) +bool evsel__cpu_iter_skip(struct evsel *ev, int cpu) { - return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr; + if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) { + ev->cpu_iter++; + return false; + } + return true; } static int evsel__strcmp(struct evsel *pos, char *evsel_name) @@ -429,36 +400,37 @@ static int evlist__is_enabled(struct evlist *evlist) static void __evlist__disable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; - struct evlist_cpu_iterator evlist_cpu_itr; - struct affinity saved_affinity, *affinity = NULL; + struct affinity affinity; + int cpu, i, imm = 0; bool has_imm = false; - // See explanation in evlist__close() - if (!cpu_map__is_dummy(evlist->core.cpus)) { - if (affinity__setup(&saved_affinity) < 0) - return; - affinity = &saved_affinity; - } + if (affinity__setup(&affinity) < 0) + return; /* Disable 'immediate' events last */ - for (int imm = 0; imm <= 1; imm++) { - evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { - pos = evlist_cpu_itr.evsel; - if (evsel__strcmp(pos, evsel_name)) - continue; - if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) - continue; - if (pos->immediate) - has_imm = true; - if (pos->immediate != imm) - continue; - evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx); + for (imm = 0; imm <= 1; imm++) { + evlist__for_each_cpu(evlist, i, cpu) { + affinity__set(&affinity, cpu); + + evlist__for_each_entry(evlist, pos) { + if 
(evsel__strcmp(pos, evsel_name)) + continue; + if (evsel__cpu_iter_skip(pos, cpu)) + continue; + if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) + continue; + if (pos->immediate) + has_imm = true; + if (pos->immediate != imm) + continue; + evsel__disable_cpu(pos, pos->cpu_iter - 1); + } } if (!has_imm) break; } - affinity__cleanup(affinity); + affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { if (evsel__strcmp(pos, evsel_name)) continue; @@ -490,25 +462,26 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name) static void __evlist__enable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; - struct evlist_cpu_iterator evlist_cpu_itr; - struct affinity saved_affinity, *affinity = NULL; + struct affinity affinity; + int cpu, i; - // See explanation in evlist__close() - if (!cpu_map__is_dummy(evlist->core.cpus)) { - if (affinity__setup(&saved_affinity) < 0) - return; - affinity = &saved_affinity; - } + if (affinity__setup(&affinity) < 0) + return; - evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { - pos = evlist_cpu_itr.evsel; - if (evsel__strcmp(pos, evsel_name)) - continue; - if (!evsel__is_group_leader(pos) || !pos->core.fd) - continue; - evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx); + evlist__for_each_cpu(evlist, i, cpu) { + affinity__set(&affinity, cpu); + + evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; + if (evsel__cpu_iter_skip(pos, cpu)) + continue; + if (!evsel__is_group_leader(pos) || !pos->core.fd) + continue; + evsel__enable_cpu(pos, pos->cpu_iter - 1); + } } - affinity__cleanup(affinity); + affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { if (evsel__strcmp(pos, evsel_name)) continue; @@ -827,7 +800,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, - int output, struct perf_cpu cpu) + int output, int cpu) { struct mmap *map = container_of(_map, struct mmap, core); struct mmap_params *mp = container_of(_mp, struct mmap_params, core); @@ -1291,14 +1264,14 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel) void evlist__close(struct evlist *evlist) { struct evsel *evsel; - struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; + int cpu, i; /* * With perf record core.cpus is usually NULL. * Use the old method to handle this for now. 
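
All of the loops above follow one pattern worth spelling out once: the outer walk covers evlist->core.all_cpus, each evsel keeps its own cpu_iter cursor into its (possibly smaller) CPU map, and once evsel__cpu_iter_skip() has accepted a CPU, pos->cpu_iter - 1 is the per-evsel FD index for it. Generic sketch of a caller (do_something_per_fd is a hypothetical stand-in):

static void example_walk(struct evlist *evlist)
{
	struct evsel *pos;
	int i, cpu;

	evlist__for_each_cpu(evlist, i, cpu) {
		evlist__for_each_entry(evlist, pos) {
			if (evsel__cpu_iter_skip(pos, cpu))
				continue;	/* no counter on this CPU */
			do_something_per_fd(pos, pos->cpu_iter - 1);
		}
	}
}
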
*/ - if (!evlist->core.cpus || cpu_map__is_dummy(evlist->core.cpus)) { + if (!evlist->core.cpus) { evlist__for_each_entry_reverse(evlist, evsel) evsel__close(evsel); return; @@ -1306,12 +1279,15 @@ void evlist__close(struct evlist *evlist) if (affinity__setup(&affinity) < 0) return; + evlist__for_each_cpu(evlist, i, cpu) { + affinity__set(&affinity, cpu); - evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { - perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core, - evlist_cpu_itr.cpu_map_idx); + evlist__for_each_entry_reverse(evlist, evsel) { + if (evsel__cpu_iter_skip(evsel, cpu)) + continue; + perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1); + } } - affinity__cleanup(&affinity); evlist__for_each_entry_reverse(evlist, evsel) { perf_evsel__free_fd(&evsel->core); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 64cba56fbc..97bfb8d0be 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -64,7 +64,6 @@ struct evlist { struct evsel *selected; struct events_stats stats; struct perf_env *env; - const char *hybrid_pmu_name; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); @@ -111,7 +110,6 @@ int __evlist__add_default_attrs(struct evlist *evlist, __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) int arch_evlist__add_default_attrs(struct evlist *evlist); -struct evsel *arch_evlist__leader(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); @@ -327,53 +325,17 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); #define evlist__for_each_entry_safe(evlist, tmp, evsel) \ __evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel) -/** Iterator state for evlist__for_each_cpu */ -struct evlist_cpu_iterator { - /** The list being iterated through. */ - struct evlist *container; - /** The current evsel of the iterator. */ - struct evsel *evsel; - /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */ - int cpu_map_idx; - /** - * The CPU map index corresponding to evlist->core.all_cpus for the - * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may - * contain fewer entries. - */ - int evlist_cpu_map_idx; - /** The number of CPU map entries in evlist->core.all_cpus. */ - int evlist_cpu_map_nr; - /** The current CPU of the iterator. */ - struct perf_cpu cpu; - /** If present, used to set the affinity when switching between CPUs. */ - struct affinity *affinity; -}; - -/** - * evlist__for_each_cpu - without affinity, iterate over the evlist. With - * affinity, iterate over all CPUs and then the evlist - * for each evsel on that CPU. When switching between - * CPUs the affinity is set to the CPU to avoid IPIs - * during syscalls. - * @evlist_cpu_itr: the iterator instance. - * @evlist: evlist instance to iterate. - * @affinity: NULL or used to set the affinity to the current CPU. - */ -#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \ - for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \ - !evlist_cpu_iterator__end(&evlist_cpu_itr); \ - evlist_cpu_iterator__next(&evlist_cpu_itr)) - -/** Returns an iterator set to the first CPU/evsel of evlist. */ -struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity); -/** Move to next element in iterator, updating CPU, evsel and the affinity. */ -void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr); -/** Returns true when iterator is at the end of the CPUs and evlist. 
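
The affinity plumbing that these loops keep re-establishing has a single purpose, also stated in the iterator documentation being removed here: perf_event syscalls aimed at a counter on another CPU cost an IPI, so the thread pins itself to each CPU before issuing that CPU's ioctls. The enclosing shape, condensed from the functions above:

	struct affinity affinity;
	int i, cpu;

	if (affinity__setup(&affinity) < 0)
		return;

	evlist__for_each_cpu(evlist, i, cpu) {
		affinity__set(&affinity, cpu);	/* migrate once per CPU */
		/* ... every enable/disable/close ioctl for 'cpu' runs locally ... */
	}
	affinity__cleanup(&affinity);		/* restore the original mask */
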
*/ -bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr); +#define evlist__for_each_cpu(evlist, index, cpu) \ + evlist__cpu_iter_start(evlist); \ + perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) struct evsel *evlist__get_tracking_event(struct evlist *evlist); void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); +void evlist__cpu_iter_start(struct evlist *evlist); +bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); +bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); + struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 22d3267ce2..c87f9974c0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -241,7 +241,7 @@ void evsel__init(struct evsel *evsel, { perf_evsel__init(&evsel->core, attr, idx); evsel->tracking = !idx; - evsel->unit = strdup(""); + evsel->unit = ""; evsel->scale = 1.0; evsel->max_events = ULONG_MAX; evsel->evlist = NULL; @@ -276,8 +276,13 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) } if (evsel__is_clock(evsel)) { - free((char *)evsel->unit); - evsel->unit = strdup("msec"); + /* + * The evsel->unit points to static alias->unit + * so it's ok to use static string in here. + */ + static const char *unit = "msec"; + + evsel->unit = unit; evsel->scale = 1e-6; } @@ -289,7 +294,7 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config) +struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) { struct perf_event_attr attr = { .type = type, @@ -300,16 +305,18 @@ struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 c event_attr_init(&attr); + if (!precise) + goto new_event; + /* * Now let the usual logic to set up the perf_event_attr defaults * to kick in when we return and before perf_evsel__open() is called. 
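
One caveat on the evlist__for_each_cpu() definition restored in evlist.h above: unlike most iterator macros it expands to two statements (the cpu_iter reset, then the for loop), so it cannot be the sole body of an if/else. In a hypothetical

	if (cond)
		evlist__for_each_cpu(evlist, i, cpu) { ... }

only the evlist__cpu_iter_start() call is guarded by the condition; the loop itself always runs. It is safe only at statement level, with the call site wrapped in braces where needed.
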
*/ +new_event: evsel = evsel__new(&attr); if (evsel == NULL) goto out; - arch_evsel__fixup_new_cycles(&evsel->core.attr); - evsel->precise_max = true; /* use asprintf() because free(evsel) assumes name is allocated */ @@ -403,11 +410,6 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->filter == NULL) goto out_err; } - if (orig->metric_id) { - evsel->metric_id = strdup(orig->metric_id); - if (evsel->metric_id == NULL) - goto out_err; - } evsel->cgrp = cgroup__get(orig->cgrp); evsel->tp_format = orig->tp_format; evsel->handler = orig->handler; @@ -415,11 +417,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->max_events = orig->max_events; evsel->tool_event = orig->tool_event; - free((char *)evsel->unit); - evsel->unit = strdup(orig->unit); - if (evsel->unit == NULL) - goto out_err; - + evsel->unit = orig->unit; evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -781,17 +779,6 @@ const char *evsel__name(struct evsel *evsel) return "unknown"; } -const char *evsel__metric_id(const struct evsel *evsel) -{ - if (evsel->metric_id) - return evsel->metric_id; - - if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && evsel->tool_event) - return "duration_time"; - - return "unknown"; -} - const char *evsel__group_name(struct evsel *evsel) { return evsel->group_name ?: "anon group"; @@ -1060,10 +1047,6 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel) evsel__set_sample_bit(evsel, WEIGHT); } -void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused) -{ -} - static void evsel__set_default_freq_period(struct record_opts *opts, struct perf_event_attr *attr) { @@ -1381,9 +1364,9 @@ int evsel__append_addr_filter(struct evsel *evsel, const char *filter) } /* Caller has to clear disabled after going through all CPUs. */ -int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx) +int evsel__enable_cpu(struct evsel *evsel, int cpu) { - return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx); + return perf_evsel__enable_cpu(&evsel->core, cpu); } int evsel__enable(struct evsel *evsel) @@ -1396,9 +1379,9 @@ int evsel__enable(struct evsel *evsel) } /* Caller has to set disabled after going through all CPUs. 
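
The "caller has to clear/set disabled" contract on evsel__enable_cpu()/evsel__disable_cpu() exists because the flag summarizes all CPUs; flipping it mid-walk would misreport a half-enabled event. A minimal sketch of a conforming caller (illustrative; xyarray__max_x() gives the FD table's CPU dimension):

static void example_enable_all_cpus(struct evsel *evsel)
{
	int cpu;

	for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++)
		evsel__enable_cpu(evsel, cpu);

	evsel->disabled = false;	/* only once every CPU is enabled */
}
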
*/ -int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx) +int evsel__disable_cpu(struct evsel *evsel, int cpu) { - return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx); + return perf_evsel__disable_cpu(&evsel->core, cpu); } int evsel__disable(struct evsel *evsel) @@ -1449,8 +1432,6 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); - zfree(&evsel->unit); - zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; @@ -1464,7 +1445,7 @@ void evsel__delete(struct evsel *evsel) free(evsel); } -void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, +void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -1472,12 +1453,12 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, if (!evsel->prev_raw_counts) return; - if (cpu_map_idx == -1) { + if (cpu == -1) { tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); - *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); + *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; } count->val = count->val - tmp.val; @@ -1485,28 +1466,46 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, count->run = count->run - tmp.run; } -static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, s8 *pscaled) { - struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread); + s8 scaled = 0; - return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count); + if (scale) { + if (count->run == 0) { + scaled = -1; + count->val = 0; + } else if (count->run < count->ena) { + scaled = 1; + count->val = (u64)((double) count->val * count->ena / count->run); + } + } + + if (pscaled) + *pscaled = scaled; } -static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, - u64 val, u64 ena, u64 run) +static int evsel__read_one(struct evsel *evsel, int cpu, int thread) +{ + struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); + + return perf_evsel__read(&evsel->core, cpu, thread, count); +} + +static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu_map_idx, thread); + count = perf_counts(counter->counts, cpu, thread); count->val = val; count->ena = ena; count->run = run; - perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); + perf_counts__set_loaded(counter->counts, cpu, thread, true); } -static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data) +static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; struct sample_read_value *v; @@ -1525,7 +1524,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int v = (struct sample_read_value *) data; - evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run); + evsel__set_count(leader, cpu, thread, v[0].value, ena, run); for (i = 1; i < nr; i++) { struct evsel *counter; @@ -1534,13 +1533,13 @@ static int 
evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int if (!counter) return -EINVAL; - evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run); + evsel__set_count(counter, cpu, thread, v[i].value, ena, run); } return 0; } -static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) +static int evsel__read_group(struct evsel *leader, int cpu, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; @@ -1561,67 +1560,67 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) ps->group_data = data; } - if (FD(leader, cpu_map_idx, thread) < 0) + if (FD(leader, cpu, thread) < 0) return -EINVAL; - if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0) + if (readn(FD(leader, cpu, thread), data, size) <= 0) return -errno; - return evsel__process_group_data(leader, cpu_map_idx, thread, data); + return evsel__process_group_data(leader, cpu, thread, data); } -int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) +int evsel__read_counter(struct evsel *evsel, int cpu, int thread) { u64 read_format = evsel->core.attr.read_format; if (read_format & PERF_FORMAT_GROUP) - return evsel__read_group(evsel, cpu_map_idx, thread); + return evsel__read_group(evsel, cpu, thread); - return evsel__read_one(evsel, cpu_map_idx, thread); + return evsel__read_one(evsel, cpu, thread); } -int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale) +int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale) { struct perf_counts_values count; size_t nv = scale ? 3 : 1; - if (FD(evsel, cpu_map_idx, thread) < 0) + if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0) + if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; - if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0) + if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) return -errno; - evsel__compute_deltas(evsel, cpu_map_idx, thread, &count); + evsel__compute_deltas(evsel, cpu, thread, &count); perf_counts_values__scale(&count, scale, NULL); - *perf_counts(evsel->counts, cpu_map_idx, thread) = count; + *perf_counts(evsel->counts, cpu, thread) = count; return 0; } static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other, - int cpu_map_idx) + int cpu) { - struct perf_cpu cpu; + int cpuid; - cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); - return perf_cpu_map__idx(other->core.cpus, cpu); + cpuid = perf_cpu_map__cpu(evsel->core.cpus, cpu); + return perf_cpu_map__idx(other->core.cpus, cpuid); } -static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx) +static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu) { struct evsel *leader = evsel__leader(evsel); if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) || (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) { - return evsel__match_other_cpu(evsel, leader, cpu_map_idx); + return evsel__match_other_cpu(evsel, leader, cpu); } - return cpu_map_idx; + return cpu; } -static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) +static int get_group_fd(struct evsel *evsel, int cpu, int thread) { struct evsel *leader = evsel__leader(evsel); int fd; @@ -1635,11 +1634,11 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) */ BUG_ON(!leader->core.fd); - cpu_map_idx = 
evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx); - if (cpu_map_idx == -1) + cpu = evsel__hybrid_group_cpu(evsel, cpu); + if (cpu == -1) return -1; - fd = FD(leader, cpu_map_idx, thread); + fd = FD(leader, cpu, thread); BUG_ON(fd == -1); return fd; @@ -1653,16 +1652,16 @@ static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int } static int update_fds(struct evsel *evsel, - int nr_cpus, int cpu_map_idx, + int nr_cpus, int cpu_idx, int nr_threads, int thread_idx) { struct evsel *pos; - if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads) + if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) return -EINVAL; evlist__for_each_entry(evsel->evlist, pos) { - nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx; + nr_cpus = pos != evsel ? nr_cpus : cpu_idx; evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); @@ -1676,10 +1675,10 @@ static int update_fds(struct evsel *evsel, return 0; } -static bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu_map_idx, - struct perf_thread_map *threads, - int thread, int err) +bool evsel__ignore_missing_thread(struct evsel *evsel, + int nr_cpus, int cpu, + struct perf_thread_map *threads, + int thread, int err) { pid_t ignore_pid = perf_thread_map__pid(threads, thread); @@ -1702,7 +1701,7 @@ static bool evsel__ignore_missing_thread(struct evsel *evsel, * We should remove fd for missing_thread first * because thread_map__remove() will decrease threads->nr. */ - if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread)) + if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) return false; if (thread_map__remove(threads, thread)) @@ -1791,7 +1790,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, nthreads = threads->nr; if (evsel->core.fd == NULL && - perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) + perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0) return -ENOMEM; evsel->open_flags = PERF_FLAG_FD_CLOEXEC; @@ -1817,7 +1816,7 @@ static void evsel__disable_missing_features(struct evsel *evsel) evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) evsel->core.attr.mmap2 = 0; - if (evsel->pmu && evsel->pmu->missing_features.exclude_guest) + if (perf_missing_features.exclude_guest) evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0; if (perf_missing_features.lbr_flags) evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | @@ -1910,27 +1909,10 @@ bool evsel__detect_missing_features(struct evsel *evsel) perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); return true; - } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) && - (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) { - if (evsel->pmu == NULL) { - evsel->pmu = evsel__find_pmu(evsel); - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } - } - - if (evsel->exclude_GH) { - pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); - return false; - } - if (!perf_missing_features.exclude_guest) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); - } + } else if (!perf_missing_features.exclude_guest && + (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) { + perf_missing_features.exclude_guest = true; + 
pr_debug2_peo("switching off exclude_guest, exclude_host\n"); return true; } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; @@ -1984,9 +1966,9 @@ bool evsel__increase_rlimit(enum rlimit_action *set_rlimit) static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, - int start_cpu_map_idx, int end_cpu_map_idx) + int start_cpu, int end_cpu) { - int idx, thread, nthreads; + int cpu, thread, nthreads; int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; @@ -2013,7 +1995,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, display_attr(&evsel->core.attr); - for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (cpu = start_cpu; cpu < end_cpu; cpu++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; @@ -2024,18 +2006,17 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); - group_fd = get_group_fd(evsel, idx, thread); + group_fd = get_group_fd(evsel, cpu, thread); test_attr__ready(); pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); + pid, cpus->map[cpu], group_fd, evsel->open_flags); - fd = sys_perf_event_open(&evsel->core.attr, pid, - perf_cpu_map__cpu(cpus, idx).cpu, + fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu], group_fd, evsel->open_flags); - FD(evsel, idx, thread) = fd; + FD(evsel, cpu, thread) = fd; if (fd < 0) { err = -errno; @@ -2045,11 +2026,10 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, goto try_fallback; } - bpf_counter__install_pe(evsel, idx, fd); + bpf_counter__install_pe(evsel, cpu, fd); if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, - perf_cpu_map__cpu(cpus, idx), + test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], fd, group_fd, evsel->open_flags); } @@ -2090,8 +2070,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), - idx, threads, thread, err)) { + if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. 
*/ nthreads--; @@ -2106,7 +2085,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit)) goto retry_open; - if (err != -EINVAL || idx > 0 || thread > 0) + if (err != -EINVAL || cpu > 0 || thread > 0) goto out_close; if (evsel__detect_missing_features(evsel)) @@ -2118,12 +2097,12 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, old_errno = errno; do { while (--thread >= 0) { - if (FD(evsel, idx, thread) >= 0) - close(FD(evsel, idx, thread)); - FD(evsel, idx, thread) = -1; + if (FD(evsel, cpu, thread) >= 0) + close(FD(evsel, cpu, thread)); + FD(evsel, cpu, thread) = -1; } thread = nthreads; - } while (--idx >= 0); + } while (--cpu >= 0); errno = old_errno; return err; } @@ -2131,7 +2110,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus)); + return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1); } void evsel__close(struct evsel *evsel) @@ -2140,12 +2119,13 @@ void evsel__close(struct evsel *evsel) perf_evsel__free_id(&evsel->core); } -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu) { - if (cpu_map_idx == -1) - return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus)); + if (cpu == -1) + return evsel__open_cpu(evsel, cpus, NULL, 0, + cpus ? cpus->nr : 1); - return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); + return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1); } int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) @@ -2250,54 +2230,6 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data, data->weight = *array; } -u64 evsel__bitfield_swap_branch_flags(u64 value) -{ - u64 new_val = 0; - - /* - * branch_flags - * union { - * u64 values; - * struct { - * mispred:1 //target mispredicted - * predicted:1 //target predicted - * in_tx:1 //in transaction - * abort:1 //transaction abort - * cycles:16 //cycle count to last branch - * type:4 //branch type - * reserved:40 - * } - * } - * - * Avoid bswap64() the entire branch_flag.value, - * as it has variable bit-field sizes. Instead the - * macro takes the bit-field position/size, - * swaps it based on the host endianness. - * - * tep_is_bigendian() is used here instead of - * bigendian() to avoid python test fails. 
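
For reference while reading this removal: bswap64() cannot repair branch_flags because the union's bit-fields have unequal widths, so the helper mirrored each field individually by bit position and size. A hedged reconstruction of such a macro (illustrative; the tree's own definition may differ):

/*
 * Move the 'size'-bit field at bit position 'pos' of 'src' to the
 * mirrored position at the opposite end of the 64-bit word.
 */
#define bitfield_swap(src, pos, size)					\
	((((src) >> (pos)) & ((1ull << (size)) - 1)) << (64 - (pos) - (size)))

/* e.g. the little-endian path: new_val |= bitfield_swap(value, 63, 1); */
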
- */ - if (tep_is_bigendian()) { - new_val = bitfield_swap(value, 0, 1); - new_val |= bitfield_swap(value, 1, 1); - new_val |= bitfield_swap(value, 2, 1); - new_val |= bitfield_swap(value, 3, 1); - new_val |= bitfield_swap(value, 4, 16); - new_val |= bitfield_swap(value, 20, 4); - new_val |= bitfield_swap(value, 24, 40); - } else { - new_val = bitfield_swap(value, 63, 1); - new_val |= bitfield_swap(value, 62, 1); - new_val |= bitfield_swap(value, 61, 1); - new_val |= bitfield_swap(value, 60, 1); - new_val |= bitfield_swap(value, 44, 16); - new_val |= bitfield_swap(value, 40, 4); - new_val |= bitfield_swap(value, 0, 40); - } - - return new_val; -} - int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2485,8 +2417,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_BRANCH_STACK) { const u64 max_branch_nr = UINT64_MAX / sizeof(struct branch_entry); - struct branch_entry *e; - unsigned int i; OVERFLOW_CHECK_u64(array); data->branch_stack = (struct branch_stack *)array++; @@ -2495,33 +2425,10 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, return -EFAULT; sz = data->branch_stack->nr * sizeof(struct branch_entry); - if (evsel__has_branch_hw_idx(evsel)) { + if (evsel__has_branch_hw_idx(evsel)) sz += sizeof(u64); - e = &data->branch_stack->entries[0]; - } else { + else data->no_hw_idx = true; - /* - * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied, - * only nr and entries[] will be output by kernel. - */ - e = (struct branch_entry *)&data->branch_stack->hw_idx; - } - - if (swapped) { - /* - * struct branch_flag does not have endian - * specific bit field definition. And bswap - * will not resolve the issue, since these - * are bit fields. - * - * evsel__bitfield_swap_branch_flags() uses a - * bitfield_swap macro to swap the bit position - * based on the host endians. - */ - for (i = 0; i < data->branch_stack->nr; i++, e++) - e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value); - } - OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } @@ -2699,8 +2606,6 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; } return sample->raw_data + offset; @@ -2945,10 +2850,6 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "wrong clockid (%d).", clockid); if (perf_missing_features.aux_output) return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel."); - if (!target__has_cpu(target)) - return scnprintf(msg, size, - "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", - evsel__name(evsel)); break; case ENODATA: return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. 
" @@ -2972,15 +2873,15 @@ struct perf_env *evsel__env(struct evsel *evsel) static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) { - int cpu_map_idx, thread; + int cpu, thread; - for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) { + for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->core.fd); thread++) { - int fd = FD(evsel, cpu_map_idx, thread); + int fd = FD(evsel, cpu, thread); if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, - cpu_map_idx, thread, fd) < 0) + cpu, thread, fd) < 0) return -1; } } @@ -2993,7 +2894,7 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; - if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr)) + if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr)) return -ENOMEM; return store_evsel_ids(evsel, evlist); @@ -3036,15 +2937,3 @@ void evsel__set_leader(struct evsel *evsel, struct evsel *leader) { evsel->core.leader = &leader->core; } - -int evsel__source_count(const struct evsel *evsel) -{ - struct evsel *pos; - int count = 0; - - evlist__for_each_entry(evsel->evlist, pos) { - if (pos->metric_leader == evsel) - count++; - } - return count; -} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 041b42d33b..1f7edfa856 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,7 +11,6 @@ #include #include "symbol_conf.h" #include -#include struct bpf_object; struct cgroup; @@ -23,7 +22,6 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; -struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -70,7 +68,6 @@ struct evsel { double scale; const char *unit; struct cgroup *cgrp; - const char *metric_id; enum perf_tool_event tool_event; /* parse modifier helper */ int exclude_GH; @@ -122,6 +119,7 @@ struct evsel { bool errored; struct hashmap *per_pkg_mask; int err; + int cpu_iter; struct { evsel__sb_cb_t *cb; void *data; @@ -154,9 +152,6 @@ struct evsel { }; unsigned long open_flags; int precise_ip_original; - - /* for missing_features */ - struct perf_pmu *pmu; }; struct perf_missing_features { @@ -192,9 +187,12 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel) static inline int evsel__nr_cpus(struct evsel *evsel) { - return perf_cpu_map__nr(evsel__cpus(evsel)); + return evsel__cpus(evsel)->nr; } +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, s8 *pscaled); + void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, struct perf_counts_values *count); @@ -263,7 +261,6 @@ bool evsel__match_bpf_counter_events(const char *name); int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *evsel__name(struct evsel *evsel); -const char *evsel__metric_id(const struct evsel *evsel); const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); @@ -280,17 +277,16 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); -void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char 
*filter); int evsel__append_addr_filter(struct evsel *evsel, const char *filter); -int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx); +int evsel__enable_cpu(struct evsel *evsel, int cpu); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); -int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx); +int evsel__disable_cpu(struct evsel *evsel, int cpu); -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx); +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu); int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); @@ -302,6 +298,10 @@ bool evsel__detect_missing_features(struct evsel *evsel); enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX }; bool evsel__increase_rlimit(enum rlimit_action *set_rlimit); +bool evsel__ignore_missing_thread(struct evsel *evsel, + int nr_cpus, int cpu, + struct perf_thread_map *threads, + int thread, int err); bool evsel__precise_ip_fallback(struct evsel *evsel); struct perf_sample; @@ -330,32 +330,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) (e1->core.attr.config == e2->core.attr.config); } -int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); +int evsel__read_counter(struct evsel *evsel, int cpu, int thread); -int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); +int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale); /** * evsel__read_on_cpu - Read out the results on a CPU and thread * * @evsel - event selector to read value - * @cpu_map_idx - CPU of interest + * @cpu - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread) +static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread) { - return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false); + return __evsel__read_on_cpu(evsel, cpu, thread, false); } /** * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled * * @evsel - event selector to read value - * @cpu_map_idx - CPU of interest + * @cpu - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread) +static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread) { - return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); + return __evsel__read_on_cpu(evsel, cpu, thread, true); } int evsel__parse_sample(struct evsel *evsel, union perf_event *event, @@ -482,18 +482,4 @@ struct evsel *evsel__leader(struct evsel *evsel); bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); void evsel__set_leader(struct evsel *evsel, struct evsel *leader); -int evsel__source_count(const struct evsel *evsel); - -/* - * Macro to swap the bit-field position and size.
- * Used when: - * - we don't need to swap the entire u64 && - * - the u64 has variable bit-field sizes && - * - it is presented in a host endianness which is different - * from the source endianness of the perf.data file - */ -#define bitfield_swap(src, pos, size) \ - ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1))) - -u64 evsel__bitfield_swap_branch_flags(u64 value); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 8c2ea80013..bfedd7b235 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -11,7 +11,6 @@ #include "strlist.h" #include "symbol.h" #include "srcline.h" -#include "dso.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) { @@ -145,17 +144,12 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_arrow && !first) printed += fprintf(fp, " <-"); + if (print_ip) + printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); + if (map) addr = map->map_ip(map, node->ip); - if (print_ip) { - /* Show binary offset for userspace addr */ - if (map && !map->dso->kernel) - printed += fprintf(fp, "%c%16" PRIx64, s, addr); - else - printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); - } - if (print_sym) { printed += fprintf(fp, " "); node_al.addr = addr; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 675f318ce7..a850fd0be3 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -5,18 +5,13 @@ #include #include #include "metricgroup.h" -#include "cpumap.h" -#include "cputopo.h" #include "debug.h" #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" -#include "smt.h" -#include #include #include #include -#include #ifdef PARSER_DEBUG extern int expr_debug; @@ -24,15 +19,13 @@ extern int expr_debug; struct expr_id_data { union { - struct { - double val; - int source_count; - } val; + double val; struct { double val; const char *metric_name; const char *metric_expr; } ref; + struct expr_id *parent; }; enum { @@ -42,6 +35,8 @@ struct expr_id_data { EXPR_ID_DATA__REF, /* A reference but the value has been computed. */ EXPR_ID_DATA__REF_VALUE, + /* A parent is remembered for the recursion check.
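+ * expr__add_id() stores ctx->parent here, so a metric that directly or + * indirectly references itself can be detected by walking the chain of + * parents.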
*/ + EXPR_ID_DATA__PARENT, } kind; }; @@ -64,98 +59,8 @@ static bool key_equal(const void *key1, const void *key2, return !strcmp((const char *)key1, (const char *)key2); } -struct hashmap *ids__new(void) -{ - struct hashmap *hash; - - hash = hashmap__new(key_hash, key_equal, NULL); - if (IS_ERR(hash)) - return NULL; - return hash; -} - -void ids__free(struct hashmap *ids) -{ - struct hashmap_entry *cur; - size_t bkt; - - if (ids == NULL) - return; - - hashmap__for_each_entry(ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); - } - - hashmap__free(ids); -} - -int ids__insert(struct hashmap *ids, const char *id) -{ - struct expr_id_data *data_ptr = NULL, *old_data = NULL; - char *old_key = NULL; - int ret; - - ret = hashmap__set(ids, id, data_ptr, - (const void **)&old_key, (void **)&old_data); - if (ret) - free(data_ptr); - free(old_key); - free(old_data); - return ret; -} - -struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2) -{ - size_t bkt; - struct hashmap_entry *cur; - int ret; - struct expr_id_data *old_data = NULL; - char *old_key = NULL; - - if (!ids1) - return ids2; - - if (!ids2) - return ids1; - - if (hashmap__size(ids1) < hashmap__size(ids2)) { - struct hashmap *tmp = ids1; - - ids1 = ids2; - ids2 = tmp; - } - hashmap__for_each_entry(ids2, cur, bkt) { - ret = hashmap__set(ids1, cur->key, cur->value, - (const void **)&old_key, (void **)&old_data); - free(old_key); - free(old_data); - - if (ret) { - hashmap__free(ids1); - hashmap__free(ids2); - return NULL; - } - } - hashmap__free(ids2); - return ids1; -} - /* Caller must make sure id is allocated */ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) -{ - return ids__insert(ctx->ids, id); -} - -/* Caller must make sure id is allocated */ -int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) -{ - return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1); -} - -/* Caller must make sure id is allocated */ -int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, - double val, int source_count) { struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; @@ -164,11 +69,33 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, data_ptr = malloc(sizeof(*data_ptr)); if (!data_ptr) return -ENOMEM; - data_ptr->val.val = val; - data_ptr->val.source_count = source_count; + + data_ptr->parent = ctx->parent; + data_ptr->kind = EXPR_ID_DATA__PARENT; + + ret = hashmap__set(&ctx->ids, id, data_ptr, + (const void **)&old_key, (void **)&old_data); + if (ret) + free(data_ptr); + free(old_key); + free(old_data); + return ret; +} + +/* Caller must make sure id is allocated */ +int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) +{ + struct expr_id_data *data_ptr = NULL, *old_data = NULL; + char *old_key = NULL; + int ret; + + data_ptr = malloc(sizeof(*data_ptr)); + if (!data_ptr) + return -ENOMEM; + data_ptr->val = val; data_ptr->kind = EXPR_ID_DATA__VALUE; - ret = hashmap__set(ctx->ids, id, data_ptr, + ret = hashmap__set(&ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -213,7 +140,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) data_ptr->ref.metric_expr = ref->metric_expr; data_ptr->kind = EXPR_ID_DATA__REF; - ret = hashmap__set(ctx->ids, name, data_ptr, + ret = hashmap__set(&ctx->ids, name, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -229,24 +156,9 @@ int 
expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { - return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1; + return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1; } -bool expr__subset_of_ids(struct expr_parse_ctx *haystack, - struct expr_parse_ctx *needles) -{ - struct hashmap_entry *cur; - size_t bkt; - struct expr_id_data *data; - - hashmap__for_each_entry(needles->ids, cur, bkt) { - if (expr__get_id(haystack, cur->key, &data)) - return false; - } - return true; -} - - int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap) { @@ -261,18 +173,21 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, switch (data->kind) { case EXPR_ID_DATA__VALUE: - pr_debug2("lookup(%s): val %f\n", id, data->val.val); + pr_debug2("lookup(%s): val %f\n", id, data->val); + break; + case EXPR_ID_DATA__PARENT: + pr_debug2("lookup(%s): parent %s\n", id, data->parent->id); break; case EXPR_ID_DATA__REF: pr_debug2("lookup(%s): ref metric name %s\n", id, data->ref.metric_name); pr_debug("processing metric: %s ENTRY\n", id); data->kind = EXPR_ID_DATA__REF_VALUE; - if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr)) { + if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) { pr_debug("%s failed to count\n", id); return -1; } - pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val); + pr_debug("processing metric: %s EXIT: %f\n", id, data->val); break; case EXPR_ID_DATA__REF_VALUE: pr_debug2("lookup(%s): ref val %f metric name %s\n", id, @@ -290,28 +205,15 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id) struct expr_id_data *old_val = NULL; char *old_key = NULL; - hashmap__delete(ctx->ids, id, + hashmap__delete(&ctx->ids, id, (const void **)&old_key, (void **)&old_val); free(old_key); free(old_val); } -struct expr_parse_ctx *expr__ctx_new(void) +void expr__ctx_init(struct expr_parse_ctx *ctx) { - struct expr_parse_ctx *ctx; - - ctx = malloc(sizeof(struct expr_parse_ctx)); - if (!ctx) - return NULL; - - ctx->ids = hashmap__new(key_hash, key_equal, NULL); - if (IS_ERR(ctx->ids)) { - free(ctx); - return NULL; - } - ctx->runtime = 0; - - return ctx; + hashmap__init(&ctx->ids, key_hash, key_equal, NULL); } void expr__ctx_clear(struct expr_parse_ctx *ctx) @@ -319,32 +221,20 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx) struct hashmap_entry *cur; size_t bkt; - hashmap__for_each_entry(ctx->ids, cur, bkt) { + hashmap__for_each_entry((&ctx->ids), cur, bkt) { free((char *)cur->key); free(cur->value); } - hashmap__clear(ctx->ids); -} - -void expr__ctx_free(struct expr_parse_ctx *ctx) -{ - struct hashmap_entry *cur; - size_t bkt; - - hashmap__for_each_entry(ctx->ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); - } - hashmap__free(ctx->ids); - free(ctx); + hashmap__clear(&ctx->ids); } static int __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, - bool compute_ids) + int start, int runtime) { struct expr_scanner_ctx scanner_ctx = { - .runtime = ctx->runtime, + .start_token = start, + .runtime = runtime, }; YY_BUFFER_STATE buffer; void *scanner; @@ -363,7 +253,7 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, expr_set_debug(1, scanner); #endif - ret = expr_parse(val, ctx, compute_ids, scanner); + ret = expr_parse(val, ctx, scanner); expr__flush_buffer(buffer, scanner); expr__delete_buffer(buffer, scanner); @@ -372,15 +262,15 @@ __expr__parse(double 
*val, struct expr_parse_ctx *ctx, const char *expr, } int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr) + const char *expr, int runtime) { - return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false) ? -1 : 0; + return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0; } -int expr__find_ids(const char *expr, const char *one, - struct expr_parse_ctx *ctx) +int expr__find_other(const char *expr, const char *one, + struct expr_parse_ctx *ctx, int runtime) { - int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true); + int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); if (one) expr__del_id(ctx, one); @@ -391,60 +281,13 @@ int expr__find_ids(const char *expr, const char *one, double expr_id_data__value(const struct expr_id_data *data) { if (data->kind == EXPR_ID_DATA__VALUE) - return data->val.val; + return data->val; assert(data->kind == EXPR_ID_DATA__REF_VALUE); return data->ref.val; } -double expr_id_data__source_count(const struct expr_id_data *data) +struct expr_id *expr_id_data__parent(struct expr_id_data *data) { - assert(data->kind == EXPR_ID_DATA__VALUE); - return data->val.source_count; -} - -double expr__get_literal(const char *literal) -{ - static struct cpu_topology *topology; - double result = NAN; - - if (!strcasecmp("#smt_on", literal)) { - result = smt_on() > 0 ? 1.0 : 0.0; - goto out; - } - - if (!strcmp("#num_cpus", literal)) { - result = cpu__max_present_cpu().cpu; - goto out; - } - - /* - * Assume that topology strings are consistent, such as CPUs "0-1" - * wouldn't be listed as "0,1", and so after deduplication the number of - * these strings gives an indication of the number of packages, dies, - * etc. - */ - if (!topology) { - topology = cpu_topology__new(); - if (!topology) { - pr_err("Error creating CPU topology"); - goto out; - } - } - if (!strcmp("#num_packages", literal)) { - result = topology->package_cpus_lists; - goto out; - } - if (!strcmp("#num_dies", literal)) { - result = topology->die_cpus_lists; - goto out; - } - if (!strcmp("#num_cores", literal)) { - result = topology->core_cpus_lists; - goto out; - } - - pr_err("Unrecognized literal '%s'", literal); -out: - pr_debug2("literal: %s = %f\n", literal, result); - return result; + assert(data->kind == EXPR_ID_DATA__PARENT); + return data->parent; } diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index bd2116983b..85df3e4771 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -13,51 +13,39 @@ struct metric_ref; +struct expr_id { + char *id; + struct expr_id *parent; +}; + struct expr_parse_ctx { - struct hashmap *ids; - int runtime; + struct hashmap ids; + struct expr_id *parent; }; struct expr_id_data; struct expr_scanner_ctx { + int start_token; int runtime; }; -struct hashmap *ids__new(void); -void ids__free(struct hashmap *ids); -int ids__insert(struct hashmap *ids, const char *id); -/* - * Union two sets of ids (hashmaps) and construct a third, freeing ids1 and - * ids2. 
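- * The smaller map is folded into the larger one to keep rehashing to a - * minimum; if an insertion fails, both maps are freed and NULL is - * returned.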
- */ -struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2); - -struct expr_parse_ctx *expr__ctx_new(void); +void expr__ctx_init(struct expr_parse_ctx *ctx); void expr__ctx_clear(struct expr_parse_ctx *ctx); -void expr__ctx_free(struct expr_parse_ctx *ctx); - void expr__del_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); -int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, - double val, int source_count); int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref); int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data); -bool expr__subset_of_ids(struct expr_parse_ctx *haystack, - struct expr_parse_ctx *needles); int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap); - int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr); - -int expr__find_ids(const char *expr, const char *one, - struct expr_parse_ctx *ids); + const char *expr, int runtime); +int expr__find_other(const char *expr, const char *one, + struct expr_parse_ctx *ids, int runtime); double expr_id_data__value(const struct expr_id_data *data); -double expr_id_data__source_count(const struct expr_id_data *data); -double expr__get_literal(const char *literal); +struct expr_id *expr_id_data__parent(struct expr_id_data *data); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 0a13eb20c8..13e5e3c75f 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -6,7 +6,6 @@ #include #include "expr.h" #include "expr-bison.h" -#include char *expr_get_text(yyscan_t yyscanner); YYSTYPE *expr_get_lval(yyscan_t yyscanner); @@ -42,9 +41,11 @@ static char *normalize(char *str, int runtime) char *dst = str; while (*str) { - if (*str == '\\') + if (*str == '@') + *dst++ = '/'; + else if (*str == '\\') *dst++ = *++str; - else if (*str == '?') { + else if (*str == '?') { char *paramval; int i = 0; int size = asprintf(¶mval, "%d", runtime); @@ -78,17 +79,6 @@ static int str(yyscan_t scanner, int token, int runtime) yylval->str = normalize(yylval->str, runtime); return token; } - -static int literal(yyscan_t scanner) -{ - YYSTYPE *yylval = expr_get_lval(scanner); - - yylval->num = expr__get_literal(expr_get_text(scanner)); - if (isnan(yylval->num)) - return EXPR_ERROR; - - return LITERAL; -} %} number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+) @@ -97,18 +87,25 @@ sch [-,=] spec \\{sch} sym [0-9a-zA-Z_\.:@?]+ symbol ({spec}|{sym})+ -literal #[0-9a-zA-Z_\.\-]+ %% struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); + { + int start_token = sctx->start_token; + + if (sctx->start_token) { + sctx->start_token = 0; + return start_token; + } + } + d_ratio { return D_RATIO; } max { return MAX; } min { return MIN; } if { return IF; } else { return ELSE; } -source_count { return SOURCE_COUNT; } -{literal} { return literal(yyscanner); } +#smt_on { return SMT_ON; } {number} { return value(yyscanner); } {symbol} { return str(yyscanner, ID, sctx->runtime); } "|" { return '|'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index a30b825adb..b2ada8f830 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,43 +1,42 @@ /* Simple expression parser */ %{ #define YYDEBUG 1 -#include -#include -#include +#include +#include "util.h" #include "util/debug.h" +#include // strtod() #define IN_EXPR_Y 1 #include "expr.h" +#include 
"smt.h" +#include + +static double d_ratio(double val0, double val1) +{ + if (val1 == 0) { + return 0; + } + return val0 / val1; +} + %} %define api.pure full %parse-param { double *final_val } %parse-param { struct expr_parse_ctx *ctx } -%parse-param { bool compute_ids } %parse-param {void *scanner} %lex-param {void* scanner} %union { double num; char *str; - struct ids { - /* - * When creating ids, holds the working set of event ids. NULL - * implies the set is empty. - */ - struct hashmap *ids; - /* - * The metric value. When not creating ids this is the value - * read from a counter, a constant or some computed value. When - * creating ids the value is either a constant or BOTTOM. NAN is - * used as the special BOTTOM value, representing a "set of all - * values" case. - */ - double val; - } ids; } -%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR +%token EXPR_PARSE EXPR_OTHER EXPR_ERROR +%token NUMBER +%token ID +%destructor { free ($$); } +%token MIN MAX IF ELSE SMT_ON D_RATIO %left MIN MAX IF %left '|' %left '^' @@ -46,256 +45,83 @@ %left '-' '+' %left '*' '/' '%' %left NEG NOT -%type NUMBER LITERAL -%type ID -%destructor { free ($$); } -%type expr if_expr -%destructor { ids__free($$.ids); } +%type expr if_expr %{ static void expr_error(double *final_val __maybe_unused, struct expr_parse_ctx *ctx __maybe_unused, - bool compute_ids __maybe_unused, void *scanner, const char *s) { pr_debug("%s\n", s); } -/* - * During compute ids, the special "bottom" value uses NAN to represent the set - * of all values. NAN is selected as it isn't a useful constant value. - */ -#define BOTTOM NAN - -/* During computing ids, does val represent a constant (non-BOTTOM) value? */ -static bool is_const(double val) -{ - return isfinite(val); -} - -static struct ids union_expr(struct ids ids1, struct ids ids2) -{ - struct ids result = { - .val = BOTTOM, - .ids = ids__union(ids1.ids, ids2.ids), - }; - return result; -} - -static struct ids handle_id(struct expr_parse_ctx *ctx, char *id, - bool compute_ids, bool source_count) -{ - struct ids result; - - if (!compute_ids) { - /* - * Compute the event's value from ID. If the ID isn't known then - * it isn't used to compute the formula so set to NAN. - */ - struct expr_id_data *data; - - result.val = NAN; - if (expr__resolve_id(ctx, id, &data) == 0) { - result.val = source_count - ? expr_id_data__source_count(data) - : expr_id_data__value(data); - } - result.ids = NULL; - free(id); - } else { - /* - * Set the value to BOTTOM to show that any value is possible - * when the event is computed. Create a set of just the ID. - */ - result.val = BOTTOM; - result.ids = ids__new(); - if (!result.ids || ids__insert(result.ids, id)) { - pr_err("Error creating IDs for '%s'", id); - free(id); - } - } - return result; -} - -/* - * If we're not computing ids or $1 and $3 are constants, compute the new - * constant value using OP. Its invariant that there are no ids. If computing - * ids for non-constants union the set of IDs that must be computed. 
- */ -#define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \ - if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ - assert(LHS.ids == NULL); \ - assert(RHS.ids == NULL); \ - RESULT.val = (long)LHS.val OP (long)RHS.val; \ - RESULT.ids = NULL; \ - } else { \ - RESULT = union_expr(LHS, RHS); \ - } - -#define BINARY_OP(RESULT, OP, LHS, RHS) \ - if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ - assert(LHS.ids == NULL); \ - assert(RHS.ids == NULL); \ - RESULT.val = LHS.val OP RHS.val; \ - RESULT.ids = NULL; \ - } else { \ - RESULT = union_expr(LHS, RHS); \ - } - %} %% -start: if_expr -{ - if (compute_ids) - ctx->ids = ids__union($1.ids, ctx->ids); +start: +EXPR_PARSE all_expr +| +EXPR_OTHER all_other - if (final_val) - *final_val = $1.val; -} -; +all_other: all_other other +| -if_expr: expr IF expr ELSE expr +other: ID { - if (fpclassify($3.val) == FP_ZERO) { - /* - * The IF expression evaluated to 0 so treat as false, take the - * ELSE and discard everything else. - */ - $$.val = $5.val; - $$.ids = $5.ids; - ids__free($1.ids); - ids__free($3.ids); - } else if (!compute_ids || is_const($3.val)) { - /* - * If ids aren't computed then treat the expression as true. If - * ids are being computed and the IF expr is a non-zero - * constant, then also evaluate the true case. - */ - $$.val = $1.val; - $$.ids = $1.ids; - ids__free($3.ids); - ids__free($5.ids); - } else if ($1.val == $5.val) { - /* - * LHS == RHS, so both are an identical constant. No need to - * evaluate any events. - */ - $$.val = $1.val; - $$.ids = NULL; - ids__free($1.ids); - ids__free($3.ids); - ids__free($5.ids); - } else { - /* - * Value is either the LHS or RHS and we need the IF expression - * to compute it. - */ - $$ = union_expr($1, union_expr($3, $5)); - } + expr__add_id(ctx, $1); } -| expr -; +| +MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' +| +'<' | '>' | D_RATIO -expr: NUMBER -{ - $$.val = $1; - $$.ids = NULL; -} -| ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); } -| SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); } -| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } -| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } -| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } -| expr '<' expr { BINARY_OP($$, <, $1, $3); } -| expr '>' expr { BINARY_OP($$, >, $1, $3); } -| expr '+' expr { BINARY_OP($$, +, $1, $3); } -| expr '-' expr { BINARY_OP($$, -, $1, $3); } -| expr '*' expr { BINARY_OP($$, *, $1, $3); } -| expr '/' expr -{ - if (fpclassify($3.val) == FP_ZERO) { - pr_debug("division by zero\n"); - YYABORT; - } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { - assert($1.ids == NULL); - assert($3.ids == NULL); - $$.val = $1.val / $3.val; - $$.ids = NULL; - } else { - /* LHS and/or RHS need computing from event IDs so union. */ - $$ = union_expr($1, $3); - } -} -| expr '%' expr -{ - if (fpclassify($3.val) == FP_ZERO) { - pr_debug("division by zero\n"); - YYABORT; - } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { - assert($1.ids == NULL); - assert($3.ids == NULL); - $$.val = (long)$1.val % (long)$3.val; - $$.ids = NULL; - } else { - /* LHS and/or RHS need computing from event IDs so union. */ - $$ = union_expr($1, $3); - } -} -| D_RATIO '(' expr ',' expr ')' -{ - if (fpclassify($5.val) == FP_ZERO) { - /* - * Division by constant zero always yields zero and no events - * are necessary. 
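- * For example, a hypothetical d_ratio(event_a, 0) folds straight to 0.0 - * and the ids gathered for the numerator are simply freed.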
- */ - assert($5.ids == NULL); - $$.val = 0.0; - $$.ids = NULL; - ids__free($3.ids); - } else if (!compute_ids || (is_const($3.val) && is_const($5.val))) { - assert($3.ids == NULL); - assert($5.ids == NULL); - $$.val = $3.val / $5.val; - $$.ids = NULL; - } else { - /* LHS and/or RHS need computing from event IDs so union. */ - $$ = union_expr($3, $5); - } -} -| '-' expr %prec NEG -{ - $$.val = -$2.val; - $$.ids = $2.ids; -} -| '(' if_expr ')' -{ - $$ = $2; -} -| MIN '(' expr ',' expr ')' -{ - if (!compute_ids) { - $$.val = $3.val < $5.val ? $3.val : $5.val; - $$.ids = NULL; - } else { - $$ = union_expr($3, $5); - } -} -| MAX '(' expr ',' expr ')' -{ - if (!compute_ids) { - $$.val = $3.val > $5.val ? $3.val : $5.val; - $$.ids = NULL; - } else { - $$ = union_expr($3, $5); - } -} -| LITERAL -{ - $$.val = $1; - $$.ids = NULL; -} -; +all_expr: if_expr { *final_val = $1; } + ; + +if_expr: + expr IF expr ELSE expr { $$ = $3 ? $1 : $5; } + | expr + ; + +expr: NUMBER + | ID { + struct expr_id_data *data; + + if (expr__resolve_id(ctx, $1, &data)) { + free($1); + YYABORT; + } + + $$ = expr_id_data__value(data); + free($1); + } + | expr '|' expr { $$ = (long)$1 | (long)$3; } + | expr '&' expr { $$ = (long)$1 & (long)$3; } + | expr '^' expr { $$ = (long)$1 ^ (long)$3; } + | expr '<' expr { $$ = $1 < $3; } + | expr '>' expr { $$ = $1 > $3; } + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { if ($3 == 0) { + pr_debug("division by zero\n"); + YYABORT; + } + $$ = $1 / $3; + } + | expr '%' expr { if ((long)$3 == 0) { + pr_debug("division by zero\n"); + YYABORT; + } + $$ = (long)$1 % (long)$3; + } + | '-' expr %prec NEG { $$ = -$2; } + | '(' if_expr ')' { $$ = $2; } + | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } + | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? 
$3 : $5; } + | SMT_ON { $$ = smt_on() > 0; } + | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); } + ; %% diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 3db3293213..d4137559be 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -42,7 +42,7 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #error "unsupported architecture" #endif -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if __BYTE_ORDER == __BIG_ENDIAN #define GEN_ELF_ENDIAN ELFDATA2MSB #else #define GEN_ELF_ENDIAN ELFDATA2LSB diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 6da12e522e..1c7414f666 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -48,7 +48,6 @@ #include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" -#include "bpf-utils.h" #include "clockid.h" #include "pmu-hybrid.h" @@ -472,7 +471,7 @@ static int write_nrcpus(struct feat_fd *ff, u32 nrc, nra; int ret; - nrc = cpu__max_present_cpu().cpu; + nrc = cpu__max_present_cpu(); nr = sysconf(_SC_NPROCESSORS_ONLN); if (nr < 0) @@ -583,21 +582,21 @@ static int write_cpu_topology(struct feat_fd *ff, if (!tp) return -1; - ret = do_write(ff, &tp->package_cpus_lists, sizeof(tp->package_cpus_lists)); + ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib)); if (ret < 0) goto done; - for (i = 0; i < tp->package_cpus_lists; i++) { - ret = do_write_string(ff, tp->package_cpus_list[i]); + for (i = 0; i < tp->core_sib; i++) { + ret = do_write_string(ff, tp->core_siblings[i]); if (ret < 0) goto done; } - ret = do_write(ff, &tp->core_cpus_lists, sizeof(tp->core_cpus_lists)); + ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib)); if (ret < 0) goto done; - for (i = 0; i < tp->core_cpus_lists; i++) { - ret = do_write_string(ff, tp->core_cpus_list[i]); + for (i = 0; i < tp->thread_sib; i++) { + ret = do_write_string(ff, tp->thread_siblings[i]); if (ret < 0) break; } @@ -617,15 +616,15 @@ static int write_cpu_topology(struct feat_fd *ff, return ret; } - if (!tp->die_cpus_lists) + if (!tp->die_sib) goto done; - ret = do_write(ff, &tp->die_cpus_lists, sizeof(tp->die_cpus_lists)); + ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib)); if (ret < 0) goto done; - for (i = 0; i < tp->die_cpus_lists; i++) { - ret = do_write_string(ff, tp->die_cpus_list[i]); + for (i = 0; i < tp->die_sib; i++) { + ret = do_write_string(ff, tp->die_siblings[i]); if (ret < 0) goto done; } @@ -1007,17 +1006,17 @@ static int write_bpf_prog_info(struct feat_fd *ff, node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - len = sizeof(struct perf_bpil) + + len = sizeof(struct bpf_prog_info_linear) + node->info_linear->data_len; /* before writing to file, translate address to offset */ - bpil_addr_to_offs(node->info_linear); + bpf_program__bpil_addr_to_offs(node->info_linear); ret = do_write(ff, node->info_linear, len); /* * translate back to address even when do_write() fails, * so that this function never changes the data. 
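 * (In memory the info_linear record holds real pointers; * bpf_program__bpil_addr_to_offs() rewrites them as offsets relative to * the record so it can be serialized, and bpf_program__bpil_offs_to_addr() * restores them.)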
*/ - bpil_offs_to_addr(node->info_linear); + bpf_program__bpil_offs_to_addr(node->info_linear); if (ret < 0) goto out; } @@ -1163,7 +1162,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) u32 nr, cpu; u16 level; - nr = cpu__max_cpu().cpu; + nr = cpu__max_cpu(); for (cpu = 0; cpu < nr; cpu++) { for (level = 0; level < MAX_CACHE_LVL; level++) { @@ -1195,7 +1194,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) static int write_cache(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL; + u32 max_caches = cpu__max_cpu() * MAX_CACHE_LVL; struct cpu_cache_level caches[max_caches]; u32 cnt = 0, i, version = 1; int ret; @@ -2321,7 +2320,6 @@ static int perf_header__read_build_ids(struct perf_header *header, #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \ {\ - free(ff->ph->env.__feat_env); \ ff->ph->env.__feat_env = do_read_string(ff); \ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ } @@ -3020,9 +3018,9 @@ static int process_dir_format(struct feat_fd *ff, #ifdef HAVE_LIBBPF_SUPPORT static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) { + struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = &ff->ph->env; - struct perf_bpil *info_linear; u32 count, i; int err = -1; @@ -3051,7 +3049,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; } - info_linear = malloc(sizeof(struct perf_bpil) + + info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); if (!info_linear) goto out; @@ -3073,7 +3071,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; /* after reading from file, translate offset to address */ - bpil_offs_to_addr(info_linear); + bpf_program__bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; perf_env__insert_bpf_prog_info(env, info_node); } @@ -4125,7 +4123,6 @@ int perf_event__process_feature(struct perf_session *session, struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event; int type = fe->header.type; u64 feat = fe->feat_id; - int ret = 0; if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { pr_warning("invalid record type %d in pipe-mode\n", type); @@ -4143,13 +4140,11 @@ int perf_event__process_feature(struct perf_session *session, ff.size = event->header.size - sizeof(*fe); ff.ph = &session->header; - if (feat_ops[feat].process(&ff, NULL)) { - ret = -1; - goto out; - } + if (feat_ops[feat].process(&ff, NULL)) + return -1; if (!feat_ops[feat].print || !tool->show_feat_hdr) - goto out; + return 0; if (!feat_ops[feat].full_only || tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { @@ -4158,9 +4153,8 @@ int perf_event__process_feature(struct perf_session *session, fprintf(stdout, "# %s info available, use -I to display\n", feat_ops[feat].name); } -out: - free_event_desc(ff.events); - return ret; + + return 0; } size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) @@ -4262,11 +4256,9 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, switch (ev->type) { case PERF_EVENT_UPDATE__UNIT: - free((char *)evsel->unit); evsel->unit = strdup(ev->data); break; case PERF_EVENT_UPDATE__NAME: - free(evsel->name); evsel->name = strdup(ev->data); break; case PERF_EVENT_UPDATE__SCALE: @@ -4275,11 +4267,11 @@ int perf_event__process_event_update(struct perf_tool 
*tool __maybe_unused, break; case PERF_EVENT_UPDATE__CPUS: ev_cpus = (struct perf_record_event_update_cpus *)ev->data; + map = cpu_map__new_data(&ev_cpus->cpus); - if (map) { - perf_cpu_map__put(evsel->core.own_cpus); + if (map) evsel->core.own_cpus = map; - } else + else pr_err("failed to get event_update cpus\n"); default: break; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0a8033b09e..b776465e04 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,9 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); - hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); - hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); - + hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2a15e22fb8..621f35ae1e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,8 +75,7 @@ enum hist_column { HISTC_MEM_BLOCKED, HISTC_LOCAL_INS_LAT, HISTC_GLOBAL_INS_LAT, - HISTC_LOCAL_P_STAGE_CYC, - HISTC_GLOBAL_P_STAGE_CYC, + HISTC_P_STAGE_CYC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 2c8147a622..af1e78d762 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -35,7 +35,7 @@ #define INTEL_BTS_ERR_NOINSN 5 #define INTEL_BTS_ERR_LOST 9 -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if __BYTE_ORDER == __BIG_ENDIAN #define le64_to_cpu bswap_64 #else #define le64_to_cpu diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 0e013c2d9e..b0034ee4bb 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -608,7 +608,6 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) { decoder->sample_timestamp = decoder->timestamp; decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; - decoder->state.cycles = decoder->tot_cyc_cnt; } static void intel_pt_reposition(struct intel_pt_decoder *decoder) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 8fd68f7a09..4b5e79fcf5 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -218,7 +218,6 @@ struct intel_pt_state { uint64_t to_ip; uint64_t tot_insn_cnt; uint64_t tot_cyc_cnt; - uint64_t cycles; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 9d5e65cec8..593f20e977 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -143,7 +143,7 @@ static void intel_pt_insn_decoder(struct insn *insn, if (branch == INTEL_PT_BR_CONDITIONAL || branch == INTEL_PT_BR_UNCONDITIONAL) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if __BYTE_ORDER == __BIG_ENDIAN switch (insn->immediate.nbytes) { case 1: intel_pt_insn->rel = insn->immediate.value; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index 5f5dfc8753..09feb5b07d 100644 --- 
a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -82,10 +82,10 @@ static int intel_pt_log_open(void) if (f) return 0; - if (log_name[0]) - f = fopen(log_name, "w+"); - else - f = stdout; + if (!log_name[0]) + return -1; + + f = fopen(log_name, "w+"); if (!f) { intel_pt_enable_logging = false; return -1; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 4bd154848c..02a3395d6c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -16,7 +16,7 @@ #define BIT63 ((uint64_t)1 << 63) -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index e8613cbda3..c3ceac1388 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -111,7 +111,6 @@ struct intel_pt { u64 cbr_id; u64 psb_id; - bool single_pebs; bool sample_pebs; struct evsel *pebs_evsel; @@ -149,14 +148,6 @@ enum switch_state { INTEL_PT_SS_EXPECTING_SWITCH_IP, }; -/* applicable_counters is 64-bits */ -#define INTEL_PT_MAX_PEBS 64 - -struct intel_pt_pebs_event { - struct evsel *evsel; - u64 id; -}; - struct intel_pt_queue { struct intel_pt *pt; unsigned int queue_nr; @@ -172,7 +163,6 @@ struct intel_pt_queue { bool step_through_buffers; bool use_buffer_pid_tid; bool sync_switch; - bool sample_ipc; pid_t pid, tid; int cpu; int switch_state; @@ -199,7 +189,6 @@ struct intel_pt_queue { u64 last_br_cyc_cnt; unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; - struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS]; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -1582,7 +1571,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - if (ptq->sample_ipc) + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; @@ -1633,7 +1622,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - if (ptq->sample_ipc) + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; @@ -1989,13 +1978,15 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample = { .ip = 0, }; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; + struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; + u64 id = evsel->core.id[0]; u8 cpumode; u64 regs[8 * sizeof(sample.intr_regs.mask)]; @@ -2121,45 +2112,6 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); } -static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) -{ - struct intel_pt *pt = ptq->pt; - struct evsel *evsel = pt->pebs_evsel; - u64 id = 
evsel->core.id[0]; - - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); -} - -static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) -{ - const struct intel_pt_blk_items *items = &ptq->state->items; - struct intel_pt_pebs_event *pe; - struct intel_pt *pt = ptq->pt; - int err = -EINVAL; - int hw_id; - - if (!items->has_applicable_counters || !items->applicable_counters) { - if (!pt->single_pebs) - pr_err("PEBS-via-PT record with no applicable_counters\n"); - return intel_pt_synth_single_pebs_sample(ptq); - } - - for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) { - pe = &ptq->pebs[hw_id]; - if (!pe->evsel) { - if (!pt->single_pebs) - pr_err("PEBS-via-PT record with no matching event, hw_id %d\n", - hw_id); - return intel_pt_synth_single_pebs_sample(ptq); - } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); - if (err) - return err; - } - - return err; -} - static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, pid_t pid, pid_t tid, u64 ip, u64 timestamp) { @@ -2246,15 +2198,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (pt->synth_opts.approx_ipc) { - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->cycles; - ptq->sample_ipc = true; - } else { - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; - ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; - } + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; /* * Do PEBS first to allow for the possibility that the PEBS timestamp @@ -2938,30 +2883,6 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } -static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, - union perf_event *event, - struct perf_sample *sample) -{ - u64 hw_id = event->aux_output_hw_id.hw_id; - struct auxtrace_queue *queue; - struct intel_pt_queue *ptq; - struct evsel *evsel; - - queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); - evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id); - if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) { - pr_err("Bad AUX output hardware ID\n"); - return -EINVAL; - } - - ptq = queue->priv; - - ptq->pebs[hw_id].evsel = evsel; - ptq->pebs[hw_id].id = sample->id; - - return 0; -} - static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { @@ -3089,8 +3010,6 @@ static int intel_pt_process_event(struct perf_session *session, err = intel_pt_process_switch(pt, sample); else if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); - else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) - err = intel_pt_process_aux_output_hw_id(pt, event, sample); else if (event->header.type == PERF_RECORD_SWITCH || event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); @@ -3475,13 +3394,9 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (evsel->core.attr.aux_output && evsel->core.id) { - if (pt->single_pebs) { - pt->single_pebs = false; - return; - } - pt->single_pebs = true; pt->sample_pebs = true; pt->pebs_evsel = evsel; + return; } } } @@ -3738,6 +3653,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (err) goto err_free; + intel_pt_log_set_name(INTEL_PT_PMU_NAME); + if 
(session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { @@ -3752,9 +3669,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->synth_opts.thread_stack = opts->thread_stack; } - if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT)) - intel_pt_log_set_name(INTEL_PT_PMU_NAME); - pt->session = session; pt->machine = &session->machines.host; /* No kvm support */ pt->auxtrace_type = auxtrace_info->type; diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 15f60fd094..c397be0c2e 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -23,9 +23,7 @@ #include "unwind.h" #include "libunwind-aarch64.h" -#define perf_event_arm_regs perf_event_arm64_regs #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> -#undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" /* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3945500036..44e40bad0e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -16,7 +16,6 @@ #include "map_symbol.h" #include "branch.h" #include "mem-events.h" -#include "path.h" #include "srcline.h" #include "symbol.h" #include "sort.h" @@ -35,7 +34,6 @@ #include "bpf-event.h" #include // page_size #include "cgroup.h" -#include "arm64-frame-pointer-unwind-support.h" #include #include @@ -757,14 +755,6 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } -int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unused, - union perf_event *event) -{ - if (dump_trace) - perf_event__fprintf_aux_output_hw_id(event, stdout); - return 0; -} - int machine__process_switch_event(struct machine *machine __maybe_unused, union perf_event *event) { @@ -1417,7 +1407,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i struct stat st; /*sshfs might return bad dent->d_type, so we have to stat*/ - path__join(path, sizeof(path), dir_name, dent->d_name); + snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); if (stat(path, &st)) continue; @@ -2038,8 +2028,6 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_bpf(machine, event, sample); break; case PERF_RECORD_TEXT_POKE: ret = machine__process_text_poke(machine, event, sample); break; - case PERF_RECORD_AUX_OUTPUT_HW_ID: - ret = machine__process_aux_output_hw_id_event(machine, event); break; default: ret = -1; break; @@ -2073,7 +2061,6 @@ static void ip__resolve_ams(struct thread *thread, ams->addr = ip; ams->al_addr = al.addr; - ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; @@ -2093,7 +2080,6 @@ static void ip__resolve_data(struct thread *thread, ams->addr = addr; ams->al_addr = al.addr; - ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; @@ -2714,15 +2700,6 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, return err; } -static u64 get_leaf_frame_caller(struct perf_sample *sample, - struct thread *thread, int usr_idx) -{ - if (machine__normalized_is(thread->maps->machine, "arm64")) - return get_leaf_frame_caller_aarch64(sample, thread, usr_idx); - else - return 0; -} - static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct evsel *evsel, @@ -2736,10 +2713,9 @@ 
static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err, nr_entries, usr_idx; + int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; - u64 leaf_frame_caller; if (chain) chain_nr = chain->nr; @@ -2864,34 +2840,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, continue; } - /* - * PERF_CONTEXT_USER allows us to locate where the user stack ends. - * Depending on callchain_param.order and the position of PERF_CONTEXT_USER, - * the index will be different in order to add the missing frame - * at the right place. - */ - - usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1; - - if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) { - - leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx); - - /* - * check if leaf_frame_caller != ip to avoid adding the same - * value twice. - */ - - if (leaf_frame_caller && leaf_frame_caller != ip) { - - err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, leaf_frame_caller, - false, NULL, NULL, 0); - if (err) - return (err < 0) ? err : 0; - } - } - err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, false, NULL, NULL, 0); @@ -3121,19 +3069,14 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, } /* - * Compares the raw arch string. N.B. see instead perf_env__arch() or - * machine__normalized_is() if a normalized arch is needed. + * Compares the raw arch string. N.B. see instead perf_env__arch() if a + * normalized arch is needed. */ bool machine__is(struct machine *machine, const char *arch) { return machine && !strcmp(perf_env__raw_arch(machine->env), arch); } -bool machine__normalized_is(struct machine *machine, const char *arch) -{ - return machine && !strcmp(perf_env__arch(machine->env), arch); -} - int machine__nr_cpus_avail(struct machine *machine) { return machine ?
perf_env__nr_cpus_avail(machine->env) : 0; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index c5a45dc8df..7377ed6efd 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -124,8 +124,6 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); -int machine__process_aux_output_hw_id_event(struct machine *machine, - union perf_event *event); int machine__process_switch_event(struct machine *machine, union perf_event *event); int machine__process_namespaces_event(struct machine *machine, @@ -208,7 +206,6 @@ static inline bool machine__is_host(struct machine *machine) } bool machine__is(struct machine *machine, const char *arch); -bool machine__normalized_is(struct machine *machine, const char *arch); int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index e08817b0c3..7d22ade082 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -18,7 +18,6 @@ struct addr_map_symbol { struct map_symbol ms; u64 addr; u64 al_addr; - char al_level; u64 phys_addr; u64 data_page_size; }; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ed0ab838bc..f0e75df72b 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -301,25 +301,12 @@ static const char * const mem_lvlnum[] = { [PERF_MEM_LVLNUM_NA] = "N/A", }; -static const char * const mem_hops[] = { - "N/A", - /* - * While printing, 'Remote' will be added to represent - * 'Remote core, same node' accesses, as the remote field needs - * to be set together with the mem_hops field. - */ - "core, same node", - "node, same socket", - "socket, same board", - "board", -}; - int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; u64 m = PERF_MEM_LVL_NA; u64 hit, miss; - int printed = 0; + int printed; if (mem_info) m = mem_info->data_src.mem_lvl; @@ -333,27 +320,21 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) /* already taken care of */ m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); + if (mem_info && mem_info->data_src.mem_remote) { strcat(out, "Remote "); l += 7; } - /* - * In case the mem_hops field is set, we can skip printing the data - * source via the PERF_MEM_LVL namespace. - */ - if (mem_info && mem_info->data_src.mem_hops) { - l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]); - } else { - for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (printed++) { - strcat(out, " or "); - l += 4; - } - l += scnprintf(out + l, sz - l, mem_lvl[i]); + printed = 0; + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (printed++) { + strcat(out, " or "); + l += 4; } + l += scnprintf(out + l, sz - l, mem_lvl[i]); } if (mem_info && mem_info->data_src.mem_lvl_num) { @@ -491,12 +472,8 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) /* * Skylake might report unknown remote level via this * bit, consider it when evaluating remote HITMs. - * - * In case of Power, the remote field can also be used to denote cache - * accesses from another core of the same node. Hence, mrem is set - * only when HOPS is zero and the remote field is set.
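- * For example, remote == 1 with hops == 1 ('Remote core, same node') is - * an access within the local node and must not be counted as a remote - * HITM.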
*/ - bool mrem = (data_src->mem_remote && !data_src->mem_hops); + bool mrem = data_src->mem_remote; int err = 0; #define HITM_INC(__f) \ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index d8492e3395..29b747ac31 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -11,7 +11,6 @@ #include "evsel.h" #include "strbuf.h" #include "pmu.h" -#include "pmu-hybrid.h" #include "expr.h" #include "rblist.h" #include @@ -19,7 +18,6 @@ #include "strlist.h" #include #include -#include #include #include #include @@ -86,7 +84,6 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, struct metric_expr *expr, *tmp; list_for_each_entry_safe(expr, tmp, &me->head, nd) { - free((char *)expr->metric_name); free(expr->metric_refs); free(expr->metric_events); free(expr); @@ -119,207 +116,289 @@ struct metric_ref_node { struct list_head list; }; -/** - * The metric under construction. The data held here will be placed in a - * metric_expr. - */ struct metric { struct list_head nd; - /** - * The expression parse context importantly holding the IDs contained - * within the expression. - */ - struct expr_parse_ctx *pctx; - /** The name of the metric such as "IPC". */ + struct expr_parse_ctx pctx; const char *metric_name; - /** Modifier on the metric such as "u" or NULL for none. */ - const char *modifier; - /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; - /** - * The "ScaleUnit" that scales and adds a unit to the metric during - * output. - */ const char *metric_unit; - /** Optional null terminated array of referenced metrics. */ - struct metric_ref *metric_refs; - /** - * Is there a constraint on the group of events? In which case the - * events won't be grouped. - */ + struct list_head metric_refs; + int metric_refs_cnt; + int runtime; bool has_constraint; - /** - * Parsed events for the metric. Optional as events may be taken from a - * different metric whose group contains all the IDs necessary for this - * one. - */ - struct evlist *evlist; }; -static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +#define RECURSION_ID_MAX 1000 + +struct expr_ids { + struct expr_id id[RECURSION_ID_MAX]; + int cnt; +}; + +static struct expr_id *expr_ids__alloc(struct expr_ids *ids) { - static bool violate_nmi_constraint; - - if (!foot) { - pr_warning("Splitting metric group %s into standalone metrics.\n", name); - violate_nmi_constraint = true; - return; - } - - if (!violate_nmi_constraint) - return; - - pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" - " echo 0 > /proc/sys/kernel/nmi_watchdog\n" - " perf stat ...\n" - " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); -} - -static bool metricgroup__has_constraint(const struct pmu_event *pe) -{ - if (!pe->metric_constraint) - return false; - - if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && - sysctl__nmi_watchdog_enabled()) { - metricgroup___watchdog_constraint_hint(pe->metric_name, false); - return true; - } - - return false; -} - -static struct metric *metric__new(const struct pmu_event *pe, - const char *modifier, - bool metric_no_group, - int runtime) -{ - struct metric *m; - - m = zalloc(sizeof(*m)); - if (!m) + if (ids->cnt >= RECURSION_ID_MAX) return NULL; - - m->pctx = expr__ctx_new(); - if (!m->pctx) { - free(m); - return NULL; - } - - m->metric_name = pe->metric_name; - m->modifier = modifier ? 
strdup(modifier) : NULL; - if (modifier && !m->modifier) { - expr__ctx_free(m->pctx); - free(m); - return NULL; - } - m->metric_expr = pe->metric_expr; - m->metric_unit = pe->unit; - m->pctx->runtime = runtime; - m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); - m->metric_refs = NULL; - m->evlist = NULL; - - return m; + return &ids->id[ids->cnt++]; } -static void metric__free(struct metric *m) +static void expr_ids__exit(struct expr_ids *ids) { - free(m->metric_refs); - expr__ctx_free(m->pctx); - free((char *)m->modifier); - evlist__delete(m->evlist); - free(m); + int i; + + for (i = 0; i < ids->cnt; i++) + free(ids->id[i].id); } -static bool contains_metric_id(struct evsel **metric_events, int num_events, - const char *metric_id) +static bool contains_event(struct evsel **metric_events, int num_events, + const char *event_name) { int i; for (i = 0; i < num_events; i++) { - if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) + if (!strcmp(metric_events[i]->name, event_name)) return true; } return false; } -/** - * setup_metric_events - Find a group of events in metric_evlist that correspond - * to the IDs from a parsed metric expression. - * @ids: the metric IDs to match. - * @metric_evlist: the list of perf events. - * @out_metric_events: holds the created metric events array. - */ -static int setup_metric_events(struct hashmap *ids, - struct evlist *metric_evlist, - struct evsel ***out_metric_events) +static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2) { - struct evsel **metric_events; - const char *metric_id; - struct evsel *ev; - size_t ids_size, matched_events, i; + if (!ev1->pmu_name || !ev2->pmu_name) + return true; - *out_metric_events = NULL; - ids_size = hashmap__size(ids); + return !strcmp(ev1->pmu_name, ev2->pmu_name); +} - metric_events = calloc(sizeof(void *), ids_size + 1); - if (!metric_events) - return -ENOMEM; +/** + * Find a group of events in perf_evlist that correspond to those from a parsed + * metric expression. Note, as find_evsel_group is called in the same order as + * perf_evlist was constructed, metric_no_merge doesn't need to test for + * underfilling a group. + * @perf_evlist: a list of events something like: {metric1 leader, metric1 + * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling, + * metric2 sibling}:W,duration_time + * @pctx: the parse context for the metric expression. + * @metric_no_merge: don't attempt to share events for the metric with other + * metrics. + * @has_constraint: is there a constraint on the group of events? In which case + * the events won't be grouped. + * @metric_events: out argument, null terminated array of evsel's associated + * with the metric. + * @evlist_used: in/out argument, bitmap tracking which evlist events are used. + * @return the first metric event or NULL on failure. + */ +static struct evsel *find_evsel_group(struct evlist *perf_evlist, + struct expr_parse_ctx *pctx, + bool metric_no_merge, + bool has_constraint, + struct evsel **metric_events, + unsigned long *evlist_used) +{ + struct evsel *ev, *current_leader = NULL; + struct expr_id_data *val_ptr; + int i = 0, matched_events = 0, events_to_match; + const int idnum = (int)hashmap__size(&pctx->ids); - matched_events = 0; - evlist__for_each_entry(metric_evlist, ev) { - struct expr_id_data *val_ptr; + /* + * duration_time is always grouped separately, when events are grouped + * (ie has_constraint is false) then ignore it in the matching loop and + * add it to metric_events at the end. 
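As a rough standalone illustration of the matching loop that follows, the sketch below scans a flat event list for a single group containing every required ID, discarding the partial match whenever the group leader changes. struct ev and the sample data are hypothetical stand-ins for evsel/evlist; duplicate names are handled with a per-ID "used" flag, playing the role of contains_event():

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for an evlist entry: a name plus its leader index. */
struct ev { const char *name; int leader; };

static int find_group(const struct ev *evs, int nr_evs,
                      const char * const *ids, int nr_ids, int *out)
{
    bool used[8] = { false };   /* which required IDs are already matched */
    int matched = 0, cur_leader = -1;

    for (int i = 0; i < nr_evs; i++) {
        if (evs[i].leader != cur_leader) {
            /* Start of a new group: discard the partial match. */
            matched = 0;
            memset(used, 0, sizeof(used));
            cur_leader = evs[i].leader;
        }
        for (int j = 0; j < nr_ids; j++) {
            if (!used[j] && !strcmp(evs[i].name, ids[j])) {
                used[j] = true;
                out[matched++] = i;
                break;
            }
        }
        if (matched == nr_ids)
            return 0; /* the whole metric matched inside one group */
    }
    return -1;
}

int main(void)
{
    const struct ev evs[] = {
        { "cycles", 0 }, { "branches", 0 },     /* group led by index 0 */
        { "cycles", 2 }, { "instructions", 2 }, /* group led by index 2 */
    };
    const char * const ids[] = { "instructions", "cycles" };
    int out[2];

    if (!find_group(evs, 4, ids, 2, out))
        printf("matched event indexes %d and %d\n", out[0], out[1]);
    return 0;
}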
+ */ + if (!has_constraint && + hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) + events_to_match = idnum - 1; + else + events_to_match = idnum; evlist__for_each_entry (perf_evlist, ev) { /* - * Check for duplicate events with the same name. For - * example, uncore_imc/cas_count_read/ will turn into 6 - * events per socket on skylakex. Only the first such - * event is placed in metric_events. + * Events with a constraint aren't grouped and match the first + * events available. */ - metric_id = evsel__metric_id(ev); - if (contains_metric_id(metric_events, matched_events, metric_id)) + if (has_constraint && ev->weak_group) continue; + /* Ignore event if already used and merging is disabled. */ + if (metric_no_merge && test_bit(ev->core.idx, evlist_used)) + continue; + if (!has_constraint && !evsel__has_leader(ev, current_leader)) { + /* + * Start of a new group, discard the whole match and + * start again. + */ + matched_events = 0; + memset(metric_events, 0, + sizeof(struct evsel *) * idnum); + current_leader = evsel__leader(ev); + } /* - * Does this event belong to the parse context? For - * combined or shared groups, this metric may not care - * about this event. + * Check for duplicate events with the same name. For example, + * uncore_imc/cas_count_read/ will turn into 6 events per socket + * on skylakex. Only the first such event is placed in + * metric_events. If events aren't grouped then this also + * ensures that the same event in different sibling groups + * aren't both added to metric_events. */ - if (hashmap__find(ids, metric_id, (void **)&val_ptr)) { + if (contains_event(metric_events, matched_events, ev->name)) + continue; + /* Does this event belong to the parse context? */ + if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) metric_events[matched_events++] = ev; - if (matched_events >= ids_size) + if (matched_events == events_to_match) + break; + } + + if (events_to_match != idnum) { + /* Add the first duration_time. */ + evlist__for_each_entry(perf_evlist, ev) { + if (!strcmp(ev->name, "duration_time")) { + metric_events[matched_events++] = ev; break; + } } } - if (matched_events < ids_size) { - free(metric_events); - return -EINVAL; - } - for (i = 0; i < ids_size; i++) { - ev = metric_events[i]; - ev->collect_stat = true; + if (matched_events != idnum) { + /* Not a whole match */ + return NULL; + } + + metric_events[idnum] = NULL; + + for (i = 0; i < idnum; i++) { + ev = metric_events[i]; + /* Don't free the used events. */ + set_bit(ev->core.idx, evlist_used); /* - * The metric leader points to the identically named - * event in metric_events. + * The metric leader points to the identically named event in + * metric_events. */ ev->metric_leader = ev; /* - * Mark two events with identical names in the same - * group (or globally) as being in use as uncore events - * may be duplicated for each pmu. Set the metric leader - * of such events to be the event that appears in - * metric_events. + * Mark two events with identical names in the same group (or + * globally) as being in use as uncore events may be duplicated + * for each pmu. Set the metric leader of such events to be the + * event that appears in metric_events. */ - metric_id = evsel__metric_id(ev); - evlist__for_each_entry_continue(metric_evlist, ev) { - if (!strcmp(evsel__metric_id(ev), metric_id)) + evlist__for_each_entry_continue(perf_evlist, ev) { + /* + * If events are grouped then the search can terminate + * when the group is left.
+ */ + if (!has_constraint && + ev->core.leader != metric_events[i]->core.leader && + evsel_same_pmu_or_none(evsel__leader(ev), evsel__leader(metric_events[i]))) + break; + if (!strcmp(metric_events[i]->name, ev->name)) { + set_bit(ev->core.idx, evlist_used); ev->metric_leader = metric_events[i]; + } } } - *out_metric_events = metric_events; - return 0; + + return metric_events[0]; +} + +static int metricgroup__setup_events(struct list_head *groups, + bool metric_no_merge, + struct evlist *perf_evlist, + struct rblist *metric_events_list) +{ + struct metric_event *me; + struct metric_expr *expr; + int i = 0; + int ret = 0; + struct metric *m; + struct evsel *evsel, *tmp; + unsigned long *evlist_used; + + evlist_used = bitmap_zalloc(perf_evlist->core.nr_entries); + if (!evlist_used) + return -ENOMEM; + + list_for_each_entry (m, groups, nd) { + struct evsel **metric_events; + struct metric_ref *metric_refs = NULL; + + metric_events = calloc(sizeof(void *), + hashmap__size(&m->pctx.ids) + 1); + if (!metric_events) { + ret = -ENOMEM; + break; + } + evsel = find_evsel_group(perf_evlist, &m->pctx, + metric_no_merge, + m->has_constraint, metric_events, + evlist_used); + if (!evsel) { + pr_debug("Cannot resolve %s: %s\n", + m->metric_name, m->metric_expr); + free(metric_events); + continue; + } + for (i = 0; metric_events[i]; i++) + metric_events[i]->collect_stat = true; + me = metricgroup__lookup(metric_events_list, evsel, true); + if (!me) { + ret = -ENOMEM; + free(metric_events); + break; + } + expr = malloc(sizeof(struct metric_expr)); + if (!expr) { + ret = -ENOMEM; + free(metric_events); + break; + } + + /* + * Collect and store collected nested expressions + * for metric processing. + */ + if (m->metric_refs_cnt) { + struct metric_ref_node *ref; + + metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1)); + if (!metric_refs) { + ret = -ENOMEM; + free(metric_events); + free(expr); + break; + } + + i = 0; + list_for_each_entry(ref, &m->metric_refs, list) { + /* + * Intentionally passing just const char pointers, + * originally from 'struct pmu_event' object. + * We don't need to change them, so there's no + * need to create our own copy. 
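metricgroup__setup_events() just below marks every event claimed by some metric in the evlist_used bitmap and, at the end, removes whatever was never marked. A self-contained sketch of that mark-and-prune step, with tiny stand-ins for the kernel's bitmap helpers:

#include <stdint.h>
#include <stdio.h>

/* Tiny stand-ins for set_bit()/test_bit() on a u64-word bitmap. */
static void bm_set(uint64_t *bm, int idx)
{
    bm[idx / 64] |= 1ULL << (idx % 64);
}

static int bm_test(const uint64_t *bm, int idx)
{
    return !!(bm[idx / 64] & (1ULL << (idx % 64)));
}

int main(void)
{
    const char *events[] = { "cycles", "instructions", "dummy", "branches" };
    uint64_t used[1] = { 0 };

    /* Pretend the metrics claimed the first two entries... */
    bm_set(used, 0);
    bm_set(used, 1);

    /* ...then prune anything no metric marked, as setup_events does. */
    for (int i = 0; i < 4; i++) {
        if (!bm_test(used, i))
            printf("removing unused event %s\n", events[i]);
    }
    return 0;
}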
+ */ + metric_refs[i].metric_name = ref->metric_name; + metric_refs[i].metric_expr = ref->metric_expr; + i++; + } + } + + expr->metric_refs = metric_refs; + expr->metric_expr = m->metric_expr; + expr->metric_name = m->metric_name; + expr->metric_unit = m->metric_unit; + expr->metric_events = metric_events; + expr->runtime = m->runtime; + list_add(&expr->nd, &me->head); + } + + evlist__for_each_entry_safe(perf_evlist, tmp, evsel) { + if (!test_bit(evsel->core.idx, evlist_used)) { + evlist__remove(perf_evlist, evsel); + evsel__delete(evsel); + } + } + bitmap_free(evlist_used); + + return ret; } static bool match_metric(const char *n, const char *list) @@ -343,7 +422,7 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pe_metric(const struct pmu_event *pe, const char *metric) +static bool match_pe_metric(struct pmu_event *pe, const char *metric) { return match_metric(pe->metric_group, metric) || match_metric(pe->metric_name, metric); @@ -427,7 +506,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) putchar('\n'); } -static int metricgroup__print_pmu_event(const struct pmu_event *pe, +static int metricgroup__print_pmu_event(struct pmu_event *pe, bool metricgroups, char *filter, bool raw, bool details, struct rblist *groups, @@ -502,14 +581,14 @@ struct metricgroup_print_sys_idata { bool details; }; -typedef int (*metricgroup_sys_event_iter_fn)(const struct pmu_event *pe, void *); +typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *); struct metricgroup_iter_data { metricgroup_sys_event_iter_fn fn; void *data; }; -static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data) +static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) { struct metricgroup_iter_data *d = data; struct perf_pmu *pmu = NULL; @@ -528,7 +607,7 @@ static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data) return 0; } -static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *data) +static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) { struct metricgroup_print_sys_idata *d = data; @@ -537,10 +616,10 @@ static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *d } void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw, bool details, const char *pmu_name) + bool raw, bool details) { - const struct pmu_events_map *map = pmu_events_map__find(); - const struct pmu_event *pe; + struct pmu_events_map *map = pmu_events_map__find(); + struct pmu_event *pe; int i; struct rblist groups; struct rb_node *node, *next; @@ -563,10 +642,6 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, break; if (!pe->metric_expr) continue; - if (pmu_name && perf_pmu__is_hybrid(pe->pmu) && - strcmp(pmu_name, pe->pmu)) { - continue; - } if (metricgroup__print_pmu_event(pe, metricgroups, filter, raw, details, &groups, metriclist) < 0) @@ -611,391 +686,150 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, strlist__delete(metriclist); } -static const char *code_characters = ",-=@"; - -static int encode_metric_id(struct strbuf *sb, const char *x) -{ - char *c; - int ret = 0; - - for (; *x; x++) { - c = strchr(code_characters, *x); - if (c) { - ret = strbuf_addch(sb, '!'); - if (ret) - break; - - ret = strbuf_addch(sb, '0' + (c - code_characters)); - if (ret) - break; - } else { - ret = strbuf_addch(sb, *x); - if (ret) - break; - } - } - return ret; -} - -static int 
decode_metric_id(struct strbuf *sb, const char *x) -{ - const char *orig = x; - size_t i; - char c; - int ret; - - for (; *x; x++) { - c = *x; - if (*x == '!') { - x++; - i = *x - '0'; - if (i > strlen(code_characters)) { - pr_err("Bad metric-id encoding in: '%s'", orig); - return -1; - } - c = code_characters[i]; - } - ret = strbuf_addch(sb, c); - if (ret) - return ret; - } - return 0; -} - -static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifier) -{ - struct evsel *ev; - struct strbuf sb = STRBUF_INIT; - char *cur; - int ret = 0; - - evlist__for_each_entry(perf_evlist, ev) { - if (!ev->metric_id) - continue; - - ret = strbuf_setlen(&sb, 0); - if (ret) - break; - - ret = decode_metric_id(&sb, ev->metric_id); - if (ret) - break; - - free((char *)ev->metric_id); - ev->metric_id = strdup(sb.buf); - if (!ev->metric_id) { - ret = -ENOMEM; - break; - } - /* - * If the name is just the parsed event, use the metric-id to - * give a more friendly display version. - */ - if (strstr(ev->name, "metric-id=")) { - bool has_slash = false; - - free(ev->name); - for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) { - *cur = '/'; - has_slash = true; - } - - if (modifier) { - if (!has_slash && !strchr(sb.buf, ':')) { - ret = strbuf_addch(&sb, ':'); - if (ret) - break; - } - ret = strbuf_addstr(&sb, modifier); - if (ret) - break; - } - ev->name = strdup(sb.buf); - if (!ev->name) { - ret = -ENOMEM; - break; - } - } - } - strbuf_release(&sb); - return ret; -} - -static int metricgroup__build_event_string(struct strbuf *events, - const struct expr_parse_ctx *ctx, - const char *modifier, - bool has_constraint) +static void metricgroup__add_metric_weak_group(struct strbuf *events, + struct expr_parse_ctx *ctx) { struct hashmap_entry *cur; size_t bkt; bool no_group = true, has_duration = false; - int ret = 0; -#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) - - hashmap__for_each_entry(ctx->ids, cur, bkt) { - const char *sep, *rsep, *id = cur->key; - - pr_debug("found event %s\n", id); + hashmap__for_each_entry((&ctx->ids), cur, bkt) { + pr_debug("found event %s\n", (const char *)cur->key); /* * Duration time maps to a software event and can make * groups not count. Always use it outside a * group. */ - if (!strcmp(id, "duration_time")) { + if (!strcmp(cur->key, "duration_time")) { has_duration = true; continue; } - /* Separate events with commas and open the group if necessary. */ - if (no_group) { - if (!has_constraint) { - ret = strbuf_addch(events, '{'); - RETURN_IF_NON_ZERO(ret); - } - - no_group = false; - } else { - ret = strbuf_addch(events, ','); - RETURN_IF_NON_ZERO(ret); - } - /* - * Encode the ID as an event string. Add a qualifier for - * metric_id that is the original name except with characters - * that parse-events can't parse replaced. 
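The comment above spells out the scheme the removed helpers implement: the characters parse-events cannot digest (",-=@") are escaped as '!' followed by an index into code_characters. The standalone pair below mirrors that encoding and its inverse; buffer handling is simplified (the real code builds into a strbuf and checks every append):

#include <stdio.h>
#include <string.h>

static const char *code_characters = ",-=@";

/* Escape characters that parse-events cannot handle as "!<index>". */
static void encode_id(const char *in, char *out)
{
    for (; *in; in++) {
        const char *c = strchr(code_characters, *in);

        if (c) {
            *out++ = '!';
            *out++ = '0' + (c - code_characters);
        } else {
            *out++ = *in;
        }
    }
    *out = '\0';
}

/* Undo the escaping: "!3" becomes '@' again. */
static void decode_id(const char *in, char *out)
{
    for (; *in; in++) {
        if (*in == '!' && in[1] >= '0' && in[1] <= '3')
            *out++ = code_characters[*++in - '0'];
        else
            *out++ = *in;
    }
    *out = '\0';
}

int main(void)
{
    char enc[64], dec[64];

    encode_id("msr@tsc@", enc); /* "msr!3tsc!3", as in the comment above */
    decode_id(enc, dec);        /* back to "msr@tsc@" */
    printf("%s -> %s\n", enc, dec);
    return 0;
}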
For example, - * 'msr@tsc@' gets added as msr/tsc,metric-id=msr!3tsc!3/ - */ - sep = strchr(id, '@'); - if (sep != NULL) { - ret = strbuf_add(events, id, sep - id); - RETURN_IF_NON_ZERO(ret); - ret = strbuf_addch(events, '/'); - RETURN_IF_NON_ZERO(ret); - rsep = strrchr(sep, '@'); - ret = strbuf_add(events, sep + 1, rsep - sep - 1); - RETURN_IF_NON_ZERO(ret); - ret = strbuf_addstr(events, ",metric-id="); - RETURN_IF_NON_ZERO(ret); - sep = rsep; - } else { - sep = strchr(id, ':'); - if (sep != NULL) { - ret = strbuf_add(events, id, sep - id); - RETURN_IF_NON_ZERO(ret); - } else { - ret = strbuf_addstr(events, id); - RETURN_IF_NON_ZERO(ret); - } - ret = strbuf_addstr(events, "/metric-id="); - RETURN_IF_NON_ZERO(ret); - } - ret = encode_metric_id(events, id); - RETURN_IF_NON_ZERO(ret); - ret = strbuf_addstr(events, "/"); - RETURN_IF_NON_ZERO(ret); - - if (sep != NULL) { - ret = strbuf_addstr(events, sep + 1); - RETURN_IF_NON_ZERO(ret); - } - if (modifier) { - ret = strbuf_addstr(events, modifier); - RETURN_IF_NON_ZERO(ret); - } + strbuf_addf(events, "%s%s", + no_group ? "{" : ",", + (const char *)cur->key); + no_group = false; } - if (has_duration) { - if (no_group) { - /* Strange case of a metric of just duration_time. */ - ret = strbuf_addf(events, "duration_time"); - } else if (!has_constraint) - ret = strbuf_addf(events, "}:W,duration_time"); - else - ret = strbuf_addf(events, ",duration_time"); - } else if (!no_group && !has_constraint) - ret = strbuf_addf(events, "}:W"); - - return ret; -#undef RETURN_IF_NON_ZERO + if (!no_group) { + strbuf_addf(events, "}:W"); + if (has_duration) + strbuf_addf(events, ",duration_time"); + } else if (has_duration) + strbuf_addf(events, "duration_time"); } -int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) +static void metricgroup__add_metric_non_group(struct strbuf *events, + struct expr_parse_ctx *ctx) +{ + struct hashmap_entry *cur; + size_t bkt; + bool first = true; + + hashmap__for_each_entry((&ctx->ids), cur, bkt) { + if (!first) + strbuf_addf(events, ","); + strbuf_addf(events, "%s", (const char *)cur->key); + first = false; + } +} + +static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +{ + static bool violate_nmi_constraint; + + if (!foot) { + pr_warning("Splitting metric group %s into standalone metrics.\n", name); + violate_nmi_constraint = true; + return; + } + + if (!violate_nmi_constraint) + return; + + pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" + " perf stat ...\n" + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); +} + +static bool metricgroup__has_constraint(struct pmu_event *pe) +{ + if (!pe->metric_constraint) + return false; + + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + sysctl__nmi_watchdog_enabled()) { + metricgroup___watchdog_constraint_hint(pe->metric_name, false); + return true; + } + + return false; +} + +int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) { return 1; } -/* - * A singly linked list on the stack of the names of metrics being - * processed. Used to identify recursion. 
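The comment above describes the removed visited_metric scheme: a singly linked list threaded through the recursive calls' stack frames, so a metric that (transitively) references itself is caught by walking the parent chain. A compilable sketch of the same idea, with a hard-wired A -> B -> A reference chain standing in for real metric expressions:

#include <stdio.h>
#include <string.h>

/* A list node living in each recursive frame, as in visited_metric. */
struct visited {
    const char *name;
    const struct visited *parent;
};

/* Recursively "resolve" a metric, failing if the name is already on the stack. */
static int resolve(const char *name, const struct visited *visited)
{
    struct visited me = { .name = name, .parent = visited };

    for (const struct visited *v = visited; v; v = v->parent) {
        if (!strcmp(v->name, name)) {
            fprintf(stderr, "recursion detected for %s\n", name);
            return -1;
        }
    }

    /* Illustrative reference chain: A -> B -> A, which must fail. */
    if (!strcmp(name, "A"))
        return resolve("B", &me);
    if (!strcmp(name, "B"))
        return resolve("A", &me);
    return 0;
}

int main(void)
{
    return resolve("A", NULL) ? 1 : 0;
}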
- */ -struct visited_metric { - const char *name; - const struct visited_metric *parent; -}; - struct metricgroup_add_iter_data { struct list_head *metric_list; - const char *metric_name; - const char *modifier; + const char *metric; + struct expr_ids *ids; int *ret; bool *has_match; bool metric_no_group; - struct metric *root_metric; - const struct visited_metric *visited; - const struct pmu_events_map *map; }; -static int add_metric(struct list_head *metric_list, - const struct pmu_event *pe, - const char *modifier, - bool metric_no_group, - struct metric *root_metric, - const struct visited_metric *visited, - const struct pmu_events_map *map); - -/** - * resolve_metric - Locate metrics within the root metric and recursively add - * references to them. - * @metric_list: The list the metric is added to. - * @modifier: if non-null event modifiers like "u". - * @metric_no_group: Should events written to events be grouped "{}" or - * global. Grouping is the default but due to multiplexing the - * user may override. - * @root_metric: Metrics may reference other metrics to form a tree. In this - * case the root_metric holds all the IDs and a list of referenced - * metrics. When adding a root this argument is NULL. - * @visited: A singly linked list of metric names being added that is used to - * detect recursion. - * @map: The map that is searched for metrics, most commonly the table for the - * architecture perf is running upon. - */ -static int resolve_metric(struct list_head *metric_list, - const char *modifier, - bool metric_no_group, - struct metric *root_metric, - const struct visited_metric *visited, - const struct pmu_events_map *map) -{ - struct hashmap_entry *cur; - size_t bkt; - struct to_resolve { - /* The metric to resolve. */ - const struct pmu_event *pe; - /* - * The key in the IDs map, this may differ from in case, - * etc. from pe->metric_name. - */ - const char *key; - } *pending = NULL; - int i, ret = 0, pending_cnt = 0; - - /* - * Iterate all the parsed IDs and if there's a matching metric and it to - * the pending array. - */ - hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { - const struct pmu_event *pe; - - pe = metricgroup__find_metric(cur->key, map); - if (pe) { - pending = realloc(pending, - (pending_cnt + 1) * sizeof(struct to_resolve)); - if (!pending) - return -ENOMEM; - - pending[pending_cnt].pe = pe; - pending[pending_cnt].key = cur->key; - pending_cnt++; - } - } - - /* Remove the metric IDs from the context. */ - for (i = 0; i < pending_cnt; i++) - expr__del_id(root_metric->pctx, pending[i].key); - - /* - * Recursively add all the metrics, IDs are added to the root metric's - * context. - */ - for (i = 0; i < pending_cnt; i++) { - ret = add_metric(metric_list, pending[i].pe, modifier, metric_no_group, - root_metric, visited, map); - if (ret) - break; - } - - free(pending); - return ret; -} - -/** - * __add_metric - Add a metric to metric_list. - * @metric_list: The list the metric is added to. - * @pe: The pmu_event containing the metric to be added. - * @modifier: if non-null event modifiers like "u". - * @metric_no_group: Should events written to events be grouped "{}" or - * global. Grouping is the default but due to multiplexing the - * user may override. - * @runtime: A special argument for the parser only known at runtime. - * @root_metric: Metrics may reference other metrics to form a tree. In this - * case the root_metric holds all the IDs and a list of referenced - * metrics. When adding a root this argument is NULL. 
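__add_metric() below keeps metric_refs as a NULL-terminated array, growing it with a realloc() sized for the new entry plus the terminator (the "cnt + 2" pattern). A standalone version of that append-with-terminator idiom; struct ref and the metric names are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ref { const char *name; };

/* Append to a NULL-terminated array: room for the entry plus terminator. */
static struct ref *append_ref(struct ref *refs, const char *name)
{
    int cnt = 0;

    if (refs) {
        while (refs[cnt].name) {
            if (!strcmp(refs[cnt].name, name))
                return refs; /* already referenced, nothing to add */
            cnt++;
        }
    }

    refs = realloc(refs, (cnt + 2) * sizeof(*refs));
    if (!refs)
        return NULL;
    refs[cnt].name = name;
    refs[cnt + 1].name = NULL; /* keep the array terminated */
    return refs;
}

int main(void)
{
    struct ref *refs = append_ref(NULL, "Frontend_Bound");

    if (refs)
        refs = append_ref(refs, "Backend_Bound");
    for (int i = 0; refs && refs[i].name; i++)
        puts(refs[i].name);
    free(refs);
    return 0;
}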
- * @visited: A singly linked list of metric names being added that is used to - * detect recursion. - * @map: The map that is searched for metrics, most commonly the table for the - * architecture perf is running upon. - */ static int __add_metric(struct list_head *metric_list, - const struct pmu_event *pe, - const char *modifier, + struct pmu_event *pe, bool metric_no_group, int runtime, - struct metric *root_metric, - const struct visited_metric *visited, - const struct pmu_events_map *map) + struct metric **mp, + struct expr_id *parent, + struct expr_ids *ids) { - const struct visited_metric *vm; - int ret; - bool is_root = !root_metric; - struct visited_metric visited_node = { - .name = pe->metric_name, - .parent = visited, - }; + struct metric_ref_node *ref; + struct metric *m; - for (vm = visited; vm; vm = vm->parent) { - if (!strcmp(pe->metric_name, vm->name)) { - pr_err("failed: recursion detected for %s\n", pe->metric_name); - return -1; - } - } - - if (is_root) { + if (*mp == NULL) { /* - * This metric is the root of a tree and may reference other - * metrics that are added recursively. + * We got in here for the parent group, + * allocate it and put it on the list. */ - root_metric = metric__new(pe, modifier, metric_no_group, runtime); - if (!root_metric) + m = zalloc(sizeof(*m)); + if (!m) return -ENOMEM; - } else { - int cnt = 0; + expr__ctx_init(&m->pctx); + m->metric_name = pe->metric_name; + m->metric_expr = pe->metric_expr; + m->metric_unit = pe->unit; + m->runtime = runtime; + m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + INIT_LIST_HEAD(&m->metric_refs); + m->metric_refs_cnt = 0; - /* - * This metric was referenced in a metric higher in the - * tree. Check if the same metric is already resolved in the - * metric_refs list. - */ - if (root_metric->metric_refs) { - for (; root_metric->metric_refs[cnt].metric_name; cnt++) { - if (!strcmp(pe->metric_name, - root_metric->metric_refs[cnt].metric_name)) - return 0; - } + parent = expr_ids__alloc(ids); + if (!parent) { + free(m); + return -EINVAL; } - /* Create reference. Need space for the entry and the terminator. */ - root_metric->metric_refs = realloc(root_metric->metric_refs, - (cnt + 2) * sizeof(struct metric_ref)); - if (!root_metric->metric_refs) + parent->id = strdup(pe->metric_name); + if (!parent->id) { + free(m); + return -ENOMEM; + } + *mp = m; + } else { + /* + * We got here for the referenced metric, via the + * recursive metricgroup__add_metric call, add + * it to the parent group. + */ + m = *mp; + + ref = malloc(sizeof(*ref)); + if (!ref) return -ENOMEM; /* @@ -1004,35 +838,54 @@ static int __add_metric(struct list_head *metric_list, * need to change them, so there's no need to create * our own copy. */ - root_metric->metric_refs[cnt].metric_name = pe->metric_name; - root_metric->metric_refs[cnt].metric_expr = pe->metric_expr; + ref->metric_name = pe->metric_name; + ref->metric_expr = pe->metric_expr; - /* Null terminate array. */ - root_metric->metric_refs[cnt+1].metric_name = NULL; - root_metric->metric_refs[cnt+1].metric_expr = NULL; + list_add(&ref->list, &m->metric_refs); + m->metric_refs_cnt++; } + /* Force all found IDs in metric to have us as parent ID. */ + WARN_ON_ONCE(!parent); + m->pctx.parent = parent; + /* * For both the parent and referenced metrics, we parse - * all the metric's IDs and add it to the root context. + * all the metric's IDs and add it to the parent context. */ - if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) { - /* Broken metric. 
*/ - ret = -EINVAL; - } else { - /* Resolve referenced metrics. */ - ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric, - &visited_node, map); + if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) { + if (m->metric_refs_cnt == 0) { + expr__ctx_clear(&m->pctx); + free(m); + *mp = NULL; + } + return -EINVAL; } - if (ret) { - if (is_root) - metric__free(root_metric); + /* + * We add the new group only in the 'parent' call, + * so bail out for the referenced metric case. + */ + if (m->metric_refs_cnt) + return 0; - } else if (is_root) - list_add(&root_metric->nd, metric_list); + if (list_empty(metric_list)) + list_add(&m->nd, metric_list); + else { + struct list_head *pos; - return ret; + /* Place the largest groups at the front. */ + list_for_each_prev(pos, metric_list) { + struct metric *old = list_entry(pos, struct metric, nd); + + if (hashmap__size(&m->pctx.ids) <= + hashmap__size(&old->pctx.ids)) + break; + } + list_add(&m->nd, pos); + } + + return 0; } #define map_for_each_event(__pe, __idx, __map) \ @@ -1047,10 +900,10 @@ static int __add_metric(struct list_head *metric_list, (match_metric(__pe->metric_group, __metric) || \ match_metric(__pe->metric_name, __metric))) -const struct pmu_event *metricgroup__find_metric(const char *metric, - const struct pmu_events_map *map) +struct pmu_event *metricgroup__find_metric(const char *metric, + struct pmu_events_map *map) { - const struct pmu_event *pe; + struct pmu_event *pe; int i; map_for_each_event(pe, i, map) { @@ -1061,21 +914,136 @@ const struct pmu_event *metricgroup__find_metric(const char *metric, return NULL; } -static int add_metric(struct list_head *metric_list, - const struct pmu_event *pe, - const char *modifier, - bool metric_no_group, - struct metric *root_metric, - const struct visited_metric *visited, - const struct pmu_events_map *map) +static int recursion_check(struct metric *m, const char *id, struct expr_id **parent, + struct expr_ids *ids) { + struct expr_id_data *data; + struct expr_id *p; + int ret; + + /* + * We get the parent referenced by the 'id' argument and + * traverse through all the parent object IDs to check + * if we already processed 'id', if we did, it's recursion + * and we fail. + */ + ret = expr__get_id(&m->pctx, id, &data); + if (ret) + return ret; + + p = expr_id_data__parent(data); + + while (p->parent) { + if (!strcmp(p->id, id)) { + pr_err("failed: recursion detected for %s\n", id); + return -1; + } + p = p->parent; + } + + /* + * If we are over the limit of static entries, the metric + * is too difficult/nested to process, fail as well. + */ + p = expr_ids__alloc(ids); + if (!p) { + pr_err("failed: too many nested metrics\n"); + return -EINVAL; + } + + p->id = strdup(id); + p->parent = expr_id_data__parent(data); + *parent = p; + + return p->id ? 0 : -ENOMEM; +} + +static int add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + struct metric **mp, + struct expr_id *parent, + struct expr_ids *ids); + +static int __resolve_metric(struct metric *m, + bool metric_no_group, + struct list_head *metric_list, + struct pmu_events_map *map, + struct expr_ids *ids) +{ + struct hashmap_entry *cur; + size_t bkt; + bool all; + int ret; + + /* + * Iterate all the parsed IDs and if there's a metric, + * add it to the context.
+ */ + do { + all = true; + hashmap__for_each_entry((&m->pctx.ids), cur, bkt) { + struct expr_id *parent; + struct pmu_event *pe; + + pe = metricgroup__find_metric(cur->key, map); + if (!pe) + continue; + + ret = recursion_check(m, cur->key, &parent, ids); + if (ret) + return ret; + + all = false; + /* The metric key itself needs to go out.. */ + expr__del_id(&m->pctx, cur->key); + + /* ... and it gets resolved to the parent context. */ + ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids); + if (ret) + return ret; + + /* + * We added new metric to hashmap, so we need + * to break the iteration and start over. + */ + break; + } + } while (!all); + + return 0; +} + +static int resolve_metric(bool metric_no_group, + struct list_head *metric_list, + struct pmu_events_map *map, + struct expr_ids *ids) +{ + struct metric *m; + int err; + + list_for_each_entry(m, metric_list, nd) { + err = __resolve_metric(m, metric_no_group, metric_list, map, ids); + if (err) + return err; + } + return 0; +} + +static int add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + struct metric **m, + struct expr_id *parent, + struct expr_ids *ids) +{ + struct metric *orig = *m; int ret = 0; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); if (!strstr(pe->metric_expr, "?")) { - ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0, - root_metric, visited, map); + ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids); } else { int j, count; @@ -1086,25 +1054,29 @@ static int add_metric(struct list_head *metric_list, * those events to metric_list. */ - for (j = 0; j < count && !ret; j++) - ret = __add_metric(metric_list, pe, modifier, metric_no_group, j, - root_metric, visited, map); + for (j = 0; j < count && !ret; j++, *m = orig) + ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids); } return ret; } -static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, +static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; + struct metric *m = NULL; int ret; - if (!match_pe_metric(pe, d->metric_name)) + if (!match_pe_metric(pe, d->metric)) return 0; - ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group, - d->root_metric, d->visited, d->map); + ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); + if (ret) + goto out; + + ret = resolve_metric(d->metric_no_group, + d->metric_list, NULL, d->ids); if (ret) goto out; @@ -1115,61 +1087,32 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, return ret; } -/** - * metric_list_cmp - list_sort comparator that sorts metrics with more events to - * the front. duration_time is excluded from the count. - */ -static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, - const struct list_head *r) -{ - const struct metric *left = container_of(l, struct metric, nd); - const struct metric *right = container_of(r, struct metric, nd); - struct expr_id_data *data; - int left_count, right_count; - - left_count = hashmap__size(left->pctx->ids); - if (!expr__get_id(left->pctx, "duration_time", &data)) - left_count--; - - right_count = hashmap__size(right->pctx->ids); - if (!expr__get_id(right->pctx, "duration_time", &data)) - right_count--; - - return right_count - left_count; -} - -/** - * metricgroup__add_metric - Find and add a metric, or a metric group. 
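__resolve_metric() above cannot keep iterating a hashmap it has just mutated, so each substitution breaks out of the walk and the surrounding do/while restarts it until a full pass makes no change. The same restart pattern over a plain array, as a compilable sketch (the names and the "uppercase means nested metric" rule are made up):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
    /* IDs still to resolve; an empty string marks a consumed slot. */
    char ids[4][16] = { "IPC", "cycles", "CPI", "instructions" };
    bool all;

    do {
        all = true;
        for (int i = 0; i < 4; i++) {
            /* Treat the uppercase names as nested metrics. */
            if (ids[i][0] >= 'A' && ids[i][0] <= 'Z') {
                printf("resolving nested metric %s\n", ids[i]);
                ids[i][0] = '\0'; /* mutate the set... */
                all = false;
                break;            /* ...then restart the walk */
            }
        }
    } while (!all);
    return 0;
}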
- * @metric_name: The name of the metric or metric group. For example, "IPC" - * could be the name of a metric and "TopDownL1" the name of a - * metric group. - * @modifier: if non-null event modifiers like "u". - * @metric_no_group: Should events written to events be grouped "{}" or - * global. Grouping is the default but due to multiplexing the - * user may override. - * @metric_list: The list that the metric or metric group are added to. - * @map: The map that is searched for metrics, most commonly the table for the - * architecture perf is running upon. - */ -static int metricgroup__add_metric(const char *metric_name, const char *modifier, - bool metric_no_group, +static int metricgroup__add_metric(const char *metric, bool metric_no_group, + struct strbuf *events, struct list_head *metric_list, - const struct pmu_events_map *map) + struct pmu_events_map *map) { - const struct pmu_event *pe; + struct expr_ids ids = { .cnt = 0, }; + struct pmu_event *pe; + struct metric *m; LIST_HEAD(list); int i, ret; bool has_match = false; - /* - * Iterate over all metrics seeing if metric matches either the name or - * group. When it does add the metric to the list. - */ - map_for_each_metric(pe, i, map, metric_name) { + map_for_each_metric(pe, i, map, metric) { has_match = true; - ret = add_metric(&list, pe, modifier, metric_no_group, - /*root_metric=*/NULL, - /*visited_metrics=*/NULL, map); + m = NULL; + + ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids); + if (ret) + goto out; + + /* + * Process any possible referenced metrics + * included in the expression. + */ + ret = resolve_metric(metric_no_group, + &list, map, &ids); if (ret) goto out; } @@ -1179,20 +1122,34 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier .fn = metricgroup__add_metric_sys_event_iter, .data = (void *) &(struct metricgroup_add_iter_data) { .metric_list = &list, - .metric_name = metric_name, - .modifier = modifier, + .metric = metric, .metric_no_group = metric_no_group, + .ids = &ids, .has_match = &has_match, .ret = &ret, - .map = map, }, }; pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); } /* End of pmu events. */ - if (!has_match) + if (!has_match) { ret = -EINVAL; + goto out; + } + + list_for_each_entry(m, &list, nd) { + if (events->len > 0) + strbuf_addf(events, ","); + + if (m->has_constraint) { + metricgroup__add_metric_non_group(events, + &m->pctx); + } else { + metricgroup__add_metric_weak_group(events, + &m->pctx); + } + } out: /* @@ -1200,325 +1157,95 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier * even if it's failed */ list_splice(&list, metric_list); + expr_ids__exit(&ids); return ret; } -/** - * metricgroup__add_metric_list - Find and add metrics, or metric groups, - * specified in a list. - * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1" - * would match the IPC and CPI metrics, and TopDownL1 would match all - * the metrics in the TopDownL1 group. - * @metric_no_group: Should events written to events be grouped "{}" or - * global. Grouping is the default but due to multiplexing the - * user may override. - * @metric_list: The list that metrics are added to. - * @map: The map that is searched for metrics, most commonly the table for the - * architecture perf is running upon. 
- */ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, + struct strbuf *events, struct list_head *metric_list, - const struct pmu_events_map *map) + struct pmu_events_map *map) { - char *list_itr, *list_copy, *metric_name, *modifier; - int ret, count = 0; + char *llist, *nlist, *p; + int ret = -EINVAL; - list_copy = strdup(list); - if (!list_copy) + nlist = strdup(list); + if (!nlist) return -ENOMEM; - list_itr = list_copy; + llist = nlist; - while ((metric_name = strsep(&list_itr, ",")) != NULL) { - modifier = strchr(metric_name, ':'); - if (modifier) - *modifier++ = '\0'; + strbuf_init(events, 100); + strbuf_addf(events, "%s", ""); - ret = metricgroup__add_metric(metric_name, modifier, - metric_no_group, metric_list, - map); - if (ret == -EINVAL) - pr_err("Cannot find metric or group `%s'\n", metric_name); - - if (ret) + while ((p = strsep(&llist, ",")) != NULL) { + ret = metricgroup__add_metric(p, metric_no_group, events, + metric_list, map); + if (ret == -EINVAL) { + fprintf(stderr, "Cannot find metric or group `%s'\n", + p); break; - - count++; + } } - free(list_copy); + free(nlist); - if (!ret) { - /* - * Warn about nmi_watchdog if any parsed metrics had the - * NO_NMI_WATCHDOG constraint. - */ + if (!ret) metricgroup___watchdog_constraint_hint(NULL, true); - /* No metrics. */ - if (count == 0) - return -EINVAL; - } + return ret; } +static void metric__free_refs(struct metric *metric) +{ + struct metric_ref_node *ref, *tmp; + + list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) { + list_del(&ref->list); + free(ref); + } +} + static void metricgroup__free_metrics(struct list_head *metric_list) { struct metric *m, *tmp; list_for_each_entry_safe (m, tmp, metric_list, nd) { + metric__free_refs(m); + expr__ctx_clear(&m->pctx); list_del_init(&m->nd); - metric__free(m); + free(m); } } -/** - * build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint - * metric IDs, as the IDs are held in a set, - * duplicates will be removed. - * @metric_list: List to take metrics from. - * @combined: Out argument for result. - */ -static int build_combined_expr_ctx(const struct list_head *metric_list, - struct expr_parse_ctx **combined) -{ - struct hashmap_entry *cur; - size_t bkt; - struct metric *m; - char *dup; - int ret; - - *combined = expr__ctx_new(); - if (!*combined) - return -ENOMEM; - - list_for_each_entry(m, metric_list, nd) { - if (m->has_constraint && !m->modifier) { - hashmap__for_each_entry(m->pctx->ids, cur, bkt) { - dup = strdup(cur->key); - if (!dup) { - ret = -ENOMEM; - goto err_out; - } - ret = expr__add_id(*combined, dup); - if (ret) - goto err_out; - } - } - } - return 0; -err_out: - expr__ctx_free(*combined); - *combined = NULL; - return ret; -} - -/** - * parse_ids - Build the event string for the ids and parse them creating an - * evlist. The encoded metric_ids are decoded. - * @metric_no_merge: is metric sharing explicitly disabled. - * @fake_pmu: used when testing metrics not supported by the current CPU. - * @ids: the event identifiers parsed from a metric. - * @modifier: any modifiers added to the events. - * @has_constraint: false if events should be placed in a weak group. - * @out_evlist: the created list of events. 
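metricgroup__add_metric_list() above splits the user's comma-separated metric list with strsep(), which consumes its input and therefore operates on a strdup()'d copy. A standalone sketch of that walk:

#define _DEFAULT_SOURCE /* for strsep() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    char *nlist = strdup("IPC,CPI,TopDownL1");
    char *llist = nlist, *p;

    if (!nlist)
        return 1;
    /* strsep() advances llist and NUL-terminates each token in place. */
    while ((p = strsep(&llist, ",")) != NULL)
        printf("adding metric or group '%s'\n", p);
    free(nlist);
    return 0;
}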
- */ -static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, - struct expr_parse_ctx *ids, const char *modifier, - bool has_constraint, struct evlist **out_evlist) -{ - struct parse_events_error parse_error; - struct evlist *parsed_evlist; - struct strbuf events = STRBUF_INIT; - int ret; - - *out_evlist = NULL; - if (!metric_no_merge || hashmap__size(ids->ids) == 0) { - char *tmp; - /* - * We may fail to share events between metrics because - * duration_time isn't present in one metric. For example, a - * ratio of cache misses doesn't need duration_time but the same - * events may be used for a misses per second. Events without - * sharing implies multiplexing, that is best avoided, so place - * duration_time in every group. - * - * Also, there may be no ids/events in the expression parsing - * context because of constant evaluation, e.g.: - * event1 if #smt_on else 0 - * Add a duration_time event to avoid a parse error on an empty - * string. - */ - tmp = strdup("duration_time"); - if (!tmp) - return -ENOMEM; - - ids__insert(ids->ids, tmp); - } - ret = metricgroup__build_event_string(&events, ids, modifier, - has_constraint); - if (ret) - return ret; - - parsed_evlist = evlist__new(); - if (!parsed_evlist) { - ret = -ENOMEM; - goto err_out; - } - pr_debug("Parsing metric events '%s'\n", events.buf); - parse_events_error__init(&parse_error); - ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu); - if (ret) { - parse_events_error__print(&parse_error, events.buf); - goto err_out; - } - ret = decode_all_metric_ids(parsed_evlist, modifier); - if (ret) - goto err_out; - - *out_evlist = parsed_evlist; - parsed_evlist = NULL; -err_out: - parse_events_error__exit(&parse_error); - evlist__delete(parsed_evlist); - strbuf_release(&events); - return ret; -} - static int parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_group, bool metric_no_merge, struct perf_pmu *fake_pmu, - struct rblist *metric_events_list, - const struct pmu_events_map *map) + struct rblist *metric_events, + struct pmu_events_map *map) { - struct evlist *combined_evlist = NULL; + struct parse_events_error parse_error; + struct strbuf extra_events; LIST_HEAD(metric_list); - struct metric *m; int ret; - if (metric_events_list->nr_entries == 0) - metricgroup__rblist_init(metric_events_list); + if (metric_events->nr_entries == 0) + metricgroup__rblist_init(metric_events); ret = metricgroup__add_metric_list(str, metric_no_group, - &metric_list, map); + &extra_events, &metric_list, map); if (ret) goto out; - - /* Sort metrics from largest to smallest. 
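Both sides of this hunk keep larger metrics ahead of smaller ones, one via the removed list_sort() call, the other via the list_for_each_prev() insertion seen earlier, so that a later metric can reuse events already requested by a bigger group. The same largest-first insertion over a plain singly linked list, as an illustrative sketch:

#include <stdio.h>

struct metric_node {
    int nr_ids; /* number of events the metric needs */
    struct metric_node *next;
};

/* Insert so the list stays ordered from most events to fewest. */
static void insert_sorted(struct metric_node **head, struct metric_node *m)
{
    struct metric_node **pos = head;

    while (*pos && (*pos)->nr_ids >= m->nr_ids)
        pos = &(*pos)->next;
    m->next = *pos;
    *pos = m;
}

int main(void)
{
    struct metric_node a = { 2, NULL }, b = { 5, NULL }, c = { 3, NULL };
    struct metric_node *head = NULL;

    insert_sorted(&head, &a);
    insert_sorted(&head, &b);
    insert_sorted(&head, &c);
    for (struct metric_node *m = head; m; m = m->next)
        printf("%d ", m->nr_ids); /* prints: 5 3 2 */
    putchar('\n');
    return 0;
}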
*/ - list_sort(NULL, &metric_list, metric_list_cmp); - - if (!metric_no_merge) { - struct expr_parse_ctx *combined = NULL; - - ret = build_combined_expr_ctx(&metric_list, &combined); - - if (!ret && combined && hashmap__size(combined->ids)) { - ret = parse_ids(metric_no_merge, fake_pmu, combined, - /*modifier=*/NULL, - /*has_constraint=*/true, - &combined_evlist); - } - if (combined) - expr__ctx_free(combined); - - if (ret) - goto out; + pr_debug("adding %s\n", extra_events.buf); + bzero(&parse_error, sizeof(parse_error)); + ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu); + if (ret) { + parse_events_print_error(&parse_error, extra_events.buf); + goto out; } - - list_for_each_entry(m, &metric_list, nd) { - struct metric_event *me; - struct evsel **metric_events; - struct evlist *metric_evlist = NULL; - struct metric *n; - struct metric_expr *expr; - - if (combined_evlist && m->has_constraint) { - metric_evlist = combined_evlist; - } else if (!metric_no_merge) { - /* - * See if the IDs for this metric are a subset of an - * earlier metric. - */ - list_for_each_entry(n, &metric_list, nd) { - if (m == n) - break; - - if (n->evlist == NULL) - continue; - - if ((!m->modifier && n->modifier) || - (m->modifier && !n->modifier) || - (m->modifier && n->modifier && - strcmp(m->modifier, n->modifier))) - continue; - - if (expr__subset_of_ids(n->pctx, m->pctx)) { - pr_debug("Events in '%s' fully contained within '%s'\n", - m->metric_name, n->metric_name); - metric_evlist = n->evlist; - break; - } - - } - } - if (!metric_evlist) { - ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier, - m->has_constraint, &m->evlist); - if (ret) - goto out; - - metric_evlist = m->evlist; - } - ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events); - if (ret) { - pr_debug("Cannot resolve IDs for %s: %s\n", - m->metric_name, m->metric_expr); - goto out; - } - - me = metricgroup__lookup(metric_events_list, metric_events[0], true); - - expr = malloc(sizeof(struct metric_expr)); - if (!expr) { - ret = -ENOMEM; - free(metric_events); - goto out; - } - - expr->metric_refs = m->metric_refs; - m->metric_refs = NULL; - expr->metric_expr = m->metric_expr; - if (m->modifier) { - char *tmp; - - if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0) - expr->metric_name = NULL; - else - expr->metric_name = tmp; - } else - expr->metric_name = strdup(m->metric_name); - - if (!expr->metric_name) { - ret = -ENOMEM; - free(metric_events); - goto out; - } - expr->metric_unit = m->metric_unit; - expr->metric_events = metric_events; - expr->runtime = m->pctx->runtime; - list_add(&expr->nd, &me->head); - } - - - if (combined_evlist) { - evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries); - evlist__delete(combined_evlist); - } - - list_for_each_entry(m, &metric_list, nd) { - if (m->evlist) - evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries); - } - + ret = metricgroup__setup_events(&metric_list, metric_no_merge, + perf_evlist, metric_events); out: metricgroup__free_metrics(&metric_list); + strbuf_release(&extra_events); return ret; } @@ -1529,14 +1256,14 @@ int metricgroup__parse_groups(const struct option *opt, struct rblist *metric_events) { struct evlist *perf_evlist = *(struct evlist **)opt->value; - const struct pmu_events_map *map = pmu_events_map__find(); + struct pmu_events_map *map = pmu_events_map__find(); return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); } int 
metricgroup__parse_groups_test(struct evlist *evlist, - const struct pmu_events_map *map, + struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1548,8 +1275,8 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metricgroup__has_metric(const char *metric) { - const struct pmu_events_map *map = pmu_events_map__find(); - const struct pmu_event *pe; + struct pmu_events_map *map = pmu_events_map__find(); + struct pmu_event *pe; int i; if (!map) @@ -1601,10 +1328,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, return -ENOMEM; new_expr->metric_expr = old_expr->metric_expr; - new_expr->metric_name = strdup(old_expr->metric_name); - if (!new_expr->metric_name) - return -ENOMEM; - + new_expr->metric_name = old_expr->metric_name; new_expr->metric_unit = old_expr->metric_unit; new_expr->runtime = old_expr->runtime; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 2b42b778d1..cc4a92492a 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -14,51 +14,24 @@ struct rblist; struct pmu_events_map; struct cgroup; -/** - * A node in a rblist keyed by the evsel. The global rblist of metric events - * generally exists in perf_stat_config. The evsel is looked up in the rblist - * yielding a list of metric_expr. - */ struct metric_event { struct rb_node nd; struct evsel *evsel; struct list_head head; /* list of metric_expr */ }; -/** - * A metric referenced by a metric_expr. When parsing a metric expression IDs - * will be looked up, matching either a value (from metric_events) or a - * metric_ref. A metric_ref will then be parsed recursively. The metric_refs and - * metric_events need to be known before parsing so that their values may be - * placed in the parse context for lookup. - */ struct metric_ref { const char *metric_name; const char *metric_expr; }; -/** - * One in a list of metric_expr associated with an evsel. The data is used to - * generate a metric value during stat output. - */ struct metric_expr { struct list_head nd; - /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; - /** The name of the meric such as "IPC". */ const char *metric_name; - /** - * The "ScaleUnit" that scales and adds a unit to the metric during - * output. For example, "6.4e-05MiB" means to scale the resulting metric - * by 6.4e-05 (typically converting a unit like cache lines to something - * more human intelligible) and then add "MiB" afterward when displayed. - */ const char *metric_unit; - /** Null terminated array of events used by the metric. */ struct evsel **metric_events; - /** Null terminated array of referenced metrics. */ struct metric_ref *metric_refs; - /** A value substituted for '?' during parsing. 
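The runtime field above holds the value substituted for '?' when add_metric() instantiates a metric once per arch_get_runtimeparam() value. An illustrative standalone expansion; the event name is made up, and in the real code the substitution happens inside the expression parser rather than by string rewriting:

#include <stdio.h>
#include <string.h>

/* Replace each '?' in expr with the runtime instance number. */
static void expand_runtime(const char *expr, int runtime, char *out, size_t sz)
{
    size_t l = 0;

    for (; *expr && l + 12 < sz; expr++) {
        if (*expr == '?')
            l += snprintf(out + l, sz - l, "%d", runtime);
        else
            out[l++] = *expr;
    }
    out[l] = '\0';
}

int main(void)
{
    char buf[64];

    /* A hypothetical per-slice event: one instance per runtime value. */
    for (int j = 0; j < 2; j++) {
        expand_runtime("slots_used_? / cycles", j, buf, sizeof(buf));
        printf("runtime %d: %s\n", j, buf);
    }
    return 0;
}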
*/ int runtime; }; @@ -70,19 +43,19 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); -const struct pmu_event *metricgroup__find_metric(const char *metric, - const struct pmu_events_map *map); +struct pmu_event *metricgroup__find_metric(const char *metric, + struct pmu_events_map *map); int metricgroup__parse_groups_test(struct evlist *evlist, - const struct pmu_events_map *map, + struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); void metricgroup__print(bool metrics, bool groups, char *filter, - bool raw, bool details, const char *pmu_name); + bool raw, bool details); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); +int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 0e8ff8d1e2..512dc8b9c1 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -94,7 +94,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } } -static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity) +static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) { void *data; size_t mmap_len; @@ -138,7 +138,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused, - struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused) + int cpu __maybe_unused, int affinity __maybe_unused) { return 0; } @@ -240,8 +240,7 @@ void mmap__munmap(struct mmap *map) static void build_node_mask(int node, struct mmap_cpu_mask *mask) { - int idx, nr_cpus; - struct perf_cpu cpu; + int c, cpu, nr_cpus; const struct perf_cpu_map *cpu_map = NULL; cpu_map = cpu_map__online(); @@ -249,16 +248,16 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) return; nr_cpus = perf_cpu_map__nr(cpu_map); - for (idx = 0; idx < nr_cpus; idx++) { - cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */ + for (c = 0; c < nr_cpus; c++) { + cpu = cpu_map->map[c]; /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - set_bit(cpu.cpu, mask->bits); + set_bit(cpu, mask->bits); } } static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { - map->affinity_mask.nbits = cpu__max_cpu().cpu; + map->affinity_mask.nbits = cpu__max_cpu(); map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits); if (!map->affinity_mask.bits) return -1; @@ -266,12 +265,12 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params * if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - set_bit(map->core.cpu.cpu, map->affinity_mask.bits); + set_bit(map->core.cpu, map->affinity_mask.bits); return 0; } -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu) +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) { if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", @@ -351,14 +350,3 @@ int perf_mmap__push(struct mmap *md, void *to, out: 
return rc; } - -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone) -{ - clone->nbits = original->nbits; - clone->bits = bitmap_zalloc(original->nbits); - if (!clone->bits) - return -ENOMEM; - - memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original)); - return 0; -} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 83f6bd4d40..af33118354 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include // for cpu_set_t #ifdef HAVE_AIO_SUPPORT @@ -53,7 +52,7 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu); +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void mmap__munmap(struct mmap *map); union perf_event *perf_mmap__read_forward(struct mmap *map); @@ -65,7 +64,4 @@ size_t mmap__mmap_len(struct mmap *map); void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag); -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, - struct mmap_cpu_mask *clone); - #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 48aa321730..608b20c72a 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -60,49 +60,17 @@ void namespaces__free(struct namespaces *namespaces) free(namespaces); } -static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path) -{ - FILE *f = NULL; - char *statln = NULL; - size_t linesz = 0; - char *nspid; - - f = fopen(path, "r"); - if (f == NULL) - return -1; - - while (getline(&statln, &linesz, f) != -1) { - /* Use tgid if CONFIG_PID_NS is not defined. */ - if (strstr(statln, "Tgid:") != NULL) { - nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), - NULL, 10); - nsi->nstgid = nsi->tgid; - } - - if (strstr(statln, "NStgid:") != NULL) { - nspid = strrchr(statln, '\t'); - nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); - /* - * If innermost tgid is not the first, process is in a different - * PID namespace. - */ - nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; - break; - } - } - - fclose(f); - free(statln); - return 0; -} - int nsinfo__init(struct nsinfo *nsi) { char oldns[PATH_MAX]; char spath[PATH_MAX]; char *newns = NULL; + char *statln = NULL; + char *nspid; struct stat old_stat; struct stat new_stat; + FILE *f = NULL; + size_t linesz = 0; int rv = -1; if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) @@ -132,9 +100,34 @@ int nsinfo__init(struct nsinfo *nsi) if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX) goto out; - rv = nsinfo__get_nspid(nsi, spath); + f = fopen(spath, "r"); + if (f == NULL) + goto out; + + while (getline(&statln, &linesz, f) != -1) { + /* Use tgid if CONFIG_PID_NS is not defined. */ + if (strstr(statln, "Tgid:") != NULL) { + nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), + NULL, 10); + nsi->nstgid = nsi->tgid; + } + + if (strstr(statln, "NStgid:") != NULL) { + nspid = strrchr(statln, '\t'); + nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); + /* If innermost tgid is not the first, process is in a different + * PID namespace. 
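nsinfo__init() above recovers the namespaced tgid by scanning /proc/<pid>/status: the Tgid: line is the fallback, and the last tab-separated field of the NStgid: line is the tgid in the innermost PID namespace. A self-contained version of that scan for the current process, with error handling trimmed as in the original:

#define _GNU_SOURCE /* for getline() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    FILE *f = fopen("/proc/self/status", "r");
    char *line = NULL;
    size_t len = 0;
    long tgid = -1, nstgid = -1;

    if (!f)
        return 1;
    while (getline(&line, &len, f) != -1) {
        /* Tgid: stands in when NStgid: is absent (no CONFIG_PID_NS). */
        if (strstr(line, "Tgid:") == line)
            nstgid = tgid = strtol(strrchr(line, '\t'), NULL, 10);
        /* The last field of NStgid: is the tgid seen inside the namespace. */
        if (strstr(line, "NStgid:") == line) {
            nstgid = strtol(strrchr(line, '\t'), NULL, 10);
            break;
        }
    }
    fclose(f);
    free(line);
    printf("tgid=%ld nstgid=%ld\n", tgid, nstgid);
    return 0;
}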
+ */ + nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; + break; + } + } + rv = 0; out: + if (f != NULL) + (void) fclose(f); + free(statln); free(newns); return rv; } @@ -306,12 +299,3 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi) return ret; } - -bool nsinfo__is_in_root_namespace(void) -{ - struct nsinfo nsi; - - memset(&nsi, 0x0, sizeof(nsi)); - nsinfo__get_nspid(&nsi, "/proc/self/status"); - return !nsi.in_pidns; -} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 9ceea96435..ad9775db7b 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -59,8 +59,6 @@ void nsinfo__mountns_exit(struct nscookie *nc); char *nsinfo__realpath(const char *path, struct nsinfo *nsi); int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi); -bool nsinfo__is_in_root_namespace(void); - static inline void __nsinfo__zput(struct nsinfo **nsip) { if (nsip) { diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 284f8eabd3..b234d95fb1 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -38,8 +38,7 @@ static void config_hybrid_attr(struct perf_event_attr *attr, static int create_event_hybrid(__u32 config_type, int *idx, struct list_head *list, - struct perf_event_attr *attr, const char *name, - const char *metric_id, + struct perf_event_attr *attr, char *name, struct list_head *config_terms, struct perf_pmu *pmu) { @@ -48,7 +47,7 @@ static int create_event_hybrid(__u32 config_type, int *idx, __u64 config = attr->config; config_hybrid_attr(attr, config_type, pmu->type); - evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ -63,19 +62,15 @@ static int create_event_hybrid(__u32 config_type, int *idx, static int pmu_cmp(struct parse_events_state *parse_state, struct perf_pmu *pmu) { - if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name) - return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name); + if (!parse_state->hybrid_pmu_name) + return 0; - if (parse_state->hybrid_pmu_name) - return strcmp(parse_state->hybrid_pmu_name, pmu->name); - - return 0; + return strcmp(parse_state->hybrid_pmu_name, pmu->name); } static int add_hw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms) + char *name, struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -89,7 +84,7 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HARDWARE, &parse_state->idx, list, attr, name, - metric_id, &terms, pmu); + &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -99,16 +94,14 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, } static int create_raw_event_hybrid(int *idx, struct list_head *list, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, + struct perf_event_attr *attr, char *name, struct list_head *config_terms, struct perf_pmu *pmu) { struct evsel *evsel; attr->type = pmu->type; - evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ 
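The status-file scan folded back into nsinfo__init() keys off the last tab-separated field of the NStgid: line; if that field is not also the first one, the process sits in a nested PID namespace. A runnable reduction of just that loop, reading /proc/self/status directly with the error handling trimmed:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/self/status", "r");
	char *line = NULL;
	size_t sz = 0;
	long nstgid = -1;
	bool in_pidns = false;

	if (f == NULL)
		return 1;

	while (getline(&line, &sz, f) != -1) {
		if (strstr(line, "NStgid:") != NULL) {
			char *nspid = strrchr(line, '\t');	/* innermost tgid is last */

			nstgid = strtol(nspid, NULL, 10);
			/* more than one field => nested PID namespace */
			in_pidns = (line + sizeof("NStgid:") - 1) != nspid;
			break;
		}
	}
	fclose(f);
	free(line);
	printf("nstgid=%ld in_pidns=%d\n", nstgid, in_pidns);
	return 0;
}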
-120,8 +113,7 @@ static int create_raw_event_hybrid(int *idx, struct list_head *list, static int add_raw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms) + char *name, struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -134,7 +126,7 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, metric_id, &terms, pmu); + name, &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -146,8 +138,7 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, + char *name, struct list_head *config_terms, bool *hybrid) { *hybrid = false; @@ -159,18 +150,16 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, *hybrid = true; if (attr->type != PERF_TYPE_RAW) { - return add_hw_hybrid(parse_state, list, attr, name, metric_id, + return add_hw_hybrid(parse_state, list, attr, name, config_terms); } - return add_raw_hybrid(parse_state, list, attr, name, metric_id, + return add_raw_hybrid(parse_state, list, attr, name, config_terms); } int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, + struct perf_event_attr *attr, char *name, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state) @@ -191,7 +180,7 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, metric_id, &terms, pmu); + attr, name, &terms, pmu); free_config_terms(&terms); if (ret) return ret; diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h index cbc05fec02..f33bd67aa8 100644 --- a/tools/perf/util/parse-events-hybrid.h +++ b/tools/perf/util/parse-events-hybrid.h @@ -11,13 +11,11 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, + char *name, struct list_head *config_terms, bool *hybrid); int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, const char *metric_id, + struct perf_event_attr *attr, char *name, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9739b05b99..51a2219df6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -19,6 +19,8 @@ #include #include "string2.h" #include "strlist.h" +#include "symbol.h" +#include "header.h" #include "bpf-loader.h" #include "debug.h" #include @@ -191,6 +193,39 @@ static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir) #define MAX_EVENT_LENGTH 512 +void parse_events__handle_error(struct parse_events_error *err, int idx, + char *str, char *help) +{ + if (WARN(!str, "WARNING: failed to provide error string\n")) { + free(help); + return; + } + switch (err->num_errors) { + case 0: + err->idx = idx; + err->str = str; + 
err->help = help; + break; + case 1: + err->first_idx = err->idx; + err->idx = idx; + err->first_str = err->str; + err->str = str; + err->first_help = err->help; + err->help = help; + break; + default: + pr_debug("Multiple errors dropping message: %s (%s)\n", + err->str, err->help); + free(err->str); + err->str = str; + free(err->help); + err->help = help; + break; + } + err->num_errors++; +} + struct tracepoint_path *tracepoint_id_to_path(u64 config) { struct tracepoint_path *path = NULL; @@ -299,7 +334,12 @@ const char *event_type(int type) return "unknown"; } -static char *get_config_str(struct list_head *head_terms, int type_term) +static int parse_events__is_name_term(struct parse_events_term *term) +{ + return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; +} + +static char *get_config_name(struct list_head *head_terms) { struct parse_events_term *term; @@ -307,27 +347,17 @@ static char *get_config_str(struct list_head *head_terms, int type_term) return NULL; list_for_each_entry(term, head_terms, list) - if (term->type_term == type_term) + if (parse_events__is_name_term(term)) return term->val.str; return NULL; } -static char *get_config_metric_id(struct list_head *head_terms) -{ - return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID); -} - -static char *get_config_name(struct list_head *head_terms) -{ - return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); -} - static struct evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, - const char *name, const char *metric_id, struct perf_pmu *pmu, + char *name, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, const char *cpu_list) { @@ -356,9 +386,6 @@ __add_event(struct list_head *list, int *idx, if (name) evsel->name = strdup(name); - if (metric_id) - evsel->metric_id = strdup(metric_id); - if (config_terms) list_splice_init(config_terms, &evsel->config_terms); @@ -369,21 +396,18 @@ __add_event(struct list_head *list, int *idx, } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct perf_pmu *pmu) + char *name, struct perf_pmu *pmu) { - return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, - metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL); + return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false, + NULL); } static int add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, const char *name, - const char *metric_id, struct list_head *config_terms) + struct perf_event_attr *attr, char *name, + struct list_head *config_terms) { - return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, - /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, true, name, NULL, config_terms, + false, NULL) ? 
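parse_events__handle_error() keeps at most two messages: the first error reported and the latest one, with everything in between freed. A sketch of that two-slot queue — err_state is a toy stand-in for struct parse_events_error, and the help strings are dropped for brevity:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct err_state {	/* toy stand-in for struct parse_events_error */
	int num_errors;
	int idx, first_idx;
	char *str, *first_str;
};

static void handle_error(struct err_state *e, int idx, char *str)
{
	switch (e->num_errors) {
	case 0:			/* first error: take the slot */
		e->idx = idx;
		e->str = str;
		break;
	case 1:			/* second error: park the first, keep the new one */
		e->first_idx = e->idx;
		e->idx = idx;
		e->first_str = e->str;
		e->str = str;
		break;
	default:		/* beyond two: drop the previous "latest" */
		free(e->str);
		e->str = str;
		break;
	}
	e->num_errors++;
}

int main(void)
{
	struct err_state e = { 0 };

	handle_error(&e, 3, strdup("bad term"));
	handle_error(&e, 9, strdup("bad modifier"));
	handle_error(&e, 12, strdup("unknown pmu"));
	printf("first: \"%s\"@%d, latest: \"%s\", %d total\n",
	       e.first_str, e.first_idx, e.str, e.num_errors);
	return 0;
}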
0 : -ENOMEM; } static int add_event_tool(struct list_head *list, int *idx, @@ -395,17 +419,13 @@ static int add_event_tool(struct list_head *list, int *idx, .config = PERF_COUNT_SW_DUMMY, }; - evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, - /*metric_id=*/NULL, /*pmu=*/NULL, - /*config_terms=*/NULL, /*auto_merge_stats=*/false, - /*cpu_list=*/"0"); + evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false, + "0"); if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) { - free((char *)evsel->unit); - evsel->unit = strdup("ns"); - } + if (tool_event == PERF_TOOL_DURATION_TIME) + evsel->unit = "ns"; return 0; } @@ -446,8 +466,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, { struct perf_event_attr attr; LIST_HEAD(config_terms); - char name[MAX_NAME_LEN]; - const char *config_name, *metric_id; + char name[MAX_NAME_LEN], *config_name; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n, ret; @@ -512,17 +531,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, return -ENOMEM; } - metric_id = get_config_metric_id(head_config); ret = parse_events__add_cache_hybrid(list, idx, &attr, - config_name ? : name, - metric_id, - &config_terms, + config_name ? : name, &config_terms, &hybrid, parse_state); if (hybrid) goto out_free_terms; - ret = add_event(list, idx, &attr, config_name ? : name, metric_id, - &config_terms); + ret = add_event(list, idx, &attr, config_name ? : name, &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -556,7 +571,7 @@ static void tracepoint_error(struct parse_events_error *e, int err, } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - parse_events_error__handle(e, 0, strdup(str), strdup(help)); + parse_events__handle_error(e, 0, strdup(str), strdup(help)); } static int add_tracepoint(struct list_head *list, int *idx, @@ -780,7 +795,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_events_error__handle(parse_state->error, 0, + parse_events__handle_error(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -800,7 +815,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, int err; if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - parse_events_error__handle(parse_state->error, term->err_term, + parse_events__handle_error(parse_state->error, term->err_term, strdup("Invalid config term for BPF object"), NULL); return -EINVAL; @@ -820,7 +835,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, else idx = term->err_term + error_pos; - parse_events_error__handle(parse_state->error, idx, + parse_events__handle_error(parse_state->error, idx, strdup(errbuf), strdup( "Hint:\tValid config terms:\n" @@ -892,7 +907,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, -err, errbuf, sizeof(errbuf)); - parse_events_error__handle(parse_state->error, 0, + parse_events__handle_error(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -916,7 +931,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct bpf_object *obj __maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events_error__handle(parse_state->error, 0, + parse_events__handle_error(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at 
build time.")); return -ENOTSUP; @@ -928,7 +943,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, bool source __maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events_error__handle(parse_state->error, 0, + parse_events__handle_error(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -999,8 +1014,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; - return add_event(list, idx, &attr, /*name=*/NULL, /*mertic_id=*/NULL, - /*config_terms=*/NULL); + return add_event(list, idx, &attr, NULL, NULL); } static int check_type_val(struct parse_events_term *term, @@ -1011,7 +1025,7 @@ static int check_type_val(struct parse_events_term *term, return 0; if (err) { - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, type == PARSE_EVENTS__TERM_TYPE_NUM ? strdup("expected numeric value") : strdup("expected string value"), @@ -1045,7 +1059,6 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", - [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", }; static bool config_term_shrinked; @@ -1056,7 +1069,7 @@ config_term_avail(int term_type, struct parse_events_error *err) char *err_str; if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) { - parse_events_error__handle(err, -1, + parse_events__handle_error(err, -1, strdup("Invalid term_type"), NULL); return false; } @@ -1068,7 +1081,6 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_NAME: - case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: return true; @@ -1079,7 +1091,7 @@ config_term_avail(int term_type, struct parse_events_error *err) /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", config_term_names[term_type]) >= 0) - parse_events_error__handle(err, -1, err_str, NULL); + parse_events__handle_error(err, -1, err_str, NULL); return false; } } @@ -1123,7 +1135,7 @@ do { \ if (strcmp(term->val.str, "no") && parse_branch_str(term->val.str, &attr->branch_sample_type)) { - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, strdup("invalid branch sample type"), NULL); return -EINVAL; @@ -1132,7 +1144,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_TIME: CHECK_TYPE_VAL(NUM); if (term->val.num > 1) { - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1159,9 +1171,6 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; - case PARSE_EVENTS__TERM_TYPE_METRIC_ID: - CHECK_TYPE_VAL(STR); - break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; @@ -1171,7 +1180,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_PERCORE: CHECK_TYPE_VAL(NUM); if ((unsigned int)term->val.num > 1) { - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1183,14 +1192,14 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: 
CHECK_TYPE_VAL(NUM); if (term->val.num > UINT_MAX) { - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, strdup("too big"), NULL); return -EINVAL; } break; default: - parse_events_error__handle(err, term->err_term, + parse_events__handle_error(err, term->err_term, strdup("unknown term"), parse_events_formats_error_string(NULL)); return -EINVAL; @@ -1244,7 +1253,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, return config_term_common(attr, term, err); default: if (err) { - parse_events_error__handle(err, term->err_term, + parse_events__handle_error(err, term->err_term, strdup("unknown term"), strdup("valid terms: call-graph,stack-size\n")); } @@ -1431,7 +1440,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, { struct perf_event_attr attr; LIST_HEAD(config_terms); - const char *name, *metric_id; bool hybrid; int ret; @@ -1448,16 +1456,14 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, return -ENOMEM; } - name = get_config_name(head_config); - metric_id = get_config_metric_id(head_config); ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, - name, metric_id, + get_config_name(head_config), &config_terms, &hybrid); if (hybrid) goto out_free_terms; - ret = add_event(list, &parse_state->idx, &attr, name, metric_id, - &config_terms); + ret = add_event(list, &parse_state->idx, &attr, + get_config_name(head_config), &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -1465,7 +1471,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - int tool_event) + enum perf_tool_event tool_event) { return add_event_tool(list, &parse_state->idx, tool_event); } @@ -1543,7 +1549,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (asprintf(&err_str, "Cannot find PMU `%s'. Missing kernel support?", name) >= 0) - parse_events_error__handle(err, 0, err_str, NULL); + parse_events__handle_error(err, 0, err_str, NULL); return -EINVAL; } @@ -1558,11 +1564,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, - /*init_attr=*/true, /*name=*/NULL, - /*metric_id=*/NULL, pmu, - /*config_terms=*/NULL, auto_merge_stats, - /*cpu_list=*/NULL); + evsel = __add_event(list, &parse_state->idx, &attr, true, NULL, + pmu, NULL, auto_merge_stats, NULL); if (evsel) { evsel->pmu_name = name ? 
strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; @@ -1615,10 +1618,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; } - evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, - get_config_name(head_config), - get_config_metric_id(head_config), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL); + evsel = __add_event(list, &parse_state->idx, &attr, true, + get_config_name(head_config), pmu, + &config_terms, auto_merge_stats, NULL); if (!evsel) return -ENOMEM; @@ -1632,8 +1634,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (parse_state->fake_pmu) return 0; - free((char *)evsel->unit); - evsel->unit = strdup(info.unit); + evsel->unit = info.unit; evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; @@ -1643,50 +1644,44 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - char *str, struct list_head *head, - struct list_head **listp) + char *str, struct list_head **listp) { struct parse_events_term *term; - struct list_head *list = NULL; + struct list_head *list; struct perf_pmu *pmu = NULL; int ok = 0; - char *config; *listp = NULL; - - if (!head) { - head = malloc(sizeof(struct list_head)); - if (!head) - goto out_err; - - INIT_LIST_HEAD(head); - } - config = strdup(str); - if (!config) - goto out_err; - - if (parse_events_term__num(&term, - PARSE_EVENTS__TERM_TYPE_USER, - config, 1, false, &config, - NULL) < 0) { - free(config); - goto out_err; - } - list_add_tail(&term->list, head); - - /* Add it for all PMUs that support the alias */ list = malloc(sizeof(struct list_head)); if (!list) - goto out_err; - + return -1; INIT_LIST_HEAD(list); - while ((pmu = perf_pmu__scan(pmu)) != NULL) { struct perf_pmu_alias *alias; list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { + struct list_head *head; + char *config; + + head = malloc(sizeof(struct list_head)); + if (!head) + return -1; + INIT_LIST_HEAD(head); + config = strdup(str); + if (!config) + return -1; + if (parse_events_term__num(&term, + PARSE_EVENTS__TERM_TYPE_USER, + config, 1, false, &config, + NULL) < 0) { + free(list); + free(config); + return -1; + } + list_add_tail(&term->list, head); + if (!parse_events_add_pmu(parse_state, list, pmu->name, head, true, true)) { @@ -1694,26 +1689,17 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, pmu->name, alias->str); ok++; } + + parse_events_terms__delete(head); } } } - - if (parse_state->fake_pmu) { - if (!parse_events_add_pmu(parse_state, list, str, head, - true, true)) { - pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str); - ok++; - } - } - -out_err: - if (ok) - *listp = list; - else + if (!ok) { free(list); - - parse_events_terms__delete(head); - return ok ? 
0 : -1; + return -1; + } + *listp = list; + return 0; } int parse_events__modifier_group(struct list_head *list, @@ -1833,11 +1819,6 @@ parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list, return ret; } -__weak struct evsel *arch_evlist__leader(struct list_head *list) -{ - return list_first_entry(list, struct evsel, core.node); -} - void parse_events__set_leader(char *name, struct list_head *list, struct parse_events_state *parse_state) { @@ -1851,10 +1832,9 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - leader = arch_evlist__leader(list); - __perf_evlist__set_leader(list, &leader->core); + __perf_evlist__set_leader(list); + leader = list_entry(list->next, struct evsel, core.node); leader->group_name = name ? strdup(name) : NULL; - list_move(&leader->core.node, list); } /* list_event is assumed to point to malloc'ed memory */ @@ -2049,7 +2029,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) return 0; } -int parse_events_name(struct list_head *list, const char *name) +int parse_events_name(struct list_head *list, char *name) { struct evsel *evsel; @@ -2107,17 +2087,8 @@ static void perf_pmu__parse_init(void) pmu = NULL; while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, &pmu->aliases, list) { - char *tmp = strchr(alias->name, '-'); - - if (tmp) { - char *tmp2 = NULL; - - tmp2 = strchr(tmp + 1, '-'); + if (strchr(alias->name, '-')) len++; - if (tmp2) - len++; - } - len++; } } @@ -2137,20 +2108,8 @@ static void perf_pmu__parse_init(void) list_for_each_entry(alias, &pmu->aliases, list) { struct perf_pmu_event_symbol *p = perf_pmu_events_list + len; char *tmp = strchr(alias->name, '-'); - char *tmp2 = NULL; - if (tmp) - tmp2 = strchr(tmp + 1, '-'); - if (tmp2) { - SET_SYMBOL(strndup(alias->name, tmp - alias->name), - PMU_EVENT_SYMBOL_PREFIX); - p++; - tmp++; - SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX); - p++; - SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2); - len += 3; - } else if (tmp) { + if (tmp != NULL) { SET_SYMBOL(strndup(alias->name, tmp - alias->name), PMU_EVENT_SYMBOL_PREFIX); p++; @@ -2177,38 +2136,23 @@ static void perf_pmu__parse_init(void) */ int perf_pmu__test_parse_init(void) { - struct perf_pmu_event_symbol *list, *tmp, symbols[] = { - {(char *)"read", PMU_EVENT_SYMBOL}, - {(char *)"event", PMU_EVENT_SYMBOL_PREFIX}, - {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX}, - {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX}, - {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2}, - }; - unsigned long i, j; + struct perf_pmu_event_symbol *list; - tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols)); + list = malloc(sizeof(*list) * 1); if (!list) return -ENOMEM; - for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) { - tmp->type = symbols[i].type; - tmp->symbol = strdup(symbols[i].symbol); - if (!list->symbol) - goto err_free; + list->type = PMU_EVENT_SYMBOL; + list->symbol = strdup("read"); + + if (!list->symbol) { + free(list); + return -ENOMEM; } perf_pmu_events_list = list; - perf_pmu_events_list_num = ARRAY_SIZE(symbols); - - qsort(perf_pmu_events_list, ARRAY_SIZE(symbols), - sizeof(struct perf_pmu_event_symbol), comp_pmu); + perf_pmu_events_list_num = 1; return 0; - -err_free: - for (j = 0, tmp = list; j < i; j++, tmp++) - free(tmp->symbol); - free(list); - return -ENOMEM; } enum perf_pmu_event_symbol_type @@ -2355,52 +2299,6 @@ int __parse_events(struct evlist *evlist, const char *str, return 
ret; } -void parse_events_error__init(struct parse_events_error *err) -{ - bzero(err, sizeof(*err)); -} - -void parse_events_error__exit(struct parse_events_error *err) -{ - zfree(&err->str); - zfree(&err->help); - zfree(&err->first_str); - zfree(&err->first_help); -} - -void parse_events_error__handle(struct parse_events_error *err, int idx, - char *str, char *help) -{ - if (WARN(!str, "WARNING: failed to provide error string\n")) { - free(help); - return; - } - switch (err->num_errors) { - case 0: - err->idx = idx; - err->str = str; - err->help = help; - break; - case 1: - err->first_idx = err->idx; - err->idx = idx; - err->first_str = err->str; - err->str = str; - err->first_help = err->help; - err->help = help; - break; - default: - pr_debug("Multiple errors dropping message: %s (%s)\n", - err->str, err->help); - free(err->str); - err->str = str; - free(err->help); - err->help = help; - break; - } - err->num_errors++; -} - #define MAX_WIDTH 1000 static int get_term_width(void) { @@ -2410,8 +2308,8 @@ static int get_term_width(void) return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col; } -static void __parse_events_error__print(int err_idx, const char *err_str, - const char *err_help, const char *event) +static void __parse_events_print_error(int err_idx, const char *err_str, + const char *err_help, const char *event) { const char *str = "invalid or unsupported event: "; char _buf[MAX_WIDTH]; @@ -2465,18 +2363,22 @@ static void __parse_events_error__print(int err_idx, const char *err_str, } } -void parse_events_error__print(struct parse_events_error *err, - const char *event) +void parse_events_print_error(struct parse_events_error *err, + const char *event) { if (!err->num_errors) return; - __parse_events_error__print(err->idx, err->str, err->help, event); + __parse_events_print_error(err->idx, err->str, err->help, event); + zfree(&err->str); + zfree(&err->help); if (err->num_errors > 1) { fputs("\nInitial error:\n", stderr); - __parse_events_error__print(err->first_idx, err->first_str, + __parse_events_print_error(err->first_idx, err->first_str, err->first_help, event); + zfree(&err->first_str); + zfree(&err->first_help); } } @@ -2489,14 +2391,13 @@ int parse_events_option(const struct option *opt, const char *str, struct parse_events_error err; int ret; - parse_events_error__init(&err); + bzero(&err, sizeof(err)); ret = parse_events(evlist, str, &err); if (ret) { - parse_events_error__print(&err, str); + parse_events_print_error(&err, str); fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } - parse_events_error__exit(&err); return ret; } @@ -2802,7 +2703,7 @@ int is_valid_tracepoint(const char *event_string) return 0; } -static bool is_event_supported(u8 type, u64 config) +static bool is_event_supported(u8 type, unsigned config) { bool ret = true; int open_return; @@ -2922,18 +2823,10 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; - char name[64], new_name[128]; - char **evt_list = NULL, **evt_pmus = NULL; + unsigned int type, op, i, evt_i = 0, evt_num = 0; + char name[64]; + char **evt_list = NULL; bool evt_num_known = false; - struct perf_pmu *pmu = NULL; - - if (perf_pmu__has_hybrid()) { - npmus = perf_pmu__hybrid_pmu_num(); - evt_pmus = zalloc(sizeof(char *) * npmus); - if (!evt_pmus) - goto out_enomem; - } restart: if (evt_num_known) { @@ -2949,61 +2842,20 @@ int print_hwcache_events(const char *event_glob, 
bool name_only) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - unsigned int hybrid_supported = 0, j; - bool supported; - __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - if (!perf_pmu__has_hybrid()) { - if (!is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) { - continue; - } - } else { - perf_pmu__for_each_hybrid_pmu(pmu) { - if (!evt_num_known) { - evt_num++; - continue; - } - - supported = is_event_supported( - PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16) | - ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)); - if (supported) { - snprintf(new_name, sizeof(new_name), "%s/%s/", - pmu->name, name); - evt_pmus[hybrid_supported] = strdup(new_name); - hybrid_supported++; - } - } - - if (hybrid_supported == 0) - continue; - } + if (!is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16))) + continue; if (!evt_num_known) { evt_num++; continue; } - if ((hybrid_supported == 0) || - (hybrid_supported == npmus)) { - evt_list[evt_i] = strdup(name); - if (npmus > 0) { - for (j = 0; j < npmus; j++) - zfree(&evt_pmus[j]); - } - } else { - for (j = 0; j < hybrid_supported; j++) { - evt_list[evt_i++] = evt_pmus[j]; - evt_pmus[j] = NULL; - } - continue; - } - + evt_list[evt_i] = strdup(name); if (evt_list[evt_i] == NULL) goto out_enomem; evt_i++; @@ -3015,13 +2867,6 @@ int print_hwcache_events(const char *event_glob, bool name_only) evt_num_known = true; goto restart; } - - for (evt_i = 0; evt_i < evt_num; evt_i++) { - if (!evt_list[evt_i]) - break; - } - - evt_num = evt_i; qsort(evt_list, evt_num, sizeof(char *), cmp_string); evt_i = 0; while (evt_i < evt_num) { @@ -3040,10 +2885,6 @@ int print_hwcache_events(const char *event_glob, bool name_only) for (evt_i = 0; evt_i < evt_num; evt_i++) zfree(&evt_list[evt_i]); zfree(&evt_list); - - for (evt_i = 0; evt_i < npmus; evt_i++) - zfree(&evt_pmus[evt_i]); - zfree(&evt_pmus); return evt_num; out_enomem: @@ -3153,8 +2994,7 @@ void print_symbol_events(const char *event_glob, unsigned type, * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name) + bool long_desc, bool details_flag, bool deprecated) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -3166,7 +3006,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); print_pmu_events(event_glob, name_only, quiet_flag, long_desc, - details_flag, deprecated, pmu_name); + details_flag, deprecated); if (event_glob != NULL) return; @@ -3192,8 +3032,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); - metricgroup__print(true, true, NULL, name_only, details_flag, - pmu_name); + metricgroup__print(true, true, NULL, name_only, details_flag); print_libpfm_events(name_only, long_desc); } @@ -3244,7 +3083,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config ? : strdup(config_term_names[type_term]), + .config = config, .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? 
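is_event_supported() is fed the usual HW cache encoding: cache id in byte 0, op in byte 1, result in byte 2. For instance, with the perf UAPI enums:

#include <stdio.h>
#include <linux/perf_event.h>

int main(void)
{
	/* L1D write miss, encoded exactly as in the hunk above */
	unsigned long long config =
		PERF_COUNT_HW_CACHE_L1D |
		(PERF_COUNT_HW_CACHE_OP_WRITE << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

	printf("PERF_TYPE_HW_CACHE config = %#llx\n", config);	/* 0x10100 */
	return 0;
}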
loc_val->first_column : 0, @@ -3388,7 +3227,7 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, if (!parse_state->error) return; - parse_events_error__handle(parse_state->error, idx, strdup(str), NULL); + parse_events__handle_error(parse_state->error, idx, strdup(str), NULL); } static void config_terms_list(char *buf, size_t buf_sz) @@ -3447,12 +3286,9 @@ char *parse_events_formats_error_string(char *additional_terms) struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct perf_pmu *pmu, + char *name, struct perf_pmu *pmu, struct list_head *config_terms) { - return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id, - pmu, config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL); + return __add_event(list, idx, attr, true, name, pmu, + config_terms, false, NULL); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index a38b8b160e..bf6e41aa9b 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -53,7 +53,6 @@ enum perf_pmu_event_symbol_type { PMU_EVENT_SYMBOL, /* normal style PMU event */ PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */ PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */ - PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */ }; struct perf_pmu_event_symbol { @@ -88,7 +87,6 @@ enum { PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, - PARSE_EVENTS__TERM_TYPE_METRIC_ID, __PARSE_EVENTS__TERM_TYPE_NR, }; @@ -143,6 +141,8 @@ struct parse_events_state { char *hybrid_pmu_name; }; +void parse_events__handle_error(struct parse_events_error *err, int idx, + char *str, char *help); void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, @@ -162,7 +162,7 @@ void parse_events_terms__purge(struct list_head *terms); void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); -int parse_events_name(struct list_head *list, const char *name); +int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, @@ -182,9 +182,10 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config); +enum perf_tool_event; int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - int tool_event); + enum perf_tool_event tool_event); int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *error, @@ -199,12 +200,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_alias); struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct perf_pmu *pmu); + char *name, struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, - struct list_head *head_config, struct list_head **listp); int parse_events_copy_term_list(struct list_head *old, @@ -220,8 +219,7 @@ void 
parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name); + bool long_desc, bool details_flag, bool deprecated); struct event_symbol { const char *symbol; @@ -243,12 +241,8 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); -void parse_events_error__init(struct parse_events_error *err); -void parse_events_error__exit(struct parse_events_error *err); -void parse_events_error__handle(struct parse_events_error *err, int idx, - char *str, char *help); -void parse_events_error__print(struct parse_events_error *err, - const char *event); +void parse_events_print_error(struct parse_events_error *err, + const char *event); #ifdef HAVE_LIBELF_SUPPORT /* @@ -273,9 +267,7 @@ int perf_pmu__test_parse_init(void); struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct perf_pmu *pmu, + char *name, struct perf_pmu *pmu, struct list_head *config_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 5b6e4b5249..923849024b 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -12,6 +12,7 @@ #include #include #include +#include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" #include "evsel.h" @@ -138,25 +139,18 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat yylval->str = strdup(text); - /* - * If we're not testing then parse check determines the PMU event type - * which if it isn't a PMU returns PE_NAME. When testing the result of - * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless - * an '!' is present in which case the text can't be a PMU name. - */ + if (parse_state->fake_pmu) + return PE_PMU_EVENT_FAKE; + switch (perf_pmu__parse_check(text)) { case PMU_EVENT_SYMBOL_PREFIX: return PE_PMU_EVENT_PRE; case PMU_EVENT_SYMBOL_SUFFIX: return PE_PMU_EVENT_SUF; - case PMU_EVENT_SYMBOL_SUFFIX2: - return PE_PMU_EVENT_SUF2; case PMU_EVENT_SYMBOL: - return parse_state->fake_pmu - ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; + return PE_KERNEL_PMU_EVENT; default: - return parse_state->fake_pmu && !strchr(text,'!') - ? PE_PMU_EVENT_FAKE : PE_NAME; + return PE_NAME; } } @@ -211,7 +205,7 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]* +name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 
@@ -301,7 +295,6 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } -metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } r{num_raw_hex} { return raw(yyscanner); } r0x{num_raw_hex} { return raw(yyscanner); } , { return ','; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index be8c517700..d94e48e1ff 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list, %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %type PE_VALUE @@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list, %type PE_MODIFIER_EVENT %type PE_MODIFIER_BP %type PE_EVENT_NAME -%type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type PE_DRV_CFG_TERM %type event_pmu_name %destructor { free ($$); } @@ -183,11 +183,6 @@ group_def ':' PE_MODIFIER_EVENT err = parse_events__modifier_group(list, $3); free($3); if (err) { - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - - parse_events_error__handle(error, @3.first_column, - strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } @@ -245,11 +240,6 @@ event_name PE_MODIFIER_EVENT err = parse_events__modifier_event(list, $2, false); free($2); if (err) { - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - - parse_events_error__handle(error, @2.first_column, - strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } @@ -352,39 +342,13 @@ PE_KERNEL_PMU_EVENT sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); + err = parse_events_multi_pmu_add(_parse_state, $1, &list); free($1); if (err < 0) YYABORT; $$ = list; } | -PE_KERNEL_PMU_EVENT opt_pmu_config -{ - struct list_head *list; - int err; - - /* frees $2 */ - err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); - free($1); - if (err < 0) - YYABORT; - $$ = list; -} -| -PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc -{ - struct list_head *list; - char pmu_name[128]; - snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5); - free($1); - free($3); - free($5); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) - YYABORT; - $$ = list; -} -| PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc { struct list_head *list; @@ -393,7 +357,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3); free($1); free($3); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) + if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0) YYABORT; $$ = list; } diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index c28dd50bd5..020411682a 100644 --- 
a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -11,7 +11,7 @@ typedef void (*setup_probe_fn_t)(struct evsel *evsel); -static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str) +static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) { struct evlist *evlist; struct evsel *evsel; @@ -29,7 +29,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const cha evsel = evlist__first(evlist); while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); if (fd < 0) { if (pid == -1 && errno == EACCES) { pid = 0; @@ -43,7 +43,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const cha fn(evsel); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); if (fd < 0) { if (errno == EINVAL) err = -EINVAL; @@ -61,13 +61,12 @@ static bool perf_probe_api(setup_probe_fn_t fn) { const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; struct perf_cpu_map *cpus; - struct perf_cpu cpu; - int ret, i = 0; + int cpu, ret, i = 0; cpus = perf_cpu_map__new(NULL); if (!cpus) return false; - cpu = perf_cpu_map__cpu(cpus, 0); + cpu = cpus->map[0]; perf_cpu_map__put(cpus); do { @@ -137,17 +136,15 @@ bool perf_can_record_cpu_wide(void) .exclude_kernel = 1, }; struct perf_cpu_map *cpus; - struct perf_cpu cpu; - int fd; + int cpu, fd; cpus = perf_cpu_map__new(NULL); if (!cpus) return false; - - cpu = perf_cpu_map__cpu(cpus, 0); + cpu = cpus->map[0]; perf_cpu_map__put(cpus); - fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0); + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); if (fd < 0) return false; close(fd); diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 98af3fa4ea..47b7531f51 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), - bit_name(TYPE_SAVE), bit_name(HW_INDEX), + bit_name(HW_INDEX), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a982e40ee5..5ee47ae150 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include "perf_regs.h" #include "event.h" @@ -21,679 +20,11 @@ uint64_t __weak arch__user_reg_mask(void) } #ifdef HAVE_PERF_REGS_SUPPORT - -#define perf_event_arm_regs perf_event_arm64_regs -#include "../../arch/arm64/include/uapi/asm/perf_regs.h" -#undef perf_event_arm_regs - -#include "../../arch/arm/include/uapi/asm/perf_regs.h" -#include "../../arch/csky/include/uapi/asm/perf_regs.h" -#include "../../arch/mips/include/uapi/asm/perf_regs.h" -#include "../../arch/powerpc/include/uapi/asm/perf_regs.h" -#include "../../arch/riscv/include/uapi/asm/perf_regs.h" -#include "../../arch/s390/include/uapi/asm/perf_regs.h" -#include "../../arch/x86/include/uapi/asm/perf_regs.h" - -static const char *__perf_reg_name_arm64(int id) -{ - switch (id) { - case PERF_REG_ARM64_X0: - return "x0"; - case PERF_REG_ARM64_X1: - return "x1"; - case PERF_REG_ARM64_X2: - return "x2"; - case 
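perf_do_probe_api() opens its probe event system-wide first and falls back to a per-task open when that needs privileges (EACCES). The same open-and-retry dance against the raw syscall — cpu-clock:u is one of the fallbacks in the try[] list above, but the exact event here is otherwise an arbitrary choice:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_counter(struct perf_event_attr *attr, int pid, int cpu)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, -1, 0);
}

int main(void)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.size		= sizeof(attr),
		.config		= PERF_COUNT_SW_CPU_CLOCK,
		.exclude_kernel	= 1,
	};
	int pid = -1, cpu = 0, fd;

	fd = open_counter(&attr, pid, cpu);	/* system-wide on cpu 0 */
	if (fd < 0 && pid == -1 && errno == EACCES) {
		pid = 0;			/* unprivileged: current task only */
		fd = open_counter(&attr, pid, cpu);
	}
	if (fd < 0) {
		fprintf(stderr, "probe failed: %s\n", strerror(errno));
		return 1;
	}
	printf("probe ok (pid=%d)\n", pid);
	close(fd);
	return 0;
}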
PERF_REG_ARM64_X3: - return "x3"; - case PERF_REG_ARM64_X4: - return "x4"; - case PERF_REG_ARM64_X5: - return "x5"; - case PERF_REG_ARM64_X6: - return "x6"; - case PERF_REG_ARM64_X7: - return "x7"; - case PERF_REG_ARM64_X8: - return "x8"; - case PERF_REG_ARM64_X9: - return "x9"; - case PERF_REG_ARM64_X10: - return "x10"; - case PERF_REG_ARM64_X11: - return "x11"; - case PERF_REG_ARM64_X12: - return "x12"; - case PERF_REG_ARM64_X13: - return "x13"; - case PERF_REG_ARM64_X14: - return "x14"; - case PERF_REG_ARM64_X15: - return "x15"; - case PERF_REG_ARM64_X16: - return "x16"; - case PERF_REG_ARM64_X17: - return "x17"; - case PERF_REG_ARM64_X18: - return "x18"; - case PERF_REG_ARM64_X19: - return "x19"; - case PERF_REG_ARM64_X20: - return "x20"; - case PERF_REG_ARM64_X21: - return "x21"; - case PERF_REG_ARM64_X22: - return "x22"; - case PERF_REG_ARM64_X23: - return "x23"; - case PERF_REG_ARM64_X24: - return "x24"; - case PERF_REG_ARM64_X25: - return "x25"; - case PERF_REG_ARM64_X26: - return "x26"; - case PERF_REG_ARM64_X27: - return "x27"; - case PERF_REG_ARM64_X28: - return "x28"; - case PERF_REG_ARM64_X29: - return "x29"; - case PERF_REG_ARM64_SP: - return "sp"; - case PERF_REG_ARM64_LR: - return "lr"; - case PERF_REG_ARM64_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - -static const char *__perf_reg_name_arm(int id) -{ - switch (id) { - case PERF_REG_ARM_R0: - return "r0"; - case PERF_REG_ARM_R1: - return "r1"; - case PERF_REG_ARM_R2: - return "r2"; - case PERF_REG_ARM_R3: - return "r3"; - case PERF_REG_ARM_R4: - return "r4"; - case PERF_REG_ARM_R5: - return "r5"; - case PERF_REG_ARM_R6: - return "r6"; - case PERF_REG_ARM_R7: - return "r7"; - case PERF_REG_ARM_R8: - return "r8"; - case PERF_REG_ARM_R9: - return "r9"; - case PERF_REG_ARM_R10: - return "r10"; - case PERF_REG_ARM_FP: - return "fp"; - case PERF_REG_ARM_IP: - return "ip"; - case PERF_REG_ARM_SP: - return "sp"; - case PERF_REG_ARM_LR: - return "lr"; - case PERF_REG_ARM_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - -static const char *__perf_reg_name_csky(int id) -{ - switch (id) { - case PERF_REG_CSKY_A0: - return "a0"; - case PERF_REG_CSKY_A1: - return "a1"; - case PERF_REG_CSKY_A2: - return "a2"; - case PERF_REG_CSKY_A3: - return "a3"; - case PERF_REG_CSKY_REGS0: - return "regs0"; - case PERF_REG_CSKY_REGS1: - return "regs1"; - case PERF_REG_CSKY_REGS2: - return "regs2"; - case PERF_REG_CSKY_REGS3: - return "regs3"; - case PERF_REG_CSKY_REGS4: - return "regs4"; - case PERF_REG_CSKY_REGS5: - return "regs5"; - case PERF_REG_CSKY_REGS6: - return "regs6"; - case PERF_REG_CSKY_REGS7: - return "regs7"; - case PERF_REG_CSKY_REGS8: - return "regs8"; - case PERF_REG_CSKY_REGS9: - return "regs9"; - case PERF_REG_CSKY_SP: - return "sp"; - case PERF_REG_CSKY_LR: - return "lr"; - case PERF_REG_CSKY_PC: - return "pc"; -#if defined(__CSKYABIV2__) - case PERF_REG_CSKY_EXREGS0: - return "exregs0"; - case PERF_REG_CSKY_EXREGS1: - return "exregs1"; - case PERF_REG_CSKY_EXREGS2: - return "exregs2"; - case PERF_REG_CSKY_EXREGS3: - return "exregs3"; - case PERF_REG_CSKY_EXREGS4: - return "exregs4"; - case PERF_REG_CSKY_EXREGS5: - return "exregs5"; - case PERF_REG_CSKY_EXREGS6: - return "exregs6"; - case PERF_REG_CSKY_EXREGS7: - return "exregs7"; - case PERF_REG_CSKY_EXREGS8: - return "exregs8"; - case PERF_REG_CSKY_EXREGS9: - return "exregs9"; - case PERF_REG_CSKY_EXREGS10: - return "exregs10"; - case PERF_REG_CSKY_EXREGS11: - return "exregs11"; - case PERF_REG_CSKY_EXREGS12: - return "exregs12"; - 
case PERF_REG_CSKY_EXREGS13: - return "exregs13"; - case PERF_REG_CSKY_EXREGS14: - return "exregs14"; - case PERF_REG_CSKY_TLS: - return "tls"; - case PERF_REG_CSKY_HI: - return "hi"; - case PERF_REG_CSKY_LO: - return "lo"; -#endif - default: - return NULL; - } - - return NULL; -} - -static const char *__perf_reg_name_mips(int id) -{ - switch (id) { - case PERF_REG_MIPS_PC: - return "PC"; - case PERF_REG_MIPS_R1: - return "$1"; - case PERF_REG_MIPS_R2: - return "$2"; - case PERF_REG_MIPS_R3: - return "$3"; - case PERF_REG_MIPS_R4: - return "$4"; - case PERF_REG_MIPS_R5: - return "$5"; - case PERF_REG_MIPS_R6: - return "$6"; - case PERF_REG_MIPS_R7: - return "$7"; - case PERF_REG_MIPS_R8: - return "$8"; - case PERF_REG_MIPS_R9: - return "$9"; - case PERF_REG_MIPS_R10: - return "$10"; - case PERF_REG_MIPS_R11: - return "$11"; - case PERF_REG_MIPS_R12: - return "$12"; - case PERF_REG_MIPS_R13: - return "$13"; - case PERF_REG_MIPS_R14: - return "$14"; - case PERF_REG_MIPS_R15: - return "$15"; - case PERF_REG_MIPS_R16: - return "$16"; - case PERF_REG_MIPS_R17: - return "$17"; - case PERF_REG_MIPS_R18: - return "$18"; - case PERF_REG_MIPS_R19: - return "$19"; - case PERF_REG_MIPS_R20: - return "$20"; - case PERF_REG_MIPS_R21: - return "$21"; - case PERF_REG_MIPS_R22: - return "$22"; - case PERF_REG_MIPS_R23: - return "$23"; - case PERF_REG_MIPS_R24: - return "$24"; - case PERF_REG_MIPS_R25: - return "$25"; - case PERF_REG_MIPS_R28: - return "$28"; - case PERF_REG_MIPS_R29: - return "$29"; - case PERF_REG_MIPS_R30: - return "$30"; - case PERF_REG_MIPS_R31: - return "$31"; - default: - break; - } - return NULL; -} - -static const char *__perf_reg_name_powerpc(int id) -{ - switch (id) { - case PERF_REG_POWERPC_R0: - return "r0"; - case PERF_REG_POWERPC_R1: - return "r1"; - case PERF_REG_POWERPC_R2: - return "r2"; - case PERF_REG_POWERPC_R3: - return "r3"; - case PERF_REG_POWERPC_R4: - return "r4"; - case PERF_REG_POWERPC_R5: - return "r5"; - case PERF_REG_POWERPC_R6: - return "r6"; - case PERF_REG_POWERPC_R7: - return "r7"; - case PERF_REG_POWERPC_R8: - return "r8"; - case PERF_REG_POWERPC_R9: - return "r9"; - case PERF_REG_POWERPC_R10: - return "r10"; - case PERF_REG_POWERPC_R11: - return "r11"; - case PERF_REG_POWERPC_R12: - return "r12"; - case PERF_REG_POWERPC_R13: - return "r13"; - case PERF_REG_POWERPC_R14: - return "r14"; - case PERF_REG_POWERPC_R15: - return "r15"; - case PERF_REG_POWERPC_R16: - return "r16"; - case PERF_REG_POWERPC_R17: - return "r17"; - case PERF_REG_POWERPC_R18: - return "r18"; - case PERF_REG_POWERPC_R19: - return "r19"; - case PERF_REG_POWERPC_R20: - return "r20"; - case PERF_REG_POWERPC_R21: - return "r21"; - case PERF_REG_POWERPC_R22: - return "r22"; - case PERF_REG_POWERPC_R23: - return "r23"; - case PERF_REG_POWERPC_R24: - return "r24"; - case PERF_REG_POWERPC_R25: - return "r25"; - case PERF_REG_POWERPC_R26: - return "r26"; - case PERF_REG_POWERPC_R27: - return "r27"; - case PERF_REG_POWERPC_R28: - return "r28"; - case PERF_REG_POWERPC_R29: - return "r29"; - case PERF_REG_POWERPC_R30: - return "r30"; - case PERF_REG_POWERPC_R31: - return "r31"; - case PERF_REG_POWERPC_NIP: - return "nip"; - case PERF_REG_POWERPC_MSR: - return "msr"; - case PERF_REG_POWERPC_ORIG_R3: - return "orig_r3"; - case PERF_REG_POWERPC_CTR: - return "ctr"; - case PERF_REG_POWERPC_LINK: - return "link"; - case PERF_REG_POWERPC_XER: - return "xer"; - case PERF_REG_POWERPC_CCR: - return "ccr"; - case PERF_REG_POWERPC_SOFTE: - return "softe"; - case PERF_REG_POWERPC_TRAP: - return "trap"; - case 
PERF_REG_POWERPC_DAR: - return "dar"; - case PERF_REG_POWERPC_DSISR: - return "dsisr"; - case PERF_REG_POWERPC_SIER: - return "sier"; - case PERF_REG_POWERPC_MMCRA: - return "mmcra"; - case PERF_REG_POWERPC_MMCR0: - return "mmcr0"; - case PERF_REG_POWERPC_MMCR1: - return "mmcr1"; - case PERF_REG_POWERPC_MMCR2: - return "mmcr2"; - case PERF_REG_POWERPC_MMCR3: - return "mmcr3"; - case PERF_REG_POWERPC_SIER2: - return "sier2"; - case PERF_REG_POWERPC_SIER3: - return "sier3"; - case PERF_REG_POWERPC_PMC1: - return "pmc1"; - case PERF_REG_POWERPC_PMC2: - return "pmc2"; - case PERF_REG_POWERPC_PMC3: - return "pmc3"; - case PERF_REG_POWERPC_PMC4: - return "pmc4"; - case PERF_REG_POWERPC_PMC5: - return "pmc5"; - case PERF_REG_POWERPC_PMC6: - return "pmc6"; - case PERF_REG_POWERPC_SDAR: - return "sdar"; - case PERF_REG_POWERPC_SIAR: - return "siar"; - default: - break; - } - return NULL; -} - -static const char *__perf_reg_name_riscv(int id) -{ - switch (id) { - case PERF_REG_RISCV_PC: - return "pc"; - case PERF_REG_RISCV_RA: - return "ra"; - case PERF_REG_RISCV_SP: - return "sp"; - case PERF_REG_RISCV_GP: - return "gp"; - case PERF_REG_RISCV_TP: - return "tp"; - case PERF_REG_RISCV_T0: - return "t0"; - case PERF_REG_RISCV_T1: - return "t1"; - case PERF_REG_RISCV_T2: - return "t2"; - case PERF_REG_RISCV_S0: - return "s0"; - case PERF_REG_RISCV_S1: - return "s1"; - case PERF_REG_RISCV_A0: - return "a0"; - case PERF_REG_RISCV_A1: - return "a1"; - case PERF_REG_RISCV_A2: - return "a2"; - case PERF_REG_RISCV_A3: - return "a3"; - case PERF_REG_RISCV_A4: - return "a4"; - case PERF_REG_RISCV_A5: - return "a5"; - case PERF_REG_RISCV_A6: - return "a6"; - case PERF_REG_RISCV_A7: - return "a7"; - case PERF_REG_RISCV_S2: - return "s2"; - case PERF_REG_RISCV_S3: - return "s3"; - case PERF_REG_RISCV_S4: - return "s4"; - case PERF_REG_RISCV_S5: - return "s5"; - case PERF_REG_RISCV_S6: - return "s6"; - case PERF_REG_RISCV_S7: - return "s7"; - case PERF_REG_RISCV_S8: - return "s8"; - case PERF_REG_RISCV_S9: - return "s9"; - case PERF_REG_RISCV_S10: - return "s10"; - case PERF_REG_RISCV_S11: - return "s11"; - case PERF_REG_RISCV_T3: - return "t3"; - case PERF_REG_RISCV_T4: - return "t4"; - case PERF_REG_RISCV_T5: - return "t5"; - case PERF_REG_RISCV_T6: - return "t6"; - default: - return NULL; - } - - return NULL; -} - -static const char *__perf_reg_name_s390(int id) -{ - switch (id) { - case PERF_REG_S390_R0: - return "R0"; - case PERF_REG_S390_R1: - return "R1"; - case PERF_REG_S390_R2: - return "R2"; - case PERF_REG_S390_R3: - return "R3"; - case PERF_REG_S390_R4: - return "R4"; - case PERF_REG_S390_R5: - return "R5"; - case PERF_REG_S390_R6: - return "R6"; - case PERF_REG_S390_R7: - return "R7"; - case PERF_REG_S390_R8: - return "R8"; - case PERF_REG_S390_R9: - return "R9"; - case PERF_REG_S390_R10: - return "R10"; - case PERF_REG_S390_R11: - return "R11"; - case PERF_REG_S390_R12: - return "R12"; - case PERF_REG_S390_R13: - return "R13"; - case PERF_REG_S390_R14: - return "R14"; - case PERF_REG_S390_R15: - return "R15"; - case PERF_REG_S390_FP0: - return "FP0"; - case PERF_REG_S390_FP1: - return "FP1"; - case PERF_REG_S390_FP2: - return "FP2"; - case PERF_REG_S390_FP3: - return "FP3"; - case PERF_REG_S390_FP4: - return "FP4"; - case PERF_REG_S390_FP5: - return "FP5"; - case PERF_REG_S390_FP6: - return "FP6"; - case PERF_REG_S390_FP7: - return "FP7"; - case PERF_REG_S390_FP8: - return "FP8"; - case PERF_REG_S390_FP9: - return "FP9"; - case PERF_REG_S390_FP10: - return "FP10"; - case PERF_REG_S390_FP11: - return 
"FP11"; - case PERF_REG_S390_FP12: - return "FP12"; - case PERF_REG_S390_FP13: - return "FP13"; - case PERF_REG_S390_FP14: - return "FP14"; - case PERF_REG_S390_FP15: - return "FP15"; - case PERF_REG_S390_MASK: - return "MASK"; - case PERF_REG_S390_PC: - return "PC"; - default: - return NULL; - } - - return NULL; -} - -static const char *__perf_reg_name_x86(int id) -{ - switch (id) { - case PERF_REG_X86_AX: - return "AX"; - case PERF_REG_X86_BX: - return "BX"; - case PERF_REG_X86_CX: - return "CX"; - case PERF_REG_X86_DX: - return "DX"; - case PERF_REG_X86_SI: - return "SI"; - case PERF_REG_X86_DI: - return "DI"; - case PERF_REG_X86_BP: - return "BP"; - case PERF_REG_X86_SP: - return "SP"; - case PERF_REG_X86_IP: - return "IP"; - case PERF_REG_X86_FLAGS: - return "FLAGS"; - case PERF_REG_X86_CS: - return "CS"; - case PERF_REG_X86_SS: - return "SS"; - case PERF_REG_X86_DS: - return "DS"; - case PERF_REG_X86_ES: - return "ES"; - case PERF_REG_X86_FS: - return "FS"; - case PERF_REG_X86_GS: - return "GS"; - case PERF_REG_X86_R8: - return "R8"; - case PERF_REG_X86_R9: - return "R9"; - case PERF_REG_X86_R10: - return "R10"; - case PERF_REG_X86_R11: - return "R11"; - case PERF_REG_X86_R12: - return "R12"; - case PERF_REG_X86_R13: - return "R13"; - case PERF_REG_X86_R14: - return "R14"; - case PERF_REG_X86_R15: - return "R15"; - -#define XMM(x) \ - case PERF_REG_X86_XMM ## x: \ - case PERF_REG_X86_XMM ## x + 1: \ - return "XMM" #x; - XMM(0) - XMM(1) - XMM(2) - XMM(3) - XMM(4) - XMM(5) - XMM(6) - XMM(7) - XMM(8) - XMM(9) - XMM(10) - XMM(11) - XMM(12) - XMM(13) - XMM(14) - XMM(15) -#undef XMM - default: - return NULL; - } - - return NULL; -} - -const char *perf_reg_name(int id, const char *arch) -{ - const char *reg_name = NULL; - - if (!strcmp(arch, "csky")) - reg_name = __perf_reg_name_csky(id); - else if (!strcmp(arch, "mips")) - reg_name = __perf_reg_name_mips(id); - else if (!strcmp(arch, "powerpc")) - reg_name = __perf_reg_name_powerpc(id); - else if (!strcmp(arch, "riscv")) - reg_name = __perf_reg_name_riscv(id); - else if (!strcmp(arch, "s390")) - reg_name = __perf_reg_name_s390(id); - else if (!strcmp(arch, "x86")) - reg_name = __perf_reg_name_x86(id); - else if (!strcmp(arch, "arm")) - reg_name = __perf_reg_name_arm(id); - else if (!strcmp(arch, "arm64")) - reg_name = __perf_reg_name_arm64(id); - - return reg_name ?: "unknown"; -} - int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; u64 mask = regs->mask; - if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE) - return -EINVAL; - if (regs->cache_mask & (1ULL << id)) goto out; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index ce1127af05..eeac181ebc 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -11,11 +11,8 @@ struct sample_reg { const char *name; uint64_t mask; }; - -#define SMPL_REG_MASK(b) (1ULL << (b)) -#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) } -#define SMPL_REG2_MASK(b) (3ULL << (b)) -#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) } +#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } +#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } #define SMPL_REG_END { .name = NULL } enum { @@ -34,16 +31,22 @@ extern const struct sample_reg sample_reg_masks[]; #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) -const char *perf_reg_name(int id, const char *arch); int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); +static inline const char *perf_reg_name(int id) +{ + const char 
*reg_name = __perf_reg_name(id); + + return reg_name ?: "unknown"; +} + #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 #define DWARF_MINIMAL_REGS PERF_REGS_MASK -static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused) +static inline const char *perf_reg_name(int id __maybe_unused) { return "unknown"; } diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index f0bcfcab1a..756295dedc 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -87,8 +87,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, pmu = perf_pmu__find_by_type((unsigned int)attr.type); evsel = parse_events__add_event(evlist->core.nr_entries, - &attr, q, /*metric_id=*/NULL, - pmu); + &attr, q, pmu); if (evsel == NULL) goto error; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8dfbba15ae..bdabd62170 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -315,7 +315,7 @@ static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, - char *desc, char *val, const struct pmu_event *pe) + char *desc, char *val, struct pmu_event *pe) { struct parse_events_term *term; struct perf_pmu_alias *alias; @@ -710,9 +710,9 @@ static char *perf_pmu__getcpuid(struct perf_pmu *pmu) return cpuid; } -const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { - const struct pmu_events_map *map; + struct pmu_events_map *map; char *cpuid = perf_pmu__getcpuid(pmu); int i; @@ -737,7 +737,7 @@ const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } -const struct pmu_events_map *__weak pmu_events_map__find(void) +struct pmu_events_map *__weak pmu_events_map__find(void) { return perf_pmu__find_map(NULL); } @@ -824,7 +824,7 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name) * as aliases. */ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - const struct pmu_events_map *map) + struct pmu_events_map *map) { int i; const char *name = pmu->name; @@ -834,7 +834,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, i = 0; while (1) { const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu"; - const struct pmu_event *pe = &map->table[i++]; + struct pmu_event *pe = &map->table[i++]; const char *pname = pe->pmu ? 
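The removed perf_reg_name(id, arch) picked one of those per-arch helpers at run time by strcmp()-ing the architecture string and fell back to "unknown"; the restored static inline resolves the mapping at compile time through __perf_reg_name() instead. A reduced sketch of the run-time variant, using a dispatch table rather than the if/else chain and hypothetical helper names:

#include <stdio.h>
#include <string.h>

typedef const char *(*reg_name_fn)(int id);

/* Hypothetical per-arch helpers standing in for __perf_reg_name_x86() etc. */
static const char *x86_reg_name(int id)  { return id == 0 ? "AX" : NULL; }
static const char *s390_reg_name(int id) { return id == 0 ? "R0" : NULL; }

static const struct {
	const char *arch;
	reg_name_fn fn;
} dispatch[] = {
	{ "x86",  x86_reg_name },
	{ "s390", s390_reg_name },
};

static const char *reg_name(int id, const char *arch)
{
	for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) {
		if (!strcmp(arch, dispatch[i].arch))
			return dispatch[i].fn(id) ?: "unknown";	/* GNU ?:, as in perf */
	}
	return "unknown";
}

int main(void)
{
	printf("%s %s\n", reg_name(0, "x86"), reg_name(5, "s390"));
	return 0;
}

The run-time form lets one binary decode samples recorded on another architecture; the compile-time form only ever knows the host's register set.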
pe->pmu : cpu_name; if (!pe->name) { @@ -859,7 +859,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { - const struct pmu_events_map *map; + struct pmu_events_map *map; map = perf_pmu__find_map(pmu); if (!map) @@ -873,7 +873,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) int i = 0; while (1) { - const struct pmu_sys_events *event_table; + struct pmu_sys_events *event_table; int j = 0; event_table = &pmu_sys_event_tables[i++]; @@ -882,7 +882,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) break; while (1) { - const struct pmu_event *pe = &event_table->table[j++]; + struct pmu_event *pe = &event_table->table[j++]; int ret; if (!pe->name && !pe->metric_group && !pe->metric_name) @@ -900,7 +900,7 @@ struct pmu_sys_event_iter_data { struct perf_pmu *pmu; }; -static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, void *data) +static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) { struct pmu_sys_event_iter_data *idata = data; struct perf_pmu *pmu = idata->pmu; @@ -1283,7 +1283,7 @@ static int pmu_config_term(const char *pmu_name, unknown_term = NULL; help_msg = parse_events_formats_error_string(pmu_term); if (err) { - parse_events_error__handle(err, term->err_term, + parse_events__handle_error(err, term->err_term, unknown_term, help_msg); } else { @@ -1316,7 +1316,7 @@ static int pmu_config_term(const char *pmu_name, if (term->no_value && bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) { if (err) { - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, strdup("no value assigned for term"), NULL); } @@ -1331,7 +1331,7 @@ static int pmu_config_term(const char *pmu_name, term->config, term->val.str); } if (err) { - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, strdup("expected numeric value"), NULL); } @@ -1348,7 +1348,7 @@ static int pmu_config_term(const char *pmu_name, if (err) { char *err_str; - parse_events_error__handle(err, term->err_val, + parse_events__handle_error(err, term->err_val, asprintf(&err_str, "value too big for format, maximum is %llu", (unsigned long long)max_val) < 0 @@ -1608,7 +1608,6 @@ static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; - int ret; /* Put extra events last */ if (!!as->desc != !!bs->desc) @@ -1624,13 +1623,7 @@ static int cmp_sevent(const void *a, const void *b) if (as->is_cpu != bs->is_cpu) return bs->is_cpu - as->is_cpu; - ret = strcmp(as->name, bs->name); - if (!ret) { - if (as->pmu && bs->pmu) - return strcmp(as->pmu, bs->pmu); - } - - return ret; + return strcmp(as->name, bs->name); } static void wordwrap(char *s, int start, int max, int corr) @@ -1659,24 +1652,8 @@ bool is_pmu_core(const char *name) return !strcmp(name, "cpu") || is_arm_pmu_core(name); } -static bool pmu_alias_is_duplicate(struct sevent *alias_a, - struct sevent *alias_b) -{ - /* Different names -> never duplicates */ - if (strcmp(alias_a->name, alias_b->name)) - return false; - - /* Don't remove duplicates for hybrid PMUs */ - if (perf_pmu__is_hybrid(alias_a->pmu) && - perf_pmu__is_hybrid(alias_b->pmu)) - return false; - - return true; -} - void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name) + bool long_desc, bool details_flag, bool 
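pmu_events_map__find() in the pmu.c hunk above is declared __weak, so that generic definition is only a default: any object file linked in with a strong definition of the same symbol replaces it. A minimal self-contained illustration of that linker behaviour (backend_name() is a made-up symbol):

#include <stdio.h>

#define __weak __attribute__((weak))

/* Default definition; a strong backend_name() in another object file
 * linked into the program silently overrides this one. */
__weak const char *backend_name(void)
{
	return "generic";
}

int main(void)
{
	printf("%s\n", backend_name());
	return 0;
}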
deprecated) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1702,16 +1679,10 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu_name && perf_pmu__is_hybrid(pmu->name) && - strcmp(pmu_name, pmu->name)) { - continue; - } - list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? alias->name : format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = is_pmu_core(pmu->name) || - perf_pmu__is_hybrid(pmu->name); + bool is_cpu = is_pmu_core(pmu->name); if (alias->deprecated && !deprecated) continue; @@ -1759,9 +1730,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) + if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) continue; - if (name_only) { printf("%s ", aliases[j].name); continue; @@ -1936,7 +1906,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) } void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - const char *name) + char *name) { struct perf_pmu_format *format; __u64 masks = 0, bits; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 541889fa9f..394898b07f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -49,10 +49,6 @@ struct perf_pmu { struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ struct list_head hybrid_list; - - struct { - bool exclude_guest; - } missing_features; }; extern struct perf_pmu perf_pmu__fake; @@ -115,7 +111,7 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); bool is_pmu_core(const char *name); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag, - bool deprecated, const char *pmu_name); + bool deprecated); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) 
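print_pmu_events() collects aliases into an array, qsort()s it with cmp_sevent(), and then drops duplicates by comparing each entry only with its predecessor, which works precisely because sorting makes duplicates adjacent. A reduced sketch of that sort-then-dedup pattern with a toy struct:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sevent { const char *name; };

static int cmp_name(const void *a, const void *b)
{
	const struct sevent *as = a, *bs = b;
	return strcmp(as->name, bs->name);
}

int main(void)
{
	struct sevent ev[] = { { "cycles" }, { "branches" }, { "cycles" } };
	size_t n = sizeof(ev) / sizeof(ev[0]);

	qsort(ev, n, sizeof(ev[0]), cmp_name);

	/* After sorting, duplicates are adjacent, so one comparison with
	 * the previous element is enough to skip them. */
	for (size_t i = 0; i < n; i++) {
		if (i > 0 && !strcmp(ev[i].name, ev[i - 1].name))
			continue;
		printf("%s\n", ev[i].name);
	}
	return 0;
}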
__scanf(3, 4); @@ -124,21 +120,21 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - const struct pmu_events_map *map); + struct pmu_events_map *map); -const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); -const struct pmu_events_map *pmu_events_map__find(void); +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +struct pmu_events_map *pmu_events_map__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); -typedef int (*pmu_sys_event_iter_fn)(const struct pmu_event *pe, void *data); +typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data); void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - const char *name); + char *name); bool perf_pmu__has_hybrid(void); int perf_pmu__match(char *pattern, char *name, char *tok); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index a685d20165..d7c976671e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,7 +18,6 @@ util/mmap.c util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c -../lib/list_sort.c ../lib/hweight.c ../lib/string.c ../lib/vsprintf.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 52d8995cfd..8feef3a05a 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -69,18 +69,6 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) { } -/* - * This one is needed not to drag the PMU bandwagon, jevents generated - * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for - * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so - * far, for the perf python binding known usecases, revisit if this become - * necessary. 
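The __scanf(3, 4) annotation on perf_pmu__scan_file() tells the compiler that argument 3 is a scanf-style format string and that checking of the variadic arguments starts at argument 4; in the tools headers it expands to __attribute__((format(scanf, x, y))). A standalone sketch with a hypothetical wrapper:

#include <stdarg.h>
#include <stdio.h>

/* As in the tools headers: format-string checking for scanf-like functions. */
#define __scanf(x, y) __attribute__((format(scanf, x, y)))

static int read_formatted(FILE *fp, int flags, const char *fmt, ...) __scanf(3, 4);

static int read_formatted(FILE *fp, int flags, const char *fmt, ...)
{
	va_list ap;
	int ret;

	(void)flags;
	va_start(ap, fmt);
	ret = vfscanf(fp, fmt, ap);
	va_end(ap);
	return ret;
}

int main(void)
{
	int x = 0;

	/* The attribute lets the compiler warn when format and arguments
	 * disagree, e.g. "%s" passed together with &x here. */
	if (read_formatted(stdin, 0, "%d", &x) == 1)
		printf("%d\n", x);
	return 0;
}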
- */ -struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused) -{ - return NULL; -} - /* * Add this one here not to drag util/metricgroup.c */ @@ -428,8 +416,6 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -463,7 +449,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) struct tep_event *tp_format; tp_format = trace_event__tp_format_id(evsel->core.attr.config); - if (IS_ERR_OR_NULL(tp_format)) + if (!tp_format) return NULL; evsel->tp_format = tp_format; @@ -638,17 +624,17 @@ static Py_ssize_t pyrf_cpu_map__length(PyObject *obj) { struct pyrf_cpu_map *pcpus = (void *)obj; - return perf_cpu_map__nr(pcpus->cpus); + return pcpus->cpus->nr; } static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_cpu_map *pcpus = (void *)obj; - if (i >= perf_cpu_map__nr(pcpus->cpus)) + if (i >= pcpus->cpus->nr) return NULL; - return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu); + return Py_BuildValue("i", pcpus->cpus->map[i]); } static PySequenceMethods pyrf_cpu_map__sequence_methods = { @@ -1059,7 +1045,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu) for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *md = &evlist->mmap[i]; - if (md->core.cpu.cpu == cpu) + if (md->core.cpu == cpu) return md; } @@ -1445,7 +1431,7 @@ PyMODINIT_FUNC PyInit_perf(void) * Dummy, to avoid dragging all the test_attr infrastructure in the python * binding. */ -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags) { } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 007a646814..bff669b615 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call if (opts->group) evlist__set_leader(evlist); - if (perf_cpu_map__cpu(evlist->core.cpus, 0).cpu < 0) + if (evlist->core.cpus->map[0] < 0) opts->no_inherit = true; use_comm_exec = perf_can_comm_exec(); @@ -229,8 +229,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) { struct evlist *temp_evlist; struct evsel *evsel; - int err, fd; - struct perf_cpu cpu = { .cpu = 0 }; + int err, fd, cpu; bool ret = false; pid_t pid = -1; @@ -247,16 +246,14 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); - if (cpus) - cpu = perf_cpu_map__cpu(cpus, 0); - + cpu = cpus ? 
cpus->map[0] : 0; perf_cpu_map__put(cpus); } else { - cpu = perf_cpu_map__cpu(evlist->core.cpus, 0); + cpu = evlist->core.cpus->map[0]; } while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, perf_event_open_cloexec_flag()); if (fd < 0) { if (pid == -1 && errno == EACCES) { diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index ef6c2715fd..68f471d9a8 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -77,7 +77,6 @@ struct record_opts { int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; - int synth; }; extern const char * const *record_usage; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index f3fdad28a8..8130b56aa0 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -244,7 +244,7 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos, struct hws_basic_entry *basicp) { struct hws_basic_entry *basic = basicp; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN struct hws_basic_entry local; unsigned long long word = be64toh(*(unsigned long long *)basicp); @@ -288,7 +288,7 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos, struct hws_diag_entry *diagp) { struct hws_diag_entry *diag = diagp; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN struct hws_diag_entry local; unsigned long long word = be64toh(*(unsigned long long *)diagp); @@ -322,7 +322,7 @@ static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, static bool s390_cpumsf_trailer_show(const char *color, size_t pos, struct hws_trailer_entry *te) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN struct hws_trailer_entry local; const unsigned long long flags = be64toh(te->flags); @@ -552,7 +552,7 @@ static unsigned long long get_trailer_time(const unsigned char *buf) te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; progusage2 = be64toh(te->progusage[1]); #else diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index cd3a348403..08ec3c3ae0 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -135,12 +135,12 @@ static int get_counterset_start(int setnr) * the name of this counter. * If no match is found a NULL pointer is returned. 
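The s390-cpumsf hunks above swap the compiler's predefined __BYTE_ORDER__/__ORDER_LITTLE_ENDIAN__ macros for the glibc <endian.h> spellings; both guards answer the same question, the predefines just need no header. A small demonstration (assuming a glibc system for <endian.h> and be64toh()):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Two common spellings of the same compile-time check. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
	puts("glibc macros: little endian");
#endif
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	puts("compiler predefines: little endian");
#endif

	/* be64toh() converts big-endian storage to host order: a byte swap
	 * on little-endian hosts, a no-op on big-endian ones. That is why
	 * the guarded code only rebuilds a local copy of each record when
	 * the host is little endian. */
	uint64_t word = be64toh((uint64_t)1);
	printf("be64toh(1) = 0x%llx\n", (unsigned long long)word);
	return 0;
}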
*/ -static const char *get_counter_name(int set, int nr, const struct pmu_events_map *map) +static const char *get_counter_name(int set, int nr, struct pmu_events_map *map) { int rc, event_nr, wanted = get_counterset_start(set) + nr; if (map) { - const struct pmu_event *evp = map->table; + struct pmu_event *evp = map->table; for (; evp->name || evp->event || evp->desc; ++evp) { if (evp->name == NULL || evp->event == NULL) @@ -159,7 +159,7 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) unsigned char *buf = sample->raw_data; const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; - const struct pmu_events_map *map; + struct pmu_events_map *map; u64 *p; map = pmu_events_map__find(); diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a5d945415b..32a721b3e9 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -392,8 +392,6 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; } else offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index e752e1f4a5..c0c010350b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -36,7 +36,6 @@ #include "../debug.h" #include "../dso.h" #include "../callchain.h" -#include "../env.h" #include "../evsel.h" #include "../event.h" #include "../thread.h" @@ -688,7 +687,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict, _PyUnicode_FromString(decode)); } -static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size) +static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) { unsigned int i = 0, r; int printed = 0; @@ -703,7 +702,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch printed += scnprintf(bf + printed, size - printed, "%5s:0x%" PRIx64 " ", - perf_reg_name(r, arch), val); + perf_reg_name(r), val); } } @@ -712,7 +711,6 @@ static void set_regs_in_dict(PyObject *dict, struct evsel *evsel) { struct perf_event_attr *attr = &evsel->core.attr; - const char *arch = perf_env__arch(evsel__env(evsel)); /* * Here value 28 is a constant size which can be used to print @@ -724,12 +722,12 @@ static void set_regs_in_dict(PyObject *dict, int size = __sw_hweight64(attr->sample_regs_intr) * 28; char bf[size]; - regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf)); + regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "iregs", _PyUnicode_FromString(bf)); - regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf)); + regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); @@ -944,8 +942,6 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -1557,7 +1553,7 @@ static 
void get_handler_name(char *str, size_t size, } static void -process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp, +process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp, struct perf_counts_values *count) { PyObject *handler, *t; @@ -1577,7 +1573,7 @@ process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp, return; } - PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); PyTuple_SetItem(t, n++, _PyLong_FromLong(thread)); tuple_set_u64(t, n++, tstamp); @@ -1601,14 +1597,14 @@ static void python_process_stat(struct perf_stat_config *config, int cpu, thread; if (config->aggr_mode == AGGR_GLOBAL) { - process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp, + process_stat(counter, -1, -1, tstamp, &counter->counts->aggr); return; } for (thread = 0; thread < threads->nr; thread++) { - for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { - process_stat(counter, perf_cpu_map__cpu(cpus, cpu), + for (cpu = 0; cpu < cpus->nr; cpu++) { + process_stat(counter, cpus->map[cpu], perf_thread_map__pid(threads, thread), tstamp, perf_counts(counter->counts, cpu, thread)); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 498b05708d..352f16076e 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,7 +15,6 @@ #include "map_symbol.h" #include "branch.h" #include "debug.h" -#include "env.h" #include "evlist.h" #include "evsel.h" #include "memswap.h" @@ -45,7 +44,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, size_t decomp_size, src_size; u64 decomp_last_rem = 0; size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; - struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; + struct decomp *decomp, *decomp_last = session->decomp_last; if (decomp_last) { decomp_last_rem = decomp_last->size - decomp_last->head; @@ -72,7 +71,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, src = (void *)event + sizeof(struct perf_record_compressed); src_size = event->pack.header.size - sizeof(struct perf_record_compressed); - decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, + decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size, &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); if (!decomp_size) { munmap(decomp, mmap_len); @@ -82,12 +81,13 @@ static int perf_session__process_compressed_event(struct perf_session *session, decomp->size += decomp_size; - if (session->active_decomp->decomp == NULL) - session->active_decomp->decomp = decomp; - else - session->active_decomp->decomp_last->next = decomp; - - session->active_decomp->decomp_last = decomp; + if (session->decomp == NULL) { + session->decomp = decomp; + session->decomp_last = decomp; + } else { + session->decomp_last->next = decomp; + session->decomp_last = decomp; + } pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); @@ -197,8 +197,6 @@ struct perf_session *__perf_session__new(struct perf_data *data, session->repipe = repipe; session->tool = tool; - session->decomp_data.zstd_decomp = &session->zstd_data; - session->active_decomp = &session->decomp_data; INIT_LIST_HEAD(&session->auxtrace_index); machines__init(&session->machines); ordered_events__init(&session->ordered_events, @@ -278,11 +276,11 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } 
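The compressed-event hunk above goes back to keeping the decompression buffers on a plain singly-linked list with separate head (decomp) and tail (decomp_last) pointers on the session, which makes append O(1). The shape of that bookkeeping, reduced to a sketch:

#include <stddef.h>

struct decomp {
	struct decomp *next;
	size_t size, head;
};

struct session {
	struct decomp *decomp;		/* list head */
	struct decomp *decomp_last;	/* list tail, for O(1) append */
};

/* Append a node, keeping both head and tail pointers current. */
static void decomp_append(struct session *s, struct decomp *d)
{
	d->next = NULL;
	if (s->decomp == NULL)
		s->decomp = d;
	else
		s->decomp_last->next = d;
	s->decomp_last = d;
}

int main(void)
{
	struct session s = { 0 };
	struct decomp a = { 0 }, b = { 0 };

	decomp_append(&s, &a);
	decomp_append(&s, &b);
	return (s.decomp == &a && s.decomp_last == &b) ? 0 : 1;
}

Tracking the tail avoids walking the list once per compressed record, which matters for long recordings.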
-static void perf_decomp__release_events(struct decomp *next) +static void perf_session__release_decomp_events(struct perf_session *session) { - struct decomp *decomp; + struct decomp *next, *decomp; size_t mmap_len; - + next = session->decomp; do { decomp = next; if (decomp == NULL) @@ -301,7 +299,7 @@ void perf_session__delete(struct perf_session *session) auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); - perf_decomp__release_events(session->decomp_data.decomp); + perf_session__release_decomp_events(session); perf_env__exit(&session->header.env); machines__exit(&session->machines); if (session->data) { @@ -511,8 +509,6 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->bpf = perf_event__process_bpf; if (tool->text_poke == NULL) tool->text_poke = perf_event__process_text_poke; - if (tool->aux_output_hw_id == NULL) - tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -1004,7 +1000,6 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, - [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1169,7 +1164,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } -static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) +static void regs_dump__printf(u64 mask, u64 *regs) { unsigned rid, i = 0; @@ -1177,7 +1172,7 @@ static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) u64 val = regs[i++]; printf(".... 
%-5s 0x%016" PRIx64 "\n", - perf_reg_name(rid, arch), val); + perf_reg_name(rid), val); } } @@ -1195,7 +1190,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d) return regs_abi[d->abi]; } -static void regs__printf(const char *type, struct regs_dump *regs, const char *arch) +static void regs__printf(const char *type, struct regs_dump *regs) { u64 mask = regs->mask; @@ -1204,23 +1199,23 @@ static void regs__printf(const char *type, struct regs_dump *regs, const char *a mask, regs_dump_abi(regs)); - regs_dump__printf(mask, regs->regs, arch); + regs_dump__printf(mask, regs->regs); } -static void regs_user__printf(struct perf_sample *sample, const char *arch) +static void regs_user__printf(struct perf_sample *sample) { struct regs_dump *user_regs = &sample->user_regs; if (user_regs->regs) - regs__printf("user", user_regs, arch); + regs__printf("user", user_regs); } -static void regs_intr__printf(struct perf_sample *sample, const char *arch) +static void regs_intr__printf(struct perf_sample *sample) { struct regs_dump *intr_regs = &sample->intr_regs; if (intr_regs->regs) - regs__printf("intr", intr_regs, arch); + regs__printf("intr", intr_regs); } static void stack_user__printf(struct stack_dump *dump) @@ -1305,7 +1300,7 @@ char *get_page_size_name(u64 size, char *str) } static void dump_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample, const char *arch) + struct perf_sample *sample) { u64 sample_type; char str[PAGE_SIZE_NAME_LEN]; @@ -1326,10 +1321,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) - regs_user__printf(sample, arch); + regs_user__printf(sample); if (sample_type & PERF_SAMPLE_REGS_INTR) - regs_intr__printf(sample, arch); + regs_intr__printf(sample); if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); @@ -1503,12 +1498,11 @@ static int machines__deliver_event(struct machines *machines, ++evlist->stats.nr_unknown_id; return 0; } + dump_sample(evsel, event, sample); if (machine == NULL) { ++evlist->stats.nr_unprocessable_samples; - dump_sample(evsel, event, sample, perf_env__arch(NULL)); return 0; } - dump_sample(evsel, event, sample, perf_env__arch(machine->env)); return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1562,8 +1556,6 @@ static int machines__deliver_event(struct machines *machines, return tool->bpf(tool, event, sample, machine); case PERF_RECORD_TEXT_POKE: return tool->text_poke(tool, event, sample, machine); - case PERF_RECORD_AUX_OUTPUT_HW_ID: - return tool->aux_output_hw_id(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -2125,7 +2117,7 @@ static int __perf_session__process_decomp_events(struct perf_session *session) { s64 skip; u64 size; - struct decomp *decomp = session->active_decomp->decomp_last; + struct decomp *decomp = session->decomp_last; if (!decomp) return 0; @@ -2179,55 +2171,35 @@ struct reader { u64 data_offset; reader_cb_t process; bool in_place_update; - char *mmaps[NUM_MMAPS]; - size_t mmap_size; - int mmap_idx; - char *mmap_cur; - u64 file_pos; - u64 file_offset; - u64 head; - struct zstd_data zstd_data; - struct decomp_data decomp_data; }; static int -reader__init(struct reader *rd, bool *one_mmap) +reader__process_events(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) { u64 
data_size = rd->data_size; - char **mmaps = rd->mmaps; + u64 head, page_offset, file_offset, file_pos, size; + int err = 0, mmap_prot, mmap_flags, map_idx = 0; + size_t mmap_size; + char *buf, *mmaps[NUM_MMAPS]; + union perf_event *event; + s64 skip; + + page_offset = page_size * (rd->data_offset / page_size); + file_offset = page_offset; + head = rd->data_offset - page_offset; + + ui_progress__init_size(prog, data_size, "Processing events..."); - rd->head = rd->data_offset; data_size += rd->data_offset; - rd->mmap_size = MMAP_SIZE; - if (rd->mmap_size > data_size) { - rd->mmap_size = data_size; - if (one_mmap) - *one_mmap = true; + mmap_size = MMAP_SIZE; + if (mmap_size > data_size) { + mmap_size = data_size; + session->one_mmap = true; } - memset(mmaps, 0, sizeof(rd->mmaps)); - - if (zstd_init(&rd->zstd_data, 0)) - return -1; - rd->decomp_data.zstd_decomp = &rd->zstd_data; - - return 0; -} - -static void -reader__release_decomp(struct reader *rd) -{ - perf_decomp__release_events(rd->decomp_data.decomp); - zstd_fini(&rd->zstd_data); -} - -static int -reader__mmap(struct reader *rd, struct perf_session *session) -{ - int mmap_prot, mmap_flags; - char *buf, **mmaps = rd->mmaps; - u64 page_offset; + memset(mmaps, 0, sizeof(mmaps)); mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; @@ -2238,63 +2210,47 @@ reader__mmap(struct reader *rd, struct perf_session *session) mmap_prot |= PROT_WRITE; mmap_flags = MAP_PRIVATE; } - - if (mmaps[rd->mmap_idx]) { - munmap(mmaps[rd->mmap_idx], rd->mmap_size); - mmaps[rd->mmap_idx] = NULL; - } - - page_offset = page_size * (rd->head / page_size); - rd->file_offset += page_offset; - rd->head -= page_offset; - - buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd, - rd->file_offset); +remap: + buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd, + file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); - return -errno; + err = -errno; + goto out; } - mmaps[rd->mmap_idx] = rd->mmap_cur = buf; - rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1); - rd->file_pos = rd->file_offset + rd->head; + mmaps[map_idx] = buf; + map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); + file_pos = file_offset + head; if (session->one_mmap) { session->one_mmap_addr = buf; - session->one_mmap_offset = rd->file_offset; + session->one_mmap_offset = file_offset; } - return 0; -} - -enum { - READER_OK, - READER_NODATA, -}; - -static int -reader__read_event(struct reader *rd, struct perf_session *session, - struct ui_progress *prog) -{ - u64 size; - int err = READER_OK; - union perf_event *event; - s64 skip; - - event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur, - session->header.needs_swap); +more: + event = fetch_mmaped_event(head, mmap_size, buf, session->header.needs_swap); if (IS_ERR(event)) return PTR_ERR(event); - if (!event) - return READER_NODATA; + if (!event) { + if (mmaps[map_idx]) { + munmap(mmaps[map_idx], mmap_size); + mmaps[map_idx] = NULL; + } + + page_offset = page_size * (head / page_size); + file_offset += page_offset; + head -= page_offset; + goto remap; + } size = event->header.size; skip = -EINVAL; if (size < sizeof(struct perf_event_header) || - (skip = rd->process(session, event, rd->file_pos)) < 0) { + (skip = rd->process(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n", - rd->file_offset + rd->head, event->header.size, + file_offset + head, event->header.size, event->header.type, strerror(-skip)); err = skip; goto out; @@ -2303,8 +2259,8 @@ 
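reader__process_events() reads the file through a bounded mmap window: when fetch_mmaped_event() returns NULL the window is exhausted, so the code rounds head down to a page boundary, advances file_offset by that amount, and remaps. The arithmetic in isolation (page_size is a stand-in for the global perf uses):

#include <stdio.h>

static const unsigned long page_size = 4096;

/* Move the window forward by the largest page-aligned amount covered
 * by 'head', keeping only the remainder as the in-window offset, so
 * the next (possibly partial) event lands inside the new mapping. */
static void advance_window(unsigned long *file_offset, unsigned long *head)
{
	unsigned long page_offset = page_size * (*head / page_size);

	*file_offset += page_offset;
	*head -= page_offset;
}

int main(void)
{
	unsigned long file_offset = 0, head = 9000;

	advance_window(&file_offset, &head);
	/* head was 9000: two full pages (8192) move into file_offset,
	 * 808 bytes remain as the offset inside the new window. */
	printf("file_offset=%lu head=%lu\n", file_offset, head);
	return 0;
}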
reader__read_event(struct reader *rd, struct perf_session *session, if (skip) size += skip; - rd->head += size; - rd->file_pos += size; + head += size; + file_pos += size; err = __perf_session__process_decomp_events(session); if (err) @@ -2312,48 +2268,13 @@ reader__read_event(struct reader *rd, struct perf_session *session, ui_progress__update(prog, size); -out: - return err; -} - -static inline bool -reader__eof(struct reader *rd) -{ - return (rd->file_pos >= rd->data_size + rd->data_offset); -} - -static int -reader__process_events(struct reader *rd, struct perf_session *session, - struct ui_progress *prog) -{ - int err; - - err = reader__init(rd, &session->one_mmap); - if (err) - goto out; - - session->active_decomp = &rd->decomp_data; - -remap: - err = reader__mmap(rd, session); - if (err) - goto out; - -more: - err = reader__read_event(rd, session, prog); - if (err < 0) - goto out; - else if (err == READER_NODATA) - goto remap; - if (session_done()) goto out; - if (!reader__eof(rd)) + if (file_pos < data_size) goto more; out: - session->active_decomp = &session->decomp_data; return err; } @@ -2406,7 +2327,6 @@ static int __perf_session__process_events(struct perf_session *session) */ ordered_events__reinit(&session->ordered_events); auxtrace__free_events(session); - reader__release_decomp(&rd); session->one_mmap = false; return err; } @@ -2538,16 +2458,16 @@ int perf_session__cpu_bitmap(struct perf_session *session, return -1; } - for (i = 0; i < perf_cpu_map__nr(map); i++) { - struct perf_cpu cpu = perf_cpu_map__cpu(map, i); + for (i = 0; i < map->nr; i++) { + int cpu = map->map[i]; - if (cpu.cpu >= nr_cpus) { + if (cpu >= nr_cpus) { pr_err("Requested CPU %d too large. " - "Consider raising MAX_NR_CPUS\n", cpu.cpu); + "Consider raising MAX_NR_CPUS\n", cpu); goto out_delete_map; } - set_bit(cpu.cpu, cpu_bitmap); + set_bit(cpu, cpu_bitmap); } err = 0; @@ -2599,7 +2519,7 @@ int perf_event__process_id_index(struct perf_session *session, if (!sid) return -ENOENT; sid->idx = e->idx; - sid->cpu.cpu = e->cpu; + sid->cpu = e->cpu; sid->tid = e->tid; } return 0; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 46c854292a..5d8bd14a0a 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,12 +20,6 @@ struct thread; struct auxtrace; struct itrace_synth_opts; -struct decomp_data { - struct decomp *decomp; - struct decomp *decomp_last; - struct zstd_data *zstd_decomp; -}; - struct perf_session { struct perf_header header; struct machines machines; @@ -45,8 +39,8 @@ struct perf_session { u64 bytes_transferred; u64 bytes_compressed; struct zstd_data zstd_data; - struct decomp_data decomp_data; - struct decomp_data *active_decomp; + struct decomp *decomp; + struct decomp *decomp_last; }; struct decomp { diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 2b0a36ebf2..34f1b1b117 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -5,56 +5,6 @@ #include "api/fs/fs.h" #include "smt.h" -/** - * hweight_str - Returns the number of bits set in str. Stops at first non-hex - * or ',' character. 
- */ -static int hweight_str(char *str) -{ - int result = 0; - - while (*str) { - switch (*str++) { - case '0': - case ',': - break; - case '1': - case '2': - case '4': - case '8': - result++; - break; - case '3': - case '5': - case '6': - case '9': - case 'a': - case 'A': - case 'c': - case 'C': - result += 2; - break; - case '7': - case 'b': - case 'B': - case 'd': - case 'D': - case 'e': - case 'E': - result += 3; - break; - case 'f': - case 'F': - result += 4; - break; - default: - goto done; - } - } -done: - return result; -} - int smt_on(void) { static bool cached; @@ -65,12 +15,9 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) { - cached = true; - return cached_result; - } + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) + goto done; - cached_result = 0; ncpu = sysconf(_SC_NPROCESSORS_CONF); for (cpu = 0; cpu < ncpu; cpu++) { unsigned long long siblings; @@ -79,21 +26,27 @@ int smt_on(void) char fn[256]; snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + "devices/system/cpu/cpu%d/topology/core_cpus", cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) { snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/core_cpus", cpu); + "devices/system/cpu/cpu%d/topology/thread_siblings", + cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) continue; } /* Entry is hex, but does not have 0x, so need custom parser */ - siblings = hweight_str(str); + siblings = strtoull(str, NULL, 16); free(str); - if (siblings > 1) { + if (hweight64(siblings) > 1) { cached_result = 1; + cached = true; break; } } - cached = true; + if (!cached) { + cached_result = 0; +done: + cached = true; + } return cached_result; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2da081ef53..a111065b48 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -46,8 +46,8 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; -static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"}; -static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"}; +const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; +const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -915,7 +915,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf, struct addr_map_symbol *from = &he->branch_info->from; return _hist_entry__sym_snprintf(&from->ms, from->al_addr, - from->al_level, bf, size, width); + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -928,7 +928,7 @@ static int 
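The smt.c revert replaces hweight_str() with strtoull() plus hweight64() when parsing the thread-sibling mask. One plausible motivation for the per-nibble helper being removed is that strtoull() stops at the comma separators sysfs uses and caps out at 64 bits, while counting nibble by nibble handles arbitrarily wide masks. A compact version of the same per-nibble counting, using the compiler's popcount builtin instead of the explicit switch:

#include <ctype.h>
#include <stdio.h>

/* Population count of a sysfs-style hex sibling mask such as
 * "ff,00000003": skip commas, count set bits per hex digit, stop at
 * the first character that is neither. */
static int hex_mask_weight(const char *s)
{
	int bits = 0;

	for (; *s; s++) {
		if (*s == ',')
			continue;
		if (!isxdigit((unsigned char)*s))
			break;
		int v = isdigit((unsigned char)*s) ? *s - '0'
				: (tolower((unsigned char)*s) - 'a') + 10;
		bits += __builtin_popcount(v);
	}
	return bits;
}

int main(void)
{
	printf("%d\n", hex_mask_weight("f,00000001"));	/* prints 5 */
	return 0;
}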
hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, struct addr_map_symbol *to = &he->branch_info->to; return _hist_entry__sym_snprintf(&to->ms, to->al_addr, - to->al_level, bf, size, width); + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -1392,37 +1392,22 @@ struct sort_entry sort_global_ins_lat = { }; static int64_t -sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) +sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { return left->p_stage_cyc - right->p_stage_cyc; } -static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, - size_t size, unsigned int width) -{ - return repsep_snprintf(bf, size, "%-*u", width, - he->p_stage_cyc * he->stat.nr_events); -} - - static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } -struct sort_entry sort_local_p_stage_cyc = { - .se_header = "Local Pipeline Stage Cycle", - .se_cmp = sort__p_stage_cyc_cmp, - .se_snprintf = hist_entry__p_stage_cyc_snprintf, - .se_width_idx = HISTC_LOCAL_P_STAGE_CYC, -}; - -struct sort_entry sort_global_p_stage_cyc = { +struct sort_entry sort_p_stage_cyc = { .se_header = "Pipeline Stage Cycle", - .se_cmp = sort__p_stage_cyc_cmp, - .se_snprintf = hist_entry__global_p_stage_cyc_snprintf, - .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC, + .se_cmp = sort__global_p_stage_cyc_cmp, + .se_snprintf = hist_entry__p_stage_cyc_snprintf, + .se_width_idx = HISTC_P_STAGE_CYC, }; struct sort_entry sort_mem_daddr_sym = { @@ -1873,8 +1858,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), - DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), - DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), + DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), }; #undef DIM @@ -2381,8 +2365,6 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; /* record max width for output */ if (size > hde->dynamic_len) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index f994261888..7b71455019 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -235,8 +235,7 @@ enum sort_type { SORT_CODE_PAGE_SIZE, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, - SORT_LOCAL_PIPELINE_STAGE_CYC, - SORT_GLOBAL_PIPELINE_STAGE_CYC, + SORT_PIPELINE_STAGE_CYC, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index af468e3bb6..5b7d6c16d3 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include #include #include -#include #include #include @@ -17,7 +15,6 @@ #include "srcline.h" #include "string2.h" #include "symbol.h" -#include "subcmd/run-command.h" bool srcline_full_filename; @@ -122,8 +119,6 @@ static struct symbol *new_inline_sym(struct dso *dso, return inline_sym; } -#define MAX_INLINE_NEST 1024 - #ifdef HAVE_LIBBFD_SUPPORT /* @@ -278,6 +273,8 @@ static void addr2line_cleanup(struct a2l_data 
*a2l) free(a2l); } +#define MAX_INLINE_NEST 1024 + static int inline_list__append_dso_a2l(struct dso *dso, struct inline_node *node, struct symbol *sym) @@ -364,13 +361,25 @@ void dso__free_a2l(struct dso *dso) dso->a2l = NULL; } -#else /* HAVE_LIBBFD_SUPPORT */ +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso, struct symbol *sym) +{ + struct inline_node *node; -struct a2l_subprocess { - struct child_process addr2line; - FILE *to_child; - FILE *from_child; -}; + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + return NULL; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); + return node; +} + +#else /* HAVE_LIBBFD_SUPPORT */ static int filename_split(char *filename, unsigned int *line_nr) { @@ -393,285 +402,114 @@ static int filename_split(char *filename, unsigned int *line_nr) return 0; } -static void addr2line_subprocess_cleanup(struct a2l_subprocess *a2l) -{ - if (a2l->addr2line.pid != -1) { - kill(a2l->addr2line.pid, SIGKILL); - finish_command(&a2l->addr2line); /* ignore result, we don't care */ - a2l->addr2line.pid = -1; - } - - if (a2l->to_child != NULL) { - fclose(a2l->to_child); - a2l->to_child = NULL; - } - - if (a2l->from_child != NULL) { - fclose(a2l->from_child); - a2l->from_child = NULL; - } - - free(a2l); -} - -static struct a2l_subprocess *addr2line_subprocess_init(const char *path) -{ - const char *argv[] = { "addr2line", "-e", path, "-i", "-f", NULL }; - struct a2l_subprocess *a2l = zalloc(sizeof(*a2l)); - int start_command_status = 0; - - if (a2l == NULL) - goto out; - - a2l->to_child = NULL; - a2l->from_child = NULL; - - a2l->addr2line.pid = -1; - a2l->addr2line.in = -1; - a2l->addr2line.out = -1; - a2l->addr2line.no_stderr = 1; - - a2l->addr2line.argv = argv; - start_command_status = start_command(&a2l->addr2line); - a2l->addr2line.argv = NULL; /* it's not used after start_command; avoid dangling pointers */ - - if (start_command_status != 0) { - pr_warning("could not start addr2line for %s: start_command return code %d\n", - path, - start_command_status); - goto out; - } - - a2l->to_child = fdopen(a2l->addr2line.in, "w"); - if (a2l->to_child == NULL) { - pr_warning("could not open write-stream to addr2line of %s\n", path); - goto out; - } - - a2l->from_child = fdopen(a2l->addr2line.out, "r"); - if (a2l->from_child == NULL) { - pr_warning("could not open read-stream from addr2line of %s\n", path); - goto out; - } - - return a2l; - -out: - if (a2l) - addr2line_subprocess_cleanup(a2l); - - return NULL; -} - -static int read_addr2line_record(struct a2l_subprocess *a2l, - char **function, - char **filename, - unsigned int *line_nr) -{ - /* - * Returns: - * -1 ==> error - * 0 ==> sentinel (or other ill-formed) record read - * 1 ==> a genuine record read - */ - char *line = NULL; - size_t line_len = 0; - unsigned int dummy_line_nr = 0; - int ret = -1; - - if (function != NULL) - zfree(function); - - if (filename != NULL) - zfree(filename); - - if (line_nr != NULL) - *line_nr = 0; - - if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) - goto error; - - if (function != NULL) - *function = strdup(strim(line)); - - zfree(&line); - line_len = 0; - - if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) - goto error; - - if (filename_split(line, line_nr == NULL ? 
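The helpers removed here keep a single addr2line child alive (started via start_command()) and stream addresses to it over a pipe; the popen()-based code restored further below instead pays a fork+exec for every lookup. A one-shot query in that restored style, reduced to a sketch with error handling trimmed:

#include <inttypes.h>
#include <limits.h>
#include <stdio.h>

/* Spawn addr2line once per address and read a single "file:line"
 * reply; simple, but each call costs a full process start-up. */
static int query_addr2line(const char *dso_name, uint64_t addr,
			   char *buf, size_t buflen)
{
	char cmd[PATH_MAX];
	FILE *fp;
	int ok = 0;

	snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64,
		 dso_name, addr);
	fp = popen(cmd, "r");
	if (!fp)
		return 0;
	if (fgets(buf, buflen, fp))
		ok = 1;
	pclose(fp);
	return ok;
}

int main(void)
{
	char line[512];

	if (query_addr2line("/bin/true", 0x1000, line, sizeof(line)))
		fputs(line, stdout);
	return 0;
}

Amortizing the child across lookups is what the removed subprocess variant buys, at the cost of the sentinel-record protocol described in its comments.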
&dummy_line_nr : line_nr) == 0) { - ret = 0; - goto error; - } - - if (filename != NULL) - *filename = strdup(line); - - zfree(&line); - line_len = 0; - - return 1; - -error: - free(line); - if (function != NULL) - zfree(function); - if (filename != NULL) - zfree(filename); - return ret; -} - -static int inline_list__append_record(struct dso *dso, - struct inline_node *node, - struct symbol *sym, - const char *function, - const char *filename, - unsigned int line_nr) -{ - struct symbol *inline_sym = new_inline_sym(dso, sym, function); - - return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node); -} - static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, - struct dso *dso, - bool unwind_inlines, - struct inline_node *node, + struct dso *dso __maybe_unused, + bool unwind_inlines __maybe_unused, + struct inline_node *node __maybe_unused, struct symbol *sym __maybe_unused) { - struct a2l_subprocess *a2l = dso->a2l; - char *record_function = NULL; - char *record_filename = NULL; - unsigned int record_line_nr = 0; - int record_status = -1; + FILE *fp; + char cmd[PATH_MAX]; + char *filename = NULL; + size_t len; int ret = 0; - size_t inline_count = 0; - if (!a2l) { - dso->a2l = addr2line_subprocess_init(dso_name); - a2l = dso->a2l; + scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64, + dso_name, addr); + + fp = popen(cmd, "r"); + if (fp == NULL) { + pr_warning("popen failed for %s\n", dso_name); + return 0; } - if (a2l == NULL) { - if (!symbol_conf.disable_add2line_warn) - pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name); + if (getline(&filename, &len, fp) < 0 || !len) { + pr_warning("addr2line has no output for %s\n", dso_name); goto out; } - /* - * Send our request and then *deliberately* send something that can't be interpreted as - * a valid address to ask addr2line about (namely, ","). This causes addr2line to first - * write out the answer to our request, in an unbounded/unknown number of records, and - * then to write out the lines "??" and "??:0", so that we can detect when it has - * finished giving us anything useful. We have to be careful about the first record, - * though, because it may be genuinely unknown, in which case we'll get two sets of - * "??"/"??:0" lines. - */ - if (fprintf(a2l->to_child, "%016"PRIx64"\n,\n", addr) < 0 || fflush(a2l->to_child) != 0) { - pr_warning("%s %s: could not send request\n", __func__, dso_name); + ret = filename_split(filename, line_nr); + if (ret != 1) { + free(filename); goto out; } - switch (read_addr2line_record(a2l, &record_function, &record_filename, &record_line_nr)) { - case -1: - pr_warning("%s %s: could not read first record\n", __func__, dso_name); - goto out; - case 0: - /* - * The first record was invalid, so return failure, but first read another - * record, since we asked a junk question and have to clear the answer out. - */ - switch (read_addr2line_record(a2l, NULL, NULL, NULL)) { - case -1: - pr_warning("%s %s: could not read delimiter record\n", __func__, dso_name); - break; - case 0: - /* As expected. 
*/ - break; - default: - pr_warning("%s %s: unexpected record instead of sentinel", - __func__, dso_name); - break; - } - goto out; - default: - break; - } - - if (file) { - *file = strdup(record_filename); - ret = 1; - } - if (line_nr) - *line_nr = record_line_nr; - - if (unwind_inlines) { - if (node && inline_list__append_record(dso, node, sym, - record_function, - record_filename, - record_line_nr)) { - ret = 0; - goto out; - } - } - - /* We have to read the records even if we don't care about the inline info. */ - while ((record_status = read_addr2line_record(a2l, - &record_function, - &record_filename, - &record_line_nr)) == 1) { - if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) { - if (inline_list__append_record(dso, node, sym, - record_function, - record_filename, - record_line_nr)) { - ret = 0; - goto out; - } - ret = 1; /* found at least one inline frame */ - } - } + *file = filename; out: - free(record_function); - free(record_filename); + pclose(fp); return ret; } -void dso__free_a2l(struct dso *dso) +void dso__free_a2l(struct dso *dso __maybe_unused) { - struct a2l_subprocess *a2l = dso->a2l; - - if (!a2l) - return; - - addr2line_subprocess_cleanup(a2l); - - dso->a2l = NULL; } -#endif /* HAVE_LIBBFD_SUPPORT */ - static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso, struct symbol *sym) + struct dso *dso __maybe_unused, + struct symbol *sym) { + FILE *fp; + char cmd[PATH_MAX]; struct inline_node *node; + char *filename = NULL; + char *funcname = NULL; + size_t filelen, funclen; + unsigned int line_nr = 0; + + scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64, + dso_name, addr); + + fp = popen(cmd, "r"); + if (fp == NULL) { + pr_err("popen failed for %s\n", dso_name); + return NULL; + } node = zalloc(sizeof(*node)); if (node == NULL) { perror("not enough memory for the inline node"); - return NULL; + goto out; } INIT_LIST_HEAD(&node->val); node->addr = addr; - addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); + /* addr2line -f generates two lines for each inlined functions */ + while (getline(&funcname, &funclen, fp) != -1) { + char *srcline; + struct symbol *inline_sym; + + strim(funcname); + + if (getline(&filename, &filelen, fp) == -1) + goto out; + + if (filename_split(filename, &line_nr) != 1) + goto out; + + srcline = srcline_from_fileline(filename, line_nr); + inline_sym = new_inline_sym(dso, sym, funcname); + + if (inline_list__append(inline_sym, srcline, node) != 0) { + free(srcline); + if (inline_sym && inline_sym->inlined) + symbol__delete(inline_sym); + goto out; + } + } + +out: + pclose(fp); + free(filename); + free(funcname); + return node; } +#endif /* HAVE_LIBBFD_SUPPORT */ + /* * Number of addr2line failures (without success) before disabling it for that * dso. diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 9cbe351b14..db00ca6a67 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -4,7 +4,6 @@ #include #include #include -#include #include "color.h" #include "counts.h" #include "evlist.h" @@ -121,10 +120,11 @@ static void aggr_printout(struct perf_stat_config *config, id.die, config->csv_output ? 0 : -3, id.core, config->csv_sep); - } else if (id.cpu.cpu > -1) { + } else if (id.core > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 
0 : -7, - id.cpu.cpu, config->csv_sep); + evsel__cpus(evsel)->map[id.core], + config->csv_sep); } break; case AGGR_THREAD: @@ -327,24 +327,26 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_cpu_map_idx(struct perf_stat_config *config, - struct evsel *evsel, const struct aggr_cpu_id *id) +static int first_shadow_cpu(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id) { - struct perf_cpu_map *cpus = evsel__cpus(evsel); - struct perf_cpu cpu; - int idx; + struct evlist *evlist = evsel->evlist; + int i; if (config->aggr_mode == AGGR_NONE) - return perf_cpu_map__idx(cpus, id->cpu); + return id.core; if (!config->aggr_get_id) return 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); + for (i = 0; i < evsel__nr_cpus(evsel); i++) { + int cpu2 = evsel__cpus(evsel)->map[i]; - if (aggr_cpu_id__equal(&cpu_id, id)) - return idx; + if (cpu_map__compare_aggr_cpu_id( + config->aggr_get_id(config, evlist->core.cpus, cpu2), + id)) { + return cpu2; + } } return 0; } @@ -503,7 +505,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu_map_idx(config, counter, &id), + first_shadow_cpu(config, counter, id), &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); @@ -514,26 +516,23 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int idx, s; - struct perf_cpu cpu; + int cpu, s; struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; - struct perf_cpu_map *cpus; for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { - cpus = evsel__cpus(counter); val = 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); - if (!aggr_cpu_id__equal(&s2, &id)) + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { + s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); + if (!cpu_map__compare_aggr_cpu_id(s2, id)) continue; - val += perf_counts(counter->counts, idx, 0)->val; + val += perf_counts(counter->counts, cpu, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu_map_idx(config, counter, &id), + first_shadow_cpu(config, counter, id), &rt_stat); } } @@ -629,28 +628,25 @@ struct aggr_data { u64 ena, run, val; struct aggr_cpu_id id; int nr; - int cpu_map_idx; + int cpu; }; static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int idx; - struct perf_cpu cpu; - struct perf_cpu_map *cpus; + int cpu; struct aggr_cpu_id s2; - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; - s2 = config->aggr_get_id(config, cpu); - if (!aggr_cpu_id__equal(&s2, &ad->id)) + s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); + if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) continue; if (first) ad->nr++; - counts = perf_counts(counter->counts, idx, 0); + counts = perf_counts(counter->counts, cpu, 0); /* * When any result is bad, make them all to give * consistent output in interval mode. 
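The stat-display hunks revert to summing counts over the CPUs whose aggregate id (socket/die/core) matches the row being printed, which is what aggr_cb() and aggr_update_shadow() do. A toy model of that grouping; struct aggr_id and id_of_cpu() are illustrative stand-ins for perf's aggr_cpu_id and the aggr_get_id callback:

#include <stdio.h>

struct aggr_id { int socket; };

static int aggr_id_equal(struct aggr_id a, struct aggr_id b)
{
	return a.socket == b.socket;
}

static struct aggr_id id_of_cpu(int cpu)
{
	return (struct aggr_id){ .socket = cpu / 2 };	/* 2 CPUs per socket */
}

int main(void)
{
	unsigned long long counts[4] = { 10, 20, 30, 40 };

	for (int s = 0; s < 2; s++) {
		struct aggr_id id = { .socket = s };
		unsigned long long val = 0;

		/* Sum only the CPUs whose aggregate id matches this row. */
		for (int cpu = 0; cpu < 4; cpu++)
			if (aggr_id_equal(id_of_cpu(cpu), id))
				val += counts[cpu];
		printf("S%d: %llu\n", s, val);	/* S0: 30, S1: 70 */
	}
	return 0;
}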
@@ -670,7 +666,7 @@ static void aggr_cb(struct perf_stat_config *config, static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first, struct perf_cpu cpu) + bool *first, int cpu) { struct aggr_data ad; FILE *output = config->output; @@ -700,9 +696,10 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu.cpu != -1) - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); - + if (cpu != -1) { + id = cpu_map__empty_aggr_cpu_id(); + id.core = cpu; + } printout(config, id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); if (!metric_only) @@ -735,8 +732,8 @@ static void print_aggr(struct perf_stat_config *config, first = true; evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); + prefix, metric_only, + &first, -1); } if (metric_only) fputc('\n', output); @@ -782,7 +779,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( continue; buf[i].counter = counter; - buf[i].id = aggr_cpu_id__empty(); + buf[i].id = cpu_map__empty_aggr_cpu_id(); buf[i].id.thread = thread; buf[i].uval = uval; buf[i].val = val; @@ -870,7 +867,7 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, + printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); @@ -882,9 +879,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused, { struct aggr_data *ad = data; - ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; } /* @@ -897,12 +894,11 @@ static void print_counter(struct perf_stat_config *config, FILE *output = config->output; u64 ena, run, val; double uval; - int idx; - struct perf_cpu cpu; + int cpu; struct aggr_cpu_id id; - perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { - struct aggr_data ad = { .cpu_map_idx = idx }; + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { + struct aggr_data ad = { .cpu = cpu }; if (!collect_data(config, counter, counter_cb, &ad)) return; @@ -914,7 +910,8 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + id = cpu_map__empty_aggr_cpu_id(); + id.core = cpu; printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -926,32 +923,29 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - int all_idx; - struct perf_cpu cpu; + int cpu; + int nrcpus = 0; + struct evsel *counter; + u64 ena, run, val; + double uval; + struct aggr_cpu_id id; - perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) { - struct evsel *counter; + nrcpus = evlist->core.cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { bool first = true; if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { - u64 ena, run, val; - double 
uval; - struct aggr_cpu_id id; - int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); - - if (counter_idx < 0) - continue; - - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + id = cpu_map__empty_aggr_cpu_id(); + id.core = cpu; if (first) { aggr_printout(config, counter, id, 0); first = false; } - val = perf_counts(counter->counts, counter_idx, 0)->val; - ena = perf_counts(counter->counts, counter_idx, 0)->ena; - run = perf_counts(counter->counts, counter_idx, 0)->run; + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; printout(config, id, 0, counter, uval, prefix, @@ -1215,23 +1209,19 @@ static void print_percore_thread(struct perf_stat_config *config, { int s; struct aggr_cpu_id s2, id; - struct perf_cpu_map *cpus; bool first = true; - int idx; - struct perf_cpu cpu; - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); + for (int i = 0; i < evsel__nr_cpus(counter); i++) { + s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; - if (aggr_cpu_id__equal(&s2, &id)) + if (cpu_map__compare_aggr_cpu_id(s2, id)) break; } print_counter_aggrdata(config, counter, s, prefix, false, - &first, cpu); + &first, i); } } @@ -1254,8 +1244,8 @@ static void print_percore(struct perf_stat_config *config, fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); + prefix, metric_only, + &first, -1); } if (metric_only) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 10af7804e4..34a7f5c1ff 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include #include "evsel.h" #include "stat.h" #include "color.h" -#include "debug.h" #include "pmu.h" #include "rblist.h" #include "evlist.h" @@ -32,7 +30,7 @@ struct saved_value { struct evsel *evsel; enum stat_type type; int ctx; - int cpu_map_idx; + int cpu; struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; @@ -47,8 +45,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->cpu_map_idx != b->cpu_map_idx) - return a->cpu_map_idx - b->cpu_map_idx; + if (a->cpu != b->cpu) + return a->cpu - b->cpu; /* * Previously the rbtree was used to link generic metrics. 
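Throughout the stat-display.c changes above, index-based `perf_cpu_map` iteration (`perf_cpu_map__for_each_cpu(cpu, idx, ...)` with a `struct perf_cpu` value) is swapped back to plain integer loops over the map array. The distinction matters on sparse CPU maps, where a map index and a CPU number diverge; the toy program below (not from the patch; the map contents are made up) shows the difference.

```c
#include <stdio.h>

int main(void)
{
	int map[] = { 0, 2, 3 };	/* hypothetical sparse online-CPU map */
	int nr = sizeof(map) / sizeof(map[0]);

	/* index 1 refers to CPU 2 here, not CPU 1 */
	for (int idx = 0; idx < nr; idx++)
		printf("map index %d -> cpu %d\n", idx, map[idx]);
	return 0;
}
```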
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 10af7804e4..34a7f5c1ff 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -1,10 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-#include
 #include
 #include "evsel.h"
 #include "stat.h"
 #include "color.h"
-#include "debug.h"
 #include "pmu.h"
 #include "rblist.h"
 #include "evlist.h"
@@ -32,7 +30,7 @@ struct saved_value {
 	struct evsel *evsel;
 	enum stat_type type;
 	int ctx;
-	int cpu_map_idx;
+	int cpu;
 	struct cgroup *cgrp;
 	struct runtime_stat *stat;
 	struct stats stats;
@@ -47,8 +45,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
 					     rb_node);
 	const struct saved_value *b = entry;
 
-	if (a->cpu_map_idx != b->cpu_map_idx)
-		return a->cpu_map_idx - b->cpu_map_idx;
+	if (a->cpu != b->cpu)
+		return a->cpu - b->cpu;
 
 	/*
 	 * Previously the rbtree was used to link generic metrics.
@@ -105,7 +103,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused,
 }
 
 static struct saved_value *saved_value_lookup(struct evsel *evsel,
-					      int cpu_map_idx,
+					      int cpu,
 					      bool create,
 					      enum stat_type type,
 					      int ctx,
@@ -115,7 +113,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
 	struct rblist *rblist;
 	struct rb_node *nd;
 	struct saved_value dm = {
-		.cpu_map_idx = cpu_map_idx,
+		.cpu = cpu,
 		.evsel = evsel,
 		.type = type,
 		.ctx = ctx,
@@ -213,10 +211,10 @@ struct runtime_stat_data {
 
 static void update_runtime_stat(struct runtime_stat *st,
 				enum stat_type type,
-				int cpu_map_idx, u64 count,
+				int cpu, u64 count,
 				struct runtime_stat_data *rsd)
 {
-	struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
+	struct saved_value *v = saved_value_lookup(NULL, cpu, true, type,
 						   rsd->ctx, st, rsd->cgrp);
 
 	if (v)
@@ -229,7 +227,7 @@ static void update_runtime_stat(struct runtime_stat *st,
  * instruction rates, etc:
  */
 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
-				    int cpu_map_idx, struct runtime_stat *st)
+				    int cpu, struct runtime_stat *st)
 {
 	u64 count_ns = count;
 	struct saved_value *v;
@@ -241,88 +239,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
 	count *= counter->scale;
 
 	if (evsel__is_clock(counter))
-		update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
+		update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-		update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
-		update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, ELISION_START))
-		update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
 		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
 		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
 		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
 		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
 		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
 		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
 		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
 		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
 		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
 		update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
 		update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
 		update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
 		update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
-				    cpu_map_idx, count, &rsd);
+				    cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, SMI_NUM))
-		update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, APERF))
-		update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);
+		update_runtime_stat(st, STAT_APERF, cpu, count, &rsd);
 
 	if (counter->collect_stat) {
-		v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
+		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st,
 				       rsd.cgrp);
 		update_stats(&v->stats, count);
 		if (counter->metric_leader)
 			v->metric_total += count;
 	} else if (counter->metric_leader) {
 		v = saved_value_lookup(counter->metric_leader,
-				       cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
+				       cpu, true, STAT_NONE, 0, st, rsd.cgrp);
 		v->metric_total += count;
 		v->metric_other++;
 	}
@@ -372,16 +370,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 {
 	struct evsel *counter, *leader, **metric_events, *oc;
 	bool found;
-	struct expr_parse_ctx *ctx;
+	struct expr_parse_ctx ctx;
 	struct hashmap_entry *cur;
 	size_t bkt;
 	int i;
 
-	ctx = expr__ctx_new();
-	if (!ctx) {
-		pr_debug("expr__ctx_new failed");
-		return;
-	}
+	expr__ctx_init(&ctx);
 	evlist__for_each_entry(evsel_list, counter) {
 		bool invalid = false;
@@ -389,25 +383,25 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 		if (!counter->metric_expr)
 			continue;
 
-		expr__ctx_clear(ctx);
+		expr__ctx_clear(&ctx);
 		metric_events = counter->metric_events;
 		if (!metric_events) {
-			if (expr__find_ids(counter->metric_expr,
-					   counter->name,
-					   ctx) < 0)
+			if (expr__find_other(counter->metric_expr,
+					     counter->name,
+					     &ctx, 1) < 0)
 				continue;
 
 			metric_events = calloc(sizeof(struct evsel *),
-					       hashmap__size(ctx->ids) + 1);
+					       hashmap__size(&ctx.ids) + 1);
 			if (!metric_events) {
-				expr__ctx_free(ctx);
+				expr__ctx_clear(&ctx);
 				return;
 			}
 			counter->metric_events = metric_events;
 		}
 
 		i = 0;
-		hashmap__for_each_entry(ctx->ids, cur, bkt) {
+		hashmap__for_each_entry((&ctx.ids), cur, bkt) {
 			const char *metric_name = (const char *)cur->key;
 
 			found = false;
@@ -444,7 +438,6 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 					"Add %s event to groups to get metric expression for %s\n",
 					metric_name,
 					counter->name);
-				free(printed);
 				printed = strdup(metric_name);
 			}
 			invalid = true;
@@ -460,16 +453,16 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 			counter->metric_expr = NULL;
 		}
 	}
-	expr__ctx_free(ctx);
+	expr__ctx_clear(&ctx);
 }
 
 static double runtime_stat_avg(struct runtime_stat *st,
-			       enum stat_type type, int cpu_map_idx,
+			       enum stat_type type, int cpu,
 			       struct runtime_stat_data *rsd)
 {
 	struct saved_value *v;
 
-	v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+	v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
 	if (!v)
 		return 0.0;
@@ -477,12 +470,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
 }
 
 static double runtime_stat_n(struct runtime_stat *st,
-			     enum stat_type type, int cpu_map_idx,
+			     enum stat_type type, int cpu,
 			     struct runtime_stat_data *rsd)
 {
 	struct saved_value *v;
 
-	v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+	v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
 	if (!v)
 		return 0.0;
@@ -490,7 +483,7 @@ static double runtime_stat_n(struct runtime_stat *st,
 }
 
 static void print_stalled_cycles_frontend(struct perf_stat_config *config,
-					  int cpu_map_idx, double avg,
+					  int cpu, double avg,
 					  struct perf_stat_output_ctx *out,
 					  struct runtime_stat *st,
 					  struct runtime_stat_data *rsd)
@@ -498,7 +491,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -513,7 +506,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
 }
 
 static void print_stalled_cycles_backend(struct perf_stat_config *config,
-					 int cpu_map_idx, double avg,
+					 int cpu, double avg,
 					 struct perf_stat_output_ctx *out,
 					 struct runtime_stat *st,
 					 struct runtime_stat_data *rsd)
@@ -521,7 +514,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -532,7 +525,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
 }
 
 static void print_branch_misses(struct perf_stat_config *config,
-				int cpu_map_idx, double avg,
+				int cpu, double avg,
 				struct perf_stat_output_ctx *out,
 				struct runtime_stat *st,
 				struct runtime_stat_data *rsd)
@@ -540,7 +533,7 @@ static void print_branch_misses(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -551,7 +544,7 @@ static void print_branch_misses(struct perf_stat_config *config,
 }
 
 static void print_l1_dcache_misses(struct perf_stat_config *config,
-				   int cpu_map_idx, double avg,
+				   int cpu, double avg,
 				   struct perf_stat_output_ctx *out,
 				   struct runtime_stat *st,
 				   struct runtime_stat_data *rsd)
@@ -559,7 +552,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -570,7 +563,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
 }
 
 static void print_l1_icache_misses(struct perf_stat_config *config,
-				   int cpu_map_idx, double avg,
+				   int cpu, double avg,
 				   struct perf_stat_output_ctx *out,
 				   struct runtime_stat *st,
 				   struct runtime_stat_data *rsd)
@@ -578,7 +571,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -588,7 +581,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
 }
 
 static void print_dtlb_cache_misses(struct perf_stat_config *config,
-				    int cpu_map_idx, double avg,
+				    int cpu, double avg,
 				    struct perf_stat_output_ctx *out,
 				    struct runtime_stat *st,
 				    struct runtime_stat_data *rsd)
@@ -596,7 +589,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -606,7 +599,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
 }
 
 static void print_itlb_cache_misses(struct perf_stat_config *config,
-				    int cpu_map_idx, double avg,
+				    int cpu, double avg,
 				    struct perf_stat_output_ctx *out,
 				    struct runtime_stat *st,
 				    struct runtime_stat_data *rsd)
@@ -614,7 +607,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -624,7 +617,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
 }
 
 static void print_ll_cache_misses(struct perf_stat_config *config,
-				  int cpu_map_idx, double avg,
+				  int cpu, double avg,
 				  struct perf_stat_output_ctx *out,
 				  struct runtime_stat *st,
 				  struct runtime_stat_data *rsd)
@@ -632,7 +625,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
 	double total, ratio = 0.0;
 	const char *color;
 
-	total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -690,61 +683,61 @@ static double sanitize_val(double x)
 	return x;
 }
 
-static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
+static double td_total_slots(int cpu, struct runtime_stat *st,
 			     struct runtime_stat_data *rsd)
 {
-	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
+	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd);
 }
 
-static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
+static double td_bad_spec(int cpu, struct runtime_stat *st,
 			  struct runtime_stat_data *rsd)
 {
 	double bad_spec = 0;
 	double total_slots;
 	double total;
 
-	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
-		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
-		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);
+	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) -
+		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) +
+		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd);
 
-	total_slots = td_total_slots(cpu_map_idx, st, rsd);
+	total_slots = td_total_slots(cpu, st, rsd);
 	if (total_slots)
 		bad_spec = total / total_slots;
 	return sanitize_val(bad_spec);
 }
 
-static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
+static double td_retiring(int cpu, struct runtime_stat *st,
 			  struct runtime_stat_data *rsd)
 {
 	double retiring = 0;
-	double total_slots = td_total_slots(cpu_map_idx, st, rsd);
+	double total_slots = td_total_slots(cpu, st, rsd);
 	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
-					    cpu_map_idx, rsd);
+					    cpu, rsd);
 
 	if (total_slots)
 		retiring = ret_slots / total_slots;
 	return retiring;
 }
 
-static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
+static double td_fe_bound(int cpu, struct runtime_stat *st,
 			  struct runtime_stat_data *rsd)
 {
 	double fe_bound = 0;
-	double total_slots = td_total_slots(cpu_map_idx, st, rsd);
+	double total_slots = td_total_slots(cpu, st, rsd);
 	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
-					    cpu_map_idx, rsd);
+					    cpu, rsd);
 
 	if (total_slots)
 		fe_bound = fetch_bub / total_slots;
 	return fe_bound;
 }
 
-static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
+static double td_be_bound(int cpu, struct runtime_stat *st,
 			  struct runtime_stat_data *rsd)
 {
-	double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
-		      td_bad_spec(cpu_map_idx, st, rsd) +
-		      td_retiring(cpu_map_idx, st, rsd));
+	double sum = (td_fe_bound(cpu, st, rsd) +
+		      td_bad_spec(cpu, st, rsd) +
+		      td_retiring(cpu, st, rsd));
 	if (sum == 0)
 		return 0;
 	return sanitize_val(1.0 - sum);
@@ -755,15 +748,15 @@ static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
  * the ratios we need to recreate the sum.
  */
 
-static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
+static double td_metric_ratio(int cpu, enum stat_type type,
 			      struct runtime_stat *stat,
 			      struct runtime_stat_data *rsd)
 {
-	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
-		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
-		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
-		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
-	double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);
+	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd);
+	double d = runtime_stat_avg(stat, type, cpu, rsd);
 
 	if (sum)
 		return d / sum;
@@ -775,23 +768,23 @@ static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
  * We allow two missing.
  */
 
-static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
+static bool full_td(int cpu, struct runtime_stat *stat,
 		    struct runtime_stat_data *rsd)
 {
 	int c = 0;
 
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0)
 		c++;
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0)
 		c++;
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0)
 		c++;
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0)
 		c++;
 	return c >= 2;
 }
 
-static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
+static void print_smi_cost(struct perf_stat_config *config, int cpu,
 			   struct perf_stat_output_ctx *out,
 			   struct runtime_stat *st,
 			   struct runtime_stat_data *rsd)
@@ -799,9 +792,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
 	double smi_num, aperf, cycles, cost = 0.0;
 	const char *color = NULL;
 
-	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
-	aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
-	cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd);
+	aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd);
+	cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
 
 	if ((cycles == 0) || (aperf == 0))
 		return;
@@ -818,43 +811,51 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
 static int prepare_metric(struct evsel **metric_events,
 			  struct metric_ref *metric_refs,
 			  struct expr_parse_ctx *pctx,
-			  int cpu_map_idx,
+			  int cpu,
 			  struct runtime_stat *st)
 {
 	double scale;
-	char *n;
+	char *n, *pn;
 	int i, j, ret;
 
+	expr__ctx_init(pctx);
 	for (i = 0; metric_events[i]; i++) {
 		struct saved_value *v;
 		struct stats *stats;
 		u64 metric_total = 0;
-		int source_count;
 
 		if (!strcmp(metric_events[i]->name, "duration_time")) {
 			stats = &walltime_nsecs_stats;
 			scale = 1e-9;
-			source_count = 1;
 		} else {
-			v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
+			v = saved_value_lookup(metric_events[i], cpu, false,
 					       STAT_NONE, 0, st,
 					       metric_events[i]->cgrp);
 			if (!v)
 				break;
 			stats = &v->stats;
 			scale = 1.0;
-			source_count = evsel__source_count(metric_events[i]);
 
 			if (v->metric_other)
 				metric_total = v->metric_total;
 		}
-		n = strdup(evsel__metric_id(metric_events[i]));
+
+		n = strdup(metric_events[i]->name);
 		if (!n)
 			return -ENOMEM;
+
+		/*
+		 * This display code with --no-merge adds [cpu] postfixes.
+		 * These are not supported by the parser. Remove everything
+		 * after the space.
+		 */
+		pn = strchr(n, ' ');
+		if (pn)
+			*pn = 0;
 
-		expr__add_id_val_source_count(pctx, n,
-					metric_total ? : avg_stats(stats) * scale,
-					source_count);
+		if (metric_total)
+			expr__add_id_val(pctx, n, metric_total);
+		else
+			expr__add_id_val(pctx, n, avg_stats(stats)*scale);
 	}
 
 	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
@@ -874,28 +875,22 @@ static void generic_metric(struct perf_stat_config *config,
 			   const char *metric_name,
 			   const char *metric_unit,
 			   int runtime,
-			   int cpu_map_idx,
+			   int cpu,
 			   struct perf_stat_output_ctx *out,
 			   struct runtime_stat *st)
 {
 	print_metric_t print_metric = out->print_metric;
-	struct expr_parse_ctx *pctx;
+	struct expr_parse_ctx pctx;
 	double ratio, scale;
 	int i;
 	void *ctxp = out->ctx;
 
-	pctx = expr__ctx_new();
-	if (!pctx)
+	i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
+	if (i < 0)
 		return;
 
-	pctx->runtime = runtime;
-	i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
-	if (i < 0) {
-		expr__ctx_free(pctx);
-		return;
-	}
 	if (!metric_events[i]) {
-		if (expr__parse(&ratio, pctx, metric_expr) == 0) {
+		if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
 			char *unit;
 			char metric_bf[64];
@@ -931,32 +926,28 @@ static void generic_metric(struct perf_stat_config *config,
 			(metric_name ? metric_name : name) : "", 0);
 	}
 
-	expr__ctx_free(pctx);
+	expr__ctx_clear(&pctx);
 }
 
-double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
 {
-	struct expr_parse_ctx *pctx;
+	struct expr_parse_ctx pctx;
 	double ratio = 0.0;
 
-	pctx = expr__ctx_new();
-	if (!pctx)
-		return NAN;
-
-	if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
+	if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
 		goto out;
 
-	if (expr__parse(&ratio, pctx, mexp->metric_expr))
+	if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
 		ratio = 0.0;
 out:
-	expr__ctx_free(pctx);
+	expr__ctx_clear(&pctx);
 	return ratio;
 }
 
 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				   struct evsel *evsel,
-				   double avg, int cpu_map_idx,
+				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out,
 				   struct rblist *metric_events,
 				   struct runtime_stat *st)
@@ -975,7 +966,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 	if (config->iostat_run) {
 		iostat_print_metric(config, evsel, out);
 	} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
 
 		if (total) {
 			ratio = avg / total;
@@ -985,11 +976,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
 		}
 
-		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd);
 
 		total = max(total, runtime_stat_avg(st,
 						    STAT_STALLED_CYCLES_BACK,
-						    cpu_map_idx, &rsd));
+						    cpu, &rsd));
 
 		if (total && avg) {
 			out->new_line(config, ctxp);
@@ -999,8 +990,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				ratio);
 		}
 	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
-		if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
-			print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
+		if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0)
+			print_branch_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
 	} else if (
@@ -1009,8 +1000,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
-			print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+		if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0)
+			print_l1_dcache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
 	} else if (
@@ -1019,8 +1010,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
-			print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+		if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0)
+			print_l1_icache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
 	} else if (
@@ -1029,8 +1020,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
-			print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+		if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0)
+			print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
 	} else if (
@@ -1039,8 +1030,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
-			print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+		if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0)
+			print_itlb_cache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
 	} else if (
@@ -1049,27 +1040,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
-			print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+		if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0)
+			print_ll_cache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
 	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
-		total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd);
 
 		if (total)
 			ratio = avg * 100 / total;
 
-		if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
+		if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0)
 			print_metric(config, ctxp, NULL, "%8.3f %%",
 				     "of all cache refs", ratio);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
 	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
+		print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd);
 	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
+		print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd);
 	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-		total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
 
 		if (total) {
 			ratio = avg / total;
@@ -1078,7 +1069,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
-		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
 
 		if (total)
 			print_metric(config, ctxp, NULL,
@@ -1088,8 +1079,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
 				     0);
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
-		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
-		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
 
 		if (total2 < avg)
 			total2 = avg;
@@ -1099,19 +1090,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		else
 			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
 	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
-		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
 
 		if (avg)
 			ratio = total / avg;
 
-		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
+		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0)
 			print_metric(config, ctxp, NULL, "%8.0f",
 				     "cycles / transaction", ratio);
 		else
 			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
 				      0);
 	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
-		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
 
 		if (avg)
 			ratio = total / avg;
@@ -1124,28 +1115,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		else
 			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
-		double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);
+		double fe_bound = td_fe_bound(cpu, st, &rsd);
 
 		if (fe_bound > 0.2)
 			color = PERF_COLOR_RED;
 		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
-		double retiring = td_retiring(cpu_map_idx, st, &rsd);
+		double retiring = td_retiring(cpu, st, &rsd);
 
 		if (retiring > 0.7)
 			color = PERF_COLOR_GREEN;
 		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
-		double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);
+		double bad_spec = td_bad_spec(cpu, st, &rsd);
 
 		if (bad_spec > 0.1)
 			color = PERF_COLOR_RED;
 		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
 				bad_spec * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
-		double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
+		double be_bound = td_be_bound(cpu, st, &rsd);
 		const char *name = "backend bound";
 		static int have_recovery_bubbles = -1;
@@ -1158,14 +1149,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		if (be_bound > 0.2)
 			color = PERF_COLOR_RED;
-		if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
+		if (td_total_slots(cpu, st, &rsd) > 0)
 			print_metric(config, ctxp, color, "%8.1f%%", name,
 					be_bound * 100.);
 		else
 			print_metric(config, ctxp, NULL, NULL, name, 0);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
-		   full_td(cpu_map_idx, st, &rsd)) {
-		double retiring = td_metric_ratio(cpu_map_idx,
+		   full_td(cpu, st, &rsd)) {
+		double retiring = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_RETIRING, st,
 						  &rsd);
 		if (retiring > 0.7)
@@ -1173,8 +1164,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
-		   full_td(cpu_map_idx, st, &rsd)) {
-		double fe_bound = td_metric_ratio(cpu_map_idx,
+		   full_td(cpu, st, &rsd)) {
+		double fe_bound = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_FE_BOUND, st,
 						  &rsd);
 		if (fe_bound > 0.2)
@@ -1182,8 +1173,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
-		   full_td(cpu_map_idx, st, &rsd)) {
-		double be_bound = td_metric_ratio(cpu_map_idx,
+		   full_td(cpu, st, &rsd)) {
+		double be_bound = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_BE_BOUND, st,
 						  &rsd);
 		if (be_bound > 0.2)
@@ -1191,8 +1182,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
 				be_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
-		   full_td(cpu_map_idx, st, &rsd)) {
-		double bad_spec = td_metric_ratio(cpu_map_idx,
+		   full_td(cpu, st, &rsd)) {
+		double bad_spec = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_BAD_SPEC, st,
 						  &rsd);
 		if (bad_spec > 0.1)
@@ -1200,11 +1191,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
 				bad_spec * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
-			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
-		double retiring = td_metric_ratio(cpu_map_idx,
+			full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+		double retiring = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_RETIRING, st,
 						  &rsd);
-		double heavy_ops = td_metric_ratio(cpu_map_idx,
+		double heavy_ops = td_metric_ratio(cpu,
 						   STAT_TOPDOWN_HEAVY_OPS, st,
 						   &rsd);
 		double light_ops = retiring - heavy_ops;
@@ -1220,11 +1211,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		print_metric(config, ctxp, color, "%8.1f%%", "light operations",
 				light_ops * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
-			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
-		double bad_spec = td_metric_ratio(cpu_map_idx,
+			full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+		double bad_spec = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_BAD_SPEC, st,
 						  &rsd);
-		double br_mis = td_metric_ratio(cpu_map_idx,
+		double br_mis = td_metric_ratio(cpu,
 						STAT_TOPDOWN_BR_MISPREDICT, st,
 						&rsd);
 		double m_clears = bad_spec - br_mis;
@@ -1240,11 +1231,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
 				m_clears * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
-			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
-		double fe_bound = td_metric_ratio(cpu_map_idx,
+			full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+		double fe_bound = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_FE_BOUND, st,
 						  &rsd);
-		double fetch_lat = td_metric_ratio(cpu_map_idx,
+		double fetch_lat = td_metric_ratio(cpu,
 						   STAT_TOPDOWN_FETCH_LAT, st,
 						   &rsd);
 		double fetch_bw = fe_bound - fetch_lat;
@@ -1260,11 +1251,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
 				fetch_bw * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
-			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
-		double be_bound = td_metric_ratio(cpu_map_idx,
+			full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+		double be_bound = td_metric_ratio(cpu,
 						  STAT_TOPDOWN_BE_BOUND, st,
 						  &rsd);
-		double mem_bound = td_metric_ratio(cpu_map_idx,
+		double mem_bound = td_metric_ratio(cpu,
 						   STAT_TOPDOWN_MEM_BOUND, st,
 						   &rsd);
 		double core_bound = be_bound - mem_bound;
@@ -1281,12 +1272,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				core_bound * 100.);
 	} else if (evsel->metric_expr) {
 		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
-				evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
-	} else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
+				evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
+	} else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
 		char unit = ' ';
 		char unit_buf[10] = "/sec";
 
-		total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
+		total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
 		if (total)
 			ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
@@ -1294,7 +1285,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
 	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
-		print_smi_cost(config, cpu_map_idx, out, st, &rsd);
+		print_smi_cost(config, cpu, out, st, &rsd);
 	} else {
 		num = 0;
 	}
@@ -1307,7 +1298,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			out->new_line(config, ctxp);
 			generic_metric(config, mexp->metric_expr, mexp->metric_events,
 					mexp->metric_refs, evsel->name, mexp->metric_name,
-					mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
+					mexp->metric_unit, mexp->runtime, cpu, out, st);
 		}
 	}
 	if (num == 0)
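In the stat-shadow.c diff above, shadow-stat values live in an rbtree of `struct saved_value`, and `saved_value_cmp()` orders nodes by comparing one key field at a time; the restored `a->cpu - b->cpu` line is the first link of that chain. Below is a reduced, runnable sketch of the same chained-comparator pattern (illustration only; the real key also includes the evsel, cgroup, and runtime_stat pointers):

```c
#include <stdio.h>

struct key {
	int cpu;
	int type;
	int ctx;
};

/* First differing field decides the order, as in saved_value_cmp(). */
static int key_cmp(const struct key *a, const struct key *b)
{
	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;
	if (a->type != b->type)
		return a->type - b->type;
	return a->ctx - b->ctx;
}

int main(void)
{
	struct key a = { 1, 2, 0 }, b = { 1, 3, 0 };

	printf("cmp = %d\n", key_cmp(&a, &b));	/* negative: a sorts first */
	return 0;
}
```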
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ee6f034812..09ea334586 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -152,13 +152,11 @@ static void evsel__free_stat_priv(struct evsel *evsel)
 	zfree(&evsel->stats);
 }
 
-static int evsel__alloc_prev_raw_counts(struct evsel *evsel)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
 {
-	int cpu_map_nr = evsel__nr_cpus(evsel);
-	int nthreads = perf_thread_map__nr(evsel->core.threads);
 	struct perf_counts *counts;
 
-	counts = perf_counts__new(cpu_map_nr, nthreads);
+	counts = perf_counts__new(ncpus, nthreads);
 	if (counts)
 		evsel->prev_raw_counts = counts;
 
@@ -179,9 +177,12 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel)
 
 static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
 {
+	int ncpus = evsel__nr_cpus(evsel);
+	int nthreads = perf_thread_map__nr(evsel->core.threads);
+
 	if (evsel__alloc_stat_priv(evsel) < 0 ||
-	    evsel__alloc_counts(evsel) < 0 ||
-	    (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
+	    evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+	    (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
 		return -ENOMEM;
 
 	return 0;
@@ -292,12 +293,11 @@ static bool pkg_id_equal(const void *__key1, const void *__key2,
 	return *key1 == *key2;
 }
 
-static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
-			 int cpu_map_idx, bool *skip)
+static int check_per_pkg(struct evsel *counter,
+			 struct perf_counts_values *vals, int cpu, bool *skip)
 {
 	struct hashmap *mask = counter->per_pkg_mask;
 	struct perf_cpu_map *cpus = evsel__cpus(counter);
-	struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx);
 	int s, d, ret = 0;
 	uint64_t *key;
 
@@ -328,7 +328,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
 	if (!(vals->run && vals->ena))
 		return 0;
 
-	s = cpu__get_socket_id(cpu);
+	s = cpu_map__get_socket(cpus, cpu, NULL).socket;
 	if (s < 0)
 		return -1;
 
@@ -336,7 +336,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
 	 * On multi-die system, die_id > 0. On no-die system, die_id = 0.
 	 * We use hashmap(socket, die) to check the used socket+die pair.
 	 */
-	d = cpu__get_die_id(cpu);
+	d = cpu_map__get_die(cpus, cpu, NULL).die;
 	if (d < 0)
 		return -1;
 
@@ -345,10 +345,9 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
 		return -ENOMEM;
 
 	*key = (uint64_t)d << 32 | s;
-	if (hashmap__find(mask, (void *)key, NULL)) {
+	if (hashmap__find(mask, (void *)key, NULL))
 		*skip = true;
-		free(key);
-	} else
+	else
 		ret = hashmap__add(mask, (void *)key, (void *)1);
 
 	return ret;
@@ -356,14 +355,14 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
 
 static int
 process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
-		       int cpu_map_idx, int thread,
+		       int cpu, int thread,
 		       struct perf_counts_values *count)
 {
 	struct perf_counts_values *aggr = &evsel->counts->aggr;
 	static struct perf_counts_values zero;
 	bool skip = false;
 
-	if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) {
+	if (check_per_pkg(evsel, count, cpu, &skip)) {
 		pr_err("failed to read per-pkg counter\n");
 		return -1;
 	}
@@ -379,11 +378,11 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 	case AGGR_NODE:
 	case AGGR_NONE:
 		if (!evsel->snapshot)
-			evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
+			evsel__compute_deltas(evsel, cpu, thread, count);
 		perf_counts_values__scale(count, config->scale, NULL);
 		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
 			perf_stat__update_shadow_stats(evsel, count->val,
-						       cpu_map_idx, &rt_stat);
+						       cpu, &rt_stat);
 		}
 
 		if (config->aggr_mode == AGGR_THREAD) {
@@ -412,15 +411,15 @@ static int process_counter_maps(struct perf_stat_config *config,
 {
 	int nthreads = perf_thread_map__nr(counter->core.threads);
 	int ncpus = evsel__nr_cpus(counter);
-	int idx, thread;
+	int cpu, thread;
 
 	if (counter->core.system_wide)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (idx = 0; idx < ncpus; idx++) {
-			if (process_counter_values(config, counter, idx, thread,
-						   perf_counts(counter->counts, idx, thread)))
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			if (process_counter_values(config, counter, cpu, thread,
+						   perf_counts(counter->counts, cpu, thread)))
 				return -1;
 		}
 	}
@@ -532,7 +531,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
 int create_perf_stat_counter(struct evsel *evsel,
 			     struct perf_stat_config *config,
 			     struct target *target,
-			     int cpu_map_idx)
+			     int cpu)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 	struct evsel *leader = evsel__leader(evsel);
@@ -586,7 +585,7 @@ int create_perf_stat_counter(struct evsel *evsel,
 	}
 
 	if (target__has_cpu(target) && !target__has_per_thread(target))
-		return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
+		return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
 
 	return evsel__open_per_thread(evsel, evsel->core.threads);
 }
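One detail worth calling out in the stat.c hunk above: `check_per_pkg()` deduplicates per-package counters with a hashmap whose key packs the die id into the high 32 bits of a u64 and the socket id into the low 32 bits. A standalone illustration of that packing (the example ids are made up):

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int d = 1, s = 3;	/* hypothetical die and socket ids */
	/* same packing as check_per_pkg(): die high, socket low */
	uint64_t key = (uint64_t)d << 32 | s;

	printf("key = %#" PRIx64 " (die %d, socket %d)\n", key,
	       (int)(key >> 32), (int)(key & 0xffffffff));
	return 0;
}
```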
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 335d19cc30..32c8527de3 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -108,7 +108,8 @@ struct runtime_stat {
 	struct rblist value_list;
 };
 
-typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
+typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config,
+			     struct perf_cpu_map *m, int cpu);
 
 struct perf_stat_config {
 	enum aggr_mode		 aggr_mode;
@@ -208,7 +209,7 @@ void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
-				    int cpu_map_idx, struct runtime_stat *st);
+				    int cpu, struct runtime_stat *st);
 struct perf_stat_output_ctx {
 	void *ctx;
 	print_metric_t print_metric;
@@ -248,10 +249,10 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
 int create_perf_stat_counter(struct evsel *evsel,
 			     struct perf_stat_config *config,
 			     struct target *target,
-			     int cpu_map_idx);
+			     int cpu);
 void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
 			    struct target *_target, struct timespec *ts, int argc, const char **argv);
 
 struct metric_expr;
-double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st);
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
 #endif
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1e0c731fc5..96f941e016 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -728,20 +728,20 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
 	int i;
 	int ret = 0;
 	struct perf_cpu_map *m;
-	struct perf_cpu c;
+	int c;
 
 	m = perf_cpu_map__new(s);
 	if (!m)
 		return -1;
 
-	for (i = 0; i < perf_cpu_map__nr(m); i++) {
-		c = perf_cpu_map__cpu(m, i);
-		if (c.cpu >= nr_cpus) {
+	for (i = 0; i < m->nr; i++) {
+		c = m->map[i];
+		if (c >= nr_cpus) {
 			ret = -1;
 			break;
 		}
 
-		set_bit(c.cpu, cpumask_bits(b));
+		set_bit(c, cpumask_bits(b));
 	}
 
 	perf_cpu_map__put(m);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2ed3140a1..0fc9a54107 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -274,7 +274,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *
 	if (symbol_conf.priv_size) {
 		if (symbol_conf.init_annotation) {
 			struct annotation *notes = (void *)sym;
-			annotation__init(notes);
+			pthread_mutex_init(&notes->lock, NULL);
 		}
 		sym = ((void *)sym) + symbol_conf.priv_size;
 	}
@@ -294,13 +294,6 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *
 
 void symbol__delete(struct symbol *sym)
 {
-	if (symbol_conf.priv_size) {
-		if (symbol_conf.init_annotation) {
-			struct annotation *notes = symbol__annotation(sym);
-
-			annotation__exit(notes);
-		}
-	}
 	free(((void *)sym) - symbol_conf.priv_size);
 }
 
@@ -709,10 +702,6 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
 	if (!symbol_type__filter(type))
 		return 0;
 
-	/* Ignore local symbols for ARM modules */
-	if (name[0] == '$')
-		return 0;
-
 	/*
 	 * module symbols are not sorted so we add all
 	 * symbols, setting length to 0, and rely on
@@ -2641,25 +2630,3 @@ struct mem_info *mem_info__new(void)
 		refcount_set(&mi->refcnt, 1);
 	return mi;
 }
-
-/*
- * Checks that user supplied symbol kernel files are accessible because
- * the default mechanism for accessing elf files fails silently. i.e. if
- * debug syms for a build ID aren't found perf carries on normally. When
- * they are user supplied we should assume that the user doesn't want to
- * silently fail.
- */
-int symbol__validate_sym_arguments(void)
-{
-	if (symbol_conf.vmlinux_name &&
-	    access(symbol_conf.vmlinux_name, R_OK)) {
-		pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
-		return -EINVAL;
-	}
-	if (symbol_conf.kallsyms_name &&
-	    access(symbol_conf.kallsyms_name, R_OK)) {
-		pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
-		return -EINVAL;
-	}
-	return 0;
-}
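The symbol.c hunks above touch both ends of perf's symbol private-area scheme: `symbol__new()` over-allocates `symbol_conf.priv_size` bytes *in front of* the `struct symbol` and returns a pointer shifted past them, so `symbol__delete()` must shift back before freeing. Here is a self-contained sketch of that layout with a made-up `priv_size`; it uses GNU void-pointer arithmetic, as the perf source does, and is illustration only.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sym {
	int dummy;
	char name[];		/* flexible array, as in struct symbol */
};

static size_t priv_size = 16;	/* hypothetical room for per-symbol data */

static struct sym *sym_new(const char *name)
{
	size_t namelen = strlen(name) + 1;
	struct sym *s = calloc(1, priv_size + sizeof(*s) + namelen);

	if (!s)
		return NULL;
	s = (void *)s + priv_size;	/* hand back a pointer past the private area */
	memcpy(s->name, name, namelen);
	return s;
}

static void sym_delete(struct sym *s)
{
	free((void *)s - priv_size);	/* free from the true allocation start */
}

int main(void)
{
	struct sym *s = sym_new("main");

	if (s) {
		printf("%s\n", s->name);
		sym_delete(s);
	}
	return 0;
}
```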
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index fbf866d82d..954d6a049e 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -40,33 +40,22 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, GElf_Shdr *shp,
 			     const char *name, size_t *idx);
 #endif
 
-/**
- * A symtab entry. When allocated this may be preceded by an annotation (see
- * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node
- * to sort by name (see struct symbol_name_rb_node).
+/** struct symbol - symtab entry
+ *
+ * @ignore - resolvable but tools ignore it (e.g. idle routines)
  */
 struct symbol {
 	struct rb_node	rb_node;
-	/** Range of symbol [start, end). */
 	u64		start;
 	u64		end;
-	/** Length of the string name. */
 	u16		namelen;
-	/** ELF symbol type as defined for st_info. E.g STT_OBJECT or STT_FUNC. */
 	u8		type:4;
-	/** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
 	u8		binding:4;
-	/** Set true for kernel symbols of idle routines. */
 	u8		idle:1;
-	/** Resolvable but tools ignore it (e.g. idle routines). */
 	u8		ignore:1;
-	/** Symbol for an inlined function. */
 	u8		inlined:1;
-	/** Has symbol__annotate2 been performed. */
-	u8		annotate2:1;
-	/** Architecture specific. Unused except on PPC where it holds st_other. */
 	u8		arch_sym;
-	/** The name of length namelen associated with the symbol. */
+	bool		annotate2;
 	char		name[];
 };
 
@@ -297,6 +286,4 @@ static inline void __mem_info__zput(struct mem_info **mi)
 
 #define mem_info__zput(mi) __mem_info__zput(&mi)
 
-int symbol__validate_sym_arguments(void);
-
 #endif /* __PERF_SYMBOL */
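The struct symbol hunk above replaces the `annotate2:1` bitfield with a standalone `bool`: a separately addressable member, at the typical cost of an extra byte over folding the flag into the existing bitfield. The toy comparison below (not from the patch) just prints the resulting sizes on the host ABI:

```c
#include <stdint.h>
#include <stdio.h>

struct packed_flags {
	uint8_t type:4;
	uint8_t binding:4;
	uint8_t idle:1;
	uint8_t ignore:1;
	uint8_t inlined:1;
	uint8_t annotate2:1;	/* folded into the flag byte */
};

struct with_bool {
	uint8_t type:4;
	uint8_t binding:4;
	uint8_t idle:1;
	uint8_t ignore:1;
	uint8_t inlined:1;
	_Bool annotate2;	/* separate, addressable member */
};

int main(void)
{
	printf("bitfield-only: %zu bytes, with bool: %zu bytes\n",
	       sizeof(struct packed_flags), sizeof(struct with_bool));
	return 0;
}
```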
mmap_data, + bool mmap_data, unsigned int nr_threads_synthesize) { struct synthesize_threads_arg *args = NULL; @@ -998,8 +994,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr <= 1) { err = __perf_event__synthesize_threads(tool, process, - machine, - needs_mmap, mmap_data, + machine, mmap_data, dirent, base, n); goto free_dirent; } @@ -1020,7 +1015,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool, args[i].tool = tool; args[i].process = process; args[i].machine = machine; - args[i].needs_mmap = needs_mmap; args[i].mmap_data = mmap_data; args[i].dirent = dirent; } @@ -1186,12 +1180,12 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool, static void synthesize_cpus(struct cpu_map_entries *cpus, struct perf_cpu_map *map) { - int i, map_nr = perf_cpu_map__nr(map); + int i; - cpus->nr = map_nr; + cpus->nr = map->nr; - for (i = 0; i < map_nr; i++) - cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu; + for (i = 0; i < map->nr; i++) + cpus->cpu[i] = map->map[i]; } static void synthesize_mask(struct perf_record_record_cpu_map *mask, @@ -1202,13 +1196,13 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask, mask->nr = BITS_TO_LONGS(max); mask->long_size = sizeof(long); - for (i = 0; i < perf_cpu_map__nr(map); i++) - set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask); + for (i = 0; i < map->nr; i++) + set_bit(map->map[i], mask->mask); } static size_t cpus_size(struct perf_cpu_map *map) { - return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16); + return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); } static size_t mask_size(struct perf_cpu_map *map, int *max) @@ -1217,9 +1211,9 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) *max = 0; - for (i = 0; i < perf_cpu_map__nr(map); i++) { + for (i = 0; i < map->nr; i++) { /* bit position of the cpu is + 1 */ - int bit = perf_cpu_map__cpu(map, i).cpu + 1; + int bit = map->map[i] + 1; if (bit > *max) *max = bit; @@ -1354,7 +1348,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool, } int perf_event__synthesize_stat(struct perf_tool *tool, - struct perf_cpu cpu, u32 thread, u64 id, + u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine) @@ -1366,7 +1360,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool, event.header.misc = 0; event.id = id; - event.cpu = cpu.cpu; + event.cpu = cpu; event.thread = thread; event.val = count->val; event.ena = count->ena; @@ -1763,7 +1757,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ } e->idx = sid->idx; - e->cpu = sid->cpu.cpu; + e->cpu = sid->cpu; e->tid = sid->tid; } } @@ -1781,46 +1775,26 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool needs_mmap, - bool data_mmap, unsigned int nr_threads_synthesize) + perf_event__handler_t process, bool data_mmap, + unsigned int nr_threads_synthesize) { - /* - * When perf runs in non-root PID namespace, and the namespace's proc FS - * is not mounted, nsinfo__is_in_root_namespace() returns false. - * In this case, the proc FS is coming for the parent namespace, thus - * perf tool will wrongly gather process info from its parent PID - * namespace. 
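The guard being dropped in this hunk relies on nsinfo__is_in_root_namespace(), which compares the inode behind /proc/self/ns/pid against the initial PID namespace. A minimal Python sketch of that comparison (PROC_PID_INIT_INO is the well-known constant from include/linux/proc_ns.h; this models the idea, it is not the perf implementation):

    import os

    # Inode number of the initial PID namespace (PROC_PID_INIT_INO,
    # 0xEFFFFFFC in include/linux/proc_ns.h).
    PROC_PID_INIT_INO = 0xEFFFFFFC

    def in_root_pid_namespace() -> bool:
        # /proc/self/ns/pid reads like "pid:[4026531836]"; the bracketed
        # number is the namespace's inode.
        link = os.readlink('/proc/self/ns/pid')
        ino = int(link.partition('[')[2].rstrip(']'))
        return ino == PROC_PID_INIT_INO

When this returns False and procfs belongs to the parent namespace, thread synthesis would read the wrong /proc, which is exactly the confusion the removed guard reported.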
- * - * To avoid the confusion that the perf tool runs in a child PID - * namespace but it synthesizes thread info from its parent PID - * namespace, returns failure with warning. - */ - if (!nsinfo__is_in_root_namespace()) { - pr_err("Perf runs in non-root PID namespace but it tries to "); - pr_err("gather process info from its parent PID namespace.\n"); - pr_err("Please mount the proc file system properly, e.g. "); - pr_err("add the option '--mount-proc' for unshare command.\n"); - return -EPERM; - } - if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, - needs_mmap, data_mmap); + return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, machine, - needs_mmap, data_mmap, + return perf_event__synthesize_threads(tool, process, + machine, data_mmap, nr_threads_synthesize); /* command specified */ return 0; } int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool needs_mmap, - bool data_mmap, unsigned int nr_threads_synthesize) + struct perf_thread_map *threads, bool data_mmap, + unsigned int nr_threads_synthesize) { return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, needs_mmap, - data_mmap, nr_threads_synthesize); + perf_event__process, data_mmap, + nr_threads_synthesize); } static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id) @@ -2256,31 +2230,3 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, return ret; } - -int parse_synth_opt(char *synth) -{ - char *p, *q; - int ret = 0; - - if (synth == NULL) - return -1; - - for (q = synth; (p = strsep(&q, ",")); p = q) { - if (!strcasecmp(p, "no") || !strcasecmp(p, "none")) - return 0; - - if (!strcasecmp(p, "all")) - return PERF_SYNTH_ALL; - - if (!strcasecmp(p, "task")) - ret |= PERF_SYNTH_TASK; - else if (!strcasecmp(p, "mmap")) - ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP; - else if (!strcasecmp(p, "cgroup")) - ret |= PERF_SYNTH_CGROUP; - else - return -1; - } - - return ret; -} diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index 78a0450db1..c845e2b9b4 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -6,7 +6,6 @@ #include // pid_t #include #include -#include struct auxtrace_record; struct dso; @@ -28,18 +27,6 @@ struct target; union perf_event; -enum perf_record_synth { - PERF_SYNTH_TASK = 1 << 0, - PERF_SYNTH_MMAP = 1 << 1, - PERF_SYNTH_CGROUP = 1 << 2, - - /* last element */ - PERF_SYNTH_MAX = 1 << 3, -}; -#define PERF_SYNTH_ALL (PERF_SYNTH_MAX - 1) - -int parse_synth_opt(char *str); - typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); @@ -64,10 +51,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat(struct perf_tool *tool, struct 
perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); -int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); +int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data); +int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize); int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); @@ -78,10 +65,10 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool needs_mmap, bool data_mmap, + perf_event__handler_t process, bool data_mmap, unsigned int nr_threads_synthesize); int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool needs_mmap, bool data_mmap, + struct perf_thread_map *threads, bool data_mmap, unsigned int nr_threads_synthesize); #ifdef HAVE_AUXTRACE_SUPPORT diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index ef873f2cc3..bbbc0dcd46 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -53,7 +53,6 @@ struct perf_tool { lost_samples, aux, itrace_start, - aux_output_hw_id, context_switch, throttle, unthrottle, diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index c1ebfc5d2e..27945eeb0c 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -95,15 +95,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "", + top->evlist->core.cpus->nr > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - perf_cpu_map__nr(top->evlist->core.cpus), - perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : ""); + top->evlist->core.cpus->nr, + top->evlist->core.cpus->nr > 1 ? 
"s" : ""); } perf_top__reset_sample_counters(top); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index fb4f6616b5..df3c4671be 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -416,18 +416,3 @@ char *perf_exe(char *buf, int len) } return strcpy(buf, "perf"); } - -void perf_debuginfod_setup(struct perf_debuginfod *di) -{ - /* - * By default '!di->set' we clear DEBUGINFOD_URLS, so debuginfod - * processing is not triggered, otherwise we set it to 'di->urls' - * value. If 'di->urls' is "system" we keep DEBUGINFOD_URLS value. - */ - if (!di->set) - setenv("DEBUGINFOD_URLS", "", 1); - else if (di->urls && strcmp(di->urls, "system")) - setenv("DEBUGINFOD_URLS", di->urls, 1); - - pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS")); -} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7b625cbd2d..9f0d36ba77 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,9 +11,6 @@ #include #include #include -#ifndef __cplusplus -#include -#endif /* General helper functions */ void usage(const char *err) __noreturn; @@ -69,12 +66,6 @@ extern bool test_attr__enabled; void test_attr__ready(void); void test_attr__init(void); struct perf_event_attr; -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags); - -struct perf_debuginfod { - const char *urls; - bool set; -}; -void perf_debuginfod_setup(struct perf_debuginfod *di); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore index eada0297ef..0b319fc8bb 100644 --- a/tools/power/acpi/.gitignore +++ b/tools/power/acpi/.gitignore @@ -2,5 +2,4 @@ /acpidbg /acpidump /ec -/pfrut /include/ diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index 5ff1d9c864..a249c50ebf 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -9,18 +9,18 @@ include ../../scripts/Makefile.include .NOTPARALLEL: -all: acpidbg acpidump ec pfrut -clean: acpidbg_clean acpidump_clean ec_clean pfrut_clean -install: acpidbg_install acpidump_install ec_install pfrut_install -uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall +all: acpidbg acpidump ec +clean: acpidbg_clean acpidump_clean ec_clean +install: acpidbg_install acpidump_install ec_install +uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall -acpidbg acpidump ec pfrut: FORCE +acpidbg acpidump ec: FORCE $(call descend,tools/$@,all) -acpidbg_clean acpidump_clean ec_clean pfrut_clean: +acpidbg_clean acpidump_clean ec_clean: $(call descend,tools/$(@:_clean=),clean) -acpidbg_install acpidump_install ec_install pfrut_install: +acpidbg_install acpidump_install ec_install: $(call descend,tools/$(@:_install=),install) -acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall: +acpidbg_uninstall acpidump_uninstall ec_uninstall: $(call descend,tools/$(@:_uninstall=),uninstall) .PHONY: FORCE diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index cd7106876a..331f6d30f4 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -69,7 +69,6 @@ KERNEL_INCLUDE := $(OUTPUT)include ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) CFLAGS += $(WARNINGS) -MKDIR = mkdir ifeq ($(strip $(V)),false) QUIET=@ diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index 
b71aada776..2a6c170b57 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -9,7 +9,7 @@ objdir := $(OUTPUT)tools/$(TOOL)/ toolobjs := $(addprefix $(objdir),$(TOOL_OBJS)) $(OUTPUT)$(TOOL): $(toolobjs) FORCE $(ECHO) " LD " $(subst $(OUTPUT),,$@) - $(QUIET) $(LD) $(CFLAGS) $(toolobjs) $(LDFLAGS) -L$(OUTPUT) -o $@ + $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@ $(ECHO) " STRIP " $(subst $(OUTPUT),,$@) $(QUIET) $(STRIPCMD) $@ @@ -21,7 +21,6 @@ $(KERNEL_INCLUDE): $(objdir)%.o: %.c $(KERNEL_INCLUDE) $(ECHO) " CC " $(subst $(OUTPUT),,$@) - $(QUIET) $(MKDIR) -p $(objdir) 2>/dev/null $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< all: $(OUTPUT)$(TOOL) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index efe72fa482..bf9fd3549a 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.11"; +static const char *version_str = "v1.10"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -1599,7 +1599,6 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) die_id != get_physical_die_id(i)) continue; - adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); adjust_scaling_min_from_base_freq(i); } @@ -1616,7 +1615,6 @@ static void set_scaling_min_to_cpuinfo_min(int cpu) die_id != get_physical_die_id(i)) continue; - adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 0, 0); } } diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index 0c6c7f4568..b10b7a27c3 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -4,8 +4,7 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ - -e s/riscv.*/riscv/) + -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) ifndef ARCH ARCH := $(HOSTARCH) diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 79d1023044..071312f5eb 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -87,18 +87,7 @@ LLVM_STRIP ?= llvm-strip ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 - -else ifneq ($(CROSS_COMPILE),) -CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc 2>/dev/null)) -ifneq ($(GCC_TOOLCHAIN_DIR),) -CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) -CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot) -CLANG_CROSS_FLAGS += --gcc-toolchain=$(realpath $(GCC_TOOLCHAIN_DIR)/..) 
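As a reading aid for the Make fragment above: every flag is derived purely from the $(CROSS_COMPILE) prefix. A rough Python rendering of the same derivation (the toolchain prefix in the comment is illustrative, not taken from the diff):

    import os
    import shutil
    import subprocess

    def clang_cross_flags(cross_compile: str) -> list:
        # e.g. cross_compile = 'aarch64-linux-gnu-'
        if not cross_compile:
            return []
        target = os.path.basename(cross_compile.rstrip('-'))
        flags = ['--target=' + target]
        gcc = shutil.which(cross_compile + 'gcc')
        if gcc:  # mirrors the GCC_TOOLCHAIN_DIR branch
            gcc_dir = os.path.dirname(gcc)
            sysroot = subprocess.check_output(
                [gcc, '-print-sysroot'], text=True).strip()
            flags += ['--prefix=' + os.path.join(gcc_dir, cross_compile),
                      '--sysroot=' + sysroot,
                      '--gcc-toolchain=' + os.path.realpath(
                          os.path.join(gcc_dir, os.pardir))]
        return flags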
-endif # GCC_TOOLCHAIN_DIR -CFLAGS += $(CLANG_CROSS_FLAGS) -AFLAGS += $(CLANG_CROSS_FLAGS) -endif # CROSS_COMPILE +endif # Hack to avoid type-punned warnings on old systems such as RHEL5: # We should be changing CFLAGS and checking gcc version, but this diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 7a706f96f6..ac35c61f65 100644 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -8,64 +8,45 @@ # Author: Brendan Higgins import argparse -import os -import re import sys +import os import time assert sys.version_info >= (3, 7), "Python version is too old" -from dataclasses import dataclass +from collections import namedtuple from enum import Enum, auto -from typing import Any, Iterable, Sequence, List, Optional +from typing import Iterable, Sequence +import kunit_config import kunit_json import kunit_kernel import kunit_parser +KunitResult = namedtuple('KunitResult', ['status','result','elapsed_time']) + +KunitConfigRequest = namedtuple('KunitConfigRequest', + ['build_dir', 'make_options']) +KunitBuildRequest = namedtuple('KunitBuildRequest', + ['jobs', 'build_dir', 'alltests', + 'make_options']) +KunitExecRequest = namedtuple('KunitExecRequest', + ['timeout', 'build_dir', 'alltests', + 'filter_glob', 'kernel_args']) +KunitParseRequest = namedtuple('KunitParseRequest', + ['raw_output', 'input_data', 'build_dir', 'json']) +KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', + 'build_dir', 'alltests', 'filter_glob', + 'kernel_args', 'json', 'make_options']) + +KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] + class KunitStatus(Enum): SUCCESS = auto() CONFIG_FAILURE = auto() BUILD_FAILURE = auto() TEST_FAILURE = auto() -@dataclass -class KunitResult: - status: KunitStatus - result: Any - elapsed_time: float - -@dataclass -class KunitConfigRequest: - build_dir: str - make_options: Optional[List[str]] - -@dataclass -class KunitBuildRequest(KunitConfigRequest): - jobs: int - alltests: bool - -@dataclass -class KunitParseRequest: - raw_output: Optional[str] - build_dir: str - json: Optional[str] - -@dataclass -class KunitExecRequest(KunitParseRequest): - timeout: int - alltests: bool - filter_glob: str - kernel_args: Optional[List[str]] - run_isolated: Optional[str] - -@dataclass -class KunitRequest(KunitExecRequest, KunitBuildRequest): - pass - - -KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] - def get_kernel_root_path() -> str: path = sys.argv[0] if not __file__ else __file__ parts = os.path.realpath(path).split('tools/testing/kunit') @@ -110,104 +91,31 @@ def build_tests(linux: kunit_kernel.LinuxSourceTree, 'built kernel successfully', build_end - build_start) -def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree, - request: KunitBuildRequest) -> KunitResult: - config_result = config_tests(linux, request) - if config_result.status != KunitStatus.SUCCESS: - return config_result +def exec_tests(linux: kunit_kernel.LinuxSourceTree, + request: KunitExecRequest) -> KunitResult: + kunit_parser.print_with_timestamp('Starting KUnit Kernel ...') + test_start = time.time() + result = linux.run_kernel( + args=request.kernel_args, + timeout=None if request.alltests else request.timeout, + filter_glob=request.filter_glob, + build_dir=request.build_dir) - return build_tests(linux, request) + test_end = time.time() -def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]: - args = ['kunit.action=list'] - if request.kernel_args: - 
args.extend(request.kernel_args) + return KunitResult(KunitStatus.SUCCESS, + result, + test_end - test_start) - output = linux.run_kernel(args=args, - timeout=None if request.alltests else request.timeout, - filter_glob=request.filter_glob, - build_dir=request.build_dir) - lines = kunit_parser.extract_tap_lines(output) - # Hack! Drop the dummy TAP version header that the executor prints out. - lines.pop() - - # Filter out any extraneous non-test output that might have gotten mixed in. - return [l for l in lines if re.match('^[^\s.]+\.[^\s.]+$', l)] - -def _suites_from_test_list(tests: List[str]) -> List[str]: - """Extracts all the suites from an ordered list of tests.""" - suites = [] # type: List[str] - for t in tests: - parts = t.split('.', maxsplit=2) - if len(parts) != 2: - raise ValueError(f'internal KUnit error, test name should be of the form ".", got "{t}"') - suite, case = parts - if not suites or suites[-1] != suite: - suites.append(suite) - return suites - - - -def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> KunitResult: - filter_globs = [request.filter_glob] - if request.run_isolated: - tests = _list_tests(linux, request) - if request.run_isolated == 'test': - filter_globs = tests - if request.run_isolated == 'suite': - filter_globs = _suites_from_test_list(tests) - # Apply the test-part of the user's glob, if present. - if '.' in request.filter_glob: - test_glob = request.filter_glob.split('.', maxsplit=2)[1] - filter_globs = [g + '.'+ test_glob for g in filter_globs] - - test_counts = kunit_parser.TestCounts() - exec_time = 0.0 - for i, filter_glob in enumerate(filter_globs): - kunit_parser.print_with_timestamp('Starting KUnit Kernel ({}/{})...'.format(i+1, len(filter_globs))) - - test_start = time.time() - run_result = linux.run_kernel( - args=request.kernel_args, - timeout=None if request.alltests else request.timeout, - filter_glob=filter_glob, - build_dir=request.build_dir) - - result = parse_tests(request, run_result) - # run_kernel() doesn't block on the kernel exiting. - # That only happens after we get the last line of output from `run_result`. - # So exec_time here actually contains parsing + execution time, which is fine. - test_end = time.time() - exec_time += test_end - test_start - - test_counts.add_subtest_counts(result.result.counts) - - if len(filter_globs) == 1 and test_counts.crashed > 0: - bd = request.build_dir - print('The kernel seems to have crashed; you can decode the stack traces with:') - print('$ scripts/decode_stacktrace.sh {}/vmlinux {} < {} | tee {}/decoded.log | {} parse'.format( - bd, bd, kunit_kernel.get_outfile_path(bd), bd, sys.argv[0])) - - kunit_status = _map_to_overall_status(test_counts.get_status()) - return KunitResult(status=kunit_status, result=result, elapsed_time=exec_time) - -def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: - if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED): - return KunitStatus.SUCCESS - else: - return KunitStatus.TEST_FAILURE - -def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitResult: +def parse_tests(request: KunitParseRequest) -> KunitResult: parse_start = time.time() - test_result = kunit_parser.Test() + test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS, + [], + 'Tests not Parsed.') if request.raw_output: - # Treat unparsed results as one passing test. 
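The --run_isolated handling removed in the exec_tests() hunk above boots the kernel once per suite (or once per test) by rewriting the filter glob. A self-contained sketch of that glob construction (the suite/test names in the example call are made up):

    from typing import List

    def isolated_globs(tests: List[str], mode: str, user_glob: str) -> List[str]:
        # 'tests' are ordered 'suite.test' names reported by the kernel.
        if mode == 'test':
            return list(tests)
        # mode == 'suite': keep first-seen suite order, drop repeats.
        globs = []  # type: List[str]
        for t in tests:
            suite = t.split('.', 1)[0]
            if not globs or globs[-1] != suite:
                globs.append(suite)
        # Re-apply the test half of the user's glob, if it had one.
        if '.' in user_glob:
            test_glob = user_glob.split('.', 1)[1]
            globs = [g + '.' + test_glob for g in globs]
        return globs

    # isolated_globs(['list.test1', 'list.test2', 'kfifo.pop'],
    #                'suite', 'list*.*del_test')
    # -> ['list.*del_test', 'kfifo.*del_test']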
- test_result.status = kunit_parser.TestStatus.SUCCESS - test_result.counts.passed = 1 - - output: Iterable[str] = input_data + output: Iterable[str] = request.input_data if request.raw_output == 'all': pass elif request.raw_output == 'kunit': @@ -218,12 +126,12 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitR print(line.rstrip()) else: - test_result = kunit_parser.parse_run_tests(input_data) + test_result = kunit_parser.parse_run_tests(request.input_data) parse_end = time.time() if request.json: json_obj = kunit_json.get_json_result( - test=test_result, + test_result=test_result, def_config='kunit_defconfig', build_dir=request.build_dir, json_path=request.json) @@ -241,15 +149,31 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: run_start = time.time() - config_result = config_tests(linux, request) + config_request = KunitConfigRequest(request.build_dir, + request.make_options) + config_result = config_tests(linux, config_request) if config_result.status != KunitStatus.SUCCESS: return config_result - build_result = build_tests(linux, request) + build_request = KunitBuildRequest(request.jobs, request.build_dir, + request.alltests, + request.make_options) + build_result = build_tests(linux, build_request) if build_result.status != KunitStatus.SUCCESS: return build_result - exec_result = exec_tests(linux, request) + exec_request = KunitExecRequest(request.timeout, request.build_dir, + request.alltests, request.filter_glob, + request.kernel_args) + exec_result = exec_tests(linux, exec_request) + if exec_result.status != KunitStatus.SUCCESS: + return exec_result + + parse_request = KunitParseRequest(request.raw_output, + exec_result.result, + request.build_dir, + request.json) + parse_result = parse_tests(parse_request) run_end = time.time() @@ -260,7 +184,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, config_result.elapsed_time, build_result.elapsed_time, exec_result.elapsed_time)) - return exec_result + return parse_result # Problem: # $ kunit.py run --json @@ -282,9 +206,6 @@ def massage_argv(argv: Sequence[str]) -> Sequence[str]: return f'{arg}={pseudo_bool_flag_defaults[arg]}' return list(map(massage_arg, argv)) -def get_default_jobs() -> int: - return len(os.sched_getaffinity(0)) - def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' @@ -301,10 +222,6 @@ def add_common_opts(parser) -> None: ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" ' 'will get automatically appended.', metavar='kunitconfig') - parser.add_argument('--kconfig_add', - help='Additional Kconfig options to append to the ' - '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.', - action='append') parser.add_argument('--arch', help=('Specifies the architecture to run tests under. ' @@ -335,7 +252,7 @@ def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' 'jobs (commands) to run simultaneously."', - type=int, default=get_default_jobs(), metavar='jobs') + type=int, default=8, metavar='jobs') def add_exec_opts(parser) -> None: parser.add_argument('--timeout', @@ -346,8 +263,9 @@ def add_exec_opts(parser) -> None: default=300, metavar='timeout') parser.add_argument('filter_glob', - help='Filter which KUnit test suites/tests run at ' - 'boot-time, e.g. list* or list*.*del_test', + help='maximum number of seconds to allow for all tests ' + 'to run. 
This does not include time taken to build the ' + 'tests.', type=str, nargs='?', default='', @@ -355,12 +273,6 @@ def add_exec_opts(parser) -> None: parser.add_argument('--kernel_args', help='Kernel command-line parameters. Maybe be repeated', action='append') - parser.add_argument('--run_isolated', help='If set, boot the kernel for each ' - 'individual suite/test. This is can be useful for debugging ' - 'a non-hermetic test, one that might pass/fail based on ' - 'what ran before it.', - type=str, - choices=['suite', 'test']), def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='If set don\'t format output from kernel. ' @@ -423,21 +335,19 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitRequest(build_dir=cli_args.build_dir, - make_options=cli_args.make_options, - jobs=cli_args.jobs, - alltests=cli_args.alltests, - raw_output=cli_args.raw_output, - json=cli_args.json, - timeout=cli_args.timeout, - filter_glob=cli_args.filter_glob, - kernel_args=cli_args.kernel_args, - run_isolated=cli_args.run_isolated) + request = KunitRequest(cli_args.raw_output, + cli_args.timeout, + cli_args.jobs, + cli_args.build_dir, + cli_args.alltests, + cli_args.filter_glob, + cli_args.kernel_args, + cli_args.json, + cli_args.make_options) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: sys.exit(1) @@ -449,13 +359,12 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitConfigRequest(build_dir=cli_args.build_dir, - make_options=cli_args.make_options) + request = KunitConfigRequest(cli_args.build_dir, + cli_args.make_options) result = config_tests(linux, request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( @@ -466,16 +375,15 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitBuildRequest(build_dir=cli_args.build_dir, - make_options=cli_args.make_options, - jobs=cli_args.jobs, - alltests=cli_args.alltests) - result = config_and_build_tests(linux, request) + request = KunitBuildRequest(cli_args.jobs, + cli_args.build_dir, + cli_args.alltests, + cli_args.make_options) + result = build_tests(linux, request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( result.elapsed_time)) @@ -485,35 +393,37 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - exec_request = KunitExecRequest(raw_output=cli_args.raw_output, - build_dir=cli_args.build_dir, - json=cli_args.json, - timeout=cli_args.timeout, - alltests=cli_args.alltests, - filter_glob=cli_args.filter_glob, - kernel_args=cli_args.kernel_args, - run_isolated=cli_args.run_isolated) - result = exec_tests(linux, exec_request) + exec_request = 
KunitExecRequest(cli_args.timeout, + cli_args.build_dir, + cli_args.alltests, + cli_args.filter_glob, + cli_args.kernel_args) + exec_result = exec_tests(linux, exec_request) + parse_request = KunitParseRequest(cli_args.raw_output, + exec_result.result, + cli_args.build_dir, + cli_args.json) + result = parse_tests(parse_request) kunit_parser.print_with_timestamp(( - 'Elapsed time: %.3fs\n') % (result.elapsed_time)) + 'Elapsed time: %.3fs\n') % ( + exec_result.elapsed_time)) if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'parse': if cli_args.file == None: - sys.stdin.reconfigure(errors='backslashreplace') # pytype: disable=attribute-error kunit_output = sys.stdin else: - with open(cli_args.file, 'r', errors='backslashreplace') as f: + with open(cli_args.file, 'r') as f: kunit_output = f.read().splitlines() - request = KunitParseRequest(raw_output=cli_args.raw_output, - build_dir='', - json=cli_args.json) - result = parse_tests(request, kunit_output) + request = KunitParseRequest(cli_args.raw_output, + kunit_output, + None, + cli_args.json) + result = parse_tests(request) if result.status != KunitStatus.SUCCESS: sys.exit(1) else: diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 6773545461..c77c7d2ef6 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -62,34 +62,33 @@ class Kconfig(object): for entry in self.entries(): f.write(str(entry) + '\n') -def parse_file(path: str) -> Kconfig: - with open(path, 'r') as f: - return parse_from_string(f.read()) + def parse_from_string(self, blob: str) -> None: + """Parses a string containing KconfigEntrys and populates this Kconfig.""" + self._entries = [] + is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN) + config_matcher = re.compile(CONFIG_PATTERN) + for line in blob.split('\n'): + line = line.strip() + if not line: + continue -def parse_from_string(blob: str) -> Kconfig: - """Parses a string containing Kconfig entries.""" - kconfig = Kconfig() - is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN) - config_matcher = re.compile(CONFIG_PATTERN) - for line in blob.split('\n'): - line = line.strip() - if not line: - continue + match = config_matcher.match(line) + if match: + entry = KconfigEntry(match.group(1), match.group(2)) + self.add_entry(entry) + continue - match = config_matcher.match(line) - if match: - entry = KconfigEntry(match.group(1), match.group(2)) - kconfig.add_entry(entry) - continue + empty_match = is_not_set_matcher.match(line) + if empty_match: + entry = KconfigEntry(empty_match.group(1), 'n') + self.add_entry(entry) + continue - empty_match = is_not_set_matcher.match(line) - if empty_match: - entry = KconfigEntry(empty_match.group(1), 'n') - kconfig.add_entry(entry) - continue + if line[0] == '#': + continue + else: + raise KconfigParseError('Failed to parse: ' + line) - if line[0] == '#': - continue - else: - raise KconfigParseError('Failed to parse: ' + line) - return kconfig + def read_from_file(self, path: str) -> None: + with open(path, 'r') as f: + self.parse_from_string(f.read()) diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 6862671709..f5cca5c38c 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -11,49 +11,47 @@ import os import kunit_parser -from kunit_parser import Test, TestStatus -from typing import Any, Dict, Optional +from kunit_parser import TestStatus -JsonObj = Dict[str, Any] +def 
get_json_result(test_result, def_config, build_dir, json_path) -> str: + sub_groups = [] -def _get_group_json(test: Test, def_config: str, - build_dir: Optional[str]) -> JsonObj: - sub_groups = [] # List[JsonObj] - test_cases = [] # List[JsonObj] - - for subtest in test.subtests: - if len(subtest.subtests): - sub_group = _get_group_json(subtest, def_config, - build_dir) - sub_groups.append(sub_group) - else: - test_case = {"name": subtest.name, "status": "FAIL"} - if subtest.status == TestStatus.SUCCESS: + # Each test suite is mapped to a KernelCI sub_group + for test_suite in test_result.suites: + sub_group = { + "name": test_suite.name, + "arch": "UM", + "defconfig": def_config, + "build_environment": build_dir, + "test_cases": [], + "lab_name": None, + "kernel": None, + "job": None, + "git_branch": "kselftest", + } + test_cases = [] + # TODO: Add attachments attribute in test_case with detailed + # failure message, see https://api.kernelci.org/schema-test-case.html#get + for case in test_suite.cases: + test_case = {"name": case.name, "status": "FAIL"} + if case.status == TestStatus.SUCCESS: test_case["status"] = "PASS" - elif subtest.status == TestStatus.SKIPPED: - test_case["status"] = "SKIP" - elif subtest.status == TestStatus.TEST_CRASHED: + elif case.status == TestStatus.TEST_CRASHED: test_case["status"] = "ERROR" test_cases.append(test_case) - + sub_group["test_cases"] = test_cases + sub_groups.append(sub_group) test_group = { - "name": test.name, + "name": "KUnit Test Group", "arch": "UM", "defconfig": def_config, "build_environment": build_dir, "sub_groups": sub_groups, - "test_cases": test_cases, "lab_name": None, "kernel": None, "job": None, "git_branch": "kselftest", } - return test_group - -def get_json_result(test: Test, def_config: str, - build_dir: Optional[str], json_path: str) -> str: - test_group = _get_group_json(test, def_config, build_dir) - test_group["name"] = "KUnit Test Group" json_obj = json.dumps(test_group, indent=4) if json_path != 'stdout': with open(json_path, 'w') as result_path: diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 3c4196cef3..0874e512d1 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -13,8 +13,11 @@ import subprocess import os import shutil import signal -import threading -from typing import Iterator, List, Optional, Tuple +from typing import Iterator, Optional, Tuple + +from contextlib import ExitStack + +from collections import namedtuple import kunit_config import kunit_parser @@ -22,7 +25,6 @@ import qemu_config KCONFIG_PATH = '.config' KUNITCONFIG_PATH = '.kunitconfig' -OLD_KUNITCONFIG_PATH = 'last_used_kunitconfig' DEFAULT_KUNITCONFIG_PATH = 'tools/testing/kunit/configs/default.config' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' OUTFILE_PATH = 'test.log' @@ -102,8 +104,8 @@ class LinuxSourceTreeOperations(object): if stderr: # likely only due to build warnings print(stderr.decode()) - def start(self, params: List[str], build_dir: str) -> subprocess.Popen: - raise RuntimeError('not implemented!') + def run(self, params, timeout, build_dir, outfile) -> None: + pass class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): @@ -118,10 +120,11 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): self._extra_qemu_params = qemu_arch_params.extra_qemu_params def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None: - kconfig = kunit_config.parse_from_string(self._kconfig) + kconfig = 
kunit_config.Kconfig() + kconfig.parse_from_string(self._kconfig) base_kunitconfig.merge_in_entries(kconfig) - def start(self, params: List[str], build_dir: str) -> subprocess.Popen: + def run(self, params, timeout, build_dir, outfile): kernel_path = os.path.join(build_dir, self._kernel_path) qemu_command = ['qemu-system-' + self._qemu_arch, '-nodefaults', @@ -132,11 +135,18 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): '-nographic', '-serial stdio'] + self._extra_qemu_params print('Running tests with:\n$', ' '.join(qemu_command)) - return subprocess.Popen(' '.join(qemu_command), - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, shell=True, errors='backslashreplace') + with open(outfile, 'w') as output: + process = subprocess.Popen(' '.join(qemu_command), + stdin=subprocess.PIPE, + stdout=output, + stderr=subprocess.STDOUT, + text=True, shell=True) + try: + process.wait(timeout=timeout) + except Exception as e: + print(e) + process.terminate() + return process class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): """An abstraction over command line operations performed on a source tree.""" @@ -159,21 +169,24 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): process.wait() kunit_parser.print_with_timestamp( 'Disabling broken configs to run KUnit tests...') - - with open(get_kconfig_path(build_dir), 'a') as config: - with open(BROKEN_ALLCONFIG_PATH, 'r') as disable: - config.write(disable.read()) + with ExitStack() as es: + config = open(get_kconfig_path(build_dir), 'a') + disable = open(BROKEN_ALLCONFIG_PATH, 'r').read() + config.write(disable) kunit_parser.print_with_timestamp( 'Starting Kernel with all configs takes a few minutes...') - def start(self, params: List[str], build_dir: str) -> subprocess.Popen: + def run(self, params, timeout, build_dir, outfile): """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = get_file_path(build_dir, 'linux') - return subprocess.Popen([linux_bin] + params, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, errors='backslashreplace') + outfile = get_outfile_path(build_dir) + with open(outfile, 'w') as output: + process = subprocess.Popen([linux_bin] + params, + stdin=subprocess.PIPE, + stdout=output, + stderr=subprocess.STDOUT, + text=True) + process.wait(timeout) def get_kconfig_path(build_dir) -> str: return get_file_path(build_dir, KCONFIG_PATH) @@ -181,9 +194,6 @@ def get_kconfig_path(build_dir) -> str: def get_kunitconfig_path(build_dir) -> str: return get_file_path(build_dir, KUNITCONFIG_PATH) -def get_old_kunitconfig_path(build_dir) -> str: - return get_file_path(build_dir, OLD_KUNITCONFIG_PATH) - def get_outfile_path(build_dir) -> str: return get_file_path(build_dir, OUTFILE_PATH) @@ -193,9 +203,8 @@ def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceT return LinuxSourceTreeOperationsUml(cross_compile=cross_compile) elif os.path.isfile(config_path): return get_source_tree_ops_from_qemu_config(config_path, cross_compile)[1] - - options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')] - raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options))) + else: + raise ConfigError(arch + ' is not a valid arch') def get_source_tree_ops_from_qemu_config(config_path: str, cross_compile: Optional[str]) -> Tuple[ @@ -210,17 +219,13 @@ def get_source_tree_ops_from_qemu_config(config_path: str, # exists as a file. module_path = '.' 
+ os.path.join(os.path.basename(QEMU_CONFIGS_DIR), os.path.basename(config_path)) spec = importlib.util.spec_from_file_location(module_path, config_path) - assert spec is not None config = importlib.util.module_from_spec(spec) - # See https://github.com/python/typeshed/pull/2626 for context. - assert isinstance(spec.loader, importlib.abc.Loader) - spec.loader.exec_module(config) - - if not hasattr(config, 'QEMU_ARCH'): - raise ValueError('qemu_config module missing "QEMU_ARCH": ' + config_path) - params: qemu_config.QemuArchParams = config.QEMU_ARCH # type: ignore - return params.linux_arch, LinuxSourceTreeOperationsQemu( - params, cross_compile=cross_compile) + # TODO(brendanhiggins@google.com): I looked this up and apparently other + # Python projects have noted that pytype complains that "No attribute + # 'exec_module' on _importlib_modulespec._Loader". Disabling for now. + spec.loader.exec_module(config) # pytype: disable=attribute-error + return config.QEMU_ARCH.linux_arch, LinuxSourceTreeOperationsQemu( + config.QEMU_ARCH, cross_compile=cross_compile) class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" @@ -230,7 +235,6 @@ class LinuxSourceTree(object): build_dir: str, load_config=True, kunitconfig_path='', - kconfig_add: Optional[List[str]]=None, arch=None, cross_compile=None, qemu_config_path=None) -> None: @@ -255,11 +259,8 @@ class LinuxSourceTree(object): if not os.path.exists(kunitconfig_path): shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path) - self._kconfig = kunit_config.parse_file(kunitconfig_path) - if kconfig_add: - kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add)) - self._kconfig.merge_in_entries(kconfig) - + self._kconfig = kunit_config.Kconfig() + self._kconfig.read_from_file(kunitconfig_path) def clean(self) -> bool: try: @@ -271,18 +272,17 @@ class LinuxSourceTree(object): def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) - validated_kconfig = kunit_config.parse_file(kconfig_path) - if self._kconfig.is_subset_of(validated_kconfig): - return True - invalid = self._kconfig.entries() - validated_kconfig.entries() - message = 'Not all Kconfig options selected in kunitconfig were in the generated .config.\n' \ - 'This is probably due to unsatisfied dependencies.\n' \ - 'Missing: ' + ', '.join([str(e) for e in invalid]) - if self._arch == 'um': - message += '\nNote: many Kconfig options aren\'t available on UML. You can try running ' \ - 'on a different architecture with something like "--arch=x86_64".' - logging.error(message) - return False + validated_kconfig = kunit_config.Kconfig() + validated_kconfig.read_from_file(kconfig_path) + if not self._kconfig.is_subset_of(validated_kconfig): + invalid = self._kconfig.entries() - validated_kconfig.entries() + message = 'Provided Kconfig is not contained in validated .config. 
Following fields found in kunitconfig, ' \ + 'but not in .config: %s' % ( + ', '.join([str(e) for e in invalid]) + ) + logging.error(message) + return False + return True def build_config(self, build_dir, make_options) -> bool: kconfig_path = get_kconfig_path(build_dir) @@ -295,38 +295,25 @@ class LinuxSourceTree(object): except ConfigError as e: logging.error(e) return False - if not self.validate_config(build_dir): - return False - - old_path = get_old_kunitconfig_path(build_dir) - if os.path.exists(old_path): - os.remove(old_path) # write_to_file appends to the file - self._kconfig.write_to_file(old_path) - return True - - def _kunitconfig_changed(self, build_dir: str) -> bool: - old_path = get_old_kunitconfig_path(build_dir) - if not os.path.exists(old_path): - return True - - old_kconfig = kunit_config.parse_file(old_path) - return old_kconfig.entries() != self._kconfig.entries() + return self.validate_config(build_dir) def build_reconfig(self, build_dir, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) - if not os.path.exists(kconfig_path): + if os.path.exists(kconfig_path): + existing_kconfig = kunit_config.Kconfig() + existing_kconfig.read_from_file(kconfig_path) + self._ops.make_arch_qemuconfig(self._kconfig) + if not self._kconfig.is_subset_of(existing_kconfig): + print('Regenerating .config ...') + os.remove(kconfig_path) + return self.build_config(build_dir, make_options) + else: + return True + else: print('Generating .config ...') return self.build_config(build_dir, make_options) - existing_kconfig = kunit_config.parse_file(kconfig_path) - self._ops.make_arch_qemuconfig(self._kconfig) - if self._kconfig.is_subset_of(existing_kconfig) and not self._kunitconfig_changed(build_dir): - return True - print('Regenerating .config ...') - os.remove(kconfig_path) - return self.build_config(build_dir, make_options) - def build_kernel(self, alltests, jobs, build_dir, make_options) -> bool: try: if alltests: @@ -344,36 +331,12 @@ class LinuxSourceTree(object): args.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt']) if filter_glob: args.append('kunit.filter_glob='+filter_glob) - - process = self._ops.start(args, build_dir) - assert process.stdout is not None # tell mypy it's set - - # Enforce the timeout in a background thread. - def _wait_proc(): - try: - process.wait(timeout=timeout) - except Exception as e: - print(e) - process.terminate() - process.wait() - waiter = threading.Thread(target=_wait_proc) - waiter.start() - - output = open(get_outfile_path(build_dir), 'w') - try: - # Tee the output to the file and to our caller in real time. - for line in process.stdout: - output.write(line) + outfile = get_outfile_path(build_dir) + self._ops.run(args, timeout, build_dir, outfile) + subprocess.call(['stty', 'sane']) + with open(outfile, 'r') as file: + for line in file: yield line - # This runs even if our caller doesn't consume every line. - finally: - # Flush any leftover output to the file - output.write(process.stdout.read()) - output.close() - process.stdout.close() - - waiter.join() - subprocess.call(['stty', 'sane']) def signal_handler(self, sig, frame) -> None: logging.error('Build interruption occurred. 
Cleaning console.') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 05ff334761..6310a641b1 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -1,66 +1,46 @@ # SPDX-License-Identifier: GPL-2.0 # -# Parses KTAP test results from a kernel dmesg log and incrementally prints -# results with reader-friendly format. Stores and returns test results in a -# Test object. +# Parses test results from a kernel dmesg log. # # Copyright (C) 2019, Google LLC. # Author: Felix Guo # Author: Brendan Higgins -# Author: Rae Moar -from __future__ import annotations import re -import datetime +from collections import namedtuple +from datetime import datetime from enum import Enum, auto from functools import reduce from typing import Iterable, Iterator, List, Optional, Tuple -class Test(object): - """ - A class to represent a test parsed from KTAP results. All KTAP - results within a test log are stored in a main Test object as - subtests. +TestResult = namedtuple('TestResult', ['status','suites','log']) - Attributes: - status : TestStatus - status of the test - name : str - name of the test - expected_count : int - expected number of subtests (0 if single - test case and None if unknown expected number of subtests) - subtests : List[Test] - list of subtests - log : List[str] - log of KTAP lines that correspond to the test - counts : TestCounts - counts of the test statuses and errors of - subtests or of the test itself if the test is a single - test case. - """ +class TestSuite(object): def __init__(self) -> None: - """Creates Test object with default attributes.""" - self.status = TestStatus.TEST_CRASHED + self.status = TestStatus.SUCCESS self.name = '' - self.expected_count = 0 # type: Optional[int] - self.subtests = [] # type: List[Test] - self.log = [] # type: List[str] - self.counts = TestCounts() + self.cases = [] # type: List[TestCase] def __str__(self) -> str: - """Returns string representation of a Test class object.""" - return ('Test(' + str(self.status) + ', ' + self.name + - ', ' + str(self.expected_count) + ', ' + - str(self.subtests) + ', ' + str(self.log) + ', ' + - str(self.counts) + ')') + return 'TestSuite(' + str(self.status) + ',' + self.name + ',' + str(self.cases) + ')' def __repr__(self) -> str: - """Returns string representation of a Test class object.""" return str(self) - def add_error(self, error_message: str) -> None: - """Records an error that occurred while parsing this test.""" - self.counts.errors += 1 - print_error('Test ' + self.name + ': ' + error_message) +class TestCase(object): + def __init__(self) -> None: + self.status = TestStatus.SUCCESS + self.name = '' + self.log = [] # type: List[str] + + def __str__(self) -> str: + return 'TestCase(' + str(self.status) + ',' + self.name + ',' + str(self.log) + ')' + + def __repr__(self) -> str: + return str(self) class TestStatus(Enum): - """An enumeration class to represent the status of a test.""" SUCCESS = auto() FAILURE = auto() SKIPPED = auto() @@ -68,761 +48,381 @@ class TestStatus(Enum): NO_TESTS = auto() FAILURE_TO_PARSE_TESTS = auto() -class TestCounts: - """ - Tracks the counts of statuses of all test cases and any errors within - a Test. 
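The counters kept by this class feed the aggregation in get_status() further down: any crash dominates, then any failure, then any pass, and only-skips report as skipped. A compressed, standalone sketch of that precedence:

    from enum import Enum, auto

    class Status(Enum):
        SUCCESS = auto()
        FAILURE = auto()
        SKIPPED = auto()
        TEST_CRASHED = auto()
        NO_TESTS = auto()

    def overall(statuses) -> Status:
        # Same precedence as the removed TestCounts.get_status().
        s = list(statuses)
        if not s:
            return Status.NO_TESTS
        if Status.TEST_CRASHED in s:
            return Status.TEST_CRASHED
        if Status.FAILURE in s:
            return Status.FAILURE
        if Status.SUCCESS in s:
            return Status.SUCCESS
        return Status.SKIPPED

    # overall([Status.SUCCESS, Status.SKIPPED])  -> Status.SUCCESS
    # overall([Status.SUCCESS, Status.FAILURE])  -> Status.FAILURE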
- - Attributes: - passed : int - the number of tests that have passed - failed : int - the number of tests that have failed - crashed : int - the number of tests that have crashed - skipped : int - the number of tests that have skipped - errors : int - the number of errors in the test and subtests - """ - def __init__(self): - """Creates TestCounts object with counts of all test - statuses and test errors set to 0. - """ - self.passed = 0 - self.failed = 0 - self.crashed = 0 - self.skipped = 0 - self.errors = 0 - - def __str__(self) -> str: - """Returns the string representation of a TestCounts object. - """ - return ('Passed: ' + str(self.passed) + - ', Failed: ' + str(self.failed) + - ', Crashed: ' + str(self.crashed) + - ', Skipped: ' + str(self.skipped) + - ', Errors: ' + str(self.errors)) - - def total(self) -> int: - """Returns the total number of test cases within a test - object, where a test case is a test with no subtests. - """ - return (self.passed + self.failed + self.crashed + - self.skipped) - - def add_subtest_counts(self, counts: TestCounts) -> None: - """ - Adds the counts of another TestCounts object to the current - TestCounts object. Used to add the counts of a subtest to the - parent test. - - Parameters: - counts - a different TestCounts object whose counts - will be added to the counts of the TestCounts object - """ - self.passed += counts.passed - self.failed += counts.failed - self.crashed += counts.crashed - self.skipped += counts.skipped - self.errors += counts.errors - - def get_status(self) -> TestStatus: - """Returns the aggregated status of a Test using test - counts. - """ - if self.total() == 0: - return TestStatus.NO_TESTS - elif self.crashed: - # If one of the subtests crash, the expected status - # of the Test is crashed. - return TestStatus.TEST_CRASHED - elif self.failed: - # Otherwise if one of the subtests fail, the - # expected status of the Test is failed. - return TestStatus.FAILURE - elif self.passed: - # Otherwise if one of the subtests pass, the - # expected status of the Test is passed. - return TestStatus.SUCCESS - else: - # Finally, if none of the subtests have failed, - # crashed, or passed, the expected status of the - # Test is skipped. - return TestStatus.SKIPPED - - def add_status(self, status: TestStatus) -> None: - """ - Increments count of inputted status. - - Parameters: - status - status to be added to the TestCounts object - """ - if status == TestStatus.SUCCESS: - self.passed += 1 - elif status == TestStatus.FAILURE: - self.failed += 1 - elif status == TestStatus.SKIPPED: - self.skipped += 1 - elif status != TestStatus.NO_TESTS: - self.crashed += 1 - class LineStream: - """ - A class to represent the lines of kernel output. - Provides a lazy peek()/pop() interface over an iterator of - (line#, text). - """ + """Provides a peek()/pop() interface over an iterator of (line#, text).""" _lines: Iterator[Tuple[int, str]] _next: Tuple[int, str] - _need_next: bool _done: bool def __init__(self, lines: Iterator[Tuple[int, str]]): - """Creates a new LineStream that wraps the given iterator.""" self._lines = lines self._done = False - self._need_next = True self._next = (0, '') + self._get_next() def _get_next(self) -> None: - """Advances the LineSteam to the next line, if necessary.""" - if not self._need_next: - return try: self._next = next(self._lines) except StopIteration: self._done = True - finally: - self._need_next = False def peek(self) -> str: - """Returns the current line, without advancing the LineStream. 
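The laziness added in this version of LineStream (and reverted here) matters because constructing the stream must not block on kernel output that has not been produced yet. A minimal standalone model of the peek()/pop() pattern:

    from typing import Iterator

    class Peekable:
        # Wraps an iterator; only advances it on demand.
        def __init__(self, it: Iterator[str]) -> None:
            self._it = it
            self._next = ''
            self._need_next = True
            self._done = False

        def _advance(self) -> None:
            if not self._need_next:
                return
            try:
                self._next = next(self._it)
            except StopIteration:
                self._done = True
            self._need_next = False

        def peek(self) -> str:
            self._advance()
            return self._next

        def pop(self) -> str:
            s = self.peek()
            if self._done:
                raise ValueError('past EOF, last line was ' + s)
            self._need_next = True
            return s

    # s = Peekable(iter(['a', 'b'])); s.peek() == 'a'; s.pop() == 'a'; s.pop() == 'b'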
- """ - self._get_next() return self._next[1] def pop(self) -> str: - """Returns the current line and advances the LineStream to - the next line. - """ - s = self.peek() - if self._done: - raise ValueError(f'LineStream: going past EOF, last line was {s}') - self._need_next = True - return s + n = self._next + self._get_next() + return n[1] def __bool__(self) -> bool: - """Returns True if stream has more lines.""" - self._get_next() return not self._done # Only used by kunit_tool_test.py. def __iter__(self) -> Iterator[str]: - """Empties all lines stored in LineStream object into - Iterator object and returns the Iterator object. - """ while bool(self): yield self.pop() def line_number(self) -> int: - """Returns the line number of the current line.""" - self._get_next() return self._next[0] -# Parsing helper methods: - -KTAP_START = re.compile(r'KTAP version ([0-9]+)$') -TAP_START = re.compile(r'TAP version ([0-9]+)$') -KTAP_END = re.compile('(List of all partitions:|' - 'Kernel panic - not syncing: VFS:|reboot: System halted)') +kunit_start_re = re.compile(r'TAP version [0-9]+$') +kunit_end_re = re.compile('(List of all partitions:|' + 'Kernel panic - not syncing: VFS:|reboot: System halted)') def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream: - """Extracts KTAP lines from the kernel output.""" - def isolate_ktap_output(kernel_output: Iterable[str]) \ - -> Iterator[Tuple[int, str]]: + def isolate_kunit_output(kernel_output: Iterable[str]) -> Iterator[Tuple[int, str]]: line_num = 0 started = False for line in kernel_output: line_num += 1 - line = line.rstrip() # remove trailing \n - if not started and KTAP_START.search(line): - # start extracting KTAP lines and set prefix - # to number of characters before version line - prefix_len = len( - line.split('KTAP version')[0]) - started = True - yield line_num, line[prefix_len:] - elif not started and TAP_START.search(line): - # start extracting KTAP lines and set prefix - # to number of characters before version line + line = line.rstrip() # line always has a trailing \n + if kunit_start_re.search(line): prefix_len = len(line.split('TAP version')[0]) started = True yield line_num, line[prefix_len:] - elif started and KTAP_END.search(line): - # stop extracting KTAP lines + elif kunit_end_re.search(line): break elif started: - # remove prefix and any indention and yield - # line with line number - line = line[prefix_len:].lstrip() - yield line_num, line - return LineStream(lines=isolate_ktap_output(kernel_output)) - -KTAP_VERSIONS = [1] -TAP_VERSIONS = [13, 14] - -def check_version(version_num: int, accepted_versions: List[int], - version_type: str, test: Test) -> None: - """ - Adds error to test object if version number is too high or too - low. - - Parameters: - version_num - The inputted version number from the parsed KTAP or TAP - header line - accepted_version - List of accepted KTAP or TAP versions - version_type - 'KTAP' or 'TAP' depending on the type of - version line. - test - Test object for current test being parsed - """ - if version_num < min(accepted_versions): - test.add_error(version_type + - ' version lower than expected!') - elif version_num > max(accepted_versions): - test.add_error( - version_type + ' version higher than expected!') - -def parse_ktap_header(lines: LineStream, test: Test) -> bool: - """ - Parses KTAP/TAP header line and checks version number. - Returns False if fails to parse KTAP/TAP header line. 
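A standalone sketch of the header classification this function (together with check_version() above it) performs; the accepted-version lists mirror the KTAP_VERSIONS/TAP_VERSIONS constants in the hunk:

    import re

    KTAP_START = re.compile(r'KTAP version ([0-9]+)$')
    TAP_START = re.compile(r'TAP version ([0-9]+)$')
    KTAP_VERSIONS = [1]
    TAP_VERSIONS = [13, 14]

    def header_version(line):
        # Returns (kind, version, in_accepted_range), or None for a line
        # that is not a KTAP/TAP header at all.
        for kind, rx, ok in (('KTAP', KTAP_START, KTAP_VERSIONS),
                             ('TAP', TAP_START, TAP_VERSIONS)):
            m = rx.match(line)
            if m:
                v = int(m.group(1))
                return kind, v, min(ok) <= v <= max(ok)
        return None

    # header_version('KTAP version 1')  -> ('KTAP', 1, True)
    # header_version('TAP version 14')  -> ('TAP', 14, True)
    # header_version('random output')   -> None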
- - Accepted formats: - - 'KTAP version [version number]' - - 'TAP version [version number]' - - Parameters: - lines - LineStream of KTAP output to parse - test - Test object for current test being parsed - - Return: - True if successfully parsed KTAP/TAP header line - """ - ktap_match = KTAP_START.match(lines.peek()) - tap_match = TAP_START.match(lines.peek()) - if ktap_match: - version_num = int(ktap_match.group(1)) - check_version(version_num, KTAP_VERSIONS, 'KTAP', test) - elif tap_match: - version_num = int(tap_match.group(1)) - check_version(version_num, TAP_VERSIONS, 'TAP', test) - else: - return False - test.log.append(lines.pop()) - return True - -TEST_HEADER = re.compile(r'^# Subtest: (.*)$') - -def parse_test_header(lines: LineStream, test: Test) -> bool: - """ - Parses test header and stores test name in test object. - Returns False if fails to parse test header line. - - Accepted format: - - '# Subtest: [test name]' - - Parameters: - lines - LineStream of KTAP output to parse - test - Test object for current test being parsed - - Return: - True if successfully parsed test header line - """ - match = TEST_HEADER.match(lines.peek()) - if not match: - return False - test.log.append(lines.pop()) - test.name = match.group(1) - return True - -TEST_PLAN = re.compile(r'1\.\.([0-9]+)') - -def parse_test_plan(lines: LineStream, test: Test) -> bool: - """ - Parses test plan line and stores the expected number of subtests in - test object. Reports an error if expected count is 0. - Returns False and sets expected_count to None if there is no valid test - plan. - - Accepted format: - - '1..[number of subtests]' - - Parameters: - lines - LineStream of KTAP output to parse - test - Test object for current test being parsed - - Return: - True if successfully parsed test plan line - """ - match = TEST_PLAN.match(lines.peek()) - if not match: - test.expected_count = None - return False - test.log.append(lines.pop()) - expected_count = int(match.group(1)) - test.expected_count = expected_count - return True - -TEST_RESULT = re.compile(r'^(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') - -TEST_RESULT_SKIP = re.compile(r'^(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') - -def peek_test_name_match(lines: LineStream, test: Test) -> bool: - """ - Matches current line with the format of a test result line and checks - if the name matches the name of the current test. - Returns False if fails to match format or name. - - Accepted format: - - '[ok|not ok] [test number] [-] [test name] [optional skip - directive]' - - Parameters: - lines - LineStream of KTAP output to parse - test - Test object for current test being parsed - - Return: - True if matched a test result line and the name matching the - expected test name - """ - line = lines.peek() - match = TEST_RESULT.match(line) - if not match: - return False - name = match.group(4) - return (name == test.name) - -def parse_test_result(lines: LineStream, test: Test, - expected_num: int) -> bool: - """ - Parses test result line and stores the status and name in the test - object. Reports an error if the test number does not match expected - test number. - Returns False if fails to parse test result line. - - Note that the SKIP directive is the only direction that causes a - change in status. 
- - Accepted format: - - '[ok|not ok] [test number] [-] [test name] [optional skip - directive]' - - Parameters: - lines - LineStream of KTAP output to parse - test - Test object for current test being parsed - expected_num - expected test number for current test - - Return: - True if successfully parsed a test result line. - """ - line = lines.peek() - match = TEST_RESULT.match(line) - skip_match = TEST_RESULT_SKIP.match(line) - - # Check if line matches test result line format - if not match: - return False - test.log.append(lines.pop()) - - # Set name of test object - if skip_match: - test.name = skip_match.group(4) - else: - test.name = match.group(4) - - # Check test num - num = int(match.group(2)) - if num != expected_num: - test.add_error('Expected test number ' + - str(expected_num) + ' but found ' + str(num)) - - # Set status of test object - status = match.group(1) - if skip_match: - test.status = TestStatus.SKIPPED - elif status == 'ok': - test.status = TestStatus.SUCCESS - else: - test.status = TestStatus.FAILURE - return True - -def parse_diagnostic(lines: LineStream) -> List[str]: - """ - Parse lines that do not match the format of a test result line or - test header line and returns them in list. - - Line formats that are not parsed: - - '# Subtest: [test name]' - - '[ok|not ok] [test number] [-] [test name] [optional skip - directive]' - - Parameters: - lines - LineStream of KTAP output to parse - - Return: - Log of diagnostic lines - """ - log = [] # type: List[str] - while lines and not TEST_RESULT.match(lines.peek()) and not \ - TEST_HEADER.match(lines.peek()): - log.append(lines.pop()) - return log - -DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^# .*?: kunit test case crashed!$') - -def parse_crash_in_log(test: Test) -> bool: - """ - Iterate through the lines of the log to parse for crash message. - If crash message found, set status to crashed and return True. - Otherwise return False. - - Parameters: - test - Test object for current test being parsed - - Return: - True if crash message found in log - """ - for line in test.log: - if DIAGNOSTIC_CRASH_MESSAGE.match(line): - test.status = TestStatus.TEST_CRASHED - return True - return False - - -# Printing helper methods: + yield line_num, line[prefix_len:] + return LineStream(lines=isolate_kunit_output(kernel_output)) DIVIDER = '=' * 60 RESET = '\033[0;0m' -def red(text: str) -> str: - """Returns inputted string with red color code.""" +def red(text) -> str: return '\033[1;31m' + text + RESET -def yellow(text: str) -> str: - """Returns inputted string with yellow color code.""" +def yellow(text) -> str: return '\033[1;33m' + text + RESET -def green(text: str) -> str: - """Returns inputted string with green color code.""" +def green(text) -> str: return '\033[1;32m' + text + RESET -ANSI_LEN = len(red('')) +def print_with_timestamp(message) -> None: + print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) -def print_with_timestamp(message: str) -> None: - """Prints message with timestamp at beginning.""" - print('[%s] %s' % (datetime.datetime.now().strftime('%H:%M:%S'), message)) +def format_suite_divider(message) -> str: + return '======== ' + message + ' ========' -def format_test_divider(message: str, len_message: int) -> str: - """ - Returns string with message centered in fixed width divider. 
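
Both sides of this hunk draw a fixed-width divider; the format_test_divider() being removed additionally centers its message and lets the caller discount invisible ANSI colour codes from the width. A sketch of that centering arithmetic, assuming a 60-column divider (DIVIDER_LEN and centered_divider are invented names for this sketch):

    DIVIDER_LEN = 60

    def centered_divider(message: str, visible_len: int) -> str:
        # Split the leftover width between the two sides, falling back
        # to three '=' per side when the message is too long;
        # visible_len lets callers exclude ANSI escapes from the count.
        left = right = 3
        difference = DIVIDER_LEN - visible_len - 2  # two surrounding spaces
        if difference > 0:
            left = difference // 2
            right = difference - left
        return '=' * left + ' ' + message + ' ' + '=' * right

    msg = 'example (2 subtests)'
    print(centered_divider(msg, len(msg)))
    # 19 '=' signs, the message, then 19 '=' signs: 60 columns total
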
+def print_suite_divider(message) -> None: + print_with_timestamp(DIVIDER) + print_with_timestamp(format_suite_divider(message)) - Example: - '===================== message example =====================' - - Parameters: - message - message to be centered in divider line - len_message - length of the message to be printed such that - any characters of the color codes are not counted - - Return: - String containing message centered in fixed width divider - """ - default_count = 3 # default number of dashes - len_1 = default_count - len_2 = default_count - difference = len(DIVIDER) - len_message - 2 # 2 spaces added - if difference > 0: - # calculate number of dashes for each side of the divider - len_1 = int(difference / 2) - len_2 = difference - len_1 - return ('=' * len_1) + ' ' + message + ' ' + ('=' * len_2) - -def print_test_header(test: Test) -> None: - """ - Prints test header with test name and optionally the expected number - of subtests. - - Example: - '=================== example (2 subtests) ===================' - - Parameters: - test - Test object representing current test being printed - """ - message = test.name - if test.expected_count: - if test.expected_count == 1: - message += (' (' + str(test.expected_count) + - ' subtest)') - else: - message += (' (' + str(test.expected_count) + - ' subtests)') - print_with_timestamp(format_test_divider(message, len(message))) - -def print_log(log: Iterable[str]) -> None: - """ - Prints all strings in saved log for test in yellow. - - Parameters: - log - Iterable object with all strings saved in log for test - """ +def print_log(log) -> None: for m in log: - print_with_timestamp(yellow(m)) + print_with_timestamp(m) -def format_test_result(test: Test) -> str: - """ - Returns string with formatted test result with colored status and test - name. +TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*# (Subtest:|.*: kunit test case crashed!)).*$') - Example: - '[PASSED] example' +def consume_non_diagnostic(lines: LineStream) -> None: + while lines and not TAP_ENTRIES.match(lines.peek()): + lines.pop() - Parameters: - test - Test object representing current test being printed +def save_non_diagnostic(lines: LineStream, test_case: TestCase) -> None: + while lines and not TAP_ENTRIES.match(lines.peek()): + test_case.log.append(lines.peek()) + lines.pop() - Return: - String containing formatted test result - """ - if test.status == TestStatus.SUCCESS: - return (green('[PASSED] ') + test.name) - elif test.status == TestStatus.SKIPPED: - return (yellow('[SKIPPED] ') + test.name) - elif test.status == TestStatus.NO_TESTS: - return (yellow('[NO TESTS RUN] ') + test.name) - elif test.status == TestStatus.TEST_CRASHED: - print_log(test.log) - return (red('[CRASHED] ') + test.name) - else: - print_log(test.log) - return (red('[FAILED] ') + test.name) +OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text']) -def print_test_result(test: Test) -> None: - """ - Prints result line with status of test. +OK_NOT_OK_SKIP = re.compile(r'^[\s]*(ok|not ok) [0-9]+ - (.*) # SKIP(.*)$') - Example: - '[PASSED] example' +OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') - Parameters: - test - Test object representing current test being printed - """ - print_with_timestamp(format_test_result(test)) +OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') -def print_test_footer(test: Test) -> None: - """ - Prints test footer with status of test. 
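
The OK_NOT_OK_* patterns restored above drive the result-line classification for the rest of this parser. A quick standalone exercise of them on made-up sample lines, mirroring how parse_ok_not_ok_test_case() prefers the SKIP match when extracting the name:

    import re

    OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
    OK_NOT_OK_SKIP = re.compile(r'^[\s]*(ok|not ok) [0-9]+ - (.*) # SKIP(.*)$')

    for line in ('  ok 1 - example_simple_test',
                 '  not ok 2 - example_failing_test',
                 '  ok 3 - example_skip_test # SKIP all skipped'):
        match = OK_NOT_OK_SUBTEST.match(line)
        skip = OK_NOT_OK_SKIP.match(line)
        # group(2) of the SKIP pattern is the name without the directive
        name = skip.group(2) if skip else match.group(2)
        status = 'SKIPPED' if skip else (
            'SUCCESS' if match.group(1) == 'ok' else 'FAILURE')
        print(name, status)
    # example_simple_test SUCCESS
    # example_failing_test FAILURE
    # example_skip_test SKIPPED
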
- - Example: - '===================== [PASSED] example =====================' - - Parameters: - test - Test object representing current test being printed - """ - message = format_test_result(test) - print_with_timestamp(format_test_divider(message, - len(message) - ANSI_LEN)) - -def print_summary_line(test: Test) -> None: - """ - Prints summary line of test object. Color of line is dependent on - status of test. Color is green if test passes, yellow if test is - skipped, and red if the test fails or crashes. Summary line contains - counts of the statuses of the tests subtests or the test itself if it - has no subtests. - - Example: - "Testing complete. Passed: 2, Failed: 0, Crashed: 0, Skipped: 0, - Errors: 0" - - test - Test object representing current test being printed - """ - if test.status == TestStatus.SUCCESS: - color = green - elif test.status == TestStatus.SKIPPED or test.status == TestStatus.NO_TESTS: - color = yellow - else: - color = red - counts = test.counts - print_with_timestamp(color('Testing complete. ' + str(counts))) - -def print_error(error_message: str) -> None: - """ - Prints error message with error format. - - Example: - "[ERROR] Test example: missing test plan!" - - Parameters: - error_message - message describing error - """ - print_with_timestamp(red('[ERROR] ') + error_message) - -# Other methods: - -def bubble_up_test_results(test: Test) -> None: - """ - If the test has subtests, add the test counts of the subtests to the - test and check if any of the tests crashed and if so set the test - status to crashed. Otherwise if the test has no subtests add the - status of the test to the test counts. - - Parameters: - test - Test object for current test being parsed - """ - parse_crash_in_log(test) - subtests = test.subtests - counts = test.counts - status = test.status - for t in subtests: - counts.add_subtest_counts(t.counts) - if counts.total() == 0: - counts.add_status(status) - elif test.counts.get_status() == TestStatus.TEST_CRASHED: - test.status = TestStatus.TEST_CRASHED - -def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: - """ - Finds next test to parse in LineStream, creates new Test object, - parses any subtests of the test, populates Test object with all - information (status, name) about the test and the Test objects for - any subtests, and then returns the Test object. 
The method accepts - three formats of tests: - - Accepted test formats: - - - Main KTAP/TAP header - - Example: - - KTAP version 1 - 1..4 - [subtests] - - - Subtest header line - - Example: - - # Subtest: name - 1..3 - [subtests] - ok 1 name - - - Test result line - - Example: - - ok 1 - test - - Parameters: - lines - LineStream of KTAP output to parse - expected_num - expected test number for test to be parsed - log - list of strings containing any preceding diagnostic lines - corresponding to the current test - - Return: - Test object populated with characteristics and any subtests - """ - test = Test() - test.log.extend(log) - parent_test = False - main = parse_ktap_header(lines, test) - if main: - # If KTAP/TAP header is found, attempt to parse - # test plan - test.name = "main" - parse_test_plan(lines, test) - parent_test = True - else: - # If KTAP/TAP header is not found, test must be subtest - # header or test result line so parse attempt to parser - # subtest header - parent_test = parse_test_header(lines, test) - if parent_test: - # If subtest header is found, attempt to parse - # test plan and print header - parse_test_plan(lines, test) - print_test_header(test) - expected_count = test.expected_count - subtests = [] - test_num = 1 - while parent_test and (expected_count is None or test_num <= expected_count): - # Loop to parse any subtests. - # Break after parsing expected number of tests or - # if expected number of tests is unknown break when test - # result line with matching name to subtest header is found - # or no more lines in stream. - sub_log = parse_diagnostic(lines) - sub_test = Test() - if not lines or (peek_test_name_match(lines, test) and - not main): - if expected_count and test_num <= expected_count: - # If parser reaches end of test before - # parsing expected number of subtests, print - # crashed subtest and record error - test.add_error('missing expected subtest!') - sub_test.log.extend(sub_log) - test.counts.add_status( - TestStatus.TEST_CRASHED) - print_test_result(sub_test) - else: - test.log.extend(sub_log) - break - else: - sub_test = parse_test(lines, test_num, sub_log) - subtests.append(sub_test) - test_num += 1 - test.subtests = subtests - if not main: - # If not main test, look for test result line - test.log.extend(parse_diagnostic(lines)) - if (parent_test and peek_test_name_match(lines, test)) or \ - not parent_test: - parse_test_result(lines, test, expected_num) - else: - test.add_error('missing subtest result line!') - - # Check for there being no tests - if parent_test and len(subtests) == 0: - test.status = TestStatus.NO_TESTS - test.add_error('0 tests run!') - - # Add statuses to TestCounts attribute in Test object - bubble_up_test_results(test) - if parent_test and not main: - # If test has subtests and is not the main test object, print - # footer. - print_test_footer(test) - elif not main: - print_test_result(test) - return test - -def parse_run_tests(kernel_output: Iterable[str]) -> Test: - """ - Using kernel output, extract KTAP lines, parse the lines for test - results and print condensed test results and summary line . - - Parameters: - kernel_output - Iterable object contains lines of kernel output - - Return: - Test - the main test object with all subtests. 
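
The recursive parse_test()/bubble_up_test_results() machinery removed in this hunk folds each subtest's counts into its parent on the way back up the recursion. A toy model of that fold, using an invented MiniTest type in place of the real Test/TestCounts pair:

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class MiniTest:
        passed: int = 0
        failed: int = 0
        children: List['MiniTest'] = field(default_factory=list)

    def bubble_up(test: MiniTest) -> None:
        # Depth-first: resolve each child, then absorb its counts.
        for child in test.children:
            bubble_up(child)
            test.passed += child.passed
            test.failed += child.failed

    root = MiniTest(children=[MiniTest(passed=2),
                              MiniTest(passed=1, failed=1)])
    bubble_up(root)
    print(root.passed, root.failed)  # -> 3 1
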
- """ - print_with_timestamp(DIVIDER) - lines = extract_tap_lines(kernel_output) - test = Test() +def parse_ok_not_ok_test_case(lines: LineStream, test_case: TestCase) -> bool: + save_non_diagnostic(lines, test_case) if not lines: - test.add_error('invalid KTAP input!') - test.status = TestStatus.FAILURE_TO_PARSE_TESTS + test_case.status = TestStatus.TEST_CRASHED + return True + line = lines.peek() + match = OK_NOT_OK_SUBTEST.match(line) + while not match and lines: + line = lines.pop() + match = OK_NOT_OK_SUBTEST.match(line) + if match: + test_case.log.append(lines.pop()) + test_case.name = match.group(2) + skip_match = OK_NOT_OK_SKIP.match(line) + if skip_match: + test_case.status = TestStatus.SKIPPED + return True + if test_case.status == TestStatus.TEST_CRASHED: + return True + if match.group(1) == 'ok': + test_case.status = TestStatus.SUCCESS + else: + test_case.status = TestStatus.FAILURE + return True else: - test = parse_test(lines, 0, []) - if test.status != TestStatus.NO_TESTS: - test.status = test.counts.get_status() + return False + +SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$') +DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$') + +def parse_diagnostic(lines: LineStream, test_case: TestCase) -> bool: + save_non_diagnostic(lines, test_case) + if not lines: + return False + line = lines.peek() + match = SUBTEST_DIAGNOSTIC.match(line) + if match: + test_case.log.append(lines.pop()) + crash_match = DIAGNOSTIC_CRASH_MESSAGE.match(line) + if crash_match: + test_case.status = TestStatus.TEST_CRASHED + return True + else: + return False + +def parse_test_case(lines: LineStream) -> Optional[TestCase]: + test_case = TestCase() + save_non_diagnostic(lines, test_case) + while parse_diagnostic(lines, test_case): + pass + if parse_ok_not_ok_test_case(lines, test_case): + return test_case + else: + return None + +SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') + +def parse_subtest_header(lines: LineStream) -> Optional[str]: + consume_non_diagnostic(lines) + if not lines: + return None + match = SUBTEST_HEADER.match(lines.peek()) + if match: + lines.pop() + return match.group(1) + else: + return None + +SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') + +def parse_subtest_plan(lines: LineStream) -> Optional[int]: + consume_non_diagnostic(lines) + match = SUBTEST_PLAN.match(lines.peek()) + if match: + lines.pop() + return int(match.group(1)) + else: + return None + +def max_status(left: TestStatus, right: TestStatus) -> TestStatus: + if left == right: + return left + elif left == TestStatus.TEST_CRASHED or right == TestStatus.TEST_CRASHED: + return TestStatus.TEST_CRASHED + elif left == TestStatus.FAILURE or right == TestStatus.FAILURE: + return TestStatus.FAILURE + elif left == TestStatus.SKIPPED: + return right + else: + return left + +def parse_ok_not_ok_test_suite(lines: LineStream, + test_suite: TestSuite, + expected_suite_index: int) -> bool: + consume_non_diagnostic(lines) + if not lines: + test_suite.status = TestStatus.TEST_CRASHED + return False + line = lines.peek() + match = OK_NOT_OK_MODULE.match(line) + if match: + lines.pop() + if match.group(1) == 'ok': + test_suite.status = TestStatus.SUCCESS + else: + test_suite.status = TestStatus.FAILURE + skip_match = OK_NOT_OK_SKIP.match(line) + if skip_match: + test_suite.status = TestStatus.SKIPPED + suite_index = int(match.group(2)) + if suite_index != expected_suite_index: + print_with_timestamp( + red('[ERROR] ') + 'expected_suite_index ' + + str(expected_suite_index) + ', but got ' + + 
str(suite_index))
+		return True
+	else:
+		return False
+
+def bubble_up_errors(status_list: Iterable[TestStatus]) -> TestStatus:
+	return reduce(max_status, status_list, TestStatus.SKIPPED)
+
+def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
+	max_test_case_status = bubble_up_errors(x.status for x in test_suite.cases)
+	return max_status(max_test_case_status, test_suite.status)
+
+def parse_test_suite(lines: LineStream, expected_suite_index: int) -> Optional[TestSuite]:
+	if not lines:
+		return None
+	consume_non_diagnostic(lines)
+	test_suite = TestSuite()
+	test_suite.status = TestStatus.SUCCESS
+	name = parse_subtest_header(lines)
+	if not name:
+		return None
+	test_suite.name = name
+	expected_test_case_num = parse_subtest_plan(lines)
+	if expected_test_case_num is None:
+		return None
+	while expected_test_case_num > 0:
+		test_case = parse_test_case(lines)
+		if not test_case:
+			break
+		test_suite.cases.append(test_case)
+		expected_test_case_num -= 1
+	if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index):
+		test_suite.status = bubble_up_test_case_errors(test_suite)
+		return test_suite
+	elif not lines:
+		print_with_timestamp(red('[ERROR] ') + 'ran out of lines before end token')
+		return test_suite
+	else:
+		print(f'failed to parse end of suite "{name}", at line {lines.line_number()}: {lines.peek()}')
+		return None
+
+TAP_HEADER = re.compile(r'^TAP version 14$')
+
+def parse_tap_header(lines: LineStream) -> bool:
+	consume_non_diagnostic(lines)
+	if TAP_HEADER.match(lines.peek()):
+		lines.pop()
+		return True
+	else:
+		return False
+
+TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
+
+def parse_test_plan(lines: LineStream) -> Optional[int]:
+	consume_non_diagnostic(lines)
+	match = TEST_PLAN.match(lines.peek())
+	if match:
+		lines.pop()
+		return int(match.group(1))
+	else:
+		return None
+
+def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus:
+	return bubble_up_errors(x.status for x in test_suites)
+
+def parse_test_result(lines: LineStream) -> TestResult:
+	consume_non_diagnostic(lines)
+	if not lines or not parse_tap_header(lines):
+		return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines)
+	expected_test_suite_num = parse_test_plan(lines)
+	if expected_test_suite_num == 0:
+		return TestResult(TestStatus.NO_TESTS, [], lines)
+	elif expected_test_suite_num is None:
+		return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines)
+	test_suites = []
+	for i in range(1, expected_test_suite_num + 1):
+		test_suite = parse_test_suite(lines, i)
+		if test_suite:
+			test_suites.append(test_suite)
+		else:
+			print_with_timestamp(
+				red('[ERROR] ') + ' expected ' +
+				str(expected_test_suite_num) +
+				' test suites, but got ' + str(i - 1))
+			break
+	test_suite = parse_test_suite(lines, -1)
+	if test_suite:
+		print_with_timestamp(red('[ERROR] ') +
+			'got unexpected test suite: ' + test_suite.name)
+	if test_suites:
+		return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+	else:
+		return TestResult(TestStatus.NO_TESTS, [], lines)
+
+class TestCounts:
+	passed: int
+	failed: int
+	crashed: int
+	skipped: int
+
+	def __init__(self):
+		self.passed = 0
+		self.failed = 0
+		self.crashed = 0
+		self.skipped = 0
+
+	def total(self) -> int:
+		return self.passed + self.failed + self.crashed + self.skipped
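
The restored parser aggregates by severity rather than by counting: max_status() above defines a "worst status wins" ordering and bubble_up_errors() folds it over a list of statuses with functools.reduce. A self-contained sketch of that lattice (Status and worst are invented stand-ins covering only the four states the fold distinguishes):

    from enum import Enum, auto
    from functools import reduce

    class Status(Enum):
        SKIPPED = auto()
        SUCCESS = auto()
        FAILURE = auto()
        TEST_CRASHED = auto()

    def worst(left: Status, right: Status) -> Status:
        # Same precedence as max_status() above: a crash beats a
        # failure, a failure beats a pass, and SKIPPED acts as the
        # identity element for the fold.
        if Status.TEST_CRASHED in (left, right):
            return Status.TEST_CRASHED
        if Status.FAILURE in (left, right):
            return Status.FAILURE
        return left if right == Status.SKIPPED else right

    statuses = [Status.SUCCESS, Status.FAILURE, Status.SKIPPED]
    print(reduce(worst, statuses, Status.SKIPPED))  # Status.FAILURE
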
+def print_and_count_results(test_result: TestResult) -> TestCounts:
+	counts = TestCounts()
+	for test_suite in test_result.suites:
+		if test_suite.status == TestStatus.SUCCESS:
+			print_suite_divider(green('[PASSED] ') + test_suite.name)
+		elif test_suite.status == TestStatus.SKIPPED:
+			print_suite_divider(yellow('[SKIPPED] ') + test_suite.name)
+		elif test_suite.status == TestStatus.TEST_CRASHED:
+			print_suite_divider(red('[CRASHED] ') + test_suite.name)
+		else:
+			print_suite_divider(red('[FAILED] ') + test_suite.name)
+		for test_case in test_suite.cases:
+			if test_case.status == TestStatus.SUCCESS:
+				counts.passed += 1
+				print_with_timestamp(green('[PASSED] ') + test_case.name)
+			elif test_case.status == TestStatus.SKIPPED:
+				counts.skipped += 1
+				print_with_timestamp(yellow('[SKIPPED] ') + test_case.name)
+			elif test_case.status == TestStatus.TEST_CRASHED:
+				counts.crashed += 1
+				print_with_timestamp(red('[CRASHED] ') + test_case.name)
+				print_log(map(yellow, test_case.log))
+				print_with_timestamp('')
+			else:
+				counts.failed += 1
+				print_with_timestamp(red('[FAILED] ') + test_case.name)
+				print_log(map(yellow, test_case.log))
+				print_with_timestamp('')
+	return counts
+
+def parse_run_tests(kernel_output: Iterable[str]) -> TestResult:
+	counts = TestCounts()
+	lines = extract_tap_lines(kernel_output)
+	test_result = parse_test_result(lines)
+	if test_result.status == TestStatus.NO_TESTS:
+		print(red('[ERROR] ') + yellow('no tests run!'))
+	elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS:
+		print(red('[ERROR] ') + yellow('could not parse test results!'))
+	else:
+		counts = print_and_count_results(test_result)
 	print_with_timestamp(DIVIDER)
-	print_summary_line(test)
-	return test
+	if test_result.status == TestStatus.SUCCESS:
+		fmt = green
+	elif test_result.status == TestStatus.SKIPPED:
+		fmt = yellow
+	else:
+		fmt = red
+	print_with_timestamp(
+		fmt('Testing complete. %d tests run. %d failed. %d crashed. %d skipped.' %
+		(counts.total(), counts.failed, counts.crashed, counts.skipped)))
+	return test_result
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 352369dffb..1edcc8373b 100644
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -13,10 +13,8 @@ import tempfile, shutil # Handling test_tmpdir
 import itertools
 import json
-import os
 import signal
-import subprocess
-from typing import Iterable
+import os
 
 import kunit_config
 import kunit_parser
@@ -51,9 +49,10 @@ class KconfigTest(unittest.TestCase):
 		self.assertFalse(kconfig1.is_subset_of(kconfig0))
 
 	def test_read_from_file(self):
+		kconfig = kunit_config.Kconfig()
 		kconfig_path = test_data_path('test_read_from_file.kconfig')
 
-		kconfig = kunit_config.parse_file(kconfig_path)
+		kconfig.read_from_file(kconfig_path)
 
 		expected_kconfig = kunit_config.Kconfig()
 		expected_kconfig.add_entry(
@@ -86,7 +85,8 @@ class KconfigTest(unittest.TestCase):
 
 		expected_kconfig.write_to_file(kconfig_path)
 
-		actual_kconfig = kunit_config.parse_file(kconfig_path)
+		actual_kconfig = kunit_config.Kconfig()
+		actual_kconfig.read_from_file(kconfig_path)
 
 		self.assertEqual(actual_kconfig.entries(),
 				 expected_kconfig.entries())
@@ -106,10 +106,10 @@ class KUnitParserTest(unittest.TestCase):
 		with open(log_path) as file:
 			result = kunit_parser.extract_tap_lines(file.readlines())
 		self.assertContains('TAP version 14', result)
-		self.assertContains('# Subtest: example', result)
-		self.assertContains('1..2', result)
-		self.assertContains('ok 1 - example_simple_test', result)
-		self.assertContains('ok 2 - example_mock_test', result)
+		self.assertContains(' # Subtest: example', result)
+		self.assertContains(' 1..2', result)
+		self.assertContains(' ok 1 - example_simple_test', result)
+
self.assertContains(' ok 2 - example_mock_test', result) self.assertContains('ok 1 - example', result) def test_output_with_prefix_isolated_correctly(self): @@ -117,28 +117,28 @@ class KUnitParserTest(unittest.TestCase): with open(log_path) as file: result = kunit_parser.extract_tap_lines(file.readlines()) self.assertContains('TAP version 14', result) - self.assertContains('# Subtest: kunit-resource-test', result) - self.assertContains('1..5', result) - self.assertContains('ok 1 - kunit_resource_test_init_resources', result) - self.assertContains('ok 2 - kunit_resource_test_alloc_resource', result) - self.assertContains('ok 3 - kunit_resource_test_destroy_resource', result) - self.assertContains('foo bar #', result) - self.assertContains('ok 4 - kunit_resource_test_cleanup_resources', result) - self.assertContains('ok 5 - kunit_resource_test_proper_free_ordering', result) + self.assertContains(' # Subtest: kunit-resource-test', result) + self.assertContains(' 1..5', result) + self.assertContains(' ok 1 - kunit_resource_test_init_resources', result) + self.assertContains(' ok 2 - kunit_resource_test_alloc_resource', result) + self.assertContains(' ok 3 - kunit_resource_test_destroy_resource', result) + self.assertContains(' foo bar #', result) + self.assertContains(' ok 4 - kunit_resource_test_cleanup_resources', result) + self.assertContains(' ok 5 - kunit_resource_test_proper_free_ordering', result) self.assertContains('ok 1 - kunit-resource-test', result) - self.assertContains('foo bar # non-kunit output', result) - self.assertContains('# Subtest: kunit-try-catch-test', result) - self.assertContains('1..2', result) - self.assertContains('ok 1 - kunit_test_try_catch_successful_try_no_catch', + self.assertContains(' foo bar # non-kunit output', result) + self.assertContains(' # Subtest: kunit-try-catch-test', result) + self.assertContains(' 1..2', result) + self.assertContains(' ok 1 - kunit_test_try_catch_successful_try_no_catch', result) - self.assertContains('ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch', + self.assertContains(' ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch', result) self.assertContains('ok 2 - kunit-try-catch-test', result) - self.assertContains('# Subtest: string-stream-test', result) - self.assertContains('1..3', result) - self.assertContains('ok 1 - string_stream_test_empty_on_creation', result) - self.assertContains('ok 2 - string_stream_test_not_empty_after_add', result) - self.assertContains('ok 3 - string_stream_test_get_string', result) + self.assertContains(' # Subtest: string-stream-test', result) + self.assertContains(' 1..3', result) + self.assertContains(' ok 1 - string_stream_test_empty_on_creation', result) + self.assertContains(' ok 2 - string_stream_test_not_empty_after_add', result) + self.assertContains(' ok 3 - string_stream_test_get_string', result) self.assertContains('ok 3 - string-stream-test', result) def test_parse_successful_test_log(self): @@ -149,22 +149,6 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.TestStatus.SUCCESS, result.status) - def test_parse_successful_nested_tests_log(self): - all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log') - with open(all_passed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - - def test_kselftest_nested(self): - kselftest_log = test_data_path('test_is_test_passed-kselftest.log') - with open(kselftest_log) as file: - result = 
kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - def test_parse_failed_test_log(self): failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: @@ -178,101 +162,58 @@ class KUnitParserTest(unittest.TestCase): with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.subtests)) + self.assertEqual(0, len(result.suites)) self.assertEqual( kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) - def test_missing_test_plan(self): - missing_plan_log = test_data_path('test_is_test_passed-' - 'missing_plan.log') - with open(missing_plan_log) as file: - result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines( - file.readlines())) - # A missing test plan is not an error. - self.assertEqual(0, result.counts.errors) - # All tests should be accounted for. - self.assertEqual(10, result.counts.total()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - def test_no_tests(self): - header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') - with open(header_log) as file: + empty_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') + with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.subtests)) + self.assertEqual(0, len(result.suites)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.status) - def test_no_tests_no_plan(self): - no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') - with open(no_plan_log) as file: - result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.subtests[0].subtests[0].subtests)) - self.assertEqual( - kunit_parser.TestStatus.NO_TESTS, - result.subtests[0].subtests[0].status) - self.assertEqual(1, result.counts.errors) - - def test_no_kunit_output(self): crash_log = test_data_path('test_insufficient_memory.log') print_mock = mock.patch('builtins.print').start() with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('invalid KTAP input!')) + print_mock.assert_any_call(StrContains('could not parse test results!')) print_mock.stop() - self.assertEqual(0, len(result.subtests)) + file.close() def test_crashed_test(self): crashed_log = test_data_path('test_is_test_passed-crash.log') with open(crashed_log) as file: - result = kunit_parser.parse_run_tests( - file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') - with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) - - # A skipped test does not fail the whole suite. 
- self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - - def test_skipped_all_tests(self): - skipped_log = test_data_path('test_skip_all_tests.log') - with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) - - self.assertEqual( - kunit_parser.TestStatus.SKIPPED, - result.status) - - def test_ignores_hyphen(self): - hyphen_log = test_data_path('test_strip_hyphen.log') - file = open(hyphen_log) + file = open(skipped_log) result = kunit_parser.parse_run_tests(file.readlines()) # A skipped test does not fail the whole suite. self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) + file.close() + + def test_skipped_all_tests(self): + skipped_log = test_data_path('test_skip_all_tests.log') + file = open(skipped_log) + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual( - "sysctl_test", - result.subtests[0].name) - self.assertEqual( - "example", - result.subtests[1].name) + kunit_parser.TestStatus.SKIPPED, + result.status) file.close() @@ -283,7 +224,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') @@ -292,7 +233,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') @@ -301,7 +242,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') @@ -310,7 +251,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') @@ -319,7 +260,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') @@ -328,46 +269,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) - -def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: - return kunit_parser.LineStream(enumerate(strs, start=1)) - -class LineStreamTest(unittest.TestCase): - - def test_basic(self): - stream = line_stream_from_strs(['hello', 'world']) - - self.assertTrue(stream, msg='Should be more input') - self.assertEqual(stream.line_number(), 1) - self.assertEqual(stream.peek(), 'hello') - self.assertEqual(stream.pop(), 'hello') - - self.assertTrue(stream, msg='Should be more input') - self.assertEqual(stream.line_number(), 2) 
- self.assertEqual(stream.peek(), 'world') - self.assertEqual(stream.pop(), 'world') - - self.assertFalse(stream, msg='Should be no more input') - with self.assertRaisesRegex(ValueError, 'LineStream: going past EOF'): - stream.pop() - - def test_is_lazy(self): - called_times = 0 - def generator(): - nonlocal called_times - for i in range(1,5): - called_times += 1 - yield called_times, str(called_times) - - stream = kunit_parser.LineStream(generator()) - self.assertEqual(called_times, 0) - - self.assertEqual(stream.pop(), '1') - self.assertEqual(called_times, 1) - - self.assertEqual(stream.pop(), '2') - self.assertEqual(called_times, 2) + self.assertEqual('kunit-resource-test', result.suites[0].name) class LinuxSourceTreeTest(unittest.TestCase): @@ -381,82 +283,13 @@ class LinuxSourceTreeTest(unittest.TestCase): def test_valid_kunitconfig(self): with tempfile.NamedTemporaryFile('wt') as kunitconfig: - kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) def test_dir_kunitconfig(self): with tempfile.TemporaryDirectory('') as dir: - with open(os.path.join(dir, '.kunitconfig'), 'w'): + with open(os.path.join(dir, '.kunitconfig'), 'w') as f: pass - kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) - - def test_kconfig_add(self): - tree = kunit_kernel.LinuxSourceTree('', kconfig_add=['CONFIG_NOT_REAL=y']) - self.assertIn(kunit_config.KconfigEntry('NOT_REAL', 'y'), tree._kconfig.entries()) - - def test_invalid_arch(self): - with self.assertRaisesRegex(kunit_kernel.ConfigError, 'not a valid arch, options are.*x86_64'): - kunit_kernel.LinuxSourceTree('', arch='invalid') - - def test_run_kernel_hits_exception(self): - def fake_start(unused_args, unused_build_dir): - return subprocess.Popen(['echo "hi\nbye"'], shell=True, text=True, stdout=subprocess.PIPE) - - with tempfile.TemporaryDirectory('') as build_dir: - tree = kunit_kernel.LinuxSourceTree(build_dir, load_config=False) - mock.patch.object(tree._ops, 'start', side_effect=fake_start).start() - - with self.assertRaises(ValueError): - for line in tree.run_kernel(build_dir=build_dir): - self.assertEqual(line, 'hi\n') - raise ValueError('uh oh, did not read all output') - - with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile: - self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output') - - def test_build_reconfig_no_config(self): - with tempfile.TemporaryDirectory('') as build_dir: - with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f: - f.write('CONFIG_KUNIT=y') - - tree = kunit_kernel.LinuxSourceTree(build_dir) - mock_build_config = mock.patch.object(tree, 'build_config').start() - - # Should generate the .config - self.assertTrue(tree.build_reconfig(build_dir, make_options=[])) - mock_build_config.assert_called_once_with(build_dir, []) - - def test_build_reconfig_existing_config(self): - with tempfile.TemporaryDirectory('') as build_dir: - # Existing .config is a superset, should not touch it - with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f: - f.write('CONFIG_KUNIT=y') - with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f: - f.write('CONFIG_KUNIT=y') - with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f: - f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') - - tree = kunit_kernel.LinuxSourceTree(build_dir) - mock_build_config = mock.patch.object(tree, 'build_config').start() - - self.assertTrue(tree.build_reconfig(build_dir, make_options=[])) - 
self.assertEqual(mock_build_config.call_count, 0) - - def test_build_reconfig_remove_option(self): - with tempfile.TemporaryDirectory('') as build_dir: - # We removed CONFIG_KUNIT_TEST=y from our .kunitconfig... - with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f: - f.write('CONFIG_KUNIT=y') - with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f: - f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') - with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f: - f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') - - tree = kunit_kernel.LinuxSourceTree(build_dir) - mock_build_config = mock.patch.object(tree, 'build_config').start() - - # ... so we should trigger a call to build_config() - self.assertTrue(tree.build_reconfig(build_dir, make_options=[])) - mock_build_config.assert_called_once_with(build_dir, []) + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) # TODO: add more test cases. @@ -467,7 +300,7 @@ class KUnitJsonTest(unittest.TestCase): with open(test_data_path(log_file)) as file: test_result = kunit_parser.parse_run_tests(file) json_obj = kunit_json.get_json_result( - test=test_result, + test_result=test_result, def_config='kunit_defconfig', build_dir=None, json_path='stdout') @@ -485,22 +318,10 @@ class KUnitJsonTest(unittest.TestCase): {'name': 'example_simple_test', 'status': 'ERROR'}, result["sub_groups"][1]["test_cases"][0]) - def test_skipped_test_json(self): - result = self._json_for('test_skip_tests.log') - self.assertEqual( - {'name': 'example_skip_test', 'status': 'SKIP'}, - result["sub_groups"][1]["test_cases"][1]) - def test_no_tests_json(self): result = self._json_for('test_is_test_passed-no_tests_run_with_header.log') self.assertEqual(0, len(result['sub_groups'])) - def test_nested_json(self): - result = self._json_for('test_is_test_passed-all_passed_nested.log') - self.assertEqual( - {'name': 'example_simple_test', 'status': 'PASS'}, - result["sub_groups"][0]["sub_groups"][0]["test_cases"][0]) - class StrContains(str): def __eq__(self, other): return self in other @@ -526,8 +347,8 @@ class KUnitMainTest(unittest.TestCase): def test_build_passes_args_pass(self): kunit.main(['build'], self.linux_source_mock) - self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) - self.linux_source_mock.build_kernel.assert_called_once_with(False, kunit.get_default_jobs(), '.kunit', None) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) + self.linux_source_mock.build_kernel.assert_called_once_with(False, 8, '.kunit', None) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_exec_passes_args_pass(self): @@ -559,15 +380,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(e.exception.code, 1) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) - self.print_mock.assert_any_call(StrContains('invalid KTAP input!')) - - def test_exec_no_tests(self): - self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0']) - with self.assertRaises(SystemExit) as e: - kunit.main(['run'], self.linux_source_mock) - self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir='.kunit', filter_glob='', timeout=300) - self.print_mock.assert_any_call(StrContains(' 0 tests run!')) + self.print_mock.assert_any_call(StrContains(' 0 tests run')) def test_exec_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) @@ -575,7 +388,7 @@ class 
KUnitMainTest(unittest.TestCase): self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) for call in self.print_mock.call_args_list: self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) - self.assertNotEqual(call, mock.call(StrContains(' 0 tests run!'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_run_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) @@ -584,7 +397,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) for call in self.print_mock.call_args_list: self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) - self.assertNotEqual(call, mock.call(StrContains(' 0 tests run!'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_run_raw_output_kunit(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) @@ -633,9 +446,8 @@ class KUnitMainTest(unittest.TestCase): def test_build_builddir(self): build_dir = '.kunit' - jobs = kunit.get_default_jobs() kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock) - self.linux_source_mock.build_kernel.assert_called_once_with(False, jobs, build_dir, None) + self.linux_source_mock.build_kernel.assert_called_once_with(False, 8, build_dir, None) def test_exec_builddir(self): build_dir = '.kunit' @@ -651,7 +463,6 @@ class KUnitMainTest(unittest.TestCase): # Just verify that we parsed and initialized it correctly here. mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig', - kconfig_add=None, arch='um', cross_compile=None, qemu_config_path=None) @@ -663,19 +474,6 @@ class KUnitMainTest(unittest.TestCase): # Just verify that we parsed and initialized it correctly here. mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig', - kconfig_add=None, - arch='um', - cross_compile=None, - qemu_config_path=None) - - @mock.patch.object(kunit_kernel, 'LinuxSourceTree') - def test_run_kconfig_add(self, mock_linux_init): - mock_linux_init.return_value = self.linux_source_mock - kunit.main(['run', '--kconfig_add=CONFIG_KASAN=y', '--kconfig_add=CONFIG_KCSAN=y']) - # Just verify that we parsed and initialized it correctly here. - mock_linux_init.assert_called_once_with('.kunit', - kunitconfig_path=None, - kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'], arch='um', cross_compile=None, qemu_config_path=None) @@ -687,46 +485,6 @@ class KUnitMainTest(unittest.TestCase): args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) - def test_list_tests(self): - want = ['suite.test1', 'suite.test2', 'suite2.test1'] - self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want - - got = kunit._list_tests(self.linux_source_mock, - kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'suite')) - - self.assertEqual(got, want) - # Should respect the user's filter glob when listing tests. - self.linux_source_mock.run_kernel.assert_called_once_with( - args=['kunit.action=list'], build_dir='.kunit', filter_glob='suite*', timeout=300) - - - @mock.patch.object(kunit, '_list_tests') - def test_run_isolated_by_suite(self, mock_tests): - mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1'] - kunit.main(['exec', '--run_isolated=suite', 'suite*.test*'], self.linux_source_mock) - - # Should respect the user's filter glob when listing tests. 
- mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*.test*', None, 'suite')) - self.linux_source_mock.run_kernel.assert_has_calls([ - mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300), - mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300), - ]) - - @mock.patch.object(kunit, '_list_tests') - def test_run_isolated_by_test(self, mock_tests): - mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1'] - kunit.main(['exec', '--run_isolated=test', 'suite*'], self.linux_source_mock) - - # Should respect the user's filter glob when listing tests. - mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'test')) - self.linux_source_mock.run_kernel.assert_has_calls([ - mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300), - mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300), - mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', timeout=300), - ]) - if __name__ == '__main__': unittest.main() diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index c57d9e9d44..47f9cc9dcd 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -35,6 +35,8 @@ obj-$(CONFIG_DAX) += dax.o endif obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o +obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o nfit-y := $(ACPI_SRC)/core.o nfit-y += $(ACPI_SRC)/intel.o @@ -65,8 +67,12 @@ device_dax-y += dax-dev.o device_dax-y += device_dax_test.o device_dax-y += config_check.o -dax_pmem-y := $(DAX_SRC)/pmem.o +dax_pmem-y := $(DAX_SRC)/pmem/pmem.o dax_pmem-y += dax_pmem_test.o +dax_pmem_core-y := $(DAX_SRC)/pmem/core.o +dax_pmem_core-y += dax_pmem_core_test.o +dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o +dax_pmem_compat-y += dax_pmem_compat_test.o dax_pmem-y += config_check.o libnvdimm-y := $(NVDIMM_SRC)/core.o diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index b752ce47ea..ed563bdd88 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -100,17 +100,25 @@ static void nfit_test_kill(void *_pgmap) { struct dev_pagemap *pgmap = _pgmap; - WARN_ON(!pgmap); + WARN_ON(!pgmap || !pgmap->ref); - percpu_ref_kill(&pgmap->ref); + if (pgmap->ops && pgmap->ops->kill) + pgmap->ops->kill(pgmap); + else + percpu_ref_kill(pgmap->ref); - wait_for_completion(&pgmap->done); - percpu_ref_exit(&pgmap->ref); + if (pgmap->ops && pgmap->ops->cleanup) { + pgmap->ops->cleanup(pgmap); + } else { + wait_for_completion(&pgmap->done); + percpu_ref_exit(pgmap->ref); + } } static void dev_pagemap_percpu_release(struct percpu_ref *ref) { - struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref); + struct dev_pagemap *pgmap = + container_of(ref, struct dev_pagemap, internal_ref); complete(&pgmap->done); } @@ -124,11 +132,22 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) if (!nfit_res) return devm_memremap_pages(dev, pgmap); - init_completion(&pgmap->done); - error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0, - GFP_KERNEL); - if (error) - return ERR_PTR(error); + if (!pgmap->ref) { + if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup)) + return ERR_PTR(-EINVAL); + + init_completion(&pgmap->done); + error = 
percpu_ref_init(&pgmap->internal_ref, + dev_pagemap_percpu_release, 0, GFP_KERNEL); + if (error) + return ERR_PTR(error); + pgmap->ref = &pgmap->internal_ref; + } else { + if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) { + WARN(1, "Missing reference count teardown definition\n"); + return ERR_PTR(-EINVAL); + } + } error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap); if (error) diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index 3ca7c32e93..6862915f1f 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -1054,6 +1054,10 @@ static __init int ndtest_init(void) libnvdimm_test(); device_dax_test(); dax_pmem_test(); + dax_pmem_core_test(); +#ifdef CONFIG_DEV_DAX_PMEM_COMPAT + dax_pmem_compat_test(); +#endif nfit_test_setup(ndtest_resource_lookup, NULL); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 0bc91ffee2..b1bff5fb0f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3300,6 +3300,10 @@ static __init int nfit_test_init(void) acpi_nfit_test(); device_dax_test(); dax_pmem_test(); + dax_pmem_core_test(); +#ifdef CONFIG_DEV_DAX_PMEM_COMPAT + dax_pmem_compat_test(); +#endif nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h index 016cff473c..565fccdfe6 100644 --- a/tools/testing/radix-tree/linux/lockdep.h +++ b/tools/testing/radix-tree/linux/lockdep.h @@ -1,8 +1,5 @@ #ifndef _LINUX_LOCKDEP_H #define _LINUX_LOCKDEP_H - -#include - struct lock_class_key { unsigned int a; }; diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 5bd9e6e806..16ec895bbe 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page) __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) -#define pagefault_disabled() (0) +#define preemptible() (1) static inline void *kmap(struct page *page) { @@ -127,7 +127,6 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) #define kmemleak_free(a) #define PageSlab(p) (0) -#define flush_dcache_page(p) #define MAX_ERRNO 4095 diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d08fe4cfe8..c852eb40c4 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TARGETS += alsa -TARGETS += arm64 +TARGETS = arm64 TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 1e8d9a8f59..ced910fb40 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index c50d86331e..b67395903b 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -1,4 +1,3 @@ -fp-pidbench fpsimd-test rdvl-sve sve-probe-vls diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 
95f0b877a0..f2abdd6ba1 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -2,21 +2,19 @@ CFLAGS += -I../../../../../usr/include/
 TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
-TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
 	rdvl-sve \
 	sve-test sve-stress \
 	vlset
 
 all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
 
-fp-pidbench: fp-pidbench.S asm-utils.o
-	$(CC) -nostdlib $^ -o $@
-fpsimd-test: fpsimd-test.o asm-utils.o
+fpsimd-test: fpsimd-test.o
 	$(CC) -nostdlib $^ -o $@
 rdvl-sve: rdvl-sve.o rdvl.o
-sve-ptrace: sve-ptrace.o
+sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
 sve-probe-vls: sve-probe-vls.o rdvl.o
-sve-test: sve-test.o asm-utils.o
+sve-test: sve-test.o
 	$(CC) -nostdlib $^ -o $@
 vec-syscfg: vec-syscfg.o rdvl.o
 vlset: vlset.o
diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO
index 44004e53da..b6b7ebfcf3 100644
--- a/tools/testing/selftests/arm64/fp/TODO
+++ b/tools/testing/selftests/arm64/fp/TODO
@@ -1,7 +1,4 @@
 - Test unsupported values in the ABIs.
-- More coverage for ptrace:
- - Get/set of FFR.
- - Ensure ptraced processes actually see the register state visible through
-   the ptrace interface.
- - Big endian.
-- Test PR_SVE_VL_INHERIT after a double fork.
+- More coverage for ptrace (eg, vector length conversions).
+- Coverage for signals.
+- Test PR_SVE_VL_INHERIT after a double fork.
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
index 90bd433d26..8944f21892 100644
--- a/tools/testing/selftests/arm64/fp/assembler.h
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -54,15 +54,4 @@ endfunction
 	.purgem \name\()_entry
 .endm
 
-// Utility macro to print a literal string
-// Clobbers x0-x4,x8
-.macro puts string
-	.pushsection .rodata.str1.1, "aMS", 1
-.L__puts_literal\@: .string "\string"
-	.popsection
-
-	ldr	x0, =.L__puts_literal\@
-	bl	puts
-.endm
-
 #endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
index e21e8ea52c..0dbd594c27 100644
--- a/tools/testing/selftests/arm64/fp/fpsimd-test.S
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -33,6 +33,131 @@ define_accessor setv, NVR, _vldr
 define_accessor getv, NVR, _vstr
 
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+ mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + // Declare some storate space to shadow the SVE register contents: .pushsection .text .data @@ -43,6 +168,18 @@ scratch: .space MAXVL_B .popsection +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + // Generate a test pattern for storage in SVE registers // x0: pid (16 bits) // x1: register number (6 bits) @@ -90,6 +227,33 @@ function setup_vreg ret x4 endfunction +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + // Trivial memory compare: compare x2 bytes starting at address x0 with // bytes starting at address x1. // Returns only if all bytes match; otherwise, the program is aborted. diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index a3c1e67441..612d389961 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -1,17 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2015-2021 ARM Limited. + * Copyright (C) 2015-2020 ARM Limited. 
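For orientation before the sve-ptrace.c changes that follow: the NT_ARM_SVE regset that get_sve()/set_sve() wrap is accessed through PTRACE_GETREGSET/PTRACE_SETREGSET with a struct iovec. A minimal standalone sketch, not taken from the patch -- error handling is trimmed and it assumes the target thread is already ptrace-stopped:

    #include <errno.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>
    #include <asm/ptrace.h>		/* struct user_sve_header */

    #ifndef NT_ARM_SVE
    #define NT_ARM_SVE 0x405	/* same fallback the test defines */
    #endif

    /* Fetch just the SVE header (vl, flags, total payload size) of a
     * stopped tracee; a short iov_len makes the kernel copy out only
     * the leading bytes of the regset. Returns 0 or -errno. */
    static int read_sve_header(pid_t tracee, struct user_sve_header *hdr)
    {
            struct iovec iov = {
                    .iov_base = hdr,
                    .iov_len  = sizeof(*hdr),
            };

            if (ptrace(PTRACE_GETREGSET, tracee, NT_ARM_SVE, &iov))
                    return -errno;

            return 0;
    }

A full read then allocates hdr.size bytes and repeats the call with the larger iovec, which is exactly the resize dance get_sve() below implements.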
* Original author: Dave Martin */ #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -21,43 +19,40 @@ #include "../../kselftest.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - /* and don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 #endif -struct vec_type { - const char *name; - unsigned long hwcap_type; - unsigned long hwcap; - int regset; - int prctl_set; -}; +/* Number of registers filled in by sve_store_patterns */ +#define NR_VREGS 5 -static const struct vec_type vec_types[] = { - { - .name = "SVE", - .hwcap_type = AT_HWCAP, - .hwcap = HWCAP_SVE, - .regset = NT_ARM_SVE, - .prctl_set = PR_SVE_SET_VL, - }, -}; +void sve_store_patterns(__uint128_t v[NR_VREGS]); -#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) -#define FLAG_TESTS 2 -#define FPSIMD_TESTS 3 +static void dump(const void *buf, size_t size) +{ + size_t i; + const unsigned char *p = buf; -#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types)) + for (i = 0; i < size; ++i) + printf(" %.2x", *p++); +} -static void fill_buf(char *buf, size_t size) +static int check_vregs(const __uint128_t vregs[NR_VREGS]) { int i; + int ok = 1; - for (i = 0; i < size; i++) - buf[i] = random(); + for (i = 0; i < NR_VREGS; ++i) { + printf("# v[%d]:", i); + dump(&vregs[i], sizeof vregs[i]); + putchar('\n'); + + if (vregs[i] != vregs[0]) + ok = 0; + } + + return ok; } static int do_child(void) @@ -71,17 +66,7 @@ static int do_child(void) return EXIT_SUCCESS; } -static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) -{ - struct iovec iov; - - iov.iov_base = fpsimd; - iov.iov_len = sizeof(*fpsimd); - return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); -} - -static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, - void **buf, size_t *size) +static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) { struct user_sve_header *sve; void *p; @@ -102,7 +87,7 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, iov.iov_base = *buf; iov.iov_len = sz; - if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov)) + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov)) goto error; sve = *buf; @@ -118,376 +103,49 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, return NULL; } -static int set_sve(pid_t pid, const struct vec_type *type, - const struct user_sve_header *sve) +static int set_sve(pid_t pid, const struct user_sve_header *sve) { struct iovec iov; iov.iov_base = (void *)sve; iov.iov_len = sve->size; - return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov); + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); } -/* Validate setting and getting the inherit flag */ -static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) +static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, + unsigned int vlmax) { - struct user_sve_header sve; - struct user_sve_header *new_sve = NULL; - size_t new_sve_size = 0; - int ret; + unsigned int vq; + unsigned int i; - /* First set the flag */ - memset(&sve, 0, sizeof(sve)); - sve.size = sizeof(sve); - sve.vl = sve_vl_from_vq(SVE_VQ_MIN); - sve.flags = SVE_PT_VL_INHERIT; - ret = set_sve(child, type, &sve); - if (ret != 0) { - ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", - type->name); - return; + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + ksft_exit_fail_msg("Dumping non-SVE register\n"); + + if (vlmax > 
sve->vl) + vlmax = sve->vl; + + vq = sve_vq_from_vl(sve->vl); + for (i = 0; i < num; ++i) { + printf("# z%u:", i); + dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), + vlmax); + printf("%s\n", vlmax == sve->vl ? "" : " ..."); } - - /* - * Read back the new register state and verify that we have - * set the flags we expected. - */ - if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { - ksft_test_result_fail("Failed to read %s SVE flags\n", - type->name); - return; - } - - ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT, - "%s SVE_PT_VL_INHERIT set\n", type->name); - - /* Now clear */ - sve.flags &= ~SVE_PT_VL_INHERIT; - ret = set_sve(child, type, &sve); - if (ret != 0) { - ksft_test_result_fail("Failed to clear %s SVE_PT_VL_INHERIT\n", - type->name); - return; - } - - if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { - ksft_test_result_fail("Failed to read %s SVE flags\n", - type->name); - return; - } - - ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT), - "%s SVE_PT_VL_INHERIT cleared\n", type->name); - - free(new_sve); -} - -/* Validate attempting to set the specfied VL via ptrace */ -static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, - unsigned int vl, bool *supported) -{ - struct user_sve_header sve; - struct user_sve_header *new_sve = NULL; - size_t new_sve_size = 0; - int ret, prctl_vl; - - *supported = false; - - /* Check if the VL is supported in this process */ - prctl_vl = prctl(type->prctl_set, vl); - if (prctl_vl == -1) - ksft_exit_fail_msg("prctl(PR_%s_SET_VL) failed: %s (%d)\n", - type->name, strerror(errno), errno); - - /* If the VL is not supported then a supported VL will be returned */ - *supported = (prctl_vl == vl); - - /* Set the VL by doing a set with no register payload */ - memset(&sve, 0, sizeof(sve)); - sve.size = sizeof(sve); - sve.vl = vl; - ret = set_sve(child, type, &sve); - if (ret != 0) { - ksft_test_result_fail("Failed to set %s VL %u\n", - type->name, vl); - return; - } - - /* - * Read back the new register state and verify that we have the - * same VL that we got from prctl() on ourselves. 
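As the removed comment above notes, the expected VL comes from prctl() in the tracer's own process: PR_SVE_SET_VL returns the vector length actually granted, rounded to something the kernel supports. A minimal in-process sketch, assuming an SVE-capable kernel; the constants are the standard uapi values, defined here only in case the libc headers lack them:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SVE_SET_VL
    #define PR_SVE_SET_VL		50	/* standard uapi value */
    #endif
    #ifndef PR_SVE_VL_LEN_MASK
    #define PR_SVE_VL_LEN_MASK	0xffff
    #endif

    int main(void)
    {
            /* Request a 32-byte VL; on success the return value encodes
             * the VL actually set (plus flag bits above the length mask),
             * which may differ from 32 if that length is unsupported. */
            int ret = prctl(PR_SVE_SET_VL, 32);

            if (ret < 0) {
                    perror("PR_SVE_SET_VL");
                    return 1;
            }

            printf("granted VL: %d bytes\n", ret & PR_SVE_VL_LEN_MASK);
            return 0;
    }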
- */ - if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { - ksft_test_result_fail("Failed to read %s VL %u\n", - type->name, vl); - return; - } - - ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", - type->name, vl); - - free(new_sve); -} - -static void check_u32(unsigned int vl, const char *reg, - uint32_t *in, uint32_t *out, int *errors) -{ - if (*in != *out) { - printf("# VL %d %s wrote %x read %x\n", - vl, reg, *in, *out); - (*errors)++; - } -} - -/* Access the FPSIMD registers via the SVE regset */ -static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) -{ - void *svebuf = NULL; - size_t svebufsz = 0; - struct user_sve_header *sve; - struct user_fpsimd_state *fpsimd, new_fpsimd; - unsigned int i, j; - unsigned char *p; - - /* New process should start with FPSIMD registers only */ - sve = get_sve(child, type, &svebuf, &svebufsz); - if (!sve) { - ksft_test_result_fail("get_sve(%s): %s\n", - type->name, strerror(errno)); - - return; - } else { - ksft_test_result_pass("get_sve(%s FPSIMD)\n", type->name); - } - - ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Got FPSIMD registers via %s\n", type->name); - if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) - goto out; - - /* Try to set a known FPSIMD state via PT_REGS_SVE */ - fpsimd = (struct user_fpsimd_state *)((char *)sve + - SVE_PT_FPSIMD_OFFSET); - for (i = 0; i < 32; ++i) { - p = (unsigned char *)&fpsimd->vregs[i]; - - for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j) - p[j] = j; - } - - if (set_sve(child, type, sve)) { - ksft_test_result_fail("set_sve(%s FPSIMD): %s\n", - type->name, strerror(errno)); - - goto out; - } - - /* Verify via the FPSIMD regset */ - if (get_fpsimd(child, &new_fpsimd)) { - ksft_test_result_fail("get_fpsimd(): %s\n", - strerror(errno)); - goto out; - } - if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0) - ksft_test_result_pass("%s get_fpsimd() gave same state\n", - type->name); - else - ksft_test_result_fail("%s get_fpsimd() gave different state\n", - type->name); - -out: - free(svebuf); -} - -/* Validate attempting to set SVE data and read SVE data */ -static void ptrace_set_sve_get_sve_data(pid_t child, - const struct vec_type *type, - unsigned int vl) -{ - void *write_buf; - void *read_buf = NULL; - struct user_sve_header *write_sve; - struct user_sve_header *read_sve; - size_t read_sve_size = 0; - unsigned int vq = sve_vq_from_vl(vl); - int ret, i; - size_t data_size; - int errors = 0; - - data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); - write_buf = malloc(data_size); - if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", - data_size, type->name, vl); - return; - } - write_sve = write_buf; - - /* Set up some data and write it out */ - memset(write_sve, 0, data_size); - write_sve->size = data_size; - write_sve->vl = vl; - write_sve->flags = SVE_PT_REGS_SVE; - - for (i = 0; i < __SVE_NUM_ZREGS; i++) - fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), - SVE_PT_SVE_ZREG_SIZE(vq)); - - for (i = 0; i < __SVE_NUM_PREGS; i++) - fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), - SVE_PT_SVE_PREG_SIZE(vq)); - - fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); - fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); - - /* TODO: Generate a valid FFR pattern */ - - ret = set_sve(child, type, write_sve); - if (ret != 0) { - ksft_test_result_fail("Failed to set %s VL %u data\n", - type->name, vl); - goto out; - } - - /* Read the data back 
*/ - if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) { - ksft_test_result_fail("Failed to read %s VL %u data\n", - type->name, vl); - goto out; - } - read_sve = read_buf; - - /* We might read more data if there's extensions we don't know */ - if (read_sve->size < write_sve->size) { - ksft_test_result_fail("%s wrote %d bytes, only read %d\n", - type->name, write_sve->size, - read_sve->size); - goto out_read; - } - - for (i = 0; i < __SVE_NUM_ZREGS; i++) { - if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), - read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), - SVE_PT_SVE_ZREG_SIZE(vq)) != 0) { - printf("# Mismatch in %u Z%d\n", vl, i); - errors++; - } - } - - for (i = 0; i < __SVE_NUM_PREGS; i++) { - if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), - read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), - SVE_PT_SVE_PREG_SIZE(vq)) != 0) { - printf("# Mismatch in %u P%d\n", vl, i); - errors++; - } - } - - check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), - read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors); - check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), - read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors); - - ksft_test_result(errors == 0, "Set and get %s data for VL %u\n", - type->name, vl); - -out_read: - free(read_buf); -out: - free(write_buf); -} - -/* Validate attempting to set SVE data and read SVE data */ -static void ptrace_set_sve_get_fpsimd_data(pid_t child, - const struct vec_type *type, - unsigned int vl) -{ - void *write_buf; - struct user_sve_header *write_sve; - unsigned int vq = sve_vq_from_vl(vl); - struct user_fpsimd_state fpsimd_state; - int ret, i; - size_t data_size; - int errors = 0; - - if (__BYTE_ORDER == __BIG_ENDIAN) { - ksft_test_result_skip("Big endian not supported\n"); - return; - } - - data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); - write_buf = malloc(data_size); - if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", - data_size, type->name, vl); - return; - } - write_sve = write_buf; - - /* Set up some data and write it out */ - memset(write_sve, 0, data_size); - write_sve->size = data_size; - write_sve->vl = vl; - write_sve->flags = SVE_PT_REGS_SVE; - - for (i = 0; i < __SVE_NUM_ZREGS; i++) - fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), - SVE_PT_SVE_ZREG_SIZE(vq)); - - fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); - fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); - - ret = set_sve(child, type, write_sve); - if (ret != 0) { - ksft_test_result_fail("Failed to set %s VL %u data\n", - type->name, vl); - goto out; - } - - /* Read the data back */ - if (get_fpsimd(child, &fpsimd_state)) { - ksft_test_result_fail("Failed to read %s VL %u FPSIMD data\n", - type->name, vl); - goto out; - } - - for (i = 0; i < __SVE_NUM_ZREGS; i++) { - __uint128_t tmp = 0; - - /* - * Z regs are stored endianness invariant, this won't - * work for big endian - */ - memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), - sizeof(tmp)); - - if (tmp != fpsimd_state.vregs[i]) { - printf("# Mismatch in FPSIMD for %s VL %u Z%d\n", - type->name, vl, i); - errors++; - } - } - - check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), - &fpsimd_state.fpsr, &errors); - check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), - &fpsimd_state.fpcr, &errors); - - ksft_test_result(errors == 0, "Set and get FPSIMD data for %s VL %u\n", - type->name, vl); - -out: - free(write_buf); } static int do_parent(pid_t child) { int ret = EXIT_FAILURE; pid_t 
pid; - int status, i; + int status; siginfo_t si; - unsigned int vq, vl; - bool vl_supported; + void *svebuf = NULL, *newsvebuf; + size_t svebufsz = 0, newsvebufsz; + struct user_sve_header *sve, *new_sve; + struct user_fpsimd_state *fpsimd; + unsigned int i, j; + unsigned char *p; + unsigned int vq; /* Attach to the child */ while (1) { @@ -509,6 +167,8 @@ static int do_parent(pid_t child) if (WIFEXITED(status) || WIFSIGNALED(status)) ksft_exit_fail_msg("Child died unexpectedly\n"); + ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", + status); if (!WIFSTOPPED(status)) goto error; @@ -543,55 +203,98 @@ static int do_parent(pid_t child) } } - for (i = 0; i < ARRAY_SIZE(vec_types); i++) { - /* FPSIMD via SVE regset */ - if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { - ptrace_sve_fpsimd(child, &vec_types[i]); - } else { - ksft_test_result_skip("%s FPSIMD get via SVE\n", - vec_types[i].name); - ksft_test_result_skip("%s FPSIMD set via SVE\n", - vec_types[i].name); - ksft_test_result_skip("%s set read via FPSIMD\n", - vec_types[i].name); - } + sve = get_sve(pid, &svebuf, &svebufsz); + if (!sve) { + int e = errno; - /* prctl() flags */ - if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { - ptrace_set_get_inherit(child, &vec_types[i]); - } else { - ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n", - vec_types[i].name); - ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n", - vec_types[i].name); - } + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; - /* Step through every possible VQ */ - for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { - vl = sve_vl_from_vq(vq); + goto error; + } else { + ksft_test_result_pass("get_sve\n"); + } - /* First, try to set this vector length */ - if (getauxval(vec_types[i].hwcap_type) & - vec_types[i].hwcap) { - ptrace_set_get_vl(child, &vec_types[i], vl, - &vl_supported); - } else { - ksft_test_result_skip("%s get/set VL %d\n", - vec_types[i].name, vl); - vl_supported = false; - } + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto error; - /* If the VL is supported validate data set/get */ - if (vl_supported) { - ptrace_set_sve_get_sve_data(child, &vec_types[i], vl); - ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl); - } else { - ksft_test_result_skip("%s set SVE get SVE for VL %d\n", - vec_types[i].name, vl); - ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n", - vec_types[i].name, vl); - } - } + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) + p[j] = j; + } + + if (set_sve(pid, sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + vq = sve_vq_from_vl(sve->vl); + + newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + new_sve = newsvebuf = malloc(newsvebufsz); + if (!new_sve) { + errno = ENOMEM; + perror(NULL); + goto error; + } + + *new_sve = *sve; + new_sve->flags &= ~SVE_PT_REGS_MASK; + new_sve->flags |= SVE_PT_REGS_SVE; + memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), + 0, SVE_PT_SVE_ZREG_SIZE(vq)); + new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + if (set_sve(pid, new_sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto 
disappeared; + + goto error; + } + + new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); + if (!new_sve) { + int e = errno; + + ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, + "SVE registers\n"); + if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + goto error; + + dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + + p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); + for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { + unsigned char expected = i; + + if (__BYTE_ORDER == __BIG_ENDIAN) + expected = sizeof fpsimd->vregs[0] - 1 - expected; + + ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); + if (p[i] != expected) + goto error; } ret = EXIT_SUCCESS; @@ -606,16 +309,20 @@ static int do_parent(pid_t child) int main(void) { int ret = EXIT_SUCCESS; + __uint128_t v[NR_VREGS]; pid_t child; - srandom(getpid()); - ksft_print_header(); - ksft_set_plan(EXPECTED_TESTS); + ksft_set_plan(20); if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) ksft_exit_skip("SVE not available\n"); + sve_store_patterns(v); + + if (!check_vregs(v)) + ksft_exit_fail_msg("Initial check_vregs() failed\n"); + child = fork(); if (!child) return do_child(); diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f5b1b48fff..e3e08d9c70 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -46,6 +46,130 @@ define_accessor getz, NZR, _sve_str_v define_accessor setp, NPR, _sve_ldr_p define_accessor getp, NPR, _sve_str_p +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! 
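+	// Editor's note, not part of the original patch: x30 (the link
+	// register) is saved because the "bl puthexnibble" below is a
+	// nested call for the high nibble; x30 is then restored and
+	// control falls through, so puthexnibble's tail "b putc" returns
+	// straight to puthexb's caller once the low nibble is printed.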
+ + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + // Declare some storate space to shadow the SVE register contents: .pushsection .text .data @@ -60,6 +184,18 @@ scratch: .space MAXVL_B .popsection +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + // Generate a test pattern for storage in SVE registers // x0: pid (16 bits) // x1: register number (6 bits) @@ -180,6 +316,33 @@ function setup_ffr ret x4 endfunction +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + // Trivial memory compare: compare x2 bytes starting at address x0 with // bytes starting at address x1. // Returns only if all bytes match; otherwise, the program is aborted. diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index c90658811a..c02071dcb5 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -21,6 +21,8 @@ #include "../../kselftest.h" #include "rdvl.h" +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + #define ARCH_MIN_VL SVE_VL_MIN struct vec_data { @@ -107,7 +109,7 @@ static int get_child_rdvl(struct vec_data *data) /* exec() a new binary which puts the VL on stdout */ ret = execl(data->rdvl_binary, data->rdvl_binary, NULL); - fprintf(stderr, "execl(%s) failed: %d (%s)\n", + fprintf(stderr, "execl(%s) failed: %d\n", data->rdvl_binary, errno, strerror(errno)); exit(EXIT_FAILURE); @@ -178,6 +180,7 @@ static int file_read_integer(const char *name, int *val) static int file_write_integer(const char *name, int val) { FILE *f; + int ret; f = fopen(name, "w"); if (!f) { @@ -189,6 +192,11 @@ static int file_write_integer(const char *name, int val) fprintf(f, "%d", val); fclose(f); + if (ret < 0) { + ksft_test_result_fail("Error writing %d to %s\n", + val, name); + return -1; + } return 0; } @@ -327,9 +335,12 @@ static void prctl_set_same(struct vec_data *data) return; } - ksft_test_result(cur_vl == data->rdvl(), - "%s set VL %d and have VL %d\n", - data->name, cur_vl, data->rdvl()); + if (cur_vl != data->rdvl()) + ksft_test_result_pass("%s current VL is %d\n", + data->name, ret); + else + ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n", + data->name, ret, data->rdvl()); } /* Can we set a new VL for this process? 
*/ @@ -538,82 +549,6 @@ static void prctl_set_onexec(struct vec_data *data) file_write_integer(data->default_vl_file, data->default_vl); } -/* For each VQ verify that setting via prctl() does the right thing */ -static void prctl_set_all_vqs(struct vec_data *data) -{ - int ret, vq, vl, new_vl; - int errors = 0; - - if (!data->min_vl || !data->max_vl) { - ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n", - data->name); - return; - } - - for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { - vl = sve_vl_from_vq(vq); - - /* Attempt to set the VL */ - ret = prctl(data->prctl_set, vl); - if (ret < 0) { - errors++; - ksft_print_msg("%s prctl set failed for %d: %d (%s)\n", - data->name, vl, - errno, strerror(errno)); - continue; - } - - new_vl = ret & PR_SVE_VL_LEN_MASK; - - /* Check that we actually have the reported new VL */ - if (data->rdvl() != new_vl) { - ksft_print_msg("Set %s VL %d but RDVL reports %d\n", - data->name, new_vl, data->rdvl()); - errors++; - } - - /* Was that the VL we asked for? */ - if (new_vl == vl) - continue; - - /* Should round up to the minimum VL if below it */ - if (vl < data->min_vl) { - if (new_vl != data->min_vl) { - ksft_print_msg("%s VL %d returned %d not minimum %d\n", - data->name, vl, new_vl, - data->min_vl); - errors++; - } - - continue; - } - - /* Should round down to maximum VL if above it */ - if (vl > data->max_vl) { - if (new_vl != data->max_vl) { - ksft_print_msg("%s VL %d returned %d not maximum %d\n", - data->name, vl, new_vl, - data->max_vl); - errors++; - } - - continue; - } - - /* Otherwise we should've rounded down */ - if (!(new_vl < vl)) { - ksft_print_msg("%s VL %d returned %d, did not round down\n", - data->name, vl, new_vl); - errors++; - - continue; - } - } - - ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n", - data->name, errors); -} - typedef void (*test_type)(struct vec_data *); static const test_type tests[] = { @@ -626,12 +561,10 @@ static const test_type tests[] = { proc_write_max, prctl_get, - prctl_set_same, prctl_set, prctl_set_no_child, prctl_set_for_child, prctl_set_onexec, - prctl_set_all_vqs, }; int main(void) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 2f8c23af3b..22722abc9d 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -310,12 +310,14 @@ int test_setup(struct tdescr *td) int test_run(struct tdescr *td) { - if (td->trigger) - return td->trigger(td); - else if (td->sig_trig) - return default_trigger(td); - else + if (td->sig_trig) { + if (td->trigger) + return td->trigger(td); + else + return default_trigger(td); + } else { return td->run(td, NULL, NULL); + } } void test_result(struct tdescr *td) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1dad8d617d..433f8bef26 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -9,9 +9,8 @@ test_tag FEATURE-DUMP.libbpf fixdep test_dev_cgroup -/test_progs -/test_progs-no_alu32 -/test_progs-bpf_gcc +/test_progs* +!test_progs.h test_verifier_log feature test_sock diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 42ffc24e9e..799b88152e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,8 +23,9 @@ BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= CFLAGS += -g -O0 -rdynamic 
-Wall $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) -LDFLAGS += $(SAN_CFLAGS) + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ + -Dbpf_prog_load=bpf_prog_test_load \ + -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread # Silence some warnings when compiled with clang @@ -45,8 +46,10 @@ ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc endif -TEST_GEN_FILES = test_lwt_ip_encap.o test_tc_edt.o -TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) +TEST_GEN_FILES = test_lwt_ip_encap.o \ + test_tc_edt.o +TEST_FILES = xsk_prereqs.sh \ + $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -105,10 +108,7 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(Q)$(RM) -r $(TEST_GEN_PROGS) - $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED) - $(Q)$(RM) -r $(TEST_GEN_FILES) - $(Q)$(RM) -r $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) $(Q)$(MAKE) -C bpf_testmod clean $(Q)$(MAKE) docs-clean endef @@ -122,15 +122,12 @@ BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a ifneq ($(CROSS_COMPILE),) HOST_BUILD_DIR := $(BUILD_DIR)/host HOST_SCRATCH_DIR := $(OUTPUT)/host-tools -HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include else HOST_BUILD_DIR := $(BUILD_DIR) HOST_SCRATCH_DIR := $(SCRATCH_DIR) -HOST_INCLUDE_DIR := $(INCLUDE_DIR) endif HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids -RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -155,7 +152,7 @@ $(notdir $(TEST_GEN_PROGS) \ # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ - $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR)) + $(INCLUDE_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) $(Q)mkdir -p $@ @@ -170,7 +167,7 @@ $(OUTPUT)/%:%.c $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@ + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1 $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -178,51 +175,41 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_tes $(Q)$(MAKE) $(submake_extras) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ +$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) + $(call msg,CC,,$@) + $(Q)$(CC) -c $(CFLAGS) -o $@ $< + DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool -$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ - OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ - BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ - BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ - cp $(RUNQSLOWER_OUTPUT)runqslower $@ +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ + cp $(SCRATCH_DIR)/runqslower $@ TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL) -$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) +$(TEST_GEN_PROGS) 
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) -CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o -TESTING_HELPERS := $(OUTPUT)/testing_helpers.o -TRACE_HELPERS := $(OUTPUT)/trace_helpers.o - -$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) -$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_cgroup_storage: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_tag: $(TESTING_HELPERS) -$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) -$(OUTPUT)/xdping: $(TESTING_HELPERS) -$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) -$(OUTPUT)/test_maps: $(TESTING_HELPERS) -$(OUTPUT)/test_verifier: $(TESTING_HELPERS) +$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c +$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c +$(OUTPUT)/test_sock: cgroup_helpers.c +$(OUTPUT)/test_sock_addr: cgroup_helpers.c +$(OUTPUT)/test_sockmap: cgroup_helpers.c +$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c +$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c +$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c +$(OUTPUT)/test_sock_fields: cgroup_helpers.c +$(OUTPUT)/test_sysctl: cgroup_helpers.c BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ + CC=$(HOSTCC) LD=$(HOSTLD) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ - LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ - LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ - prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin + prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install all: docs @@ -237,18 +224,18 @@ docs-clean: prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ - $(APIDIR)/linux/bpf.h \ - | $(BUILD_DIR)/libbpf + ../../../include/uapi/linux/bpf.h \ + | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ EXTRA_CFLAGS='-g -O0' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) -$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ - $(APIDIR)/linux/bpf.h \ - | $(HOST_BUILD_DIR)/libbpf +$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + ../../../include/uapi/linux/bpf.h \ + | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ + EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers endif @@ -271,7 +258,6 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/lib/str_error_r.c $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ - LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by @@ -283,7 +269,7 @@ $(RESOLVE_BTFIDS): 
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ define get_sys_includes $(shell $(1) -v -E - &1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - $$@ + $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) @@ -428,9 +411,10 @@ ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) $(TRUNNER_TESTS_DIR)-tests-hdr := y $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) - $$(shell (echo '/* Generated header, do not edit */'; \ - sed -n -E 's/^void (serial_)?test_([a-zA-Z0-9_]+)\((void)?\).*/DEFINE_TEST(\2)/p' \ - $(TRUNNER_TESTS_DIR)/*.c | sort ; \ + $$(shell ( cd $(TRUNNER_TESTS_DIR); \ + echo '/* Generated header, do not edit */'; \ + ls *.c 2> /dev/null | \ + sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ ) > $$@) endif @@ -469,7 +453,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ endef @@ -478,12 +462,13 @@ TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ - btf_helpers.c flow_dissector_load.h + btf_helpers.c flow_dissector_load.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) +TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs)) # Define test_progs-no_alu32 test runner. 
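The tests.h header generated by the TEST-HDR rule above is a plain X-macro list -- one DEFINE_TEST(name) line per discovered test -- and the test runner gives the macro a different meaning on each inclusion. A compressed sketch of the consuming side (test names here are illustrative, and the real struct prog_test_def in test_progs.c carries more fields than shown):

    /* A generated prog_tests/tests.h is nothing but lines like:
     *     DEFINE_TEST(attach_probe)
     *     DEFINE_TEST(fentry_fexit)
     */

    /* Pass 1: forward-declare every test entry point. */
    #define DEFINE_TEST(name) extern void test_##name(void);
    #include "tests.h"
    #undef DEFINE_TEST

    /* Pass 2: expand the same list into a dispatch table. */
    struct prog_test_def {
            const char *test_name;
            void (*run_test)(void);
    };

    static struct prog_test_def prog_test_defs[] = {
    #define DEFINE_TEST(name) { #name, &test_##name },
    #include "tests.h"
    #undef DEFINE_TEST
    };

Regenerating the header from the files on disk is what keeps the table in sync with prog_tests/*.c without hand-maintained registration code.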
@@ -528,30 +513,22 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner -$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) +$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(call msg,CC,,$@) - $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h -$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h -$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h -$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h -$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) +$(OUTPUT)/bench.o: bench.h testing_helpers.h $(OUTPUT)/bench: LDLIBS += -lm -$(OUTPUT)/bench: $(OUTPUT)/bench.o \ - $(TESTING_HELPERS) \ - $(TRACE_HELPERS) \ +$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ - $(OUTPUT)/bench_ringbufs.o \ - $(OUTPUT)/bench_bloom_filter_map.o \ - $(OUTPUT)/bench_bpf_loop.o \ - $(OUTPUT)/bench_strncmp.o + $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 42ef250c7a..9b17f28674 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -201,21 +201,6 @@ Without it, the error from compiling bpf selftests looks like: __ https://reviews.llvm.org/D93563 -btf_tag test and Clang version -============================== - -The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and -btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_]. - -Without them, the btf_tag selftest will be skipped and you will observe: - -.. code-block:: console - - # btf_tag:SKIP - -.. _0: https://reviews.llvm.org/D111588 -.. _1: https://reviews.llvm.org/D111199 - Clang dependencies for static linking tests =========================================== @@ -243,16 +228,3 @@ To fix this issue, user newer libbpf. .. Links .. _clang reloc patch: https://reviews.llvm.org/D102712 .. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst - -Clang dependencies for the u32 spill test (xdpwall) -=================================================== -The xdpwall selftest requires a change in `Clang 14`__. - -Without it, the xdpwall selftest will fail and the error message -from running test_progs will look like: - -.. code-block:: console - - test_xdpwall:FAIL:Does LLVM have https://reviews.llvm.org/D109073? 
unexpected error: -4007 - -__ https://reviews.llvm.org/D109073 diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index f973320e6d..6ea15b93a2 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -29,39 +29,26 @@ static int libbpf_print_fn(enum libbpf_print_level level, return vfprintf(stderr, format, args); } -void setup_libbpf(void) +static int bump_memlock_rlimit(void) { + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +void setup_libbpf() +{ + int err; + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); -} -void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) -{ - long total = res->false_hits + res->hits + res->drops; - - printf("Iter %3d (%7.3lfus): ", - iter, (delta_ns - 1000000000) / 1000.0); - - printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n", - res->false_hits, total, ((float)res->false_hits / total) * 100); -} - -void false_hits_report_final(struct bench_res res[], int res_cnt) -{ - long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; - int i; - - for (i = 0; i < res_cnt; i++) { - total_hits += res[i].hits; - total_false_hits += res[i].false_hits; - total_drops += res[i].drops; - } - total_ops = total_hits + total_false_hits + total_drops; - - printf("Summary: %ld false hits of %ld total operations. ", - total_false_hits, total_ops); - printf("Percentage = %2.2f %%\n", - ((float)total_false_hits / total_ops) * 100); + err = bump_memlock_rlimit(); + if (err) + fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); } void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) @@ -76,22 +63,20 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); - printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n", - hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); + printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n", + hits_per_sec, hits_per_prod, drops_per_sec); } void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; - double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0; - double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0; - double total_ops; + double hits_mean = 0.0, drops_mean = 0.0; + double hits_stddev = 0.0, drops_stddev = 0.0; for (i = 0; i < res_cnt; i++) { hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt); } - total_ops_mean = hits_mean + drops_mean; if (res_cnt > 1) { for (i = 0; i < res_cnt; i++) { @@ -101,54 +86,14 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt) drops_stddev += (drops_mean - res[i].drops / 1000000.0) * (drops_mean - res[i].drops / 1000000.0) / (res_cnt - 1.0); - total_ops = res[i].hits + res[i].drops; - total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) * - (total_ops_mean - total_ops / 1000000.0) / - (res_cnt - 1.0); } hits_stddev = sqrt(hits_stddev); drops_stddev = sqrt(drops_stddev); - total_ops_stddev = sqrt(total_ops_stddev); } printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ", hits_mean, hits_stddev, hits_mean / env.producer_cnt); - printf("drops %8.3lf \u00B1 %5.3lfM/s, ", + printf("drops %8.3lf \u00B1 %5.3lfM/s\n", drops_mean, 
drops_stddev); - printf("total operations %8.3lf \u00B1 %5.3lfM/s\n", - total_ops_mean, total_ops_stddev); -} - -void ops_report_progress(int iter, struct bench_res *res, long delta_ns) -{ - double hits_per_sec, hits_per_prod; - - hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0); - hits_per_prod = hits_per_sec / env.producer_cnt; - - printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); - - printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod); -} - -void ops_report_final(struct bench_res res[], int res_cnt) -{ - double hits_mean = 0.0, hits_stddev = 0.0; - int i; - - for (i = 0; i < res_cnt; i++) - hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); - - if (res_cnt > 1) { - for (i = 0; i < res_cnt; i++) - hits_stddev += (hits_mean - res[i].hits / 1000000.0) * - (hits_mean - res[i].hits / 1000000.0) / - (res_cnt - 1.0); - - hits_stddev = sqrt(hits_stddev); - } - printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ", - hits_mean, hits_stddev, hits_mean / env.producer_cnt); - printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); } const char *argp_program_version = "benchmark"; @@ -187,15 +132,9 @@ static const struct argp_option opts[] = { }; extern struct argp bench_ringbufs_argp; -extern struct argp bench_bloom_map_argp; -extern struct argp bench_bpf_loop_argp; -extern struct argp bench_strncmp_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, - { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, - { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, - { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, {}, }; @@ -380,23 +319,10 @@ extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_fentry; extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; -extern const struct bench bench_trig_uprobe_base; -extern const struct bench bench_trig_uprobe_with_nop; -extern const struct bench bench_trig_uretprobe_with_nop; -extern const struct bench bench_trig_uprobe_without_nop; -extern const struct bench bench_trig_uretprobe_without_nop; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; extern const struct bench bench_pb_custom; -extern const struct bench bench_bloom_lookup; -extern const struct bench bench_bloom_update; -extern const struct bench bench_bloom_false_positive; -extern const struct bench bench_hashmap_without_bloom; -extern const struct bench bench_hashmap_with_bloom; -extern const struct bench bench_bpf_loop; -extern const struct bench bench_strncmp_no_helper; -extern const struct bench bench_strncmp_helper; static const struct bench *benchs[] = { &bench_count_global, @@ -414,23 +340,10 @@ static const struct bench *benchs[] = { &bench_trig_fentry, &bench_trig_fentry_sleep, &bench_trig_fmodret, - &bench_trig_uprobe_base, - &bench_trig_uprobe_with_nop, - &bench_trig_uretprobe_with_nop, - &bench_trig_uprobe_without_nop, - &bench_trig_uretprobe_without_nop, &bench_rb_libbpf, &bench_rb_custom, &bench_pb_libbpf, &bench_pb_custom, - &bench_bloom_lookup, - &bench_bloom_update, - &bench_bloom_false_positive, - &bench_hashmap_without_bloom, - &bench_hashmap_with_bloom, - &bench_bpf_loop, - &bench_strncmp_no_helper, - &bench_strncmp_helper, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h 
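For reference, the summary numbers printed by hits_drops_report_final() above are the ordinary sample mean and sample standard deviation over the per-iteration results (note the res_cnt - 1.0 Bessel divisor), just unrolled across the hits and drops counters. The same computation in compact form, assuming for the sketch that the per-iteration counts arrive as a plain array:

    #include <math.h>

    /* Mean and sample standard deviation of n per-iteration hit counts,
     * scaled to millions as in bench.c; needs n > 1 for the stddev. */
    static void summarize_hits(const long *hits, int n,
                               double *mean, double *stddev)
    {
            double m = 0.0, var = 0.0;
            int i;

            for (i = 0; i < n; i++)
                    m += hits[i] / 1000000.0 / n;

            for (i = 0; i < n; i++)
                    var += (m - hits[i] / 1000000.0) *
                           (m - hits[i] / 1000000.0) / (n - 1.0);

            *mean = m;
            *stddev = sqrt(var);
    }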
index fb3e213df3..c1f48a473b 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -33,13 +33,12 @@ struct env { struct bench_res { long hits; long drops; - long false_hits; }; struct bench { const char *name; - void (*validate)(void); - void (*setup)(void); + void (*validate)(); + void (*setup)(); void *(*producer_thread)(void *ctx); void *(*consumer_thread)(void *ctx); void (*measure)(struct bench_res* res); @@ -54,16 +53,11 @@ struct counter { extern struct env env; extern const struct bench *bench; -void setup_libbpf(void); +void setup_libbpf(); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); -void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); -void false_hits_report_final(struct bench_res res[], int res_cnt); -void ops_report_progress(int iter, struct bench_res *res, long delta_ns); -void ops_report_final(struct bench_res res[], int res_cnt); -static inline __u64 get_time_ns(void) -{ +static inline __u64 get_time_ns() { struct timespec t; clock_gettime(CLOCK_MONOTONIC, &t); diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c index 078972ce20..befba7a826 100644 --- a/tools/testing/selftests/bpf/benchs/bench_count.c +++ b/tools/testing/selftests/bpf/benchs/bench_count.c @@ -36,7 +36,7 @@ static struct count_local_ctx { struct counter *hits; } count_local_ctx; -static void count_local_setup(void) +static void count_local_setup() { struct count_local_ctx *ctx = &count_local_ctx; diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index 3c203b6d6a..c7ec114eca 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -11,7 +11,7 @@ static struct ctx { int fd; } ctx; -static void validate(void) +static void validate() { if (env.producer_cnt != 1) { fprintf(stderr, "benchmark doesn't support multi-producer!\n"); @@ -43,7 +43,7 @@ static void measure(struct bench_res *res) res->hits = atomic_swap(&ctx.hits.value, 0); } -static void setup_ctx(void) +static void setup_ctx() { setup_libbpf(); @@ -71,36 +71,36 @@ static void attach_bpf(struct bpf_program *prog) } } -static void setup_base(void) +static void setup_base() { setup_ctx(); } -static void setup_kprobe(void) +static void setup_kprobe() { setup_ctx(); attach_bpf(ctx.skel->progs.prog1); } -static void setup_kretprobe(void) +static void setup_kretprobe() { setup_ctx(); attach_bpf(ctx.skel->progs.prog2); } -static void setup_rawtp(void) +static void setup_rawtp() { setup_ctx(); attach_bpf(ctx.skel->progs.prog3); } -static void setup_fentry(void) +static void setup_fentry() { setup_ctx(); attach_bpf(ctx.skel->progs.prog4); } -static void setup_fexit(void) +static void setup_fexit() { setup_ctx(); attach_bpf(ctx.skel->progs.prog5); diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index da8593b349..d167bffac6 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -88,12 +88,12 @@ const struct argp bench_ringbufs_argp = { static struct counter buf_hits; -static inline void bufs_trigger_batch(void) +static inline void bufs_trigger_batch() { (void)syscall(__NR_getpgid); } -static void bufs_validate(void) +static void bufs_validate() { if (env.consumer_cnt != 
1) { fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n"); @@ -132,7 +132,7 @@ static void ringbuf_libbpf_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static struct ringbuf_bench *ringbuf_setup_skeleton(void) +static struct ringbuf_bench *ringbuf_setup_skeleton() { struct ringbuf_bench *skel; @@ -167,7 +167,7 @@ static int buf_process_sample(void *ctx, void *data, size_t len) return 0; } -static void ringbuf_libbpf_setup(void) +static void ringbuf_libbpf_setup() { struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; struct bpf_link *link; @@ -223,7 +223,7 @@ static void ringbuf_custom_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static void ringbuf_custom_setup(void) +static void ringbuf_custom_setup() { struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; const size_t page_size = getpagesize(); @@ -352,7 +352,7 @@ static void perfbuf_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static struct perfbuf_bench *perfbuf_setup_skeleton(void) +static struct perfbuf_bench *perfbuf_setup_skeleton() { struct perfbuf_bench *skel; @@ -390,10 +390,15 @@ perfbuf_process_sample_raw(void *input_ctx, int cpu, return LIBBPF_PERF_EVENT_CONT; } -static void perfbuf_libbpf_setup(void) +static void perfbuf_libbpf_setup() { struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; struct perf_event_attr attr; + struct perf_buffer_raw_opts pb_opts = { + .event_cb = perfbuf_process_sample_raw, + .ctx = (void *)(long)0, + .attr = &attr, + }; struct bpf_link *link; ctx->skel = perfbuf_setup_skeleton(); @@ -418,8 +423,7 @@ static void perfbuf_libbpf_setup(void) } ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf), - args.perfbuf_sz, &attr, - perfbuf_process_sample_raw, NULL, NULL); + args.perfbuf_sz, &pb_opts); if (!ctx->perfbuf) { fprintf(stderr, "failed to create perfbuf\n"); exit(1); diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 7f957c55a3..f41a491a8c 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bench.h" #include "trigger_bench.skel.h" -#include "trace_helpers.h" /* BPF triggering benchmarks */ static struct trigger_ctx { @@ -11,7 +10,7 @@ static struct trigger_ctx { static struct counter base_hits; -static void trigger_validate(void) +static void trigger_validate() { if (env.consumer_cnt != 1) { fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); @@ -45,7 +44,7 @@ static void trigger_measure(struct bench_res *res) res->hits = atomic_swap(&ctx.skel->bss->hits, 0); } -static void setup_ctx(void) +static void setup_ctx() { setup_libbpf(); @@ -67,37 +66,37 @@ static void attach_bpf(struct bpf_program *prog) } } -static void trigger_tp_setup(void) +static void trigger_tp_setup() { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_tp); } -static void trigger_rawtp_setup(void) +static void trigger_rawtp_setup() { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); } -static void trigger_kprobe_setup(void) +static void trigger_kprobe_setup() { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kprobe); } -static void trigger_fentry_setup(void) +static void trigger_fentry_setup() { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry); } -static void trigger_fentry_sleep_setup(void) +static void 
trigger_fentry_sleep_setup() { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); } -static void trigger_fmodret_setup(void) +static void trigger_fmodret_setup() { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fmodret); @@ -108,101 +107,6 @@ static void *trigger_consumer(void *input) return NULL; } -/* make sure call is not inlined and not avoided by compiler, so __weak and - * inline asm volatile in the body of the function - * - * There is a performance difference between uprobing at nop location vs other - * instructions. So use two different targets, one of which starts with nop - * and another doesn't. - * - * GCC doesn't generate stack setup preample for these functions due to them - * having no input arguments and doing nothing in the body. - */ -__weak void uprobe_target_with_nop(void) -{ - asm volatile ("nop"); -} - -__weak void uprobe_target_without_nop(void) -{ - asm volatile (""); -} - -static void *uprobe_base_producer(void *input) -{ - while (true) { - uprobe_target_with_nop(); - atomic_inc(&base_hits.value); - } - return NULL; -} - -static void *uprobe_producer_with_nop(void *input) -{ - while (true) - uprobe_target_with_nop(); - return NULL; -} - -static void *uprobe_producer_without_nop(void *input) -{ - while (true) - uprobe_target_without_nop(); - return NULL; -} - -static void usetup(bool use_retprobe, bool use_nop) -{ - size_t uprobe_offset; - ssize_t base_addr; - struct bpf_link *link; - - setup_libbpf(); - - ctx.skel = trigger_bench__open_and_load(); - if (!ctx.skel) { - fprintf(stderr, "failed to open skeleton\n"); - exit(1); - } - - base_addr = get_base_addr(); - if (use_nop) - uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr); - else - uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr); - - link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, - use_retprobe, - -1 /* all PIDs */, - "/proc/self/exe", - uprobe_offset); - if (!link) { - fprintf(stderr, "failed to attach uprobe!\n"); - exit(1); - } - ctx.skel->links.bench_trigger_uprobe = link; -} - -static void uprobe_setup_with_nop(void) -{ - usetup(false, true); -} - -static void uretprobe_setup_with_nop(void) -{ - usetup(true, true); -} - -static void uprobe_setup_without_nop(void) -{ - usetup(false, false); -} - -static void uretprobe_setup_without_nop(void) -{ - usetup(true, false); -} - const struct bench bench_trig_base = { .name = "trig-base", .validate = trigger_validate, @@ -278,53 +182,3 @@ const struct bench bench_trig_fmodret = { .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; - -const struct bench bench_trig_uprobe_base = { - .name = "trig-uprobe-base", - .setup = NULL, /* no uprobe/uretprobe is attached */ - .producer_thread = uprobe_base_producer, - .consumer_thread = trigger_consumer, - .measure = trigger_base_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uprobe_with_nop = { - .name = "trig-uprobe-with-nop", - .setup = uprobe_setup_with_nop, - .producer_thread = uprobe_producer_with_nop, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uretprobe_with_nop = { - .name = "trig-uretprobe-with-nop", - .setup = uretprobe_setup_with_nop, - .producer_thread = uprobe_producer_with_nop, - .consumer_thread = trigger_consumer, - .measure = 
trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uprobe_without_nop = { - .name = "trig-uprobe-without-nop", - .setup = uprobe_setup_without_nop, - .producer_thread = uprobe_producer_without_nop, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uretprobe_without_nop = { - .name = "trig-uretprobe-without-nop", - .setup = uretprobe_setup_without_nop, - .producer_thread = uprobe_producer_without_nop, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index ada028aa90..af4aa04cab 100644 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -1,9 +1,35 @@ #!/bin/bash -source ./benchs/run_common.sh - set -eufo pipefail +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do summarize $b "$($RUN_BENCH $b)" diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 11ee801e75..89c6d58e5d 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -34,21 +34,6 @@ DECLARE_TRACE(bpf_testmod_test_write_bare, TP_ARGS(task, ctx) ); -#undef BPF_TESTMOD_DECLARE_TRACE -#ifdef DECLARE_TRACE_WRITABLE -#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ - DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size) -#else -#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ - DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) -#endif - -BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare, - TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx), - TP_ARGS(ctx), - sizeof(struct bpf_testmod_test_writable_ctx) -); - #endif /* _BPF_TESTMOD_EVENTS_H */ #undef TRACE_INCLUDE_PATH diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index df3b292a8f..141d8da687 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include <linux/btf.h> -#include <linux/btf_ids.h> #include <linux/error-injection.h> #include <linux/init.h> #include <linux/module.h> @@ -15,40 +13,6 @@ DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; -noinline void -bpf_testmod_test_mod_kfunc(int i) -{ - *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; } - -noinline int bpf_testmod_loop_test(int n) -{ - int i, sum = 0; - - /* the primary goal of this test is to test LBR.
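
The restored shell helpers above scrape per-bench rates such as "12.34 ± 0.56M/s" out of the bench output with sed. A rough C rendering of the same extraction, with the sample line made up for the sketch:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *line = "Summary: hits   12.34 ± 0.56M/s, drops 0.00 ± 0.00M/s";
	const char *p = strstr(line, "hits");
	double rate, stddev;

	/* "%*s" skips the "±" token; "%lf" stops at the trailing 'M'. */
	if (p && sscanf(p, "hits %lf %*s %lf", &rate, &stddev) == 2)
		printf("hits: %.2f M/s (stddev %.2f)\n", rate, stddev);
	return 0;
}
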
Create a lot of - * branches in the function, so we can catch it easily. - */ - for (i = 0; i < n; i++) - sum += i; - return sum; -} - -__weak noinline struct file *bpf_testmod_return_ptr(int arg) -{ - static struct file f = {}; - - switch (arg) { - case 1: return (void *)EINVAL; /* user addr */ - case 2: return (void *)0xcafe4a11; /* user addr */ - case 3: return (void *)-EINVAL; /* canonical, but invalid */ - case 4: return (void *)(1ull << 60); /* non-canonical and invalid */ - case 5: return (void *)~(1ull << 30); /* trigger extable */ - case 6: return &f; /* valid addr */ - case 7: return (void *)((long)&f | 1); /* kernel tricks */ - default: return NULL; - } -} - noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -59,26 +23,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, .off = off, .len = len, }; - int i = 1; - while (bpf_testmod_return_ptr(i)) - i++; - - /* This is always true. Use the check to make sure the compiler - * doesn't remove bpf_testmod_loop_test. - */ - if (bpf_testmod_loop_test(101) > 100) - trace_bpf_testmod_test_read(current, &ctx); - - /* Magic number to enable writable tp */ - if (len == 64) { - struct bpf_testmod_test_writable_ctx writable = { - .val = 1024, - }; - trace_bpf_testmod_test_writable_bare(&writable); - if (writable.early_ret) - return snprintf(buf, len, "%d\n", writable.val); - } + trace_bpf_testmod_test_read(current, &ctx); return -EIO; /* always fail */ } @@ -109,26 +55,13 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_kfunc_ids) -BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_kfunc_ids) - -static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set); - static int bpf_testmod_init(void) { - int ret; - - ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); - if (ret) - return ret; - register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); - return 0; + return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } static void bpf_testmod_exit(void) { - unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } @@ -138,3 +71,4 @@ module_exit(bpf_testmod_exit); MODULE_AUTHOR("Andrii Nakryiko"); MODULE_DESCRIPTION("BPF selftests module"); MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index 0d71e26078..b3892dc401 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -17,9 +17,4 @@ struct bpf_testmod_test_write_ctx { size_t len; }; -struct bpf_testmod_test_writable_ctx { - bool early_ret; - int val; -}; - #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index b5941d514e..0a4ad7cb2c 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -24,13 +24,11 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_DECL_TAG] = "DECL_TAG", - [BTF_KIND_TYPE_TAG] = "TYPE_TAG", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_TYPE_TAG) + if (kind > BTF_KIND_DATASEC) return "UNKNOWN"; return 
btf_kind_str_mapping[kind]; } @@ -110,7 +108,6 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_TYPEDEF: - case BTF_KIND_TYPE_TAG: fprintf(out, " type_id=%u", t->type); break; case BTF_KIND_ARRAY: { @@ -180,10 +177,6 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) case BTF_KIND_FLOAT: fprintf(out, " size=%u", t->size); break; - case BTF_KIND_DECL_TAG: - fprintf(out, " type_id=%u component_idx=%d", - t->type, btf_decl_tag(t)->component_idx); - break; default: break; } @@ -217,7 +210,7 @@ int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]) int i; bool ok = true; - ASSERT_EQ(btf__type_cnt(btf) - 1, nr_types, "btf_nr_types"); + ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types"); for (i = 1; i <= nr_types; i++) { if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump")) @@ -240,6 +233,7 @@ const char *btf_type_c_dump(const struct btf *btf) static char buf[16 * 1024]; FILE *buf_file; struct btf_dump *d = NULL; + struct btf_dump_opts opts = {}; int err, i; buf_file = fmemopen(buf, sizeof(buf) - 1, "w"); @@ -248,13 +242,14 @@ const char *btf_type_c_dump(const struct btf *btf) return NULL; } - d = btf_dump__new(btf, btf_dump_printf, buf_file, NULL); + opts.ctx = buf_file; + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); if (libbpf_get_error(d)) { fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d)); goto err_out; } - for (i = 1; i < btf__type_cnt(btf); i++) { + for (i = 1; i <= btf__get_nr_types(btf); i++) { err = btf_dump__dump_type(d, i); if (err) { fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 9d59c3990c..f3daa44a82 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -33,9 +33,10 @@ #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" + #define format_cgroup_path(buf, path) \ - snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ - CGROUP_WORK_DIR, getpid(), path) + snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ + CGROUP_WORK_DIR, path) #define format_classid_path(buf) \ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index fcc9cb91b2..629da3854b 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -26,4 +26,4 @@ int join_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -#endif /* __CGROUP_HELPERS_H */ \ No newline at end of file +#endif /* __CGROUP_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f6287132fa..5192305159 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -38,9 +38,7 @@ CONFIG_IPV6_SIT=m CONFIG_BPF_JIT=y CONFIG_BPF_LSM=y CONFIG_SECURITY=y -CONFIG_RC_CORE=y CONFIG_LIRC=y -CONFIG_BPF_LIRC_MODE2=y CONFIG_IMA=y CONFIG_SECURITYFS=y CONFIG_IMA_WRITE_POLICY=y diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 87fd1aa323..3fd83b9dc1 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ 
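
The btf_helpers.c hunks above restore the older, count-based BTF iteration (btf__get_nr_types() with 1-based, inclusive IDs) and the opts-struct variant of btf_dump__new(). A small sketch of walking every type with that API; the btf and dump handles are assumed to be set up as in btf_type_c_dump() above:

#include <bpf/btf.h>

/* Type IDs are 1-based; ID 0 is the implicit void type. */
static int dump_all_types(const struct btf *btf, struct btf_dump *d)
{
	__u32 id;
	int err;

	for (id = 1; id <= btf__get_nr_types(btf); id++) {
		err = btf_dump__dump_type(d, id);	/* 0 on success */
		if (err)
			return err;
	}
	return 0;
}
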
b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -17,7 +17,7 @@ const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; const char *cfg_map_name = "jmp_table"; bool cfg_attach = true; -char *cfg_prog_name; +char *cfg_section_name; char *cfg_path_name; static void load_and_attach_program(void) @@ -25,11 +25,7 @@ static void load_and_attach_program(void) { int prog_fd, ret; struct bpf_object *obj; - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - if (ret) - error(1, 0, "failed to enable libbpf strict mode: %d", ret); - - ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name, + ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, cfg_map_name, NULL, &prog_fd, NULL); if (ret) error(1, 0, "bpf_flow_load %s", cfg_path_name); @@ -79,15 +75,15 @@ static void parse_opts(int argc, char **argv) break; case 'p': if (cfg_path_name) - error(1, 0, "only one path can be given"); + error(1, 0, "only one prog name can be given"); cfg_path_name = optarg; break; case 's': - if (cfg_prog_name) - error(1, 0, "only one prog can be given"); + if (cfg_section_name) + error(1, 0, "only one section can be given"); - cfg_prog_name = optarg; + cfg_section_name = optarg; break; } } @@ -98,7 +94,7 @@ static void parse_opts(int argc, char **argv) if (cfg_attach && !cfg_path_name) error(1, 0, "must provide a path to the BPF program"); - if (cfg_attach && !cfg_prog_name) + if (cfg_attach && !cfg_section_name) error(1, 0, "must provide a section name"); } diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index f40b585f4e..7290401ec1 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -4,11 +4,10 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "testing_helpers.h" static inline int bpf_flow_load(struct bpf_object **obj, const char *path, - const char *prog_name, + const char *section_name, const char *map_name, const char *keys_map_name, int *prog_fd, @@ -19,12 +18,18 @@ static inline int bpf_flow_load(struct bpf_object **obj, int prog_array_fd; int ret, fd, i; - ret = bpf_prog_test_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj, + ret = bpf_prog_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj, prog_fd); if (ret) return ret; - main_prog = bpf_object__find_program_by_name(*obj, prog_name); + main_prog = NULL; + bpf_object__for_each_program(prog, *obj) { + if (strcmp(section_name, bpf_program__section_name(prog)) == 0) { + main_prog = prog; + break; + } + } if (!main_prog) return -1; diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index 3a7b82bd9e..99628e1a1e 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -19,7 +19,6 @@ #include #include "cgroup_helpers.h" -#include "testing_helpers.h" #include "bpf_rlimit.h" #define CHECK(condition, tag, format...)
({ \ @@ -67,8 +66,8 @@ int main(int argc, char **argv) if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) return 1; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) goto cleanup_cgroup_env; cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids"); diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c index 78c76496b1..f4d870da76 100644 --- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c @@ -68,6 +68,13 @@ static void map_batch_verify(int *visited, __u32 max_entries, int *keys, static void __test_map_lookup_and_update_batch(bool is_pcpu) { + struct bpf_create_map_attr xattr = { + .name = "array_map", + .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : + BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__s64), + }; int map_fd, *keys, *visited; __u32 count, total, total_success; const __u32 max_entries = 10; @@ -79,10 +86,10 @@ static void __test_map_lookup_and_update_batch(bool is_pcpu) .flags = 0, ); - map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY, - "array_map", sizeof(int), sizeof(__s64), max_entries, NULL); + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); CHECK(map_fd == -1, - "bpf_map_create()", "error:%s\n", strerror(errno)); + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); value_size = sizeof(__s64); if (is_pcpu) diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c index f807d53fd8..976bf415fb 100644 --- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c @@ -83,15 +83,22 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu) int err, step, value_size; bool nospace_err; void *values; + struct bpf_create_map_attr xattr = { + .name = "hash_map", + .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : + BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(int), + }; DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = 0, .flags = 0, ); - map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : BPF_MAP_TYPE_HASH, - "hash_map", sizeof(int), sizeof(int), max_entries, NULL); + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); CHECK(map_fd == -1, - "bpf_map_create()", "error:%s\n", strerror(errno)); + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); value_size = is_pcpu ? 
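
The map_tests hunks above and below restore bpf_create_map_xattr(), which takes one attr struct instead of positional arguments. A minimal sketch of that style of map creation; "demo_map" and the sizes are illustrative:

#include <bpf/bpf.h>

static int create_demo_array(__u32 max_entries)
{
	struct bpf_create_map_attr xattr = {
		.name = "demo_map",
		.map_type = BPF_MAP_TYPE_ARRAY,
		.key_size = sizeof(int),
		.value_size = sizeof(long long),
		.max_entries = max_entries,
	};

	/* Returns a map fd on success, negative on error (errno set),
	 * which is why the tests above CHECK for map_fd == -1.
	 */
	return bpf_create_map_xattr(&xattr);
}
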
sizeof(value) : sizeof(int); keys = malloc(max_entries * sizeof(int)); diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c index 87d07b596e..2e986e5e4c 100644 --- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c @@ -64,7 +64,13 @@ static void map_batch_verify(int *visited, __u32 max_entries, void test_lpm_trie_map_batch_ops(void) { - LIBBPF_OPTS(bpf_map_create_opts, create_opts, .map_flags = BPF_F_NO_PREALLOC); + struct bpf_create_map_attr xattr = { + .name = "lpm_trie_map", + .map_type = BPF_MAP_TYPE_LPM_TRIE, + .key_size = sizeof(struct test_lpm_key), + .value_size = sizeof(int), + .map_flags = BPF_F_NO_PREALLOC, + }; struct test_lpm_key *keys, key; int map_fd, *values, *visited; __u32 step, count, total, total_success; @@ -76,10 +82,9 @@ void test_lpm_trie_map_batch_ops(void) .flags = 0, ); - map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map", - sizeof(struct test_lpm_key), sizeof(int), - max_entries, &create_opts); - CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n", + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); keys = malloc(max_entries * sizeof(struct test_lpm_key)); diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 099eb4dfd4..e569edc679 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -19,12 +19,16 @@ #include #include -static struct bpf_map_create_opts map_opts = { - .sz = sizeof(map_opts), +static struct bpf_create_map_attr xattr = { + .name = "sk_storage_map", + .map_type = BPF_MAP_TYPE_SK_STORAGE, + .map_flags = BPF_F_NO_PREALLOC, + .max_entries = 0, + .key_size = 4, + .value_size = 8, .btf_key_type_id = 1, .btf_value_type_id = 3, .btf_fd = -1, - .map_flags = BPF_F_NO_PREALLOC, }; static unsigned int nr_sk_threads_done; @@ -136,7 +140,7 @@ static int load_btf(void) memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), btf_str_sec, sizeof(btf_str_sec)); - return bpf_btf_load(raw_btf, sizeof(raw_btf), NULL); + return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); } static int create_sk_storage_map(void) @@ -146,13 +150,13 @@ static int create_sk_storage_map(void) btf_fd = load_btf(); CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", btf_fd, errno); - map_opts.btf_fd = btf_fd; + xattr.btf_fd = btf_fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); - map_opts.btf_fd = -1; + map_fd = bpf_create_map_xattr(&xattr); + xattr.btf_fd = -1; close(btf_fd); CHECK(map_fd == -1, - "bpf_map_create()", "errno:%d\n", errno); + "bpf_create_map_xattr()", "errno:%d\n", errno); return map_fd; } @@ -459,20 +463,20 @@ static void test_sk_storage_map_basic(void) int cnt; int lock; } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; - struct bpf_map_create_opts bad_xattr; + struct bpf_create_map_attr bad_xattr; int btf_fd, map_fd, sk_fd, err; btf_fd = load_btf(); CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", btf_fd, errno); - map_opts.btf_fd = btf_fd; + xattr.btf_fd = btf_fd; sk_fd = socket(AF_INET6, SOCK_STREAM, 0); CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n", sk_fd, errno); - map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); - 
CHECK(map_fd == -1, "bpf_map_create(good_xattr)", + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)", "map_fd:%d errno:%d\n", map_fd, errno); /* Add new elem */ @@ -556,29 +560,31 @@ static void test_sk_storage_map_basic(void) CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); + memcpy(&bad_xattr, &xattr, sizeof(xattr)); bad_xattr.btf_key_type_id = 0; - err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); - CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", - "err:%d errno:%d\n", err, errno); - - memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); - bad_xattr.btf_key_type_id = 3; - err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); - CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", - "err:%d errno:%d\n", err, errno); - - err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 1, &map_opts); - CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", - "err:%d errno:%d\n", err, errno); - - memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); - bad_xattr.map_flags = 0; - err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); + err = bpf_create_map_xattr(&bad_xattr); CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", "err:%d errno:%d\n", err, errno); - map_opts.btf_fd = -1; + memcpy(&bad_xattr, &xattr, sizeof(xattr)); + bad_xattr.btf_key_type_id = 3; + err = bpf_create_map_xattr(&bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + + memcpy(&bad_xattr, &xattr, sizeof(xattr)); + bad_xattr.max_entries = 1; + err = bpf_create_map_xattr(&bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + + memcpy(&bad_xattr, &xattr, sizeof(xattr)); + bad_xattr.map_flags = 0; + err = bpf_create_map_xattr(&bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + + xattr.btf_fd = -1; close(btf_fd); close(map_fd); close(sk_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 0ee29e11ea..5861446d07 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -39,13 +39,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(id=0,off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=inv2"}, - {1, "R3_w=inv4"}, - {2, "R3_w=inv8"}, - {3, "R3_w=inv16"}, - {4, "R3_w=inv32"}, + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv32"}, }, }, { @@ -67,19 +67,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(id=0,off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=inv1"}, - {1, "R3_w=inv2"}, - {2, "R3_w=inv4"}, - {3, "R3_w=inv8"}, - {4, "R3_w=inv16"}, - {5, "R3_w=inv1"}, - {6, "R4_w=inv32"}, - {7, "R4_w=inv16"}, - {8, "R4_w=inv8"}, - {9, "R4_w=inv4"}, - {10, "R4_w=inv2"}, + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, + {4, "R3_w=inv8"}, + {5, "R3_w=inv16"}, + {6, "R3_w=inv1"}, + {7, "R4_w=inv32"}, + {8, "R4_w=inv16"}, + {9, "R4_w=inv8"}, + {10, "R4_w=inv4"}, + {11, 
"R4_w=inv2"}, }, }, { @@ -96,14 +96,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(id=0,off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=inv4"}, - {1, "R3_w=inv8"}, - {2, "R3_w=inv10"}, - {3, "R4_w=inv8"}, - {4, "R4_w=inv12"}, - {5, "R4_w=inv14"}, + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, + {4, "R4_w=inv8"}, + {5, "R4_w=inv12"}, + {6, "R4_w=inv14"}, }, }, { @@ -118,12 +118,12 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(id=0,off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=inv7"}, + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, {1, "R3_w=inv7"}, - {2, "R3_w=inv14"}, - {3, "R3_w=inv56"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, + {4, "R3_w=inv56"}, }, }, @@ -161,19 +161,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w=pkt(id=0,off=8,r=8,imm=0)"}, - {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {7, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {8, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {9, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {10, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {12, "R3_w=pkt_end(id=0,off=0,imm=0)"}, - {17, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {18, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {19, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {20, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {21, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {22, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {18, "R3=pkt_end(id=0,off=0,imm=0)"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -194,16 +194,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {7, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {11, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {12, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {13, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {14, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {15, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 
0x1fe))"}, + {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -234,14 +234,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, - {4, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, - {5, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, - {9, "R2=pkt(id=0,off=0,r=18,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, + {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {13, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -296,8 +296,8 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ @@ -313,11 +313,11 @@ static struct bpf_align_test tests[] = { /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {17, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {18, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -329,18 +329,18 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {25, "R5_w=pkt(id=0,off=14,r=8"}, + {26, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). */ - {26, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {27, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {28, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -386,13 +386,13 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. 
*/ - {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {8, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, - {12, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so @@ -403,12 +403,12 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {17, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, - {20, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, + {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so @@ -448,18 +448,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w=pkt_end(id=0,off=0,imm=0)"}, + {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {5, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, + {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {6, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, - {12, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -502,14 +502,14 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. 
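
The verifier strings matched throughout these align tests encode value ranges as var_off=(value; mask) tristate numbers: bits set in mask are unknown, bits set in value are known ones. A quick standalone check that (0x2; 0x7fc), which the comments call "(4n+2)", really pins the low two bits; the printed values are a sample of the set:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long value = 0x2, mask = 0x7fc;

	/* Bits outside the mask are fully known... */
	assert((mask & 0x3) == 0);
	/* ...and the known low bits equal 2, so every member is 4n + 2. */
	assert((value & 0x3) == 0x2);

	/* Enumerate a few members: value | (any subset of mask). */
	for (unsigned long bits = 0; bits <= 0x7fc; bits += 0x100)
		printf("%lu\n", value | (bits & mask));
	return 0;
}
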
*/ - {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {10, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ - {11, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, @@ -556,14 +556,14 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ - {14, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, @@ -594,12 +594,6 @@ static int do_test_single(struct bpf_align_test *test) struct bpf_insn *prog = test->insns; int prog_type = test->prog_type; char bpf_vlog_copy[32768]; - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .prog_flags = BPF_F_STRICT_ALIGNMENT, - .log_buf = bpf_vlog, - .log_size = sizeof(bpf_vlog), - .log_level = 2, - ); const char *line_ptr; int cur_line = -1; int prog_len, i; @@ -607,8 +601,9 @@ static int do_test_single(struct bpf_align_test *test) int ret; prog_len = probe_filter_length(prog); - fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - prog, prog_len, &opts); + fd_prog = bpf_verify_program(prog_type ? 
: BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, BPF_F_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); @@ -625,15 +620,12 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(bpf_vlog_copy, "\n"); for (i = 0; i < MAX_MATCHES; i++) { struct bpf_reg_match m = test->matches[i]; - int tmp; if (!m.match) break; while (line_ptr) { cur_line = -1; sscanf(line_ptr, "%u: ", &cur_line); - if (cur_line == -1) - sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line); if (cur_line == m.line) break; line_ptr = strtok(NULL, "\n"); @@ -645,19 +637,7 @@ static int do_test_single(struct bpf_align_test *test) printf("%s", bpf_vlog); break; } - /* Check the next line as well in case the previous line - * did not have a corresponding bpf insn. Example: - * func#0 @0 - * 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 - * 0: (b7) r3 = 2 ; R3_w=inv2 - */ if (!strstr(line_ptr, m.match)) { - cur_line = -1; - line_ptr = strtok(NULL, "\n"); - sscanf(line_ptr, "%u: ", &cur_line); - } - if (cur_line != m.line || !line_ptr || - !strstr(line_ptr, m.match)) { printf("Failed to find match %u: %s\n", m.line, m.match); ret = 1; diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 86b7d5d84e..ba0e1efe5a 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -4,13 +4,13 @@ #include "atomics.lskel.h" -static void test_add(struct atomics_lskel *skel) +static void test_add(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics_lskel__add__attach(skel); + link_fd = atomics__add__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; @@ -36,13 +36,13 @@ static void test_add(struct atomics_lskel *skel) close(link_fd); } -static void test_sub(struct atomics_lskel *skel) +static void test_sub(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics_lskel__sub__attach(skel); + link_fd = atomics__sub__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; @@ -69,13 +69,13 @@ static void test_sub(struct atomics_lskel *skel) close(link_fd); } -static void test_and(struct atomics_lskel *skel) +static void test_and(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics_lskel__and__attach(skel); + link_fd = atomics__and__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; @@ -97,13 +97,13 @@ static void test_and(struct atomics_lskel *skel) close(link_fd); } -static void test_or(struct atomics_lskel *skel) +static void test_or(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics_lskel__or__attach(skel); + link_fd = atomics__or__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; @@ -126,13 +126,13 @@ static void test_or(struct atomics_lskel *skel) close(link_fd); } -static void test_xor(struct atomics_lskel *skel) +static void test_xor(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics_lskel__xor__attach(skel); + link_fd = atomics__xor__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; @@ -154,20 +154,20 @@ static void test_xor(struct atomics_lskel *skel) close(link_fd); } -static void test_cmpxchg(struct atomics_lskel *skel) +static void test_cmpxchg(struct atomics *skel) { int err, 
prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics_lskel__cmpxchg__attach(skel); + link_fd = atomics__cmpxchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; prog_fd = skel->progs.cmpxchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run cmpxchg", + if (CHECK(err || retval, "test_run add", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) goto cleanup; @@ -183,20 +183,20 @@ static void test_cmpxchg(struct atomics_lskel *skel) close(link_fd); } -static void test_xchg(struct atomics_lskel *skel) +static void test_xchg(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics_lskel__xchg__attach(skel); + link_fd = atomics__xchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; prog_fd = skel->progs.xchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run xchg", + if (CHECK(err || retval, "test_run add", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) goto cleanup; @@ -212,10 +212,10 @@ static void test_xchg(struct atomics_lskel *skel) void test_atomics(void) { - struct atomics_lskel *skel; + struct atomics *skel; __u32 duration = 0; - skel = atomics_lskel__open_and_load(); + skel = atomics__open_and_load(); if (CHECK(!skel, "skel_load", "atomics skeleton failed\n")) return; @@ -225,7 +225,6 @@ void test_atomics(void) test__skip(); goto cleanup; } - skel->bss->pid = getpid(); if (test__start_subtest("add")) test_add(skel); @@ -243,5 +242,5 @@ void test_atomics(void) test_xchg(skel); cleanup: - atomics_lskel__destroy(skel); + atomics__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index d0bd51eb23..bf307bb9e4 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -5,11 +5,6 @@ /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); -/* attach point */ -static void method(void) { - return ; -} - void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -19,26 +14,12 @@ void test_attach_probe(void) struct test_attach_probe* skel; size_t uprobe_offset; ssize_t base_addr, ref_ctr_offset; - bool legacy; - - /* Check if new-style kprobe/uprobe API is supported. - * Kernels that support new FD-based kprobe and uprobe BPF attachment - * through perf_event_open() syscall expose - * /sys/bus/event_source/devices/kprobe/type and - * /sys/bus/event_source/devices/uprobe/type files, respectively. They - * contain magic numbers that are passed as "type" field of - * perf_event_attr. Lack of such file in the system indicates legacy - * kernel with old-style kprobe/uprobe attach interface through - * creating per-probe event through tracefs. For such cases - * ref_ctr_offset feature is not supported, so we don't test it. 
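
The comment deleted above describes how a kernel with the FD-based kprobe/uprobe attach interface is detected: the perf PMU "type" file exists under sysfs only on such kernels. A sketch of that check; the test itself probes the kprobe path, the uprobe path here is the analogous assumption:

#include <unistd.h>
#include <stdbool.h>

static bool kernel_has_fd_based_uprobes(void)
{
	/* The file holds the magic number passed as perf_event_attr.type;
	 * its mere presence is enough for feature detection.
	 */
	return access("/sys/bus/event_source/devices/uprobe/type", F_OK) == 0;
}
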
- */ - legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0; base_addr = get_base_addr(); if (CHECK(base_addr < 0, "get_base_addr", "failed to find base addr: %zd", base_addr)) return; - uprobe_offset = get_uprobe_offset(&method, base_addr); + uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) @@ -64,11 +45,10 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; - if (!legacy) - ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); + ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); uprobe_opts.retprobe = false; - uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */, "/proc/self/exe", @@ -78,12 +58,11 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uprobe = uprobe_link; - if (!legacy) - ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); + ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ uprobe_opts.retprobe = true; - uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */, "/proc/self/exe", @@ -103,7 +82,7 @@ void test_attach_probe(void) goto cleanup; /* trigger & validate uprobe & uretprobe */ - method(); + get_base_addr(); if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", "wrong uprobe res: %d\n", skel->bss->uprobe_res)) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index b84f859b12..dc18e5ae0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -469,12 +469,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * fills seq_file buffer and then the other will trigger * overflow and needs restart. 
*/ - map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", + map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (CHECK(map1_fd < 0, "bpf_create_map", "map_creation failed: %s\n", strerror(errno))) goto out; - map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", + map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (CHECK(map2_fd < 0, "bpf_create_map", "map_creation failed: %s\n", strerror(errno))) goto free_map1; @@ -589,7 +589,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) static void test_bpf_hash_map(void) { - __u32 expected_key_a = 0, expected_key_b = 0; + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; @@ -638,6 +638,7 @@ static void test_bpf_hash_map(void) val = i + 4; expected_key_a += key.a; expected_key_b += key.b; + expected_key_c += key.c; expected_val += val; err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); @@ -684,7 +685,7 @@ static void test_bpf_hash_map(void) static void test_bpf_percpu_hash_map(void) { - __u32 expected_key_a = 0, expected_key_b = 0; + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; @@ -699,13 +700,14 @@ static void test_bpf_percpu_hash_map(void) char buf[64]; void *val; + val = malloc(8 * bpf_num_possible_cpus()); + skel = bpf_iter_bpf_percpu_hash_map__open(); if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", "skeleton open failed\n")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); - val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_hash_map__load(skel); if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", @@ -720,6 +722,7 @@ static void test_bpf_percpu_hash_map(void) key.c = i + 3; expected_key_a += key.a; expected_key_b += key.b; + expected_key_c += key.c; for (j = 0; j < bpf_num_possible_cpus(); j++) { *(__u32 *)(val + j * 8) = i + j; @@ -769,7 +772,6 @@ static void test_bpf_percpu_hash_map(void) bpf_link__destroy(link); out: bpf_iter_bpf_percpu_hash_map__destroy(skel); - free(val); } static void test_bpf_array_map(void) @@ -870,13 +872,14 @@ static void test_bpf_percpu_array_map(void) void *val; int len; + val = malloc(8 * bpf_num_possible_cpus()); + skel = bpf_iter_bpf_percpu_array_map__open(); if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", "skeleton open failed\n")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); - val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_array_map__load(skel); if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", @@ -932,7 +935,6 @@ static void test_bpf_percpu_array_map(void) bpf_link__destroy(link); out: bpf_iter_bpf_percpu_array_map__destroy(skel); - free(val); } /* An iterator program deletes all local storage in a map. 
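
Both per-CPU map tests above size their value buffer as 8 * bpf_num_possible_cpus() because a per-CPU lookup returns one 8-byte slot per possible CPU. A sketch of consuming such a value, assuming map_fd refers to an existing BPF_MAP_TYPE_PERCPU_* map with 8-byte values:

#include <stdlib.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>	/* libbpf_num_possible_cpus() */

static long long sum_percpu_value(int map_fd, __u32 key)
{
	int n = libbpf_num_possible_cpus();
	long long *vals, sum = 0;

	if (n <= 0)
		return 0;
	vals = calloc(n, sizeof(*vals));	/* one 8-byte slot per CPU */
	if (vals && !bpf_map_lookup_elem(map_fd, &key, vals))
		for (int i = 0; i < n; i++)
			sum += vals[i];
	free(vals);
	return sum;
}
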
*/ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c index b52ff8ce34..85babb0487 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c @@ -179,7 +179,7 @@ static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel, free_fds(est_fds, nr_est); } -void serial_test_bpf_iter_setsockopt(void) +void test_bpf_iter_setsockopt(void) { struct bpf_iter_setsockopt *iter_skel = NULL; struct bpf_cubic *cubic_skel = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index dbe56fa858..284d5921c3 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -3,7 +3,7 @@ #define nr_iters 2 -void serial_test_bpf_obj_id(void) +void test_bpf_obj_id(void) { const __u64 array_magic_value = 0xfaceb00c; const __u32 array_key = 0; @@ -48,7 +48,7 @@ void serial_test_bpf_obj_id(void) bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { now = time(NULL); - err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &objs[i], &prog_fds[i]); /* test_obj_id.o is a dumb prog. It should never fail * to load. @@ -65,8 +65,8 @@ void serial_test_bpf_obj_id(void) if (CHECK_FAIL(err)) goto done; - prog = bpf_object__find_program_by_name(objs[i], - "test_obj_id"); + prog = bpf_object__find_program_by_title(objs[i], + "raw_tp/sys_enter"); if (CHECK_FAIL(!prog)) goto done; links[i] = bpf_program__attach(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 8f7a1cef7d..94e03df69d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -217,22 +217,21 @@ static bool found; static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { - const char *prog_name, *log_buf; + char *log_buf; if (level != LIBBPF_WARN || - !strstr(format, "-- BEGIN PROG LOAD LOG --")) { + strcmp(format, "libbpf: \n%s\n")) { vprintf(format, args); return 0; } - prog_name = va_arg(args, char *); log_buf = va_arg(args, char *); if (!log_buf) goto out; if (err_str && strstr(log_buf, err_str) != NULL) found = true; out: - printf(format, prog_name, log_buf); + printf(format, log_buf); return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index ff6cce9fef..3d002c245d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -19,28 +19,16 @@ extern int extra_prog_load_log_flags; static int check_load(const char *file, enum bpf_prog_type type) { + struct bpf_prog_load_attr attr; struct bpf_object *obj = NULL; - struct bpf_program *prog; - int err; + int err, prog_fd; - obj = bpf_object__open_file(file, NULL); - err = libbpf_get_error(obj); - if (err) - return err; - - prog = bpf_object__next_program(obj, NULL); - if (!prog) { - err = -ENOENT; - goto err_out; - } - - bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); - bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); - - err = bpf_object__load(obj); - -err_out: + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + 
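
check_load() above is mid-rewrite at this point, filling a bpf_prog_load_attr before handing it to bpf_prog_load_xattr(). A self-contained sketch of that attr-based loading path for reference; log_level 4 and the flag mirror the values used above, the function name is hypothetical:

#include <string.h>
#include <bpf/libbpf.h>

static int load_with_flags(const char *file, enum bpf_prog_type type)
{
	struct bpf_prog_load_attr attr;
	struct bpf_object *obj = NULL;
	int err, prog_fd;

	memset(&attr, 0, sizeof(attr));
	attr.file = file;
	attr.prog_type = type;
	attr.log_level = 4;			/* verbose verifier stats */
	attr.prog_flags = BPF_F_TEST_RND_HI32;

	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
	bpf_object__close(obj);			/* NULL-safe on failure */
	return err;
}
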
attr.prog_type = type; + attr.log_level = 4 | extra_prog_load_log_flags; + attr.prog_flags = BPF_F_TEST_RND_HI32; + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); bpf_object__close(obj); return err; } @@ -51,183 +39,82 @@ struct scale_test_def { bool fails; }; -static void scale_test(const char *file, - enum bpf_prog_type attach_type, - bool should_fail) +void test_bpf_verif_scale(void) { + struct scale_test_def tests[] = { + { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ }, + + { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS }, + { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, + { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, + + { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + + /* full unroll by llvm */ + { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + + /* partial unroll. llvm will unroll loop ~150 times. + * C loop count -> 600. + * Asm loop count -> 4. + * 16k insns in loop body. + * Total of 5 such loops. Total program size ~82k insns. + */ + { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + + /* no unroll at all. + * C loop count -> 600. + * ASM loop count -> 600. + * ~110 insns in loop body. + * Total of 5 such loops. Total program size ~1500 insns. + */ + { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + + { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "loop4.o", BPF_PROG_TYPE_SCHED_CLS }, + { "loop5.o", BPF_PROG_TYPE_SCHED_CLS }, + { "loop6.o", BPF_PROG_TYPE_KPROBE }, + + /* partial unroll. 19k insn in a loop. + * Total program size 20.8k insn. + * ~350k processed_insns + */ + { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + + /* no unroll, tiny loops */ + { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + + /* non-inlined subprogs */ + { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + + { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, + { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, + + { "test_xdp_loop.o", BPF_PROG_TYPE_XDP }, + { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL }, + }; libbpf_print_fn_t old_print_fn = NULL; - int err; + int err, i; if (env.verifier_stats) { test__force_log(); old_print_fn = libbpf_set_print(libbpf_debug_print); } - err = check_load(file, attach_type); - if (should_fail) - ASSERT_ERR(err, "expect_error"); - else - ASSERT_OK(err, "expect_success"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + const struct scale_test_def *test = &tests[i]; + + if (!test__start_subtest(test->file)) + continue; + + err = check_load(test->file, test->attach_type); + CHECK_FAIL(err && !test->fails); + } if (env.verifier_stats) libbpf_set_print(old_print_fn); } - -void test_verif_scale1() -{ - scale_test("test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS, false); -} - -void test_verif_scale2() -{ - scale_test("test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS, false); -} - -void test_verif_scale3() -{ - scale_test("test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS, false); -} - -void test_verif_scale_pyperf_global() -{ - scale_test("pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_pyperf_subprogs() -{ - scale_test("pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_pyperf50() -{ - /* full unroll by llvm */ - scale_test("pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT, 
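
Both the scale tests above and the bpf_tcp_ca test earlier intercept libbpf's logging with libbpf_set_print() to scan verifier output. A minimal sketch of installing and restoring such a hook; the filtering policy is an illustrative choice:

#include <stdarg.h>
#include <stdio.h>
#include <bpf/libbpf.h>

static int quiet_print(enum libbpf_print_level level,
		       const char *format, va_list args)
{
	if (level == LIBBPF_DEBUG)
		return 0;			/* drop debug chatter */
	return vfprintf(stderr, format, args);
}

/* Usage, mirroring the tests above:
 *	libbpf_print_fn_t old_fn = libbpf_set_print(quiet_print);
 *	... run loads, scan captured output ...
 *	libbpf_set_print(old_fn);
 */
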
false); -} - -void test_verif_scale_pyperf100() -{ - /* full unroll by llvm */ - scale_test("pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_pyperf180() -{ - /* full unroll by llvm */ - scale_test("pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_pyperf600() -{ - /* partial unroll. llvm will unroll loop ~150 times. - * C loop count -> 600. - * Asm loop count -> 4. - * 16k insns in loop body. - * Total of 5 such loops. Total program size ~82k insns. - */ - scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_pyperf600_bpf_loop(void) -{ - /* use the bpf_loop helper*/ - scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_pyperf600_nounroll() -{ - /* no unroll at all. - * C loop count -> 600. - * ASM loop count -> 600. - * ~110 insns in loop body. - * Total of 5 such loops. Total program size ~1500 insns. - */ - scale_test("pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_loop1() -{ - scale_test("loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_loop2() -{ - scale_test("loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_loop3_fail() -{ - scale_test("loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */); -} - -void test_verif_scale_loop4() -{ - scale_test("loop4.o", BPF_PROG_TYPE_SCHED_CLS, false); -} - -void test_verif_scale_loop5() -{ - scale_test("loop5.o", BPF_PROG_TYPE_SCHED_CLS, false); -} - -void test_verif_scale_loop6() -{ - scale_test("loop6.o", BPF_PROG_TYPE_KPROBE, false); -} - -void test_verif_scale_strobemeta() -{ - /* partial unroll. 19k insn in a loop. - * Total program size 20.8k insn. - * ~350k processed_insns - */ - scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_strobemeta_bpf_loop(void) -{ - /* use the bpf_loop helper*/ - scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_strobemeta_nounroll1() -{ - /* no unroll, tiny loops */ - scale_test("strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_strobemeta_nounroll2() -{ - /* no unroll, tiny loops */ - scale_test("strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_strobemeta_subprogs() -{ - /* non-inlined subprogs */ - scale_test("strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); -} - -void test_verif_scale_sysctl_loop1() -{ - scale_test("test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); -} - -void test_verif_scale_sysctl_loop2() -{ - scale_test("test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); -} - -void test_verif_scale_xdp_loop() -{ - scale_test("test_xdp_loop.o", BPF_PROG_TYPE_XDP, false); -} - -void test_verif_scale_seg6_loop() -{ - scale_test("test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false); -} - -void test_verif_twfw() -{ - scale_test("twfw.o", BPF_PROG_TYPE_CGROUP_SKB, false); -} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8ba53acf9e..649f87382c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -22,6 +22,7 @@ #include #include +#include "bpf_rlimit.h" #include "bpf_util.h" #include "../test_btf.h" #include "test_progs.h" @@ -38,8 +39,8 @@ static bool always_log; #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f -#define NAME_NTH(N) 
(0xfffe0000 | N) -#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xfffe0000) +#define NAME_NTH(N) (0xffff0000 | N) +#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) #define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) #define MAX_NR_RAW_U32 1024 @@ -3660,302 +3661,6 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid type_size", }, -{ - .descr = "decl_tag test #1, struct/member, well-formed", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_STRUCT_ENC(0, 2, 8), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_MEMBER_ENC(NAME_TBD, 1, 32), - BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), - BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), - BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 8, - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 1, -}, -{ - .descr = "decl_tag test #2, union/member, well-formed", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_UNION_ENC(NAME_TBD, 2, 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), - BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), - BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 1, -}, -{ - .descr = "decl_tag test #3, variable, well-formed", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_VAR_ENC(NAME_TBD, 1, 1), /* [3] */ - BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), - BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0local\0global\0tag1\0tag2"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, -}, -{ - .descr = "decl_tag test #4, func/parameter, well-formed", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), - BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), - BTF_DECL_TAG_ENC(NAME_TBD, 3, 1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, -}, -{ - .descr = "decl_tag test #5, invalid value", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_DECL_TAG_ENC(0, 2, -1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0local\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid value", -}, -{ - .descr = "decl_tag test #6, invalid target type", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_DECL_TAG_ENC(NAME_TBD, 1, -1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 
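The decl_tag cases removed here all drive BTF_DECL_TAG_ENC(), whose component_idx picks the tag target: -1 tags the type itself, while 0..vlen-1 tags the Nth struct member or function parameter; that is why, for example, index 2 on a two-member struct must be rejected with "Invalid component_idx". A sketch of the encoder, assumed to match the helpers in the selftests' test_btf.h:

#include <linux/btf.h>

/* Raw-type words: a name offset, a packed info word, the target type ID,
 * then the btf_decl_tag payload (the component index). */
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & 0xffff))
#define BTF_DECL_TAG_ENC(value, type, component_idx) \
	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)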
4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type", -}, -{ - .descr = "decl_tag test #7, invalid vlen", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 1), 2), (0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0local\0tag1"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "vlen != 0", -}, -{ - .descr = "decl_tag test #8, invalid kflag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0local\0tag1"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, -{ - .descr = "decl_tag test #9, var, invalid component_idx", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0local\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid component_idx", -}, -{ - .descr = "decl_tag test #10, struct member, invalid component_idx", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_STRUCT_ENC(0, 2, 8), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_MEMBER_ENC(NAME_TBD, 1, 32), - BTF_DECL_TAG_ENC(NAME_TBD, 2, 2), - BTF_END_RAW, - }, - BTF_STR_SEC("\0m1\0m2\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 8, - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid component_idx", -}, -{ - .descr = "decl_tag test #11, func parameter, invalid component_idx", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_DECL_TAG_ENC(NAME_TBD, 3, 2), - BTF_END_RAW, - }, - BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid component_idx", -}, -{ - .descr = "decl_tag test #12, < -1 component_idx", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_DECL_TAG_ENC(NAME_TBD, 3, -2), - BTF_END_RAW, - }, - BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid component_idx", -}, -{ 
- .descr = "decl_tag test #13, typedef, well-formed", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ - BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, -}, -{ - .descr = "decl_tag test #14, typedef, invalid component_idx", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ - BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0local\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid component_idx", -}, -{ - .descr = "type_tag test #1", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [2] */ - BTF_PTR_ENC(2), /* [3] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "tag_type_check_btf", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 1, -}, - }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -4062,40 +3767,20 @@ static void *btf_raw_create(const struct btf_header *hdr, next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL; done: - free(strs_idx); if (err) { - free(raw_btf); + if (raw_btf) + free(raw_btf); + if (strs_idx) + free(strs_idx); return NULL; } return raw_btf; } -static int load_raw_btf(const void *raw_data, size_t raw_size) -{ - LIBBPF_OPTS(bpf_btf_load_opts, opts); - int btf_fd; - - if (always_log) { - opts.log_buf = btf_log_buf, - opts.log_size = BTF_LOG_BUF_SIZE, - opts.log_level = 1; - } - - btf_fd = bpf_btf_load(raw_data, raw_size, &opts); - if (btf_fd < 0 && !always_log) { - opts.log_buf = btf_log_buf, - opts.log_size = BTF_LOG_BUF_SIZE, - opts.log_level = 1; - btf_fd = bpf_btf_load(raw_data, raw_size, &opts); - } - - return btf_fd; -} - static void do_test_raw(unsigned int test_num) { struct btf_raw_test *test = &raw_tests[test_num - 1]; - LIBBPF_OPTS(bpf_map_create_opts, opts); + struct bpf_create_map_attr create_attr = {}; int map_fd = -1, btf_fd = -1; unsigned int raw_btf_size; struct btf_header *hdr; @@ -4121,14 +3806,16 @@ static void do_test_raw(unsigned int test_num) hdr->str_len = (int)hdr->str_len + test->str_len_delta; *btf_log_buf = '\0'; - btf_fd = load_raw_btf(raw_btf, raw_btf_size); + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); free(raw_btf); err = ((btf_fd < 0) != test->btf_load_err); if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", btf_fd, test->btf_load_err) || CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), - "expected err_str:%s\n", test->err_str)) { + "expected err_str:%s", test->err_str)) { err = -1; goto done; } @@ -4136,11 +3823,16 @@ static void do_test_raw(unsigned int test_num) if (err || btf_fd < 0) goto done; - opts.btf_fd = btf_fd; - opts.btf_key_type_id = test->key_type_id; - opts.btf_value_type_id = test->value_type_id; - map_fd = bpf_map_create(test->map_type, test->map_name, - test->key_size, test->value_size, test->max_entries, &opts); + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + 
create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; + + map_fd = bpf_create_map_xattr(&create_attr); err = ((map_fd < 0) != test->map_create_err); CHECK(err, "map_fd:%d test->map_create_err:%u", @@ -4246,7 +3938,9 @@ static int test_big_btf_info(unsigned int test_num) goto done; } - btf_fd = load_raw_btf(raw_btf, raw_btf_size); + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); if (CHECK(btf_fd < 0, "errno:%d", errno)) { err = -1; goto done; @@ -4302,7 +3996,7 @@ static int test_big_btf_info(unsigned int test_num) static int test_btf_id(unsigned int test_num) { const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; - LIBBPF_OPTS(bpf_map_create_opts, opts); + struct bpf_create_map_attr create_attr = {}; uint8_t *raw_btf = NULL, *user_btf[2] = {}; int btf_fd[2] = {-1, -1}, map_fd = -1; struct bpf_map_info map_info = {}; @@ -4332,7 +4026,9 @@ static int test_btf_id(unsigned int test_num) info[i].btf_size = raw_btf_size; } - btf_fd[0] = load_raw_btf(raw_btf, raw_btf_size); + btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) { err = -1; goto done; @@ -4365,11 +4061,16 @@ static int test_btf_id(unsigned int test_num) } /* Test btf members in struct bpf_map_info */ - opts.btf_fd = btf_fd[0]; - opts.btf_key_type_id = 1; - opts.btf_value_type_id = 2; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_btf_id", - sizeof(int), sizeof(int), 4, &opts); + create_attr.name = "test_btf_id"; + create_attr.map_type = BPF_MAP_TYPE_ARRAY; + create_attr.key_size = sizeof(int); + create_attr.value_size = sizeof(unsigned int); + create_attr.max_entries = 4; + create_attr.btf_fd = btf_fd[0]; + create_attr.btf_key_type_id = 1; + create_attr.btf_value_type_id = 2; + + map_fd = bpf_create_map_xattr(&create_attr); if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; @@ -4462,7 +4163,9 @@ static void do_test_get_info(unsigned int test_num) goto done; } - btf_fd = load_raw_btf(raw_btf, raw_btf_size); + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); if (CHECK(btf_fd <= 0, "errno:%d", errno)) { err = -1; goto done; @@ -4565,7 +4268,7 @@ static void do_test_file(unsigned int test_num) if (CHECK(err, "obj: %d", err)) return; - prog = bpf_object__next_program(obj, NULL); + prog = bpf_program__next(NULL, obj); if (CHECK(!prog, "Cannot find bpf_prog")) { err = -1; goto done; @@ -5156,7 +4859,7 @@ static void do_test_pprint(int test_num) { const struct btf_raw_test *test = &pprint_test_template[test_num]; enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; - LIBBPF_OPTS(bpf_map_create_opts, opts); + struct bpf_create_map_attr create_attr = {}; bool ordered_map, lossless_map, percpu_map; int err, ret, num_cpus, rounded_value_size; unsigned int key, nr_read_elems; @@ -5182,19 +4885,26 @@ static void do_test_pprint(int test_num) return; *btf_log_buf = '\0'; - btf_fd = load_raw_btf(raw_btf, raw_btf_size); + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); free(raw_btf); - if (CHECK(btf_fd < 0, "errno:%d\n", errno)) { + if (CHECK(btf_fd < 0, "errno:%d", errno)) { err = -1; goto done; } - opts.btf_fd = btf_fd; - opts.btf_key_type_id = test->key_type_id; - 
opts.btf_value_type_id = test->value_type_id; - map_fd = bpf_map_create(test->map_type, test->map_name, - test->key_size, test->value_size, test->max_entries, &opts); + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; + + map_fd = bpf_create_map_xattr(&create_attr); if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; @@ -6549,7 +6259,9 @@ static void do_test_info_raw(unsigned int test_num) return; *btf_log_buf = '\0'; - btf_fd = load_raw_btf(raw_btf, raw_btf_size); + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); free(raw_btf); if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) { @@ -6638,7 +6350,7 @@ struct btf_dedup_test { struct btf_dedup_opts opts; }; -static struct btf_dedup_test dedup_tests[] = { +const struct btf_dedup_test dedup_tests[] = { { .descr = "dedup: unused strings filtering", @@ -6658,6 +6370,9 @@ static struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0int\0long"), }, + .opts = { + .dont_resolve_fwds = false, + }, }, { .descr = "dedup: strings deduplication", @@ -6680,6 +6395,9 @@ static struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0int\0long int"), }, + .opts = { + .dont_resolve_fwds = false, + }, }, { .descr = "dedup: struct example #1", @@ -6703,33 +6421,27 @@ static struct btf_dedup_test dedup_tests[] = { BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ - BTF_MEMBER_ENC(NAME_NTH(8), 15, 672), /* float d; */ + BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), /* float d; */ /* ptr -> [3] struct s */ BTF_PTR_ENC(3), /* [4] */ /* ptr -> [6] const int */ BTF_PTR_ENC(6), /* [5] */ /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ - /* tag -> [3] struct s */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ - /* tag -> [3] struct s, member 1 */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ /* full copy of the above */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [9] */ - BTF_TYPE_ARRAY_ENC(9, 9, 16), /* [10] */ - BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [11] */ - BTF_MEMBER_ENC(NAME_NTH(3), 12, 0), - BTF_MEMBER_ENC(NAME_NTH(4), 13, 64), - BTF_MEMBER_ENC(NAME_NTH(5), 10, 128), - BTF_MEMBER_ENC(NAME_NTH(6), 9, 640), - BTF_MEMBER_ENC(NAME_NTH(8), 15, 672), - BTF_PTR_ENC(11), /* [12] */ - BTF_PTR_ENC(14), /* [13] */ - BTF_CONST_ENC(9), /* [14] */ - BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [15] */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ + BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [9] */ + BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), + BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), + BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), + BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), + BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), + BTF_PTR_ENC(9), /* [10] */ + BTF_PTR_ENC(12), /* [11] */ + BTF_CONST_ENC(7), /* [12] */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [13] */ BTF_END_RAW, }, BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"), @@ -6746,20 +6458,21 @@ static struct btf_dedup_test dedup_tests[] = { BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ 
BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ - BTF_MEMBER_ENC(NAME_NTH(4), 9, 672), /* float d; */ + BTF_MEMBER_ENC(NAME_NTH(4), 7, 672), /* float d; */ /* ptr -> [3] struct s */ BTF_PTR_ENC(3), /* [4] */ /* ptr -> [6] const int */ BTF_PTR_ENC(6), /* [5] */ /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ - BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [9] */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [7] */ BTF_END_RAW, }, BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"), }, + .opts = { + .dont_resolve_fwds = false, + }, }, { .descr = "dedup: struct <-> fwd resolution w/ hash collision", @@ -6802,7 +6515,8 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0s\0x"), }, .opts = { - .force_collisions = true, /* force hash collisions */ + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ }, }, { @@ -6848,7 +6562,8 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0s\0x"), }, .opts = { - .force_collisions = true, /* force hash collisions */ + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ }, }, { @@ -6872,16 +6587,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_RESTRICT_ENC(8), /* [11] restrict */ BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ - BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ - BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"), }, .expect = { .raw_types = { @@ -6902,16 +6613,15 @@ static struct btf_dedup_test dedup_tests[] = { BTF_RESTRICT_ENC(8), /* [11] restrict */ BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ - BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ - BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ - BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"), + }, + .opts = { + .dont_resolve_fwds = false, }, }, { @@ -6964,6 +6674,9 @@ static struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0int\0some other int\0float"), }, + .opts = { + .dont_resolve_fwds = false, + }, }, { .descr = "dedup: enum fwd resolution", @@ -7005,6 +6718,9 @@ static struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), }, + .opts = { + .dont_resolve_fwds = false, + }, }, { .descr = "dedup: datasec and vars pass-through", @@ -7047,329 +6763,8 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0.bss\0t"), }, .opts = { - .force_collisions = true - }, -}, -{ - .descr = "dedup: func/func_arg/var tags", - .input = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* 
static int t */ - BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */ - /* void f(int a1, int a2) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), - BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ - /* tag -> t */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */ - /* tag -> func */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */ - /* tag -> func arg a1 */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), - BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), - }, -}, -{ - .descr = "dedup: func/func_param tags", - .input = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* void f(int a1, int a2) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), - BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */ - /* void f(int a1, int a2) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), - BTF_FUNC_ENC(NAME_NTH(3), 4), /* [5] */ - /* tag -> f: tag1, tag2 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */ - /* tag -> f/a2: tag1, tag2 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */ - /* tag -> f: tag1, tag3 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */ - /* tag -> f/a2: tag1, tag3 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), - BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), - }, -}, -{ - .descr = "dedup: struct/struct_member tags", - .input = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */ - BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), - BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), - BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [3] */ - BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), - BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), - /* tag -> t: tag1, tag2 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ - /* tag -> t/m2: tag1, tag2 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* 
[6] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ - /* tag -> t: tag1, tag3 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */ - /* tag -> t/m2: tag1, tag3 */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */ - BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), - BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), - BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ - BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ - BTF_DECL_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), - }, -}, -{ - .descr = "dedup: typedef tags", - .input = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [3] */ - /* tag -> t: tag1, tag2 */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [4] */ - BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [5] */ - /* tag -> t: tag1, tag3 */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [6] */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [7] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [3] */ - BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [4] */ - BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), - }, -}, -{ - .descr = "dedup: btf_type_tag #1", - .input = { - .raw_types = { - /* ptr -> tag2 -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> tag2 -> tag1 -> int */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [5] */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 5), /* [6] */ - BTF_PTR_ENC(6), /* [7] */ - /* ptr -> tag1 -> int */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [8] */ - BTF_PTR_ENC(8), /* [9] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0tag2"), - }, - .expect = { - .raw_types = { - /* ptr -> tag2 -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> tag1 -> int */ - BTF_PTR_ENC(2), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0tag2"), - }, -}, -{ - .descr = "dedup: btf_type_tag #2", - .input = { - .raw_types = { - /* ptr -> tag2 -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> tag2 -> int */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ - BTF_PTR_ENC(5), /* [6] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0tag2"), - }, - .expect = { - .raw_types = { - /* ptr -> tag2 -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> tag2 -> int */ - 
BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ - BTF_PTR_ENC(5), /* [6] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0tag2"), - }, -}, -{ - .descr = "dedup: btf_type_tag #3", - .input = { - .raw_types = { - /* ptr -> tag2 -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> tag1 -> tag2 -> int */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */ - BTF_PTR_ENC(6), /* [7] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0tag2"), - }, - .expect = { - .raw_types = { - /* ptr -> tag2 -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> tag1 -> tag2 -> int */ - BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */ - BTF_PTR_ENC(6), /* [7] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0tag2"), - }, -}, -{ - .descr = "dedup: btf_type_tag #4", - .input = { - .raw_types = { - /* ptr -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_PTR_ENC(2), /* [3] */ - /* ptr -> tag1 -> long */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */ - BTF_PTR_ENC(5), /* [6] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1"), - }, - .expect = { - .raw_types = { - /* ptr -> tag1 -> int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_PTR_ENC(2), /* [3] */ - /* ptr -> tag1 -> long */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */ - BTF_PTR_ENC(5), /* [6] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1"), - }, -}, -{ - .descr = "dedup: btf_type_tag #5, struct", - .input = { - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */ - BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)), - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [4] */ - BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [5] */ - BTF_MEMBER_ENC(NAME_NTH(3), 4, BTF_MEMBER_OFFSET(0, 0)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0t\0m"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ - BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */ - BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0tag1\0t\0m"), + .dont_resolve_fwds = false, + .dedup_table_size = 1 }, }, @@ -7390,7 +6785,6 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: - case BTF_KIND_TYPE_TAG: return base_size; case BTF_KIND_INT: return base_size + sizeof(__u32); @@ -7407,8 +6801,6 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); - case BTF_KIND_DECL_TAG: - return base_size + sizeof(struct btf_decl_tag); default: fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); return -EINVAL; @@ -7429,7 +6821,7 @@ static void dump_btf_strings(const char *strs, __u32 len) static 
void do_test_dedup(unsigned int test_num) { - struct btf_dedup_test *test = &dedup_tests[test_num - 1]; + const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; const struct btf_header *test_hdr, *expect_hdr; struct btf *test_btf = NULL, *expect_btf = NULL; @@ -7473,15 +6865,14 @@ static void do_test_dedup(unsigned int test_num) goto done; } - test->opts.sz = sizeof(test->opts); - err = btf__dedup(test_btf, &test->opts); + err = btf__dedup(test_btf, NULL, &test->opts); if (CHECK(err, "btf_dedup failed errno:%d", err)) { err = -1; goto done; } - test_btf_data = btf__raw_data(test_btf, &test_btf_size); - expect_btf_data = btf__raw_data(expect_btf, &expect_btf_size); + test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); + expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); if (CHECK(test_btf_size != expect_btf_size, "test_btf_size:%u != expect_btf_size:%u", test_btf_size, expect_btf_size)) { @@ -7535,8 +6926,8 @@ static void do_test_dedup(unsigned int test_num) expect_str_cur += expect_len + 1; } - test_nr_types = btf__type_cnt(test_btf); - expect_nr_types = btf__type_cnt(expect_btf); + test_nr_types = btf__get_nr_types(test_btf); + expect_nr_types = btf__get_nr_types(expect_btf); if (CHECK(test_nr_types != expect_nr_types, "test_nr_types:%u != expect_nr_types:%u", test_nr_types, expect_nr_types)) { @@ -7544,7 +6935,7 @@ static void do_test_dedup(unsigned int test_num) goto done; } - for (i = 1; i < test_nr_types; i++) { + for (i = 1; i <= test_nr_types; i++) { const struct btf_type *test_type, *expect_type; int test_size, expect_size; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index 90aac43757..64554fd335 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -92,7 +92,7 @@ struct s2 {\n\ int *f3;\n\ };\n\n", "c_dump"); - err = btf__dedup(btf2, NULL); + err = btf__dedup(btf2, NULL, NULL); if (!ASSERT_OK(err, "btf_dedup")) goto cleanup; @@ -186,7 +186,7 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=7 bits_offset=0\n" "\t'f2' type_id=9 bits_offset=64"); - err = btf__dedup(btf2, NULL); + err = btf__dedup(btf2, NULL, NULL); if (!ASSERT_OK(err, "btf_dedup")) goto cleanup; @@ -283,7 +283,7 @@ static void test_split_struct_duped() { "[13] STRUCT 's3' size=8 vlen=1\n" "\t'f1' type_id=12 bits_offset=0"); - err = btf__dedup(btf2, NULL); + err = btf__dedup(btf2, NULL, NULL); if (!ASSERT_OK(err, "btf_dedup")) goto cleanup; @@ -314,117 +314,6 @@ static void test_split_struct_duped() { btf__free(btf1); } -static void btf_add_dup_struct_in_cu(struct btf *btf, int start_id) -{ -#define ID(n) (start_id + n) - btf__set_pointer_size(btf, 8); /* enforce 64-bit arch */ - - btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ - - btf__add_struct(btf, "s", 8); /* [2] struct s { */ - btf__add_field(btf, "a", ID(3), 0, 0); /* struct anon a; */ - btf__add_field(btf, "b", ID(4), 0, 0); /* struct anon b; */ - /* } */ - - btf__add_struct(btf, "(anon)", 8); /* [3] struct anon { */ - btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */ - btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */ - /* } */ - - btf__add_struct(btf, "(anon)", 8); /* [4] struct anon { */ - btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */ - btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */ - /* } */ -#undef ID -} - -static void 
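The loop-bound edits in these hunks follow from two counting conventions: the legacy btf__get_nr_types() returns the number of real types, which is also the highest valid type ID (ID 0 is the implicit void), so walks are inclusive; the newer btf__type_cnt() counts the void entry too, hence the exclusive i < cnt form being replaced. A sketch of the restored inclusive walk:

#include <linux/types.h>
#include <bpf/btf.h>

static void visit_all_types(const struct btf *btf)
{
	__u32 id, nr = btf__get_nr_types(btf);

	/* Type IDs run from 1 through nr, inclusive. */
	for (id = 1; id <= nr; id++) {
		const struct btf_type *t = btf__type_by_id(btf, id);

		(void)t; /* inspect the type here */
	}
}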
test_split_dup_struct_in_cu() -{ - struct btf *btf1, *btf2 = NULL; - int err; - - /* generate the base data.. */ - btf1 = btf__new_empty(); - if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) - return; - - btf_add_dup_struct_in_cu(btf1, 0); - - VALIDATE_RAW_BTF( - btf1, - "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[2] STRUCT 's' size=8 vlen=2\n" - "\t'a' type_id=3 bits_offset=0\n" - "\t'b' type_id=4 bits_offset=0", - "[3] STRUCT '(anon)' size=8 vlen=2\n" - "\t'f1' type_id=1 bits_offset=0\n" - "\t'f2' type_id=1 bits_offset=32", - "[4] STRUCT '(anon)' size=8 vlen=2\n" - "\t'f1' type_id=1 bits_offset=0\n" - "\t'f2' type_id=1 bits_offset=32"); - - /* ..dedup them... */ - err = btf__dedup(btf1, NULL); - if (!ASSERT_OK(err, "btf_dedup")) - goto cleanup; - - VALIDATE_RAW_BTF( - btf1, - "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[2] STRUCT 's' size=8 vlen=2\n" - "\t'a' type_id=3 bits_offset=0\n" - "\t'b' type_id=3 bits_offset=0", - "[3] STRUCT '(anon)' size=8 vlen=2\n" - "\t'f1' type_id=1 bits_offset=0\n" - "\t'f2' type_id=1 bits_offset=32"); - - /* and add the same data on top of it */ - btf2 = btf__new_empty_split(btf1); - if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) - goto cleanup; - - btf_add_dup_struct_in_cu(btf2, 3); - - VALIDATE_RAW_BTF( - btf2, - "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[2] STRUCT 's' size=8 vlen=2\n" - "\t'a' type_id=3 bits_offset=0\n" - "\t'b' type_id=3 bits_offset=0", - "[3] STRUCT '(anon)' size=8 vlen=2\n" - "\t'f1' type_id=1 bits_offset=0\n" - "\t'f2' type_id=1 bits_offset=32", - "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[5] STRUCT 's' size=8 vlen=2\n" - "\t'a' type_id=6 bits_offset=0\n" - "\t'b' type_id=7 bits_offset=0", - "[6] STRUCT '(anon)' size=8 vlen=2\n" - "\t'f1' type_id=4 bits_offset=0\n" - "\t'f2' type_id=4 bits_offset=32", - "[7] STRUCT '(anon)' size=8 vlen=2\n" - "\t'f1' type_id=4 bits_offset=0\n" - "\t'f2' type_id=4 bits_offset=32"); - - err = btf__dedup(btf2, NULL); - if (!ASSERT_OK(err, "btf_dedup")) - goto cleanup; - - /* after dedup it should match the original data */ - VALIDATE_RAW_BTF( - btf2, - "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[2] STRUCT 's' size=8 vlen=2\n" - "\t'a' type_id=3 bits_offset=0\n" - "\t'b' type_id=3 bits_offset=0", - "[3] STRUCT '(anon)' size=8 vlen=2\n" - "\t'f1' type_id=1 bits_offset=0\n" - "\t'f2' type_id=1 bits_offset=32"); - -cleanup: - btf__free(btf2); - btf__free(btf1); -} - void test_btf_dedup_split() { if (test__start_subtest("split_simple")) @@ -433,6 +322,4 @@ void test_btf_dedup_split() test_split_struct_duped(); if (test__start_subtest("split_fwd_resolve")) test_split_fwd_resolve(); - if (test__start_subtest("split_dup_struct_in_cu")) - test_split_dup_struct_in_cu(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 9e26903f91..52ccf0cf35 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -13,28 +13,30 @@ static struct btf_dump_test_case { const char *name; const char *file; bool known_ptr_sz; + struct btf_dump_opts opts; } btf_dump_test_cases[] = { - {"btf_dump: syntax", "btf_dump_test_case_syntax", true}, - {"btf_dump: ordering", "btf_dump_test_case_ordering", false}, - {"btf_dump: padding", "btf_dump_test_case_padding", true}, - {"btf_dump: packing", "btf_dump_test_case_packing", true}, - {"btf_dump: bitfields", "btf_dump_test_case_bitfields", 
true}, - {"btf_dump: multidim", "btf_dump_test_case_multidim", false}, - {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false}, + {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}}, + {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}}, + {"btf_dump: padding", "btf_dump_test_case_padding", true, {}}, + {"btf_dump: packing", "btf_dump_test_case_packing", true, {}}, + {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}}, + {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}}, + {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}}, }; -static int btf_dump_all_types(const struct btf *btf, void *ctx) +static int btf_dump_all_types(const struct btf *btf, + const struct btf_dump_opts *opts) { - size_t type_cnt = btf__type_cnt(btf); + size_t type_cnt = btf__get_nr_types(btf); struct btf_dump *d; int err = 0, id; - d = btf_dump__new(btf, btf_dump_printf, ctx, NULL); + d = btf_dump__new(btf, NULL, opts, btf_dump_printf); err = libbpf_get_error(d); if (err) return err; - for (id = 1; id < type_cnt; id++) { + for (id = 1; id <= type_cnt; id++) { err = btf_dump__dump_type(d, id); if (err) goto done; @@ -86,7 +88,8 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) goto done; } - err = btf_dump_all_types(btf, f); + t->opts.ctx = f; + err = btf_dump_all_types(btf, &t->opts); fclose(f); close(fd); if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) { @@ -130,10 +133,11 @@ static char *dump_buf; static size_t dump_buf_sz; static FILE *dump_buf_file; -static void test_btf_dump_incremental(void) +void test_btf_dump_incremental(void) { struct btf *btf = NULL; struct btf_dump *d = NULL; + struct btf_dump_opts opts; int id, err, i; dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); @@ -142,7 +146,8 @@ static void test_btf_dump_incremental(void) btf = btf__new_empty(); if (!ASSERT_OK_PTR(btf, "new_empty")) goto err_out; - d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL); + opts.ctx = dump_buf_file; + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) goto err_out; @@ -166,7 +171,7 @@ static void test_btf_dump_incremental(void) err = btf__add_field(btf, "x", 2, 0, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i < btf__type_cnt(btf); i++) { + for (i = 1; i <= btf__get_nr_types(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -205,7 +210,7 @@ static void test_btf_dump_incremental(void) err = btf__add_field(btf, "s", 3, 32, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i < btf__type_cnt(btf); i++) { + for (i = 1; i <= btf__get_nr_types(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -323,7 +328,7 @@ static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, char *str) { #ifdef __SIZEOF_INT128__ - unsigned __int128 i = 0xffffffffffffffff; + __int128 i = 0xffffffffffffffff; /* this dance is required because we cannot directly initialize * a 128-bit value to anything larger than a 64-bit value. @@ -353,27 +358,12 @@ static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1); #ifdef __SIZEOF_INT128__ - /* gcc encode unsigned __int128 type with name "__int128 unsigned" in dwarf, - * and clang encode it with name "unsigned __int128" in dwarf. - * Do an availability test for either variant before doing actual test. 
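The 128-bit "dance" mentioned in the comment above can be shown standalone. This is a sketch of the idea rather than the test's exact code: C has no 128-bit integer literals, so a value wider than 64 bits is assembled from 64-bit halves.

#include <stdio.h>

int main(void)
{
#ifdef __SIZEOF_INT128__
	unsigned __int128 i = 0xffffffffffffffffULL;

	/* Shift the first 64-bit chunk up, then OR in the low half,
	 * yielding 0xfffffffffffffffffffffffffffffffe. */
	i = (i << 64) | 0xfffffffffffffffeULL;
	printf("low 64 bits: %llx\n", (unsigned long long)i);
#endif
	return 0;
}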
- */ - if (btf__find_by_name(btf, "unsigned __int128") > 0) { - TEST_BTF_DUMP_DATA(btf, d, NULL, str, unsigned __int128, BTF_F_COMPACT, - "(unsigned __int128)0xffffffffffffffff", - 0xffffffffffffffff); - ASSERT_OK(btf_dump_data(btf, d, "unsigned __int128", NULL, 0, &i, 16, str, - "(unsigned __int128)0xfffffffffffffffffffffffffffffffe"), - "dump unsigned __int128"); - } else if (btf__find_by_name(btf, "__int128 unsigned") > 0) { - TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128 unsigned, BTF_F_COMPACT, - "(__int128 unsigned)0xffffffffffffffff", - 0xffffffffffffffff); - ASSERT_OK(btf_dump_data(btf, d, "__int128 unsigned", NULL, 0, &i, 16, str, - "(__int128 unsigned)0xfffffffffffffffffffffffffffffffe"), - "dump unsigned __int128"); - } else { - ASSERT_TRUE(false, "unsigned_int128_not_found"); - } + TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT, + "(__int128)0xffffffffffffffff", + 0xffffffffffffffff); + ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str, + "(__int128)0xfffffffffffffffffffffffffffffffe"), + "dump __int128"); #endif } @@ -756,7 +746,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, /* overflow bpf_sock_ops struct with final element nonzero/zero. * Regardless of the value of the final field, we don't have all the * data we need to display it, so we should trigger an overflow. - * In other words overflow checking should trump "is field zero?" + * In other words overflow checking should trump "is field zero?" * checks because if we've overflowed, it shouldn't matter what the * field is - we can't trust its value so shouldn't display it. */ @@ -773,10 +763,8 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, char *str) { -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); -#endif TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, "static int cpu_profile_flip = (int)2", 2); } @@ -809,28 +797,26 @@ static void test_btf_dump_datasec_data(char *str) { - struct btf *btf; + struct btf *btf = btf__parse("xdping_kern.o", NULL); + struct btf_dump_opts opts = { .ctx = str }; char license[4] = "GPL"; struct btf_dump *d; - btf = btf__parse("xdping_kern.o", NULL); if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found")) return; - d = btf_dump__new(btf, btf_dump_snprintf, str, NULL); + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); if (!ASSERT_OK_PTR(d, "could not create BTF dump")) - goto out; + return; test_btf_datasec(btf, d, str, "license", "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];", license, sizeof(license)); -out: - btf_dump__free(d); - btf__free(btf); } void test_btf_dump() { char str[STRSIZE]; + struct btf_dump_opts opts = { .ctx = str }; struct btf_dump *d; struct btf *btf; int i; @@ -850,7 +836,7 @@ void test_btf_dump() { if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) return; - d = btf_dump__new(btf, btf_dump_snprintf, str, NULL); + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); if (!ASSERT_OK_PTR(d, "could not create BTF dump")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c index 8afbf3d0b8..8ab5d3e358 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c +++ 
b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -7,12 +7,12 @@ #include void test_btf_endian() { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN enum btf_endianness endian = BTF_LITTLE_ENDIAN; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN enum btf_endianness endian = BTF_BIG_ENDIAN; #else -#error "Unrecognized __BYTE_ORDER__" +#error "Unrecognized __BYTE_ORDER" #endif enum btf_endianness swap_endian = 1 - endian; struct btf *btf = NULL, *swap_btf = NULL; @@ -32,7 +32,7 @@ void test_btf_endian() { ASSERT_EQ(btf__endianness(btf), swap_endian, "endian"); /* Get raw BTF data in non-native endianness... */ - raw_data = btf__raw_data(btf, &raw_sz); + raw_data = btf__get_raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -42,9 +42,9 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); - swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -58,7 +58,7 @@ void test_btf_endian() { /* swap it back to native endianness */ btf__set_endianness(swap_btf, endian); - swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -75,7 +75,7 @@ void test_btf_endian() { swap_btf = NULL; btf__set_endianness(btf, swap_endian); - raw_data = btf__raw_data(btf, &raw_sz); + raw_data = btf__get_raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -85,7 +85,7 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); /* the type should appear as if it was stored in native endianness */ t = btf__type_by_id(swap_btf, var_id); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index eef1158676..ca7c2a9161 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -13,6 +13,7 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args) } void test_btf_split() { + struct btf_dump_opts opts; struct btf_dump *d = NULL; const struct btf_type *t; struct btf *btf1, *btf2; @@ -67,10 +68,11 @@ void test_btf_split() { dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) return; - d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL); + opts.ctx = dump_buf_file; + d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; - for (i = 1; i < btf__type_cnt(btf2); i++) { + for (i = 1; i <= btf__get_nr_types(btf2); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index addf99c058..022c7d89d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -4,15 +4,19 @@ #include #include 
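Several files in this patch move back to the four-argument btf_dump__new(): the printf callback comes last and its context rides in btf_dump_opts.ctx rather than being passed directly. A minimal sketch of that contract (the wrapper name is illustrative):

#include <stdio.h>
#include <bpf/btf.h>

static struct btf_dump *new_file_dump(const struct btf *btf, FILE *f,
				      btf_dump_printf_fn_t printf_fn)
{
	struct btf_dump_opts opts = { .ctx = f };

	/* Second argument is an optional struct btf_ext; unused here. */
	return btf_dump__new(btf, NULL, &opts, printf_fn);
}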
"btf_helpers.h" -static void gen_btf(struct btf *btf) -{ +void test_btf_write() { const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; const struct btf_enum *v; const struct btf_param *p; + struct btf *btf; int id, err, str_off; + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + return; + str_off = btf__find_str(btf, "int"); ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off"); @@ -277,172 +281,5 @@ static void gen_btf(struct btf *btf) "[17] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=1 offset=4 size=8", "raw_dump"); - /* DECL_TAG */ - id = btf__add_decl_tag(btf, "tag1", 16, -1); - ASSERT_EQ(id, 18, "tag_id"); - t = btf__type_by_id(btf, 18); - ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value"); - ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); - ASSERT_EQ(t->type, 16, "tag_type"); - ASSERT_EQ(btf_decl_tag(t)->component_idx, -1, "tag_component_idx"); - ASSERT_STREQ(btf_type_raw_dump(btf, 18), - "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "raw_dump"); - - id = btf__add_decl_tag(btf, "tag2", 14, 1); - ASSERT_EQ(id, 19, "tag_id"); - t = btf__type_by_id(btf, 19); - ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value"); - ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); - ASSERT_EQ(t->type, 14, "tag_type"); - ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx"); - ASSERT_STREQ(btf_type_raw_dump(btf, 19), - "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); - - /* TYPE_TAG */ - id = btf__add_type_tag(btf, "tag1", 1); - ASSERT_EQ(id, 20, "tag_id"); - t = btf__type_by_id(btf, 20); - ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value"); - ASSERT_EQ(btf_kind(t), BTF_KIND_TYPE_TAG, "tag_kind"); - ASSERT_EQ(t->type, 1, "tag_type"); - ASSERT_STREQ(btf_type_raw_dump(btf, 20), - "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump"); -} - -static void test_btf_add() -{ - struct btf *btf; - - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "new_empty")) - return; - - gen_btf(btf); - - VALIDATE_RAW_BTF( - btf, - "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[2] PTR '(anon)' type_id=1", - "[3] CONST '(anon)' type_id=5", - "[4] VOLATILE '(anon)' type_id=3", - "[5] RESTRICT '(anon)' type_id=4", - "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", - "[7] STRUCT 's1' size=8 vlen=2\n" - "\t'f1' type_id=1 bits_offset=0\n" - "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", - "[8] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" - "\t'v1' val=1\n" - "\t'v2' val=2", - "[10] FWD 'struct_fwd' fwd_kind=struct", - "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", - "[13] TYPEDEF 'typedef1' type_id=1", - "[14] FUNC 'func1' type_id=15 linkage=global", - "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" - "\t'p1' type_id=1\n" - "\t'p2' type_id=2", - "[16] VAR 'var1' type_id=1, linkage=global-alloc", - "[17] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=1 offset=4 size=8", - "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", - "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", - "[20] TYPE_TAG 'tag1' type_id=1"); - btf__free(btf); } - -static void test_btf_add_btf() -{ - struct btf *btf1 = NULL, *btf2 = NULL; - int id; - - btf1 = btf__new_empty(); - if (!ASSERT_OK_PTR(btf1, "btf1")) - return; - - btf2 = btf__new_empty(); - if (!ASSERT_OK_PTR(btf2, "btf2")) - goto cleanup; - - gen_btf(btf1); - gen_btf(btf2); - - id = btf__add_btf(btf1, btf2); - 
if (!ASSERT_EQ(id, 21, "id")) - goto cleanup; - - VALIDATE_RAW_BTF( - btf1, - "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[2] PTR '(anon)' type_id=1", - "[3] CONST '(anon)' type_id=5", - "[4] VOLATILE '(anon)' type_id=3", - "[5] RESTRICT '(anon)' type_id=4", - "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", - "[7] STRUCT 's1' size=8 vlen=2\n" - "\t'f1' type_id=1 bits_offset=0\n" - "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", - "[8] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" - "\t'v1' val=1\n" - "\t'v2' val=2", - "[10] FWD 'struct_fwd' fwd_kind=struct", - "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", - "[13] TYPEDEF 'typedef1' type_id=1", - "[14] FUNC 'func1' type_id=15 linkage=global", - "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" - "\t'p1' type_id=1\n" - "\t'p2' type_id=2", - "[16] VAR 'var1' type_id=1, linkage=global-alloc", - "[17] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=1 offset=4 size=8", - "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", - "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", - "[20] TYPE_TAG 'tag1' type_id=1", - - /* types appended from the second BTF */ - "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[22] PTR '(anon)' type_id=21", - "[23] CONST '(anon)' type_id=25", - "[24] VOLATILE '(anon)' type_id=23", - "[25] RESTRICT '(anon)' type_id=24", - "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10", - "[27] STRUCT 's1' size=8 vlen=2\n" - "\t'f1' type_id=21 bits_offset=0\n" - "\t'f2' type_id=21 bits_offset=32 bitfield_size=16", - "[28] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=21 bits_offset=0 bitfield_size=16", - "[29] ENUM 'e1' size=4 vlen=2\n" - "\t'v1' val=1\n" - "\t'v2' val=2", - "[30] FWD 'struct_fwd' fwd_kind=struct", - "[31] FWD 'union_fwd' fwd_kind=union", - "[32] ENUM 'enum_fwd' size=4 vlen=0", - "[33] TYPEDEF 'typedef1' type_id=21", - "[34] FUNC 'func1' type_id=35 linkage=global", - "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n" - "\t'p1' type_id=21\n" - "\t'p2' type_id=22", - "[36] VAR 'var1' type_id=21, linkage=global-alloc", - "[37] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=21 offset=4 size=8", - "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1", - "[39] DECL_TAG 'tag2' type_id=34 component_idx=1", - "[40] TYPE_TAG 'tag1' type_id=21"); - -cleanup: - btf__free(btf1); - btf__free(btf2); -} - -void test_btf_write() -{ - if (test__start_subtest("btf_add")) - test_btf_add(); - if (test__start_subtest("btf_add_btf")) - test_btf_add_btf(); -} diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c index 621c572221..876be0ecb6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -363,7 +363,7 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) cg_storage_multi_shared__destroy(obj); } -void serial_test_cg_storage_multi(void) +void test_cg_storage_multi(void) { int parent_cgroup_fd = -1, child_cgroup_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 858916d11e..70e94e7830 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -16,12 +16,12 @@ static int prog_load(void) }; 
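The prog_load() helpers in these cgroup tests all funnel into the raw instruction loader. A self-contained sketch of the legacy bpf_load_program() call they fall back to; the two-instruction "return 1" program mirrors the tests, and the insn macros are assumed to come from the tools tree's linux/filter.h:

#include <linux/filter.h> /* BPF_MOV64_IMM, BPF_EXIT_INSN (tools copy) */
#include <bpf/bpf.h>

static char bpf_log_buf[BPF_LOG_BUF_SIZE];

static int load_allow_prog(void)
{
	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1: allow the skb */
		BPF_EXIT_INSN(),             /* return r0 */
	};

	return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog,
				sizeof(prog) / sizeof(prog[0]), "GPL", 0,
				bpf_log_buf, BPF_LOG_BUF_SIZE);
}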
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } -void serial_test_cgroup_attach_autodetach(void) +void test_cgroup_attach_autodetach(void) { __u32 duration = 0, prog_cnt = 4, attach_flags; int allow_prog[2] = {-1}; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index d3e8f729c6..20bb8831dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -15,22 +15,22 @@ static int prog_load_cnt(int verdict, int val) int cgroup_storage_fd, percpu_cgroup_storage_fd; if (map_fd < 0) - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); if (map_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); return -1; } - cgroup_storage_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, - sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL); + cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); if (cgroup_storage_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); return -1; } - percpu_cgroup_storage_fd = bpf_map_create( - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL, - sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL); + percpu_cgroup_storage_fd = bpf_create_map( + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); if (percpu_cgroup_storage_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); return -1; @@ -66,7 +66,7 @@ static int prog_load_cnt(int verdict, int val) size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); int ret; - ret = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); @@ -74,7 +74,7 @@ static int prog_load_cnt(int verdict, int val) return ret; } -void serial_test_cgroup_attach_multi(void) +void test_cgroup_attach_multi(void) { __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 356547e849..9e96f8d87f 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -18,12 +18,12 @@ static int prog_load(int verdict) }; size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } -void serial_test_cgroup_attach_override(void) +void test_cgroup_attach_override(void) { int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1; __u32 duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 9e6e6aad34..9091524131 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -24,7 +24,7 @@ int ping_and_check(int exp_calls, int exp_alt_calls) return 0; } -void serial_test_cgroup_link(void) +void test_cgroup_link(void) { struct 
{ const char *path; diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index f73e6e36b7..012068f33a 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -195,7 +195,7 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) test_check_mtu__destroy(skel); } -void serial_test_check_mtu(void) +void test_check_mtu(void) { __u32 mtu_lo; diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c index 9c4325f4ae..9229db2f5c 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c @@ -51,25 +51,24 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) bool v4 = family == AF_INET; __u16 expected_local_port = v4 ? 22222 : 22223; __u16 expected_peer_port = 60000; + struct bpf_prog_load_attr attr = { + .file = v4 ? "./connect_force_port4.o" : + "./connect_force_port6.o", + }; struct bpf_program *prog; struct bpf_object *obj; - const char *obj_file = v4 ? "connect_force_port4.o" : "connect_force_port6.o"; - int fd, err; + int xlate_fd, fd, err; __u32 duration = 0; - obj = bpf_object__open_file(obj_file, NULL); - if (!ASSERT_OK_PTR(obj, "bpf_obj_open")) + err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd); + if (err) { + log_err("Failed to load BPF object"); return -1; - - err = bpf_object__load(obj); - if (!ASSERT_OK(err, "bpf_obj_load")) { - err = -EIO; - goto close_bpf_object; } - prog = bpf_object__find_program_by_name(obj, v4 ? - "connect4" : - "connect6"); + prog = bpf_object__find_program_by_title(obj, v4 ? + "cgroup/connect4" : + "cgroup/connect6"); if (CHECK(!prog, "find_prog", "connect prog not found\n")) { err = -EIO; goto close_bpf_object; @@ -83,9 +82,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) goto close_bpf_object; } - prog = bpf_object__find_program_by_name(obj, v4 ? - "getpeername4" : - "getpeername6"); + prog = bpf_object__find_program_by_title(obj, v4 ? + "cgroup/getpeername4" : + "cgroup/getpeername6"); if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) { err = -EIO; goto close_bpf_object; @@ -99,9 +98,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) goto close_bpf_object; } - prog = bpf_object__find_program_by_name(obj, v4 ? - "getsockname4" : - "getsockname6"); + prog = bpf_object__find_program_by_title(obj, v4 ? 
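
The serial_test_* to test_* renames in this and the following hunks track test_progs' parallel mode: the runner keys off the function-name prefix, and serial_test_ marks tests that must not run concurrently with others, so the prefix only makes sense once parallel runs exist. The convention, sketched as declarations only:

	/* test_progs discovers tests by symbol name; bodies elided here */
	void test_check_mtu(void);		/* eligible for parallel runs (./test_progs -j) */
	void serial_test_check_mtu(void);	/* runner quiesces other tests around this one */
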
+ "cgroup/getsockname4" : + "cgroup/getsockname6"); if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) { err = -EIO; goto close_bpf_object; diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 1dfe14ff6a..3d4b2a358d 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -112,7 +112,7 @@ void test_core_autosize(void) if (!ASSERT_OK_PTR(f, "btf_fdopen")) goto cleanup; - raw_data = btf__raw_data(btf, &raw_sz); + raw_data = btf__get_raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data")) goto cleanup; written = fwrite(raw_data, 1, raw_sz, f); @@ -163,7 +163,7 @@ void test_core_autosize(void) usleep(1); - bss_map = bpf_object__find_map_by_name(skel->obj, ".bss"); + bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index b8bdd1c3ef..4739b15b2a 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -10,7 +10,7 @@ static int duration = 0; #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) -#define MODULES_CASE(name, pg_name, tp_name) { \ +#define MODULES_CASE(name, sec_name, tp_name) { \ .case_name = name, \ .bpf_obj_file = "test_core_reloc_module.o", \ .btf_src_file = NULL, /* find in kernel module BTFs */ \ @@ -28,9 +28,9 @@ static int duration = 0; .comm_len = sizeof("test_progs"), \ }, \ .output_len = sizeof(struct core_reloc_module_output), \ - .prog_name = pg_name, \ + .prog_sec_name = sec_name, \ .raw_tp_name = tp_name, \ - .trigger = __trigger_module_test_read, \ + .trigger = trigger_module_test_read, \ .needs_testmod = true, \ } @@ -43,9 +43,7 @@ static int duration = 0; #define FLAVORS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_flavors.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_flavors" \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ #define FLAVORS_CASE(name) { \ FLAVORS_CASE_COMMON(name), \ @@ -68,9 +66,7 @@ static int duration = 0; #define NESTING_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_nesting.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_nesting" \ + .btf_src_file = "btf__core_reloc_" #name ".o" #define NESTING_CASE(name) { \ NESTING_CASE_COMMON(name), \ @@ -95,9 +91,7 @@ static int duration = 0; #define ARRAYS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_arrays.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_arrays" \ + .btf_src_file = "btf__core_reloc_" #name ".o" #define ARRAYS_CASE(name) { \ ARRAYS_CASE_COMMON(name), \ @@ -129,9 +123,7 @@ static int duration = 0; #define PRIMITIVES_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_primitives.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_primitives" \ + .btf_src_file = "btf__core_reloc_" #name ".o" #define PRIMITIVES_CASE(name) { \ PRIMITIVES_CASE_COMMON(name), \ @@ -166,8 +158,6 @@ static int duration = 0; .e = 5, .f = 6, .g = 7, .h = 8, \ }, \ .output_len = sizeof(struct 
core_reloc_mods_output), \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_mods", \ } #define PTR_AS_ARR_CASE(name) { \ @@ -184,8 +174,6 @@ static int duration = 0; .a = 3, \ }, \ .output_len = sizeof(struct core_reloc_ptr_as_arr), \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_ptr_as_arr", \ } #define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ @@ -202,9 +190,7 @@ static int duration = 0; #define INTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_ints.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_ints" + .btf_src_file = "btf__core_reloc_" #name ".o" #define INTS_CASE(name) { \ INTS_CASE_COMMON(name), \ @@ -222,9 +208,7 @@ static int duration = 0; #define FIELD_EXISTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_existence.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_existence" + .btf_src_file = "btf__core_reloc_" #name ".o" \ #define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \ .case_name = test_name_prefix#name, \ @@ -239,8 +223,6 @@ static int duration = 0; .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ __VA_ARGS__, \ .output_len = sizeof(struct core_reloc_bitfields_output), \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_bitfields", \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ @@ -249,7 +231,7 @@ static int duration = 0; .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ __VA_ARGS__, \ .output_len = sizeof(struct core_reloc_bitfields_output), \ - .prog_name = "test_core_bitfields_direct", \ + .prog_sec_name = "tp_btf/sys_enter", \ } @@ -257,21 +239,18 @@ static int duration = 0; BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ "probed:", name), \ .fails = true, \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_bitfields", \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ + .prog_sec_name = "tp_btf/sys_enter", \ .fails = true, \ - .prog_name = "test_core_bitfields_direct", \ } #define SIZE_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_size.o", \ .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_size" + .relaxed_core_relocs = true #define SIZE_OUTPUT_DATA(type) \ STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ @@ -299,10 +278,8 @@ static int duration = 0; #define TYPE_BASED_CASE_COMMON(name) \ .case_name = #name, \ - .bpf_obj_file = "test_core_reloc_type_based.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_type_based" + .bpf_obj_file = "test_core_reloc_type_based.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ #define TYPE_BASED_CASE(name, ...) 
{ \ TYPE_BASED_CASE_COMMON(name), \ @@ -319,9 +296,7 @@ static int duration = 0; #define TYPE_ID_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_type_id.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_type_id" + .btf_src_file = "btf__core_reloc_" #name ".o" \ #define TYPE_ID_CASE(name, setup_fn) { \ TYPE_ID_CASE_COMMON(name), \ @@ -338,9 +313,7 @@ static int duration = 0; #define ENUMVAL_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_enumval.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .raw_tp_name = "sys_enter", \ - .prog_name = "test_core_enumval" + .btf_src_file = "btf__core_reloc_" #name ".o" \ #define ENUMVAL_CASE(name, ...) { \ ENUMVAL_CASE_COMMON(name), \ @@ -370,7 +343,7 @@ struct core_reloc_test_case { bool fails; bool needs_testmod; bool relaxed_core_relocs; - const char *prog_name; + const char *prog_sec_name; const char *raw_tp_name; setup_test_fn setup; trigger_test_fn trigger; @@ -409,7 +382,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) exp->local_anon_void_ptr = -1; exp->local_anon_arr = -1; - for (i = 1; i < btf__type_cnt(local_btf); i++) + for (i = 1; i <= btf__get_nr_types(local_btf); i++) { t = btf__type_by_id(local_btf, i); /* we are interested only in anonymous types */ @@ -461,7 +434,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) static int setup_type_id_case_success(struct core_reloc_test_case *test) { struct core_reloc_type_id_output *exp = (void *)test->output; - struct btf *targ_btf; + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); int err; err = setup_type_id_case_local(test); @@ -502,11 +475,19 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test) return 0; } -static int __trigger_module_test_read(const struct core_reloc_test_case *test) +static int trigger_module_test_read(const struct core_reloc_test_case *test) { struct core_reloc_module_output *exp = (void *)test->output; + int fd, err; + + fd = open("/sys/kernel/bpf_testmod", O_RDONLY); + err = -errno; + if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) + return err; + + read(fd, NULL, exp->len); /* request expected number of bytes */ + close(fd); - trigger_module_test_read(exp->len); return 0; } @@ -525,13 +506,11 @@ static struct core_reloc_test_case test_cases[] = { .comm_len = sizeof("test_progs"), }, .output_len = sizeof(struct core_reloc_kernel_output), - .raw_tp_name = "sys_enter", - .prog_name = "test_core_kernel", }, /* validate we can find kernel module BTF types for relocs/attach */ - MODULES_CASE("module_probed", "test_core_module_probed", "bpf_testmod_test_read"), - MODULES_CASE("module_direct", "test_core_module_direct", NULL), + MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"), + MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL), /* validate BPF program can use multiple flavors to match against * single target BTF type @@ -610,8 +589,6 @@ static struct core_reloc_test_case test_cases[] = { .c = 0, /* BUG in clang, should be 3 */ }, .output_len = sizeof(struct core_reloc_misc_output), - .raw_tp_name = "sys_enter", - .prog_name = "test_core_misc", }, /* validate field existence checks */ @@ -880,9 +857,14 @@ void test_core_reloc(void) if (!ASSERT_OK_PTR(obj, "obj_open")) goto cleanup; - probe_name = test_case->prog_name; - tp_name = test_case->raw_tp_name; /* NULL for tp_btf */ - prog = 
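
setup_type_id_case_local() above swaps btf__type_cnt() for btf__get_nr_types(). The two differ by one: btf__type_cnt() counts the implicit void type at ID 0, btf__get_nr_types() does not, which is why the loop bound changes between < and <=. Equivalent walks under both APIs, as a sketch:

	#include <bpf/btf.h>

	static void walk_all_types(const struct btf *btf)
	{
		const struct btf_type *t;
		__u32 id;

		/* newer: count includes void at ID 0, so iterate while id < cnt */
		for (id = 1; id < btf__type_cnt(btf); id++)
			t = btf__type_by_id(btf, id);

		/* older: nr_types excludes void, so iterate while id <= nr */
		for (id = 1; id <= btf__get_nr_types(btf); id++)
			t = btf__type_by_id(btf, id);

		(void)t;
	}
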
bpf_object__find_program_by_name(obj, probe_name); + probe_name = "raw_tracepoint/sys_enter"; + tp_name = "sys_enter"; + if (test_case->prog_sec_name) { + probe_name = test_case->prog_sec_name; + tp_name = test_case->raw_tp_name; /* NULL for tp_btf */ + } + + prog = bpf_object__find_program_by_title(obj, probe_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name)) goto cleanup; @@ -894,7 +876,7 @@ void test_core_reloc(void) goto cleanup; } - data_map = bpf_object__find_map_by_name(obj, ".bss"); + data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); if (CHECK(!data_map, "find_data_map", "data map not found\n")) goto cleanup; @@ -908,8 +890,7 @@ void test_core_reloc(void) data = mmap_data; memset(mmap_data, 0, sizeof(*data)); - if (test_case->input_len) - memcpy(data->in, test_case->input, test_case->input_len); + memcpy(data->in, test_case->input, test_case->input_len); data->my_pid_tgid = my_pid_tgid; link = bpf_program__attach_raw_tracepoint(prog, tp_name); diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 911345c526..0a577a248d 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -9,8 +9,6 @@ #define MAX_FILES 7 #include "test_d_path.skel.h" -#include "test_d_path_check_rdonly_mem.skel.h" -#include "test_d_path_check_types.skel.h" static int duration; @@ -101,7 +99,7 @@ static int trigger_fstat_events(pid_t pid) return ret; } -static void test_d_path_basic(void) +void test_d_path(void) { struct test_d_path__bss *bss; struct test_d_path *skel; @@ -157,35 +155,3 @@ static void test_d_path_basic(void) cleanup: test_d_path__destroy(skel); } - -static void test_d_path_check_rdonly_mem(void) -{ - struct test_d_path_check_rdonly_mem *skel; - - skel = test_d_path_check_rdonly_mem__open_and_load(); - ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem"); - - test_d_path_check_rdonly_mem__destroy(skel); -} - -static void test_d_path_check_types(void) -{ - struct test_d_path_check_types *skel; - - skel = test_d_path_check_types__open_and_load(); - ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type"); - - test_d_path_check_types__destroy(skel); -} - -void test_d_path(void) -{ - if (test__start_subtest("basic")) - test_d_path_basic(); - - if (test__start_subtest("check_rdonly_mem")) - test_d_path_check_rdonly_mem(); - - if (test__start_subtest("check_alloc_mem")) - test_d_path_check_types(); -} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 4374ac8a8a..91154c2ba2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -6,23 +6,23 @@ void test_fentry_fexit(void) { - struct fentry_test_lskel *fentry_skel = NULL; - struct fexit_test_lskel *fexit_skel = NULL; + struct fentry_test *fentry_skel = NULL; + struct fexit_test *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; int err, prog_fd, i; - fentry_skel = fentry_test_lskel__open_and_load(); + fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; - fexit_skel = fexit_test_lskel__open_and_load(); + fexit_skel = fexit_test__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto close_prog; - err = fentry_test_lskel__attach(fentry_skel); + err = fentry_test__attach(fentry_skel); if 
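
The fentry/fexit hunks here rename <name>_lskel types back to plain <name>: newer bpftool suffixes light-skeleton types with _lskel so one object can generate both skeleton flavors side by side (light skeletons come from bpftool gen skeleton -L). Either way the generated surface is the same; a minimal usage sketch against the newer naming:

	#include "fentry_test.lskel.h"	/* generated via: bpftool gen skeleton -L fentry_test.o */

	static int run_fentry_once(void)
	{
		struct fentry_test_lskel *skel;
		int err;

		skel = fentry_test_lskel__open_and_load();
		if (!skel)
			return -1;

		err = fentry_test_lskel__attach(skel);
		fentry_test_lskel__destroy(skel);	/* also detaches */
		return err;
	}
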
(CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto close_prog; - err = fexit_test_lskel__attach(fexit_skel); + err = fexit_test__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; @@ -44,6 +44,6 @@ void test_fentry_fexit(void) } close_prog: - fentry_test_lskel__destroy(fentry_skel); - fexit_test_lskel__destroy(fexit_skel); + fentry_test__destroy(fentry_skel); + fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 12921b3850..174c89e745 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -3,19 +3,19 @@ #include #include "fentry_test.lskel.h" -static int fentry_test(struct fentry_test_lskel *fentry_skel) +static int fentry_test(struct fentry_test *fentry_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fentry_test_lskel__attach(fentry_skel); + err = fentry_test__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) return err; /* Check that already linked program can't be attached again. */ - link_fd = fentry_test_lskel__test1__attach(fentry_skel); + link_fd = fentry_test__test1__attach(fentry_skel); if (!ASSERT_LT(link_fd, 0, "fentry_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel) return -1; } - fentry_test_lskel__detach(fentry_skel); + fentry_test__detach(fentry_skel); /* zero results for re-attach test */ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss)); @@ -40,10 +40,10 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel) void test_fentry_test(void) { - struct fentry_test_lskel *fentry_skel = NULL; + struct fentry_test *fentry_skel = NULL; int err; - fentry_skel = fentry_test_lskel__open_and_load(); + fentry_skel = fentry_test__open_and_load(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fentry_test(void) ASSERT_OK(err, "fentry_second_attach"); cleanup: - fentry_test_lskel__destroy(fentry_skel); + fentry_test__destroy(fentry_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index c52f99f6a9..73b4c76e6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -60,15 +60,18 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, struct bpf_object *obj = NULL, *tgt_obj; __u32 retval, tgt_prog_id, info_len; struct bpf_prog_info prog_info = {}; - struct bpf_program **prog = NULL, *p; + struct bpf_program **prog = NULL; struct bpf_link **link = NULL; int err, tgt_fd, i; struct btf *btf; - err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); if (!ASSERT_OK(err, "tgt_prog_load")) return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = tgt_fd, + ); info_len = sizeof(prog_info); err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len); @@ -86,23 +89,16 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, if (!ASSERT_OK_PTR(prog, "prog_ptr")) goto close_prog; - obj = bpf_object__open_file(obj_file, NULL); + obj = bpf_object__open_file(obj_file, &opts); if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; - bpf_object__for_each_program(p, obj) { - err = bpf_program__set_attach_target(p, 
tgt_fd, NULL); - ASSERT_OK(err, "set_attach_target"); - } - err = bpf_object__load(obj); if (!ASSERT_OK(err, "obj_load")) goto close_prog; for (i = 0; i < prog_cnt; i++) { struct bpf_link_info link_info; - struct bpf_program *pos; - const char *pos_sec_name; char *tgt_name; __s32 btf_id; @@ -111,14 +107,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC); - prog[i] = NULL; - bpf_object__for_each_program(pos, obj) { - pos_sec_name = bpf_program__section_name(pos); - if (pos_sec_name && !strcmp(pos_sec_name, prog_name[i])) { - prog[i] = pos; - break; - } - } + prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); if (!ASSERT_OK_PTR(prog[i], prog_name[i])) goto close_prog; @@ -220,8 +209,8 @@ static void test_func_replace_verify(void) static int test_second_attach(struct bpf_object *obj) { - const char *prog_name = "security_new_get_constant"; - const char *tgt_name = "get_constant"; + const char *prog_name = "freplace/get_constant"; + const char *tgt_name = prog_name + 9; /* cut off freplace/ */ const char *tgt_obj_file = "./test_pkt_access.o"; struct bpf_program *prog = NULL; struct bpf_object *tgt_obj; @@ -229,11 +218,11 @@ static int test_second_attach(struct bpf_object *obj) struct bpf_link *link; int err = 0, tgt_fd; - prog = bpf_object__find_program_by_name(obj, prog_name); + prog = bpf_object__find_program_by_title(obj, prog_name); if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) return -ENOENT; - err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", tgt_obj_file, err, errno)) @@ -281,40 +270,34 @@ static void test_fmod_ret_freplace(void) struct bpf_link *freplace_link = NULL; struct bpf_program *prog; __u32 duration = 0; - int err, pkt_fd, attach_prog_fd; + int err, pkt_fd; - err = bpf_prog_test_load(tgt_name, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); /* the target prog should load fine */ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", tgt_name, err, errno)) return; + opts.attach_prog_fd = pkt_fd; - freplace_obj = bpf_object__open_file(freplace_name, NULL); + freplace_obj = bpf_object__open_file(freplace_name, &opts); if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open")) goto out; - prog = bpf_object__next_program(freplace_obj, NULL); - err = bpf_program__set_attach_target(prog, pkt_fd, NULL); - ASSERT_OK(err, "freplace__set_attach_target"); - err = bpf_object__load(freplace_obj); if (CHECK(err, "freplace_obj_load", "err %d\n", err)) goto out; + prog = bpf_program__next(NULL, freplace_obj); freplace_link = bpf_program__attach_trace(prog); if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace")) goto out; - fmod_obj = bpf_object__open_file(fmod_ret_name, NULL); + opts.attach_prog_fd = bpf_program__fd(prog); + fmod_obj = bpf_object__open_file(fmod_ret_name, &opts); if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open")) goto out; - attach_prog_fd = bpf_program__fd(prog); - prog = bpf_object__next_program(fmod_obj, NULL); - err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL); - ASSERT_OK(err, "fmod_ret_set_attach_target"); - err = bpf_object__load(fmod_obj); if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n")) goto out; @@ -339,32 +322,31 @@ static void test_func_sockmap_update(void) } static void test_obj_load_failure_common(const 
char *obj_file, - const char *target_obj_file) + const char *target_obj_file) + { /* * standalone test that asserts failure to load freplace prog * because of invalid return code. */ struct bpf_object *obj = NULL, *pkt_obj; - struct bpf_program *prog; int err, pkt_fd; __u32 duration = 0; - err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); /* the target prog should load fine */ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", target_obj_file, err, errno)) return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = pkt_fd, + ); - obj = bpf_object__open_file(obj_file, NULL); + obj = bpf_object__open_file(obj_file, &opts); if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; - prog = bpf_object__next_program(obj, NULL); - err = bpf_program__set_attach_target(prog, pkt_fd, NULL); - ASSERT_OK(err, "set_attach_target"); - /* It should fail to load the program */ err = bpf_object__load(obj); if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) @@ -389,8 +371,7 @@ static void test_func_map_prog_compatibility(void) "./test_attach_probe.o"); } -/* NOTE: affect other tests, must run in serial mode */ -void serial_test_fexit_bpf2bpf(void) +void test_fexit_bpf2bpf(void) { if (test__start_subtest("target_no_callees")) test_target_no_callees(); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index f949647dbb..4e7f4b42ea 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -10,7 +10,7 @@ static int do_sleep(void *skel) { - struct fexit_sleep_lskel *fexit_skel = skel; + struct fexit_sleep *fexit_skel = skel; struct timespec ts1 = { .tv_nsec = 1 }; struct timespec ts2 = { .tv_sec = 10 }; @@ -25,16 +25,16 @@ static char child_stack[STACK_SIZE]; void test_fexit_sleep(void) { - struct fexit_sleep_lskel *fexit_skel = NULL; + struct fexit_sleep *fexit_skel = NULL; int wstatus, duration = 0; pid_t cpid; int err, fexit_cnt; - fexit_skel = fexit_sleep_lskel__open_and_load(); + fexit_skel = fexit_sleep__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto cleanup; - err = fexit_sleep_lskel__attach(fexit_skel); + err = fexit_sleep__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto cleanup; @@ -60,7 +60,7 @@ void test_fexit_sleep(void) */ close(fexit_skel->progs.nanosleep_fentry.prog_fd); close(fexit_skel->progs.nanosleep_fexit.prog_fd); - fexit_sleep_lskel__detach(fexit_skel); + fexit_sleep__detach(fexit_skel); /* kill the thread to unwind sys_nanosleep stack through the trampoline */ kill(cpid, 9); @@ -78,5 +78,5 @@ void test_fexit_sleep(void) goto cleanup; cleanup: - fexit_sleep_lskel__destroy(fexit_skel); + fexit_sleep__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index e4cede6b4b..7c9b62e971 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -20,33 +20,34 @@ void test_fexit_stress(void) BPF_EXIT_INSN(), }; - LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_TRACING, + .license = "GPL", + .insns = trace_program, + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), .expected_attach_type = BPF_TRACE_FEXIT, 
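
The fexit_bpf2bpf hunks above trade bpf_program__set_attach_target() for the older open-time attach_prog_fd option. Newer libbpf lets the target be chosen per program between open and load; older libbpf fixed it for the whole object at open time. A sketch of the newer flow, with the older one noted for contrast (file and fd are the caller's):

	#include <bpf/libbpf.h>

	static struct bpf_object *open_freplace(const char *file, int tgt_prog_fd)
	{
		struct bpf_object *obj;
		struct bpf_program *prog;

		/* newer flow: pick the attach target after open, before load */
		obj = bpf_object__open_file(file, NULL);
		if (!obj)
			return NULL;

		prog = bpf_object__next_program(obj, NULL);
		if (!prog ||
		    bpf_program__set_attach_target(prog, tgt_prog_fd, NULL /* keep BTF name */) ||
		    bpf_object__load(obj)) {
			bpf_object__close(obj);
			return NULL;
		}
		return obj;
	}

	/* older flow, for contrast:
	 *	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .attach_prog_fd = tgt_prog_fd);
	 *	obj = bpf_object__open_file(file, &opts);
	 */
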
- log_buf = error, - log_size = sizeof(error), - ); + }; const struct bpf_insn skb_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, - .log_buf = error, - .log_size = sizeof(error), - ); + struct bpf_load_program_attr skb_load_attr = { + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .license = "GPL", + .insns = skb_program, + .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), + }; err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", - trace_opts.expected_attach_type); + load_attr.expected_attach_type); if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) goto out; - trace_opts.attach_btf_id = err; + load_attr.attach_btf_id = err; for (i = 0; i < CNT; i++) { - fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL", - trace_program, - sizeof(trace_program) / sizeof(struct bpf_insn), - &trace_opts); + fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error)); if (CHECK(fexit_fd[i] < 0, "fexit loaded", "failed: %d errno %d\n", fexit_fd[i], errno)) goto out; @@ -56,9 +57,7 @@ void test_fexit_stress(void) goto out; } - filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), - &skb_opts); + filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", filter_fd, errno)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index d4887d8bb3..af3dba7267 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -3,19 +3,19 @@ #include <test_progs.h> #include "fexit_test.lskel.h" -static int fexit_test(struct fexit_test_lskel *fexit_skel) +static int fexit_test(struct fexit_test *fexit_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fexit_test_lskel__attach(fexit_skel); + err = fexit_test__attach(fexit_skel); if (!ASSERT_OK(err, "fexit_attach")) return err; /* Check that already linked program can't be attached again.
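
fexit_stress above is the clearest view of the low-level loader change: the newer bpf_prog_load() takes type/name/license/insns directly plus extensible opts, while the older bpf_load_program_xattr() packs everything into one attr struct with the log buffer passed separately. Both calls in one sketch (insn macros as used by these tests, from the tools copy of linux/filter.h; assumes a libbpf that still carries the deprecated xattr variant):

	#include <bpf/bpf.h>
	#include <linux/filter.h>

	static char log_buf[4096];

	static int load_nop_filter_both_ways(void)
	{
		const struct bpf_insn insns[] = {
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		};
		size_t insn_cnt = sizeof(insns) / sizeof(insns[0]);
		int new_fd, old_fd;

		/* newer: fixed args, everything optional via opts */
		LIBBPF_OPTS(bpf_prog_load_opts, opts,
			.log_buf = log_buf,
			.log_size = sizeof(log_buf),
		);
		new_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
				       insns, insn_cnt, &opts);

		/* older: one attr struct, log buffer as trailing arguments */
		struct bpf_load_program_attr attr = {
			.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
			.license = "GPL",
			.insns = insns,
			.insns_cnt = insn_cnt,
		};
		old_fd = bpf_load_program_xattr(&attr, log_buf, sizeof(log_buf));

		return new_fd < 0 ? new_fd : old_fd;
	}
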
*/ - link_fd = fexit_test_lskel__test1__attach(fexit_skel); + link_fd = fexit_test__test1__attach(fexit_skel); if (!ASSERT_LT(link_fd, 0, "fexit_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel) return -1; } - fexit_test_lskel__detach(fexit_skel); + fexit_test__detach(fexit_skel); /* zero results for re-attach test */ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss)); @@ -40,10 +40,10 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel) void test_fexit_test(void) { - struct fexit_test_lskel *fexit_skel = NULL; + struct fexit_test *fexit_skel = NULL; int err; - fexit_skel = fexit_test_lskel__open_and_load(); + fexit_skel = fexit_test__open_and_load(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fexit_test(void) ASSERT_OK(err, "fexit_second_attach"); cleanup: - fexit_test_lskel__destroy(fexit_skel); + fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index ac54e3f91d..225714f71a 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -458,9 +458,9 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) return -1; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); + snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i); - prog = bpf_object__find_program_by_name(obj, prog_name); + prog = bpf_object__find_program_by_title(obj, prog_name); if (!prog) return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index 93ac3f2822..0e8a4d2f02 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -2,7 +2,7 @@ #include #include -void serial_test_flow_dissector_load_bytes(void) +void test_flow_dissector_load_bytes(void) { struct bpf_flow_keys flow_keys; __u32 duration = 0, retval, size; @@ -30,7 +30,7 @@ void serial_test_flow_dissector_load_bytes(void) /* make sure bpf_skb_load_bytes is not allowed from skb-less context */ - fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); CHECK(fd < 0, "flow_dissector-bpf_skb_load_bytes-load", diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 7c79462d27..3931ede5c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -47,9 +47,9 @@ static int load_prog(enum bpf_prog_type type) }; int fd; - fd = bpf_test_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); if (CHECK_FAIL(fd < 0)) - perror("bpf_test_load_program"); + perror("bpf_load_program"); return fd; } @@ -628,7 +628,7 @@ static void run_tests(int netns) } } -void serial_test_flow_dissector_reattach(void) +void test_flow_dissector_reattach(void) { int err, new_net, saved_net; diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index 
e834a01de1..522237aa44 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -24,19 +24,13 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) { bool good_kern_stack = false, good_user_stack = false; const char *nonjit_func = "___bpf_prog_run"; - /* perfbuf-submitted data is 4-byte aligned, but we need 8-byte - * alignment, so copy data into a local variable, for simplicity - */ - struct get_stack_trace_t e; + struct get_stack_trace_t *e = data; int i, num_stack; static __u64 cnt; struct ksym *ks; cnt++; - memset(&e, 0, sizeof(e)); - memcpy(&e, data, size <= sizeof(e) ? size : sizeof(e)); - if (size < sizeof(struct get_stack_trace_t)) { __u64 *raw_data = data; bool found = false; @@ -63,19 +57,19 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) good_user_stack = true; } } else { - num_stack = e.kern_stack_size / sizeof(__u64); + num_stack = e->kern_stack_size / sizeof(__u64); if (env.jit_enabled) { good_kern_stack = num_stack > 0; } else { for (i = 0; i < num_stack; i++) { - ks = ksym_search(e.kern_stack[i]); + ks = ksym_search(e->kern_stack[i]); if (ks && (strcmp(ks->name, nonjit_func) == 0)) { good_kern_stack = true; break; } } } - if (e.user_stack_size > 0 && e.user_stack_buildid_size > 0) + if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) good_user_stack = true; } @@ -89,8 +83,9 @@ void test_get_stack_raw_tp(void) { const char *file = "./test_get_stack_rawtp.o"; const char *file_err = "./test_get_stack_rawtp_err.o"; - const char *prog_name = "bpf_prog1"; + const char *prog_name = "raw_tracepoint/sys_enter"; int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP; + struct perf_buffer_opts pb_opts = {}; struct perf_buffer *pb = NULL; struct bpf_link *link = NULL; struct timespec tv = {0, 10}; @@ -99,15 +94,15 @@ void test_get_stack_raw_tp(void) struct bpf_map *map; cpu_set_t cpu_set; - err = bpf_prog_test_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; - prog = bpf_object__find_program_by_name(obj, prog_name); + prog = bpf_object__find_program_by_title(obj, prog_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) goto close_prog; @@ -129,8 +124,8 @@ void test_get_stack_raw_tp(void) if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; - pb = perf_buffer__new(bpf_map__fd(map), 8, get_stack_print_output, - NULL, NULL, NULL); + pb_opts.sample_cb = get_stack_print_output; + pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 9da131b32e..9efa7e50ea 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -103,18 +103,11 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration) static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) { int err = -ENOMEM, map_fd, zero = 0; - struct bpf_map *map, *map2; + struct bpf_map *map; 
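
get_stack_raw_tp above also shows the perf_buffer__new() signature change: the sample callback and its ctx moved out of struct perf_buffer_opts into explicit parameters, leaving opts for future fields. Both generations, sketched:

	#include <bpf/libbpf.h>

	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
	{
		/* consume one record; data points into the ring, copy out if kept */
	}

	static struct perf_buffer *open_pb(int map_fd, void *ctx)
	{
		/* newer signature: callbacks and ctx as direct arguments */
		return perf_buffer__new(map_fd, 8 /* pages per CPU ring */,
					on_sample, NULL /* lost_cb */, ctx, NULL /* opts */);
	}

	/* older signature, for contrast:
	 *	struct perf_buffer_opts pb_opts = { .sample_cb = on_sample, .ctx = ctx };
	 *	pb = perf_buffer__new(map_fd, 8, &pb_opts);
	 */
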
__u8 *buff; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); - if (!ASSERT_OK_PTR(map, "map")) - return; - if (!ASSERT_TRUE(bpf_map__is_internal(map), "is_internal")) - return; - - /* ensure we can lookup internal maps by their ELF names */ - map2 = bpf_object__find_map_by_name(obj, ".rodata"); - if (!ASSERT_EQ(map, map2, "same_maps")) + if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) return; map_fd = bpf_map__fd(map); @@ -136,7 +129,7 @@ void test_global_data(void) struct bpf_object *obj; int err, prog_fd; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK(err, "load program", "error %d loading %s\n", err, file)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index 1db86eab10..ee46b11f1f 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -16,7 +16,7 @@ void test_global_data_init(void) if (CHECK_FAIL(err)) return; - map = bpf_object__find_map_by_name(obj, ".rodata"); + map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c index 93a2439237..8bcc286910 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c +++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c @@ -44,7 +44,7 @@ void test_global_func_args(void) struct bpf_object *obj; int err, prog_fd; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK(err, "load program", "error %d loading %s\n", err, file)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index ce10d2fc3a..ddfb6bf971 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include "kfree_skb.skel.h" struct meta { int ifindex; @@ -49,8 +48,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) *(bool *)ctx = true; } -/* TODO: fix kernel panic caused by this test in parallel mode */ -void serial_test_kfree_skb(void) +void test_kfree_skb(void) { struct __sk_buff skb = {}; struct bpf_prog_test_run_attr tattr = { @@ -59,43 +57,63 @@ void serial_test_kfree_skb(void) .ctx_in = &skb, .ctx_size_in = sizeof(skb), }; - struct kfree_skb *skel = NULL; - struct bpf_link *link; - struct bpf_object *obj; + struct bpf_prog_load_attr attr = { + .file = "./kfree_skb.o", + }; + + struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL; + struct bpf_map *perf_buf_map, *global_data; + struct bpf_program *prog, *fentry, *fexit; + struct bpf_object *obj, *obj2 = NULL; + struct perf_buffer_opts pb_opts = {}; struct perf_buffer *pb = NULL; - int err; + int err, kfree_skb_fd; bool passed = false; __u32 duration = 0; const int zero = 0; bool test_ok[2]; - err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &tattr.prog_fd); if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) return; - skel = kfree_skb__open_and_load(); - if 
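
The "test_glo.rodata" vs ".rodata" switch in the global_data hunks is about map naming: the kernel caps map names at 15 characters (BPF_OBJ_NAME_LEN), so libbpf names internal maps with a truncated object prefix, e.g. test_global_data.o becomes "test_glo.rodata". Newer libbpf additionally resolves the plain ELF section name. A lookup helper covering both, as a sketch:

	#include <bpf/libbpf.h>

	static struct bpf_map *find_rodata(struct bpf_object *obj)
	{
		struct bpf_map *map;

		/* newer libbpf matches the raw ELF section name ... */
		map = bpf_object__find_map_by_name(obj, ".rodata");
		if (!map)
			/* ... older libbpf only knows the truncated "<obj>.rodata" form */
			map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
		return map;
	}
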
(!ASSERT_OK_PTR(skel, "kfree_skb_skel")) + err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) goto close_prog; - link = bpf_program__attach_raw_tracepoint(skel->progs.trace_kfree_skb, NULL); + prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb"); + if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n")) + goto close_prog; + fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans"); + if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n")) + goto close_prog; + fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans"); + if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) + goto close_prog; + + global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss"); + if (CHECK(!global_data, "find global data", "not found\n")) + goto close_prog; + + link = bpf_program__attach_raw_tracepoint(prog, NULL); if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; - skel->links.trace_kfree_skb = link; - - link = bpf_program__attach_trace(skel->progs.fentry_eth_type_trans); - if (!ASSERT_OK_PTR(link, "attach fentry")) + link_fentry = bpf_program__attach_trace(fentry); + if (!ASSERT_OK_PTR(link_fentry, "attach fentry")) goto close_prog; - skel->links.fentry_eth_type_trans = link; - - link = bpf_program__attach_trace(skel->progs.fexit_eth_type_trans); - if (!ASSERT_OK_PTR(link, "attach fexit")) + link_fexit = bpf_program__attach_trace(fexit); + if (!ASSERT_OK_PTR(link_fexit, "attach fexit")) + goto close_prog; + + perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); + if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n")) goto close_prog; - skel->links.fexit_eth_type_trans = link; /* set up perf buffer */ - pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, - on_sample, NULL, &passed, NULL); + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; @@ -116,7 +134,7 @@ void serial_test_kfree_skb(void) */ ASSERT_TRUE(passed, "passed"); - err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.bss), &zero, test_ok); + err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok); if (CHECK(err, "get_result", "failed to get output data: %d\n", err)) goto close_prog; @@ -124,6 +142,9 @@ void serial_test_kfree_skb(void) CHECK_FAIL(!test_ok[0] || !test_ok[1]); close_prog: perf_buffer__free(pb); + bpf_link__destroy(link); + bpf_link__destroy(link_fentry); + bpf_link__destroy(link_fexit); bpf_object__close(obj); - kfree_skb__destroy(skel); + bpf_object__close(obj2); } diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 7d7445ccc1..9611f2bc50 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -4,14 +4,13 @@ #include #include "kfunc_call_test.lskel.h" #include "kfunc_call_test_subprog.skel.h" -#include "kfunc_call_test_subprog.lskel.h" static void test_main(void) { - struct kfunc_call_test_lskel *skel; + struct kfunc_call_test *skel; int prog_fd, retval, err; - skel = kfunc_call_test_lskel__open_and_load(); + skel = kfunc_call_test__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; @@ -27,7 +26,7 @@ static void test_main(void) ASSERT_OK(err, "bpf_prog_test_run(test2)"); ASSERT_EQ(retval, 3, "test2-retval"); - kfunc_call_test_lskel__destroy(skel); + 
kfunc_call_test__destroy(skel); } static void test_subprog(void) @@ -50,26 +49,6 @@ static void test_subprog(void) kfunc_call_test_subprog__destroy(skel); } -static void test_subprog_lskel(void) -{ - struct kfunc_call_test_subprog_lskel *skel; - int prog_fd, retval, err; - - skel = kfunc_call_test_subprog_lskel__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel")) - return; - - prog_fd = skel->progs.kfunc_call_test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); - ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(retval, 10, "test1-retval"); - ASSERT_NEQ(skel->data->active_res, -1, "active_res"); - ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res"); - - kfunc_call_test_subprog_lskel__destroy(skel); -} - void test_kfunc_call(void) { if (test__start_subtest("main")) @@ -77,7 +56,4 @@ void test_kfunc_call(void) if (test__start_subtest("subprog")) test_subprog(); - - if (test__start_subtest("subprog_lskel")) - test_subprog_lskel(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index f6933b06da..cf3acfa5a9 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -7,8 +7,6 @@ #include "test_ksyms_btf.skel.h" #include "test_ksyms_btf_null_check.skel.h" #include "test_ksyms_weak.skel.h" -#include "test_ksyms_weak.lskel.h" -#include "test_ksyms_btf_write_check.skel.h" static int duration; @@ -91,11 +89,11 @@ static void test_weak_syms(void) int err; skel = test_ksyms_weak__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_ksyms_weak__open_and_load")) + if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n")) return; err = test_ksyms_weak__attach(skel); - if (!ASSERT_OK(err, "test_ksyms_weak__attach")) + if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err)) goto cleanup; /* trigger tracepoint */ @@ -111,43 +109,6 @@ static void test_weak_syms(void) test_ksyms_weak__destroy(skel); } -static void test_weak_syms_lskel(void) -{ - struct test_ksyms_weak_lskel *skel; - struct test_ksyms_weak_lskel__data *data; - int err; - - skel = test_ksyms_weak_lskel__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_ksyms_weak_lskel__open_and_load")) - return; - - err = test_ksyms_weak_lskel__attach(skel); - if (!ASSERT_OK(err, "test_ksyms_weak_lskel__attach")) - goto cleanup; - - /* trigger tracepoint */ - usleep(1); - - data = skel->data; - ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym"); - ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym"); - ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym"); - ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym"); - -cleanup: - test_ksyms_weak_lskel__destroy(skel); -} - -static void test_write_check(void) -{ - struct test_ksyms_btf_write_check *skel; - - skel = test_ksyms_btf_write_check__open_and_load(); - ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n"); - - test_ksyms_btf_write_check__destroy(skel); -} - void test_ksyms_btf(void) { int percpu_datasec; @@ -175,10 +136,4 @@ void test_ksyms_btf(void) if (test__start_subtest("weak_ksyms")) test_weak_syms(); - - if (test__start_subtest("weak_ksyms_lskel")) - test_weak_syms_lskel(); - - if (test__start_subtest("write_check")) - test_write_check(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index 
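
The ksyms_btf hunks also toggle between the two assertion styles in test_progs.h: the ASSERT_*() helpers name the check and return its outcome, while the older CHECK() macro needs a local duration in scope and an explicit printf-style message. Side by side, as a sketch:

	#include <test_progs.h>	/* selftests harness: provides CHECK() and ASSERT_*() */

	static int duration;	/* CHECK() expands to code reading this */

	static int validate_skel(void *skel)
	{
		/* newer style: condition comes back out, message is derived */
		if (!ASSERT_OK_PTR(skel, "skel_open"))
			return -1;

		/* older style: explicit condition, tag, and message */
		if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
			return -1;

		return 0;
	}
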
d490ad80ec..2cd5cded54 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -2,61 +2,30 @@ /* Copyright (c) 2021 Facebook */ #include -#include +#include +#include #include "test_ksyms_module.lskel.h" -#include "test_ksyms_module.skel.h" -void test_ksyms_module_lskel(void) -{ - struct test_ksyms_module_lskel *skel; - int retval; - int err; - - if (!env.has_testmod) { - test__skip(); - return; - } - - skel = test_ksyms_module_lskel__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load")) - return; - err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); - if (!ASSERT_OK(err, "bpf_prog_test_run")) - goto cleanup; - ASSERT_EQ(retval, 0, "retval"); - ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); -cleanup: - test_ksyms_module_lskel__destroy(skel); -} - -void test_ksyms_module_libbpf(void) -{ - struct test_ksyms_module *skel; - int retval, err; - - if (!env.has_testmod) { - test__skip(); - return; - } - - skel = test_ksyms_module__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) - return; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, - sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); - if (!ASSERT_OK(err, "bpf_prog_test_run")) - goto cleanup; - ASSERT_EQ(retval, 0, "retval"); - ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); -cleanup: - test_ksyms_module__destroy(skel); -} +static int duration; void test_ksyms_module(void) { - if (test__start_subtest("lskel")) - test_ksyms_module_lskel(); - if (test__start_subtest("libbpf")) - test_ksyms_module_libbpf(); + struct test_ksyms_module* skel; + int err; + + skel = test_ksyms_module__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = test_ksyms_module__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->triggered, true, "triggered"); + ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val"); + +cleanup: + test_ksyms_module__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 540ef28fab..8073105548 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -30,7 +30,7 @@ static void test_l4lb(const char *file) char buf[128]; u32 *magic = (u32 *)buf; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c index 4e0b2ec057..5a2a689dbb 100644 --- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c +++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c @@ -27,7 +27,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; - err = bpf_prog_test_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB, + err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK_FAIL(err)) goto close_server_fd; diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index 23d19e9cf2..ce17b1ed87 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -53,9 +53,9 @@ void test_map_lock(void) int err = 0, key = 0, i; void *ret; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK_FAIL(err)) { - printf("test_map_lock:bpf_prog_test_load errno %d\n", errno); + printf("test_map_lock:bpf_prog_load errno %d\n", errno); goto close_prog; } map_fd[0] = bpf_find_map(__func__, obj, "hash_map"); diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c index 273725504f..4972f92205 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -4,29 +4,31 @@ #include #include -#include "map_ptr_kern.lskel.h" +#include "map_ptr_kern.skel.h" void test_map_ptr(void) { - struct map_ptr_kern_lskel *skel; + struct map_ptr_kern *skel; __u32 duration = 0, retval; char buf[128]; int err; int page_size = getpagesize(); - skel = map_ptr_kern_lskel__open(); + skel = map_ptr_kern__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->maps.m_ringbuf.max_entries = page_size; + err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto cleanup; - err = map_ptr_kern_lskel__load(skel); + err = map_ptr_kern__load(skel); if (!ASSERT_OK(err, "skel_load")) goto cleanup; skel->bss->page_size = page_size; - err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4, + err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, sizeof(pkt_v4), buf, NULL, &retval, NULL); if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) @@ -37,5 +39,5 @@ void test_map_ptr(void) goto cleanup; cleanup: - map_ptr_kern_lskel__destroy(skel); + map_ptr_kern__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c index eb2feaac81..3c85247f96 100644 --- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -541,7 +541,7 @@ static void run_test(struct migrate_reuseport_test_case *test_case, } } -void serial_test_migrate_reuseport(void) +void test_migrate_reuseport(void) { struct test_migrate_reuseport *skel; int i; diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index b772fe30ce..97fec70c60 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -53,8 +53,7 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) modify_return__destroy(skel); } -/* TODO: conflict with get_func_ip_test */ -void serial_test_modify_return(void) +void test_modify_return(void) { run_test(0 /* input_retval */, 1 /* want_side_effect */, diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d0e50dcf4..d85a69b7ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -2,33 +2,46 @@ /* Copyright (c) 2020 Facebook */ #include -#include #include "test_module_attach.skel.h" static int duration; -static int trigger_module_test_writable(int *val) +static int trigger_module_test_read(int read_sz) { int fd, err; - char 
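
map_ptr above resizes the ringbuf map before load, and the mechanism differs per skeleton flavor: regular skeletons expose maps as opaque struct bpf_map pointers that go through libbpf setters, while light skeletons expose plain data that is written directly. Sketched against the regular skeleton named in the hunk:

	#include "map_ptr_kern.skel.h"	/* regular skeleton, per the older side of the hunk */

	static int size_ringbuf(struct map_ptr_kern *skel, int page_size)
	{
		/* regular skeleton: maps are struct bpf_map *, use the setter ...
		 * ... the light-skeleton equivalent is a direct field write:
		 *	skel->maps.m_ringbuf.max_entries = page_size;
		 */
		return bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size);
	}
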
buf[65]; - ssize_t rd; - fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); + fd = open("/sys/kernel/bpf_testmod", O_RDONLY); err = -errno; - if (!ASSERT_GE(fd, 0, "testmode_file_open")) + if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) return err; - rd = read(fd, buf, sizeof(buf) - 1); + read(fd, NULL, read_sz); + close(fd); + + return 0; +} + +static int trigger_module_test_write(int write_sz) +{ + int fd, err; + char *buf = malloc(write_sz); + + if (!buf) + return -ENOMEM; + + memset(buf, 'a', write_sz); + buf[write_sz-1] = '\0'; + + fd = open("/sys/kernel/bpf_testmod", O_WRONLY); err = -errno; - if (!ASSERT_GT(rd, 0, "testmod_file_rd_val")) { - close(fd); + if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) { + free(buf); return err; } - buf[rd] = '\0'; - *val = strtol(buf, NULL, 0); + write(fd, buf, write_sz); close(fd); - + free(buf); return 0; } @@ -45,7 +58,6 @@ void test_module_attach(void) struct test_module_attach__bss *bss; struct bpf_link *link; int err; - int writable_val = 0; skel = test_module_attach__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -78,14 +90,6 @@ void test_module_attach(void) ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); - bss->raw_tp_writable_bare_early_ret = true; - bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4; - ASSERT_OK(trigger_module_test_writable(&writable_val), - "trigger_writable"); - ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in"); - ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val, - "writable_test_out"); - test_module_attach__detach(skel); /* attach fentry/fexit and make sure it get's module reference */ diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c index 954964f0ac..6ede48bde9 100644 --- a/tools/testing/selftests/bpf/prog_tests/netcnt.c +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -8,7 +8,7 @@ #define CG_NAME "/netcnt" -void serial_test_netcnt(void) +void test_netcnt(void) { union percpu_net_cnt *percpu_netcnt = NULL; struct bpf_cgroup_storage_key key; diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 24d493482f..2535788e13 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -78,8 +78,7 @@ static void test_ns_current_pid_tgid_new_ns(void) return; } -/* TODO: use a different tracepoint */ -void serial_test_ns_current_pid_tgid(void) +void test_ns_current_pid_tgid(void) { if (test__start_subtest("ns_current_pid_tgid_root_ns")) test_current_pid_tgid(NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 5fc2b3a071..7daaaab136 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -43,10 +43,10 @@ int trigger_on_cpu(int cpu) return 0; } -void serial_test_perf_buffer(void) +void test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i, j; - int zero = 0, my_pid = getpid(); + int err, on_len, nr_on_cpus = 0, nr_cpus, i; + struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; cpu_set_t cpu_seen; struct perf_buffer *pb; @@ -71,18 +71,15 @@ void serial_test_perf_buffer(void) if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) goto out_close; - err = 
-	err = bpf_map_update_elem(bpf_map__fd(skel->maps.my_pid_map), &zero, &my_pid, 0);
-	if (!ASSERT_OK(err, "my_pid_update"))
-		goto out_close;
-
 	/* attach probe */
 	err = test_perf_buffer__attach(skel);
 	if (CHECK(err, "attach_kprobe", "err %d\n", err))
 		goto out_close;
 	/* set up perf buffer */
-	pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1,
-			      on_sample, NULL, &cpu_seen, NULL);
+	pb_opts.sample_cb = on_sample;
+	pb_opts.ctx = &cpu_seen;
+	pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
 	if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
 		goto out_close;
@@ -114,15 +111,15 @@ void serial_test_perf_buffer(void)
 		  "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus))
 		goto out_close;
-	for (i = 0, j = 0; i < nr_cpus; i++) {
+	for (i = 0; i < nr_cpus; i++) {
 		if (i >= on_len || !online[i])
 			continue;
-		fd = perf_buffer__buffer_fd(pb, j);
+		fd = perf_buffer__buffer_fd(pb, i);
 		CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
 		last_fd = fd;
-		err = perf_buffer__consume_buffer(pb, j);
+		err = perf_buffer__consume_buffer(pb, i);
 		if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
 			goto out_close;
@@ -130,13 +127,12 @@ void serial_test_perf_buffer(void)
 		if (trigger_on_cpu(i))
 			goto out_close;
-		err = perf_buffer__consume_buffer(pb, j);
-		if (CHECK(err, "consume_buf", "cpu %d, err %d\n", j, err))
+		err = perf_buffer__consume_buffer(pb, i);
+		if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err))
 			goto out_close;
 		if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
 			goto out_close;
-		j++;
 	}
 out_free_pb:
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
index ede07344f2..b1abd0c466 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -23,8 +23,7 @@ static void burn_cpu(void)
 		++j;
 }
-/* TODO: often fails in concurrent mode */
-void serial_test_perf_link(void)
+void test_perf_link(void)
 {
 	struct test_perf_link *skel = NULL;
 	struct perf_event_attr attr;
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index 31c09ba577..d4b953ae34 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -241,8 +241,8 @@ void test_pinning(void)
 		goto out;
 	}
-	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(__u32),
-				sizeof(__u64), 1, NULL);
+	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
+				sizeof(__u64), 1, 0);
 	if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
 		goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 6628710ec3..44b514fabc 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -9,7 +9,7 @@ void test_pkt_access(void)
 	__u32 duration, retval;
 	int err, prog_fd;
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
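
Many tests in this patch swap bpf_prog_test_load() back to the older libbpf bpf_prog_load(). The two share a signature, so those hunks are mechanical; for reference, a sketch of both declarations (the wrapper's extra behavior is stated here as an assumption, not taken from this patch):

	/* selftests helper (testing_helpers.c); assumed to set test-only
	 * load flags such as BPF_F_TEST_RND_HI32 before loading: */
	int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
			       struct bpf_object **pobj, int *prog_fd);
	/* older libbpf file-based loader being restored: */
	int bpf_prog_load(const char *file, enum bpf_prog_type type,
			  struct bpf_object **pobj, int *prog_fd);
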
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index c9d2d6a1bf..939015cd6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -9,7 +9,7 @@ void test_pkt_md_access(void)
 	__u32 duration, retval;
 	int err, prog_fd;
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index abf890d066..95bd120973 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -1,10 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
-/* TODO: corrupts other tests uses connect() */
-void serial_test_probe_user(void)
+void test_probe_user(void)
 {
-	const char *prog_name = "handle_sys_connect";
+	const char *prog_name = "kprobe/__sys_connect";
 	const char *obj_file = "./test_probe_user.o";
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
 	int err, results_map_fd, sock_fd, duration = 0;
@@ -19,7 +18,7 @@ void serial_test_probe_user(void)
 	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 		return;
-	kprobe_prog = bpf_object__find_program_by_name(obj, prog_name);
+	kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
 	if (CHECK(!kprobe_prog, "find_probe",
 		  "prog '%s' not found\n", prog_name))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index b9822f914e..f47e7b1cb3 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type)
 	int i, err, prog_fd, map_in_fd, map_out_fd;
 	char file[32], buf[128];
 	struct bpf_object *obj;
-	struct iphdr iph;
+	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
 	/* Fill test values to be used */
 	for (i = 0; i < MAP_SIZE; i++)
@@ -27,7 +27,7 @@ static void test_queue_stack_map_by_type(int type)
 	else
 		return;
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
@@ -60,17 +60,15 @@ static void test_queue_stack_map_by_type(int type)
 		err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 					buf, &size, &retval, &duration);
-		if (err || retval || size != sizeof(pkt_v4))
-			break;
-		memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-		if (iph.daddr != val)
+		if (err || retval || size != sizeof(pkt_v4) ||
+		    iph->daddr != val)
 			break;
 	}
-	CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val,
+	CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
 	      "bpf_map_pop_elem",
 	      "err %d errno %d retval %d size %d iph->daddr %u\n",
-	      err, errno, retval, size, iph.daddr);
+	      err, errno, retval, size, iph->daddr);
 	/* Queue is empty, program should return TC_ACT_SHOT */
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
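
In the probe_user hunk above, the lookup key changes together with the lookup function: bpf_object__find_program_by_name() matches the C function name, while the restored (older) bpf_object__find_program_by_title() matches the SEC() string, i.e. the ELF section name. Both strings are taken from the hunk; a sketch of the program they refer to:

	SEC("kprobe/__sys_connect")
	int handle_sys_connect(struct pt_regs *ctx)
	{
		return 0; /* body elided in this sketch */
	}
	/* find_program_by_name(obj, "handle_sys_connect")
	 * vs find_program_by_title(obj, "kprobe/__sys_connect") */
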
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
index e2f1445b0e..9807336a30 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
@@ -18,15 +18,15 @@ void test_raw_tp_writable_reject_nbd_invalid(void)
 		BPF_EXIT_INSN(),
 	};
-	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+	struct bpf_load_program_attr load_attr = {
+		.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+		.license = "GPL v2",
+		.insns = program,
+		.insns_cnt = sizeof(program) / sizeof(struct bpf_insn),
 		.log_level = 2,
-		.log_buf = error,
-		.log_size = sizeof(error),
-	);
+	};
-	bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
-			       program, sizeof(program) / sizeof(struct bpf_insn),
-			       &opts);
+	bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
 	if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load",
 		  "failed: %d errno %d\n", bpf_fd, errno))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
index 239baccabc..5c45424cac 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -3,8 +3,7 @@
 #include <test_progs.h>
 #include <linux/nbd.h>
-/* NOTE: conflict with other tests. */
-void serial_test_raw_tp_writable_test_run(void)
+void test_raw_tp_writable_test_run(void)
 {
 	__u32 duration = 0;
 	char error[4096];
@@ -17,15 +16,15 @@ void serial_test_raw_tp_writable_test_run(void)
 		BPF_EXIT_INSN(),
 	};
-	LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
+	struct bpf_load_program_attr load_attr = {
+		.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+		.license = "GPL v2",
+		.insns = trace_program,
+		.insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
 		.log_level = 2,
-		.log_buf = error,
-		.log_size = sizeof(error),
-	);
+	};
-	int bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
-				   trace_program, sizeof(trace_program) / sizeof(struct bpf_insn),
-				   &trace_opts);
+	int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
 	if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded",
 		  "failed: %d errno %d\n", bpf_fd, errno))
 		return;
@@ -35,14 +34,15 @@ void serial_test_raw_tp_writable_test_run(void)
 		BPF_EXIT_INSN(),
 	};
-	LIBBPF_OPTS(bpf_prog_load_opts, skb_opts,
-		    .log_buf = error,
-		    .log_size = sizeof(error),
-	);
+	struct bpf_load_program_attr skb_load_attr = {
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.license = "GPL v2",
+		.insns = skb_program,
+		.insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
+	};
-	int filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL v2",
-				      skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
-				      &skb_opts);
+	int filter_fd =
+		bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
 	if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
 		  filter_fd, errno))
 		goto out_bpffd;
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index fd5d2ddfb0..5f9eaa3ab5 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -37,7 +37,7 @@ void test_rdonly_maps(void)
 	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
 		goto cleanup;
-	bss_map = bpf_object__find_map_by_name(obj, ".bss");
+	bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss");
 	if (CHECK(!bss_map, "find_bss_map", "failed\n"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c
index f3af2627b5..0e378d63fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursion.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursion.c
@@ -20,18 +20,18 @@ void test_recursion(void)
 		goto out;
 	ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0");
-	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
 	ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1");
-	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
 	ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2");
 	ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0");
-	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
 	ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1");
-	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
 	ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2");
-	err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_delete),
+	err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup),
 				     &prog_info, &prog_info_len);
 	if (!ASSERT_OK(err, "get_prog_info"))
 		goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 873323fb18..4e91f4d646 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -1,21 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
-static void toggle_object_autoload_progs(const struct bpf_object *obj,
-					 const char *name_load)
-{
-	struct bpf_program *prog;
-
-	bpf_object__for_each_program(prog, obj) {
-		const char *name = bpf_program__name(prog);
-
-		if (!strcmp(name_load, name))
-			bpf_program__set_autoload(prog, true);
-		else
-			bpf_program__set_autoload(prog, false);
-	}
-}
-
 void test_reference_tracking(void)
 {
 	const char *file = "test_sk_lookup_kern.o";
@@ -24,49 +9,44 @@ void test_reference_tracking(void)
 		.object_name = obj_name,
 		.relaxed_maps = true,
 	);
-	struct bpf_object *obj_iter, *obj = NULL;
+	struct bpf_object *obj;
 	struct bpf_program *prog;
 	__u32 duration = 0;
 	int err = 0;
-	obj_iter = bpf_object__open_file(file, &open_opts);
-	if (!ASSERT_OK_PTR(obj_iter, "obj_iter_open_file"))
+	obj = bpf_object__open_file(file, &open_opts);
+	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 		return;
-	if (CHECK(strcmp(bpf_object__name(obj_iter), obj_name), "obj_name",
+	if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
 		  "wrong obj name '%s', expected '%s'\n",
-		  bpf_object__name(obj_iter), obj_name))
+		  bpf_object__name(obj), obj_name))
 		goto cleanup;
-	bpf_object__for_each_program(prog, obj_iter) {
-		const char *name;
+	bpf_object__for_each_program(prog, obj) {
+		const char *title;
-		name = bpf_program__name(prog);
-		if (!test__start_subtest(name))
+		/* Ignore .text sections */
+		title = bpf_program__section_name(prog);
+		if (strstr(title, ".text") != NULL)
 			continue;
-		obj = bpf_object__open_file(file, &open_opts);
-		if (!ASSERT_OK_PTR(obj, "obj_open_file"))
-			goto cleanup;
+		if (!test__start_subtest(title))
+			continue;
-		toggle_object_autoload_progs(obj, name);
 		/* Expect verifier failure if test name has 'err' */
-		if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
+		if (strstr(title, "err_") != NULL) {
 			libbpf_print_fn_t old_print_fn;
 			old_print_fn = libbpf_set_print(NULL);
-			err = !bpf_object__load(obj);
+			err = !bpf_program__load(prog, "GPL", 0);
 			libbpf_set_print(old_print_fn);
 		} else {
-			err = bpf_object__load(obj);
+			err = bpf_program__load(prog, "GPL", 0);
 		}
-		ASSERT_OK(err, name);
-
-		bpf_object__close(obj);
-		obj = NULL;
+		CHECK(err, title, "\n");
 	}
 cleanup:
 	bpf_object__close(obj);
-	bpf_object__close(obj_iter);
 }
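
The deleted toggle_object_autoload_progs() helper shows the object-level pattern this revert abandons: open the object once per subtest, enable autoload for exactly one program, then load the whole object. A sketch of that flow:

	bpf_object__for_each_program(prog, obj)
		bpf_program__set_autoload(prog,
					  !strcmp(bpf_program__name(prog), name));
	err = bpf_object__load(obj); /* loads only the autoload-enabled program */

The restored code instead loads each program individually via the deprecated bpf_program__load(prog, "GPL", 0), which predates per-program autoload control.
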
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index f4a13d9dd5..f62361306f 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -106,9 +106,9 @@ static int resolve_symbols(void)
 		  "Failed to load BTF from btf_data.o\n"))
 		return -1;
-	nr = btf__type_cnt(btf);
+	nr = btf__get_nr_types(btf);
-	for (type_id = 1; type_id < nr; type_id++) {
+	for (type_id = 1; type_id <= nr; type_id++) {
 		if (__resolve_symbol(btf, type_id))
 			break;
 	}
@@ -117,14 +117,14 @@ static int resolve_symbols(void)
 	return 0;
 }
-void test_resolve_btfids(void)
+int test_resolve_btfids(void)
 {
 	__u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
 	unsigned int i, j;
 	int ret = 0;
 	if (resolve_symbols())
-		return;
+		return -1;
 	/* Check BTF_ID_LIST(test_list_local) and
 	 * BTF_ID_LIST_GLOBAL(test_list_global) IDs
@@ -138,7 +138,7 @@ void test_resolve_btfids(void)
 				  test_symbols[i].name,
 				  test_list[i], test_symbols[i].id);
 			if (ret)
-				return;
+				return ret;
 		}
 	}
@@ -161,7 +161,9 @@ void test_resolve_btfids(void)
 		if (i > 0) {
 			if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check"))
-				return;
+				return -1;
 		}
 	}
+
+	return ret;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 9a80fe8a64..4706cee843 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -58,7 +58,7 @@ static int process_sample(void *ctx, void *data, size_t len)
 	}
 }
-static struct test_ringbuf_lskel *skel;
+static struct test_ringbuf *skel;
 static struct ring_buffer *ringbuf;
 static void trigger_samples()
@@ -90,13 +90,13 @@ void test_ringbuf(void)
 	int page_size = getpagesize();
 	void *mmap_ptr, *tmp_ptr;
-	skel = test_ringbuf_lskel__open();
+	skel = test_ringbuf__open();
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 	skel->maps.ringbuf.max_entries = page_size;
-	err = test_ringbuf_lskel__load(skel);
+	err = test_ringbuf__load(skel);
 	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
 		goto cleanup;
@@ -154,7 +154,7 @@ void test_ringbuf(void)
 	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
 		goto cleanup;
-	err = test_ringbuf_lskel__attach(skel);
+	err = test_ringbuf__attach(skel);
 	if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
 		goto cleanup;
@@ -292,8 +292,8 @@ void test_ringbuf(void)
 	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
 	      1L, skel->bss->discarded);
-	test_ringbuf_lskel__detach(skel);
+	test_ringbuf__detach(skel);
 cleanup:
 	ring_buffer__free(ringbuf);
-	test_ringbuf_lskel__destroy(skel);
+	test_ringbuf__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index e945195b24..167cd8a2ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -62,8 +62,8 @@ void test_ringbuf_multi(void)
 	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
 		goto cleanup;
-	proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL);
-	if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n"))
+	proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
+	if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
 		goto cleanup;
 	err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
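
The resolve_btfids hunk above is not just a rename: the two BTF counting APIs have different iteration bounds, which is why the loop condition flips between < and <=. For reference:

	nr = btf__get_nr_types(btf);	/* old: count excluding the implicit 'void' (id 0) */
	for (id = 1; id <= nr; id++)	/*      valid ids are 1..nr, inclusive            */
		use_type(btf, id);
	n = btf__type_cnt(btf);		/* new: count including id 0                       */
	for (id = 1; id < n; id++)	/*      valid ids are 1..n-1                       */
		use_type(btf, id);

(use_type() stands in for whatever per-type work the caller does.)
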
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 1cbd8cd640..4efd337d6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "test_progs.h"
@@ -65,20 +66,29 @@ static union sa46 {
 static int create_maps(enum bpf_map_type inner_type)
 {
-	LIBBPF_OPTS(bpf_map_create_opts, opts);
+	struct bpf_create_map_attr attr = {};
 	inner_map_type = inner_type;
 	/* Creating reuseport_array */
-	reuseport_array = bpf_map_create(inner_type, "reuseport_array",
-					 sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL);
+	attr.name = "reuseport_array";
+	attr.map_type = inner_type;
+	attr.key_size = sizeof(__u32);
+	attr.value_size = sizeof(__u32);
+	attr.max_entries = REUSEPORT_ARRAY_SIZE;
+
+	reuseport_array = bpf_create_map_xattr(&attr);
 	RET_ERR(reuseport_array < 0, "creating reuseport_array",
 		"reuseport_array:%d errno:%d\n", reuseport_array, errno);
 	/* Creating outer_map */
-	opts.inner_map_fd = reuseport_array;
-	outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map",
-				   sizeof(__u32), sizeof(__u32), 1, &opts);
+	attr.name = "outer_map";
+	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
+	attr.key_size = sizeof(__u32);
+	attr.value_size = sizeof(__u32);
+	attr.max_entries = 1;
+	attr.inner_map_fd = reuseport_array;
+	outer_map = bpf_create_map_xattr(&attr);
 	RET_ERR(outer_map < 0, "creating outer_map",
 		"outer_map:%d errno:%d\n", outer_map, errno);
@@ -104,7 +114,7 @@ static int prepare_bpf_obj(void)
 	err = bpf_object__load(obj);
 	RET_ERR(err, "load bpf_object", "err:%d\n", err);
-	prog = bpf_object__next_program(obj, NULL);
+	prog = bpf_program__next(NULL, obj);
 	RET_ERR(!prog, "get first bpf_program", "!prog\n");
 	select_by_skb_data_prog = bpf_program__fd(prog);
 	RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
@@ -848,7 +858,7 @@ void test_map_type(enum bpf_map_type mt)
 	cleanup();
 }
-void serial_test_select_reuseport(void)
+void test_select_reuseport(void)
 {
 	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
 	if (saved_tcp_fo < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
index 15dacfcfaa..189a34a7ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
@@ -25,8 +25,7 @@ static void *worker(void *p)
 	return NULL;
 }
-/* NOTE: cause events loss */
-void serial_test_send_signal_sched_switch(void)
+void test_send_signal_sched_switch(void)
 {
 	struct test_send_signal_kern *skel;
 	pthread_t threads[THREAD_COUNT];
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index aecfe662c0..dfcbddcbe4 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -22,7 +22,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
 		prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
 	prog[ARRAY_SIZE(prog) - 1] = BPF_EXIT_INSN();
-	prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog),
+	prog_fd = bpf_load_program(prog_type, prog, ARRAY_SIZE(prog),
 				   "GPL", 0, NULL, 0);
 	CHECK(prog_fd < 0, "test-run", "errno %d\n", errno);
@@ -42,7 +42,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
 	signal(SIGALRM, SIG_DFL);
 }
-void test_signal_pending(void)
+void test_signal_pending(enum bpf_prog_type prog_type)
 {
 	test_signal_pending_by_type(BPF_PROG_TYPE_SOCKET_FILTER);
 	test_signal_pending_by_type(BPF_PROG_TYPE_FLOW_DISSECTOR);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 1d272e0518..3a469099f3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -48,7 +48,7 @@ configure_stack(void)
 		return false;
 	sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
 		"direct-action object-file ./test_sk_assign.o",
-		"section tc",
+		"section classifier/sk_assign_test",
 		(env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
 	if (CHECK(system(tc_cmd), "BPF load failed;",
 		  "run with -vv for more info\n"))
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 597d0467a9..6db07401bc 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -30,6 +30,7 @@
 #include
 #include "test_progs.h"
+#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "network_helpers.h"
@@ -936,37 +937,6 @@ static void test_drop_on_lookup(struct test_sk_lookup *skel)
 			.connect_to = { EXT_IP6, EXT_PORT },
 			.listen_at = { EXT_IP6, INT_PORT },
 		},
-		/* The program will drop on success, meaning that the ifindex
-		 * was 1.
-		 */
-		{
-			.desc = "TCP IPv4 drop on valid ifindex",
-			.lookup_prog = skel->progs.check_ifindex,
-			.sotype = SOCK_STREAM,
-			.connect_to = { EXT_IP4, EXT_PORT },
-			.listen_at = { EXT_IP4, EXT_PORT },
-		},
-		{
-			.desc = "TCP IPv6 drop on valid ifindex",
-			.lookup_prog = skel->progs.check_ifindex,
-			.sotype = SOCK_STREAM,
-			.connect_to = { EXT_IP6, EXT_PORT },
-			.listen_at = { EXT_IP6, EXT_PORT },
-		},
-		{
-			.desc = "UDP IPv4 drop on valid ifindex",
-			.lookup_prog = skel->progs.check_ifindex,
-			.sotype = SOCK_DGRAM,
-			.connect_to = { EXT_IP4, EXT_PORT },
-			.listen_at = { EXT_IP4, EXT_PORT },
-		},
-		{
-			.desc = "UDP IPv6 drop on valid ifindex",
-			.lookup_prog = skel->progs.check_ifindex,
-			.sotype = SOCK_DGRAM,
-			.connect_to = { EXT_IP6, EXT_PORT },
-			.listen_at = { EXT_IP6, EXT_PORT },
-		},
 	};
 	const struct test *t;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
index 547ae53cde..2b392590e8 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
@@ -105,7 +105,7 @@ static void do_test(void)
 	close(listen_fd);
 }
-void serial_test_sk_storage_tracing(void)
+void test_sk_storage_tracing(void)
 {
 	struct test_sk_storage_trace_itself *skel_itself;
 	int err;
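
In the sk_assign hunk above, the string passed to "tc filter ... section <name>" must match the SEC() annotation inside test_sk_assign.o, so the shell command and the object file move together between naming schemes. A sketch of the program side (body elided; the legacy alternative shown as a comment):

	SEC("tc")                        /* modern name, loaded via "section tc" */
	/* SEC("classifier/sk_assign_test")  legacy name, loaded via
	 *                                   "section classifier/sk_assign_test" */
	int sk_assign_test(struct __sk_buff *skb)
	{
		return 0;
	}

Both section names map to the same program type (SCHED_CLS); only the lookup string differs.
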
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index b5319ba2ee..23915be617 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,14 +11,12 @@ void test_skb_ctx(void)
 		.cb[3] = 4,
 		.cb[4] = 5,
 		.priority = 6,
-		.ingress_ifindex = 11,
 		.ifindex = 1,
 		.tstamp = 7,
 		.wire_len = 100,
 		.gso_segs = 8,
 		.mark = 9,
 		.gso_size = 10,
-		.hwtstamp = 11,
 	};
 	struct bpf_prog_test_run_attr tattr = {
 		.data_in = &pkt_v4,
@@ -32,7 +30,7 @@ void test_skb_ctx(void)
 	int err;
 	int i;
-	err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+	err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &tattr.prog_fd);
 	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
 		return;
@@ -99,10 +97,6 @@ void test_skb_ctx(void)
 		   "ctx_out_ifindex",
 		   "skb->ifindex == %d, expected %d\n",
 		   skb.ifindex, 1);
-	CHECK_ATTR(skb.ingress_ifindex != 11,
-		   "ctx_out_ingress_ifindex",
-		   "skb->ingress_ifindex == %d, expected %d\n",
-		   skb.ingress_ifindex, 11);
 	CHECK_ATTR(skb.tstamp != 8,
 		   "ctx_out_tstamp",
 		   "skb->tstamp == %lld, expected %d\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
index 6f802a1c08..f302ad84a2 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -20,7 +20,7 @@ void test_skb_helpers(void)
 	struct bpf_object *obj;
 	int err;
-	err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+	err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &tattr.prog_fd);
 	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index 180afd632f..f6f130c99b 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -16,13 +16,8 @@ void test_skeleton(void)
 	struct test_skeleton* skel;
 	struct test_skeleton__bss *bss;
 	struct test_skeleton__data *data;
-	struct test_skeleton__data_dyn *data_dyn;
 	struct test_skeleton__rodata *rodata;
-	struct test_skeleton__rodata_dyn *rodata_dyn;
 	struct test_skeleton__kconfig *kcfg;
-	const void *elf_bytes;
-	size_t elf_bytes_sz = 0;
-	int i;
 	skel = test_skeleton__open();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -33,12 +28,7 @@ void test_skeleton(void)
 	bss = skel->bss;
 	data = skel->data;
-	data_dyn = skel->data_dyn;
 	rodata = skel->rodata;
-	rodata_dyn = skel->rodata_dyn;
-
-	ASSERT_STREQ(bpf_map__name(skel->maps.rodata_dyn), ".rodata.dyn", "rodata_dyn_name");
-	ASSERT_STREQ(bpf_map__name(skel->maps.data_dyn), ".data.dyn", "data_dyn_name");
 	/* validate values are pre-initialized correctly */
 	CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1);
@@ -54,12 +44,6 @@ void test_skeleton(void)
 	CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0);
 	CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0);
-	ASSERT_EQ(rodata_dyn->in_dynarr_sz, 0, "in_dynarr_sz");
-	for (i = 0; i < 4; i++)
-		ASSERT_EQ(rodata_dyn->in_dynarr[i], -(i + 1), "in_dynarr");
-	for (i = 0; i < 4; i++)
-		ASSERT_EQ(data_dyn->out_dynarr[i], i + 1, "out_dynarr");
-
 	/* validate we can pre-setup global variables, even in .bss */
 	data->in1 = 10;
 	data->in2 = 11;
@@ -67,10 +51,6 @@ void test_skeleton(void)
 	bss->in4 = 13;
 	rodata->in.in6 = 14;
-	rodata_dyn->in_dynarr_sz = 4;
-	for (i = 0; i < 4; i++)
-		rodata_dyn->in_dynarr[i] = i + 10;
-
 	err = test_skeleton__load(skel);
 	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
 		goto cleanup;
@@ -82,10 +62,6 @@ void test_skeleton(void)
 	CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL);
 	CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14);
-	ASSERT_EQ(rodata_dyn->in_dynarr_sz, 4, "in_dynarr_sz");
-	for (i = 0; i < 4; i++)
-		ASSERT_EQ(rodata_dyn->in_dynarr[i], i + 10, "in_dynarr");
-
 	/* now set new values and attach to get them into outX variables */
 	data->in1 = 1;
 	data->in2 = 2;
@@ -95,8 +71,6 @@ void test_skeleton(void)
 	bss->in5.b = 6;
 	kcfg = skel->kconfig;
-	skel->data_read_mostly->read_mostly_var = 123;
-
 	err = test_skeleton__attach(skel);
 	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
 		goto cleanup;
@@ -117,15 +91,6 @@ void test_skeleton(void)
 	CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
 	      "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);
-	for (i = 0; i < 4; i++)
-		ASSERT_EQ(data_dyn->out_dynarr[i], i + 10, "out_dynarr");
-
-	ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var");
-
-	elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
-	ASSERT_OK_PTR(elf_bytes, "elf_bytes");
-	ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
-
 cleanup:
 	test_skeleton__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 394ebfc3bb..8fd1b4b29a 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -33,7 +33,7 @@
 #define EXP_NO_BUF_RET 29
-static void test_snprintf_positive(void)
+void test_snprintf_positive(void)
 {
 	char exp_addr_out[] = EXP_ADDR_OUT;
 	char exp_sym_out[] = EXP_SYM_OUT;
@@ -103,7 +103,7 @@ static int load_single_snprintf(char *fmt)
 	return ret;
 }
-static void test_snprintf_negative(void)
+void test_snprintf_negative(void)
 {
 	ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
index dd41b826be..76e1f5fe18 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -6,7 +6,7 @@
 /* Demonstrate that bpf_snprintf_btf succeeds and that various data types
  * are formatted correctly.
 */
-void serial_test_snprintf_btf(void)
+void test_snprintf_btf(void)
 {
 	struct netif_receive_skb *skel;
 	struct netif_receive_skb__bss *bss;
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index 9fc040eaa4..577d619fb0 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -15,6 +15,7 @@
 #include "network_helpers.h"
 #include "cgroup_helpers.h"
 #include "test_progs.h"
+#include "bpf_rlimit.h"
 #include "test_sock_fields.skel.h"
 enum bpf_linum_array_idx {
@@ -328,7 +329,7 @@ static void test(void)
 	close(listen_fd);
 }
-void serial_test_sock_fields(void)
+void test_sock_fields(void)
 {
 	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
 	int parent_cg_fd = -1, child_cg_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 85db0f4cdd..1352ec1041 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -91,9 +91,9 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
 	if (CHECK_FAIL(s < 0))
 		return;
-	map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
+	map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
 	if (CHECK_FAIL(map < 0)) {
-		perror("bpf_cmap_create");
+		perror("bpf_create_map");
 		goto out;
 	}
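
The sockmap and ringbuf hunks swap bpf_map_create() for the older bpf_create_map(). For reference, the two libbpf signatures side by side:

	int bpf_create_map(enum bpf_map_type map_type, int key_size,
			   int value_size, int max_entries, __u32 map_flags);
	int bpf_map_create(enum bpf_map_type map_type, const char *map_name,
			   __u32 key_size, __u32 value_size, __u32 max_entries,
			   const struct bpf_map_create_opts *opts);

bpf_create_map() carries no map name and no opts struct, which is why named maps in this patch either lose their names or move to the attr-based bpf_create_map_xattr() variant instead.
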
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index af293ea154..7a0d64fdc1 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -97,7 +97,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
 	char test_name[MAX_TEST_NAME];
 	int map;
-	map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
+	map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
 	if (CHECK_FAIL(map < 0)) {
 		perror("bpf_map_create");
 		return;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 7e21bfab63..d88bb65b74 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -502,8 +502,8 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
 	if (s < 0)
 		return;
-	mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
-			       sizeof(value32), 1, NULL);
+	mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
+			       sizeof(value32), 1, 0);
 	if (mapfd < 0) {
 		FAIL_ERRNO("map_create");
 		goto close;
@@ -2002,7 +2002,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
 		test_udp_unix_redir(skel, map, family);
 }
-void serial_test_sockmap_listen(void)
+void test_sockmap_listen(void)
 {
 	struct test_sockmap_listen *skel;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index cd09f4c7dd..3e8517a839 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -852,21 +852,22 @@ static struct sockopt_test {
 static int load_prog(const struct bpf_insn *insns,
 		     enum bpf_attach_type expected_attach_type)
 {
-	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+	struct bpf_load_program_attr attr = {
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
 		.expected_attach_type = expected_attach_type,
+		.insns = insns,
+		.license = "GPL",
 		.log_level = 2,
-		.log_buf = bpf_log_buf,
-		.log_size = sizeof(bpf_log_buf),
-	);
-	int fd, insns_cnt = 0;
+	};
+	int fd;
 	for (;
-	     insns[insns_cnt].code != (BPF_JMP | BPF_EXIT);
-	     insns_cnt++) {
+	     insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT);
+	     attr.insns_cnt++) {
 	}
-	insns_cnt++;
+	attr.insns_cnt++;
-	fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts);
+	fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf));
 	if (verbose && fd < 0)
 		fprintf(stderr, "%s\n", bpf_log_buf);
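
The load_prog() rewrite above is the instruction-level analogue of the map-creation change: the OPTS-based bpf_prog_load() carries the verifier log buffer inside the opts struct, while the restored bpf_load_program_xattr() takes it as separate trailing arguments. A side-by-side sketch (log_buf, insns and insn_cnt assumed declared by the caller):

	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		    .log_buf = log_buf,
		    .log_size = sizeof(log_buf),
		    .log_level = 2);
	fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL",
			   insns, insn_cnt, &opts);

	struct bpf_load_program_attr attr = {
		.prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
		.insns = insns,
		.insns_cnt = insn_cnt,
		.license = "GPL",
		.log_level = 2,
	};
	fd = bpf_load_program_xattr(&attr, log_buf, sizeof(log_buf));
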
- log_err("Failed to attach %s BPF program", prog_name); + log_err("Failed to attach %s BPF program", title); return -1; } @@ -168,25 +167,25 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, static void run_test(int cgroup_fd) { + struct bpf_prog_load_attr attr = { + .file = "./sockopt_inherit.o", + }; int server_fd = -1, client_fd; struct bpf_object *obj; void *server_err; pthread_t tid; + int ignored; int err; - obj = bpf_object__open_file("sockopt_inherit.o", NULL); - if (!ASSERT_OK_PTR(obj, "obj_open")) + err = bpf_prog_load_xattr(&attr, &obj, &ignored); + if (CHECK_FAIL(err)) return; - err = bpf_object__load(obj); - if (!ASSERT_OK(err, "obj_load")) - goto close_bpf_object; - - err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt"); + err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); if (CHECK_FAIL(err)) goto close_bpf_object; - err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt"); + err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); if (CHECK_FAIL(err)) goto close_bpf_object; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index abce12ddcc..51fac975b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -2,7 +2,7 @@ #include #include "cgroup_helpers.h" -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) +static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) { enum bpf_attach_type attach_type; enum bpf_prog_type prog_type; @@ -15,23 +15,23 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, return -1; } - prog = bpf_object__find_program_by_name(obj, name); + prog = bpf_object__find_program_by_title(obj, title); if (!prog) { - log_err("Failed to find %s BPF program", name); + log_err("Failed to find %s BPF program", title); return -1; } err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, attach_type, BPF_F_ALLOW_MULTI); if (err) { - log_err("Failed to attach %s BPF program", name); + log_err("Failed to attach %s BPF program", title); return -1; } return 0; } -static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) +static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title) { enum bpf_attach_type attach_type; enum bpf_prog_type prog_type; @@ -42,7 +42,7 @@ static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, if (err) return -1; - prog = bpf_object__find_program_by_name(obj, name); + prog = bpf_object__find_program_by_title(obj, title); if (!prog) return -1; @@ -89,7 +89,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - child: 0x80 -> 0x90 */ - err = prog_attach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); + err = prog_attach(obj, cg_child, "cgroup/getsockopt/child"); if (err) goto detach; @@ -113,7 +113,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: 0x90 -> 0xA0 */ - err = prog_attach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); + err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent"); if (err) goto detach; @@ -157,7 +157,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: unexpected 0x40, EPERM */ - err = prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); + err = prog_detach(obj, cg_child, 
"cgroup/getsockopt/child"); if (err) { log_err("Failed to detach child program"); goto detach; @@ -198,8 +198,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); - prog_detach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); + prog_detach(obj, cg_child, "cgroup/getsockopt/child"); + prog_detach(obj, cg_parent, "cgroup/getsockopt/parent"); return err; } @@ -236,7 +236,7 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach child program and make sure it adds 0x10. */ - err = prog_attach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); + err = prog_attach(obj, cg_child, "cgroup/setsockopt"); if (err) goto detach; @@ -263,7 +263,7 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach parent program and make sure it adds another 0x10. */ - err = prog_attach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); + err = prog_attach(obj, cg_parent, "cgroup/setsockopt"); if (err) goto detach; @@ -289,18 +289,22 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); - prog_detach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); + prog_detach(obj, cg_child, "cgroup/setsockopt"); + prog_detach(obj, cg_parent, "cgroup/setsockopt"); return err; } void test_sockopt_multi(void) { + struct bpf_prog_load_attr attr = { + .file = "./sockopt_multi.o", + }; int cg_parent = -1, cg_child = -1; struct bpf_object *obj = NULL; int sock_fd = -1; int err = -1; + int ignored; cg_parent = test__join_cgroup("/parent"); if (CHECK_FAIL(cg_parent < 0)) @@ -310,12 +314,8 @@ void test_sockopt_multi(void) if (CHECK_FAIL(cg_child < 0)) goto out; - obj = bpf_object__open_file("sockopt_multi.o", NULL); - if (!ASSERT_OK_PTR(obj, "obj_load")) - goto out; - - err = bpf_object__load(obj); - if (!ASSERT_OK(err, "obj_load")) + err = bpf_prog_load_xattr(&attr, &obj, &ignored); + if (CHECK_FAIL(err)) goto out; sock_fd = socket(AF_INET, SOCK_STREAM, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 6307f5d2b4..7577a77a4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -24,9 +24,9 @@ void test_spinlock(void) int err = 0, i; void *ret; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK_FAIL(err)) { - printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno); + printf("test_spin_lock:bpf_prog_load errno %d\n", errno); goto close_prog; } for (i = 0; i < 4; i++) diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index 313f0a6623..04b476bd62 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -4,7 +4,7 @@ void test_stacktrace_map(void) { int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "oncpu"; + const char *prog_name = "tracepoint/sched/sched_switch"; int err, prog_fd, stack_trace_len; const char *file = "./test_stacktrace_map.o"; __u32 key, val, duration = 0; @@ -12,11 +12,11 @@ void test_stacktrace_map(void) struct bpf_object *obj; struct bpf_link *link; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, 
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 313f0a6623..04b476bd62 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -4,7 +4,7 @@
 void test_stacktrace_map(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
-	const char *prog_name = "oncpu";
+	const char *prog_name = "tracepoint/sched/sched_switch";
 	int err, prog_fd, stack_trace_len;
 	const char *file = "./test_stacktrace_map.o";
 	__u32 key, val, duration = 0;
@@ -12,11 +12,11 @@ void test_stacktrace_map(void)
 	struct bpf_object *obj;
 	struct bpf_link *link;
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
 		return;
-	prog = bpf_object__find_program_by_name(obj, prog_name);
+	prog = bpf_object__find_program_by_title(obj, prog_name);
 	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
 		goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 1cb8dd36bd..4fd30bb651 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -3,7 +3,7 @@
 void test_stacktrace_map_raw_tp(void)
 {
-	const char *prog_name = "oncpu";
+	const char *prog_name = "tracepoint/sched/sched_switch";
 	int control_map_fd, stackid_hmap_fd, stackmap_fd;
 	const char *file = "./test_stacktrace_map.o";
 	__u32 key, val, duration = 0;
@@ -12,11 +12,11 @@ void test_stacktrace_map_raw_tp(void)
 	struct bpf_object *obj;
 	struct bpf_link *link = NULL;
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
 		return;
-	prog = bpf_object__find_program_by_name(obj, prog_name);
+	prog = bpf_object__find_program_by_title(obj, prog_name);
 	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
 		goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 5dc0f425bd..b5940e6ca6 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -16,12 +16,12 @@ static void test_tailcall_1(void)
 	char prog_name[32];
 	char buff[128] = {};
-	err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+	err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
-	prog = bpf_object__find_program_by_name(obj, "entry");
+	prog = bpf_object__find_program_by_title(obj, "classifier");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -38,9 +38,9 @@ static void test_tailcall_1(void)
 		goto out;
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -70,9 +70,9 @@ static void test_tailcall_1(void)
 	      err, errno, retval);
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -92,9 +92,9 @@ static void test_tailcall_1(void)
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
 		j = bpf_map__def(prog_array)->max_entries - 1 - i;
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", j);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -154,12 +154,12 @@ static void test_tailcall_2(void)
 	char prog_name[32];
 	char buff[128] = {};
bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_name(obj, "entry"); + prog = bpf_object__find_program_by_title(obj, "classifier"); if (CHECK_FAIL(!prog)) goto out; @@ -176,9 +176,9 @@ static void test_tailcall_2(void) goto out; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); - prog = bpf_object__find_program_by_name(obj, prog_name); + prog = bpf_object__find_program_by_title(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -219,7 +219,10 @@ static void test_tailcall_2(void) bpf_object__close(obj); } -static void test_tailcall_count(const char *which) +/* test_tailcall_3 checks that the count value of the tail call limit + * enforcement matches with expectations. + */ +static void test_tailcall_3(void) { int err, map_fd, prog_fd, main_fd, data_fd, i, val; struct bpf_map *prog_array, *data_map; @@ -228,12 +231,12 @@ static void test_tailcall_count(const char *which) __u32 retval, duration; char buff[128] = {}; - err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_name(obj, "entry"); + prog = bpf_object__find_program_by_title(obj, "classifier"); if (CHECK_FAIL(!prog)) goto out; @@ -249,7 +252,7 @@ static void test_tailcall_count(const char *which) if (CHECK_FAIL(map_fd < 0)) goto out; - prog = bpf_object__find_program_by_name(obj, "classifier_0"); + prog = bpf_object__find_program_by_title(obj, "classifier/0"); if (CHECK_FAIL(!prog)) goto out; @@ -293,22 +296,6 @@ static void test_tailcall_count(const char *which) bpf_object__close(obj); } -/* test_tailcall_3 checks that the count value of the tail call limit - * enforcement matches with expectations. JIT uses direct jump. - */ -static void test_tailcall_3(void) -{ - test_tailcall_count("tailcall3.o"); -} - -/* test_tailcall_6 checks that the count value of the tail call limit - * enforcement matches with expectations. JIT uses indirect jump. - */ -static void test_tailcall_6(void) -{ - test_tailcall_count("tailcall6.o"); -} - /* test_tailcall_4 checks that the kernel properly selects indirect jump * for the case where the key is not known. Latter is passed via global * data to select different targets we can compare return value of. 
@@ -324,12 +311,12 @@ static void test_tailcall_4(void)
 	char buff[128] = {};
 	char prog_name[32];
-	err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+	err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
-	prog = bpf_object__find_program_by_name(obj, "entry");
+	prog = bpf_object__find_program_by_title(obj, "classifier");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -354,9 +341,9 @@ static void test_tailcall_4(void)
 		return;
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -412,12 +399,12 @@ static void test_tailcall_5(void)
 	char buff[128] = {};
 	char prog_name[32];
-	err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+	err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
-	prog = bpf_object__find_program_by_name(obj, "entry");
+	prog = bpf_object__find_program_by_title(obj, "classifier");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -442,9 +429,9 @@ static void test_tailcall_5(void)
 		return;
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -498,12 +485,12 @@ static void test_tailcall_bpf2bpf_1(void)
 	__u32 retval, duration;
 	char prog_name[32];
-	err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
+	err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
-	prog = bpf_object__find_program_by_name(obj, "entry");
+	prog = bpf_object__find_program_by_title(obj, "classifier");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -521,9 +508,9 @@ static void test_tailcall_bpf2bpf_1(void)
 	/* nop -> jmp */
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -582,12 +569,12 @@ static void test_tailcall_bpf2bpf_2(void)
 	__u32 retval, duration;
 	char buff[128] = {};
-	err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
+	err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
-	prog = bpf_object__find_program_by_name(obj, "entry");
+	prog = bpf_object__find_program_by_title(obj, "classifier");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -603,7 +590,7 @@ static void test_tailcall_bpf2bpf_2(void)
 	if (CHECK_FAIL(map_fd < 0))
 		goto out;
-	prog = bpf_object__find_program_by_name(obj, "classifier_0");
+	prog = bpf_object__find_program_by_title(obj, "classifier/0");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -660,12 +647,12 @@ static void test_tailcall_bpf2bpf_3(void)
 	__u32 retval, duration;
 	char prog_name[32];
-	err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
+	err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
-	prog = bpf_object__find_program_by_name(obj, "entry");
+	prog = bpf_object__find_program_by_title(obj, "classifier");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -682,9 +669,9 @@ static void test_tailcall_bpf2bpf_3(void)
 		goto out;
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -757,12 +744,12 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 	__u32 retval, duration;
 	char prog_name[32];
-	err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
+	err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
-	prog = bpf_object__find_program_by_name(obj, "entry");
+	prog = bpf_object__find_program_by_title(obj, "classifier");
 	if (CHECK_FAIL(!prog))
 		goto out;
@@ -779,9 +766,9 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 		goto out;
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
-		prog = bpf_object__find_program_by_name(obj, prog_name);
+		prog = bpf_object__find_program_by_title(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
@@ -835,8 +822,6 @@ void test_tailcalls(void)
 		test_tailcall_4();
 	if (test__start_subtest("tailcall_5"))
 		test_tailcall_5();
-	if (test__start_subtest("tailcall_6"))
-		test_tailcall_6();
 	if (test__start_subtest("tailcall_bpf2bpf_1"))
 		test_tailcall_bpf2bpf_1();
 	if (test__start_subtest("tailcall_bpf2bpf_2"))
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
index 17947c9e1d..1bdc1d86a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -11,7 +11,7 @@ void test_task_fd_query_rawtp(void)
 	__u32 duration = 0;
 	char buf[256];
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index c2a98a7a8d..3f131b8fe3 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -13,8 +13,8 @@ static void test_task_fd_query_tp_core(const char *probe_name,
 	__u32 duration = 0;
 	char buf[256];
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
-	if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
 		goto close_prog;
 	snprintf(buf, sizeof(buf),
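
Throughout the tailcalls hunks, the snprintf key template changes in lockstep with the lookup function: the modern code finds each tail-call target by its C function name, the restored code by its ELF section title. The two key formats side by side (both taken from the hunks above):

	snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); /* function name */
	snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); /* section title */

Only the key format and lookup API differ; the prog_array wiring that the keys feed into is unchanged.
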
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c2426df58e..47e3159729 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -183,18 +183,6 @@ static int netns_setup_namespaces(const char *verb)
 	return 0;
 }
-static void netns_setup_namespaces_nofail(const char *verb)
-{
-	const char * const *ns = namespaces;
-	char cmd[128];
-
-	while (*ns) {
-		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
-		system(cmd);
-		ns++;
-	}
-}
-
 struct netns_setup_result {
 	int ifindex_veth_src_fwd;
 	int ifindex_veth_dst_fwd;
@@ -652,7 +640,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 	struct nstoken *nstoken = NULL;
 	int err;
 	int tunnel_pid = -1;
-	int src_fd, target_fd = -1;
+	int src_fd, target_fd;
 	int ifindex;
 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
@@ -781,8 +769,6 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 static void *test_tc_redirect_run_tests(void *arg)
 {
-	netns_setup_namespaces_nofail("delete");
-
 	RUN_TEST(tc_redirect_peer);
 	RUN_TEST(tc_redirect_peer_l3);
 	RUN_TEST(tc_redirect_neigh);
@@ -790,7 +776,7 @@ static void *test_tc_redirect_run_tests(void *arg)
 	return NULL;
 }
-void serial_test_tc_redirect(void)
+void test_tc_redirect(void)
 {
 	pthread_t test_thread;
 	int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index 11bf755be4..594307dffd 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -8,7 +8,7 @@ void test_tcp_estats(void)
 	struct bpf_object *obj;
 	__u32 duration = 0;
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	CHECK(err, "", "err %d errno %d\n", err, errno);
 	if (err)
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 96ff2c20af..d207e968e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -2,7 +2,6 @@
 #include <test_progs.h>
 #include "cgroup_helpers.h"
 #include "network_helpers.h"
-#include "tcp_rtt.skel.h"
 struct tcp_rtt_storage {
 	__u32 invoked;
@@ -92,18 +91,26 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
 static int run_test(int cgroup_fd, int server_fd)
 {
-	struct tcp_rtt *skel;
+	struct bpf_prog_load_attr attr = {
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+		.file = "./tcp_rtt.o",
+		.expected_attach_type = BPF_CGROUP_SOCK_OPS,
+	};
+	struct bpf_object *obj;
+	struct bpf_map *map;
 	int client_fd;
 	int prog_fd;
 	int map_fd;
 	int err;
-	skel = tcp_rtt__open_and_load();
-	if (!ASSERT_OK_PTR(skel, "skel_open_load"))
+	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+	if (err) {
+		log_err("Failed to load BPF object");
 		return -1;
+	}
-	map_fd = bpf_map__fd(skel->maps.socket_storage_map);
-	prog_fd = bpf_program__fd(skel->progs._sockops);
+	map = bpf_map__next(NULL, obj);
+	map_fd = bpf_map__fd(map);
 	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
 	if (err) {
@@ -142,7 +149,7 @@ static int run_test(int cgroup_fd, int server_fd)
 	close(client_fd);
 close_bpf_object:
-	tcp_rtt__destroy(skel);
+	bpf_object__close(obj);
 	return err;
 }
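
The tcp_rtt hunk above removes the generated-skeleton flow in favor of bpf_prog_load_xattr(). The skeleton version, reconstructed from the deleted lines, looked like:

	struct tcp_rtt *skel = tcp_rtt__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_load"))
		return -1;
	map_fd = bpf_map__fd(skel->maps.socket_storage_map);
	prog_fd = bpf_program__fd(skel->progs._sockops);
	/* ... run the test ... */
	tcp_rtt__destroy(skel);

Note that the restored bpf_map__next(NULL, obj) grabs the object's first map by position rather than by name, so it only stays correct while socket_storage_map remains the first map in tcp_rtt.o.
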
read(fd, buf, sizeof(buf))) > 0) { - buf[sizeof(buf) - 1] = '\0'; + while ((len = read(fd, buf, sizeof(buf))) > 0) if (strstr(buf, "iter")) { close(fd); return 0; } - } close(fd); return -1; } static int fn(void) { - struct stat a, b, c; - int err, map; + int err, duration = 0; err = unshare(CLONE_NEWNS); - if (!ASSERT_OK(err, "unshare")) + if (CHECK(err, "unshare", "failed: %d\n", errno)) goto out; err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL); - if (!ASSERT_OK(err, "mount /")) + if (CHECK(err, "mount /", "failed: %d\n", errno)) goto out; err = umount(TDIR); - if (!ASSERT_OK(err, "umount " TDIR)) + if (CHECK(err, "umount " TDIR, "failed: %d\n", errno)) goto out; err = mount("none", TDIR, "tmpfs", 0, NULL); - if (!ASSERT_OK(err, "mount tmpfs")) + if (CHECK(err, "mount", "mount root failed: %d\n", errno)) goto out; err = mkdir(TDIR "/fs1", 0777); - if (!ASSERT_OK(err, "mkdir " TDIR "/fs1")) + if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno)) goto out; err = mkdir(TDIR "/fs2", 0777); - if (!ASSERT_OK(err, "mkdir " TDIR "/fs2")) + if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno)) goto out; err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL); - if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs1")) + if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno)) goto out; err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL); - if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs2")) + if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno)) goto out; err = read_iter(TDIR "/fs1/maps.debug"); - if (!ASSERT_OK(err, "reading " TDIR "/fs1/maps.debug")) + if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n")) goto out; err = read_iter(TDIR "/fs2/progs.debug"); - if (!ASSERT_OK(err, "reading " TDIR "/fs2/progs.debug")) + if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n")) goto out; - - err = mkdir(TDIR "/fs1/a", 0777); - if (!ASSERT_OK(err, "creating " TDIR "/fs1/a")) - goto out; - err = mkdir(TDIR "/fs1/a/1", 0777); - if (!ASSERT_OK(err, "creating " TDIR "/fs1/a/1")) - goto out; - err = mkdir(TDIR "/fs1/b", 0777); - if (!ASSERT_OK(err, "creating " TDIR "/fs1/b")) - goto out; - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); - if (!ASSERT_GT(map, 0, "create_map(ARRAY)")) - goto out; - err = bpf_obj_pin(map, TDIR "/fs1/c"); - if (!ASSERT_OK(err, "pin map")) - goto out; - close(map); - - /* Check that RENAME_EXCHANGE works for directories. */ - err = stat(TDIR "/fs1/a", &a); - if (!ASSERT_OK(err, "stat(" TDIR "/fs1/a)")) - goto out; - err = renameat2(0, TDIR "/fs1/a", 0, TDIR "/fs1/b", RENAME_EXCHANGE); - if (!ASSERT_OK(err, "renameat2(/fs1/a, /fs1/b, RENAME_EXCHANGE)")) - goto out; - err = stat(TDIR "/fs1/b", &b); - if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)")) - goto out; - if (!ASSERT_EQ(a.st_ino, b.st_ino, "b should have a's inode")) - goto out; - err = access(TDIR "/fs1/b/1", F_OK); - if (!ASSERT_OK(err, "access(" TDIR "/fs1/b/1)")) - goto out; - - /* Check that RENAME_EXCHANGE works for mixed file types. 
*/ - err = stat(TDIR "/fs1/c", &c); - if (!ASSERT_OK(err, "stat(" TDIR "/fs1/map)")) - goto out; - err = renameat2(0, TDIR "/fs1/c", 0, TDIR "/fs1/b", RENAME_EXCHANGE); - if (!ASSERT_OK(err, "renameat2(/fs1/c, /fs1/b, RENAME_EXCHANGE)")) - goto out; - err = stat(TDIR "/fs1/b", &b); - if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)")) - goto out; - if (!ASSERT_EQ(c.st_ino, b.st_ino, "b should have c's inode")) - goto out; - err = access(TDIR "/fs1/c/1", F_OK); - if (!ASSERT_OK(err, "access(" TDIR "/fs1/c/1)")) - goto out; - - /* Check that RENAME_NOREPLACE works. */ - err = renameat2(0, TDIR "/fs1/b", 0, TDIR "/fs1/a", RENAME_NOREPLACE); - if (!ASSERT_ERR(err, "renameat2(RENAME_NOREPLACE)")) { - err = -EINVAL; - goto out; - } - err = access(TDIR "/fs1/b", F_OK); - if (!ASSERT_OK(err, "access(" TDIR "/fs1/b)")) - goto out; - out: umount(TDIR "/fs1"); umount(TDIR "/fs2"); diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 509e21d5cb..7e13129f59 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -30,29 +30,17 @@ extern int extra_prog_load_log_flags; static int check_load(const char *file) { + struct bpf_prog_load_attr attr; struct bpf_object *obj = NULL; - struct bpf_program *prog; - int err; + int err, prog_fd; + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = BPF_PROG_TYPE_UNSPEC; + attr.log_level = extra_prog_load_log_flags; + attr.prog_flags = BPF_F_TEST_RND_HI32; found = false; - - obj = bpf_object__open_file(file, NULL); - err = libbpf_get_error(obj); - if (err) - return err; - - prog = bpf_object__next_program(obj, NULL); - if (!prog) { - err = -ENOENT; - goto err_out; - } - - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); - bpf_program__set_log_level(prog, extra_prog_load_log_flags); - - err = bpf_object__load(obj); - -err_out: + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); bpf_object__close(obj); return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 26ac26a880..d2c16eaae3 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -28,6 +28,10 @@ static unsigned int duration; struct storage { void *inode; unsigned int value; + /* Lock ensures that spin locked versions of local storage operations + * also work; most operations in this test are still single threaded + */ + struct bpf_spin_lock lock; }; /* Fork and exec the provided rm binary and return the exit code of the @@ -62,24 +66,27 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path) static bool check_syscall_operations(int map_fd, int obj_fd) { - struct storage val = { .value = TEST_STORAGE_VALUE }, - lookup_val = { .value = 0 }; + struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } }, + lookup_val = { .value = 0, .lock = { 0 } }; int err; /* Looking up an existing element should fail initially */ - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, + BPF_F_LOCK); if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", "err:%d errno:%d\n", err, errno)) return false; /* Create a new element */ - err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST); + err = bpf_map_update_elem(map_fd, &obj_fd, &val, +
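
For background on the BPF_F_LOCK flag added in these lookups and updates: when a map value embeds a struct bpf_spin_lock, userspace may pass BPF_F_LOCK so the kernel takes that lock around the value copy, preventing torn reads against concurrent BPF-side updates. A minimal sketch under that assumption, mirroring the struct storage layout from the hunk above (locked_roundtrip is illustrative only):

#include <errno.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

struct storage {
	void *inode;
	unsigned int value;
	struct bpf_spin_lock lock;	/* uapi type from <linux/bpf.h> */
};

/* Round-trip a value under its embedded spin lock; map_fd must refer
 * to a map whose value type contains a bpf_spin_lock. */
static int locked_roundtrip(int map_fd, int key)
{
	struct storage val = { .value = 42 };

	/* kernel acquires val.lock while copying the value in */
	if (bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST | BPF_F_LOCK))
		return -errno;
	/* and again while copying it back out */
	return bpf_map_lookup_elem_flags(map_fd, &key, &val, BPF_F_LOCK);
}
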
BPF_NOEXIST | BPF_F_LOCK); if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, errno)) return false; /* Lookup the newly created element */ - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, + BPF_F_LOCK); if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, errno)) return false; @@ -95,7 +102,8 @@ static bool check_syscall_operations(int map_fd, int obj_fd) return false; /* The lookup should fail, now that the element has been deleted */ - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, + BPF_F_LOCK); if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", "err:%d errno:%d\n", err, errno)) return false; diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 05acb376f7..123c68c191 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -56,11 +56,11 @@ static void setaffinity(void) void test_test_overhead(void) { - const char *kprobe_name = "prog1"; - const char *kretprobe_name = "prog2"; - const char *raw_tp_name = "prog3"; - const char *fentry_name = "prog4"; - const char *fexit_name = "prog5"; + const char *kprobe_name = "kprobe/__set_task_comm"; + const char *kretprobe_name = "kretprobe/__set_task_comm"; + const char *raw_tp_name = "raw_tp/task_rename"; + const char *fentry_name = "fentry/__set_task_comm"; + const char *fexit_name = "fexit/__set_task_comm"; const char *kprobe_func = "__set_task_comm"; struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; struct bpf_program *fentry_prog, *fexit_prog; @@ -76,23 +76,23 @@ void test_test_overhead(void) if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_name(obj, kprobe_name); + kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); if (CHECK(!kprobe_prog, "find_probe", "prog '%s' not found\n", kprobe_name)) goto cleanup; - kretprobe_prog = bpf_object__find_program_by_name(obj, kretprobe_name); + kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name); if (CHECK(!kretprobe_prog, "find_probe", "prog '%s' not found\n", kretprobe_name)) goto cleanup; - raw_tp_prog = bpf_object__find_program_by_name(obj, raw_tp_name); + raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name); if (CHECK(!raw_tp_prog, "find_probe", "prog '%s' not found\n", raw_tp_name)) goto cleanup; - fentry_prog = bpf_object__find_program_by_name(obj, fentry_name); + fentry_prog = bpf_object__find_program_by_title(obj, fentry_name); if (CHECK(!fentry_prog, "find_probe", "prog '%s' not found\n", fentry_name)) goto cleanup; - fexit_prog = bpf_object__find_program_by_name(obj, fexit_name); + fexit_prog = bpf_object__find_program_by_title(obj, fexit_name); if (CHECK(!fexit_prog, "find_probe", "prog '%s' not found\n", fexit_name)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 0f4e49e622..25f40e1b99 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -39,8 +39,7 @@ static int timer(struct timer *timer_skel) return 0; } -/* TODO: use pid filtering */ -void serial_test_timer(void) +void test_timer(void) { struct timer *timer_skel = NULL; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c 
b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 949a061786..ced8f6cf34 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -52,7 +52,7 @@ static int timer_mim(struct timer_mim *timer_skel) return 0; } -void serial_test_timer_mim(void) +void test_timer_mim(void) { struct timer_mim_reject *timer_reject_skel = NULL; libbpf_print_fn_t old_print_fn = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index 39e79291c8..fb095e5cd9 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include -void serial_test_tp_attach_query(void) +void test_tp_attach_query(void) { const int num_progs = 3; int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; @@ -35,7 +35,7 @@ void serial_test_tp_attach_query(void) query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); for (i = 0; i < num_progs; i++) { - err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], &prog_fd[i]); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) goto cleanup1; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index cade7f1231..d39bc00feb 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -8,34 +8,35 @@ #define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" -void serial_test_trace_printk(void) +void test_trace_printk(void) { - struct trace_printk_lskel__bss *bss; - int err = 0, iter = 0, found = 0; - struct trace_printk_lskel *skel; + int err, iter = 0, duration = 0, found = 0; + struct trace_printk__bss *bss; + struct trace_printk *skel; char *buf = NULL; FILE *fp = NULL; size_t buflen; - skel = trace_printk_lskel__open(); - if (!ASSERT_OK_PTR(skel, "trace_printk__open")) + skel = trace_printk__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) return; - ASSERT_EQ(skel->rodata->fmt[0], 'T', "skel->rodata->fmt[0]"); + ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string"); skel->rodata->fmt[0] = 't'; - err = trace_printk_lskel__load(skel); - if (!ASSERT_OK(err, "trace_printk__load")) + err = trace_printk__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) goto cleanup; bss = skel->bss; - err = trace_printk_lskel__attach(skel); - if (!ASSERT_OK(err, "trace_printk__attach")) + err = trace_printk__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) goto cleanup; fp = fopen(TRACEBUF, "r"); - if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)")) + if (CHECK(fp == NULL, "could not open trace buffer", + "error %d opening %s", errno, TRACEBUF)) goto cleanup; /* We do not want to wait forever if this test fails... 
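
Whichever prefix the trace_printk hunks use (trace_printk_lskel__* for the light skeleton, trace_printk__* for the regular one), the generated API follows the same open/load/attach/destroy lifecycle. A condensed sketch, assuming a skeleton generated from progs/trace_printk.c:

#include "trace_printk.skel.h"

static int skel_lifecycle(void)
{
	struct trace_printk *skel;
	int err;

	skel = trace_printk__open();		/* parse ELF, nothing loaded yet */
	if (!skel)
		return -1;
	skel->rodata->fmt[0] = 't';		/* rodata is writable only pre-load */
	err = trace_printk__load(skel);		/* create maps, load programs */
	if (!err)
		err = trace_printk__attach(skel);	/* attach every prog */
	trace_printk__destroy(skel);		/* detach and free everything */
	return err;
}
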
*/ @@ -43,12 +44,16 @@ void serial_test_trace_printk(void) /* wait for tracepoint to trigger */ usleep(1); - trace_printk_lskel__detach(skel); + trace_printk__detach(skel); - if (!ASSERT_GT(bss->trace_printk_ran, 0, "bss->trace_printk_ran")) + if (CHECK(bss->trace_printk_ran == 0, + "bpf_trace_printk never ran", + "ran == %d", bss->trace_printk_ran)) goto cleanup; - if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret")) + if (CHECK(bss->trace_printk_ret <= 0, + "bpf_trace_printk returned <= 0 value", + "got %d", bss->trace_printk_ret)) goto cleanup; /* verify our search string is in the trace buffer */ @@ -61,11 +66,12 @@ void serial_test_trace_printk(void) break; } - if (!ASSERT_EQ(found, bss->trace_printk_ran, "found")) + if (CHECK(!found, "message from bpf_trace_printk not found", + "no instance of %s in %s", SEARCHMSG, TRACEBUF)) goto cleanup; cleanup: - trace_printk_lskel__destroy(skel); + trace_printk__destroy(skel); free(buf); if (fp) fclose(fp); diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 9c795ee52b..d7f5a931d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -35,17 +35,16 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name) struct bpf_program *prog; int duration = 0; - prog = bpf_object__find_program_by_name(obj, name); + prog = bpf_object__find_program_by_title(obj, name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) return ERR_PTR(-EINVAL); return bpf_program__attach_trace(prog); } -/* TODO: use different target function to run in concurrent mode */ -void serial_test_trampoline_count(void) +void test_trampoline_count(void) { - const char *fentry_name = "prog1"; - const char *fexit_name = "prog2"; + const char *fentry_name = "fentry/__set_task_comm"; + const char *fexit_name = "fexit/__set_task_comm"; const char *object = "test_trampoline_count.o"; struct inst inst[MAX_TRAMP_PROGS] = {}; int err, i = 0, duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c index ac65456b7a..48921ff748 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp.c @@ -11,12 +11,12 @@ void test_xdp(void) const char *file = "./test_xdp.o"; struct bpf_object *obj; char buf[128]; - struct ipv6hdr iph6; - struct iphdr iph; + struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr); + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); __u32 duration, retval, size; int err, prog_fd, map_fd; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -28,17 +28,16 @@ void test_xdp(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); + CHECK(err || retval != XDP_TX || size != 74 || - iph.protocol != IPPROTO_IPIP, "ipv4", + iph->protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6)); CHECK(err || retval != XDP_TX || size != 114 || - iph6.nexthdr != IPPROTO_IPV6, "ipv6", + iph6->nexthdr != IPPROTO_IPV6, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, 
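
On the iph/iph6 change in the xdp test above: the older style aliases the bpf_prog_test_run() output buffer through header pointers, while the code being reverted memcpy()s into typed locals first. Both inspect the same bytes; the copy merely sidesteps alignment and strict-aliasing questions. A short sketch of the two styles (is_ipip is a hypothetical helper; buf is assumed to hold the returned Ethernet frame):

#include <string.h>
#include <netinet/in.h>
#include <linux/if_ether.h>
#include <linux/ip.h>

static int is_ipip(const char *buf)
{
	/* style restored here: point straight into the buffer */
	const struct iphdr *iph = (const void *)(buf + sizeof(struct ethhdr));
	/* style being reverted: copy the header into a typed local */
	struct iphdr copy;

	memcpy(&copy, buf + sizeof(struct ethhdr), sizeof(copy));
	return iph->protocol == IPPROTO_IPIP &&
	       copy.protocol == IPPROTO_IPIP;	/* same bytes either way */
}
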
retval, size); out: diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 3f5a17c38b..d5c98f2cb1 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -2,7 +2,7 @@ #include #include -static void test_xdp_adjust_tail_shrink(void) +void test_xdp_adjust_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; __u32 duration, retval, size, expect_sz; @@ -10,7 +10,7 @@ static void test_xdp_adjust_tail_shrink(void) int err, prog_fd; char buf[128]; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -30,7 +30,7 @@ static void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -static void test_xdp_adjust_tail_grow(void) +void test_xdp_adjust_tail_grow(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; struct bpf_object *obj; @@ -38,7 +38,7 @@ static void test_xdp_adjust_tail_grow(void) __u32 duration, retval, size, expect_sz; int err, prog_fd; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -58,7 +58,7 @@ static void test_xdp_adjust_tail_grow(void) bpf_object__close(obj); } -static void test_xdp_adjust_tail_grow2(void) +void test_xdp_adjust_tail_grow2(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; char buf[4096]; /* avoid segfault: large buf to hold grow results */ @@ -75,7 +75,7 @@ static void test_xdp_adjust_tail_grow2(void) .data_size_out = 0, /* Per test */ }; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index c6fa390e3a..15ef353148 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) -void serial_test_xdp_attach(void) +void test_xdp_attach(void) { __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; @@ -16,7 +16,7 @@ void serial_test_xdp_attach(void) len = sizeof(info); - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); if (CHECK_FAIL(err)) return; err = bpf_obj_get_info_by_fd(fd1, &info, &len); @@ -24,7 +24,7 @@ void serial_test_xdp_attach(void) goto out_1; id1 = info.id; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); if (CHECK_FAIL(err)) goto out_1; @@ -34,7 +34,7 @@ void serial_test_xdp_attach(void) goto out_2; id2 = info.id; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); if (CHECK_FAIL(err)) goto out_2; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index 5e3a26b15e..ad3ba81b40 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -218,9 +218,9 @@ static int send_udp_packets(int vary_dst_ip) .h_dest = BOND2_MAC, .h_proto = 
htons(ETH_P_IP), }; - struct iphdr iph = {}; - struct udphdr uh = {}; - uint8_t buf[128]; + uint8_t buf[128] = {}; + struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh)); + struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph)); int i, s = -1; int ifindex; @@ -232,16 +232,17 @@ static int send_udp_packets(int vary_dst_ip) if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex")) goto err; - iph.ihl = 5; - iph.version = 4; - iph.tos = 16; - iph.id = 1; - iph.ttl = 64; - iph.protocol = IPPROTO_UDP; - iph.saddr = 1; - iph.daddr = 2; - iph.tot_len = htons(sizeof(buf) - ETH_HLEN); - iph.check = 0; + memcpy(buf, &eh, sizeof(eh)); + iph->ihl = 5; + iph->version = 4; + iph->tos = 16; + iph->id = 1; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->saddr = 1; + iph->daddr = 2; + iph->tot_len = htons(sizeof(buf) - ETH_HLEN); + iph->check = 0; for (i = 1; i <= NPACKETS; i++) { int n; @@ -252,15 +253,10 @@ static int send_udp_packets(int vary_dst_ip) }; /* vary the UDP destination port for even distribution with roundrobin/xor modes */ - uh.dest++; + uh->dest++; if (vary_dst_ip) - iph.daddr++; - - /* construct a packet */ - memcpy(buf, &eh, sizeof(eh)); - memcpy(buf + sizeof(eh), &iph, sizeof(iph)); - memcpy(buf + sizeof(eh) + sizeof(iph), &uh, sizeof(uh)); + iph->daddr++; n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll)); if (!ASSERT_EQ(n, sizeof(buf), "sendto")) @@ -523,7 +519,7 @@ static struct bond_test_case bond_test_cases[] = { { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, }, }; -void serial_test_xdp_bonding(void) +void test_xdp_bonding(void) { libbpf_print_fn_t old_print_fn; struct skeletons skeletons = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index c98a897ad6..3bd5904b4d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -42,13 +42,14 @@ void test_xdp_bpf2bpf(void) char buf[128]; int err, pkt_fd, map_fd; bool passed = false; - struct iphdr iph; + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); struct iptnl_info value4 = {.family = AF_INET}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; struct bpf_program *prog; struct perf_buffer *pb = NULL; + struct perf_buffer_opts pb_opts = {}; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -85,17 +86,19 @@ void test_xdp_bpf2bpf(void) goto out; /* Set up perf buffer */ - pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, - on_sample, NULL, &passed, NULL); + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), + 1, &pb_opts); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out; /* Run test program */ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); + if (CHECK(err || retval != XDP_TX || size != 74 || - iph.protocol != IPPROTO_IPIP, "ipv4", + iph->protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index fd812bd436..8755effd80 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ 
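
The perf_buffer__new() hunk in xdp_bpf2bpf above is the older libbpf calling convention, where the sample callback and its context ride in struct perf_buffer_opts instead of being direct arguments. A minimal sketch of that legacy form, assuming libbpf before v0.7 and an existing BPF_MAP_TYPE_PERF_EVENT_ARRAY fd (open_pb_legacy is illustrative only):

#include <stdbool.h>
#include <linux/types.h>
#include <bpf/libbpf.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	*(bool *)ctx = true;	/* record that an event arrived */
}

static struct perf_buffer *open_pb_legacy(int map_fd, bool *passed)
{
	struct perf_buffer_opts pb_opts = {};

	pb_opts.sample_cb = on_sample;	/* callback travels in opts ... */
	pb_opts.ctx = passed;		/* ... and so does its context */
	return perf_buffer__new(map_fd, 1 /* pages per CPU */, &pb_opts);
}
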
b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -7,7 +7,7 @@ #define IFINDEX_LO 1 -void serial_test_xdp_cpumap_attach(void) +void test_xdp_cpumap_attach(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 3079d5568f..c72af030ff 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -8,7 +8,7 @@ #define IFINDEX_LO 1 -static void test_xdp_with_devmap_helpers(void) +void test_xdp_with_devmap_helpers(void) { struct test_xdp_with_devmap_helpers *skel; struct bpf_prog_info info = {}; @@ -60,7 +60,7 @@ static void test_xdp_with_devmap_helpers(void) test_xdp_with_devmap_helpers__destroy(skel); } -static void test_neg_xdp_devmap_helpers(void) +void test_neg_xdp_devmap_helpers(void) { struct test_xdp_devmap_helpers *skel; @@ -72,7 +72,7 @@ static void test_neg_xdp_devmap_helpers(void) } -void serial_test_xdp_devmap_attach(void) +void test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) test_xdp_with_devmap_helpers(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index abe48e82e1..d2d7a283d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 -void serial_test_xdp_info(void) +void test_xdp_info(void) { __u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id; const char *file = "./xdp_dummy.o"; @@ -29,7 +29,7 @@ void serial_test_xdp_info(void) /* Setup prog */ - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index b2b357f8c7..46eed0a33c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -6,49 +6,48 @@ #define IFINDEX_LO 1 -void serial_test_xdp_link(void) +void test_xdp_link(void) { + __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); struct test_xdp_link *skel1 = NULL, *skel2 = NULL; - __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2; struct bpf_link_info link_info; struct bpf_prog_info prog_info; struct bpf_link *link; - int err; __u32 link_info_len = sizeof(link_info); __u32 prog_info_len = sizeof(prog_info); skel1 = test_xdp_link__open_and_load(); - if (!ASSERT_OK_PTR(skel1, "skel_load")) + if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n")) goto cleanup; prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler); skel2 = test_xdp_link__open_and_load(); - if (!ASSERT_OK_PTR(skel2, "skel_load")) + if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n")) goto cleanup; prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); - if (!ASSERT_OK(err, "fd_info1")) + if (CHECK(err, "fd_info1", "failed %d\n", -errno)) goto cleanup; id1 = prog_info.id; memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); - if (!ASSERT_OK(err, "fd_info2")) + if (CHECK(err, "fd_info2", "failed %d\n", 
-errno)) goto cleanup; id2 = prog_info.id; /* set initial prog attachment */ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); - if (!ASSERT_OK(err, "fd_attach")) + if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err)) goto cleanup; /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) - goto cleanup; + CHECK(err || id0 != id1, "id1_check", + "loaded prog id %u != id1 %u, err %d", id0, id1, err); /* BPF link is not allowed to replace prog attachment */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); @@ -63,7 +62,7 @@ void serial_test_xdp_link(void) /* detach BPF program */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); - if (!ASSERT_OK(err, "prog_detach")) + if (CHECK(err, "prog_detach", "failed %d\n", err)) goto cleanup; /* now BPF link should attach successfully */ @@ -74,23 +73,24 @@ void serial_test_xdp_link(void) /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) + if (CHECK(err || id0 != id1, "id1_check", + "loaded prog id %u != id1 %u, err %d", id0, id1, err)) goto cleanup; /* BPF prog attach is not allowed to replace BPF link */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); - if (!ASSERT_ERR(err, "prog_attach_fail")) + if (CHECK(!err, "prog_attach_fail", "unexpected success\n")) goto cleanup; /* Can't force-update when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); - if (!ASSERT_ERR(err, "prog_update_fail")) + if (CHECK(!err, "prog_update_fail", "unexpected success\n")) goto cleanup; /* Can't force-detach when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); - if (!ASSERT_ERR(err, "prog_detach_fail")) + if (CHECK(!err, "prog_detach_fail", "unexpected success\n")) goto cleanup; /* BPF link is not allowed to replace another BPF link */ @@ -110,39 +110,40 @@ void serial_test_xdp_link(void) skel2->links.xdp_handler = link; err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val")) + if (CHECK(err || id0 != id2, "id2_check", + "loaded prog id %u != id2 %u, err %d", id0, id2, err)) goto cleanup; /* updating program under active BPF link works as expected */ err = bpf_link__update_program(link, skel1->progs.xdp_handler); - if (!ASSERT_OK(err, "link_upd")) + if (CHECK(err, "link_upd", "failed: %d\n", err)) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - if (!ASSERT_OK(err, "link_info")) + if (CHECK(err, "link_info", "failed: %d\n", err)) goto cleanup; - ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type"); - ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); - ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex"); - - /* updating program under active BPF link with different type fails */ - err = bpf_link__update_program(link, skel1->progs.tc_handler); - if (!ASSERT_ERR(err, "link_upd_invalid")) - goto cleanup; + CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", + "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); + CHECK(link_info.prog_id != id1, "link_prog_id", + "got %u != exp %u\n", link_info.prog_id, id1); + CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", + "got %u != exp %u\n",
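
The checks in this xdp_link test pin down the ownership rules between the two XDP attachment paths: a plain fd attach and a bpf_link may not silently displace each other, and while a link holds the hook, program updates must go through the link. A sketch of the link-owned path, assuming ifindex 1 (loopback) and era-appropriate libbpf (swap_under_link is illustrative only):

#include <bpf/libbpf.h>

/* Swap the program behind an XDP hook without ever detaching,
 * using the bpf_link as the sole owner of the hook. */
static int swap_under_link(struct bpf_program *a, struct bpf_program *b)
{
	struct bpf_link *link = bpf_program__attach_xdp(a, 1 /* lo */);
	int err;

	if (libbpf_get_error(link))
		return -1;
	/* while the link exists, raw bpf_set_link_xdp_fd() on lo fails */
	err = bpf_link__update_program(link, b);
	bpf_link__destroy(link);	/* also detaches from the device */
	return err;
}
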
link_info.xdp.ifindex, IFINDEX_LO); err = bpf_link__detach(link); - if (!ASSERT_OK(err, "link_detach")) + if (CHECK(err, "link_detach", "failed %d\n", err)) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - - ASSERT_OK(err, "link_info"); - ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); + if (CHECK(err, "link_info", "failed: %d\n", err)) + goto cleanup; + CHECK(link_info.prog_id != id1, "link_prog_id", + "got %u != exp %u\n", link_info.prog_id, id1); /* ifindex should be zeroed out */ - ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex"); + CHECK(link_info.xdp.ifindex != 0, "link_ifindex", + "got %u != exp %u\n", link_info.xdp.ifindex, 0); cleanup: test_xdp_link__destroy(skel1); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c index 15a3900e43..7185bee16f 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c @@ -9,7 +9,7 @@ void test_xdp_perf(void) char in[128], out[128]; int err, prog_fd; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c index 16e5731320..c245345e41 100644 --- a/tools/testing/selftests/bpf/progs/atomics.c +++ b/tools/testing/selftests/bpf/progs/atomics.c @@ -10,8 +10,6 @@ bool skip_tests __attribute((__section__(".data"))) = false; bool skip_tests = true; #endif -__u32 pid = 0; - __u64 add64_value = 1; __u64 add64_result = 0; __u32 add32_value = 1; @@ -23,8 +21,6 @@ __u64 add_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(add, int a) { - if (pid != (bpf_get_current_pid_tgid() >> 32)) - return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 add_stack_value = 1; @@ -49,8 +45,6 @@ __s64 sub_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(sub, int a) { - if (pid != (bpf_get_current_pid_tgid() >> 32)) - return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 sub_stack_value = 1; @@ -73,8 +67,6 @@ __u64 and_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(and, int a) { - if (pid != (bpf_get_current_pid_tgid() >> 32)) - return 0; #ifdef ENABLE_ATOMICS_TESTS and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32); @@ -94,8 +86,6 @@ __u64 or_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(or, int a) { - if (pid != (bpf_get_current_pid_tgid() >> 32)) - return 0; #ifdef ENABLE_ATOMICS_TESTS or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32); or32_result = __sync_fetch_and_or(&or32_value, 0x011); @@ -114,8 +104,6 @@ __u64 xor_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(xor, int a) { - if (pid != (bpf_get_current_pid_tgid() >> 32)) - return 0; #ifdef ENABLE_ATOMICS_TESTS xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011); @@ -135,8 +123,6 @@ __u32 cmpxchg32_result_succeed = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(cmpxchg, int a) { - if (pid != (bpf_get_current_pid_tgid() >> 32)) - return 0; #ifdef ENABLE_ATOMICS_TESTS cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3); cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2); @@ -156,8 +142,6 @@ __u32 xchg32_result = 0; SEC("fentry/bpf_fentry_test1") int 
BPF_PROG(xchg, int a) { - if (pid != (bpf_get_current_pid_tgid() >> 32)) - return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 val64 = 2; __u32 val32 = 2; diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index d9660e7200..f62df4d023 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -169,7 +169,11 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk) ca->sample_cnt = 0; } -/* "struct_ops/" prefix is a requirement */ +/* "struct_ops/" prefix is not a requirement + * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS + * as long as it is used in one of the func ptr + * under SEC(".struct_ops"). + */ SEC("struct_ops/bpf_cubic_init") void BPF_PROG(bpf_cubic_init, struct sock *sk) { @@ -184,8 +188,10 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } -/* "struct_ops" prefix is a requirement */ -SEC("struct_ops/bpf_cubic_cwnd_event") +/* No prefix in SEC will also work. + * The remaining tcp-cubic functions have an easier way. + */ +SEC("no-sec-prefix-bictcp_cwnd_event") void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index f266c757b3..95a5a0778e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -19,8 +19,9 @@ #include #include +int _version SEC("version") = 1; #define PROG(F) PROG_(F, _##F) -#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM +#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME /* These are the identifiers of the BPF programs that will be used in tail * calls. 
Name is limited to 16 characters, with the terminating character and diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c index c21e3f5453..9442390268 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c @@ -49,7 +49,7 @@ int dump_unix(struct bpf_iter__unix *ctx) sock_i_ino(sk)); if (unix_sk->addr) { - if (unix_sk->addr->name->sun_path[0]) { + if (!UNIX_ABSTRACT(unix_sk)) { BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path); } else { /* The name of the abstract UNIX domain socket starts diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index e0f42601be..eef5646ddb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -6,6 +6,8 @@ #define AF_INET6 10 #define __SO_ACCEPTCON (1 << 16) +#define UNIX_HASH_SIZE 256 +#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE) #define SOL_TCP 6 #define TCP_CONGESTION 13 diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c index e5560a6560..8f44767a75 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c @@ -11,7 +11,7 @@ /* *struct bitfields_only_mixed_types { * int a: 3; - * long b: 2; + * long int b: 2; * _Bool c: 1; * enum { * A = 0, @@ -27,7 +27,7 @@ struct bitfields_only_mixed_types { int a: 3; - long b: 2; + long int b: 2; bool c: 1; /* it's really a _Bool type */ enum { A, /* A = 0, dumper is very explicit */ @@ -44,8 +44,8 @@ struct bitfields_only_mixed_types { * char: 4; * int a: 4; * short b; - * long c; - * long d: 8; + * long int c; + * long int d: 8; * int e; * int f; *}; @@ -71,7 +71,7 @@ struct bitfield_mixed_with_others { *struct bitfield_flushed { * int a: 4; * long: 60; - * long b: 16; + * long int b: 16; *}; * */ diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c index e304b6204b..1cef3bec1d 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c @@ -29,7 +29,7 @@ struct non_packed_fields { struct nested_packed { char: 4; int a: 4; - long b; + long int b; struct { char c; int d; @@ -44,7 +44,7 @@ union union_is_never_packed { union union_does_not_need_packing { struct { - long a; + long int a; int b; } __attribute__((packed)); int c; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index f2661c8d2d..35c512818a 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -9,7 +9,7 @@ /* ----- START-EXPECTED-OUTPUT ----- */ struct padded_implicitly { int a; - long b; + long int b; char c; }; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 1c7105fcae..8aaa24a003 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -189,7 +189,7 @@ struct struct_with_embedded_stuff { const char *d; } e; union { - volatile long f; + volatile long 
int f; void * restrict g; }; }; diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c index 3f81ff9218..a253730020 100644 --- a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c @@ -20,7 +20,7 @@ struct { __u32 invocations = 0; -SEC("cgroup_skb/egress") +SEC("cgroup_skb/egress/1") int egress1(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = @@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb) return 1; } -SEC("cgroup_skb/egress") +SEC("cgroup_skb/egress/2") int egress2(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c index d662db27fe..a149f33bc5 100644 --- a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c @@ -20,7 +20,7 @@ struct { __u32 invocations = 0; -SEC("cgroup_skb/egress") +SEC("cgroup_skb/egress/1") int egress1(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = @@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb) return 1; } -SEC("cgroup_skb/egress") +SEC("cgroup_skb/egress/2") int egress2(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c index 88638315c5..3f757e30d7 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c @@ -14,6 +14,7 @@ #include #include +int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; __u16 g_serv_port = 0; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index b241932911..a943d394fd 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -31,6 +31,8 @@ #define IFNAMSIZ 16 #endif +int _version SEC("version") = 1; + __attribute__ ((noinline)) int do_bind(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 40266d2c73..506d0f81a3 100644 --- a/tools/testing/selftests/bpf/progs/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -24,6 +24,8 @@ #define DST_REWRITE_PORT6 6666 +int _version SEC("version") = 1; + SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c index 27a632dd38..a979aaef2a 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port4.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c @@ -13,6 +13,7 @@ #include char _license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; struct svc_addr { __be32 addr; diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c index 19cad93e61..afc8f1c5a9 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port6.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c @@ -12,6 +12,7 @@ #include char _license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; struct svc_addr { __be32 addr[4]; diff --git 
a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c index 79b54a4fa2..8924e06bde 100644 --- a/tools/testing/selftests/bpf/progs/dev_cgroup.c +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c @@ -57,3 +57,4 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) } char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 48cd14b437..49a84a3a23 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -73,7 +73,7 @@ int test_subprog2(struct args_subprog2 *ctx) __builtin_preserve_access_index(&skb->len)); ret = ctx->ret; - /* bpf_prog_test_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 + /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 * which randomizes upper 32 bits after BPF_ALU32 insns. * Hence after 'w0 <<= 1' upper bits of $rax are random. * That is expected and correct. Trim them. diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c index bca92c9bd2..03a672d763 100644 --- a/tools/testing/selftests/bpf/progs/fexit_sleep.c +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -13,7 +13,7 @@ int fexit_cnt = 0; SEC("fentry/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) { - if (bpf_get_current_pid_tgid() >> 32 != pid) + if ((int)bpf_get_current_pid_tgid() != pid) return 0; fentry_cnt++; @@ -23,7 +23,7 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) SEC("fexit/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) { - if (bpf_get_current_pid_tgid() >> 32 != pid) + if ((int)bpf_get_current_pid_tgid() != pid) return 0; fexit_cnt++; diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c index 52f6995ff2..75e8e1069f 100644 --- a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c +++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c @@ -23,16 +23,6 @@ struct callback_ctx { int output; }; -const volatile int bypass_unused = 1; - -static __u64 -unused_subprog(struct bpf_map *map, __u32 *key, __u64 *val, - struct callback_ctx *data) -{ - data->output = 0; - return 1; -} - static __u64 check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val, struct callback_ctx *data) @@ -57,15 +47,13 @@ check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val, u32 arraymap_output = 0; -SEC("tc") +SEC("classifier") int test_pkt_access(struct __sk_buff *skb) { struct callback_ctx data; data.output = 0; bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0); - if (!bypass_unused) - bpf_for_each_map_elem(&arraymap, unused_subprog, &data, 0); arraymap_output = data.output; bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0); diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c index 276994d5c0..913dd91aaf 100644 --- a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c +++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c @@ -78,7 +78,7 @@ int hashmap_output = 0; int hashmap_elems = 0; int percpu_map_elems = 0; -SEC("tc") +SEC("classifier") int test_pkt_access(struct __sk_buff *skb) { struct callback_ctx data; diff --git 
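
The _version SEC("version") lines reinstated in this and the following files are a legacy loader artifact: old libbpf passed the object's version section to the kernel as kern_version at load time, and historically the kernel enforced a matching value only for kprobe-type programs (modern kernels do not check it at all, and newer libbpf ignores the section). The conventional form, as used in these hunks:

#include <linux/types.h>
#include <linux/version.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";
/* matched against the running kernel by old loaders; ignored today */
__u32 _version SEC("version") = LINUX_VERSION_CODE;
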
a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 68587b1de3..6b42db2fe3 100644 --- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -37,3 +37,4 @@ int trace(void *ctx) } char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index 7236da72ce..55e283050c 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -9,8 +9,8 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __type(key, int); - __type(value, int); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); } perf_buf_map SEC(".maps"); #define _(P) (__builtin_preserve_access_index(P)) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 8a8cf59017..470f8723e4 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -8,7 +8,7 @@ extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, __u32 c, __u64 d) __ksym; -SEC("tc") +SEC("classifier") int kfunc_call_test2(struct __sk_buff *skb) { struct bpf_sock *sk = skb->sk; @@ -23,7 +23,7 @@ int kfunc_call_test2(struct __sk_buff *skb) return bpf_kfunc_call_test2((struct sock *)sk, 1, 2); } -SEC("tc") +SEC("classifier") int kfunc_call_test1(struct __sk_buff *skb) { struct bpf_sock *sk = skb->sk; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c index c1fdecabea..5fbd9e232d 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -33,7 +33,7 @@ int __noinline f1(struct __sk_buff *skb) return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); } -SEC("tc") +SEC("classifier") int kfunc_call_test1(struct __sk_buff *skb) { return f1(skb); diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 9b1f9b75d5..95868bc7ad 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -20,6 +20,7 @@ int sk_storage_result = -1; struct local_storage { struct inode *exec_inode; __u32 value; + struct bpf_spin_lock lock; }; struct { @@ -57,7 +58,9 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) bpf_get_current_task_btf(), 0, 0); if (storage) { /* Don't let an executable delete itself */ + bpf_spin_lock(&storage->lock); is_self_unlink = storage->exec_inode == victim->d_inode; + bpf_spin_unlock(&storage->lock); if (is_self_unlink) return -EPERM; } @@ -65,7 +68,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) return 0; } -SEC("lsm.s/inode_rename") +SEC("lsm/inode_rename") int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -86,8 +89,10 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, if (!storage) return 0; + bpf_spin_lock(&storage->lock); if (storage->value != DUMMY_STORAGE_VALUE) 
inode_storage_result = -1; + bpf_spin_unlock(&storage->lock); err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode); if (!err) @@ -96,7 +101,7 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, return 0; } -SEC("lsm.s/socket_bind") +SEC("lsm/socket_bind") int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, int addrlen) { @@ -112,8 +117,10 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, if (!storage) return 0; + bpf_spin_lock(&storage->lock); if (storage->value != DUMMY_STORAGE_VALUE) sk_storage_result = -1; + bpf_spin_unlock(&storage->lock); err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); if (!err) @@ -122,7 +129,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, return 0; } -SEC("lsm.s/socket_post_create") +SEC("lsm/socket_post_create") int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int protocol, int kern) { @@ -137,7 +144,9 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, if (!storage) return 0; + bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); return 0; } @@ -145,7 +154,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, /* This uses the local storage to remember the inode of the binary that a * process was originally executing. */ -SEC("lsm.s/bprm_committed_creds") +SEC("lsm/bprm_committed_creds") void BPF_PROG(exec, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; @@ -157,13 +166,18 @@ void BPF_PROG(exec, struct linux_binprm *bprm) storage = bpf_task_storage_get(&task_storage_map, bpf_get_current_task_btf(), 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (storage) + if (storage) { + bpf_spin_lock(&storage->lock); storage->exec_inode = bprm->file->f_inode; + bpf_spin_unlock(&storage->lock); + } storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return; + bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); } diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index b64df94ec4..d1d304c980 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -334,11 +334,9 @@ static inline int check_lpm_trie(void) return 1; } -#define INNER_MAX_ENTRIES 1234 - struct inner_map { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, INNER_MAX_ENTRIES); + __uint(max_entries, 1); __type(key, __u32); __type(value, __u32); } inner_map SEC(".maps"); @@ -350,7 +348,7 @@ struct { __type(value, __u32); __array(values, struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, INNER_MAX_ENTRIES); + __uint(max_entries, 1); __type(key, __u32); __type(value, __u32); }); @@ -362,13 +360,8 @@ static inline int check_array_of_maps(void) { struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps; struct bpf_map *map = (struct bpf_map *)&m_array_of_maps; - struct bpf_array *inner_map; - int key = 0; VERIFY(check_default(&array_of_maps->map, map)); - inner_map = bpf_map_lookup_elem(array_of_maps, &key); - VERIFY(inner_map != 0); - VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1; } @@ -389,13 +382,8 @@ static inline int check_hash_of_maps(void) { struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps; struct bpf_map *map = (struct bpf_map 
*)&m_hash_of_maps; - struct bpf_htab *inner_map; - int key = 2; VERIFY(check_default(&hash_of_maps->map, map)); - inner_map = bpf_map_lookup_elem(hash_of_maps, &key); - VERIFY(inner_map != 0); - VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1; } @@ -695,4 +683,5 @@ int cg_skb(void *ctx) return 1; } +__u32 _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index f718b2c212..43649bce4c 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -68,3 +68,4 @@ int bpf_nextcnt(struct __sk_buff *skb) } char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c index b3fcb5274e..25467d13c3 100644 --- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c +++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c @@ -11,8 +11,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; struct { __uint(type, BPF_MAP_TYPE_STACK_TRACE); __uint(max_entries, 16384); - __type(key, __u32); - __type(value, stack_trace_t); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(stack_trace_t)); } stackmap SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 1ed28882da..2fb7adafb6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -159,59 +159,6 @@ struct { __uint(value_size, sizeof(long long) * 127); } stackmap SEC(".maps"); -#ifdef USE_BPF_LOOP -struct process_frame_ctx { - int cur_cpu; - int32_t *symbol_counter; - void *frame_ptr; - FrameData *frame; - PidData *pidData; - Symbol *sym; - Event *event; - bool done; -}; - -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) - -static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) -{ - int zero = 0; - void *frame_ptr = ctx->frame_ptr; - PidData *pidData = ctx->pidData; - FrameData *frame = ctx->frame; - int32_t *symbol_counter = ctx->symbol_counter; - int cur_cpu = ctx->cur_cpu; - Event *event = ctx->event; - Symbol *sym = ctx->sym; - - if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { - int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; - int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); - - if (!symbol_id) { - bpf_map_update_elem(&symbolmap, sym, &zero, 0); - symbol_id = bpf_map_lookup_elem(&symbolmap, sym); - if (!symbol_id) { - ctx->done = true; - return 1; - } - } - if (*symbol_id == new_symbol_id) - (*symbol_counter)++; - - barrier_var(i); - if (i >= STACK_MAX_LEN) - return 1; - - event->stack[i] = *symbol_id; - - event->stack_len = i + 1; - frame_ptr = frame->f_back; - } - return 0; -} -#endif /* USE_BPF_LOOP */ - #ifdef GLOBAL_FUNC __noinline #elif defined(SUBPROGS) @@ -281,26 +228,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); if (symbol_counter == NULL) return 0; -#ifdef USE_BPF_LOOP - struct process_frame_ctx ctx = { - .cur_cpu = cur_cpu, - .symbol_counter = symbol_counter, - .frame_ptr = frame_ptr, - .frame = &frame, - .pidData = pidData, - .sym = &sym, - .event = event, - }; - - bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); - if (ctx.done) - return 0; -#else #ifdef NO_UNROLL #pragma clang loop 
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 1ed28882da..2fb7adafb6 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -159,59 +159,6 @@ struct {
 	__uint(value_size, sizeof(long long) * 127);
 } stackmap SEC(".maps");
 
-#ifdef USE_BPF_LOOP
-struct process_frame_ctx {
-	int cur_cpu;
-	int32_t *symbol_counter;
-	void *frame_ptr;
-	FrameData *frame;
-	PidData *pidData;
-	Symbol *sym;
-	Event *event;
-	bool done;
-};
-
-#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
-
-static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
-{
-	int zero = 0;
-	void *frame_ptr = ctx->frame_ptr;
-	PidData *pidData = ctx->pidData;
-	FrameData *frame = ctx->frame;
-	int32_t *symbol_counter = ctx->symbol_counter;
-	int cur_cpu = ctx->cur_cpu;
-	Event *event = ctx->event;
-	Symbol *sym = ctx->sym;
-
-	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
-		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
-		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
-
-		if (!symbol_id) {
-			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
-			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
-			if (!symbol_id) {
-				ctx->done = true;
-				return 1;
-			}
-		}
-		if (*symbol_id == new_symbol_id)
-			(*symbol_counter)++;
-
-		barrier_var(i);
-		if (i >= STACK_MAX_LEN)
-			return 1;
-
-		event->stack[i] = *symbol_id;
-
-		event->stack_len = i + 1;
-		frame_ptr = frame->f_back;
-	}
-	return 0;
-}
-#endif /* USE_BPF_LOOP */
-
 #ifdef GLOBAL_FUNC
 __noinline
 #elif defined(SUBPROGS)
@@ -281,26 +228,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
 	int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
 	if (symbol_counter == NULL)
 		return 0;
-#ifdef USE_BPF_LOOP
-	struct process_frame_ctx ctx = {
-		.cur_cpu = cur_cpu,
-		.symbol_counter = symbol_counter,
-		.frame_ptr = frame_ptr,
-		.frame = &frame,
-		.pidData = pidData,
-		.sym = &sym,
-		.event = event,
-	};
-
-	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
-	if (ctx.done)
-		return 0;
-#else
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
 #else
 #pragma clang loop unroll(full)
-#endif /* NO_UNROLL */
+#endif
 	/* Unwind python stack */
 	for (int i = 0; i < STACK_MAX_LEN; ++i) {
 		if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
@@ -319,7 +251,6 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
 			frame_ptr = frame.f_back;
 		}
 	}
-#endif /* USE_BPF_LOOP */
 	event->stack_complete = frame_ptr == NULL;
 	} else {
 		event->stack_complete = 1;
diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c
index 3c2423bb19..49f679375b 100644
--- a/tools/testing/selftests/bpf/progs/recursion.c
+++ b/tools/testing/selftests/bpf/progs/recursion.c
@@ -24,8 +24,8 @@ struct {
 int pass1 = 0;
 int pass2 = 0;
 
-SEC("fentry/htab_map_delete_elem")
-int BPF_PROG(on_delete, struct bpf_map *map)
+SEC("fentry/__htab_map_lookup_elem")
+int BPF_PROG(on_lookup, struct bpf_map *map)
 {
 	int key = 0;
 
@@ -35,7 +35,10 @@ int BPF_PROG(on_delete, struct bpf_map *map)
 	}
 	if (map == (void *)&hash2) {
 		pass2++;
-		bpf_map_delete_elem(&hash2, &key);
+		/* htab_map_gen_lookup() will inline below call
+		 * into direct call to __htab_map_lookup_elem()
+		 */
+		bpf_map_lookup_elem(&hash2, &key);
 		return 0;
 	}
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index ea75a44cb7..ac5abc34cd 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -18,6 +18,8 @@
 #define DST_PORT 4040
 #define DST_REWRITE_PORT4 4444
 
+int _version SEC("version") = 1;
+
 SEC("cgroup/sendmsg4")
 int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
index bf9b46b806..24694b1a8d 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
@@ -22,6 +22,8 @@
 
 #define DST_REWRITE_PORT6 6666
 
+int _version SEC("version") = 1;
+
 SEC("cgroup/sendmsg6")
 int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index 992b786100..7f2eaa2f89 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -25,7 +25,7 @@ static INLINE struct iphdr *get_iphdr(struct __sk_buff *skb)
 	return ip;
 }
 
-SEC("tc")
+SEC("classifier/cls")
 int main_prog(struct __sk_buff *skb)
 {
 	struct iphdr *ip = NULL;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index 95d5b941bc..ca283af80d 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -2,6 +2,8 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index 80632954c5..eeaf6e75c9 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -3,6 +3,8 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 SEC("sk_msg1")
 int bpf_prog1(struct sk_msg_md *msg)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index e2468a6d01..4797dc9850 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -2,25 +2,27 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 struct {
 	__uint(type, BPF_MAP_TYPE_SOCKMAP);
 	__uint(max_entries, 20);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 } sock_map_rx SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_SOCKMAP);
 	__uint(max_entries, 20);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 } sock_map_tx SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_SOCKMAP);
 	__uint(max_entries, 20);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 } sock_map_msg SEC(".maps");
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
index 9fb241b972..c6d428a8d7 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -3,6 +3,7 @@
 #include 
 
 char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
 
 #define SOL_CUSTOM			0xdeadbeef
 #define CUSTOM_INHERIT1			0
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
index 177a59069d..9d8c212dde 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -4,8 +4,9 @@
 #include 
 
 char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
 
-SEC("cgroup/getsockopt")
+SEC("cgroup/getsockopt/child")
 int _getsockopt_child(struct bpf_sockopt *ctx)
 {
 	__u8 *optval_end = ctx->optval_end;
@@ -28,7 +29,7 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
 	return 1;
 }
 
-SEC("cgroup/getsockopt")
+SEC("cgroup/getsockopt/parent")
 int _getsockopt_parent(struct bpf_sockopt *ctx)
 {
 	__u8 *optval_end = ctx->optval_end;
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 753718595c..60c93aee2f 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -445,48 +445,6 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 	return payload;
 }
 
-#ifdef USE_BPF_LOOP
-enum read_type {
-	READ_INT_VAR,
-	READ_MAP_VAR,
-	READ_STR_VAR,
-};
-
-struct read_var_ctx {
-	struct strobemeta_payload *data;
-	void *tls_base;
-	struct strobemeta_cfg *cfg;
-	void *payload;
-	/* value gets mutated */
-	struct strobe_value_generic *value;
-	enum read_type type;
-};
-
-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
-{
-	switch (ctx->type) {
-	case READ_INT_VAR:
-		if (index >= STROBE_MAX_INTS)
-			return 1;
-		read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
-		break;
-	case READ_MAP_VAR:
-		if (index >= STROBE_MAX_MAPS)
-			return 1;
-		ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
-					    ctx->value, ctx->data, ctx->payload);
-		break;
-	case READ_STR_VAR:
-		if (index >= STROBE_MAX_STRS)
-			return 1;
-		ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
-					     ctx->value, ctx->data, ctx->payload);
-		break;
-	}
-	return 0;
-}
-#endif /* USE_BPF_LOOP */
-
 /*
  * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
  * pointer to *right after* payload ends
@@ -517,36 +475,11 @@ static void *read_strobe_meta(struct task_struct *task,
 	 */
 	tls_base = (void *)task;
 
-#ifdef USE_BPF_LOOP
-	struct read_var_ctx ctx = {
-		.cfg = cfg,
-		.tls_base = tls_base,
-		.value = &value,
-		.data = data,
-		.payload = payload,
-	};
-	int err;
-
-	ctx.type = READ_INT_VAR;
-	err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
-	if (err != STROBE_MAX_INTS)
-		return NULL;
-
-	ctx.type = READ_STR_VAR;
-	err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
-	if (err != STROBE_MAX_STRS)
-		return NULL;
-
-	ctx.type = READ_MAP_VAR;
-	err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
-	if (err != STROBE_MAX_MAPS)
-		return NULL;
-#else
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
 #else
 #pragma unroll
-#endif /* NO_UNROLL */
+#endif
 	for (int i = 0; i < STROBE_MAX_INTS; ++i) {
 		read_int_var(cfg, i, tls_base, &value, data);
 	}
@@ -554,7 +487,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma clang loop unroll(disable)
 #else
 #pragma unroll
-#endif /* NO_UNROLL */
+#endif
 	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
 		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
 	}
@@ -562,12 +495,10 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma clang loop unroll(disable)
 #else
 #pragma unroll
-#endif /* NO_UNROLL */
+#endif
 	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
 		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
 	}
-#endif /* USE_BPF_LOOP */
-
 	/*
 	 * return pointer right after end of payload, so it's possible to
 	 * calculate exact amount of useful data that needs to be sent
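/* The USE_BPF_LOOP blocks deleted from pyperf.h and strobemeta.h above
 * wrapped per-iteration work in a callback for the bpf_loop() helper
 * instead of relying on #pragma-unrolled loops. A minimal sketch of that
 * pattern, assuming a kernel and libbpf new enough to provide bpf_loop()
 * (v5.17+); all names here are hypothetical:
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

static int sum_callback(__u32 index, void *data)
{
	__u64 *sum = data;

	*sum += index;
	return 0;	/* 0 continues the loop, 1 breaks out early */
}

SEC("raw_tp/sys_enter")
int sum_demo(const void *ctx)
{
	__u64 sum = 0;

	/* runs the callback up to 10 times; flags must currently be 0 */
	bpf_loop(10, sum_callback, &sum, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";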
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 8159a0b4a6..7115bcefbe 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -11,8 +11,8 @@ struct {
 } jmp_table SEC(".maps");
 
 #define TAIL_FUNC(x)				\
-	SEC("tc")				\
-	int classifier_##x(struct __sk_buff *skb)	\
+	SEC("classifier/" #x)			\
+	int bpf_func_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -20,7 +20,7 @@
 TAIL_FUNC(0)
 TAIL_FUNC(1)
 TAIL_FUNC(2)
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	/* Multiple locations to make sure we patch
@@ -45,3 +45,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index a5ff53e617..0431e4fe7e 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -10,41 +10,41 @@ struct {
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
 
-SEC("tc")
-int classifier_0(struct __sk_buff *skb)
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 1);
 	return 0;
 }
 
-SEC("tc")
-int classifier_1(struct __sk_buff *skb)
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 2);
 	return 1;
 }
 
-SEC("tc")
-int classifier_2(struct __sk_buff *skb)
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
 {
 	return 2;
 }
 
-SEC("tc")
-int classifier_3(struct __sk_buff *skb)
+SEC("classifier/3")
+int bpf_func_3(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 4);
 	return 3;
 }
 
-SEC("tc")
-int classifier_4(struct __sk_buff *skb)
+SEC("classifier/4")
+int bpf_func_4(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 3);
 	return 4;
 }
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 0);
@@ -56,3 +56,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index f60bcd7b8d..910858fe07 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -12,15 +12,15 @@ struct {
 
 int count = 0;
 
-SEC("tc")
-int classifier_0(struct __sk_buff *skb)
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
 {
 	count++;
 	bpf_tail_call_static(skb, &jmp_table, 0);
 	return 1;
 }
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 0);
@@ -28,3 +28,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index a56bbc2313..bd4be135c3 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -13,8 +13,8 @@ struct {
 int selector = 0;
 
 #define TAIL_FUNC(x)				\
-	SEC("tc")				\
-	int classifier_##x(struct __sk_buff *skb)	\
+	SEC("classifier/" #x)			\
+	int bpf_func_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -22,7 +22,7 @@
 TAIL_FUNC(0)
 TAIL_FUNC(1)
 TAIL_FUNC(2)
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call(skb, &jmp_table, selector);
@@ -30,3 +30,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index 8d03496eb6..adf30a3306 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -13,8 +13,8 @@ struct {
 int selector = 0;
 
 #define TAIL_FUNC(x)				\
-	SEC("tc")				\
-	int classifier_##x(struct __sk_buff *skb)	\
+	SEC("classifier/" #x)			\
+	int bpf_func_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -22,7 +22,7 @@
 TAIL_FUNC(0)
 TAIL_FUNC(1)
 TAIL_FUNC(2)
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	int idx = 0;
@@ -37,3 +37,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
index 8c91428deb..0103f3dd9f 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -10,8 +10,8 @@ struct {
 } jmp_table SEC(".maps");
 
 #define TAIL_FUNC(x)				\
-	SEC("tc")				\
-	int classifier_##x(struct __sk_buff *skb)	\
+	SEC("classifier/" #x)			\
+	int bpf_func_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -26,7 +26,7 @@ int subprog_tail(struct __sk_buff *skb)
 	return skb->len * 2;
 }
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 1);
@@ -35,3 +35,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
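/* All the tailcall*.c programs above share one shape: a PROG_ARRAY that
 * the test harness populates with the numbered programs, plus an entry
 * program that jumps through it. bpf_tail_call_static() needs a
 * compile-time-constant index and does not return on success. A trimmed
 * sketch with illustrative names only:
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 4);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} demo_jmp_table SEC(".maps");

SEC("classifier")
int demo_entry(struct __sk_buff *skb)
{
	bpf_tail_call_static(skb, &demo_jmp_table, 0);
	/* only reached when slot 0 is empty */
	return 0;
}

char __license[] SEC("license") = "GPL";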
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index ce97d141da..3cc4c12817 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -22,14 +22,14 @@ int subprog_tail(struct __sk_buff *skb)
 
 int count = 0;
 
-SEC("tc")
-int classifier_0(struct __sk_buff *skb)
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
 {
 	count++;
 	return subprog_tail(skb);
 }
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 0);
@@ -38,3 +38,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
index 7fab39a3bb..0d5482bea6 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -33,23 +33,23 @@ int subprog_tail(struct __sk_buff *skb)
 	return skb->len * 2;
 }
 
-SEC("tc")
-int classifier_0(struct __sk_buff *skb)
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
 {
 	volatile char arr[128] = {};
 
 	return subprog_tail2(skb);
 }
 
-SEC("tc")
-int classifier_1(struct __sk_buff *skb)
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
 {
 	volatile char arr[128] = {};
 
 	return skb->len * 3;
 }
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	volatile char arr[128] = {};
@@ -58,3 +58,4 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index b67e8022d5..e89368a50b 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -50,29 +50,30 @@ int subprog_tail(struct __sk_buff *skb)
 	return skb->len;
 }
 
-SEC("tc")
-int classifier_1(struct __sk_buff *skb)
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
 {
 	return subprog_tail_2(skb);
 }
 
-SEC("tc")
-int classifier_2(struct __sk_buff *skb)
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
 {
 	count++;
 	return subprog_tail_2(skb);
 }
 
-SEC("tc")
-int classifier_0(struct __sk_buff *skb)
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
 {
 	return subprog_tail_1(skb);
 }
 
-SEC("tc")
+SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
 	return subprog_tail(skb);
 }
 
 char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
index 0988d79f15..0cb3204ddb 100644
--- a/tools/testing/selftests/bpf/progs/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -3,6 +3,7 @@
 #include 
 
 char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
 
 struct tcp_rtt_storage {
 	__u32 invoked;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
index 160ead6c67..31538c9ed1 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
@@ -4,6 +4,8 @@
 #include 
 #include "bpf_legacy.h"
 
+int _version SEC("version") = 1;
+
 struct ipv_counts {
 	unsigned int v4;
 	unsigned int v6;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index c218cf8989..c1e0c8c7c5 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -21,8 +21,8 @@ struct inner_map_sz2 {
 struct outer_arr {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 3);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 	/* it's possible to use anonymous struct as inner map definition here */
 	__array(values, struct {
 		__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -61,8 +61,8 @@ struct inner_map_sz4 {
 struct outer_arr_dyn {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 3);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 	__array(values, struct {
 		__uint(type, BPF_MAP_TYPE_ARRAY);
 		__uint(map_flags, BPF_F_INNER_MAP);
@@ -81,7 +81,7 @@ struct outer_arr_dyn {
 struct outer_hash {
 	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 	__uint(max_entries, 5);
-	__type(key, int);
+	__uint(key_size, sizeof(int));
 	/* Here everything works flawlessly due to reuse of struct inner_map
 	 * and compiler will complain at the attempt to use non-inner_map
 	 * references below. This is great experience.
@@ -111,8 +111,8 @@ struct sockarr_sz2 {
 struct outer_sockarr_sz1 {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 1);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 	__array(values, struct sockarr_sz1);
 } outer_sockarr SEC(".maps") = {
 	.values = { (void *)&sockarr_sz1 },
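/* For orientation next to test_btf_map_in_map.c: outer map entries hold
 * inner map references; with BTF-defined maps the inner layout is declared
 * via __array(values, ...) and slots may be pre-seeded at load time. A
 * trimmed sketch with hypothetical names:
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct inner_demo {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, int);
} inner_a SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));	/* must stay sizeof(int) */
	__array(values, struct inner_demo);
} outer_demo SEC(".maps") = {
	.values = { [0] = &inner_a },		/* slot 1 left empty */
};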
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 1884a5bd10..6c55601627 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -4,6 +4,8 @@
 #include 
 #include "bpf_legacy.h"
 
+int _version SEC("version") = 1;
+
 struct ipv_counts {
 	unsigned int v4;
 	unsigned int v6;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 15e0f9945f..506da7fd2d 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -3,6 +3,8 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 struct ipv_counts {
 	unsigned int v4;
 	unsigned int v6;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
index e2bea4da19..9a6b85dd52 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -145,7 +145,7 @@ static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("tc")
+SEC("classifier/ingress")
 int cls_ingress(struct __sk_buff *skb)
 {
 	struct ipv6hdr *ip6h;
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
index 4faba88e45..77e47b9e44 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup_link.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -6,14 +6,14 @@
 int calls = 0;
 int alt_calls = 0;
 
-SEC("cgroup_skb/egress")
+SEC("cgroup_skb/egress1")
 int egress(struct __sk_buff *skb)
 {
 	__sync_fetch_and_add(&calls, 1);
 	return 1;
 }
 
-SEC("cgroup_skb/egress")
+SEC("cgroup_skb/egress2")
 int egress_alt(struct __sk_buff *skb)
 {
 	__sync_fetch_and_add(&alt_calls, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index 2ec1de11a3..71184af577 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -153,7 +153,7 @@ int xdp_input_len_exceed(struct xdp_md *ctx)
 	return retval;
 }
 
-SEC("tc")
+SEC("classifier")
 int tc_use_helper(struct __sk_buff *ctx)
 {
 	int retval = BPF_OK; /* Expected retval on successful test */
@@ -172,7 +172,7 @@ int tc_use_helper(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("tc")
+SEC("classifier")
 int tc_exceed_mtu(struct __sk_buff *ctx)
 {
 	__u32 ifindex = GLOBAL_USER_IFINDEX;
@@ -196,7 +196,7 @@ int tc_exceed_mtu(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("tc")
+SEC("classifier")
 int tc_exceed_mtu_da(struct __sk_buff *ctx)
 {
 	/* SKB Direct-Access variant */
@@ -223,7 +223,7 @@ int tc_exceed_mtu_da(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("tc")
+SEC("classifier")
 int tc_minus_delta(struct __sk_buff *ctx)
 {
 	int retval = BPF_OK; /* Expected retval on successful test */
@@ -245,7 +245,7 @@ int tc_minus_delta(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("tc")
+SEC("classifier")
 int tc_input_len(struct __sk_buff *ctx)
 {
 	int retval = BPF_OK; /* Expected retval on successful test */
@@ -265,7 +265,7 @@ int tc_input_len(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("tc")
+SEC("classifier")
 int tc_input_len_exceed(struct __sk_buff *ctx)
 {
 	int retval = BPF_DROP; /* Fail */
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 2833ad722c..e2a5acc478 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -928,7 +928,7 @@ static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
 	}
 }
 
-SEC("tc")
+SEC("classifier/cls_redirect")
 int cls_redirect(struct __sk_buff *skb)
 {
 	metrics_t *metrics = get_global_metrics();
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
index b2ded49757..8b533db4a7 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
@@ -42,16 +42,7 @@ struct core_reloc_mods {
 	core_reloc_mods_substruct_t h;
 };
 
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
-#else
-#define CORE_READ(dst, src) ({ \
-	int __sz = sizeof(*(dst)) < sizeof(*(src)) ? sizeof(*(dst)) : \
-						     sizeof(*(src)); \
-	bpf_core_read((char *)(dst) + sizeof(*(dst)) - __sz, __sz, \
-		      (const char *)(src) + sizeof(*(src)) - __sz); \
-})
-#endif
 
 SEC("raw_tracepoint/sys_enter")
 int test_core_mods(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
index 1705097d01..01a002ade5 100644
--- a/tools/testing/selftests/bpf/progs/test_enable_stats.c
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -13,6 +13,6 @@ __u64 count = 0;
 SEC("raw_tracepoint/sys_enter")
 int test_enable_stats(void *ctx)
 {
-	__sync_fetch_and_add(&count, 1);
+	count += 1;
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 719e314ef3..1319be1c54 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -68,7 +68,7 @@ static struct foo struct3 = {
 		bpf_map_update_elem(&result_##map, &key, var, 0);	\
 	} while (0)
 
-SEC("tc")
+SEC("classifier/static_data_load")
 int load_static_data(struct __sk_buff *skb)
 {
 	static const __u64 bar = ~0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 7b42dad187..880260f6d5 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -38,7 +38,7 @@ int f3(int val, struct __sk_buff *skb, int var)
 	return skb->ifindex * val * var;
 }
 
-SEC("tc")
+SEC("classifier/test")
 int test_cls(struct __sk_buff *skb)
 {
 	return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
index 01bf8275df..86f0ecb304 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func3.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func3.c
@@ -54,7 +54,7 @@ int f8(struct __sk_buff *skb)
 }
 #endif
 
-SEC("tc")
+SEC("classifier/test")
 int test_cls(struct __sk_buff *skb)
 {
 #ifndef NO_FN8
diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c
index 9248d03e0d..260c25b827 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func5.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func5.c
@@ -24,7 +24,7 @@ int f3(int val, struct __sk_buff *skb)
 	return skb->ifindex * val;
 }
 
-SEC("tc")
+SEC("classifier/test")
 int test_cls(struct __sk_buff *skb)
 {
 	return f1(skb) + f2(2, skb) + f3(3, skb);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c
index af8c78bdfb..69e19c64e1 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func6.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func6.c
@@ -24,7 +24,7 @@ int f3(int val, struct __sk_buff *skb)
 	return skb->ifindex * val;
 }
 
-SEC("tc")
+SEC("classifier/test")
 int test_cls(struct __sk_buff *skb)
 {
 	return f1(skb) + f2(2, skb) + f3(3, skb);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
index 6cb8e2f525..309b3f6136 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func7.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func7.c
@@ -10,7 +10,7 @@ void foo(struct __sk_buff *skb)
 	skb->tc_index = 0;
 }
 
-SEC("tc")
+SEC("classifier/test")
 int test_cls(struct __sk_buff *skb)
 {
 	foo(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
index 0650d918c0..d6a0b3086b 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_module.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
@@ -2,48 +2,24 @@
 /* Copyright (c) 2021 Facebook */
 
 #include "vmlinux.h"
+
 #include 
 
-#define X_0(x)
-#define X_1(x) x X_0(x)
-#define X_2(x) x X_1(x)
-#define X_3(x) x X_2(x)
-#define X_4(x) x X_3(x)
-#define X_5(x) x X_4(x)
-#define X_6(x) x X_5(x)
-#define X_7(x) x X_6(x)
-#define X_8(x) x X_7(x)
-#define X_9(x) x X_8(x)
-#define X_10(x) x X_9(x)
-#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y)
-
 extern const int bpf_testmod_ksym_percpu __ksym;
-extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
-extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak;
 
-int out_bpf_testmod_ksym = 0;
-const volatile int x = 0;
+int out_mod_ksym_global = 0;
+bool triggered = false;
 
-SEC("tc")
-int load(struct __sk_buff *skb)
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
 {
-	/* This will be kept by clang, but removed by verifier. Since it is
-	 * marked as __weak, libbpf and gen_loader don't error out if BTF ID
-	 * is not found for it, instead imm and off is set to 0 for it.
-	 */
-	if (x)
-		bpf_testmod_invalid_mod_kfunc();
-	bpf_testmod_test_mod_kfunc(42);
-	out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
-	return 0;
-}
+	int *val;
+	__u32 cpu;
+
+	val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+	out_mod_ksym_global = *val;
+	triggered = true;
 
-SEC("tc")
-int load_256(struct __sk_buff *skb)
-{
-	/* this will fail if kfunc doesn't reuse its own btf fd index */
-	REPEAT_256(bpf_testmod_test_mod_kfunc(42););
-	bpf_testmod_test_mod_kfunc(42);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index c26057ec46..33493911d8 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -21,6 +21,8 @@
 #include "test_iptunnel_common.h"
 #include 
 
+int _version SEC("version") = 1;
+
 static inline __u32 rol32(__u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
@@ -448,7 +450,7 @@ static __always_inline int process_packet(void *data, __u64 off, void *data_end,
 	return bpf_redirect(ifindex, 0);
 }
 
-SEC("tc")
+SEC("l4lb-demo")
 int balancer_ingress(struct __sk_buff *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 19e4d2071c..b9e2753f4f 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -447,7 +447,7 @@ static __noinline int process_packet(void *data, __u64 off, void *data_end,
 	return bpf_redirect(ifindex, 0);
 }
 
-SEC("tc")
+SEC("l4lb-demo")
 int balancer_ingress(struct __sk_buff *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index f416032ba8..1cfeb940cf 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -9,19 +9,21 @@ struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 1);
 	__uint(map_flags, 0);
-	__type(key, __u32);
-	__type(value, __u32);
+	__uint(key_size, sizeof(__u32));
+	/* must be sizeof(__u32) for map in map */
+	__uint(value_size, sizeof(__u32));
 } mim_array SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 	__uint(max_entries, 1);
 	__uint(map_flags, 0);
-	__type(key, int);
-	__type(value, __u32);
+	__uint(key_size, sizeof(int));
+	/* must be sizeof(__u32) for map in map */
+	__uint(value_size, sizeof(__u32));
 } mim_hash SEC(".maps");
 
-SEC("xdp")
+SEC("xdp_mimtest")
 int xdp_mimtest0(struct xdp_md *ctx)
 {
 	int value = 123;
@@ -47,4 +49,5 @@ int xdp_mimtest0(struct xdp_md *ctx)
 	return XDP_PASS;
 }
 
+int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
index 9c7d75cf0b..703c08e064 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
@@ -13,7 +13,7 @@ struct inner {
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 0); /* This will make map creation to fail */
-	__type(key, __u32);
+	__uint(key_size, sizeof(__u32));
 	__array(values, struct inner);
 } mim SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index acf073db9e..b5c07ae7b6 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -30,7 +30,7 @@ struct {
 	__type(value, struct array_elem);
 } array_map SEC(".maps");
 
-SEC("cgroup/skb")
+SEC("map_lock_demo")
 int bpf_map_lock_test(struct __sk_buff *skb)
 {
 	struct hmap_elem zero = {}, *val;
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
index 2c121c5d66..6077a02509 100644
--- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -293,7 +293,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops)
 	return check_active_hdr_in(skops);
 }
 
-SEC("sockops")
+SEC("sockops/misc_estab")
 int misc_estab(struct bpf_sock_ops *skops)
 {
 	int true_val = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index 50ce16d02d..bd37ceec55 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -27,20 +27,6 @@ int BPF_PROG(handle_raw_tp_bare,
 	return 0;
 }
 
-int raw_tp_writable_bare_in_val = 0;
-int raw_tp_writable_bare_early_ret = 0;
-int raw_tp_writable_bare_out_val = 0;
-
-SEC("raw_tp.w/bpf_testmod_test_writable_bare")
-int BPF_PROG(handle_raw_tp_writable_bare,
-	     struct bpf_testmod_test_writable_ctx *writable)
-{
-	raw_tp_writable_bare_in_val = writable->val;
-	writable->early_ret = raw_tp_writable_bare_early_ret;
-	writable->val = raw_tp_writable_bare_out_val;
-	return 0;
-}
-
 __u32 tp_btf_read_sz = 0;
 
 SEC("tp_btf/bpf_testmod_test_read")
@@ -87,18 +73,6 @@ int BPF_PROG(handle_fexit,
 	return 0;
 }
 
-SEC("fexit/bpf_testmod_return_ptr")
-int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
-{
-	long buf = 0;
-
-	bpf_probe_read_kernel(&buf, 8, ret);
-	bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
-	*(volatile long long *)ret;
-	*(volatile int *)&ret->f_mode;
-	return 0;
-}
-
 __u32 fmod_ret_read_sz = 0;
 
 SEC("fmod_ret/bpf_testmod_test_read")
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
index 1249a94569..fb22de7c36 100644
--- a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
+++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
@@ -7,15 +7,15 @@
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 	__uint(max_entries, 1);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 } array_1 SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 	__uint(max_entries, 1);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 	__uint(map_flags, BPF_F_PRESERVE_ELEMS);
 } array_2 SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index 17d5b67744..8207a2dc2f 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -6,36 +6,20 @@
 #include 
 #include 
 
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__type(key, int);
-	__type(value, int);
-	__uint(max_entries, 1);
-} my_pid_map SEC(".maps");
-
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-	__type(key, int);
-	__type(value, int);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
 } perf_buf_map SEC(".maps");
 
 SEC("tp/raw_syscalls/sys_enter")
 int handle_sys_enter(void *ctx)
 {
-	int zero = 0, *my_pid, cur_pid;
 	int cpu = bpf_get_smp_processor_id();
 
-	my_pid = bpf_map_lookup_elem(&my_pid_map, &zero);
-	if (!my_pid)
-		return 1;
-
-	cur_pid = bpf_get_current_pid_tgid() >> 32;
-	if (cur_pid != *my_pid)
-		return 1;
-
 	bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
 			      &cpu, sizeof(cpu));
-	return 1;
+	return 0;
 }
 
 char _license[] SEC("license") = "GPL";
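/* test_perf_buffer.c above reduces to this pattern: a PERF_EVENT_ARRAY map
 * plus bpf_perf_event_output() with BPF_F_CURRENT_CPU to push a sample into
 * the current CPU's ring. Minimal sketch; the event layout and names are
 * invented for illustration:
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct demo_event {
	__u32 cpu;
	__u64 ts;
};

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} demo_events SEC(".maps");

SEC("tp/raw_syscalls/sys_enter")
int demo_emit(void *ctx)
{
	struct demo_event e = {
		.cpu = bpf_get_smp_processor_id(),
		.ts = bpf_ktime_get_ns(),
	};

	bpf_perf_event_output(ctx, &demo_events, BPF_F_CURRENT_CPU,
			      &e, sizeof(e));
	return 0;
}

char _license[] SEC("license") = "GPL";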
diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c
index 0facea6cbb..4ef2630292 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning.c
@@ -3,6 +3,8 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
index 2a56db1094..5412e0c732 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
@@ -3,6 +3,8 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 0558544e1f..8520510645 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -15,6 +15,7 @@
 #include 
 
 #define barrier() __asm__ __volatile__("": : :"memory")
+int _version SEC("version") = 1;
 
 /* llvm will optimize both subprograms into exactly the same BPF assembly
 *
@@ -96,7 +97,7 @@ int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
 	return 0;
 }
 
-SEC("tc")
+SEC("classifier/test_pkt_access")
 int test_pkt_access(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
index d1839366f3..610c74ea9f 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
@@ -7,6 +7,8 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define TEST_FIELD(TYPE, FIELD, MASK)					\
 	{								\
@@ -25,7 +27,7 @@
 	}
 #endif
 
-SEC("tc")
+SEC("classifier/test_pkt_md_access")
 int test_pkt_md_access(struct __sk_buff *skb)
 {
 	TEST_FIELD(__u8,  len, 0xFF);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index 8812a90da4..89b3532ccc 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -8,37 +8,13 @@
 #include 
 #include 
 
-#if defined(__TARGET_ARCH_x86)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__x64_"
-#elif defined(__TARGET_ARCH_s390)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__s390x_"
-#elif defined(__TARGET_ARCH_arm64)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__arm64_"
-#else
-#define SYSCALL_WRAPPER 0
-#define SYS_PREFIX ""
-#endif
-
 static struct sockaddr_in old;
 
-SEC("kprobe/" SYS_PREFIX "sys_connect")
+SEC("kprobe/__sys_connect")
 int BPF_KPROBE(handle_sys_connect)
 {
-#if SYSCALL_WRAPPER == 1
-	struct pt_regs *real_regs;
-#endif
+	void *ptr = (void *)PT_REGS_PARM2(ctx);
 	struct sockaddr_in new;
-	void *ptr;
-
-#if SYSCALL_WRAPPER == 0
-	ptr = (void *)PT_REGS_PARM2(ctx);
-#else
-	real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx);
-	bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs));
-#endif
 
 	bpf_probe_read_user(&old, sizeof(old), ptr);
 	__builtin_memset(&new, 0xab, sizeof(new));
diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
index 648e8cab7a..4dd9806ad7 100644
--- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
@@ -8,6 +8,8 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
+
 struct {
 	__uint(type, MAP_TYPE);
 	__uint(max_entries, 32);
@@ -24,7 +26,7 @@ struct {
 	__uint(value_size, sizeof(__u32));
 } map_out SEC(".maps");
 
-SEC("tc")
+SEC("test")
 int _test(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 7d56ed47cd..26e77dcc7e 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -15,6 +15,8 @@
 #include 
 #include "test_select_reuseport_common.h"
 
+int _version SEC("version") = 1;
+
 #ifndef offsetof
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
@@ -22,8 +24,8 @@
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 1);
-	__type(key, __u32);
-	__type(value, __u32);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
 } outer_map SEC(".maps");
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 02f79356d5..1ecd987005 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -36,6 +36,7 @@ struct {
 	.pinning = PIN_GLOBAL_NS,
 };
 
+int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -158,7 +159,7 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 	return ret;
 }
 
-SEC("tc")
+SEC("classifier/sk_assign_test")
 int bpf_sk_assign_test(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple *tuple, ln = {0};
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 83b0aaa52e..ac6f7f205e 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -72,40 +72,32 @@ static const __u16 DST_PORT = 7007; /* Host byte order */
 static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
 static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
 
-SEC("sk_lookup")
+SEC("sk_lookup/lookup_pass")
 int lookup_pass(struct bpf_sk_lookup *ctx)
 {
 	return SK_PASS;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/lookup_drop")
 int lookup_drop(struct bpf_sk_lookup *ctx)
 {
 	return SK_DROP;
 }
 
-SEC("sk_lookup")
-int check_ifindex(struct bpf_sk_lookup *ctx)
-{
-	if (ctx->ingress_ifindex == 1)
-		return SK_DROP;
-	return SK_PASS;
-}
-
-SEC("sk_reuseport")
+SEC("sk_reuseport/reuse_pass")
 int reuseport_pass(struct sk_reuseport_md *ctx)
 {
 	return SK_PASS;
 }
 
-SEC("sk_reuseport")
+SEC("sk_reuseport/reuse_drop")
 int reuseport_drop(struct sk_reuseport_md *ctx)
 {
 	return SK_DROP;
 }
 
 /* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
-SEC("sk_lookup")
+SEC("sk_lookup/redir_port")
 int redir_port(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -124,7 +116,7 @@ int redir_port(struct bpf_sk_lookup *ctx)
 }
 
 /* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
-SEC("sk_lookup")
+SEC("sk_lookup/redir_ip4")
 int redir_ip4(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -147,7 +139,7 @@ int redir_ip4(struct bpf_sk_lookup *ctx)
 }
 
 /* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
-SEC("sk_lookup")
+SEC("sk_lookup/redir_ip6")
 int redir_ip6(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -172,7 +164,7 @@ int redir_ip6(struct bpf_sk_lookup *ctx)
 	return err ? SK_DROP : SK_PASS;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/select_sock_a")
 int select_sock_a(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -187,7 +179,7 @@ int select_sock_a(struct bpf_sk_lookup *ctx)
 	return err ? SK_DROP : SK_PASS;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/select_sock_a_no_reuseport")
 int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -202,7 +194,7 @@ int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
 	return err ? SK_DROP : SK_PASS;
 }
 
-SEC("sk_reuseport")
+SEC("sk_reuseport/select_sock_b")
 int select_sock_b(struct sk_reuseport_md *ctx)
 {
 	__u32 key = KEY_SERVER_B;
@@ -213,7 +205,7 @@ int select_sock_b(struct sk_reuseport_md *ctx)
 }
 
 /* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
-SEC("sk_lookup")
+SEC("sk_lookup/sk_assign_eexist")
 int sk_assign_eexist(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -246,7 +238,7 @@ int sk_assign_eexist(struct bpf_sk_lookup *ctx)
 }
 
 /* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
-SEC("sk_lookup")
+SEC("sk_lookup/sk_assign_replace_flag")
 int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -278,7 +270,7 @@ int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
 }
 
 /* Check that bpf_sk_assign(sk=NULL) is accepted. */
-SEC("sk_lookup")
+SEC("sk_lookup/sk_assign_null")
 int sk_assign_null(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk = NULL;
@@ -321,7 +313,7 @@ int sk_assign_null(struct bpf_sk_lookup *ctx)
 }
 
 /* Check that selected sk is accessible through context. */
-SEC("sk_lookup")
+SEC("sk_lookup/access_ctx_sk")
 int access_ctx_sk(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk1 = NULL, *sk2 = NULL;
@@ -387,7 +379,7 @@ int access_ctx_sk(struct bpf_sk_lookup *ctx)
  * are not covered because they give bogus results, that is the
  * verifier ignores the offset.
  */
-SEC("sk_lookup")
+SEC("sk_lookup/ctx_narrow_access")
 int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -561,7 +553,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 }
 
 /* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */
-SEC("sk_lookup")
+SEC("sk_lookup/sk_assign_esocknosupport")
 int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -586,28 +578,28 @@ int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
 	return ret;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/multi_prog_pass1")
 int multi_prog_pass1(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
 	return SK_PASS;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/multi_prog_pass2")
 int multi_prog_pass2(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
 	return SK_PASS;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/multi_prog_drop1")
 int multi_prog_drop1(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
 	return SK_DROP;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/multi_prog_drop2")
 int multi_prog_drop2(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
@@ -631,7 +623,7 @@ static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
 	return SK_PASS;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/multi_prog_redir1")
 int multi_prog_redir1(struct bpf_sk_lookup *ctx)
 {
 	int ret;
@@ -641,7 +633,7 @@ int multi_prog_redir1(struct bpf_sk_lookup *ctx)
 	return SK_PASS;
 }
 
-SEC("sk_lookup")
+SEC("sk_lookup/multi_prog_redir2")
 int multi_prog_redir2(struct bpf_sk_lookup *ctx)
 {
 	int ret;
@@ -652,3 +644,4 @@ int multi_prog_redir2(struct bpf_sk_lookup *ctx)
 }
 
 char _license[] SEC("license") = "Dual BSD/GPL";
+__u32 _version SEC("version") = 1;
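/* The sk_lookup programs above mostly follow one pattern: fetch a listener
 * socket from a map, hand it to the stack with bpf_sk_assign(), and drop
 * the reference. A condensed sketch mirroring redir_port(), with a
 * hypothetical map name:
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u64));
} demo_redir_map SEC(".maps");

SEC("sk_lookup/redir_demo")
int redir_demo(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *sk;
	__u32 key = 0;
	long err;

	sk = bpf_map_lookup_elem(&demo_redir_map, &key);
	if (!sk)
		return SK_PASS;

	err = bpf_sk_assign(ctx, sk, 0);
	bpf_sk_release(sk);	/* always release the acquired reference */
	return err ? SK_DROP : SK_PASS;
}

char _license[] SEC("license") = "GPL";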
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index 40f161480a..8249075f08 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -52,8 +53,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
 	return result;
 }
 
-SEC("tc")
-int sk_lookup_success(struct __sk_buff *skb)
+SEC("classifier/sk_lookup_success")
+int bpf_sk_lookup_test0(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
 	void *data = (void *)(long)skb->data;
@@ -78,8 +79,8 @@ int sk_lookup_success(struct __sk_buff *skb)
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
 }
 
-SEC("tc")
-int sk_lookup_success_simple(struct __sk_buff *skb)
+SEC("classifier/sk_lookup_success_simple")
+int bpf_sk_lookup_test1(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -90,8 +91,8 @@ int sk_lookup_success_simple(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
-int err_use_after_free(struct __sk_buff *skb)
+SEC("classifier/err_use_after_free")
+int bpf_sk_lookup_uaf(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -105,8 +106,8 @@ int err_use_after_free(struct __sk_buff *skb)
 	return family;
 }
 
-SEC("tc")
-int err_modify_sk_pointer(struct __sk_buff *skb)
+SEC("classifier/err_modify_sk_pointer")
+int bpf_sk_lookup_modptr(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -120,8 +121,8 @@ int err_modify_sk_pointer(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
-int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
+SEC("classifier/err_modify_sk_or_null_pointer")
+int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -134,8 +135,8 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
-int err_no_release(struct __sk_buff *skb)
+SEC("classifier/err_no_release")
+int bpf_sk_lookup_test2(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 
@@ -143,8 +144,8 @@ int err_no_release(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
-int err_release_twice(struct __sk_buff *skb)
+SEC("classifier/err_release_twice")
+int bpf_sk_lookup_test3(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -155,8 +156,8 @@ int err_release_twice(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
-int err_release_unchecked(struct __sk_buff *skb)
+SEC("classifier/err_release_unchecked")
+int bpf_sk_lookup_test4(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -172,8 +173,8 @@ void lookup_no_release(struct __sk_buff *skb)
 	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("tc")
-int err_no_release_subcall(struct __sk_buff *skb)
+SEC("classifier/err_no_release_subcall")
+int bpf_sk_lookup_test5(struct __sk_buff *skb)
 {
 	lookup_no_release(skb);
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
index 6dc1f28fc4..8e94e5c080 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -68,7 +68,7 @@ static void set_task_info(struct sock *sk)
 }
 
 SEC("fentry/inet_csk_listen_start")
-int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk)
+int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog)
 {
 	set_task_info(sk);
diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
index c304cd5b8c..552f209066 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
@@ -42,4 +42,6 @@ int log_cgroup_id(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+int _version SEC("version") = 1;
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index c482110cfc..b02ea589ce 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -3,9 +3,10 @@
 #include 
 #include 
 
+int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
-SEC("tc")
+SEC("skb_ctx")
 int process(struct __sk_buff *skb)
 {
 #pragma clang loop unroll(full)
@@ -24,12 +25,6 @@ int process(struct __sk_buff *skb)
 		return 1;
 	if (skb->gso_size != 10)
 		return 1;
-	if (skb->ingress_ifindex != 11)
-		return 1;
-	if (skb->ifindex != 1)
-		return 1;
-	if (skb->hwtstamp != 11)
-		return 1;
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
index 507215791c..bb3fbf1a29 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -14,7 +14,7 @@ struct {
 
 char _license[] SEC("license") = "GPL";
 
-SEC("tc")
+SEC("classifier/test_skb_helpers")
 int test_skb_helpers(struct __sk_buff *skb)
 {
 	struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 1b1187d296..441fa1c552 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -5,8 +5,6 @@
 #include 
 #include 
 
-#define __read_mostly SEC(".data.read_mostly")
-
 struct s {
 	int a;
 	long long b;
@@ -42,20 +40,9 @@ int kern_ver = 0;
 
 struct s out5 = {};
 
-
-const volatile int in_dynarr_sz SEC(".rodata.dyn");
-const volatile int in_dynarr[4] SEC(".rodata.dyn") = { -1, -2, -3, -4 };
-
-int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 };
-
-int read_mostly_var __read_mostly;
-int out_mostly_var;
-
 SEC("raw_tp/sys_enter")
 int handler(const void *ctx)
 {
-	int i;
-
 	out1 = in1;
 	out2 = in2;
 	out3 = in3;
@@ -66,11 +53,6 @@ int handler(const void *ctx)
 	bpf_syscall = CONFIG_BPF_SYSCALL;
 	kern_ver = LINUX_KERNEL_VERSION;
 
-	for (i = 0; i < in_dynarr_sz; i++)
-		out_dynarr[i] = in_dynarr[i];
-
-	out_mostly_var = read_mostly_var;
-
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 6c85b00f27..5cb90ca292 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -371,4 +371,5 @@ int bpf_prog10(struct sk_msg_md *msg)
 	return SK_DROP;
 }
 
+int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index 325c9f1934..a1cc58b10c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -56,7 +56,7 @@ int prog_stream_verdict(struct __sk_buff *skb)
 	return verdict;
 }
 
-SEC("sk_skb")
+SEC("sk_skb/skb_verdict")
 int prog_skb_verdict(struct __sk_buff *skb)
 {
 	unsigned int *count;
@@ -116,4 +116,5 @@ int prog_reuseport(struct sk_reuseport_md *reuse)
 	return verdict;
 }
 
+int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
index 3c69aa9717..2d31f66e4f 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -9,7 +9,7 @@ struct {
 	__type(value, __u64);
 } sock_map SEC(".maps");
 
-SEC("sk_skb")
+SEC("sk_skb/skb_verdict")
 int prog_skb_verdict(struct __sk_buff *skb)
 {
 	return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
index 6d64ea536e..9d0c9f28ca 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_update.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -24,7 +24,7 @@ struct {
 	__type(value, __u64);
 } dst_sock_hash SEC(".maps");
 
-SEC("tc")
+SEC("classifier/copy_sock_map")
 int copy_sock_map(void *ctx)
 {
 	struct bpf_sock *sk;
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 7e88309d32..0d31a3b350 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -45,7 +45,7 @@ struct {
 
 #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
 
-SEC("tc")
+SEC("spin_lock_demo")
 int bpf_sping_lock_test(struct __sk_buff *skb)
 {
 	volatile int credit = 0, max_credit = 100, pkt_len = 64;
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index 36a707e7c7..0cf0134631 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -28,8 +28,8 @@ struct {
 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 	__uint(max_entries, 128);
 	__uint(map_flags, BPF_F_STACK_BUILD_ID);
-	__type(key, __u32);
-	__type(value, stack_trace_t);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
 } stackmap SEC(".maps");
 
 struct {
@@ -73,3 +73,4 @@ int oncpu(struct random_urandom_args *args)
 }
 
 char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index 728dbd39ef..00ed486726 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2018 Facebook
 
-#include 
+#include 
 #include 
 
 #ifndef PERF_MAX_STACK_DEPTH
@@ -27,8 +27,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
 struct {
 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 	__uint(max_entries, 16384);
-	__type(key, __u32);
-	__type(value, stack_trace_t);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
 } stackmap SEC(".maps");
 
 struct {
@@ -41,11 +41,11 @@ struct {
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
 	unsigned long long pad;
-	char prev_comm[TASK_COMM_LEN];
+	char prev_comm[16];
 	int prev_pid;
 	int prev_prio;
 	long long prev_state;
-	char next_comm[TASK_COMM_LEN];
+	char next_comm[16];
 	int next_pid;
 	int next_prio;
 };
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
index d28ca8d1f3..18a3a7ed92 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -5,7 +5,7 @@
 
 /* Dummy prog to test TC-BPF API */
 
-SEC("tc")
+SEC("classifier")
 int cls(struct __sk_buff *skb)
 {
 	return 0;
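/* The test_tc_* conversions below only rename ELF sections; the redirect
 * helpers themselves are untouched. For orientation, the two flavors used
 * there, in sketch form (IFINDEX_PEER is a placeholder filled in by user
 * space before load):
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

volatile const __u32 IFINDEX_PEER;

SEC("classifier/peer_demo")
int peer_demo(struct __sk_buff *skb)
{
	/* hop straight to the peer device of a veth pair (its ingress) */
	return bpf_redirect_peer(IFINDEX_PEER, 0);
}

SEC("classifier/neigh_demo")
int neigh_demo(struct __sk_buff *skb)
{
	/* egress via the neighbor subsystem, which fills in L2 addresses */
	return bpf_redirect_neigh(IFINDEX_PEER, NULL, 0, 0);
}

char _license[] SEC("license") = "GPL";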
__sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -SEC("tc") +SEC("classifier/chk_egress") int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); @@ -83,7 +83,7 @@ int tc_chk(struct __sk_buff *skb) return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("tc") +SEC("classifier/dst_ingress") int tc_dst(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; @@ -108,7 +108,7 @@ int tc_dst(struct __sk_buff *skb) return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0); } -SEC("tc") +SEC("classifier/src_ingress") int tc_src(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c index ec4cce1936..f7ab69cf01 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -75,7 +75,7 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, return 0; } -SEC("tc") +SEC("classifier/chk_egress") int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); @@ -143,13 +143,13 @@ static __always_inline int tc_redir(struct __sk_buff *skb) /* these are identical, but keep them separate for compatibility with the * section names expected by test_tc_redirect.sh */ -SEC("tc") +SEC("classifier/dst_ingress") int tc_dst(struct __sk_buff *skb) { return tc_redir(skb); } -SEC("tc") +SEC("classifier/src_ingress") int tc_src(struct __sk_buff *skb) { return tc_redir(skb); diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index 365eacb5dc..fe818cd5f0 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -16,31 +16,31 @@ volatile const __u32 IFINDEX_DST; static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}; -SEC("tc") +SEC("classifier/chk_egress") int tc_chk(struct __sk_buff *skb) { return TC_ACT_SHOT; } -SEC("tc") +SEC("classifier/dst_ingress") int tc_dst(struct __sk_buff *skb) { return bpf_redirect_peer(IFINDEX_SRC, 0); } -SEC("tc") +SEC("classifier/src_ingress") int tc_src(struct __sk_buff *skb) { return bpf_redirect_peer(IFINDEX_DST, 0); } -SEC("tc") +SEC("classifier/dst_ingress_l3") int tc_dst_l3(struct __sk_buff *skb) { return bpf_redirect(IFINDEX_SRC, 0); } -SEC("tc") +SEC("classifier/src_ingress_l3") int tc_src_l3(struct __sk_buff *skb) { __u16 proto = skb->protocol; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c index cd747cd93d..47cbe2eeae 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -148,7 +148,7 @@ static __always_inline void check_syncookie(void *ctx, void *data, bpf_sk_release(sk); } -SEC("tc") +SEC("clsact/check_syncookie") int check_syncookie_clsact(struct __sk_buff *skb) { check_syncookie(skb, (void *)(long)skb->data, @@ -156,7 +156,7 @@ int check_syncookie_clsact(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("xdp") +SEC("xdp/check_syncookie") int check_syncookie_xdp(struct xdp_md *ctx) { check_syncookie(ctx, (void *)(long)ctx->data, diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index e2ae049c2f..adc83a54c3 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ 
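The tc_peer/tc_neigh programs being renamed here exercise two tc redirect helpers: bpf_redirect_peer() crosses directly to the peer of a veth-style device pair, while bpf_redirect_neigh() resolves the L2 neighbour from the FIB on the way out. A condensed sketch, assuming the same loader-populated ifindex convention the tests use:

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

volatile const __u32 TARGET_IFINDEX;	/* set by the loader, as in the tests */

SEC("classifier/redir_peer")
int redir_peer(struct __sk_buff *skb)
{
	/* no neighbour lookup: land on the peer device's ingress */
	return bpf_redirect_peer(TARGET_IFINDEX, 0);
}

SEC("classifier/redir_neigh")
int redir_neigh(struct __sk_buff *skb)
{
	/* NULL params: resolve the neighbour from the FIB lookup result */
	return bpf_redirect_neigh(TARGET_IFINDEX, NULL, 0, 0);
}

char _license[] SEC("license") = "GPL";
```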
b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -244,7 +244,7 @@ static __always_inline void send_basic_event(struct sock *sk, bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY); } -SEC("tp/dummy/tracepoint") +SEC("dummy_tracepoint") int _dummy_tracepoint(struct dummy_tracepoint_args *arg) { if (!arg->sock) @@ -255,3 +255,4 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) } char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c index 5f4e87ee94..678bd0fad2 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -594,7 +594,7 @@ static int handle_parse_hdr(struct bpf_sock_ops *skops) return CG_OK; } -SEC("sockops") +SEC("sockops/estab") int estab(struct bpf_sock_ops *skops) { int true_val = 1; diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 3ded052807..94f50f7e94 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -16,6 +16,7 @@ #include "test_tcpbpf.h" struct tcpbpf_globals global = {}; +int _version SEC("version") = 1; /** * SOL_TCP is defined in while diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 540181c115..ac63410bb5 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -24,10 +24,12 @@ struct { struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(max_entries, 2); - __type(key, int); - __type(value, __u32); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(__u32)); } perf_event_map SEC(".maps"); +int _version SEC("version") = 1; + SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 43bd7a20cc..4b825ee122 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -1,17 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2017 Facebook -#include +#include #include /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; - char prev_comm[TASK_COMM_LEN]; + char prev_comm[16]; int prev_pid; int prev_prio; long long prev_state; - char next_comm[TASK_COMM_LEN]; + char next_comm[16]; int next_pid; int next_prio; }; @@ -23,3 +23,4 @@ int oncpu(struct sched_switch_args *ctx) } char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index ef0dde83b8..e7b6731174 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -26,6 +26,8 @@ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ } while (0) +int _version SEC("version") = 1; + struct geneve_opt { __be16 opt_class; __u8 type; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c index f90ffcafd1..f024154c7b 
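Several hunks above reintroduce a "version" section. Historically the loader wrote the kernel version there so BPF_PROG_LOAD could reject kprobe programs built against a different release; newer kernels and libbpf ignore it, which is why the modern sources dropped it. A sketch of the old idiom, with an illustrative attach point:

```c
#include <linux/bpf.h>
#include <linux/version.h>
#include <bpf/bpf_helpers.h>

/* Old loaders copied this value into bpf_attr.kern_version; tests that
 * did not attach kprobes pinned it to 1 just to satisfy the loader.
 */
__u32 _version SEC("version") = LINUX_VERSION_CODE;

SEC("kprobe/do_nanosleep")	/* illustrative attach point */
int probe(void *ctx)
{
	return 0;
}

char _license[] SEC("license") = "GPL";
```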
100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include "vmlinux.h" +#include #include #define ATTR __always_inline #include "test_jhash.h" -SEC("tc") +SEC("scale90_inline") int balancer_ingress(struct __sk_buff *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index d7a9a74b72..31f9bce374 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -20,6 +20,8 @@ #include #include "test_iptunnel_common.h" +int _version SEC("version") = 1; + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); @@ -208,7 +210,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp) return XDP_TX; } -SEC("xdp") +SEC("xdp_tx_iptunnel") int _xdp_tx_iptunnel(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 199c61b7d0..3d66599eee 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -2,7 +2,7 @@ #include #include -SEC("xdp") +SEC("xdp_adjust_tail_grow") int _xdp_adjust_tail_grow(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c index b7448253d1..22065a9cfb 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c @@ -9,7 +9,9 @@ #include #include -SEC("xdp") +int _version SEC("version") = 1; + +SEC("xdp_adjust_tail_shrink") int _xdp_adjust_tail_shrink(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 58cf4345f5..a038e827f8 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -36,8 +36,8 @@ struct meta { struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __type(key, int); - __type(value, int); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); } perf_buf_map SEC(".maps"); __u64 test_result_fentry = 0; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c index 807bf895f4..b360ba2bd4 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c @@ -5,7 +5,7 @@ #include #include -SEC("xdp") +SEC("xdp_dm_log") int xdpdm_devlog(struct xdp_md *ctx) { char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c index 64ff32eaae..eb93ea95d1 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_link.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c @@ -5,14 +5,8 @@ char LICENSE[] SEC("license") = "GPL"; -SEC("xdp") +SEC("xdp/handler") int xdp_handler(struct xdp_md *xdp) { return 0; } - -SEC("tc") -int tc_handler(struct __sk_buff *skb) -{ - return 0; -} diff --git 
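The map-definition hunks (test_stacktrace_build_id, test_tcpnotify_kern, test_xdp_bpf2bpf) flip between the two BTF-defined map spellings: __type() embeds the full key/value types in BTF, while __uint(key_size/value_size) records only sizes, which older libbpf required for map types such as perf event arrays whose values carry no useful BTF. Side by side, as a sketch:

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(int));	/* size-only form */
	__uint(value_size, sizeof(__u32));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 16);
	__type(key, __u32);		/* BTF-typed form */
	__type(value, __u64);
} counts SEC(".maps");

char _license[] SEC("license") = "GPL";
```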
a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index c98fb44156..fcabcda30b 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -16,6 +16,8 @@ #include #include "test_iptunnel_common.h" +int _version SEC("version") = 1; + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); @@ -204,7 +206,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp) return XDP_TX; } -SEC("xdp") +SEC("xdp_tx_iptunnel") int _xdp_tx_iptunnel(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 596c4e71bf..3a67921f62 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -797,7 +797,7 @@ static int process_packet(void *data, __u64 off, void *data_end, return XDP_DROP; } -SEC("xdp") +SEC("xdp-test-v4") int balancer_ingress_v4(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; @@ -816,7 +816,7 @@ int balancer_ingress_v4(struct xdp_md *ctx) return XDP_DROP; } -SEC("xdp") +SEC("xdp-test-v6") int balancer_ingress_v6(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c index b778cad454..a5337cd940 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c @@ -12,6 +12,8 @@ #include #include +int _version SEC("version") = 1; + SEC("redirect_to_111") int xdp_redirect_to_111(struct xdp_md *xdp) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 5320250577..59ee4f182f 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -12,13 +12,13 @@ struct { __uint(max_entries, 4); } cpu_map SEC(".maps"); -SEC("xdp") +SEC("xdp_redir") int xdp_redir_prog(struct xdp_md *ctx) { return bpf_redirect_map(&cpu_map, 1, 0); } -SEC("xdp") +SEC("xdp_dummy") int xdp_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 1e6b9c38ea..0ac0864977 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -9,7 +9,7 @@ struct { __uint(max_entries, 4); } dm_ports SEC(".maps"); -SEC("xdp") +SEC("xdp_redir") int xdp_redir_prog(struct xdp_md *ctx) { return bpf_redirect_map(&dm_ports, 1, 0); @@ -18,7 +18,7 @@ int xdp_redir_prog(struct xdp_md *ctx) /* invalid program on DEVMAP entry; * SEC name means expected attach type not set */ -SEC("xdp") +SEC("xdp_dummy") int xdp_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 2098f3f27f..9a4d09590b 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -52,10 +52,3 @@ int bench_trigger_fmodret(void *ctx) __sync_add_and_fetch(&hits, 1); return -22; } - -SEC("uprobe/self/uprobe_target") -int bench_trigger_uprobe(void *ctx) -{ - 
__sync_add_and_fetch(&hits, 1); - return 0; -} diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c index d988b2e0ce..ea25e88819 100644 --- a/tools/testing/selftests/bpf/progs/xdp_dummy.c +++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c @@ -4,7 +4,7 @@ #include #include -SEC("xdp") +SEC("xdp_dummy") int xdp_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c index 8395782b6e..880debcbcd 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c @@ -34,7 +34,7 @@ struct { __uint(max_entries, 128); } mac_map SEC(".maps"); -SEC("xdp") +SEC("xdp_redirect_map_multi") int xdp_redirect_map_multi_prog(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; @@ -63,7 +63,7 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx) } /* The following 2 progs are for 2nd devmap prog testing */ -SEC("xdp") +SEC("xdp_redirect_map_ingress") int xdp_redirect_map_all_prog(struct xdp_md *ctx) { return bpf_redirect_map(&map_egress, 0, diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c index 4ad73847b8..6b9ca40bd1 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -86,7 +86,7 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type) return XDP_TX; } -SEC("xdp") +SEC("xdpclient") int xdping_client(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; @@ -150,7 +150,7 @@ int xdping_client(struct xdp_md *ctx) return XDP_TX; } -SEC("xdp") +SEC("xdpserver") int xdping_server(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py index 1c2408ee1f..4fed2dc25c 100644 --- a/tools/testing/selftests/bpf/test_bpftool.py +++ b/tools/testing/selftests/bpf/test_bpftool.py @@ -57,11 +57,6 @@ def default_iface(f): return f(*args, iface, **kwargs) return wrapper -DMESG_EMITTING_HELPERS = [ - "bpf_probe_write_user", - "bpf_trace_printk", - "bpf_trace_vprintk", - ] class TestBpftool(unittest.TestCase): @classmethod @@ -72,7 +67,10 @@ class TestBpftool(unittest.TestCase): @default_iface def test_feature_dev_json(self, iface): - unexpected_helpers = DMESG_EMITTING_HELPERS + unexpected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] expected_keys = [ "syscall_config", "program_types", @@ -96,7 +94,10 @@ class TestBpftool(unittest.TestCase): bpftool_json(["feature", "probe"]), bpftool_json(["feature"]), ] - unexpected_helpers = DMESG_EMITTING_HELPERS + unexpected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] expected_keys = [ "syscall_config", "system_config", @@ -120,7 +121,10 @@ class TestBpftool(unittest.TestCase): bpftool_json(["feature", "probe", "kernel", "full"]), bpftool_json(["feature", "probe", "full"]), ] - expected_helpers = DMESG_EMITTING_HELPERS + expected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] for tc in test_cases: # Check if expected helpers are included at least once in any @@ -153,7 +157,7 @@ class TestBpftool(unittest.TestCase): not_full_set.add(helper) self.assertCountEqual(full_set - not_full_set, - set(DMESG_EMITTING_HELPERS)) + {"bpf_probe_write_user", "bpf_trace_printk"}) self.assertCountEqual(not_full_set - 
full_set, set()) def test_feature_macros(self): diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index 1453a53ed5..b03a875715 100644 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -90,10 +90,6 @@ echo -e "... through kbuild\n" if [ -f ".config" ] ; then make_and_clean tools/bpf - ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for - ## runqslower, but the default (used for the "clean" target) is .output. - ## Let's make sure we clean runqslower's directory properly. - make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed ## down from toplevel Makefile to bpftool's Makefile. diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 6bf21e4788..be54b7335a 100644 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -9,15 +9,7 @@ import os, sys LINUX_ROOT = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir)) -BPFTOOL_DIR = os.getenv('BPFTOOL_DIR', - os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')) -BPFTOOL_BASHCOMP_DIR = os.getenv('BPFTOOL_BASHCOMP_DIR', - os.path.join(BPFTOOL_DIR, 'bash-completion')) -BPFTOOL_DOC_DIR = os.getenv('BPFTOOL_DOC_DIR', - os.path.join(BPFTOOL_DIR, 'Documentation')) -INCLUDE_DIR = os.getenv('INCLUDE_DIR', - os.path.join(LINUX_ROOT, 'tools/include')) - +BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool') retval = 0 class BlockParser(object): @@ -250,6 +242,12 @@ class FileExtractor(object): end_marker = re.compile('}\\\\n') return self.__get_description_list(start_marker, pattern, end_marker) + def default_options(self): + """ + Return the default options contained in HELP_SPEC_OPTIONS + """ + return { '-j', '--json', '-p', '--pretty', '-d', '--debug' } + def get_bashcomp_list(self, block_name): """ Search for and parse a list of type names from a variable in bash @@ -276,56 +274,7 @@ class SourceFileExtractor(FileExtractor): defined in children classes. """ def get_options(self): - return self.get_help_list_macro('HELP_SPEC_OPTIONS') - -class MainHeaderFileExtractor(SourceFileExtractor): - """ - An extractor for bpftool's main.h - """ - filename = os.path.join(BPFTOOL_DIR, 'main.h') - - def get_common_options(self): - """ - Parse the list of common options in main.h (options that apply to all - commands), which looks to the lists of options in other source files - but has different start and end markers: - - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" - - Return a set containing all options, such as: - - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} - """ - start_marker = re.compile(f'"OPTIONS :=') - pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') - end_marker = re.compile('#define') - - parser = InlineListParser(self.reader) - parser.search_block(start_marker) - return parser.parse(pattern, end_marker) - -class ManSubstitutionsExtractor(SourceFileExtractor): - """ - An extractor for substitutions.rst - """ - filename = os.path.join(BPFTOOL_DOC_DIR, 'substitutions.rst') - - def get_common_options(self): - """ - Parse the list of common options in substitutions.rst (options that - apply to all commands). 
- - Return a set containing all options, such as: - - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} - """ - start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') - pattern = re.compile('\*\*([\w/-]+)\*\*') - end_marker = re.compile('}$') - - parser = InlineListParser(self.reader) - parser.search_block(start_marker) - return parser.parse(pattern, end_marker) + return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS')) class ProgFileExtractor(SourceFileExtractor): """ @@ -401,7 +350,7 @@ class BpfHeaderExtractor(FileExtractor): """ An extractor for the UAPI BPF header. """ - filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h') + filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h') def get_prog_types(self): return self.get_enum('bpf_prog_type') @@ -425,7 +374,7 @@ class ManProgExtractor(ManPageExtractor): """ An extractor for bpftool-prog.rst. """ - filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-prog.rst') + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst') def get_attach_types(self): return self.get_rst_list('ATTACH_TYPE') @@ -434,7 +383,7 @@ class ManMapExtractor(ManPageExtractor): """ An extractor for bpftool-map.rst. """ - filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-map.rst') + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst') def get_map_types(self): return self.get_rst_list('TYPE') @@ -443,7 +392,7 @@ class ManCgroupExtractor(ManPageExtractor): """ An extractor for bpftool-cgroup.rst. """ - filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-cgroup.rst') + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst') def get_attach_types(self): return self.get_rst_list('ATTACH_TYPE') @@ -462,7 +411,7 @@ class BashcompExtractor(FileExtractor): """ An extractor for bpftool's bash completion file. 
""" - filename = os.path.join(BPFTOOL_BASHCOMP_DIR, 'bpftool') + filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool') def get_prog_attach_types(self): return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') @@ -613,7 +562,7 @@ def main(): help_cmd_options = source_info.get_options() source_info.close() - man_cmd_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool-' + cmd + '.rst')) + man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst')) man_cmd_options = man_cmd_info.get_options() man_cmd_info.close() @@ -624,26 +573,13 @@ def main(): help_main_options = source_main_info.get_options() source_main_info.close() - man_main_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool.rst')) + man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst')) man_main_options = man_main_info.get_options() man_main_info.close() verify(help_main_options, man_main_options, f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):') - # Compare common options (options that apply to all commands) - - main_hdr_info = MainHeaderFileExtractor() - source_common_options = main_hdr_info.get_common_options() - main_hdr_info.close() - - man_substitutions = ManSubstitutionsExtractor() - man_common_options = man_substitutions.get_common_options() - man_substitutions.close() - - verify(source_common_options, man_common_options, - f'Comparing common options from {main_hdr_info.filename} (HELP_SPEC_OPTIONS) and {man_substitutions.filename}:') - sys.exit(retval) if __name__ == "__main__": diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 128989bed8..e2394eea4b 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -69,10 +69,4 @@ #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) -#define BTF_DECL_TAG_ENC(value, type, component_idx) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) - -#define BTF_TYPE_TAG_ENC(value, type) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) - #endif /* _TEST_BTF_H */ diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 5b8314cd77..0cda61da5d 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -8,7 +8,6 @@ #include "bpf_rlimit.h" #include "cgroup_helpers.h" -#include "testing_helpers.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; @@ -51,15 +50,15 @@ int main(int argc, char **argv) goto err; } - map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key), - sizeof(value), 0, NULL); + map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key), + sizeof(value), 0, 0); if (map_fd < 0) { printf("Failed to create map: %s\n", strerror(errno)); goto out; } - percpu_map_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL, - sizeof(key), sizeof(value), 0, NULL); + percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + sizeof(key), sizeof(value), 0, 0); if (percpu_map_fd < 0) { printf("Failed to create map: %s\n", strerror(errno)); goto out; @@ -67,7 +66,7 @@ int main(int argc, char **argv) prog[0].imm = percpu_map_fd; prog[7].imm = map_fd; - prog_fd = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, 
BPF_LOG_BUF_SIZE); if (prog_fd < 0) { diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index e00201de28..a8d2e9a87f 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -7,15 +7,9 @@ /* do nothing, just make sure we can link successfully */ -static void dump_printf(void *ctx, const char *fmt, va_list args) -{ -} - int main(int argc, char *argv[]) { - struct btf_dump_opts opts = { }; struct test_core_extern *skel; - struct btf *btf; /* libbpf.h */ libbpf_set_print(NULL); @@ -24,8 +18,7 @@ int main(int argc, char *argv[]) bpf_prog_get_fd_by_id(0); /* btf.h */ - btf = btf__new(NULL, 0); - btf_dump__new(btf, dump_printf, nullptr, &opts); + btf__new(NULL, 0); /* BPF skeleton */ skel = test_core_extern__open_and_load(); diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index c299d34526..804dddd97d 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -14,7 +14,6 @@ #include #include "cgroup_helpers.h" -#include "testing_helpers.h" #include "bpf_rlimit.h" #define DEV_CGROUP_PROG "./dev_cgroup.o" @@ -28,7 +27,7 @@ int main(int argc, char **argv) int prog_fd, cgroup_fd; __u32 prog_cnt; - if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, + if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); goto out; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index dbd9122172..174b72a64a 100644 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -26,22 +26,22 @@ if [[ -z $(ip netns identify $$) ]]; then type flow_dissector if ! unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then + /sys/fs/bpf/flow/flow_dissector flow_dissector; then echo "Unexpected unsuccessful attach in namespace" >&2 err=1 fi - $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \ + $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ flow_dissector if unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then + /sys/fs/bpf/flow/flow_dissector flow_dissector; then echo "Unexpected successful attach in namespace" >&2 err=1 fi if ! $bpftool prog detach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then + /sys/fs/bpf/flow/flow_dissector flow_dissector; then echo "Failed to detach flow dissector" >&2 err=1 fi @@ -95,7 +95,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s _dissect +./flow_dissector_load -p bpf_flow.o -s flow_dissector # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index 2893e9f2f1..fb5fd6841e 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -28,6 +28,7 @@ // 5. 
We can read keycode from same /dev/lirc device #include +#include #include #include #include @@ -44,8 +45,6 @@ #include #include -#include "testing_helpers.h" - int main(int argc, char **argv) { struct bpf_object *obj; @@ -59,8 +58,8 @@ int main(int argc, char **argv) return 2; } - ret = bpf_prog_test_load("test_lirc_mode2_kern.o", - BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd); + ret = bpf_prog_load("test_lirc_mode2_kern.o", + BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd); if (ret) { printf("Failed to load bpf program\n"); return 1; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index baa3e3ecae..006be39639 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -208,7 +208,6 @@ static void test_lpm_order(void) static void test_lpm_map(int keysize) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; struct bpf_lpm_trie_key *key; @@ -234,11 +233,11 @@ static void test_lpm_map(int keysize) key = alloca(sizeof(*key) + keysize); memset(key, 0, sizeof(*key) + keysize); - map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, sizeof(*key) + keysize, keysize + 1, 4096, - &opts); + BPF_F_NO_PREALLOC); assert(map >= 0); for (i = 0; i < n_nodes; ++i) { @@ -330,7 +329,6 @@ static void test_lpm_map(int keysize) static void test_lpm_ipaddr(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct bpf_lpm_trie_key *key_ipv4; struct bpf_lpm_trie_key *key_ipv6; size_t key_size_ipv4; @@ -344,14 +342,14 @@ static void test_lpm_ipaddr(void) key_ipv4 = alloca(key_size_ipv4); key_ipv6 = alloca(key_size_ipv6); - map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size_ipv4, sizeof(value), - 100, &opts); + 100, BPF_F_NO_PREALLOC); assert(map_fd_ipv4 >= 0); - map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size_ipv6, sizeof(value), - 100, &opts); + 100, BPF_F_NO_PREALLOC); assert(map_fd_ipv6 >= 0); /* Fill data some IPv4 and IPv6 address ranges */ @@ -425,7 +423,6 @@ static void test_lpm_ipaddr(void) static void test_lpm_delete(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct bpf_lpm_trie_key *key; size_t key_size; int map_fd; @@ -434,9 +431,9 @@ static void test_lpm_delete(void) key_size = sizeof(*key) + sizeof(__u32); key = alloca(key_size); - map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), - 100, &opts); + 100, BPF_F_NO_PREALLOC); assert(map_fd >= 0); /* Add nodes: @@ -538,7 +535,6 @@ static void test_lpm_delete(void) static void test_lpm_get_next_key(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct bpf_lpm_trie_key *key_p, *next_key_p; size_t key_size; __u32 value = 0; @@ -548,7 +544,8 @@ static void test_lpm_get_next_key(void) key_p = alloca(key_size); next_key_p = alloca(key_size); - map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), 100, &opts); + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); assert(map_fd >= 0); /* empty tree. 
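The test_lpm_map conversions above are the map-creation side of the same libbpf migration: bpf_map_create() (libbpf >= 0.7) takes an optional name plus an extensible opts struct, where the removed bpf_create_map() took a flat flags argument. A userspace sketch; the map name and sizes are illustrative:

```c
#include <bpf/bpf.h>

static int create_lpm_trie(void)
{
	/* LPM tries must be created with BPF_F_NO_PREALLOC */
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		    .map_flags = BPF_F_NO_PREALLOC);

	return bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_demo",
			      8,	/* 4-byte prefixlen + 4 bytes of IPv4 */
			      4,	/* __u32 value */
			      128, &opts);
}
```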
get_next_key should return ENOENT */ @@ -756,7 +753,6 @@ static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) static void test_lpm_multi_thread(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct lpm_mt_test_info info[4]; size_t key_size, value_size; pthread_t thread_id[4]; @@ -766,7 +762,8 @@ static void test_lpm_multi_thread(void) /* create a trie */ value_size = sizeof(__u32); key_size = sizeof(struct bpf_lpm_trie_key) + value_size; - map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts); + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size, + 100, BPF_F_NO_PREALLOC); /* create 4 threads to test update, delete, lookup and get_next_key */ setup_lpm_mt_test_info(&info[0], map_fd); diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index b9f1bbbc8a..7e9049fa3e 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -28,14 +28,13 @@ static int nr_cpus; static int create_map(int map_type, int map_flags, unsigned int size) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); int map_fd; - map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long), - sizeof(unsigned long long), size, &opts); + map_fd = bpf_create_map(map_type, sizeof(unsigned long long), + sizeof(unsigned long long), size, map_flags); if (map_fd == -1) - perror("bpf_map_create"); + perror("bpf_create_map"); return map_fd; } @@ -43,6 +42,8 @@ static int create_map(int map_type, int map_flags, unsigned int size) static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, void *value) { + struct bpf_load_program_attr prog; + struct bpf_create_map_attr map; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0), BPF_LD_MAP_FD(BPF_REG_1, fd), @@ -63,13 +64,25 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, int mfd, pfd, ret, zero = 0; __u32 retval = 0; - mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL); + memset(&map, 0, sizeof(map)); + map.map_type = BPF_MAP_TYPE_ARRAY; + map.key_size = sizeof(int); + map.value_size = sizeof(unsigned long long); + map.max_entries = 1; + + mfd = bpf_create_map_xattr(&map); if (mfd < 0) return -1; insns[0].imm = mfd; - pfd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", insns, ARRAY_SIZE(insns), NULL); + memset(&prog, 0, sizeof(prog)); + prog.prog_type = BPF_PROG_TYPE_SCHED_CLS; + prog.insns = insns; + prog.insns_cnt = ARRAY_SIZE(insns); + prog.license = "GPL"; + + pfd = bpf_load_program_xattr(&prog, NULL, 0); if (pfd < 0) { close(mfd); return -1; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 50f7e74ca0..c7a36a9378 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -23,8 +23,8 @@ #include #include "bpf_util.h" +#include "bpf_rlimit.h" #include "test_maps.h" -#include "testing_helpers.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -32,14 +32,15 @@ static int skips; -static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) }; +static int map_flags; static void test_hashmap(unsigned int task, void *data) { long long key, next_key, first_key, value; int fd; - fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), 2, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + 2, map_flags); if (fd < 0) { printf("Failed to 
create hashmap '%s'!\n", strerror(errno)); exit(1); @@ -136,7 +137,8 @@ static void test_hashmap_sizes(unsigned int task, void *data) for (i = 1; i <= 512; i <<= 1) for (j = 1; j <= 1 << 18; j <<= 1) { - fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, i, j, 2, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, + 2, map_flags); if (fd < 0) { if (errno == ENOMEM) return; @@ -157,8 +159,8 @@ static void test_hashmap_percpu(unsigned int task, void *data) int expected_key_mask = 0; int fd, i; - fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, NULL, sizeof(key), - sizeof(bpf_percpu(value, 0)), 2, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), + sizeof(bpf_percpu(value, 0)), 2, map_flags); if (fd < 0) { printf("Failed to create hashmap '%s'!\n", strerror(errno)); exit(1); @@ -269,11 +271,11 @@ static int helper_fill_hashmap(int max_entries) int i, fd, ret; long long key, value; - fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), - max_entries, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + max_entries, map_flags); CHECK(fd < 0, "failed to create hashmap", - "err: %s, flags: 0x%x\n", strerror(errno), map_opts.map_flags); + "err: %s, flags: 0x%x\n", strerror(errno), map_flags); for (i = 0; i < max_entries; i++) { key = i; value = key; @@ -329,8 +331,8 @@ static void test_hashmap_zero_seed(void) int i, first, second, old_flags; long long key, next_first, next_second; - old_flags = map_opts.map_flags; - map_opts.map_flags |= BPF_F_ZERO_SEED; + old_flags = map_flags; + map_flags |= BPF_F_ZERO_SEED; first = helper_fill_hashmap(3); second = helper_fill_hashmap(3); @@ -352,7 +354,7 @@ static void test_hashmap_zero_seed(void) key = next_first; } - map_opts.map_flags = old_flags; + map_flags = old_flags; close(first); close(second); } @@ -362,7 +364,8 @@ static void test_arraymap(unsigned int task, void *data) int key, next_key, fd; long long value; - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), 2, NULL); + fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), + 2, 0); if (fd < 0) { printf("Failed to create arraymap '%s'!\n", strerror(errno)); exit(1); @@ -417,8 +420,8 @@ static void test_arraymap_percpu(unsigned int task, void *data) BPF_DECLARE_PERCPU(long, values); int key, next_key, fd, i; - fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key), - sizeof(bpf_percpu(values, 0)), 2, NULL); + fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), + sizeof(bpf_percpu(values, 0)), 2, 0); if (fd < 0) { printf("Failed to create arraymap '%s'!\n", strerror(errno)); exit(1); @@ -480,8 +483,8 @@ static void test_arraymap_percpu_many_keys(void) unsigned int nr_keys = 2000; int key, fd, i; - fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key), - sizeof(bpf_percpu(values, 0)), nr_keys, NULL); + fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), + sizeof(bpf_percpu(values, 0)), nr_keys, 0); if (fd < 0) { printf("Failed to create per-cpu arraymap '%s'!\n", strerror(errno)); @@ -512,7 +515,8 @@ static void test_devmap(unsigned int task, void *data) int fd; __u32 key, value; - fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, NULL, sizeof(key), sizeof(value), 2, NULL); + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), + 2, 0); if (fd < 0) { printf("Failed to create devmap '%s'!\n", strerror(errno)); exit(1); @@ -526,7 +530,8 @@ static void test_devmap_hash(unsigned int task, void *data) int fd; __u32 key, value; - fd = 
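The test_lru_map hunk just above shows the same split for program loading: libbpf 1.0's bpf_prog_load() passes name, license, and instructions directly, while the removed bpf_load_program_xattr() packed them into an attribute struct. A self-contained sketch that hand-encodes "r0 = 0; exit":

```c
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int load_ret0(void)
{
	const struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
		  .dst_reg = BPF_REG_0, .imm = 0 },	/* r0 = 0 */
		{ .code = BPF_JMP | BPF_EXIT },		/* exit   */
	};

	/* NULL name and opts keep the defaults */
	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
			     insns, 2, NULL);
}
```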
bpf_map_create(BPF_MAP_TYPE_DEVMAP_HASH, NULL, sizeof(key), sizeof(value), 2, NULL); + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value), + 2, 0); if (fd < 0) { printf("Failed to create devmap_hash '%s'!\n", strerror(errno)); exit(1); @@ -546,12 +551,14 @@ static void test_queuemap(unsigned int task, void *data) vals[i] = rand(); /* Invalid key size */ - fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 4, sizeof(val), MAP_SIZE, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, + map_flags); assert(fd < 0 && errno == EINVAL); - fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 0, sizeof(val), MAP_SIZE, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, + map_flags); /* Queue map does not support BPF_F_NO_PREALLOC */ - if (map_opts.map_flags & BPF_F_NO_PREALLOC) { + if (map_flags & BPF_F_NO_PREALLOC) { assert(fd < 0 && errno == EINVAL); return; } @@ -602,12 +609,14 @@ static void test_stackmap(unsigned int task, void *data) vals[i] = rand(); /* Invalid key size */ - fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 4, sizeof(val), MAP_SIZE, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, + map_flags); assert(fd < 0 && errno == EINVAL); - fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 0, sizeof(val), MAP_SIZE, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, + map_flags); /* Stack map does not support BPF_F_NO_PREALLOC */ - if (map_opts.map_flags & BPF_F_NO_PREALLOC) { + if (map_flags & BPF_F_NO_PREALLOC) { assert(fd < 0 && errno == EINVAL); return; } @@ -734,9 +743,9 @@ static void test_sockmap(unsigned int tasks, void *data) } /* Test sockmap with connected sockets */ - fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, + fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key), sizeof(value), - 6, NULL); + 6, 0); if (fd < 0) { if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) { printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n", @@ -821,21 +830,21 @@ static void test_sockmap(unsigned int tasks, void *data) } /* Load SK_SKB program and Attach */ - err = bpf_prog_test_load(SOCKMAP_PARSE_PROG, + err = bpf_prog_load(SOCKMAP_PARSE_PROG, BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog); if (err) { printf("Failed to load SK_SKB parse prog\n"); goto out_sockmap; } - err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG, + err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG, BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog); if (err) { printf("Failed to load SK_SKB msg prog\n"); goto out_sockmap; } - err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG, + err = bpf_prog_load(SOCKMAP_VERDICT_PROG, BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog); if (err) { printf("Failed to load SK_SKB verdict prog\n"); @@ -1158,7 +1167,8 @@ static void test_map_in_map(void) obj = bpf_object__open(MAPINMAP_PROG); - fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 2, NULL); + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), + 2, 0); if (fd < 0) { printf("Failed to create hashmap '%s'!\n", strerror(errno)); exit(1); @@ -1304,8 +1314,8 @@ static void test_map_large(void) } key; int fd, i, value; - fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), - MAP_SIZE, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags); if (fd < 0) { printf("Failed to create large map '%s'!\n", strerror(errno)); exit(1); @@ -1458,8 +1468,8 @@ static void test_map_parallel(void) int i, fd, key = 0, value = 0; int data[2]; - fd = 
bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), - MAP_SIZE, &map_opts); + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags); if (fd < 0) { printf("Failed to create map for parallel test '%s'!\n", strerror(errno)); @@ -1507,13 +1517,9 @@ static void test_map_parallel(void) static void test_map_rdonly(void) { int fd, key = 0, value = 0; - __u32 old_flags; - old_flags = map_opts.map_flags; - map_opts.map_flags |= BPF_F_RDONLY; - fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), - MAP_SIZE, &map_opts); - map_opts.map_flags = old_flags; + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags | BPF_F_RDONLY); if (fd < 0) { printf("Failed to create map for read only test '%s'!\n", strerror(errno)); @@ -1536,13 +1542,9 @@ static void test_map_rdonly(void) static void test_map_wronly_hash(void) { int fd, key = 0, value = 0; - __u32 old_flags; - old_flags = map_opts.map_flags; - map_opts.map_flags |= BPF_F_WRONLY; - fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), - MAP_SIZE, &map_opts); - map_opts.map_flags = old_flags; + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags | BPF_F_WRONLY); if (fd < 0) { printf("Failed to create map for write only test '%s'!\n", strerror(errno)); @@ -1564,17 +1566,13 @@ static void test_map_wronly_hash(void) static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type) { int fd, value = 0; - __u32 old_flags; - assert(map_type == BPF_MAP_TYPE_QUEUE || map_type == BPF_MAP_TYPE_STACK); - old_flags = map_opts.map_flags; - map_opts.map_flags |= BPF_F_WRONLY; - fd = bpf_map_create(map_type, NULL, 0, sizeof(value), MAP_SIZE, &map_opts); - map_opts.map_flags = old_flags; + fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE, + map_flags | BPF_F_WRONLY); /* Stack/Queue maps do not support BPF_F_NO_PREALLOC */ - if (map_opts.map_flags & BPF_F_NO_PREALLOC) { + if (map_flags & BPF_F_NO_PREALLOC) { assert(fd < 0 && errno == EINVAL); return; } @@ -1701,8 +1699,8 @@ static void test_reuseport_array(void) __u32 fds_idx = 0; int fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL, - sizeof(__u32), sizeof(__u64), array_size, NULL); + map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + sizeof(__u32), sizeof(__u64), array_size, 0); CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); @@ -1838,8 +1836,8 @@ static void test_reuseport_array(void) close(map_fd); /* Test 32 bit fd */ - map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL, - sizeof(__u32), sizeof(__u32), array_size, NULL); + map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + sizeof(__u32), sizeof(__u32), array_size, 0); CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64, @@ -1897,10 +1895,10 @@ int main(void) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - map_opts.map_flags = 0; + map_flags = 0; run_all_tests(); - map_opts.map_flags = BPF_F_NO_PREALLOC; + map_flags = BPF_F_NO_PREALLOC; run_all_tests(); #define DEFINE_TEST(name) test_##name(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2ecb73a652..e3fea6f281 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -4,6 +4,7 @@ #define _GNU_SOURCE #include "test_progs.h" #include "cgroup_helpers.h" +#include 
"bpf_rlimit.h" #include #include #include @@ -11,11 +12,6 @@ #include #include /* backtrace */ #include -#include /* get_nprocs */ -#include -#include -#include -#include /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) @@ -49,12 +45,9 @@ struct prog_test_def { const char *test_name; int test_num; void (*run_test)(void); - void (*run_serial_test)(void); bool force_log; int error_cnt; int skip_cnt; - int sub_succ_cnt; - bool should_run; bool tested; bool need_cgroup_cleanup; @@ -104,10 +97,6 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) if (stdout == env.stdout) return; - /* worker always holds log */ - if (env.worker_id != -1) - return; - fflush(stdout); /* exports env.log_buf & env.log_cnt */ if (env.verbosity > VERBOSE_NONE || test->force_log || failed) { @@ -118,6 +107,8 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) fprintf(env.stdout, "\n"); } } + + fseeko(stdout, 0, SEEK_SET); /* rewind */ } static void skip_account(void) @@ -133,8 +124,7 @@ static void stdio_restore(void); /* A bunch of tests set custom affinity per-thread and/or per-process. Reset * it after each test/sub-test. */ -static void reset_affinity(void) -{ +static void reset_affinity() { cpu_set_t cpuset; int i, err; @@ -175,21 +165,21 @@ static void restore_netns(void) } } -void test__end_subtest(void) +void test__end_subtest() { struct prog_test_def *test = env.test; int sub_error_cnt = test->error_cnt - test->old_error_cnt; dump_test_log(test, sub_error_cnt); - fprintf(stdout, "#%d/%d %s/%s:%s\n", + fprintf(env.stdout, "#%d/%d %s/%s:%s\n", test->test_num, test->subtest_num, test->test_name, test->subtest_name, sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); if (sub_error_cnt) - test->error_cnt++; + env.fail_cnt++; else if (test->skip_cnt == 0) - test->sub_succ_cnt++; + env.sub_succ_cnt++; skip_account(); free(test->subtest_name); @@ -227,8 +217,7 @@ bool test__start_subtest(const char *name) return true; } -void test__force_log(void) -{ +void test__force_log() { env.test->force_log = true; } @@ -457,26 +446,23 @@ static int load_bpf_testmod(void) } /* extern declarations for test funcs */ -#define DEFINE_TEST(name) \ - extern void test_##name(void) __weak; \ - extern void serial_test_##name(void) __weak; +#define DEFINE_TEST(name) extern void test_##name(void); #include #undef DEFINE_TEST static struct prog_test_def prog_test_defs[] = { -#define DEFINE_TEST(name) { \ - .test_name = #name, \ - .run_test = &test_##name, \ - .run_serial_test = &serial_test_##name, \ +#define DEFINE_TEST(name) { \ + .test_name = #name, \ + .run_test = &test_##name, \ }, #include #undef DEFINE_TEST }; -static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); +const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); const char *argp_program_version = "test_progs 0.1"; const char *argp_program_bug_address = ""; -static const char argp_program_doc[] = "BPF selftests test runner"; +const char argp_program_doc[] = "BPF selftests test runner"; enum ARG_KEYS { ARG_TEST_NUM = 'n', @@ -488,8 +474,6 @@ enum ARG_KEYS { ARG_LIST_TEST_NAMES = 'l', ARG_TEST_NAME_GLOB_ALLOWLIST = 'a', ARG_TEST_NAME_GLOB_DENYLIST = 'd', - ARG_NUM_WORKERS = 'j', - ARG_DEBUG = -1, }; static const struct argp_option opts[] = { @@ -511,10 +495,6 @@ static const struct argp_option opts[] = { "Run tests with name matching the pattern (supports '*' wildcard)." 
}, { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0, "Don't run tests with name matching the pattern (supports '*' wildcard)." }, - { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL, - "Number of workers to run in parallel, default to number of cpus." }, - { "debug", ARG_DEBUG, NULL, 0, - "print extra debug information for test_progs." }, {}, }; @@ -670,7 +650,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", errno); - return -EINVAL; + return -1; } } @@ -681,20 +661,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_LIST_TEST_NAMES: env->list_test_names = true; break; - case ARG_NUM_WORKERS: - if (arg) { - env->workers = atoi(arg); - if (!env->workers) { - fprintf(stderr, "Invalid number of worker: %s.", arg); - return -EINVAL; - } - } else { - env->workers = get_nprocs(); - } - break; - case ARG_DEBUG: - env->debug = true; - break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -712,7 +678,7 @@ static void stdio_hijack(void) env.stdout = stdout; env.stderr = stderr; - if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) { + if (env.verbosity > VERBOSE_NONE) { /* nothing to do, output to stdout by default */ return; } @@ -738,6 +704,10 @@ static void stdio_restore(void) return; fclose(stdout); + free(env.log_buf); + + env.log_buf = NULL; + env.log_cnt = 0; stdout = env.stdout; stderr = env.stderr; @@ -773,45 +743,6 @@ int cd_flavor_subdir(const char *exec_name) return chdir(flavor); } -int trigger_module_test_read(int read_sz) -{ - int fd, err; - - fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); - err = -errno; - if (!ASSERT_GE(fd, 0, "testmod_file_open")) - return err; - - read(fd, NULL, read_sz); - close(fd); - - return 0; -} - -int trigger_module_test_write(int write_sz) -{ - int fd, err; - char *buf = malloc(write_sz); - - if (!buf) - return -ENOMEM; - - memset(buf, 'a', write_sz); - buf[write_sz-1] = '\0'; - - fd = open(BPF_TESTMOD_TEST_FILE, O_WRONLY); - err = -errno; - if (!ASSERT_GE(fd, 0, "testmod_file_open")) { - free(buf); - return err; - } - - write(fd, buf, write_sz); - close(fd); - free(buf); - return 0; -} - #define MAX_BACKTRACE_SZ 128 void crash_handler(int signum) { @@ -824,498 +755,11 @@ void crash_handler(int signum) dump_test_log(env.test, true); if (env.stdout) stdio_restore(); - if (env.worker_id != -1) - fprintf(stderr, "[%d]: ", env.worker_id); + fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); backtrace_symbols_fd(bt, sz, STDERR_FILENO); } -static void sigint_handler(int signum) -{ - int i; - - for (i = 0; i < env.workers; i++) - if (env.worker_socks[i] > 0) - close(env.worker_socks[i]); -} - -static int current_test_idx; -static pthread_mutex_t current_test_lock; -static pthread_mutex_t stdout_output_lock; - -struct test_result { - int error_cnt; - int skip_cnt; - int sub_succ_cnt; - - size_t log_cnt; - char *log_buf; -}; - -static struct test_result test_results[ARRAY_SIZE(prog_test_defs)]; - -static inline const char *str_msg(const struct msg *msg, char *buf) -{ - switch (msg->type) { - case MSG_DO_TEST: - sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num); - break; - case MSG_TEST_DONE: - sprintf(buf, "MSG_TEST_DONE %d (log: %d)", - msg->test_done.test_num, - msg->test_done.have_log); - break; - case MSG_TEST_LOG: - sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", - strlen(msg->test_log.log_buf), - msg->test_log.is_last); - break; - case MSG_EXIT: - sprintf(buf, "MSG_EXIT"); - break; - default: - 
sprintf(buf, "UNKNOWN"); - break; - } - - return buf; -} - -static int send_message(int sock, const struct msg *msg) -{ - char buf[256]; - - if (env.debug) - fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf)); - return send(sock, msg, sizeof(*msg), 0); -} - -static int recv_message(int sock, struct msg *msg) -{ - int ret; - char buf[256]; - - memset(msg, 0, sizeof(*msg)); - ret = recv(sock, msg, sizeof(*msg), 0); - if (ret >= 0) { - if (env.debug) - fprintf(stderr, "Received msg: %s\n", str_msg(msg, buf)); - } - return ret; -} - -static void run_one_test(int test_num) -{ - struct prog_test_def *test = &prog_test_defs[test_num]; - - env.test = test; - - if (test->run_test) - test->run_test(); - else if (test->run_serial_test) - test->run_serial_test(); - - /* ensure last sub-test is finalized properly */ - if (test->subtest_name) - test__end_subtest(); - - test->tested = true; - - dump_test_log(test, test->error_cnt); - - reset_affinity(); - restore_netns(); - if (test->need_cgroup_cleanup) - cleanup_cgroup_environment(); -} - -struct dispatch_data { - int worker_id; - int sock_fd; -}; - -static void *dispatch_thread(void *ctx) -{ - struct dispatch_data *data = ctx; - int sock_fd; - FILE *log_fp = NULL; - - sock_fd = data->sock_fd; - - while (true) { - int test_to_run = -1; - struct prog_test_def *test; - struct test_result *result; - - /* grab a test */ - { - pthread_mutex_lock(¤t_test_lock); - - if (current_test_idx >= prog_test_cnt) { - pthread_mutex_unlock(¤t_test_lock); - goto done; - } - - test = &prog_test_defs[current_test_idx]; - test_to_run = current_test_idx; - current_test_idx++; - - pthread_mutex_unlock(¤t_test_lock); - } - - if (!test->should_run || test->run_serial_test) - continue; - - /* run test through worker */ - { - struct msg msg_do_test; - - msg_do_test.type = MSG_DO_TEST; - msg_do_test.do_test.test_num = test_to_run; - if (send_message(sock_fd, &msg_do_test) < 0) { - perror("Fail to send command"); - goto done; - } - env.worker_current_test[data->worker_id] = test_to_run; - } - - /* wait for test done */ - { - int err; - struct msg msg_test_done; - - err = recv_message(sock_fd, &msg_test_done); - if (err < 0) - goto error; - if (msg_test_done.type != MSG_TEST_DONE) - goto error; - if (test_to_run != msg_test_done.test_done.test_num) - goto error; - - test->tested = true; - result = &test_results[test_to_run]; - - result->error_cnt = msg_test_done.test_done.error_cnt; - result->skip_cnt = msg_test_done.test_done.skip_cnt; - result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; - - /* collect all logs */ - if (msg_test_done.test_done.have_log) { - log_fp = open_memstream(&result->log_buf, &result->log_cnt); - if (!log_fp) - goto error; - - while (true) { - struct msg msg_log; - - if (recv_message(sock_fd, &msg_log) < 0) - goto error; - if (msg_log.type != MSG_TEST_LOG) - goto error; - - fprintf(log_fp, "%s", msg_log.test_log.log_buf); - if (msg_log.test_log.is_last) - break; - } - fclose(log_fp); - log_fp = NULL; - } - /* output log */ - { - pthread_mutex_lock(&stdout_output_lock); - - if (result->log_cnt) { - result->log_buf[result->log_cnt] = '\0'; - fprintf(stdout, "%s", result->log_buf); - if (result->log_buf[result->log_cnt - 1] != '\n') - fprintf(stdout, "\n"); - } - - fprintf(stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - result->error_cnt ? "FAIL" : (result->skip_cnt ? 
"SKIP" : "OK")); - - pthread_mutex_unlock(&stdout_output_lock); - } - - } /* wait for test done */ - } /* while (true) */ -error: - if (env.debug) - fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); - - if (log_fp) - fclose(log_fp); -done: - { - struct msg msg_exit; - - msg_exit.type = MSG_EXIT; - if (send_message(sock_fd, &msg_exit) < 0) { - if (env.debug) - fprintf(stderr, "[%d]: send_message msg_exit: %s.\n", - data->worker_id, strerror(errno)); - } - } - return NULL; -} - -static void print_all_error_logs(void) -{ - int i; - - if (env.fail_cnt) - fprintf(stdout, "\nAll error logs:\n"); - - /* print error logs again */ - for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *test; - struct test_result *result; - - test = &prog_test_defs[i]; - result = &test_results[i]; - - if (!test->tested || !result->error_cnt) - continue; - - fprintf(stdout, "\n#%d %s:%s\n", - test->test_num, test->test_name, - result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); - - if (result->log_cnt) { - result->log_buf[result->log_cnt] = '\0'; - fprintf(stdout, "%s", result->log_buf); - if (result->log_buf[result->log_cnt - 1] != '\n') - fprintf(stdout, "\n"); - } - } -} - -static int server_main(void) -{ - pthread_t *dispatcher_threads; - struct dispatch_data *data; - struct sigaction sigact_int = { - .sa_handler = sigint_handler, - .sa_flags = SA_RESETHAND, - }; - int i; - - sigaction(SIGINT, &sigact_int, NULL); - - dispatcher_threads = calloc(sizeof(pthread_t), env.workers); - data = calloc(sizeof(struct dispatch_data), env.workers); - - env.worker_current_test = calloc(sizeof(int), env.workers); - for (i = 0; i < env.workers; i++) { - int rc; - - data[i].worker_id = i; - data[i].sock_fd = env.worker_socks[i]; - rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]); - if (rc < 0) { - perror("Failed to launch dispatcher thread"); - exit(EXIT_ERR_SETUP_INFRA); - } - } - - /* wait for all dispatcher to finish */ - for (i = 0; i < env.workers; i++) { - while (true) { - int ret = pthread_tryjoin_np(dispatcher_threads[i], NULL); - - if (!ret) { - break; - } else if (ret == EBUSY) { - if (env.debug) - fprintf(stderr, "Still waiting for thread %d (test %d).\n", - i, env.worker_current_test[i] + 1); - usleep(1000 * 1000); - continue; - } else { - fprintf(stderr, "Unexpected error joining dispatcher thread: %d", ret); - break; - } - } - } - free(dispatcher_threads); - free(env.worker_current_test); - free(data); - - /* run serial tests */ - save_netns(); - - for (int i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *test = &prog_test_defs[i]; - struct test_result *result = &test_results[i]; - - if (!test->should_run || !test->run_serial_test) - continue; - - stdio_hijack(); - - run_one_test(i); - - stdio_restore(); - if (env.log_buf) { - result->log_cnt = env.log_cnt; - result->log_buf = strdup(env.log_buf); - - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; - } - restore_netns(); - - fprintf(stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - test->error_cnt ? "FAIL" : (test->skip_cnt ? 
"SKIP" : "OK")); - - result->error_cnt = test->error_cnt; - result->skip_cnt = test->skip_cnt; - result->sub_succ_cnt = test->sub_succ_cnt; - } - - /* generate summary */ - fflush(stderr); - fflush(stdout); - - for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *current_test; - struct test_result *result; - - current_test = &prog_test_defs[i]; - result = &test_results[i]; - - if (!current_test->tested) - continue; - - env.succ_cnt += result->error_cnt ? 0 : 1; - env.skip_cnt += result->skip_cnt; - if (result->error_cnt) - env.fail_cnt++; - env.sub_succ_cnt += result->sub_succ_cnt; - } - - print_all_error_logs(); - - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); - - /* reap all workers */ - for (i = 0; i < env.workers; i++) { - int wstatus, pid; - - pid = waitpid(env.worker_pids[i], &wstatus, 0); - if (pid != env.worker_pids[i]) - perror("Unable to reap worker"); - } - - return 0; -} - -static int worker_main(int sock) -{ - save_netns(); - - while (true) { - /* receive command */ - struct msg msg; - - if (recv_message(sock, &msg) < 0) - goto out; - - switch (msg.type) { - case MSG_EXIT: - if (env.debug) - fprintf(stderr, "[%d]: worker exit.\n", - env.worker_id); - goto out; - case MSG_DO_TEST: { - int test_to_run; - struct prog_test_def *test; - struct msg msg_done; - - test_to_run = msg.do_test.test_num; - test = &prog_test_defs[test_to_run]; - - if (env.debug) - fprintf(stderr, "[%d]: #%d:%s running.\n", - env.worker_id, - test_to_run + 1, - test->test_name); - - stdio_hijack(); - - run_one_test(test_to_run); - - stdio_restore(); - - memset(&msg_done, 0, sizeof(msg_done)); - msg_done.type = MSG_TEST_DONE; - msg_done.test_done.test_num = test_to_run; - msg_done.test_done.error_cnt = test->error_cnt; - msg_done.test_done.skip_cnt = test->skip_cnt; - msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt; - msg_done.test_done.have_log = false; - - if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) { - if (env.log_cnt) - msg_done.test_done.have_log = true; - } - if (send_message(sock, &msg_done) < 0) { - perror("Fail to send message done"); - goto out; - } - - /* send logs */ - if (msg_done.test_done.have_log) { - char *src; - size_t slen; - - src = env.log_buf; - slen = env.log_cnt; - while (slen) { - struct msg msg_log; - char *dest; - size_t len; - - memset(&msg_log, 0, sizeof(msg_log)); - msg_log.type = MSG_TEST_LOG; - dest = msg_log.test_log.log_buf; - len = slen >= MAX_LOG_TRUNK_SIZE ? 
MAX_LOG_TRUNK_SIZE : slen; - memcpy(dest, src, len); - - src += len; - slen -= len; - if (!slen) - msg_log.test_log.is_last = true; - - assert(send_message(sock, &msg_log) >= 0); - } - } - if (env.log_buf) { - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; - } - if (env.debug) - fprintf(stderr, "[%d]: #%d:%s done.\n", - env.worker_id, - test_to_run + 1, - test->test_name); - break; - } /* case MSG_DO_TEST */ - default: - if (env.debug) - fprintf(stderr, "[%d]: unknown message.\n", env.worker_id); - return -1; - } - } -out: - return 0; -} - int main(int argc, char **argv) { static const struct argp argp = { @@ -1326,7 +770,7 @@ int main(int argc, char **argv) struct sigaction sigact = { .sa_handler = crash_handler, .sa_flags = SA_RESETHAND, - }; + }; int err, i; sigaction(SIGSEGV, &sigact, NULL); @@ -1341,6 +785,7 @@ int main(int argc, char **argv) /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); srand(time(NULL)); @@ -1353,84 +798,21 @@ int main(int argc, char **argv) return -1; } - env.stdout = stdout; - env.stderr = stderr; - + save_netns(); + stdio_hijack(); env.has_testmod = true; if (!env.list_test_names && load_bpf_testmod()) { fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); env.has_testmod = false; } - - /* initializing tests */ for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; + env.test = test; test->test_num = i + 1; - if (should_run(&env.test_selector, + + if (!should_run(&env.test_selector, test->test_num, test->test_name)) - test->should_run = true; - else - test->should_run = false; - - if ((test->run_test == NULL && test->run_serial_test == NULL) || - (test->run_test != NULL && test->run_serial_test != NULL)) { - fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n", - test->test_num, test->test_name, test->test_name, test->test_name); - exit(EXIT_ERR_SETUP_INFRA); - } - } - - /* ignore workers if we are just listing */ - if (env.get_test_cnt || env.list_test_names) - env.workers = 0; - - /* launch workers if requested */ - env.worker_id = -1; /* main process */ - if (env.workers) { - env.worker_pids = calloc(sizeof(__pid_t), env.workers); - env.worker_socks = calloc(sizeof(int), env.workers); - if (env.debug) - fprintf(stdout, "Launching %d workers.\n", env.workers); - for (i = 0; i < env.workers; i++) { - int sv[2]; - pid_t pid; - - if (socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sv) < 0) { - perror("Fail to create worker socket"); - return -1; - } - pid = fork(); - if (pid < 0) { - perror("Failed to fork worker"); - return -1; - } else if (pid != 0) { /* main process */ - close(sv[1]); - env.worker_pids[i] = pid; - env.worker_socks[i] = sv[0]; - } else { /* inside each worker process */ - close(sv[0]); - env.worker_id = i; - return worker_main(sv[1]); - } - } - - if (env.worker_id == -1) { - server_main(); - goto out; - } - } - - /* The rest of the main process */ - - /* on single mode */ - save_netns(); - - for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *test = &prog_test_defs[i]; - struct test_result *result; - - if (!test->should_run) continue; if (env.get_test_cnt) { @@ -1444,35 +826,33 @@ int main(int argc, char **argv) continue; } - stdio_hijack(); + test->run_test(); + /* ensure last sub-test is finalized properly */ + if (test->subtest_name) + test__end_subtest(); - run_one_test(i); + test->tested = true; - stdio_restore(); + dump_test_log(test, 
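/*
 * The "+ libbpf_set_print(libbpf_print_fn);" line above installs a
 * custom logger for all libbpf warnings and infos. The callback
 * signature is fixed by libbpf; a typical filter looks like the hedged
 * sketch below (the tree's actual libbpf_print_fn may differ):
 */
#include <stdarg.h>
#include <stdio.h>
#include <bpf/libbpf.h>

static int demo_print_fn(enum libbpf_print_level level,
			 const char *format, va_list args)
{
	if (level == LIBBPF_DEBUG)		/* drop debug chatter */
		return 0;
	return vfprintf(stderr, format, args);	/* bytes written, libbpf style */
}

int main(void)
{
	libbpf_set_print(demo_print_fn);
	/* subsequent bpf_object__open()/load() calls log through the hook */
	return 0;
}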
test->error_cnt); fprintf(env.stdout, "#%d %s:%s\n", test->test_num, test->test_name, test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); - result = &test_results[i]; - result->error_cnt = test->error_cnt; - if (env.log_buf) { - result->log_buf = strdup(env.log_buf); - result->log_cnt = env.log_cnt; - - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; - } - if (test->error_cnt) env.fail_cnt++; else env.succ_cnt++; - skip_account(); - env.sub_succ_cnt += test->sub_succ_cnt; + + reset_affinity(); + restore_netns(); + if (test->need_cgroup_cleanup) + cleanup_cgroup_environment(); } + if (!env.list_test_names && env.has_testmod) + unload_bpf_testmod(); + stdio_restore(); if (env.get_test_cnt) { printf("%d\n", env.succ_cnt); @@ -1482,21 +862,17 @@ int main(int argc, char **argv) if (env.list_test_names) goto out; - print_all_error_logs(); - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); - close(env.saved_netns_fd); out: - if (!env.list_test_names && env.has_testmod) - unload_bpf_testmod(); free_str_set(&env.test_selector.blacklist); free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); free_str_set(&env.subtest_selector.blacklist); free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); + close(env.saved_netns_fd); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 93c1ff7055..c8c2bf878f 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -62,7 +62,6 @@ struct test_env { struct test_selector test_selector; struct test_selector subtest_selector; bool verifier_stats; - bool debug; enum verbosity verbosity; bool jit_enabled; @@ -70,8 +69,7 @@ struct test_env { bool get_test_cnt; bool list_test_names; - struct prog_test_def *test; /* current running tests */ - + struct prog_test_def *test; FILE *stdout; FILE *stderr; char *log_buf; @@ -84,38 +82,6 @@ struct test_env { int skip_cnt; /* skipped tests */ int saved_netns_fd; - int workers; /* number of worker process */ - int worker_id; /* id number of current worker, main process is -1 */ - pid_t *worker_pids; /* array of worker pids */ - int *worker_socks; /* array of worker socks */ - int *worker_current_test; /* array of current running test for each worker */ -}; - -#define MAX_LOG_TRUNK_SIZE 8192 -enum msg_type { - MSG_DO_TEST = 0, - MSG_TEST_DONE = 1, - MSG_TEST_LOG = 2, - MSG_EXIT = 255, -}; -struct msg { - enum msg_type type; - union { - struct { - int test_num; - } do_test; - struct { - int test_num; - int sub_succ_cnt; - int error_cnt; - int skip_cnt; - bool have_log; - } test_done; - struct { - char log_buf[MAX_LOG_TRUNK_SIZE + 1]; - bool is_last; - } test_log; - }; }; extern struct test_env env; @@ -325,8 +291,6 @@ int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); int kern_sync_rcu(void); -int trigger_module_test_read(int read_sz); -int trigger_module_test_write(int write_sz); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" @@ -335,5 +299,3 @@ int trigger_module_test_write(int write_sz); #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif - -#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod" diff --git a/tools/testing/selftests/bpf/test_sock.c 
b/tools/testing/selftests/bpf/test_sock.c index fe10f81342..9613f75388 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -35,21 +35,18 @@ struct sock_test { /* Endpoint to bind() to */ const char *ip; unsigned short port; - unsigned short port_retry; /* Expected test result */ enum { LOAD_REJECT, ATTACH_REJECT, BIND_REJECT, SUCCESS, - RETRY_SUCCESS, - RETRY_REJECT } result; }; static struct sock_test tests[] = { { - .descr = "bind4 load with invalid access: src_ip6", + "bind4 load with invalid access: src_ip6", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -57,12 +54,16 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = LOAD_REJECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + LOAD_REJECT, }, { - .descr = "bind4 load with invalid access: mark", + "bind4 load with invalid access: mark", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -70,12 +71,16 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = LOAD_REJECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + LOAD_REJECT, }, { - .descr = "bind6 load with invalid access: src_ip4", + "bind6 load with invalid access: src_ip4", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -83,12 +88,16 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .result = LOAD_REJECT, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + 0, + 0, + NULL, + 0, + LOAD_REJECT, }, { - .descr = "sock_create load with invalid access: src_port", + "sock_create load with invalid access: src_port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -96,106 +105,128 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .result = LOAD_REJECT, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_INET_SOCK_CREATE, + 0, + 0, + NULL, + 0, + LOAD_REJECT, }, { - .descr = "sock_create load w/o expected_attach_type (compat mode)", + "sock_create load w/o expected_attach_type (compat mode)", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = 0, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 8097, - .result = SUCCESS, + 0, + BPF_CGROUP_INET_SOCK_CREATE, + AF_INET, + SOCK_STREAM, + "127.0.0.1", + 8097, + SUCCESS, }, { - .descr = "sock_create load w/ expected_attach_type", + "sock_create load w/ expected_attach_type", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 8097, - .result = SUCCESS, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_INET_SOCK_CREATE, + AF_INET, + SOCK_STREAM, + "127.0.0.1", + 8097, + SUCCESS, }, { - .descr = "attach type 
mismatch bind4 vs bind6", + "attach type mismatch bind4 vs bind6", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .result = ATTACH_REJECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, }, { - .descr = "attach type mismatch bind6 vs bind4", + "attach type mismatch bind6 vs bind4", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = ATTACH_REJECT, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, }, { - .descr = "attach type mismatch default vs bind4", + "attach type mismatch default vs bind4", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = 0, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = ATTACH_REJECT, + 0, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, }, { - .descr = "attach type mismatch bind6 vs sock_create", + "attach type mismatch bind6 vs sock_create", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .result = ATTACH_REJECT, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET_SOCK_CREATE, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, }, { - .descr = "bind4 reject all", + "bind4 reject all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "0.0.0.0", - .result = BIND_REJECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + AF_INET, + SOCK_STREAM, + "0.0.0.0", + 0, + BIND_REJECT, }, { - .descr = "bind6 reject all", + "bind6 reject all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .domain = AF_INET6, - .type = SOCK_STREAM, - .ip = "::", - .result = BIND_REJECT, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + AF_INET6, + SOCK_STREAM, + "::", + 0, + BIND_REJECT, }, { - .descr = "bind6 deny specific IP & port", + "bind6 deny specific IP & port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), @@ -216,16 +247,16 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .domain = AF_INET6, - .type = SOCK_STREAM, - .ip = "::1", - .port = 8193, - .result = BIND_REJECT, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + AF_INET6, + SOCK_STREAM, + "::1", + 8193, + BIND_REJECT, }, { - .descr = "bind4 allow specific IP & port", + "bind4 allow specific IP & port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), @@ -246,132 +277,41 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 4098, - .result = SUCCESS, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + AF_INET, + SOCK_STREAM, + "127.0.0.1", + 4098, + SUCCESS, }, { - .descr = "bind4 deny specific IP & port of TCP, and retry", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* 
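/*
 * The test_sock.c hunks around this point replace designated
 * initializers (.descr = ..., .result = ...) with positional ones,
 * which is why each entry must now spell out 0/NULL placeholders for
 * every unused field. For contrast, the designated form in C names
 * only the fields it sets and zero-initializes the rest:
 */
struct demo_case {
	const char *descr;
	int domain;
	int type;
	const char *ip;
	unsigned short port;
	int result;
};

static const struct demo_case positional = {
	"demo", 0, 0, NULL, 0, 1,		/* every slot written out */
};
static const struct demo_case designated = {
	.descr = "demo",
	.result = 1,				/* remaining fields become 0 */
};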
if (ip == expected && port == expected) */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, - __bpf_constant_ntohl(0x7F000001), 4), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_port)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 4098, - .port_retry = 5000, - .result = RETRY_SUCCESS, - }, - { - .descr = "bind4 deny specific IP & port of UDP, and retry", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (ip == expected && port == expected) */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, - __bpf_constant_ntohl(0x7F000001), 4), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_port)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .domain = AF_INET, - .type = SOCK_DGRAM, - .ip = "127.0.0.1", - .port = 4098, - .port_retry = 5000, - .result = RETRY_SUCCESS, - }, - { - .descr = "bind6 deny specific IP & port, and retry", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (ip == expected && port == expected) */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_ip6[3])), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, - __bpf_constant_ntohl(0x00000001), 4), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_port)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2), - - /* return DENY; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_A(1), - - /* else return ALLOW; */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .domain = AF_INET6, - .type = SOCK_STREAM, - .ip = "::1", - .port = 8193, - .port_retry = 9000, - .result = RETRY_SUCCESS, - }, - { - .descr = "bind4 allow all", + "bind4 allow all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "0.0.0.0", - .result = SUCCESS, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + AF_INET, + SOCK_STREAM, + "0.0.0.0", + 0, + SUCCESS, }, { - .descr = "bind6 allow all", + "bind6 allow all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .domain = AF_INET6, - .type = SOCK_STREAM, - .ip = "::", - .result = SUCCESS, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + AF_INET6, + SOCK_STREAM, + "::", + 0, + SUCCESS, }, }; @@ -388,17 +328,18 @@ static size_t probe_prog_length(const struct bpf_insn *fp) static int load_sock_prog(const struct bpf_insn *prog, enum bpf_attach_type attach_type) { - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int ret, insn_cnt; + struct bpf_load_program_attr attr; + int ret; - insn_cnt = probe_prog_length(prog); + memset(&attr, 0, 
sizeof(struct bpf_load_program_attr)); + attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + attr.expected_attach_type = attach_type; + attr.insns = prog; + attr.insns_cnt = probe_prog_length(attr.insns); + attr.license = "GPL"; + attr.log_level = 2; - opts.expected_attach_type = attach_type; - opts.log_buf = bpf_log_buf; - opts.log_size = BPF_LOG_BUF_SIZE; - opts.log_level = 2; - - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", prog, insn_cnt, &opts); + ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); if (verbose && ret < 0) fprintf(stderr, "%s\n", bpf_log_buf); @@ -411,15 +352,14 @@ static int attach_sock_prog(int cgfd, int progfd, return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); } -static int bind_sock(int domain, int type, const char *ip, - unsigned short port, unsigned short port_retry) +static int bind_sock(int domain, int type, const char *ip, unsigned short port) { struct sockaddr_storage addr; struct sockaddr_in6 *addr6; struct sockaddr_in *addr4; int sockfd = -1; socklen_t len; - int res = SUCCESS; + int err = 0; sockfd = socket(domain, type, 0); if (sockfd < 0) @@ -445,44 +385,21 @@ static int bind_sock(int domain, int type, const char *ip, goto err; } - if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { - /* sys_bind() may fail for different reasons, errno has to be - * checked to confirm that BPF program rejected it. - */ - if (errno != EPERM) - goto err; - if (port_retry) - goto retry; - res = BIND_REJECT; - goto out; - } + if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) + goto err; - goto out; -retry: - if (domain == AF_INET) - addr4->sin_port = htons(port_retry); - else - addr6->sin6_port = htons(port_retry); - if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { - if (errno != EPERM) - goto err; - res = RETRY_REJECT; - } else { - res = RETRY_SUCCESS; - } goto out; err: - res = -1; + err = -1; out: close(sockfd); - return res; + return err; } static int run_test_case(int cgfd, const struct sock_test *test) { int progfd = -1; int err = 0; - int res; printf("Test case: %s .. ", test->descr); progfd = load_sock_prog(test->insns, test->expected_attach_type); @@ -500,11 +417,21 @@ static int run_test_case(int cgfd, const struct sock_test *test) goto err; } - res = bind_sock(test->domain, test->type, test->ip, test->port, - test->port_retry); - if (res > 0 && test->result == res) - goto out; + if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) { + /* sys_bind() may fail for different reasons, errno has to be + * checked to confirm that BPF program rejected it. 
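/*
 * Side-by-side sketch of the two libbpf loaders these hunks toggle
 * between. The older bpf_load_program_xattr() takes a zeroed attr
 * struct plus an explicit log buffer; the newer bpf_prog_load()
 * (libbpf >= 0.7) carries the log buffer, log level and attach type in
 * bpf_prog_load_opts. The two generally do not coexist in one libbpf
 * version, so this is for comparison only.
 */
#include <bpf/bpf.h>
#include <linux/bpf.h>

static char demo_log_buf[65536];

static int load_old_style(const struct bpf_insn *insns, size_t cnt,
			  enum bpf_attach_type atype)
{
	struct bpf_load_program_attr attr = {};

	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
	attr.expected_attach_type = atype;
	attr.insns = insns;
	attr.insns_cnt = cnt;
	attr.license = "GPL";
	return bpf_load_program_xattr(&attr, demo_log_buf,
				      sizeof(demo_log_buf));
}

static int load_new_style(const struct bpf_insn *insns, size_t cnt,
			  enum bpf_attach_type atype)
{
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.expected_attach_type = atype,
		.log_buf = demo_log_buf,
		.log_size = sizeof(demo_log_buf),
		.log_level = 2,
	);

	return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL",
			     insns, cnt, &opts);
}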
+ */ + if (test->result == BIND_REJECT && errno == EPERM) + goto out; + else + goto err; + } + + if (test->result != SUCCESS) + goto err; + + goto out; err: err = -1; out: diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index f0c8d05ba6..aa3f185fcb 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -645,14 +645,17 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port, static int load_insns(const struct sock_addr_test *test, const struct bpf_insn *insns, size_t insns_cnt) { - LIBBPF_OPTS(bpf_prog_load_opts, opts); + struct bpf_load_program_attr load_attr; int ret; - opts.expected_attach_type = test->expected_attach_type; - opts.log_buf = bpf_log_buf; - opts.log_size = BPF_LOG_BUF_SIZE; + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); + load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; + load_attr.expected_attach_type = test->expected_attach_type; + load_attr.insns = insns; + load_attr.insns_cnt = insns_cnt; + load_attr.license = "GPL"; - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts); + ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE); if (ret < 0 && test->expected_result != LOAD_REJECT) { log_err(">>> Loading program error.\n" ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); @@ -663,36 +666,23 @@ static int load_insns(const struct sock_addr_test *test, static int load_path(const struct sock_addr_test *test, const char *path) { + struct bpf_prog_load_attr attr; struct bpf_object *obj; - struct bpf_program *prog; - int err; + int prog_fd; - obj = bpf_object__open_file(path, NULL); - err = libbpf_get_error(obj); - if (err) { - log_err(">>> Opening BPF object (%s) error.\n", path); + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = path; + attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; + attr.expected_attach_type = test->expected_attach_type; + attr.prog_flags = BPF_F_TEST_RND_HI32; + + if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { + if (test->expected_result != LOAD_REJECT) + log_err(">>> Loading program (%s) error.\n", path); return -1; } - prog = bpf_object__next_program(obj, NULL); - if (!prog) - goto err_out; - - bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); - bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); - - err = bpf_object__load(obj); - if (err) { - if (test->expected_result != LOAD_REJECT) - log_err(">>> Loading program (%s) error.\n", path); - goto err_out; - } - - return bpf_program__fd(prog); -err_out: - bpf_object__close(obj); - return -1; + return prog_fd; } static int bind4_prog_load(const struct sock_addr_test *test) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 1ba7e7346a..eefd445b96 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -139,7 +139,6 @@ struct sockmap_options { bool sendpage; bool data_test; bool drop_expected; - bool check_recved_len; int iov_count; int iov_length; int rate; @@ -557,12 +556,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, int err, i, flags = MSG_NOSIGNAL; bool drop = opt->drop_expected; bool data = opt->data_test; - int iov_alloc_length = iov_length; - if (!tx && opt->check_recved_len) - iov_alloc_length *= 2; - - err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, 
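/*
 * The rejection check above hinges on errno: when a cgroup/sock BPF
 * program vetoes the operation, sys_bind() fails with EPERM
 * specifically, so any other errno means an unrelated failure rather
 * than a verdict from the program. Minimal sketch of that distinction:
 */
#include <arpa/inet.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr = { .sin_family = AF_INET };
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	addr.sin_port = htons(8097);
	inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		if (errno == EPERM)		/* BPF program said no */
			puts("bind rejected by BPF policy");
		else				/* e.g. EADDRINUSE */
			printf("bind failed: %s\n", strerror(errno));
	}
	close(fd);
	return 0;
}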
data, tx); + err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); if (err) goto out_errno; if (peek_flag) { @@ -670,13 +665,6 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, s->bytes_recvd += recv; - if (opt->check_recved_len && s->bytes_recvd > total_bytes) { - errno = EMSGSIZE; - fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n", - s->bytes_recvd, total_bytes); - goto out_errno; - } - if (data) { int chunk_sz = opt->sendpage ? iov_length * cnt : @@ -756,8 +744,7 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { - if (txmsg_pop || txmsg_start_pop) - iov_buf -= (txmsg_pop - txmsg_start_pop + 1); + iov_buf -= (txmsg_pop - txmsg_start_pop + 1); if (opt->drop_expected || txmsg_ktls_skb_drop) _exit(0); @@ -1693,27 +1680,12 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) { txmsg_pass = 1; skb_use_parser = 512; - if (ktls == 1) - skb_use_parser = 570; opt->iov_length = 256; opt->iov_count = 1; opt->rate = 2; test_exec(cgrp, opt); } -static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt) -{ - if (ktls == 1) - return; - skb_use_parser = 10; - opt->iov_length = 20; - opt->iov_count = 1; - opt->rate = 1; - opt->check_recved_len = true; - test_exec(cgrp, opt); - opt->check_recved_len = false; -} - char *map_names[] = { "sock_map", "sock_map_txmsg", @@ -1812,8 +1784,7 @@ struct _test test[] = { {"txmsg test pull-data", test_txmsg_pull}, {"txmsg test pop-data", test_txmsg_pop}, {"txmsg test push/pop data", test_txmsg_push_pop}, - {"txmsg test ingress parser", test_txmsg_ingress_parser}, - {"txmsg test ingress parser2", test_txmsg_ingress_parser2}, + {"txmsg text ingress parser", test_txmsg_ingress_parser}, }; static int check_whitelist(struct _test *t, struct sockmap_options *opt) diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index 4f6cf833b5..a20a919244 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -17,7 +17,6 @@ #include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" -#include "testing_helpers.h" #define CG_PATH "/foo" #define MAX_INSNS 512 @@ -125,7 +124,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:write sysctl:write read ok narrow", .insns = { /* u64 w = (u16)write & 1; */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, write)), #else @@ -185,7 +184,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:file_pos sysctl:read read ok narrow", .insns = { /* If (file_pos == X) */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), #else @@ -1436,10 +1435,14 @@ static int load_sysctl_prog_insns(struct sysctl_test *test, const char *sysctl_path) { struct bpf_insn *prog = test->insns; - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int ret, insn_cnt; + struct bpf_load_program_attr attr; + int ret; - insn_cnt = probe_prog_length(prog); + memset(&attr, 0, sizeof(struct bpf_load_program_attr)); + attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; + attr.insns = prog; + attr.insns_cnt = probe_prog_length(attr.insns); + attr.license = "GPL"; if (test->fixup_value_insn) { char buf[128]; @@ -1462,10 +1465,7 @@ static int load_sysctl_prog_insns(struct sysctl_test *test, return -1; } - opts.log_buf = bpf_log_buf; - opts.log_size 
= BPF_LOG_BUF_SIZE; - - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts); + ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); if (ret < 0 && test->result != LOAD_REJECT) { log_err(">>> Loading program error.\n" ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); @@ -1476,10 +1476,15 @@ static int load_sysctl_prog_insns(struct sysctl_test *test, static int load_sysctl_prog_file(struct sysctl_test *test) { + struct bpf_prog_load_attr attr; struct bpf_object *obj; int prog_fd; - if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) { + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = test->prog_file; + attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; + + if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { if (test->result != LOAD_REJECT) log_err(">>> Loading program (%s) error.\n", test->prog_file); diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c index 0851c42ee3..6272c784ca 100644 --- a/tools/testing/selftests/bpf/test_tag.c +++ b/tools/testing/selftests/bpf/test_tag.c @@ -21,7 +21,6 @@ #include "../../../include/linux/filter.h" #include "bpf_rlimit.h" -#include "testing_helpers.h" static struct bpf_insn prog[BPF_MAXINSNS]; @@ -58,7 +57,7 @@ static int bpf_try_load_prog(int insns, int fd_map, int fd_prog; bpf_filler(insns, fd_map); - fd_prog = bpf_test_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0, + fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0, NULL, 0); assert(fd_prog > 0); if (fd_map > 0) @@ -185,12 +184,11 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map, int main(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); uint32_t tests = 0; int i, fd_map; - fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), - sizeof(int), 1, &opts); + fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), + sizeof(int), 1, BPF_F_NO_PREALLOC); assert(fd_map > 0); for (i = 0; i < 5; i++) { diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh index 6413c14725..9b3617d770 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -76,8 +76,8 @@ DIR=$(dirname $0) TEST_IF=lo MAX_PING_TRIES=5 BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o" -CLSACT_SECTION="tc" -XDP_SECTION="xdp" +CLSACT_SECTION="clsact/check_syncookie" +XDP_SECTION="xdp/check_syncookie" BPF_PROG_ID=0 PROG="${DIR}/test_tcp_check_syncookie_user" diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 4c5114765b..4a39304cc5 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -25,7 +25,6 @@ #include "test_tcpnotify.h" #include "trace_helpers.h" -#include "testing_helpers.h" #define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? 
getpagesize() : 8192L) @@ -72,6 +71,7 @@ int main(int argc, char **argv) { const char *file = "test_tcpnotify_kern.o"; struct bpf_map *perf_map, *global_map; + struct perf_buffer_opts pb_opts = {}; struct tcpnotify_globals g = {0}; struct perf_buffer *pb = NULL; const char *cg_path = "/foo"; @@ -92,7 +92,7 @@ int main(int argc, char **argv) if (cg_fd < 0) goto err; - if (bpf_prog_test_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; } @@ -116,7 +116,8 @@ int main(int argc, char **argv) return -1; } - pb = perf_buffer__new(bpf_map__fd(perf_map), 8, dummyfn, NULL, NULL, NULL); + pb_opts.sample_cb = dummyfn; + pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts); if (!pb) goto err; diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index ca13729240..1ccbe804e8 100644 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -168,15 +168,14 @@ add_vxlan_tunnel() ip netns exec at_ns0 \ ip link set dev $DEV_NS address 52:54:00:d9:01:00 up ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 \ - ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS + ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF # root namespace ip link add dev $DEV type $TYPE external gbp dstport 4789 ip link set dev $DEV address 52:54:00:d9:02:00 up ip addr add dev $DEV 10.1.1.200/24 - ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV + arp -s 10.1.1.100 52:54:00:d9:01:00 } add_ip6vxlan_tunnel() diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 76cd903117..3a9e332c5e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -41,19 +41,16 @@ # define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 # endif #endif +#include "bpf_rlimit.h" #include "bpf_rand.h" #include "bpf_util.h" #include "test_btf.h" #include "../../../include/linux/filter.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 22 +#define MAX_NR_MAPS 21 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -91,7 +88,6 @@ struct bpf_test { int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; - int fixup_map_timer[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string * means no log verification. @@ -461,11 +457,11 @@ static int __create_map(uint32_t type, uint32_t size_key, uint32_t size_value, uint32_t max_elem, uint32_t extra_flags) { - LIBBPF_OPTS(bpf_map_create_opts, opts); int fd; - opts.map_flags = (type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0) | extra_flags; - fd = bpf_map_create(type, NULL, size_key, size_value, max_elem, &opts); + fd = bpf_create_map(type, size_key, size_value, max_elem, + (type == BPF_MAP_TYPE_HASH ? 
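/*
 * The test_tcpnotify_user.c hunk above tracks a perf_buffer__new()
 * signature change: before libbpf 0.6 the sample callback travelled in
 * struct perf_buffer_opts, afterwards it is a direct argument. Hedged
 * sketch of the newer six-argument form:
 */
#include <stdio.h>
#include <bpf/libbpf.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	printf("cpu %d: %u bytes\n", cpu, size);
}

static void on_lost(void *ctx, int cpu, __u64 cnt)
{
	fprintf(stderr, "cpu %d: lost %llu events\n", cpu,
		(unsigned long long)cnt);
}

int demo_poll_perf_buf(int perf_map_fd)
{
	struct perf_buffer *pb;

	/* 8 pages per CPU; NULL ctx and opts */
	pb = perf_buffer__new(perf_map_fd, 8, on_sample, on_lost, NULL, NULL);
	if (!pb)
		return -1;
	while (perf_buffer__poll(pb, 100 /* ms */) >= 0)
		;
	perf_buffer__free(pb);
	return 0;
}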
+ BPF_F_NO_PREALLOC : 0) | extra_flags); if (fd < 0) { if (skip_unsupported_map(type)) return -1; @@ -498,7 +494,8 @@ static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret) BPF_EXIT_INSN(), }; - return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL); + return bpf_load_program(prog_type, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd, @@ -513,7 +510,8 @@ static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd, BPF_EXIT_INSN(), }; - return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL); + return bpf_load_program(prog_type, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, @@ -521,8 +519,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, { int mfd, p1fd, p2fd, p3fd; - mfd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, NULL, sizeof(int), - sizeof(int), max_elem, NULL); + mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), + sizeof(int), max_elem, 0); if (mfd < 0) { if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY)) return -1; @@ -552,11 +550,10 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, static int create_map_in_map(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts); int inner_map_fd, outer_map_fd; - inner_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), - sizeof(int), 1, NULL); + inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), + sizeof(int), 1, 0); if (inner_map_fd < 0) { if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY)) return -1; @@ -564,9 +561,8 @@ static int create_map_in_map(void) return inner_map_fd; } - opts.inner_map_fd = inner_map_fd; - outer_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, - sizeof(int), sizeof(int), 1, &opts); + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, + sizeof(int), inner_map_fd, 1, 0); if (outer_map_fd < 0) { if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS)) return -1; @@ -585,8 +581,8 @@ static int create_cgroup_storage(bool percpu) BPF_MAP_TYPE_CGROUP_STORAGE; int fd; - fd = bpf_map_create(type, NULL, sizeof(struct bpf_cgroup_storage_key), - TEST_DATA_LEN, 0, NULL); + fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key), + TEST_DATA_LEN, 0, 0); if (fd < 0) { if (skip_unsupported_map(type)) return -1; @@ -604,15 +600,8 @@ static int create_cgroup_storage(bool percpu) * int cnt; * struct bpf_spin_lock l; * }; - * struct bpf_timer { - * __u64 :64; - * __u64 :64; - * } __attribute__((aligned(8))); - * struct timer { - * struct bpf_timer t; - * }; */ -static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"; +static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; static __u32 btf_raw_types[] = { /* int */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -623,11 +612,6 @@ static __u32 btf_raw_types[] = { BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ - /* struct bpf_timer */ /* [4] */ - BTF_TYPE_ENC(25, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0), 16), - /* struct timer */ /* [5] */ - BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), - BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ }; static int load_btf(void) @@ -653,7 +637,7 @@ static int load_btf(void) memcpy(ptr, btf_str_sec, hdr.str_len); ptr += hdr.str_len; - btf_fd = 
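/*
 * Sketch of the opts-based map API the test_verifier hunks above step
 * away from: bpf_map_create() (libbpf >= 0.7) subsumes both
 * bpf_create_map() and bpf_create_map_in_map(), moving the flags and
 * the inner-map fd into bpf_map_create_opts. Illustration only.
 */
#include <unistd.h>
#include <bpf/bpf.h>

int demo_create_maps(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, hash_opts,
		.map_flags = BPF_F_NO_PREALLOC);
	int hash_fd, inner_fd, outer_fd;

	/* hash map: the no-prealloc flag now rides in opts */
	hash_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
				 sizeof(int), sizeof(int), 1, &hash_opts);

	/* map-in-map: the inner fd moves from an argument into opts */
	inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL,
				  sizeof(int), sizeof(int), 1, NULL);
	LIBBPF_OPTS(bpf_map_create_opts, outer_opts, .inner_map_fd = inner_fd);
	outer_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
				  sizeof(int), sizeof(int), 1, &outer_opts);

	close(inner_fd);	/* the outer map holds its own reference */
	return (hash_fd < 0 || outer_fd < 0) ? -1 : 0;
}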
bpf_btf_load(raw_btf, ptr - raw_btf, NULL); + btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0); free(raw_btf); if (btf_fd < 0) return -1; @@ -662,17 +646,22 @@ static int load_btf(void) static int create_map_spin_lock(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, + struct bpf_create_map_attr attr = { + .name = "test_map", + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = 4, + .value_size = 8, + .max_entries = 1, .btf_key_type_id = 1, .btf_value_type_id = 3, - ); + }; int fd, btf_fd; btf_fd = load_btf(); if (btf_fd < 0) return -1; - opts.btf_fd = btf_fd; - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 8, 1, &opts); + attr.btf_fd = btf_fd; + fd = bpf_create_map_xattr(&attr); if (fd < 0) printf("Failed to create map with spin_lock\n"); return fd; @@ -680,43 +669,29 @@ static int create_map_spin_lock(void) static int create_sk_storage_map(void) { - LIBBPF_OPTS(bpf_map_create_opts, opts, + struct bpf_create_map_attr attr = { + .name = "test_map", + .map_type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = 4, + .value_size = 8, + .max_entries = 0, .map_flags = BPF_F_NO_PREALLOC, .btf_key_type_id = 1, .btf_value_type_id = 3, - ); + }; int fd, btf_fd; btf_fd = load_btf(); if (btf_fd < 0) return -1; - opts.btf_fd = btf_fd; - fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "test_map", 4, 8, 0, &opts); - close(opts.btf_fd); + attr.btf_fd = btf_fd; + fd = bpf_create_map_xattr(&attr); + close(attr.btf_fd); if (fd < 0) printf("Failed to create sk_storage_map\n"); return fd; } -static int create_map_timer(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .btf_key_type_id = 1, - .btf_value_type_id = 5, - ); - int fd, btf_fd; - - btf_fd = load_btf(); - if (btf_fd < 0) - return -1; - - opts.btf_fd = btf_fd; - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 16, 1, &opts); - if (fd < 0) - printf("Failed to create map with timer\n"); - return fd; -} - static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, @@ -743,7 +718,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; - int *fixup_map_timer = test->fixup_map_timer; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -929,13 +903,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_ringbuf++; } while (*fixup_map_ringbuf); } - if (*fixup_map_timer) { - map_fds[21] = create_map_timer(); - do { - prog[*fixup_map_timer].imm = map_fds[21]; - fixup_map_timer++; - } while (*fixup_map_timer); - } } struct libcap { @@ -1007,7 +974,7 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, if (err) { switch (saved_errno) { - case ENOTSUPP: + case 524/*ENOTSUPP*/: printf("Did not run the program (not supported) "); return 0; case EPERM: @@ -1074,7 +1041,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; - LIBBPF_OPTS(bpf_prog_load_opts, opts); + struct bpf_load_program_attr attr; int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; @@ -1114,34 +1081,32 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? 
test->errstr_unpriv : test->errstr; - - opts.expected_attach_type = test->expected_attach_type; + memset(&attr, 0, sizeof(attr)); + attr.prog_type = prog_type; + attr.expected_attach_type = test->expected_attach_type; + attr.insns = prog; + attr.insns_cnt = prog_len; + attr.license = "GPL"; if (verbose) - opts.log_level = 1; + attr.log_level = 1; else if (expected_ret == VERBOSE_ACCEPT) - opts.log_level = 2; + attr.log_level = 2; else - opts.log_level = 4; - opts.prog_flags = pflags; + attr.log_level = 4; + attr.prog_flags = pflags; if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { - int attach_btf_id; - - attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, - opts.expected_attach_type); - if (attach_btf_id < 0) { + attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, + attr.expected_attach_type); + if (attr.attach_btf_id < 0) { printf("FAIL\nFailed to find BTF ID for '%s'!\n", test->kfunc); (*errors)++; return; } - - opts.attach_btf_id = attach_btf_id; } - opts.log_buf = bpf_vlog; - opts.log_size = sizeof(bpf_vlog); - fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts); + fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); saved_errno = errno; /* BPF_PROG_TYPE_TRACING requires more setup and @@ -1154,12 +1119,6 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto close_fds; } - if (fd_prog < 0 && saved_errno == ENOTSUPP) { - printf("SKIP (program uses an unsupported feature)\n"); - skips++; - goto close_fds; - } - alignment_prevented_execution = 0; if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { @@ -1394,9 +1353,6 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - bpf_semi_rand_init(); return do_test(unpriv, from, to); } diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index d10cefd6eb..637fcf4fe4 100644 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -1,8 +1,5 @@ #!/bin/sh -# Kselftest framework requirement - SKIP code is 4. -readonly KSFT_SKIP=4 - cleanup() { if [ "$?" = "0" ]; then @@ -20,7 +17,7 @@ cleanup() ip link set dev lo xdp off 2>/dev/null > /dev/null if [ $? 
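/*
 * For BPF_PROG_TYPE_TRACING the hunk above must resolve the target
 * kernel function to a vmlinux BTF id before loading. A hedged sketch
 * of the same flow through the opts-based loader, assuming an fentry
 * attachment (the selftest passes whatever expected_attach_type each
 * case specifies):
 */
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int demo_load_tracing(const struct bpf_insn *insns, size_t cnt,
			     const char *kfunc)
{
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.expected_attach_type = BPF_TRACE_FENTRY);
	int btf_id = libbpf_find_vmlinux_btf_id(kfunc, BPF_TRACE_FENTRY);

	if (btf_id < 0)
		return btf_id;			/* target not in vmlinux BTF */
	opts.attach_btf_id = btf_id;
	return bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
			     insns, cnt, &opts);
}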
-ne 0 ];then echo "selftests: [SKIP] Could not run test without the ip xdp support" - exit $KSFT_SKIP + exit 0 fi set -e diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index 57c8db9972..c033850886 100644 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -52,8 +52,8 @@ test_xdp_redirect() return 0 fi - ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null - ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null + ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null + ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index 05f8727409..bedff7aa70 100644 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -92,7 +92,7 @@ setup_ns() # Add a neigh entry for IPv4 ping test ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 ip -n ns$i link set veth0 $mode obj \ - xdp_dummy.o sec xdp &> /dev/null || \ + xdp_dummy.o sec xdp_dummy &> /dev/null || \ { test_fail "Unable to load dummy xdp" && exit 1; } IFACES="$IFACES veth$i" veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}') diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index a3a1eaee26..995278e684 100644 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -107,9 +107,9 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 -ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp +ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp -ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp +ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy trap cleanup EXIT diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index 0cbc7604a2..bb8b0da916 100644 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 # Author: Jesper Dangaard Brouer -# Kselftest framework requirement - SKIP code is 4. -readonly KSFT_SKIP=4 - # Allow wrapper scripts to name test if [ -z "$TESTNAME" ]; then TESTNAME=xdp_vlan @@ -97,7 +94,7 @@ while true; do -h | --help ) usage; echo "selftests: $TESTNAME [SKIP] usage help info requested" - exit $KSFT_SKIP + exit 0 ;; * ) shift @@ -120,7 +117,7 @@ fi ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null if [ $? 
-ne 0 ]; then echo "selftests: $TESTNAME [SKIP] need ip xdp support" - exit $KSFT_SKIP + exit 0 fi # Interactive mode likely require us to cleanup netns diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 795b6798cc..800d503e5c 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -1,11 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (C) 2019 Netronome Systems, Inc. */ /* Copyright (C) 2020 Facebook, Inc. */ #include -#include #include -#include -#include #include "testing_helpers.h" int parse_num_list(const char *s, bool **num_set, int *num_set_len) @@ -82,61 +78,3 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) } return info->prog_id; } - -int extra_prog_load_log_flags = 0; - -int bpf_prog_test_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, - .kernel_log_level = extra_prog_load_log_flags, - ); - struct bpf_object *obj; - struct bpf_program *prog; - __u32 flags; - int err; - - obj = bpf_object__open_file(file, &opts); - if (!obj) - return -errno; - - prog = bpf_object__next_program(obj, NULL); - if (!prog) { - err = -ENOENT; - goto err_out; - } - - if (type != BPF_PROG_TYPE_UNSPEC) - bpf_program__set_type(prog, type); - - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; - bpf_program__set_flags(prog, flags); - - err = bpf_object__load(obj); - if (err) - goto err_out; - - *pobj = obj; - *prog_fd = bpf_program__fd(prog); - - return 0; -err_out: - bpf_object__close(obj); - return err; -} - -int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32, - .log_level = extra_prog_load_log_flags, - .log_buf = log_buf, - .log_size = log_buf_sz, - ); - - return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts); -} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index f46ebc476e..d4f8e74961 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -6,9 +6,3 @@ int parse_num_list(const char *s, bool **set, int *set_len); __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); -int bpf_prog_test_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); -int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz); diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 7b7f918eda..e7a19b04d4 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include #include #include diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index b39665f335..6fb52d8cfd 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -71,6 +71,8 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + 
.errstr_unpriv = "R0 leaks addr into mem", }, { "Can't use cmpxchg on uninit src reg", @@ -118,46 +120,6 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, -}, -{ - "Dest pointer in r0 - fail", - .insns = { - /* val = 0; */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* r0 = &val */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), - /* r0 = atomic_cmpxchg(&val, r0, 1); */ - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), - /* if (r0 != 0) exit(1); */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - /* exit(0); */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 leaks addr into mem", -}, -{ - "Dest pointer in r0 - succeed", - .insns = { - /* r0 = &val */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), - /* val = r0; */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - /* r0 = atomic_cmpxchg(&val, r0, 0); */ - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), - /* r1 = *r0 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), - /* exit(0); */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr into mem", }, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index d7b74eb283..336a749673 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1,26 +1,3 @@ -{ - "calls: invalid kfunc call not eliminated", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .result = REJECT, - .errstr = "invalid kernel function call not eliminated in verifier pass", -}, -{ - "calls: invalid kfunc call unreachable", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .result = ACCEPT, -}, { "calls: basic sanity", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c index a2b006e2fd..d78627be06 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c @@ -229,24 +229,6 @@ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sk_lookup, local_port)), - /* 1-byte read from ingress_ifindex field */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex)), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex) + 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex) + 3), - /* 2-byte read from ingress_ifindex field */ - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex)), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2), - /* 4-byte read from ingress_ifindex field */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex)), - /* 8-byte read from sk field */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sk_lookup, sk)), @@ -369,20 
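/*
 * The atomic_cmpxchg hunks above fold the unpriv expectations into the
 * surviving case: the same program is ACCEPTed for a privileged loader
 * but REJECTed unprivileged, because BPF_CMPXCHG uses R0 -- here a
 * stack pointer -- against memory, which the verifier treats as a
 * pointer leak for unprivileged programs. Condensed from the removed
 * "Dest pointer in r0" case, a minimal table entry with that split
 * verdict looks like:
 */
{
	"demo: cmpxchg with pointer in r0, unpriv split verdict",
	.insns = {
		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),	/* val = 0 */
		BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),	/* r0 = &val */
		BPF_MOV64_IMM(BPF_REG_1, 1),
		BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	},
	.result = ACCEPT,		/* privileged: fine */
	.result_unpriv = REJECT,	/* unpriv: pointer in R0 leaks */
	.errstr_unpriv = "R0 leaks addr into mem",
},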
+351,6 @@ .expected_attach_type = BPF_SK_LOOKUP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, -{ - "invalid 8-byte read from bpf_sk_lookup ingress_ifindex field", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_sk_lookup, ingress_ifindex)), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_LOOKUP, - .expected_attach_type = BPF_SK_LOOKUP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */ { "invalid 4-byte read from bpf_sk_lookup sk field", diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 83cecfbd67..2022c0f2cd 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -502,7 +502,7 @@ "check skb->hash byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -537,7 +537,7 @@ "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -646,7 +646,7 @@ "check skb->hash half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -661,7 +661,7 @@ "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), #else @@ -676,7 +676,7 @@ "check skb->hash half load not permitted, unaligned 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), #else @@ -693,7 +693,7 @@ "check skb->hash half load not permitted, unaligned 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -951,7 +951,7 @@ "check skb->data half load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, data)), #else @@ -1057,66 +1057,6 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, -{ - "padding after gso_size is not accessible", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetofend(struct __sk_buff, gso_size)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .result_unpriv = REJECT, - .errstr = "invalid bpf_context access off=180 size=4", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "read hwtstamp from CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hwtstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "read hwtstamp from CGROUP_SKB", - .insns = { - 
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, hwtstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "write hwtstamp from CGROUP_SKB", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, hwtstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .result_unpriv = REJECT, - .errstr = "invalid bpf_context access off=184 size=8", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "read hwtstamp from CLS", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hwtstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, { "check wire_len is not readable by sockets", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index 79021c30e5..df215e0045 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -62,11 +62,6 @@ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef), - BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), BPF_MOV32_REG(BPF_REG_2, BPF_REG_2), BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), @@ -78,69 +73,11 @@ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), - BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef), - BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), - BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL), - BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b), - BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL), BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL), - BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL), - BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 2, -}, -{ - "jit: various div tests", - .insns = { - BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL), - BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL), - BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), - BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL), - BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL), - BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL), - BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL), - BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), - BPF_LD_IMM64(BPF_REG_3, 0xbeefULL), - BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3), - BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL), - BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), - BPF_LD_IMM64(BPF_REG_3, 0x2bULL), - BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3), - BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 
2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2), + BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), BPF_MOV64_IMM(BPF_REG_0, 2), diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c index 5c8944d0b0..2cab6a3966 100644 --- a/tools/testing/selftests/bpf/verifier/lwt.c +++ b/tools/testing/selftests/bpf/verifier/lwt.c @@ -174,7 +174,7 @@ "check skb->tc_classid half load not permitted for lwt prog", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, tc_classid)), #else diff --git a/tools/testing/selftests/bpf/verifier/map_in_map.c b/tools/testing/selftests/bpf/verifier/map_in_map.c index 128a348b76..2798927ee9 100644 --- a/tools/testing/selftests/bpf/verifier/map_in_map.c +++ b/tools/testing/selftests/bpf/verifier/map_in_map.c @@ -18,40 +18,6 @@ .fixup_map_in_map = { 3 }, .result = ACCEPT, }, -{ - "map in map state pruning", - .insns = { - BPF_ST_MEM(0, BPF_REG_10, -4, 0), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 11), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_in_map = { 4, 14 }, - .flags = BPF_F_TEST_STATE_FREQ, - .result = VERBOSE_ACCEPT, - .errstr = "processed 25 insns", - .prog_type = BPF_PROG_TYPE_XDP, -}, { "invalid inner map pointer", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c index d8a9b1a1f9..471c1a5950 100644 --- a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c +++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c @@ -2,7 +2,7 @@ "check bpf_perf_event_data->sample_period byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -18,7 +18,7 @@ "check bpf_perf_event_data->sample_period half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -34,7 +34,7 @@ 
"check bpf_perf_event_data->sample_period word load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index 682519769f..7e50cb8087 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -132,77 +132,6 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, -{ - "precision tracking for u32 spill/fill", - .insns = { - BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), - BPF_MOV32_IMM(BPF_REG_6, 32), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_MOV32_IMM(BPF_REG_6, 4), - /* Additional insns to introduce a pruning point. */ - BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - /* u32 spill/fill */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_10, -8), - /* out-of-bound map value access for r6=32 */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 15 }, - .result = REJECT, - .errstr = "R0 min value is outside of the allowed memory range", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "precision tracking for u32 spills, u64 fill", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_MOV32_IMM(BPF_REG_7, 0xffffffff), - /* Additional insns to introduce a pruning point. */ - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), - /* u32 spills, u64 fill */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, -8), - /* if r8 != X goto pc+1 r8 known in fallthrough branch */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0xffffffff, 1), - BPF_MOV64_IMM(BPF_REG_3, 1), - /* if r8 == X goto pc+1 condition always true on first - * traversal, so starts backtracking to mark r8 as requiring - * precision. r7 marked as needing precision. r6 not marked - * since it's not tracked. - */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 0xffffffff, 1), - /* fails if r8 correctly marked unknown after fill. 
*/ - BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "div by zero", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, { "allocated_stack", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 8cfc5349d2..0b943897aa 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -58,34 +58,6 @@ .result = ACCEPT, .result_unpriv = ACCEPT, }, -{ - "check with invalid reg offset 0", - .insns = { - /* reserve 8 byte ringbuf memory */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), - /* store a pointer to the reserved memory in R6 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - /* add invalid offset to memory or NULL */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - /* check whether the reservation was successful */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - /* should not be able to access *(R7) = 0 */ - BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0), - /* submit the reserved ringbuf memory */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 1 }, - .result = REJECT, - .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", -}, { "check corrupted spill/fill", .insns = { @@ -132,213 +104,3 @@ .result = ACCEPT, .retval = POINTER_VALUE, }, -{ - "Spill and refill a u32 const scalar. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u32 *)(r10 -8) */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv20 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=inv20 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=inv20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill a u32 const, refill from another half of the uninit u32 from the stack", - .insns = { - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u32 *)(r10 -4) fp-8=????rrrr*/ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid read from stack off -4+0 size 4", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill a u32 const scalar. Refill as u16. 
Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u16 *)(r10 -8) */ - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill u32 const scalars. Refill as u64. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r6 = 0 */ - BPF_MOV32_IMM(BPF_REG_6, 0), - /* r7 = 20 */ - BPF_MOV32_IMM(BPF_REG_7, 20), - /* *(u32 *)(r10 -4) = r6 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), - /* *(u32 *)(r10 -8) = r7 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), - /* r4 = *(u64 *)(r10 -8) */ - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u16 *)(r10 -6) */ - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -6), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill and refill a u32 const scalar at non 8byte aligned stack addr. 
Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* *(u32 *)(r10 -4) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4), - /* r4 = *(u32 *)(r10 -4), */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=U32_MAX */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill and refill a umax=40 bounded scalar. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, tstamp)), - BPF_JMP_IMM(BPF_JLE, BPF_REG_4, 40, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* *(u32 *)(r10 -8) = r4 R4=inv,umax=40 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = (*u32 *)(r10 - 8) */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8), - /* r2 += r4 R2=pkt R4=inv,umax=40 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_4), - /* r0 = r2 R2=pkt,umax=40 R4=inv,umax=40 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 20), - /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 1), - /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill a u32 scalar at fp-4 and then at fp-8", - .insns = { - /* r4 = 4321 */ - BPF_MOV32_IMM(BPF_REG_4, 4321), - /* *(u32 *)(r10 -4) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u64 *)(r10 -8) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 359f3e8f8b..4d347bc53a 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1078,7 +1078,7 @@ .errstr_unpriv = "R0 pointer -= pointer prohibited", }, { - "map access: trying to leak tainted dst reg", + "map access: trying to leak tained dst reg", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index b3afd43549..8889b3f552 100644 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -4,35 +4,18 @@ set -u set -e -# This script currently only works for x86_64 and 
s390x, as -# it is based on the VM image used by the BPF CI, which is -# available only for these architectures. -ARCH="$(uname -m)" -case "${ARCH}" in -s390x) - QEMU_BINARY=qemu-system-s390x - QEMU_CONSOLE="ttyS1" - QEMU_FLAGS=(-smp 2) - BZIMAGE="arch/s390/boot/compressed/vmlinux" - ;; -x86_64) - QEMU_BINARY=qemu-system-x86_64 - QEMU_CONSOLE="ttyS0,115200" - QEMU_FLAGS=(-cpu host -smp 8) - BZIMAGE="arch/x86/boot/bzImage" - ;; -*) - echo "Unsupported architecture" - exit 1 - ;; -esac +# This script currently only works for x86_64, as +# it is based on the VM image used by the BPF CI which is +# x86_64. +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}" +X86_BZIMAGE="arch/x86/boot/bzImage" DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" -KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" -INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config" +KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config" +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" LOG_FILE="${LOG_FILE_BASE}.log" @@ -102,7 +85,7 @@ newest_rootfs_version() { { for file in "${!URLS[@]}"; do - if [[ $file =~ ^"${ARCH}"/libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then + if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then echo "${BASH_REMATCH[1]}" fi done @@ -119,7 +102,7 @@ download_rootfs() exit 1 fi - download "${ARCH}/libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | + download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | zstd -d | sudo tar -C "$dir" -x } @@ -241,12 +224,13 @@ EOF -nodefaults \ -display none \ -serial mon:stdio \ - "${qemu_flags[@]}" \ + -cpu kvm64 \ -enable-kvm \ - -m 4G \ + -smp 4 \ + -m 2G \ -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ -kernel "${kernel_bzimage}" \ - -append "root=/dev/vda rw console=${QEMU_CONSOLE}" + -append "root=/dev/vda rw console=ttyS0,115200" } copy_logs() @@ -298,7 +282,7 @@ main() local kernel_checkout=$(realpath "${script_dir}"/../../../../) # By default the script searches for the kernel in the checkout directory but # it also obeys environment variables O= and KBUILD_OUTPUT= - local kernel_bzimage="${kernel_checkout}/${BZIMAGE}" + local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}" local command="${DEFAULT_COMMAND}" local update_image="no" local exit_command="poweroff -f" @@ -353,13 +337,13 @@ main() if is_rel_path "${O}"; then O="$(realpath "${PWD}/${O}")" fi - kernel_bzimage="${O}/${BZIMAGE}" + kernel_bzimage="${O}/${X86_BZIMAGE}" make_command="${make_command} O=${O}" elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then if is_rel_path "${KBUILD_OUTPUT}"; then KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")" fi - kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}" + kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}" make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}" fi diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c index 51c8224b4c..f5ffba341c 100644 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ 
b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -85,7 +85,10 @@ int main(int argc, char **argv) { int prog_fd, group_all, mac_map; struct bpf_program *ingress_prog, *egress_prog; - int i, err, ret, opt, egress_prog_fd = 0; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; + int i, ret, opt, egress_prog_fd = 0; struct bpf_devmap_val devmap_val; bool attach_egress_prog = false; unsigned char mac_addr[6]; @@ -144,14 +147,10 @@ int main(int argc, char **argv) printf("\n"); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - err = libbpf_get_error(obj); - if (err) + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) goto err_out; - err = bpf_object__load(obj); - if (err) - goto err_out; - prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL)); if (attach_egress_prog) group_all = bpf_object__find_map_fd_by_name(obj, "map_egress"); diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index baa870a759..842d9155d3 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -22,7 +22,6 @@ #include "bpf/libbpf.h" #include "xdping.h" -#include "testing_helpers.h" static int ifindex; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -174,13 +173,14 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { + if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { fprintf(stderr, "load of %s failed\n", filename); return 1; } - main_prog = bpf_object__find_program_by_name(obj, - server ? "xdping_server" : "xdping_client"); + main_prog = bpf_object__find_program_by_title(obj, + server ? "xdpserver" : + "xdpclient"); if (main_prog) prog_fd = bpf_program__fd(main_prog); if (!main_prog || prog_fd < 0) { @@ -188,7 +188,7 @@ int main(int argc, char **argv) return 1; } - map = bpf_object__next_map(obj, NULL); + map = bpf_map__next(NULL, obj); if (map) map_fd = bpf_map__fd(map); if (!map || map_fd < 0) { diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 0a5d23da48..f53ce2683f 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -19,7 +19,7 @@ * Virtual Ethernet interfaces. * * For each mode, the following tests are run: - * a. nopoll - soft-irq processing in run-to-completion mode + * a. nopoll - soft-irq processing * b. poll - using poll() syscall * c. Socket Teardown * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy @@ -45,10 +45,6 @@ * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0, * then remove xsk sockets from queue 0 on both veth interfaces and * finally run a traffic on queues ids 1 - * g. unaligned mode - * h. tests for invalid and corner case Tx descriptors so that the correct ones - * are discarded and let through, respectively. - * i. 2K frame size tests * * Total tests: 12 * @@ -100,12 +96,6 @@ #include "xdpxceiver.h" #include "../kselftest.h" -/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf. 
- * Until xdpxceiver is either moved or re-writed into libxdp, suppress - * deprecation warnings in this file - */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; static const char *IP1 = "192.168.100.162"; @@ -122,10 +112,13 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) -#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV" - -#define print_ksft_result(test) \ - (ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name)) +#define print_ksft_result(void)\ + (ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\ + test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\ + test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\ + test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\ + test_type == TEST_TYPE_STATS ? "Stats" : "",\ + test_type == TEST_TYPE_BPF_RES ? "BPF RES" : "")) static void memset32_htonl(void *dest, u32 val, u32 size) { @@ -242,46 +235,80 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); } -static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) +static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx) { struct xsk_umem_config cfg = { .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, - .frame_size = umem->frame_size, - .frame_headroom = umem->frame_headroom, + .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .frame_headroom = frame_headroom, .flags = XSK_UMEM__DEFAULT_FLAGS }; + struct xsk_umem_info *umem; int ret; - if (umem->unaligned_mode) - cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; + umem = calloc(1, sizeof(struct xsk_umem_info)); + if (!umem) + exit_with_error(errno); ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, &cfg); if (ret) - return ret; + exit_with_error(-ret); umem->buffer = buffer; - return 0; + + data->umem_arr[idx] = umem; } -static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, - struct ifobject *ifobject, u32 qid) +static void xsk_populate_fill_ring(struct xsk_umem_info *umem) +{ + int ret, i; + u32 idx = 0; + + ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); + if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) + exit_with_error(-ret); + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE; + xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); +} + +static int xsk_configure_socket(struct ifobject *ifobject, int idx) { struct xsk_socket_config cfg; + struct xsk_socket_info *xsk; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; + int ret; - xsk->umem = umem; - cfg.rx_size = xsk->rxqsize; + xsk = calloc(1, sizeof(struct xsk_socket_info)); + if (!xsk) + exit_with_error(errno); + + xsk->umem = ifobject->umem; + cfg.rx_size = rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; cfg.libbpf_flags = 0; - cfg.xdp_flags = ifobject->xdp_flags; - cfg.bind_flags = ifobject->bind_flags; + cfg.xdp_flags = xdp_flags; + cfg.bind_flags = xdp_bind_flags; - txr = ifobject->tx_on ? &xsk->tx : NULL; - rxr = ifobject->rx_on ? 
&xsk->rx : NULL; - return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg); + if (test_type != TEST_TYPE_BIDI) { + rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL; + txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL; + } else { + rxr = &xsk->rx; + txr = &xsk->tx; + } + + ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx, + ifobject->umem->umem, rxr, txr, &cfg); + if (ret) + return 1; + + ifobject->xsk_arr[idx] = xsk; + + return 0; } static struct option long_options[] = { @@ -327,44 +354,45 @@ static int switch_namespace(const char *nsname) return nsfd; } -static bool validate_interface(struct ifobject *ifobj) +static int validate_interfaces(void) { - if (!strcmp(ifobj->ifname, "")) - return false; - return true; + bool ret = true; + + for (int i = 0; i < MAX_INTERFACES; i++) { + if (!strcmp(ifdict[i]->ifname, "")) { + ret = false; + ksft_test_result_fail("ERROR: interfaces: -i , -i ,."); + } + } + return ret; } -static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc, - char **argv) +static void parse_command_line(int argc, char **argv) { - struct ifobject *ifobj; - u32 interface_nb = 0; - int option_index, c; + int option_index, interface_index = 0, c; opterr = 0; for (;;) { - char *sptr, *token; - c = getopt_long(argc, argv, "i:Dv", long_options, &option_index); + if (c == -1) break; switch (c) { case 'i': - if (interface_nb == 0) - ifobj = ifobj_tx; - else if (interface_nb == 1) - ifobj = ifobj_rx; - else + if (interface_index == MAX_INTERFACES) break; + char *sptr, *token; sptr = strndupa(optarg, strlen(optarg)); - memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); + memcpy(ifdict[interface_index]->ifname, + strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); token = strsep(&sptr, ","); if (token) - memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS); - interface_nb++; + memcpy(ifdict[interface_index]->nsname, token, + MAX_INTERFACES_NAMESPACE_CHARS); + interface_index++; break; case 'D': opt_pkt_dump = true; @@ -377,85 +405,11 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj ksft_exit_xfail(); } } -} -static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, - struct ifobject *ifobj_rx) -{ - u32 i, j; - - for (i = 0; i < MAX_INTERFACES; i++) { - struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; - - ifobj->umem = &ifobj->umem_arr[0]; - ifobj->xsk = &ifobj->xsk_arr[0]; - ifobj->use_poll = false; - ifobj->pacing_on = true; - ifobj->pkt_stream = test->pkt_stream_default; - - if (i == 0) { - ifobj->rx_on = false; - ifobj->tx_on = true; - } else { - ifobj->rx_on = true; - ifobj->tx_on = false; - } - - for (j = 0; j < MAX_SOCKETS; j++) { - memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j])); - memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); - ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS; - ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; - } + if (!validate_interfaces()) { + usage(basename(argv[0])); + ksft_exit_xfail(); } - - test->ifobj_tx = ifobj_tx; - test->ifobj_rx = ifobj_rx; - test->current_step = 0; - test->total_steps = 1; - test->nb_sockets = 1; -} - -static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, - struct ifobject *ifobj_rx, enum test_mode mode) -{ - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = test->pkt_stream_default; - memset(test, 0, sizeof(*test)); - test->pkt_stream_default = pkt_stream; - - for (i = 0; i < MAX_INTERFACES; i++) { - struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; - - ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; - if (mode == TEST_MODE_SKB) - ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE; - else - ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE; - - ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY; - } - - __test_spec_init(test, ifobj_tx, ifobj_rx); -} - -static void test_spec_reset(struct test_spec *test) -{ - __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); -} - -static void test_spec_set_name(struct test_spec *test, const char *name) -{ - strncpy(test->name, name, MAX_TEST_NAME_SIZE); -} - -static void pkt_stream_reset(struct pkt_stream *pkt_stream) -{ - if (pkt_stream) - pkt_stream->rx_pkt_nb = 0; } static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) @@ -466,104 +420,29 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) return &pkt_stream->pkts[pkt_nb]; } -static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream) -{ - while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { - if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) - return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; - pkt_stream->rx_pkt_nb++; - } - return NULL; -} - -static void pkt_stream_delete(struct pkt_stream *pkt_stream) -{ - free(pkt_stream->pkts); - free(pkt_stream); -} - -static void pkt_stream_restore_default(struct test_spec *test) -{ - if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) { - pkt_stream_delete(test->ifobj_tx->pkt_stream); - test->ifobj_tx->pkt_stream = test->pkt_stream_default; - } - test->ifobj_rx->pkt_stream = test->pkt_stream_default; -} - -static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) -{ - struct pkt_stream *pkt_stream; - - pkt_stream = calloc(1, sizeof(*pkt_stream)); - if (!pkt_stream) - return NULL; - - pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); - if (!pkt_stream->pkts) { - free(pkt_stream); - return NULL; - } - - pkt_stream->nb_pkts = nb_pkts; - return pkt_stream; -} - -static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) +static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; u32 i; - pkt_stream = __pkt_stream_alloc(nb_pkts); + pkt_stream = malloc(sizeof(*pkt_stream)); if 
(!pkt_stream) exit_with_error(ENOMEM); + pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); + if (!pkt_stream->pkts) + exit_with_error(ENOMEM); + pkt_stream->nb_pkts = nb_pkts; for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size; + pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE; pkt_stream->pkts[i].len = pkt_len; pkt_stream->pkts[i].payload = i; - - if (pkt_len > umem->frame_size) - pkt_stream->pkts[i].valid = false; - else - pkt_stream->pkts[i].valid = true; } return pkt_stream; } -static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem, - struct pkt_stream *pkt_stream) -{ - return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len); -} - -static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) -{ - struct pkt_stream *pkt_stream; - - pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len); - test->ifobj_tx->pkt_stream = pkt_stream; - test->ifobj_rx->pkt_stream = pkt_stream; -} - -static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) -{ - struct xsk_umem_info *umem = test->ifobj_tx->umem; - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default); - for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) { - pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset; - pkt_stream->pkts[i].len = pkt_len; - } - - test->ifobj_tx->pkt_stream = pkt_stream; - test->ifobj_rx->pkt_stream = pkt_stream; -} - static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) { struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); @@ -574,8 +453,6 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) if (!pkt) return NULL; - if (!pkt->valid || pkt->len < PKT_SIZE) - return pkt; data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); @@ -590,26 +467,6 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) return pkt; } -static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) -{ - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = __pkt_stream_alloc(nb_pkts); - if (!pkt_stream) - exit_with_error(ENOMEM); - - test->ifobj_tx->pkt_stream = pkt_stream; - test->ifobj_rx->pkt_stream = pkt_stream; - - for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = pkts[i].addr; - pkt_stream->pkts[i].len = pkts[i].len; - pkt_stream->pkts[i].payload = i; - pkt_stream->pkts[i].valid = pkts[i].valid; - } -} - static void pkt_dump(void *pkt, u32 len) { char s[INET_ADDRSTRLEN]; @@ -647,28 +504,9 @@ static void pkt_dump(void *pkt, u32 len) fprintf(stdout, "---------------------------------------\n"); } -static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, - u64 pkt_stream_addr) +static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc) { - u32 headroom = umem->unaligned_mode ? 
0 : umem->frame_headroom; - u32 offset = addr % umem->frame_size, expected_offset = 0; - - if (!pkt_stream->use_addr_for_fill) - pkt_stream_addr = 0; - - expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; - - if (offset == expected_offset) - return true; - - ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset, - offset); - return false; -} - -static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) -{ - void *data = xsk_umem__get_data(buffer, addr); + void *data = xsk_umem__get_data(buffer, desc->addr); struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); if (!pkt) { @@ -676,24 +514,19 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) return false; } - if (len < PKT_SIZE) { - /*Do not try to verify packets that are smaller than minimum size. */ - return true; - } - - if (pkt->len != len) { - ksft_test_result_fail - ("ERROR: [%s] expected length [%d], got length [%d]\n", - __func__, pkt->len, len); - return false; - } - if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); - if (opt_pkt_dump) + if (opt_pkt_dump && test_type != TEST_TYPE_STATS) pkt_dump(data, PKT_SIZE); + if (pkt->len != desc->len) { + ksft_test_result_fail + ("ERROR: [%s] expected length [%d], got length [%d]\n", + __func__, pkt->len, desc->len); + return false; + } + if (pkt->payload != seqnum) { ksft_test_result_fail ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n", @@ -725,20 +558,14 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) unsigned int rcvd; u32 idx; + if (!xsk->outstanding_tx) + return; + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd) { - if (rcvd > xsk->outstanding_tx) { - u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); - - ksft_test_result_fail("ERROR: [%s] Too many packets completed\n", - __func__); - ksft_print_msg("Last completion address: %llx\n", addr); - return; - } - xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; } @@ -747,15 +574,15 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk, struct pollfd *fds) { - struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream); - struct xsk_umem_info *umem = xsk->umem; - u32 idx_rx = 0, idx_fq = 0, rcvd, i; + u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0; + struct pkt *pkt; int ret; + pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++); while (pkt) { rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(-ret); @@ -763,57 +590,40 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * continue; } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(-ret); } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { const 
struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); u64 addr = desc->addr, orig; - if (!pkt) { - ksft_test_result_fail("ERROR: [%s] Received too many packets.\n", - __func__); - ksft_print_msg("Last packet has addr: %llx len: %u\n", - addr, desc->len); - return; - } - orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); - - if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len)) - return; - if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr)) + if (!is_pkt_valid(pkt, xsk->umem->buffer, desc)) return; - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; + pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++); } - xsk_ring_prod__submit(&umem->fq, rcvd); + xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); - - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= rcvd; - if (pkts_in_flight < umem->num_frames) - pthread_cond_signal(&pacing_cond); - pthread_mutex_unlock(&pacing_mutex); } } static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) { struct xsk_socket_info *xsk = ifobject->xsk; - u32 i, idx, valid_pkts = 0; + u32 i, idx; while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) complete_pkts(xsk, BATCH_SIZE); @@ -828,23 +638,15 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) tx_desc->addr = pkt->addr; tx_desc->len = pkt->len; pkt_nb++; - if (pkt->valid) - valid_pkts++; } - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight += valid_pkts; - if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) { - kick_tx(xsk); - pthread_cond_wait(&pacing_cond, &pacing_mutex); - } - pthread_mutex_unlock(&pacing_mutex); - xsk_ring_prod__submit(&xsk->tx, i); - xsk->outstanding_tx += valid_pkts; + if (stat_test_type != STAT_TEST_TX_INVALID) + xsk->outstanding_tx += i; + else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); complete_pkts(xsk, i); - usleep(10); return i; } @@ -856,25 +658,29 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) static void send_pkts(struct ifobject *ifobject) { - struct pollfd fds = { }; + struct pollfd fds[MAX_SOCKS] = { }; u32 pkt_cnt = 0; - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLOUT; + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); + fds[0].events = POLLOUT; while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - if (ifobject->use_poll) { + u32 sent; + + if (test_type == TEST_TYPE_POLL) { int ret; - ret = poll(&fds, 1, POLL_TMOUT); + ret = poll(fds, 1, POLL_TMOUT); if (ret <= 0) continue; - if (!(fds.revents & POLLOUT)) + if (!(fds[0].revents & POLLOUT)) continue; } - pkt_cnt += __send_pkts(ifobject, pkt_cnt); + sent = __send_pkts(ifobject, pkt_cnt); + pkt_cnt += sent; + usleep(10); } wait_for_tx_completion(ifobject->xsk); @@ -892,7 +698,7 @@ static bool rx_stats_are_valid(struct ifobject *ifobject) optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) { - ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", + ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", __func__, -err, strerror(-err)); return true; } @@ -933,7 +739,7 @@ static void tx_stats_validate(struct ifobject *ifobject) optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) { - ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", + 
ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", __func__, -err, strerror(-err)); return; } @@ -945,62 +751,71 @@ static void tx_stats_validate(struct ifobject *ifobject) __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); } -static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) +static void thread_common_ops(struct ifobject *ifobject, void *bufs) { + u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - u32 i; + size_t mmap_sz = umem_sz; + int ctr = 0; + int ret; ifobject->ns_fd = switch_namespace(ifobject->nsname); - if (ifobject->umem->unaligned_mode) - mmap_flags |= MAP_HUGETLB; + if (test_type == TEST_TYPE_BPF_RES) + mmap_sz *= 2; - for (i = 0; i < test->nb_sockets; i++) { - u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; - u32 ctr = 0; - void *bufs; - int ret; + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); - bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); + while (ctr++ < SOCK_RECONF_CTR) { + xsk_configure_umem(ifobject, bufs, umem_sz, 0); + ifobject->umem = ifobject->umem_arr[0]; + ret = xsk_configure_socket(ifobject, 0); + if (!ret) + break; - ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz); - if (ret) + /* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */ + usleep(USLEEP_MAX); + if (ctr >= SOCK_RECONF_CTR) exit_with_error(-ret); - - while (ctr++ < SOCK_RECONF_CTR) { - ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i], - ifobject, i); - if (!ret) - break; - - /* Retry if it fails as xsk_socket__create() is asynchronous */ - if (ctr >= SOCK_RECONF_CTR) - exit_with_error(-ret); - usleep(USLEEP_MAX); - } } - ifobject->umem = &ifobject->umem_arr[0]; - ifobject->xsk = &ifobject->xsk_arr[0]; + ifobject->umem = ifobject->umem_arr[0]; + ifobject->xsk = ifobject->xsk_arr[0]; + + if (test_type == TEST_TYPE_BPF_RES) { + xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1); + ifobject->umem = ifobject->umem_arr[1]; + ret = xsk_configure_socket(ifobject, 1); + } + + ifobject->umem = ifobject->umem_arr[0]; + ifobject->xsk = ifobject->xsk_arr[0]; + print_verbose("Interface [%s] vector [%s]\n", + ifobject->ifname, ifobject->fv.vector == tx ? 
"Tx" : "Rx"); +} + +static bool testapp_is_test_two_stepped(void) +{ + return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step; } static void testapp_cleanup_xsk_res(struct ifobject *ifobj) { - print_verbose("Destroying socket\n"); - xsk_socket__delete(ifobj->xsk->xsk); - munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size); - xsk_umem__delete(ifobj->umem->umem); + if (testapp_is_test_two_stepped()) { + xsk_socket__delete(ifobj->xsk->xsk); + (void)xsk_umem__delete(ifobj->umem->umem); + } } static void *worker_testapp_validate_tx(void *arg) { - struct test_spec *test = (struct test_spec *)arg; - struct ifobject *ifobject = test->ifobj_tx; + struct ifobject *ifobject = (struct ifobject *)arg; + void *bufs = NULL; - if (test->current_step == 1) - thread_common_ops(test, ifobject); + if (!second_step) + thread_common_ops(ifobject, bufs); print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts, ifobject->ifname); @@ -1009,55 +824,24 @@ static void *worker_testapp_validate_tx(void *arg) if (stat_test_type == STAT_TEST_TX_INVALID) tx_stats_validate(ifobject); - if (test->total_steps == test->current_step) - testapp_cleanup_xsk_res(ifobject); + testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } -static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) -{ - u32 idx = 0, i, buffers_to_fill; - int ret; - - if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) - buffers_to_fill = umem->num_frames; - else - buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS; - - ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); - if (ret != buffers_to_fill) - exit_with_error(ENOSPC); - for (i = 0; i < buffers_to_fill; i++) { - u64 addr; - - if (pkt_stream->use_addr_for_fill) { - struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); - - if (!pkt) - break; - addr = pkt->addr; - } else { - addr = i * umem->frame_size; - } - - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; - } - xsk_ring_prod__submit(&umem->fq, buffers_to_fill); -} - static void *worker_testapp_validate_rx(void *arg) { - struct test_spec *test = (struct test_spec *)arg; - struct ifobject *ifobject = test->ifobj_rx; - struct pollfd fds = { }; + struct ifobject *ifobject = (struct ifobject *)arg; + struct pollfd fds[MAX_SOCKS] = { }; + void *bufs = NULL; - if (test->current_step == 1) - thread_common_ops(test, ifobject); + if (!second_step) + thread_common_ops(ifobject, bufs); - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); + if (stat_test_type != STAT_TEST_RX_FILL_EMPTY) + xsk_populate_fill_ring(ifobject->umem); - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLIN; + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); + fds[0].events = POLLIN; pthread_barrier_wait(&barr); @@ -1065,239 +849,151 @@ static void *worker_testapp_validate_rx(void *arg) while (!rx_stats_are_valid(ifobject)) continue; else - receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds); + receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds); - if (test->total_steps == test->current_step) - testapp_cleanup_xsk_res(ifobject); + if (test_type == TEST_TYPE_TEARDOWN) + print_verbose("Destroying socket\n"); + + testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } -static void testapp_validate_traffic(struct test_spec *test) +static void testapp_validate(void) { - struct ifobject *ifobj_tx = test->ifobj_tx; - struct ifobject *ifobj_rx = test->ifobj_rx; - pthread_t t0, t1; + bool bidi = test_type == TEST_TYPE_BIDI; + 
bool bpf = test_type == TEST_TYPE_BPF_RES; + struct pkt_stream *pkt_stream; if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); - test->current_step++; - pkt_stream_reset(ifobj_rx->pkt_stream); - pkts_in_flight = 0; + if (stat_test_type == STAT_TEST_TX_INVALID) + pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE); + else + pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE); + ifdict_tx->pkt_stream = pkt_stream; + ifdict_rx->pkt_stream = pkt_stream; /*Spawn RX thread */ - pthread_create(&t0, NULL, ifobj_rx->func_ptr, test); + pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx); pthread_barrier_wait(&barr); if (pthread_barrier_destroy(&barr)) exit_with_error(errno); /*Spawn TX thread */ - pthread_create(&t1, NULL, ifobj_tx->func_ptr, test); + pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx); pthread_join(t1, NULL); pthread_join(t0, NULL); + + if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS)) + print_ksft_result(); } -static void testapp_teardown(struct test_spec *test) +static void testapp_teardown(void) { int i; - test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { - testapp_validate_traffic(test); - test_spec_reset(test); + print_verbose("Creating socket\n"); + testapp_validate(); } + + print_ksft_result(); } -static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) +static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2) { - thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr; - struct ifobject *tmp_ifobj = (*ifobj1); + void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr; + enum fvector tmp_vector = ifobj1->fv.vector; - (*ifobj1)->func_ptr = (*ifobj2)->func_ptr; - (*ifobj2)->func_ptr = tmp_func_ptr; + ifobj1->func_ptr = ifobj2->func_ptr; + ifobj1->fv.vector = ifobj2->fv.vector; - *ifobj1 = *ifobj2; - *ifobj2 = tmp_ifobj; + ifobj2->func_ptr = tmp_func_ptr; + ifobj2->fv.vector = tmp_vector; + + ifdict_tx = ifobj1; + ifdict_rx = ifobj2; } -static void testapp_bidi(struct test_spec *test) +static void testapp_bidi(void) { - test_spec_set_name(test, "BIDIRECTIONAL"); - test->ifobj_tx->rx_on = true; - test->ifobj_rx->tx_on = true; - test->total_steps = 2; - testapp_validate_traffic(test); + for (int i = 0; i < MAX_BIDI_ITER; i++) { + print_verbose("Creating socket\n"); + testapp_validate(); + if (!second_step) { + print_verbose("Switching Tx/Rx vectors\n"); + swap_vectors(ifdict[1], ifdict[0]); + } + second_step = true; + } - print_verbose("Switching Tx/Rx vectors\n"); - swap_directions(&test->ifobj_rx, &test->ifobj_tx); - testapp_validate_traffic(test); + swap_vectors(ifdict[0], ifdict[1]); - swap_directions(&test->ifobj_rx, &test->ifobj_tx); + print_ksft_result(); } -static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) +static void swap_xsk_res(void) { - xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_umem__delete(ifobj_tx->umem->umem); - xsk_socket__delete(ifobj_rx->xsk->xsk); - xsk_umem__delete(ifobj_rx->umem->umem); - ifobj_tx->umem = &ifobj_tx->umem_arr[1]; - ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->umem = &ifobj_rx->umem_arr[1]; - ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + xsk_socket__delete(ifdict_tx->xsk->xsk); + xsk_umem__delete(ifdict_tx->umem->umem); + xsk_socket__delete(ifdict_rx->xsk->xsk); + xsk_umem__delete(ifdict_rx->umem->umem); + ifdict_tx->umem = ifdict_tx->umem_arr[1]; + ifdict_tx->xsk = ifdict_tx->xsk_arr[1]; + ifdict_rx->umem = ifdict_rx->umem_arr[1]; + 
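/*
 * Editor's note -- illustrative sketch, not part of the patch. The
 * delete order in the swap above is deliberate: every AF_XDP socket
 * holds a reference on its umem, and libbpf's xsk_umem__delete()
 * returns -EBUSY while that refcount is non-zero, so sockets must be
 * torn down first. A minimal create/teardown pair, assuming "buf" and
 * "size" describe an already mmap()ed umem area and "veth0" is a
 * placeholder interface name:
 */
	struct xsk_ring_prod fq, txr;
	struct xsk_ring_cons cq, rxr;
	struct xsk_umem *umem = NULL;
	struct xsk_socket *xsk = NULL;
	int ret;

	ret = xsk_umem__create(&umem, buf, size, &fq, &cq, NULL);
	if (ret)
		exit_with_error(-ret);
	ret = xsk_socket__create(&xsk, "veth0", 0, umem, &rxr, &txr, NULL);
	if (ret)
		exit_with_error(-ret);
	/* ... run traffic ... */
	xsk_socket__delete(xsk);	/* drops the socket's umem reference */
	xsk_umem__delete(umem);		/* now succeeds: refcount reached zero */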
ifdict_rx->xsk = ifdict_rx->xsk_arr[1]; } -static void testapp_bpf_res(struct test_spec *test) -{ - test_spec_set_name(test, "BPF_RES"); - test->total_steps = 2; - test->nb_sockets = 2; - testapp_validate_traffic(test); - - swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); - testapp_validate_traffic(test); -} - -static void testapp_headroom(struct test_spec *test) -{ - test_spec_set_name(test, "UMEM_HEADROOM"); - test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; - testapp_validate_traffic(test); -} - -static void testapp_stats(struct test_spec *test) +static void testapp_bpf_res(void) { int i; - for (i = 0; i < STAT_TEST_TYPE_MAX; i++) { - test_spec_reset(test); + for (i = 0; i < MAX_BPF_ITER; i++) { + print_verbose("Creating socket\n"); + testapp_validate(); + if (!second_step) + swap_xsk_res(); + second_step = true; + } + + print_ksft_result(); +} + +static void testapp_stats(void) +{ + for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) { stat_test_type = i; - /* No or few packets will be received so cannot pace packets */ - test->ifobj_tx->pacing_on = false; + + /* reset defaults */ + rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; switch (stat_test_type) { case STAT_TEST_RX_DROPPED: - test_spec_set_name(test, "STAT_RX_DROPPED"); - test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - - XDP_PACKET_HEADROOM - 1; - testapp_validate_traffic(test); + frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE - + XDP_PACKET_HEADROOM - 1; break; case STAT_TEST_RX_FULL: - test_spec_set_name(test, "STAT_RX_FULL"); - test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE; - testapp_validate_traffic(test); + rxqsize = RX_FULL_RXQSIZE; break; case STAT_TEST_TX_INVALID: - test_spec_set_name(test, "STAT_TX_INVALID"); - pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE); - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); - break; - case STAT_TEST_RX_FILL_EMPTY: - test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0, - MIN_PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); - break; + continue; default: break; } + testapp_validate(); } - /* To only see the whole stat set being completed unless an individual test fails. 
*/ - test_spec_set_name(test, "STATS"); + print_ksft_result(); } -/* Simple test */ -static bool hugepages_present(struct ifobject *ifobject) -{ - const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; - void *bufs; - - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); - if (bufs == MAP_FAILED) - return false; - - munmap(bufs, mmap_sz); - return true; -} - -static bool testapp_unaligned(struct test_spec *test) -{ - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return false; - } - - test_spec_set_name(test, "UNALIGNED_MODE"); - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - /* Let half of the packets straddle a buffer boundrary */ - pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); - return true; -} - -static void testapp_single_pkt(struct test_spec *test) -{ - struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; - - pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); - pkt_stream_restore_default(test); -} - -static void testapp_invalid_desc(struct test_spec *test) -{ - struct pkt pkts[] = { - /* Zero packet length at address zero allowed */ - {0, 0, 0, true}, - /* Zero packet length allowed */ - {0x1000, 0, 0, true}, - /* Straddling the start of umem */ - {-2, PKT_SIZE, 0, false}, - /* Packet too large */ - {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, - /* After umem ends */ - {UMEM_SIZE, PKT_SIZE, 0, false}, - /* Straddle the end of umem */ - {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a page boundrary */ - {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a 2K boundrary */ - {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, - /* Valid packet for synch so that something is received */ - {0x4000, PKT_SIZE, 0, true}}; - - if (test->ifobj_tx->umem->unaligned_mode) { - /* Crossing a page boundrary allowed */ - pkts[6].valid = true; - } - if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { - /* Crossing a 2K frame size boundrary not allowed */ - pkts[7].valid = false; - } - - pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); - pkt_stream_restore_default(test); -} - -static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - const char *dst_ip, const char *src_ip, const u16 dst_port, - const u16 src_port, thread_func_t func_ptr) +static void init_iface(struct ifobject *ifobj, const char *dst_mac, + const char *src_mac, const char *dst_ip, + const char *src_ip, const u16 dst_port, + const u16 src_port, enum fvector vector) { struct in_addr ip; @@ -1313,84 +1009,58 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * ifobj->dst_port = dst_port; ifobj->src_port = src_port; - ifobj->func_ptr = func_ptr; + if (vector == tx) { + ifobj->fv.vector = tx; + ifobj->func_ptr = worker_testapp_validate_tx; + ifdict_tx = ifobj; + } else { + ifobj->fv.vector = rx; + ifobj->func_ptr = worker_testapp_validate_rx; + ifdict_rx = ifobj; + } } -static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) +static void run_pkt_test(int mode, int type) { test_type = type; /* reset defaults after potential previous test */ + xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + second_step = 0; 
stat_test_type = -1; + rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; - switch (test_type) { - case TEST_TYPE_STATS: - testapp_stats(test); - break; - case TEST_TYPE_TEARDOWN: - testapp_teardown(test); - break; - case TEST_TYPE_BIDI: - testapp_bidi(test); - break; - case TEST_TYPE_BPF_RES: - testapp_bpf_res(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION: - test_spec_set_name(test, "RUN_TO_COMPLETION"); - testapp_validate_traffic(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: - test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); - testapp_single_pkt(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: - test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); - test->ifobj_tx->umem->frame_size = 2048; - test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); - testapp_validate_traffic(test); + configured_mode = mode; - pkt_stream_restore_default(test); + switch (mode) { + case (TEST_MODE_SKB): + xdp_flags |= XDP_FLAGS_SKB_MODE; break; - case TEST_TYPE_POLL: - test->ifobj_tx->use_poll = true; - test->ifobj_rx->use_poll = true; - test_spec_set_name(test, "POLL"); - testapp_validate_traffic(test); - break; - case TEST_TYPE_ALIGNED_INV_DESC: - test_spec_set_name(test, "ALIGNED_INV_DESC"); - testapp_invalid_desc(test); - break; - case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: - test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); - test->ifobj_tx->umem->frame_size = 2048; - test->ifobj_rx->umem->frame_size = 2048; - testapp_invalid_desc(test); - break; - case TEST_TYPE_UNALIGNED_INV_DESC: - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } - test_spec_set_name(test, "UNALIGNED_INV_DESC"); - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - testapp_invalid_desc(test); - break; - case TEST_TYPE_UNALIGNED: - if (!testapp_unaligned(test)) - return; - break; - case TEST_TYPE_HEADROOM: - testapp_headroom(test); + case (TEST_MODE_DRV): + xdp_flags |= XDP_FLAGS_DRV_MODE; break; default: break; } - print_ksft_result(test); + switch (test_type) { + case TEST_TYPE_STATS: + testapp_stats(); + break; + case TEST_TYPE_TEARDOWN: + testapp_teardown(); + break; + case TEST_TYPE_BIDI: + testapp_bidi(); + break; + case TEST_TYPE_BPF_RES: + testapp_bpf_res(); + break; + default: + testapp_validate(); + break; + } } static struct ifobject *ifobject_create(void) @@ -1401,11 +1071,11 @@ static struct ifobject *ifobject_create(void) if (!ifobj) return NULL; - ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr)); + ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *)); if (!ifobj->xsk_arr) goto out_xsk_arr; - ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr)); + ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *)); if (!ifobj->umem_arr) goto out_umem_arr; @@ -1428,53 +1098,34 @@ static void ifobject_delete(struct ifobject *ifobj) int main(int argc, char **argv) { struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; - struct pkt_stream *pkt_stream_default; - struct ifobject *ifobj_tx, *ifobj_rx; - struct test_spec test; - u32 i, j; + int i, j; if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) exit_with_error(errno); - ifobj_tx = ifobject_create(); - if (!ifobj_tx) - exit_with_error(ENOMEM); - ifobj_rx = ifobject_create(); - if (!ifobj_rx) - exit_with_error(ENOMEM); + for (i = 0; i < MAX_INTERFACES; i++) { + ifdict[i] = ifobject_create(); + if (!ifdict[i]) + 
exit_with_error(ENOMEM); + } setlocale(LC_ALL, ""); - parse_command_line(ifobj_tx, ifobj_rx, argc, argv); + parse_command_line(argc, argv); - if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) { - usage(basename(argv[0])); - ksft_exit_xfail(); - } - - init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, - worker_testapp_validate_tx); - init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, - worker_testapp_validate_rx); - - test_spec_init(&test, ifobj_tx, ifobj_rx, 0); - pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); - if (!pkt_stream_default) - exit_with_error(ENOMEM); - test.pkt_stream_default = pkt_stream_default; + init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx); + init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx); ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX); for (i = 0; i < TEST_MODE_MAX; i++) for (j = 0; j < TEST_TYPE_MAX; j++) { - test_spec_init(&test, ifobj_tx, ifobj_rx, i); - run_pkt_test(&test, i, j); + run_pkt_test(i, j); usleep(USLEEP_MAX); } - pkt_stream_delete(pkt_stream_default); - ifobject_delete(ifobj_tx); - ifobject_delete(ifobj_rx); + for (i = 0; i < MAX_INTERFACES; i++) + ifobject_delete(ifdict[i]); ksft_exit_pass(); return 0; diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 2f705f44b7..7e49b9fbe2 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -20,9 +20,10 @@ #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 -#define MAX_SOCKETS 2 -#define MAX_TEST_NAME_SIZE 32 +#define MAX_SOCKS 1 #define MAX_TEARDOWN_ITER 10 +#define MAX_BIDI_ITER 2 +#define MAX_BPF_ITER 2 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define MIN_PKT_SIZE 64 @@ -35,13 +36,10 @@ #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) #define USLEEP_MAX 10000 #define SOCK_RECONF_CTR 10 -#define BATCH_SIZE 64 +#define BATCH_SIZE 8 #define POLL_TMOUT 1000 #define DEFAULT_PKT_CNT (4 * 1024) -#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) -#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) #define RX_FULL_RXQSIZE 32 -#define UMEM_HEADROOM_TEST_SIZE 128 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) #define print_verbose(x...) 
do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -53,15 +51,8 @@ enum test_mode { }; enum test_type { - TEST_TYPE_RUN_TO_COMPLETION, - TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, - TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, + TEST_TYPE_NOPOLL, TEST_TYPE_POLL, - TEST_TYPE_UNALIGNED, - TEST_TYPE_ALIGNED_INV_DESC, - TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, - TEST_TYPE_UNALIGNED_INV_DESC, - TEST_TYPE_HEADROOM, TEST_TYPE_TEARDOWN, TEST_TYPE_BIDI, TEST_TYPE_STATS, @@ -77,21 +68,25 @@ enum stat_test_type { STAT_TEST_TYPE_MAX }; +static int configured_mode; static bool opt_pkt_dump; +static u32 num_frames = DEFAULT_PKT_CNT / 4; +static bool second_step; static int test_type; static bool opt_verbose; + +static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY; static int stat_test_type; +static u32 rxqsize; +static u32 frame_headroom; struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; struct xsk_umem *umem; - u32 num_frames; - u32 frame_headroom; void *buffer; - u32 frame_size; - bool unaligned_mode; }; struct xsk_socket_info { @@ -100,63 +95,51 @@ struct xsk_socket_info { struct xsk_umem_info *umem; struct xsk_socket *xsk; u32 outstanding_tx; - u32 rxqsize; +}; + +struct flow_vector { + enum fvector { + tx, + rx, + } vector; }; struct pkt { u64 addr; u32 len; u32 payload; - bool valid; }; struct pkt_stream { u32 nb_pkts; - u32 rx_pkt_nb; struct pkt *pkts; - bool use_addr_for_fill; }; -typedef void *(*thread_func_t)(void *arg); - struct ifobject { char ifname[MAX_INTERFACE_NAME_CHARS]; char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; struct xsk_socket_info *xsk; - struct xsk_socket_info *xsk_arr; + struct xsk_socket_info **xsk_arr; + struct xsk_umem_info **umem_arr; struct xsk_umem_info *umem; - struct xsk_umem_info *umem_arr; - thread_func_t func_ptr; + void *(*func_ptr)(void *arg); + struct flow_vector fv; struct pkt_stream *pkt_stream; int ns_fd; u32 dst_ip; u32 src_ip; - u32 xdp_flags; - u32 bind_flags; u16 src_port; u16 dst_port; - bool tx_on; - bool rx_on; - bool use_poll; - bool pacing_on; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; -struct test_spec { - struct ifobject *ifobj_tx; - struct ifobject *ifobj_rx; - struct pkt_stream *pkt_stream_default; - u16 total_steps; - u16 current_step; - u16 nb_sockets; - char name[MAX_TEST_NAME_SIZE]; -}; +static struct ifobject *ifdict[MAX_INTERFACES]; +static struct ifobject *ifdict_rx; +static struct ifobject *ifdict_tx; +/*threads*/ pthread_barrier_t barr; -pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; - -u32 pkts_in_flight; +pthread_t t0, t1; #endif /* XDPXCEIVER_H */ diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 745fe25fa0..59e2224605 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -11,12 +11,10 @@ TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_kill -LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h - include ../lib.mk -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_kmem: cgroup_util.c -$(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c -$(OUTPUT)/test_kill: cgroup_util.c +$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_freezer: cgroup_util.c 
../clone3/clone3_selftests.h +$(OUTPUT)/test_kill: cgroup_util.c ../clone3/clone3_selftests.h ../pidfd/pidfd.h diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 4f66d10626..82e59cdf16 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -2,10 +2,10 @@ #include #include -#include "../kselftest.h" - #define PAGE_SIZE 4096 +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + #define MB(x) (x << 20) /* diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 6001235030..3df648c378 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -1,14 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#define _GNU_SOURCE #include -#include #include #include #include #include #include -#include #include #include #include @@ -677,166 +674,6 @@ static int test_cgcore_thread_migration(const char *root) return ret; } -/* - * cgroup migration permission check should be performed based on the - * credentials at the time of open instead of write. - */ -static int test_cgcore_lesser_euid_open(const char *root) -{ - const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ - int ret = KSFT_FAIL; - char *cg_test_a = NULL, *cg_test_b = NULL; - char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; - int cg_test_b_procs_fd = -1; - uid_t saved_uid; - - cg_test_a = cg_name(root, "cg_test_a"); - cg_test_b = cg_name(root, "cg_test_b"); - - if (!cg_test_a || !cg_test_b) - goto cleanup; - - cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); - cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); - - if (!cg_test_a_procs || !cg_test_b_procs) - goto cleanup; - - if (cg_create(cg_test_a) || cg_create(cg_test_b)) - goto cleanup; - - if (cg_enter_current(cg_test_a)) - goto cleanup; - - if (chown(cg_test_a_procs, test_euid, -1) || - chown(cg_test_b_procs, test_euid, -1)) - goto cleanup; - - saved_uid = geteuid(); - if (seteuid(test_euid)) - goto cleanup; - - cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); - - if (seteuid(saved_uid)) - goto cleanup; - - if (cg_test_b_procs_fd < 0) - goto cleanup; - - if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) - goto cleanup; - - ret = KSFT_PASS; - -cleanup: - cg_enter_current(root); - if (cg_test_b_procs_fd >= 0) - close(cg_test_b_procs_fd); - if (cg_test_b) - cg_destroy(cg_test_b); - if (cg_test_a) - cg_destroy(cg_test_a); - free(cg_test_b_procs); - free(cg_test_a_procs); - free(cg_test_b); - free(cg_test_a); - return ret; -} - -struct lesser_ns_open_thread_arg { - const char *path; - int fd; - int err; -}; - -static int lesser_ns_open_thread_fn(void *arg) -{ - struct lesser_ns_open_thread_arg *targ = arg; - - targ->fd = open(targ->path, O_RDWR); - targ->err = errno; - return 0; -} - -/* - * cgroup migration permission check should be performed based on the cgroup - * namespace at the time of open instead of write. 
- */ -static int test_cgcore_lesser_ns_open(const char *root) -{ - static char stack[65536]; - const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ - int ret = KSFT_FAIL; - char *cg_test_a = NULL, *cg_test_b = NULL; - char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; - int cg_test_b_procs_fd = -1; - struct lesser_ns_open_thread_arg targ = { .fd = -1 }; - pid_t pid; - int status; - - cg_test_a = cg_name(root, "cg_test_a"); - cg_test_b = cg_name(root, "cg_test_b"); - - if (!cg_test_a || !cg_test_b) - goto cleanup; - - cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); - cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); - - if (!cg_test_a_procs || !cg_test_b_procs) - goto cleanup; - - if (cg_create(cg_test_a) || cg_create(cg_test_b)) - goto cleanup; - - if (cg_enter_current(cg_test_b)) - goto cleanup; - - if (chown(cg_test_a_procs, test_euid, -1) || - chown(cg_test_b_procs, test_euid, -1)) - goto cleanup; - - targ.path = cg_test_b_procs; - pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), - CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, - &targ); - if (pid < 0) - goto cleanup; - - if (waitpid(pid, &status, 0) < 0) - goto cleanup; - - if (!WIFEXITED(status)) - goto cleanup; - - cg_test_b_procs_fd = targ.fd; - if (cg_test_b_procs_fd < 0) - goto cleanup; - - if (cg_enter_current(cg_test_a)) - goto cleanup; - - if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) - goto cleanup; - - ret = KSFT_PASS; - -cleanup: - cg_enter_current(root); - if (cg_test_b_procs_fd >= 0) - close(cg_test_b_procs_fd); - if (cg_test_b) - cg_destroy(cg_test_b); - if (cg_test_a) - cg_destroy(cg_test_a); - free(cg_test_b_procs); - free(cg_test_a_procs); - free(cg_test_b); - free(cg_test_a); - return ret; -} - #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -852,8 +689,6 @@ struct corecg_test { T(test_cgcore_proc_migration), T(test_cgcore_thread_migration), T(test_cgcore_destroy), - T(test_cgcore_lesser_euid_open), - T(test_cgcore_lesser_ns_open), }; #undef T diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 749239930c..aa7d13d919 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -50,6 +50,10 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd, return syscall(__NR_close_range, fd, max_fd, flags); } +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + TEST(core_close_range) { int i, ret; diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index 60ce18ed06..31f8c9a76c 100644 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -194,5 +194,5 @@ prerequisite # Run requested functions clear_dumps $OUTFILE -do_test | tee -a $OUTFILE.txt +do_test >> $OUTFILE.txt dmesg_dumps $OUTFILE diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 937d36ae9a..f0aa954b5d 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -3,8 +3,7 @@ TEST_GEN_FILES += huge_count_read_write -TEST_FILES = _chk_dependency.sh _debugfs_common.sh -TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh -TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh +TEST_FILES = _chk_dependency.sh +TEST_PROGS = debugfs_attrs.sh include ../lib.mk diff --git 
a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 902e312bca..ecda972e87 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -1,7 +1,48 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -source _debugfs_common.sh +test_write_result() { + file=$1 + content=$2 + orig_content=$3 + expect_reason=$4 + expected=$5 + + echo "$content" > "$file" + if [ $? -ne "$expected" ] + then + echo "writing $content to $file doesn't return $expected" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +test_write_succ() { + test_write_result "$1" "$2" "$3" "$4" 0 +} + +test_write_fail() { + test_write_result "$1" "$2" "$3" "$4" 1 +} + +test_content() { + file=$1 + orig_content=$2 + expected=$3 + expect_reason=$4 + + content=$(cat "$file") + if [ "$content" != "$expected" ] + then + echo "reading $file expected $expected but $content" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +source ./_chk_dependency.sh # Test attrs file # =============== @@ -15,3 +56,38 @@ test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ "min_nr_regions > max_nr_regions" test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" echo "$orig_content" > "$file" + +# Test target_ids file +# ==================== + +file="$DBGFS/target_ids" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" +test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" +test_content "$file" "$orig_content" "1 2" "non-integer was there" +test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" +test_content "$file" "$orig_content" "" "wrong input written" +test_write_succ "$file" "" "$orig_content" "empty input" +test_content "$file" "$orig_content" "" "empty input written" +echo "$orig_content" > "$file" + +# Test huge count read write +# ========================== + +dmesg -C + +for file in "$DBGFS/"* +do + ./huge_count_read_write "$file" +done + +if dmesg | grep -q WARNING +then + dmesg + exit 1 +else + exit 0 +fi + +echo "PASS" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh index d3a891d421..a37273473c 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh @@ -87,7 +87,6 @@ ALL_TESTS=" NUM_NETIFS=4 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh -source mlxsw_lib.sh h1_create() { @@ -627,7 +626,8 @@ ipv6_redirect_test() ptp_event_test() { - mlxsw_only_on_spectrum 1 || return + # PTP is only supported on Spectrum-1, for now. + [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return # PTP Sync (0) devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \ @@ -638,7 +638,8 @@ ptp_event_test() ptp_general_test() { - mlxsw_only_on_spectrum 1 || return + # PTP is only supported on Spectrum-1, for now. 
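# A minimal sketch of the VID:DID gate the two PTP tests below now use,
# assuming DEVLINK_VIDDID is exported by devlink_lib.sh as
# "vendor:device" with 15b3:cb84 identifying Spectrum-1. The helper name
# is illustrative, not an existing lib.sh function.
ptp_traps_supported()
{
	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]
}
# Usage at the top of a test: ptp_traps_supported || return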
+ [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return # PTP Announce (b) devlink_trap_stats_test "PTP General Message" "ptp_general" \ diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 0bd5ffc218..508a702f00 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -272,17 +272,13 @@ __rate_test() rate_test() { - local last_policer=$(devlink -j -p trap policer show | - jq '[.[]["'$DEVLINK_DEV'"][].policer] | max') + local id - log_info "Running rate test for policer 1" - __rate_test 1 - - log_info "Running rate test for policer $((last_policer / 2))" - __rate_test $((last_policer / 2)) - - log_info "Running rate test for policer $last_policer" - __rate_test $last_policer + for id in $(devlink_trap_policer_ids_get); do + echo + log_info "Running rate test for policer $id" + __rate_test $id + done } __burst_test() @@ -346,17 +342,13 @@ __burst_test() burst_test() { - local last_policer=$(devlink -j -p trap policer show | - jq '[.[]["'$DEVLINK_DEV'"][].policer] | max') + local id - log_info "Running burst test for policer 1" - __burst_test 1 - - log_info "Running burst test for policer $((last_policer / 2))" - __burst_test $((last_policer / 2)) - - log_info "Running burst test for policer $last_policer" - __burst_test $last_policer + for id in $(devlink_trap_policer_ids_get); do + echo + log_info "Running burst size test for policer $id" + __burst_test $id + done } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index e9a82cae8c..8817851da7 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -13,7 +13,7 @@ # | # +-------------------|-----+ # | SW1 | | -# | $swp1 + | +# | $swp1 + | # | 192.0.2.2/28 | # | | # | + g1a (gre) | @@ -27,8 +27,8 @@ # | # +--|----------------------+ # | | VRF2 | -# | + $rp2 | -# | 198.51.100.2/28 | +# | + $rp2 | +# | 198.51.100.2/28 | # +-------------------------+ lib_dir=$(dirname $0)/../../../net/forwarding @@ -116,16 +116,12 @@ cleanup() forwarding_restore } -ipip_payload_get() +ecn_payload_get() { - local flags=$1; shift - local key=$1; shift - p=$(: - )"$flags"$( : GRE flags + )"0"$( : GRE flags )"0:00:"$( : Reserved + version )"08:00:"$( : ETH protocol type - )"$key"$( : Key )"4"$( : IP version )"5:"$( : IHL )"00:"$( : IP TOS @@ -141,11 +137,6 @@ ipip_payload_get() echo $p } -ecn_payload_get() -{ - echo $(ipip_payload_get "0") -} - ecn_decap_test() { local trap_name="decap_error" @@ -180,6 +171,31 @@ ecn_decap_test() tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } +ipip_payload_get() +{ + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"08:00:"$( : ETH protocol type + )"$key"$( : Key + )"4"$( : IP version + )"5:"$( : IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"E7:E6:"$( : IP header csum + )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 + )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 + ) + echo $p +} + no_matching_tunnel_test() { local trap_name="decap_error" @@ -223,8 +239,7 @@ decap_error_test() no_matching_tunnel_test "Decap error: 
Source IP check failed" \ 192.0.2.68 "0" no_matching_tunnel_test \ - "Decap error: Key exists but was not expected" $sip "2" \ - "00:00:00:E9:" + "Decap error: Key exists but was not expected" $sip "2" ":E9:" # Destroy the tunnel and create new one with key __addr_add_del g1 del 192.0.2.65/32 @@ -236,8 +251,7 @@ decap_error_test() no_matching_tunnel_test \ "Decap error: Key does not exist but was expected" $sip "0" no_matching_tunnel_test \ - "Decap error: Packet has a wrong key field" $sip "2" \ - "00:00:00:E8:" + "Decap error: Packet has a wrong key field" $sip "2" "E8:" } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index 5f6eb965cf..10e0f3dbc9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -217,11 +217,9 @@ short_payload_get() dest_mac=$(mac_get $h1) p=$(: )"08:"$( : VXLAN flags - )"00:00:00:"$( : VXLAN reserved + )"01:00:00:"$( : VXLAN reserved )"00:03:e8:"$( : VXLAN VNI : 1000 )"00:"$( : VXLAN reserved - )"$dest_mac:"$( : ETH daddr - )"00:00:00:00:00:00:"$( : ETH saddr ) echo $p } @@ -265,8 +263,7 @@ decap_error_test() corrupted_packet_test "Decap error: Reserved bits in use" \ "reserved_bits_payload_get" - corrupted_packet_test "Decap error: Too short inner packet" \ - "short_payload_get" + corrupted_packet_test "Decap error: No L2 header" "short_payload_get" } mc_smac_payload_get() diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh index a95856aafd..cbe50f260a 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh @@ -11,53 +11,3 @@ if [[ ! -v MLXSW_CHIP ]]; then exit 1 fi fi - -MLXSW_SPECTRUM_REV=$(case $MLXSW_CHIP in - mlxsw_spectrum) - echo 1 ;; - mlxsw_spectrum*) - echo ${MLXSW_CHIP#mlxsw_spectrum} ;; - *) - echo "Couldn't determine Spectrum chip revision." \ - > /dev/stderr ;; - esac) - -mlxsw_on_spectrum() -{ - local rev=$1; shift - local op="==" - local rev2=${rev%+} - - if [[ $rev2 != $rev ]]; then - op=">=" - fi - - ((MLXSW_SPECTRUM_REV $op rev2)) -} - -__mlxsw_only_on_spectrum() -{ - local rev=$1; shift - local caller=$1; shift - local src=$1; shift - - if ! mlxsw_on_spectrum "$rev"; then - log_test_skip $src:$caller "(Spectrum-$rev only)" - return 1 - fi -} - -mlxsw_only_on_spectrum() -{ - local caller=${FUNCNAME[1]} - local src=${BASH_SOURCE[1]} - local rev - - for rev in "$@"; do - if __mlxsw_only_on_spectrum "$rev" "$caller" "$src"; then - return 0 - fi - done - - return 1 -} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 04f03ae9d8..a217f9f677 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -10,7 +10,9 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" + rif_set_addr_test rif_vrf_set_addr_test + rif_inherit_bridge_addr_test rif_non_inherit_bridge_addr_test vlan_interface_deletion_test bridge_deletion_test @@ -58,6 +60,55 @@ cleanup() ip link set dev $swp1 down } +rif_set_addr_test() +{ + local swp1_mac=$(mac_get $swp1) + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # $swp1 and $swp2 likely got their IPv6 local addresses already, but + # here we need to test the transition to RIF. 
+ ip addr flush dev $swp1 + ip addr flush dev $swp2 + sleep .1 + + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + ip link set dev $swp1 addr 00:11:22:33:44:55 + check_err $? + + # IP address enablement should be rejected if the MAC address prefix + # doesn't match other RIFs. + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_fail $? "IP address addition passed for a device with a wrong MAC" + ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for IP address addition" + + ip link set dev $swp2 addr 00:11:22:33:44:66 + check_err $? + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_err $? + + # Change of MAC address of a RIF should be forbidden if the new MAC + # doesn't share the prefix with other MAC addresses. + ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null + check_fail $? "change of MAC address passed for a wrong MAC" + ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for MAC address change" + + log_test "RIF - bad MAC change" + + ip addr del dev $swp2 192.0.2.2/28 + ip addr del dev $swp1 192.0.2.1/28 + + ip link set dev $swp2 addr $swp2_mac + ip link set dev $swp1 addr $swp1_mac +} + rif_vrf_set_addr_test() { # Test that it is possible to set an IP address on a VRF upper despite @@ -77,6 +128,45 @@ rif_vrf_set_addr_test() ip link del dev vrf-test } +rif_inherit_bridge_addr_test() +{ + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. That prompts bridge address change, which + # should be vetoed, thus preventing the attachment. + ip link set dev d master br1 &>/dev/null + check_fail $? "Device with low MAC was permitted to attach a bridge with RIF" + ip link set dev d master br1 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge attach rejection" + + ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null + check_fail $? "Changing swp2's MAC address permitted" + ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge port MAC address change rejection" + + log_test "RIF - attach port with bad MAC to bridge" + + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + rif_non_inherit_bridge_addr_test() { local swp2_mac=$(mac_get $swp2) @@ -689,7 +779,7 @@ nexthop_obj_offload_test() setup_wait ip nexthop add id 1 via 192.0.2.2 dev $swp1 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ @@ -701,7 +791,7 @@ nexthop_obj_offload_test() ip nexthop show id 1 check_err $? 
"nexthop marked as offloaded after setting neigh to failed state" - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ ip nexthop show id 1 @@ -738,11 +828,11 @@ nexthop_obj_group_offload_test() ip nexthop add id 1 via 192.0.2.2 dev $swp1 ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 ip nexthop add id 10 group 1/2 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 - ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 - ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ @@ -798,11 +888,11 @@ nexthop_obj_bucket_offload_test() ip nexthop add id 1 via 192.0.2.2 dev $swp1 ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 - ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 - ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ @@ -831,7 +921,7 @@ nexthop_obj_bucket_offload_test() check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop" # Revalidate nexthop id 2 by changing its neighbour - ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ ip nexthop bucket show nhid 2 @@ -881,9 +971,9 @@ nexthop_obj_route_offload_test() setup_wait ip nexthop add id 1 via 192.0.2.2 dev $swp1 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 - ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ dev $swp1 ip route replace 198.51.100.0/24 nhid 1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index f260f01db0..33ddd01689 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -73,7 +73,6 @@ CHECK_TC="yes" lib_dir=$(dirname $0)/../../../net/forwarding source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh -source mlxsw_lib.sh source qos_lib.sh ipaddr() @@ -332,14 +331,6 @@ get_nmarked() ethtool_stats_get $swp3 ecn_marked } -get_qdisc_nmarked() -{ - local vlan=$1; shift - - busywait_for_counter 1100 +1 \ - qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked -} - get_qdisc_npackets() { local vlan=$1; shift @@ -393,15 +384,14 @@ build_backlog() check_marking() { - local get_nmarked=$1; shift local vlan=$1; shift local cond=$1; shift local npackets_0=$(get_qdisc_npackets $vlan) - local nmarked_0=$($get_nmarked $vlan) + local nmarked_0=$(get_nmarked $vlan) sleep 5 local npackets_1=$(get_qdisc_npackets $vlan) - local nmarked_1=$($get_nmarked 
$vlan) + local nmarked_1=$(get_nmarked $vlan) local nmarked_d=$((nmarked_1 - nmarked_0)) local npackets_d=$((npackets_1 - npackets_0)) @@ -414,7 +404,6 @@ check_marking() ecn_test_common() { local name=$1; shift - local get_nmarked=$1; shift local vlan=$1; shift local limit=$1; shift local backlog @@ -427,7 +416,7 @@ ecn_test_common() RET=0 backlog=$(build_backlog $vlan $((2 * limit / 3)) udp) check_err $? "Could not build the requested backlog" - pct=$(check_marking "$get_nmarked" $vlan "== 0") + pct=$(check_marking $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "TC $((vlan - 10)): $name backlog < limit" @@ -437,23 +426,22 @@ ecn_test_common() RET=0 backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) check_err $? "Could not build the requested backlog" - pct=$(check_marking "$get_nmarked" $vlan ">= 95") + pct=$(check_marking $vlan ">= 95") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." log_test "TC $((vlan - 10)): $name backlog > limit" } -__do_ecn_test() +do_ecn_test() { - local get_nmarked=$1; shift local vlan=$1; shift local limit=$1; shift - local name=${1-ECN}; shift + local name=ECN start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 sleep 1 - ecn_test_common "$name" "$get_nmarked" $vlan $limit + ecn_test_common "$name" $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, it should all get dropped, and backlog @@ -467,23 +455,6 @@ __do_ecn_test() sleep 1 } -do_ecn_test() -{ - local vlan=$1; shift - local limit=$1; shift - - __do_ecn_test get_nmarked "$vlan" "$limit" -} - -do_ecn_test_perband() -{ - local vlan=$1; shift - local limit=$1; shift - - mlxsw_only_on_spectrum 3+ || return - __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" -} - do_ecn_nodrop_test() { local vlan=$1; shift @@ -494,7 +465,7 @@ do_ecn_nodrop_test() $h3_mac tos=0x01 sleep 1 - ecn_test_common "$name" get_nmarked $vlan $limit + ecn_test_common "$name" $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, in nodrop mode, make sure it goes to @@ -524,7 +495,7 @@ do_red_test() RET=0 backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01) check_err $? "Could not build the requested backlog" - pct=$(check_marking get_nmarked $vlan "== 0") + pct=$(check_marking $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "TC $((vlan - 10)): RED backlog < limit" @@ -532,7 +503,7 @@ do_red_test() RET=0 backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) check_fail $? "Traffic went into backlog instead of being early-dropped" - pct=$(check_marking get_nmarked $vlan "== 0") + pct=$(check_marking $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) @@ -573,55 +544,6 @@ do_mc_backlog_test() log_test "TC $((vlan - 10)): Qdisc reports MC backlog" } -do_mark_test() -{ - local vlan=$1; shift - local limit=$1; shift - local subtest=$1; shift - local fetch_counter=$1; shift - local should_fail=$1; shift - local base - - mlxsw_only_on_spectrum 2+ || return - - RET=0 - - start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ - $h3_mac tos=0x01 - - # Create a bit of a backlog and observe no mirroring due to marks. 
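# A compact sketch of the counter-delta idiom this (now removed) test
# relied on, assuming busywait and until_counter_is from
# net/forwarding/lib.sh; fetch_counter stands in for the per-test
# counter callback.
counter_grew_by()
{
	local fetch_counter=$1; shift
	local min_delta=$1; shift
	local base

	base=$($fetch_counter)
	busywait 1100 until_counter_is ">= $((base + min_delta))" \
		$fetch_counter >/dev/null
}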
- qevent_rule_install_$subtest - - build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null - - base=$($fetch_counter) - count=$(busywait 1100 until_counter_is ">= $((base + 1))" \ - $fetch_counter) - check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure" - - # Above limit, everything should be mirrored, we should see lots of - # packets. - build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null - busywait_for_counter 1100 +10000 \ - $fetch_counter > /dev/null - check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd" - - # When the rule is uninstalled, there should be no mirroring. - qevent_rule_uninstall_$subtest - busywait_for_counter 1100 +10 \ - $fetch_counter > /dev/null - check_fail $? "Spurious packets observed after uninstall" - - if ((should_fail)); then - log_test "TC $((vlan - 10)): marked packets not $subtest'd" - else - log_test "TC $((vlan - 10)): marked packets $subtest'd" - fi - - stop_traffic - sleep 1 -} - do_drop_test() { local vlan=$1; shift @@ -629,10 +551,10 @@ do_drop_test() local trigger=$1; shift local subtest=$1; shift local fetch_counter=$1; shift + local backlog local base local now - - mlxsw_only_on_spectrum 2+ || return + local pct RET=0 @@ -706,22 +628,6 @@ do_drop_mirror_test() tc filter del dev $h2 ingress pref 1 handle 101 flower } -do_mark_mirror_test() -{ - local vlan=$1; shift - local limit=$1; shift - - tc filter add dev $h2 ingress pref 1 handle 101 prot ip \ - flower skip_sw ip_proto tcp \ - action drop - - do_mark_test "$vlan" "$limit" mirror \ - qevent_counter_fetch_mirror \ - $(: should_fail=)0 - - tc filter del dev $h2 ingress pref 1 handle 101 flower -} - qevent_rule_install_trap() { tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ @@ -749,14 +655,3 @@ do_drop_trap_test() do_drop_test "$vlan" "$limit" "$trap_name" trap \ "qevent_counter_fetch_trap $trap_name" } - -qevent_rule_install_trap_fwd() -{ - tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ - action trap_fwd hw_stats disabled -} - -qevent_rule_uninstall_trap_fwd() -{ - tc filter del block 10 pref 1234 handle 102 matchall -} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index 1e5ad32094..f3ef3274f9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -4,13 +4,11 @@ ALL_TESTS=" ping_ipv4 ecn_test - ecn_test_perband ecn_nodrop_test red_test mc_backlog_test red_mirror_test red_trap_test - ecn_mirror_test " : ${QDISC:=ets} source sch_red_core.sh @@ -23,58 +21,26 @@ source sch_red_core.sh BACKLOG1=200000 BACKLOG2=500000 -install_root_qdisc() -{ - tc qdisc add dev $swp3 root handle 10: $QDISC \ - bands 8 priomap 7 6 5 4 3 2 1 0 -} - -install_qdisc_tc0() +install_qdisc() { local -a args=("$@") + tc qdisc add dev $swp3 root handle 10: $QDISC \ + bands 8 priomap 7 6 5 4 3 2 1 0 tc qdisc add dev $swp3 parent 10:8 handle 108: red \ limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ probability 1.0 avpkt 8000 burst 38 "${args[@]}" -} - -install_qdisc_tc1() -{ - local -a args=("$@") - tc qdisc add dev $swp3 parent 10:7 handle 107: red \ limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ probability 1.0 avpkt 8000 burst 63 "${args[@]}" -} - -install_qdisc() -{ - install_root_qdisc - install_qdisc_tc0 "$@" - install_qdisc_tc1 "$@" sleep 1 } -uninstall_qdisc_tc0() -{ - tc qdisc del dev $swp3 parent 10:8 -} - -uninstall_qdisc_tc1() -{ - tc qdisc del dev $swp3 
parent 10:7 -} - -uninstall_root_qdisc() -{ - tc qdisc del dev $swp3 root -} - uninstall_qdisc() { - uninstall_qdisc_tc0 - uninstall_qdisc_tc1 - uninstall_root_qdisc + tc qdisc del dev $swp3 parent 10:7 + tc qdisc del dev $swp3 parent 10:8 + tc qdisc del dev $swp3 root } ecn_test() @@ -87,16 +53,6 @@ ecn_test() uninstall_qdisc } -ecn_test_perband() -{ - install_qdisc ecn - - do_ecn_test_perband 10 $BACKLOG1 - do_ecn_test_perband 11 $BACKLOG2 - - uninstall_qdisc -} - ecn_nodrop_test() { install_qdisc ecn nodrop @@ -156,16 +112,6 @@ red_trap_test() uninstall_qdisc } -ecn_mirror_test() -{ - install_qdisc ecn qevent mark block 10 - - do_mark_mirror_test 10 $BACKLOG1 - do_mark_mirror_test 11 $BACKLOG2 - - uninstall_qdisc -} - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index d79a82f317..ede9c38d3e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -4,7 +4,6 @@ ALL_TESTS=" ping_ipv4 ecn_test - ecn_test_perband ecn_nodrop_test red_test mc_backlog_test @@ -36,13 +35,6 @@ ecn_test() uninstall_qdisc } -ecn_test_perband() -{ - install_qdisc ecn - do_ecn_test_perband 10 $BACKLOG - uninstall_qdisc -} - ecn_nodrop_test() { install_qdisc ecn nodrop diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index e9f65bd2e2..50654f8a8c 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -7,9 +7,12 @@ NUM_NETIFS=6 source $lib_dir/lib.sh source $lib_dir/tc_common.sh source $lib_dir/devlink_lib.sh -source ../mlxsw_lib.sh -mlxsw_only_on_spectrum 2+ || exit 1 +if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \ + "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then + echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3" + exit 1 +fi current_test="" @@ -25,7 +28,7 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh index 06a80f40da..73035e2508 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh @@ -2,9 +2,11 @@ # SPDX-License-Identifier: GPL-2.0 source "../../../../net/forwarding/devlink_lib.sh" -source ../mlxsw_lib.sh -mlxsw_only_on_spectrum 1 || exit 1 +if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then + echo "SKIP: test is tailored for Mellanox Spectrum" + exit 1 +fi # Needed for returning to default declare -A KVD_DEFAULTS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index bcb110e830..b9b8274643 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,7 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS="router tc_flower mirror_gre 
tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh @@ -50,8 +50,8 @@ for current_test in ${TESTS:-$ALL_TESTS}; do else log_test "'$current_test' [$profile] overflow $target" fi + RET_FIN=$(( RET_FIN || RET )) done - RET_FIN=$(( RET_FIN || RET )) done done current_test="" diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh index 3e3e06ea57..86e787895f 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -60,7 +60,8 @@ __tc_police_test() tc_police_rules_create $count $should_fail - offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l) + offload_count=$(tc -j filter show dev $swp1 ingress | + jq "[.[] | select(.options.in_hw == true)] | length") ((offload_count == count)) check_err_fail $should_fail $? "tc police offload count" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index 0441a18f09..5ec3beb637 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -20,7 +20,6 @@ NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh -source mlxsw_lib.sh switch_create() { @@ -170,7 +169,7 @@ matchall_sample_egress_test() # It is forbidden in mlxsw driver to have matchall with sample action # bound on egress. Spectrum-1 specific restriction - mlxsw_only_on_spectrum 1 || return + [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return tc qdisc add dev $swp1 clsact diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh index 83a0210e75..373d5f2a84 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -51,7 +51,6 @@ NUM_NETIFS=8 CAPTURE_FILE=$(mktemp) source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh -source mlxsw_lib.sh # Available at https://github.com/Mellanox/libpsample require_command psample @@ -432,7 +431,7 @@ tc_sample_md_out_tc_test() RET=0 # Output traffic class is not supported on Spectrum-1. - mlxsw_only_on_spectrum 2+ || return + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ skip_sw action sample rate 5 group 1 @@ -478,7 +477,7 @@ tc_sample_md_out_tc_occ_test() RET=0 # Output traffic class occupancy is not supported on Spectrum-1. - mlxsw_only_on_spectrum 2+ || return + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ skip_sw action sample rate 1024 group 1 @@ -522,7 +521,7 @@ tc_sample_md_latency_test() RET=0 # Egress sampling not supported on Spectrum-1. - mlxsw_only_on_spectrum 2+ || return + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \ skip_sw action sample rate 5 group 1 @@ -551,7 +550,7 @@ tc_sample_acl_group_conflict_test() # port with different groups. # Policy-based sampling is not supported on Spectrum-1. 
- mlxsw_only_on_spectrum 2+ || return + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ skip_sw action sample rate 1024 group 1 @@ -580,7 +579,7 @@ __tc_sample_acl_rate_test() RET=0 # Policy-based sampling is not supported on Spectrum-1. - mlxsw_only_on_spectrum 2+ || return + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1 @@ -632,7 +631,7 @@ tc_sample_acl_max_rate_test() RET=0 # Policy-based sampling is not supported on Spectrum-1. - mlxsw_only_on_spectrum 2+ || return + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ skip_sw action sample rate $((2 ** 24 - 1)) group 1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 99a332b712..729a86cc4e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -4,35 +4,10 @@ # Test various aspects of VxLAN offloading which are specific to mlxsw, such # as sanitization of invalid configurations and offload indication. -: ${ADDR_FAMILY:=ipv4} -export ADDR_FAMILY - -: ${LOCAL_IP_1:=198.51.100.1} -export LOCAL_IP_1 - -: ${LOCAL_IP_2:=198.51.100.2} -export LOCAL_IP_2 - -: ${PREFIX_LEN:=32} -export PREFIX_LEN - -: ${UDPCSUM_FLAFS:=noudpcsum} -export UDPCSUM_FLAFS - -: ${MC_IP:=239.0.0.1} -export MC_IP - -: ${IP_FLAG:=""} -export IP_FLAG - -: ${ALL_TESTS:=" - sanitization_test - offload_indication_test - sanitization_vlan_aware_test - offload_indication_vlan_aware_test -"} - lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="sanitization_test offload_indication_test \ + sanitization_vlan_aware_test offload_indication_vlan_aware_test" NUM_NETIFS=2 : ${TIMEOUT:=20000} # ms source $lib_dir/lib.sh @@ -88,8 +63,8 @@ sanitization_single_dev_valid_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 sanitization_single_dev_test_pass @@ -105,8 +80,8 @@ sanitization_single_dev_vlan_aware_test() ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 sanitization_single_dev_test_pass @@ -122,8 +97,8 @@ sanitization_single_dev_mcast_enabled_test() ip link add dev br0 type bridge - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 sanitization_single_dev_test_fail @@ -140,9 +115,9 @@ sanitization_single_dev_mcast_group_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add name dummy1 up type dummy - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ - dev dummy1 group $MC_IP + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 
198.51.100.1 dstport 4789 \ + dev dummy1 group 239.0.0.1 sanitization_single_dev_test_fail @@ -159,7 +134,7 @@ sanitization_single_dev_no_local_ip_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ ttl 20 tos inherit dstport 4789 sanitization_single_dev_test_fail @@ -170,14 +145,31 @@ sanitization_single_dev_no_local_ip_test() log_test "vxlan device with no local ip" } -sanitization_single_dev_learning_enabled_ipv4_test() +sanitization_single_dev_local_ipv6_test() { RET=0 ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 2001:db8::1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local ipv6 address" +} + +sanitization_single_dev_learning_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 sanitization_single_dev_test_pass @@ -194,8 +186,8 @@ sanitization_single_dev_local_interface_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add name dummy1 up type dummy - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1 sanitization_single_dev_test_fail @@ -212,8 +204,8 @@ sanitization_single_dev_port_range_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ srcport 4000 5000 sanitization_single_dev_test_fail @@ -230,8 +222,8 @@ sanitization_single_dev_tos_static_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos 20 local 198.51.100.1 dstport 4789 sanitization_single_dev_test_fail @@ -247,8 +239,8 @@ sanitization_single_dev_ttl_inherit_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl inherit tos inherit local 198.51.100.1 dstport 4789 sanitization_single_dev_test_fail @@ -258,14 +250,14 @@ sanitization_single_dev_ttl_inherit_test() log_test "vxlan device with inherit ttl" } -sanitization_single_dev_udp_checksum_ipv4_test() +sanitization_single_dev_udp_checksum_test() { RET=0 ip link add dev br0 type bridge mcast_snooping 0 ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ttl 20 tos inherit local 198.51.100.1 dstport 4789 sanitization_single_dev_test_fail @@ -284,12 +276,13 @@ sanitization_single_dev_test() sanitization_single_dev_mcast_enabled_test 
sanitization_single_dev_mcast_group_test sanitization_single_dev_no_local_ip_test - sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test + sanitization_single_dev_local_ipv6_test + sanitization_single_dev_learning_enabled_test sanitization_single_dev_local_interface_test sanitization_single_dev_port_range_test sanitization_single_dev_tos_static_test sanitization_single_dev_ttl_inherit_test - sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test + sanitization_single_dev_udp_checksum_test } sanitization_multi_devs_test_pass() @@ -341,10 +334,10 @@ sanitization_multi_devs_valid_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 sanitization_multi_devs_test_pass @@ -363,10 +356,10 @@ sanitization_multi_devs_ttl_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ - ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 40 tos inherit local 198.51.100.1 dstport 4789 sanitization_multi_devs_test_fail @@ -385,10 +378,10 @@ sanitization_multi_devs_udp_dstport_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 5789 sanitization_multi_devs_test_fail @@ -407,10 +400,10 @@ sanitization_multi_devs_local_ip_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.2 dstport 4789 sanitization_multi_devs_test_fail @@ -450,12 +443,12 @@ offload_indication_setup_create() ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 - ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address add 198.51.100.1/32 dev lo ip link add name vxlan0 up master br0 type 
vxlan id 10 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 } offload_indication_setup_destroy() @@ -463,7 +456,7 @@ offload_indication_setup_destroy() ip link del dev vxlan1 ip link del dev vxlan0 - ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address del 198.51.100.1/32 dev lo ip link set dev $swp2 nomaster ip link set dev $swp1 nomaster @@ -476,7 +469,7 @@ offload_indication_fdb_flood_test() { RET=0 - bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2 + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \ bridge fdb show brport vxlan0 @@ -492,7 +485,7 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ - dst $LOCAL_IP_2 + dst 198.51.100.2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 @@ -543,7 +536,7 @@ offload_indication_fdb_bridge_test() # marked as offloaded in both drivers RET=0 - bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2 + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? @@ -567,17 +560,17 @@ offload_indication_decap_route_test() RET=0 busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan0 down busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan1 down busywait "$TIMEOUT" not wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vxlan device down" @@ -586,26 +579,26 @@ offload_indication_decap_route_test() ip link set dev vxlan1 up busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan0 up busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vxlan device up" RET=0 - ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address delete 198.51.100.1/32 dev lo busywait "$TIMEOUT" not wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? - ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address add 198.51.100.1/32 dev lo busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - add local route" @@ -614,18 +607,18 @@ offload_indication_decap_route_test() ip link set dev $swp1 nomaster busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? 
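# A minimal sketch (assumption, not part of the patch) of how the two
# polling helpers from the forwarding lib.sh compose in the checks above:
# busywait re-runs its command until it succeeds or the given number of
# milliseconds elapses, and wait_for_offload merely greps the wrapped
# command's output for the "offload" keyword.
busywait_sketch()
{
	local timeout_ms=$1; shift
	local start=$(date +%s%3N)

	while ! "$@"; do
		[ $(($(date +%s%3N) - start)) -gt "$timeout_ms" ] && return 1
	done
	return 0
}

wait_for_offload_sketch()
{
	"$@" | grep -q offload
}

# Usage mirroring the test above:
# busywait_sketch "$TIMEOUT" wait_for_offload_sketch \
#	ip route show table local 198.51.100.1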
ip link set dev $swp2 nomaster busywait "$TIMEOUT" not wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - local ports enslavement" @@ -634,12 +627,12 @@ offload_indication_decap_route_test() ip link del dev br0 busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? ip link del dev br1 busywait "$TIMEOUT" not wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - bridge device deletion" @@ -653,25 +646,25 @@ offload_indication_decap_route_test() ip link set dev vxlan0 master br0 ip link set dev vxlan1 master br1 busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? ip link del dev vxlan0 busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? ip link del dev vxlan1 busywait "$TIMEOUT" not wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vxlan device deletion" ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 } check_fdb_offloaded() @@ -728,10 +721,10 @@ __offload_indication_join_vxlan_first() local mac=00:11:22:33:44:55 local zmac=00:00:00:00:00:00 - bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 ip link set dev vxlan0 master br0 - bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2 + bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2 RET=0 check_vxlan_fdb_not_offloaded @@ -781,8 +774,8 @@ __offload_indication_join_vxlan_first() offload_indication_join_vxlan_first() { ip link add dev br0 up type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 __offload_indication_join_vxlan_first @@ -796,7 +789,7 @@ __offload_indication_join_vxlan_last() RET=0 - bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 ip link set dev $swp1 master br0 @@ -816,8 +809,8 @@ __offload_indication_join_vxlan_last() offload_indication_join_vxlan_last() { ip link add dev br0 up type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 __offload_indication_join_vxlan_last @@ -844,10 +837,10 @@ sanitization_vlan_aware_test() ip link add 
dev br0 type bridge mcast_snooping 0 vlan_filtering 1 ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 # Test that when each VNI is mapped to a different VLAN we can enslave # a port to the bridge @@ -891,20 +884,20 @@ sanitization_vlan_aware_test() # Use the offload indication of the local route to ensure the VXLAN # configuration was correctly rollbacked. - ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address add 198.51.100.1/32 dev lo ip link set dev vxlan10 type vxlan ttl 10 ip link set dev $swp1 master br0 &> /dev/null check_fail $? busywait "$TIMEOUT" not wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? log_test "vlan-aware - failed enslavement to bridge due to conflict" ip link set dev vxlan10 type vxlan ttl 20 - ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address del 198.51.100.1/32 dev lo ip link del dev vxlan20 ip link del dev vxlan10 @@ -923,12 +916,12 @@ offload_indication_vlan_aware_setup_create() bridge vlan add vid 10 dev $swp1 bridge vlan add vid 20 dev $swp1 - ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address add 198.51.100.1/32 dev lo ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ - $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged @@ -942,7 +935,7 @@ offload_indication_vlan_aware_setup_destroy() ip link del dev vxlan20 ip link del dev vxlan10 - ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo + ip address del 198.51.100.1/32 dev lo bridge vlan del vid 20 dev $swp1 bridge vlan del vid 10 dev $swp1 @@ -959,7 +952,7 @@ offload_indication_vlan_aware_fdb_test() log_info "vxlan entry offload indication - vlan-aware" bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ - dst $LOCAL_IP_2 vlan 10 + dst 198.51.100.2 vlan 10 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 @@ -1010,7 +1003,7 @@ offload_indication_vlan_aware_fdb_test() # marked as offloaded in both drivers RET=0 - bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2 + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? @@ -1028,7 +1021,7 @@ offload_indication_vlan_aware_decap_route_test() RET=0 busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? # Toggle PVID flag on one VxLAN device and make sure route is still @@ -1036,7 +1029,7 @@ offload_indication_vlan_aware_decap_route_test() bridge vlan add vid 10 dev vxlan10 untagged busywait "$TIMEOUT" wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? 
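# For reference (illustrative output, not from the patch): when the decap
# route is offloaded, iproute2 reports the RTM_F_OFFLOAD flag as a trailing
# keyword, roughly:
#   local 198.51.100.1 dev lo proto kernel scope host offload
# which is what wait_for_offload's grep for "offload" keys on.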
# Toggle PVID flag on second VxLAN device and make sure route is no @@ -1044,15 +1037,14 @@ offload_indication_vlan_aware_decap_route_test() bridge vlan add vid 20 dev vxlan20 untagged busywait "$TIMEOUT" not wait_for_offload \ - ip $IP_FLAG route show table local $LOCAL_IP_1 + ip route show table local 198.51.100.1 check_err $? # Toggle PVID flag back and make sure route is marked as offloaded bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged - busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \ - $LOCAL_IP_1 + busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vni map/unmap" @@ -1062,8 +1054,8 @@ offload_indication_vlan_aware_join_vxlan_first() { ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 __offload_indication_join_vxlan_first 1 @@ -1075,8 +1067,8 @@ offload_indication_vlan_aware_join_vxlan_last() { ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 __offload_indication_join_vxlan_last @@ -1093,14 +1085,14 @@ offload_indication_vlan_aware_l3vni_test() sysctl_set net.ipv6.conf.default.disable_ipv6 1 ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 0 - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 ip link set dev $swp1 master br0 # The test will use the offload indication on the FDB entry to # understand if the tunnel is offloaded or not - bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 + bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1 ip link set dev vxlan0 master br0 bridge vlan add dev vxlan0 vid 10 pvid untagged diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh index 38148f5187..749ba3cfda 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh @@ -4,21 +4,6 @@ # Test vetoing of FDB entries that mlxsw can not offload. This exercises several # different veto vectors to test various rollback scenarios in the vxlan driver. 
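# A self-contained illustration (not part of the patch) of the redirection
# idiom the veto tests below rely on. In "2>&1 >/dev/null" the order matters:
# stderr is first duplicated onto the pipe, and only then is stdout
# discarded, so grep sees nothing but the extack text printed on stderr.
demo_stderr_only_pipe()
{
	{ echo not-extack; echo "Error: mlxsw_spectrum: vetoed" >&2; } \
		2>&1 >/dev/null | grep -q mlxsw_spectrum
	# Returns 0: only the stderr line reached grep.
}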
-: ${LOCAL_IP:=198.51.100.1} -export LOCAL_IP - -: ${REMOTE_IP_1:=198.51.100.2} -export REMOTE_IP_1 - -: ${REMOTE_IP_2:=198.51.100.3} -export REMOTE_IP_2 - -: ${UDPCSUM_FLAFS:=noudpcsum} -export UDPCSUM_FLAFS - -: ${MC_IP:=224.0.0.1} -export MC_IP - lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" @@ -41,8 +26,8 @@ setup_prepare() ip link set dev $swp1 master br0 ip link set dev $swp2 up - ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ - ttl 20 tos inherit local $LOCAL_IP dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 ip link set dev vxlan0 master br0 } @@ -65,11 +50,11 @@ fdb_create_veto_test() RET=0 bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ - dst $REMOTE_IP_1 2>/dev/null + dst 198.51.100.2 2>/dev/null check_fail $? "multicast MAC not rejected" bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ - dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum + dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum check_err $? "multicast MAC rejected without extack" log_test "vxlan FDB veto - create" @@ -80,15 +65,15 @@ fdb_replace_veto_test() RET=0 bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \ - dst $REMOTE_IP_1 + dst 198.51.100.2 check_err $? "valid FDB rejected" bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ - dst $REMOTE_IP_1 port 1234 2>/dev/null + dst 198.51.100.2 port 1234 2>/dev/null check_fail $? "FDB with an explicit port not rejected" bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ - dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \ + dst 198.51.100.2 port 1234 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? "FDB with an explicit port rejected without extack" @@ -100,15 +85,15 @@ fdb_append_veto_test() RET=0 bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \ - dst $REMOTE_IP_1 + dst 198.51.100.2 check_err $? "valid FDB rejected" bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ - dst $REMOTE_IP_2 port 1234 2>/dev/null + dst 198.51.100.3 port 1234 2>/dev/null check_fail $? "FDB with an explicit port not rejected" bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ - dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \ + dst 198.51.100.3 port 1234 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? "FDB with an explicit port rejected without extack" @@ -120,11 +105,11 @@ fdb_changelink_veto_test() RET=0 ip link set dev vxlan0 type vxlan \ - group $MC_IP dev lo 2>/dev/null + group 224.0.0.1 dev lo 2>/dev/null check_fail $? "FDB with a multicast IP not rejected" ip link set dev vxlan0 type vxlan \ - group $MC_IP dev lo 2>&1 >/dev/null \ + group 224.0.0.1 dev lo 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? 
"FDB with a multicast IP rejected without extack" diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh index 922744059a..7ca1f030d2 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -50,7 +50,7 @@ function make_netdev { modprobe netdevsim fi - echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device + echo $NSIM_ID > /sys/bus/netdevsim/new_device # get new device name ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/ } diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index eaf8a04a7c..f7d84549cc 100644 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -156,11 +156,6 @@ create_tcam_skeleton() setup_prepare() { - ip link set $eth0 up - ip link set $eth1 up - ip link set $eth2 up - ip link set $eth3 up - create_tcam_skeleton $eth0 ip link add br0 type bridge @@ -247,9 +242,9 @@ test_vlan_push() tcpdump_cleanup } -test_vlan_ingress_modify() +test_vlan_modify() { - printf "Testing ingress VLAN modification.. " + printf "Testing VLAN modification.. " ip link set br0 type bridge vlan_filtering 1 bridge vlan add dev $eth0 vid 200 @@ -285,44 +280,6 @@ test_vlan_ingress_modify() ip link set br0 type bridge vlan_filtering 0 } -test_vlan_egress_modify() -{ - printf "Testing egress VLAN modification.. " - - tc qdisc add dev $eth1 clsact - - ip link set br0 type bridge vlan_filtering 1 - bridge vlan add dev $eth0 vid 200 - bridge vlan add dev $eth1 vid 200 - - tc filter add dev $eth1 egress chain $(ES0) pref 3 \ - protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \ - action vlan modify id 300 priority 7 - - tcpdump_start $eth2 - - $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip - - sleep 1 - - tcpdump_stop - - if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then - echo "OK" - else - echo "FAIL" - fi - - tcpdump_cleanup - - tc filter del dev $eth1 egress chain $(ES0) pref 3 - tc qdisc del dev $eth1 clsact - - bridge vlan del dev $eth0 vid 200 - bridge vlan del dev $eth1 vid 200 - ip link set br0 type bridge vlan_filtering 0 -} - test_skbedit_priority() { local num_pkts=100 @@ -347,8 +304,7 @@ trap cleanup EXIT ALL_TESTS=" test_vlan_pop test_vlan_push - test_vlan_ingress_modify - test_vlan_egress_modify + test_vlan_modify test_skbedit_priority " diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index c3311c8c40..8ec1922e97 100644 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -428,7 +428,7 @@ for t in $TEST_CASES; do exit 1 fi done -(cd $TRACING_DIR; finish_ftrace) # for cleanup +(cd $TRACING_DIR; initialize_ftrace) # for cleanup prlog "" prlog "# of passed: " `echo $PASSED_CASES | wc -w` diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5f6cbec847..000fd05e84 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -124,22 +124,10 @@ initialize_ftrace() { # Reset ftrace to initial-state [ -f uprobe_events ] && echo > uprobe_events [ -f synthetic_events ] && echo > synthetic_events [ -f snapshot ] && echo 0 > snapshot - -# Stop tracing while reading the trace file by default, to 
prevent -# the test results while checking it and to avoid taking a long time -# to check the result. - [ -f options/pause-on-trace ] && echo 1 > options/pause-on-trace - clear_trace enable_tracing } -finish_ftrace() { - initialize_ftrace -# And recover it to default. - [ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace -} - check_requires() { # Check required files and tracers for i in "$@" ; do r=${i%:README} diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index dc7ade1967..84285a6f60 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -22,9 +22,6 @@ ppc64*) ppc*) ARG1=%r3 ;; -s390*) - ARG1=%r2 -;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 47d84b5cb6..474ca1a9a0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -32,10 +32,6 @@ ppc*) GOODREG=%r3 BADREG=%msr ;; -s390*) - GOODREG=%r2 - BADREG=%s2 -;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index fbcbdb6963..0e78b49d0f 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -8,4 +8,3 @@ futex_wait_uninitialized_heap futex_wait_wouldblock futex_wait futex_requeue -futex_waitv diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 5cc38de9d8..bd1fec59e0 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -17,8 +17,7 @@ TEST_GEN_FILES := \ futex_wait_uninitialized_heap \ futex_wait_private_mapped_file \ futex_wait \ - futex_requeue \ - futex_waitv + futex_requeue TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index 3651ce17be..1f8f6daaf1 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -17,7 +17,6 @@ #include #include "futextest.h" -#include "futex2test.h" #include "logging.h" #define TEST_NAME "futex-wait-timeout" @@ -97,12 +96,6 @@ int main(int argc, char *argv[]) struct timespec to; pthread_t thread; int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1, - .flags = FUTEX_32, - .__reserved = 0 - }; while ((c = getopt(argc, argv, "cht:v:")) != -1) { switch (c) { @@ -125,7 +118,7 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(9); + ksft_set_plan(7); ksft_print_msg("%s: Block on a futex and wait for timeout\n", basename(argv[0])); ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); @@ -182,18 +175,6 @@ int main(int argc, char *argv[]) res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME); test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS); - /* futex_waitv with CLOCK_MONOTONIC */ - if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; - res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); - 
test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT); - - /* futex_waitv with CLOCK_REALTIME */ - if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; - res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME); - test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT); - ksft_print_cnts(); return ret; } diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 7d7a6a06cd..0ae390ff81 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -22,7 +22,6 @@ #include #include #include "futextest.h" -#include "futex2test.h" #include "logging.h" #define TEST_NAME "futex-wait-wouldblock" @@ -43,12 +42,6 @@ int main(int argc, char *argv[]) futex_t f1 = FUTEX_INITIALIZER; int res, ret = RET_PASS; int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1+1, - .flags = FUTEX_32, - .__reserved = 0 - }; while ((c = getopt(argc, argv, "cht:v:")) != -1) { switch (c) { @@ -68,44 +61,18 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(2); + ksft_set_plan(1); ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", basename(argv[0])); info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); if (!res || errno != EWOULDBLOCK) { - ksft_test_result_fail("futex_wait returned: %d %s\n", - res ? errno : res, - res ? strerror(errno) : ""); + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); ret = RET_FAIL; - } else { - ksft_test_result_pass("futex_wait\n"); } - if (clock_gettime(CLOCK_MONOTONIC, &to)) { - error("clock_gettime failed\n", errno); - return errno; - } - - to.tv_nsec += timeout_ns; - - if (to.tv_nsec >= 1000000000) { - to.tv_sec++; - to.tv_nsec -= 1000000000; - } - - info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); - res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); - if (!res || errno != EWOULDBLOCK) { - ksft_test_result_pass("futex_waitv returned: %d %s\n", - res ? errno : res, - res ? 
strerror(errno) : ""); - ret = RET_FAIL; - } else { - ksft_test_result_pass("futex_waitv\n"); - } - - ksft_print_cnts(); + print_result(TEST_NAME, ret); return ret; } diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 5ccd599da6..11a9d62290 100644 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -79,6 +79,3 @@ echo echo ./futex_requeue $COLOR - -echo -./futex_waitv $COLOR diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore index ededb077a3..a4969f7ee0 100644 --- a/tools/testing/selftests/gpio/.gitignore +++ b/tools/testing/selftests/gpio/.gitignore @@ -1,4 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only gpio-mockup-cdev -gpio-chip-info -gpio-line-name diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 71b3066023..42ea7d2aa8 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := gpio-mockup.sh gpio-sim.sh +TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh -TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name -CFLAGS += -O2 -g -Wall -I../../../../usr/include/ +TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev +CFLAGS += -I../../../../usr/include include ../lib.mk diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config index 409a8532fa..ce100342c2 100644 --- a/tools/testing/selftests/gpio/config +++ b/tools/testing/selftests/gpio/config @@ -1,4 +1,3 @@ CONFIG_GPIOLIB=y CONFIG_GPIO_CDEV=y CONFIG_GPIO_MOCKUP=m -CONFIG_GPIO_SIM=m diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c index d1640f44f8..e83eac7162 100644 --- a/tools/testing/selftests/gpio/gpio-mockup-cdev.c +++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) { char *chip; int opt, ret, cfd, lfd; - unsigned int offset, val = 0, abiv; + unsigned int offset, val, abiv; uint32_t flags_v1; uint64_t flags_v2; diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c index f4a15cbdd5..af7f9c7d59 100644 --- a/tools/testing/selftests/ir/ir_loopback.c +++ b/tools/testing/selftests/ir/ir_loopback.c @@ -26,19 +26,10 @@ #include "../kselftest.h" #define TEST_SCANCODES 10 +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define SYSFS_PATH_MAX 256 #define DNAME_PATH_MAX 256 -/* - * Support ancient lirc.h which does not have these values. Can be removed - * once RHEL 8 is no longer a relevant testing platform. 
- */ -#if RC_PROTO_MAX < 26 -#define RC_PROTO_RCMM12 24 -#define RC_PROTO_RCMM24 25 -#define RC_PROTO_RCMM32 26 -#endif - static const struct { enum rc_proto proto; const char *name; diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index 806a150648..aa91d20632 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -4,7 +4,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -ifeq ($(ARCH),$(filter $(ARCH),x86 ppc64le)) +ifeq ($(ARCH),x86) TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh TEST_FILES := kexec_common_lib.sh diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh index 0e114b34d5..43017cfe88 100644 --- a/tools/testing/selftests/kexec/kexec_common_lib.sh +++ b/tools/testing/selftests/kexec/kexec_common_lib.sh @@ -91,27 +91,6 @@ get_efi_var_secureboot_mode() return 0; } -# On powerpc platform, check device-tree property -# /proc/device-tree/ibm,secureboot/os-secureboot-enforcing -# to detect secureboot state. -get_ppc64_secureboot_mode() -{ - local secure_boot_file="/proc/device-tree/ibm,secureboot/os-secureboot-enforcing" - # Check for secure boot file existence - if [ -f $secure_boot_file ]; then - log_info "Secureboot is enabled (Device tree)" - return 1; - fi - log_info "Secureboot is not enabled (Device tree)" - return 0; -} - -# Return the architecture of the system -get_arch() -{ - echo $(arch) -} - # Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID). # The secure boot mode can be accessed either as the last integer # of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from @@ -121,19 +100,14 @@ get_arch() get_secureboot_mode() { local secureboot_mode=0 - local system_arch=$(get_arch) - if [ "$system_arch" == "ppc64le" ]; then - get_ppc64_secureboot_mode + get_efivarfs_secureboot_mode + secureboot_mode=$? + + # fallback to using the efi_var files + if [ $secureboot_mode -eq 0 ]; then + get_efi_var_secureboot_mode secureboot_mode=$? - else - get_efivarfs_secureboot_mode - secureboot_mode=$? - # fallback to using the efi_var files - if [ $secureboot_mode -eq 0 ]; then - get_efi_var_secureboot_mode - secureboot_mode=$? - fi fi if [ $secureboot_mode -eq 0 ]; then @@ -164,20 +138,15 @@ kconfig_enabled() return 0 } -# Attempt to get the kernel config first by checking the modules directory -# then via proc, and finally by extracting it from the kernel image or the -# configs.ko using scripts/extract-ikconfig. +# Attempt to get the kernel config first via proc, and then by +# extracting it from the kernel image or the configs.ko using +# scripts/extract-ikconfig. # Return 1 for found. get_kconfig() { local proc_config="/proc/config.gz" local module_dir="/lib/modules/`uname -r`" - local configs_module="$module_dir/kernel/kernel/configs.ko*" - - if [ -f $module_dir/config ]; then - IKCONFIG=$module_dir/config - return 1 - fi + local configs_module="$module_dir/kernel/kernel/configs.ko" if [ ! 
-f $proc_config ]; then modprobe configs > /dev/null 2>&1 diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh index c9ccb3c93d..2ff600388c 100644 --- a/tools/testing/selftests/kexec/test_kexec_file_load.sh +++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh @@ -97,11 +97,10 @@ check_for_imasig() check_for_modsig() { local module_sig_string="~Module signature appended~" + local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)" local ret=0 - tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE | \ - grep -q "$module_sig_string" - if [ $? -eq 0 ]; then + if [ "$sig" == "$module_sig_string" ]; then ret=1 log_info "kexec kernel image modsig signed" else @@ -226,12 +225,8 @@ get_secureboot_mode secureboot=$? # Are there pe and ima signatures -if [ "$(get_arch)" == 'ppc64le' ]; then - pe_signed=0 -else - check_for_pesig - pe_signed=$? -fi +check_for_pesig +pe_signed=$? check_for_imasig ima_signed=$? diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index f118098749..8d50483fe2 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -48,10 +48,6 @@ #include #include -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#endif - /* define kselftest exit codes */ #define KSFT_PASS 0 #define KSFT_FAIL 1 diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index a9ba782d8c..cc9c846585 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -33,9 +33,9 @@ tap_timeout() { # Make sure tests will time out if utility is available. if [ -x /usr/bin/timeout ] ; then - /usr/bin/timeout --foreground "$kselftest_timeout" $1 + /usr/bin/timeout --foreground "$kselftest_timeout" "$1" else - $1 + "$1" fi } @@ -65,25 +65,17 @@ run_one() TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" echo "# $TEST_HDR_MSG" - if [ ! -e "$TEST" ]; then - echo "# Warning: file $TEST is missing!" + if [ ! -x "$TEST" ]; then + echo -n "# Warning: file $TEST is " + if [ ! -e "$TEST" ]; then + echo "missing!" + else + echo "not executable, correct this." + fi echo "not ok $test_num $TEST_HDR_MSG" else - cmd="./$BASENAME_TEST" - if [ ! -x "$TEST" ]; then - echo "# Warning: file $TEST is not executable" - - if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] - then - interpreter=$(head -n 1 "$TEST" | cut -c 3-) - cmd="$interpreter ./$BASENAME_TEST" - else - echo "not ok $test_num $TEST_HDR_MSG" - return - fi - fi cd `dirname $TEST` > /dev/null - ((((( tap_timeout "$cmd" 2>&1; echo $? >&3) | + ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | tap_prefix >&4) 3>&1) | (read xs; exit $xs)) 4>>"$logfile" && echo "ok $test_num $TEST_HDR_MSG") || diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 11779405dc..78e59620d2 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -671,9 +671,7 @@ #define EXPECT_STRNE(expected, seen) \ __EXPECT_STR(expected, seen, !=, 0) -#ifndef ARRAY_SIZE #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) -#endif /* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is * not thread-safe, but it should be fine in most sane test scenarios. 
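/*
 * Sketch (not part of the patch): the #ifndef guard being removed above is
 * the usual idiom that lets kselftest.h / kselftest_harness.h coexist with
 * a test that already defines its own ARRAY_SIZE, avoiding a macro
 * redefinition warning:
 *
 *	#ifndef ARRAY_SIZE
 *	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
 *	#endif
 *
 * After this revert the headers define it unconditionally and ir_loopback.c
 * carries a private copy again.
 */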
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index dce7de7755..b8dbabe24a 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,21 +1,17 @@ # SPDX-License-Identifier: GPL-2.0-only -/aarch64/arch_timer /aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/psci_cpu_on_test /aarch64/vgic_init -/aarch64/vgic_irq /s390x/memop /s390x/resets /s390x/sync_regs_test -/x86_64/amx_test -/x86_64/cpuid_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test /x86_64/emulator_error_test +/x86_64/get_cpuid_test /x86_64/get_msr_index_features -/x86_64/kvm_clock_test /x86_64/kvm_pv_test /x86_64/hyperv_clock /x86_64/hyperv_cpuid @@ -23,23 +19,18 @@ /x86_64/mmio_warning_test /x86_64/mmu_role_test /x86_64/platform_info_test -/x86_64/pmu_event_filter_test /x86_64/set_boot_cpu_id /x86_64/set_sregs_test -/x86_64/sev_migrate_tests /x86_64/smm_test /x86_64/state_test /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test -/x86_64/userspace_io_test /x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test -/x86_64/vmx_exception_with_invalid_guest_state -/x86_64/vmx_invalid_nested_guest_state /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test @@ -62,4 +53,3 @@ /set_memory_region_test /steal_time /kvm_binary_stats_test -/system_counter_offset_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 17c3f0749f..d1774f4613 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -32,31 +32,24 @@ endif ifeq ($(ARCH),s390) UNAME_M := s390x endif -# Set UNAME_M riscv compile/install to work -ifeq ($(ARCH),riscv) - UNAME_M := riscv -endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S -LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c +LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c -LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test +TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features -TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test @@ -65,13 +58,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test 
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test @@ -82,8 +72,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests -TEST_GEN_PROGS_x86_64 += x86_64/amx_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test @@ -97,21 +85,16 @@ TEST_GEN_PROGS_x86_64 += rseq_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test -TEST_GEN_PROGS_x86_64 += system_counter_offset_test -TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test TEST_GEN_PROGS_aarch64 += aarch64/vgic_init -TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += kvm_page_table_test -TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test -TEST_GEN_PROGS_aarch64 += memslot_perf_test TEST_GEN_PROGS_aarch64 += rseq_test TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time @@ -128,13 +111,6 @@ TEST_GEN_PROGS_s390x += rseq_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS_s390x += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test - TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) @@ -149,7 +125,7 @@ endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(> 16) << 16) | ((uint64_t)(flags) << 12) | index) #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) #define GICR_TYPER 0x8 -#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) -#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) - struct vm_gic { struct kvm_vm *vm; int gic_fd; - uint32_t gic_dev_type; }; -static uint64_t max_phys_size; +static int max_ipa_bits; /* helper to access a redistributor register */ -static int access_v3_redist_reg(int gicv3_fd, int vcpu, int offset, - uint32_t *val, bool write) +static int access_redist_reg(int gicv3_fd, int vcpu, int offset, + uint32_t *val, bool write) { uint64_t attr = REG_OFFSET(vcpu, offset); @@ -61,13 +58,12 
@@ static int run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) return 0; } -static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, uint32_t nr_vcpus) +static struct vm_gic vm_gic_create(void) { struct vm_gic v; - v.gic_dev_type = gic_dev_type; - v.vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); - v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false); + v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL); + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); return v; } @@ -78,129 +74,78 @@ static void vm_gic_destroy(struct vm_gic *v) kvm_vm_free(v->vm); } -struct vgic_region_attr { - uint64_t attr; - uint64_t size; - uint64_t alignment; -}; - -struct vgic_region_attr gic_v3_dist_region = { - .attr = KVM_VGIC_V3_ADDR_TYPE_DIST, - .size = 0x10000, - .alignment = 0x10000, -}; - -struct vgic_region_attr gic_v3_redist_region = { - .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST, - .size = NR_VCPUS * 0x20000, - .alignment = 0x10000, -}; - -struct vgic_region_attr gic_v2_dist_region = { - .attr = KVM_VGIC_V2_ADDR_TYPE_DIST, - .size = 0x1000, - .alignment = 0x1000, -}; - -struct vgic_region_attr gic_v2_cpu_region = { - .attr = KVM_VGIC_V2_ADDR_TYPE_CPU, - .size = 0x2000, - .alignment = 0x1000, -}; - /** - * Helper routine that performs KVM device tests in general. Eventually the - * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping - * DIST/REDIST (or DIST/CPUIF for GICv2). Assumption is 4 vcpus are going to be - * used hence the overlap. In the case of GICv3, A RDIST region is set at @0x0 - * and a DIST region is set @0x70000. The GICv2 case sets a CPUIF @0x0 and a - * DIST region @0x1000. + * Helper routine that performs KVM device tests in general and + * especially ARM_VGIC_V3 ones. Eventually the ARM_VGIC_V3 + * device gets created, a legacy RDIST region is set at @0x0 + * and a DIST region is set @0x60000 */ static void subtest_dist_rdist(struct vm_gic *v) { int ret; uint64_t addr; - struct vgic_region_attr rdist; /* CPU interface in GICv2*/ - struct vgic_region_attr dist; - - rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region - : gic_v2_cpu_region; - dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? 
gic_v3_dist_region - : gic_v2_dist_region; /* Check existing group/attributes */ kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr); + KVM_VGIC_V3_ADDR_TYPE_DIST); kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr); + KVM_VGIC_V3_ADDR_TYPE_REDIST); /* check non existing attribute */ - ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1); + ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, 0); TEST_ASSERT(ret && errno == ENXIO, "attribute not supported"); /* misaligned DIST and REDIST address settings */ - addr = dist.alignment / 0x10; + addr = 0x1000; ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr, true); - TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned"); + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "GICv3 dist base not 64kB aligned"); - addr = rdist.alignment / 0x10; ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); - TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned"); + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "GICv3 redist base not 64kB aligned"); /* out of range address */ - addr = max_phys_size; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr, true); - TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit"); + if (max_ipa_bits) { + addr = 1ULL << max_ipa_bits; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit"); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); - TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit"); - - /* Space for half a rdist (a rdist is: 2 * rdist.alignment). 
*/ - addr = max_phys_size - dist.alignment; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); - TEST_ASSERT(ret && errno == E2BIG, - "half of the redist is beyond IPA limit"); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit"); + } /* set REDIST base address @0x0*/ addr = 0x00000; kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); /* Attempt to create a second legacy redistributor region */ addr = 0xE0000; ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); - TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again"); + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EEXIST, "GICv3 redist base set again"); - ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST); - if (!ret) { - /* Attempt to mix legacy and new redistributor regions */ - addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, - &addr, true); - TEST_ASSERT(ret && errno == EINVAL, - "attempt to mix GICv3 REDIST and REDIST_REGION"); - } + /* Attempt to mix legacy and new redistributor regions */ + addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "attempt to mix GICv3 REDIST and REDIST_REGION"); /* * Set overlapping DIST / REDIST, cannot be detected here. Will be detected * on first vcpu run instead. */ - addr = rdist.size - rdist.alignment; - kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr, true); + addr = 3 * 2 * 0x10000; + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, + &addr, true); } /* Test the new REDIST region API */ -static void subtest_v3_redist_regions(struct vm_gic *v) +static void subtest_redist_regions(struct vm_gic *v) { uint64_t addr, expected_addr; int ret; @@ -254,19 +199,12 @@ static void subtest_v3_redist_regions(struct vm_gic *v) kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); - addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2); + addr = REDIST_REGION_ATTR_ADDR(1, 1ULL << max_ipa_bits, 0, 2); ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); TEST_ASSERT(ret && errno == E2BIG, "register redist region with base address beyond IPA range"); - /* The last redist is above the pa range. 
*/ - addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); - TEST_ASSERT(ret && errno == E2BIG, - "register redist region with top address beyond IPA range"); - addr = 0x260000; ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); @@ -311,12 +249,13 @@ static void subtest_v3_redist_regions(struct vm_gic *v) * VGIC KVM device is created and initialized before the secondary CPUs * get created */ -static void test_vgic_then_vcpus(uint32_t gic_dev_type) +static void test_vgic_then_vcpus(void) { struct vm_gic v; int ret, i; - v = vm_gic_create_with_vcpus(gic_dev_type, 1); + v.vm = vm_create_default(0, 0, guest_code); + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); subtest_dist_rdist(&v); @@ -331,12 +270,12 @@ static void test_vgic_then_vcpus(uint32_t gic_dev_type) } /* All the VCPUs are created before the VGIC KVM device gets initialized */ -static void test_vcpus_then_vgic(uint32_t gic_dev_type) +static void test_vcpus_then_vgic(void) { struct vm_gic v; int ret; - v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS); + v = vm_gic_create(); subtest_dist_rdist(&v); @@ -346,15 +285,15 @@ static void test_vcpus_then_vgic(uint32_t gic_dev_type) vm_gic_destroy(&v); } -static void test_v3_new_redist_regions(void) +static void test_new_redist_regions(void) { void *dummy = NULL; struct vm_gic v; uint64_t addr; int ret; - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); - subtest_v3_redist_regions(&v); + v = vm_gic_create(); + subtest_redist_regions(&v); kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); @@ -364,8 +303,8 @@ static void test_v3_new_redist_regions(void) /* step2 */ - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); - subtest_v3_redist_regions(&v); + v = vm_gic_create(); + subtest_redist_regions(&v); addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, @@ -378,8 +317,8 @@ static void test_v3_new_redist_regions(void) /* step 3 */ - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); - subtest_v3_redist_regions(&v); + v = vm_gic_create(); + subtest_redist_regions(&v); _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy, true); @@ -399,7 +338,7 @@ static void test_v3_new_redist_regions(void) vm_gic_destroy(&v); } -static void test_v3_typer_accesses(void) +static void test_typer_accesses(void) { struct vm_gic v; uint64_t addr; @@ -412,12 +351,12 @@ static void test_v3_typer_accesses(void) vm_vcpu_add_default(v.vm, 3, guest_code); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); TEST_ASSERT(ret && errno == EINVAL, "attempting to read GICR_TYPER of non created vcpu"); vm_vcpu_add_default(v.vm, 1, guest_code); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); TEST_ASSERT(ret && errno == EBUSY, "read GICR_TYPER before GIC initialized"); vm_vcpu_add_default(v.vm, 2, guest_code); @@ -426,7 +365,7 @@ static void test_v3_typer_accesses(void) KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); for (i = 0; i < NR_VCPUS ; i++) { - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); 
TEST_ASSERT(!ret && !val, "read GICR_TYPER before rdist region setting"); } @@ -435,10 +374,10 @@ static void test_v3_typer_accesses(void) KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); /* The 2 first rdists should be put there (vcpu 0 and 3) */ - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); TEST_ASSERT(!ret && !val, "read typer of rdist #0"); - ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #1"); addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1); @@ -446,11 +385,11 @@ static void test_v3_typer_accesses(void) KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region"); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x100, "no redist region attached to vcpu #1 yet, last cannot be returned"); - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x200, "no redist region attached to vcpu #2, last cannot be returned"); @@ -458,10 +397,10 @@ static void test_v3_typer_accesses(void) kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #1, last properly returned"); @@ -478,7 +417,7 @@ static void test_v3_typer_accesses(void) * rdist region #2 @0x200000 2 rdist capacity * rdists: 1, 2 */ -static void test_v3_last_bit_redist_regions(void) +static void test_last_bit_redist_regions(void) { uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; @@ -505,29 +444,29 @@ static void test_v3_last_bit_redist_regions(void) kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0"); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x200, "read typer of rdist #2"); - ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #3"); - ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #5"); - ret = access_v3_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x410, "read typer of rdist #4"); 
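/*
 * Reading aid (assumption based on the GICv3 architecture, not part of the
 * patch): in the low word of GICR_TYPER, Processor_Number occupies bits
 * [23:8] and the Last flag is bit 4, so the expected value here is
 * (vcpuid << 8) | (last ? 0x10 : 0). 0x310 above therefore reads as
 * "vcpu 3, last redistributor of its region", and 0x410 likewise for
 * vcpu 4.
 */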
vm_gic_destroy(&v); } /* Test last bit with legacy region */ -static void test_v3_last_bit_single_rdist(void) +static void test_last_bit_single_rdist(void) { uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; @@ -546,106 +485,28 @@ static void test_v3_last_bit_single_rdist(void) kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0"); - ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x300, "read typer of rdist #1"); - ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #2"); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #3"); - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #4"); vm_gic_destroy(&v); } -/* Uses the legacy REDIST region API. */ -static void test_v3_redist_ipa_range_check_at_vcpu_run(void) -{ - struct vm_gic v; - int ret, i; - uint64_t addr; - - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1); - - /* Set space for 3 redists; we have 1 vcpu, so this succeeds. */ - addr = max_phys_size - (3 * 2 * 0x10000); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); - - addr = 0x00000; - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); - - /* Add the rest of the VCPUs */ - for (i = 1; i < NR_VCPUS; ++i) - vm_vcpu_add_default(v.vm, i, guest_code); - - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); - - /* Attempt to run a vcpu without enough redist space.
*/ - ret = run_vcpu(v.vm, 2); - TEST_ASSERT(ret && errno == EINVAL, - "redist base+size above PA range detected on 1st vcpu run"); - - vm_gic_destroy(&v); -} - -static void test_v3_its_region(void) -{ - struct vm_gic v; - uint64_t addr; - int its_fd, ret; - - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); - its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS, false); - - addr = 0x401000; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); - TEST_ASSERT(ret && errno == EINVAL, - "ITS region with misaligned address"); - - addr = max_phys_size; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); - TEST_ASSERT(ret && errno == E2BIG, - "register ITS region with base address beyond IPA range"); - - addr = max_phys_size - 0x10000; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); - TEST_ASSERT(ret && errno == E2BIG, - "Half of ITS region is beyond IPA range"); - - /* This one succeeds setting the ITS base */ - addr = 0x400000; - kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); - - addr = 0x300000; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); - TEST_ASSERT(ret && errno == EEXIST, "ITS base set again"); - - close(its_fd); - vm_gic_destroy(&v); -} - -/* - * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). - */ -int test_kvm_device(uint32_t gic_dev_type) +void test_kvm_device(void) { struct vm_gic v; int ret, fd; - uint32_t other; v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL); @@ -653,70 +514,38 @@ int test_kvm_device(uint32_t gic_dev_type) ret = _kvm_create_device(v.vm, 0, true, &fd); TEST_ASSERT(ret && errno == ENODEV, "unsupported device"); - /* trial mode */ - ret = _kvm_create_device(v.vm, gic_dev_type, true, &fd); - if (ret) - return ret; - v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false); + /* trial mode with VGIC_V3 device */ + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true, &fd); + if (ret) { + print_skip("GICv3 not supported"); + exit(KSFT_SKIP); + } + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); - ret = _kvm_create_device(v.vm, gic_dev_type, false, &fd); - TEST_ASSERT(ret && errno == EEXIST, "create GIC device twice"); + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false, &fd); + TEST_ASSERT(ret && errno == EEXIST, "create GICv3 device twice"); - kvm_create_device(v.vm, gic_dev_type, true); + kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true); - /* try to create the other gic_dev_type */ - other = VGIC_DEV_IS_V2(gic_dev_type) ? 
KVM_DEV_TYPE_ARM_VGIC_V3 - : KVM_DEV_TYPE_ARM_VGIC_V2; - - if (!_kvm_create_device(v.vm, other, true, &fd)) { - ret = _kvm_create_device(v.vm, other, false, &fd); - TEST_ASSERT(ret && errno == EINVAL, - "create GIC device while other version exists"); + if (!_kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, true, &fd)) { + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, false, &fd); + TEST_ASSERT(ret && errno == EINVAL, "create GICv2 while v3 exists"); } vm_gic_destroy(&v); - - return 0; -} - -void run_tests(uint32_t gic_dev_type) -{ - test_vcpus_then_vgic(gic_dev_type); - test_vgic_then_vcpus(gic_dev_type); - - if (VGIC_DEV_IS_V3(gic_dev_type)) { - test_v3_new_redist_regions(); - test_v3_typer_accesses(); - test_v3_last_bit_redist_regions(); - test_v3_last_bit_single_rdist(); - test_v3_redist_ipa_range_check_at_vcpu_run(); - test_v3_its_region(); - } } int main(int ac, char **av) { - int ret; - int pa_bits; + max_ipa_bits = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; - max_phys_size = 1ULL << pa_bits; + test_kvm_device(); + test_vcpus_then_vgic(); + test_vgic_then_vcpus(); + test_new_redist_regions(); + test_typer_accesses(); + test_last_bit_redist_regions(); + test_last_bit_single_rdist(); - ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3); - if (!ret) { - pr_info("Running GIC_v3 tests.\n"); - run_tests(KVM_DEV_TYPE_ARM_VGIC_V3); - return 0; - } - - ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2); - if (!ret) { - pr_info("Running GIC_v2 tests.\n"); - run_tests(KVM_DEV_TYPE_ARM_VGIC_V2); - return 0; - } - - print_skip("No GICv2 nor GICv3 support"); - exit(KSFT_SKIP); return 0; } diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index d890903231..5d95113c7b 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -47,7 +47,7 @@ #include "guest_modes.h" /* Global variable used to synchronize all of the vCPU threads. */ -static int iteration; +static int iteration = -1; /* Defines what vCPU threads should do during a given iteration. */ static enum { @@ -215,11 +215,12 @@ static bool spin_wait_for_next_iteration(int *current_iteration) return true; } -static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) +static void *vcpu_thread_main(void *arg) { + struct perf_test_vcpu_args *vcpu_args = arg; struct kvm_vm *vm = perf_test_args.vm; int vcpu_id = vcpu_args->vcpu_id; - int current_iteration = 0; + int current_iteration = -1; while (spin_wait_for_next_iteration(¤t_iteration)) { switch (READ_ONCE(iteration_work)) { @@ -234,6 +235,8 @@ static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) vcpu_last_completed_iteration[vcpu_id] = current_iteration; } + + return NULL; } static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) @@ -274,7 +277,8 @@ static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, const char *description) { - perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); + perf_test_args.wr_fract = (access == ACCESS_READ) ? 
INT_MAX : 1; + sync_global_to_guest(vm, perf_test_args); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, vcpus, description); } @@ -292,16 +296,48 @@ static void mark_memory_idle(struct kvm_vm *vm, int vcpus) run_iteration(vm, vcpus, "Mark memory idle"); } +static pthread_t *create_vcpu_threads(int vcpus) +{ + pthread_t *vcpu_threads; + int i; + + vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); + TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); + + for (i = 0; i < vcpus; i++) { + vcpu_last_completed_iteration[i] = iteration; + pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, + &perf_test_args.vcpu_args[i]); + } + + return vcpu_threads; +} + +static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) +{ + int i; + + /* Set done to signal the vCPU threads to exit */ + done = true; + + for (i = 0; i < vcpus; i++) + pthread_join(vcpu_threads[i], NULL); +} + static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *params = arg; struct kvm_vm *vm; + pthread_t *vcpu_threads; int vcpus = params->vcpus; vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1, - params->backing_src, !overlap_memory_access); + params->backing_src); - perf_test_start_vcpu_threads(vcpus, vcpu_thread_main); + perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, + !overlap_memory_access); + + vcpu_threads = create_vcpu_threads(vcpus); pr_info("\n"); access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); @@ -316,10 +352,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) mark_memory_idle(vm, vcpus); access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); - /* Set done to signal the vCPU threads to exit */ - done = true; - - perf_test_join_vcpu_threads(vcpus); + terminate_vcpu_threads(vcpu_threads, vcpus); + free(vcpu_threads); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 6a719d0655..1510b21e63 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -42,9 +42,10 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static size_t demand_paging_size; static char *guest_data_prototype; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void *vcpu_worker(void *data) { int ret; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -67,6 +68,8 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) ts_diff = timespec_elapsed(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); + + return NULL; } static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) @@ -279,6 +282,7 @@ struct test_params { static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; + pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; struct timespec start; @@ -289,7 +293,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) int r; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, - p->src_type, p->partition_vcpu_memory_access); + p->src_type); + + perf_test_args.wr_fract = 1; demand_paging_size = get_backing_src_pagesz(p->src_type); @@ -298,6 +304,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) "Failed to allocate 
buffer for guest data pattern"); memset(guest_data_prototype, 0xAB, demand_paging_size); + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, + p->partition_vcpu_memory_access); + if (p->uffd_mode) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); @@ -310,15 +322,26 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - struct perf_test_vcpu_args *vcpu_args; + vm_paddr_t vcpu_gpa; void *vcpu_hva; void *vcpu_alias; + uint64_t vcpu_mem_size; - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + if (p->partition_vcpu_memory_access) { + vcpu_gpa = guest_test_phys_mem + + (vcpu_id * guest_percpu_mem_size); + vcpu_mem_size = guest_percpu_mem_size; + } else { + vcpu_gpa = guest_test_phys_mem; + vcpu_mem_size = guest_percpu_mem_size * nr_vcpus; + } + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); /* Cache the host addresses of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); - vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + vcpu_alias = addr_gpa2alias(vm, vcpu_gpa); /* * Set up user fault fd to handle demand paging @@ -332,18 +355,32 @@ static void run_test(enum vm_guest_mode mode, void *arg) pipefds[vcpu_id * 2], p->uffd_mode, p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, vcpu_alias, - vcpu_args->pages * perf_test_args.guest_page_size); + vcpu_mem_size); } } + /* Export the shared variables to the guest */ + sync_global_to_guest(vm, perf_test_args); + pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } + pr_info("Started all vCPUs\n"); - perf_test_join_vcpu_threads(nr_vcpus); + /* Wait for the vcpu threads to quit */ + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_join(vcpu_threads[vcpu_id], NULL); + PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); + } + ts_diff = timespec_elapsed(start); + pr_info("All vCPU threads joined\n"); if (p->uffd_mode) { @@ -367,6 +404,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_destroy_vm(vm); free(guest_data_prototype); + free(vcpu_threads); if (p->uffd_mode) { free(uffd_handler_threads); free(uffd_args); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 1954b964d1..7ffab5bd5c 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -31,7 +31,7 @@ static bool host_quit; static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void *vcpu_worker(void *data) { int ret; struct kvm_vm *vm = perf_test_args.vm; @@ -41,6 +41,7 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; run = vcpu_state(vm, vcpu_id); @@ -82,6 +83,8 @@ static void 
vcpu_worker(struct perf_test_vcpu_args *vcpu_args) pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); + + return NULL; } struct test_params { @@ -167,6 +170,7 @@ static void free_bitmaps(unsigned long *bitmaps[], int slots) static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; + pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long **bitmaps; uint64_t guest_num_pages; @@ -182,10 +186,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec clear_dirty_log_total = (struct timespec){0}; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, - p->slots, p->backing_src, - p->partition_vcpu_memory_access); + p->slots, p->backing_src); - perf_test_set_wr_fract(vm, p->wr_fract); + perf_test_args.wr_fract = p->wr_fract; guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -200,15 +203,25 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, + p->partition_vcpu_memory_access); + + sync_global_to_guest(vm, perf_test_args); + /* Start the iterations */ iteration = 0; host_quit = false; clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vcpu_last_completed_iteration[vcpu_id] = -1; - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); @@ -277,7 +290,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Tell the vcpu thread to quit */ host_quit = true; - perf_test_join_vcpu_threads(nr_vcpus); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. 
(Avg %ld.%.9lds/iteration)\n", @@ -292,6 +306,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) } free_bitmaps(bitmaps, p->slots); + free(vcpu_threads); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 3fcd89e195..792c60e1b1 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -115,7 +115,7 @@ static void guest_code(void) addr = guest_test_virt_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; - addr = align_down(addr, host_page_size); + addr &= ~(host_page_size - 1); *(uint64_t *)addr = READ_ONCE(iteration); } @@ -737,14 +737,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; - guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size); + guest_test_phys_mem &= ~(host_page_size - 1); } else { guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ /* Align to 1M (segment size) */ - guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20); + guest_test_phys_mem &= ~((1 << 20) - 1); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 8f9f46979a..c0273aefa6 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -9,24 +9,20 @@ #include "kvm_util.h" #include -#include -#include #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) -/* - * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert - * SYS_* register definitions in asm/sysreg.h to use in KVM - * calls such as get_reg() and set_reg(). 
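- *
- * (Illustrative sketch, not patch content: taking SYS_SCTLR_EL1, which is
- * encoded as Op0=3, Op1=0, CRn=1, CRm=0, Op2=0, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)
- * expands to ARM64_SYS_REG(3, 0, 1, 0, 0), the same encoding the replacement
- * "#define SCTLR_EL1 3, 0, 1, 0, 0" below spells out by hand.)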
- */ -#define KVM_ARM64_SYS_REG(sys_reg_id) \ - ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \ - sys_reg_Op1(sys_reg_id), \ - sys_reg_CRn(sys_reg_id), \ - sys_reg_CRm(sys_reg_id), \ - sys_reg_Op2(sys_reg_id)) +#define CPACR_EL1 3, 0, 1, 0, 2 +#define TCR_EL1 3, 0, 2, 0, 2 +#define MAIR_EL1 3, 0, 10, 2, 0 +#define MPIDR_EL1 3, 0, 0, 0, 5 +#define TTBR0_EL1 3, 0, 2, 0, 0 +#define SCTLR_EL1 3, 0, 1, 0, 0 +#define VBAR_EL1 3, 0, 12, 0, 0 + +#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0 /* * Default MAIR @@ -63,7 +59,7 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, ®); } -void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init); +void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init); void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init, void *guest_code); @@ -113,9 +109,6 @@ enum { #define ESR_EC_WP_CURRENT 0x35 #define ESR_EC_BRK_INS 0x3c -void aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k); - void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); @@ -125,64 +118,18 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); -static inline void cpu_relax(void) -{ - asm volatile("yield" ::: "memory"); -} - -#define isb() asm volatile("isb" : : : "memory") -#define dsb(opt) asm volatile("dsb " #opt : : : "memory") -#define dmb(opt) asm volatile("dmb " #opt : : : "memory") - -#define dma_wmb() dmb(oshst) -#define __iowmb() dma_wmb() - -#define dma_rmb() dmb(oshld) - -#define __iormb(v) \ -({ \ - unsigned long tmp; \ - \ - dma_rmb(); \ - \ - /* \ - * Courtesy of arch/arm64/include/asm/io.h: \ - * Create a dummy control dependency from the IO read to any \ - * later instructions. This ensures that a subsequent call \ - * to udelay() will be ordered due to the ISB in __delay(). \ - */ \ - asm volatile("eor %0, %1, %1\n" \ - "cbnz %0, ." 
\ - : "=r" (tmp) : "r" ((unsigned long)(v)) \ - : "memory"); \ +#define write_sysreg(reg, val) \ +({ \ + u64 __val = (u64)(val); \ + asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \ }) -static __always_inline void __raw_writel(u32 val, volatile void *addr) -{ - asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); -} +#define read_sysreg(reg) \ +({ u64 val; \ + asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\ + val; \ +}) -static __always_inline u32 __raw_readl(const volatile void *addr) -{ - u32 val; - asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr)); - return val; -} - -#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) -#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) - -#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));}) -#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) - -static inline void local_irq_enable(void) -{ - asm volatile("msr daifclr, #3" : : : "memory"); -} - -static inline void local_irq_disable(void) -{ - asm volatile("msr daifset, #3" : : : "memory"); -} +#define isb() asm volatile("isb" : : : "memory") #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index c9286811a4..1876d148ea 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -7,7 +7,398 @@ #ifndef SELFTEST_KVM_UTIL_H #define SELFTEST_KVM_UTIL_H -#include "kvm_util_base.h" -#include "ucall_common.h" +#include "test_util.h" + +#include "asm/kvm.h" +#include "linux/list.h" +#include "linux/kvm.h" +#include + +#include "sparsebit.h" + +#define KVM_DEV_PATH "/dev/kvm" +#define KVM_MAX_VCPUS 512 + +/* + * Callers of kvm_util only have an incomplete/opaque description of the + * structure kvm_util is using to maintain the state of a VM. 
+ */ +struct kvm_vm; + +typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +/* Minimum allocated guest virtual and physical addresses */ +#define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +#define DEFAULT_GUEST_PHY_PAGES 512 +#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 +#define DEFAULT_STACK_PGS 5 + +enum vm_guest_mode { + VM_MODE_P52V48_4K, + VM_MODE_P52V48_64K, + VM_MODE_P48V48_4K, + VM_MODE_P48V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, + NUM_VM_MODES, +}; + +#if defined(__aarch64__) + +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__x86_64__) + +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__s390x__) + +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 16) + +#endif + +#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) +#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) + +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + +int open_kvm_dev_path_or_exit(void); +int kvm_check_cap(long cap); +int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); +const char *vm_guest_mode_string(uint32_t i); + +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); +void kvm_vm_free(struct kvm_vm *vmp); +void kvm_vm_restart(struct kvm_vm *vmp, int perm); +void kvm_vm_release(struct kvm_vm *vmp); +void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); +void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages); +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm); + +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, + size_t len); + +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); + +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +/* + * VM VCPU Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * vcpuid - VCPU ID + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps the current state of the VCPU specified by @vcpuid, within the VM + * given by @vm, to the FILE stream given by @stream. 
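+ *
+ * (Usage sketch, hypothetical call site rather than patch content:
+ *	vcpu_dump(stderr, vm, 0, 2);
+ * dumps vCPU 0 of @vm to stderr with a two-space indent.)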
+ */ +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, + uint8_t indent); + +void vm_create_irqchip(struct kvm_vm *vm); + +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags); + +void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, + void *arg); +int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, + void *arg); +void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); +void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); +void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); + +void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + unsigned int npages); +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Returns the VM physical address of the translated VM virtual + * address given by @gva. + */ +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_guest_debug *debug); +void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); + +/* + * VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first @num function input registers of the VCPU with @vcpuid, + * per the C calling convention of the architecture, to the values given + * as variable args. Each of the variable args is expected to be of type + * uint64_t. The maximum @num can be is specific to the architecture. 
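+ *
+ * (Usage sketch under the assumption of a guest entry point taking two
+ * uint64_t parameters, not patch content:
+ *	vcpu_args_set(vm, 0, 2, (uint64_t)test_gva, 64);
+ * loads the two values into the first two argument registers of vCPU 0.)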
+ */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); + +void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); +void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); +#ifdef __KVM_HAVE_VCPU_EVENTS +void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); +void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); +#endif +#ifdef __x86_64__ +void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_nested_state *state); +int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_nested_state *state, bool ignore_error); +#endif +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); + +int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd); +int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test); +int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); +int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); + +const char *exit_reason_str(unsigned int exit_reason); + +void virt_pgd_alloc(struct kvm_vm *vm); + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within @vm, creates a virtual translation for the page starting + * at @vaddr to the page starting at @paddr. + */ +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); + +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot); +vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot); +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); + +/* + * Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * extra_mem_pages - The number of extra pages to add (this will + * decide how much extra space we will need to + * setup the page tables using memslot 0) + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. 
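+ *
+ * (Usage sketch, mirroring the call sites elsewhere in this patch:
+ *	struct kvm_vm *vm = vm_create_default(0, 0, guest_code);
+ * creates a VM with one vCPU, no extra memory, and guest_code as its entry
+ * point.)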
+ */ +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code); + +/* Same as vm_create_default, but can be used for more than one vcpu */ +struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]); + +/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */ +struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t slot0_mem_pages, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]); + +/* + * Adds a vCPU with reasonable defaults (e.g. a stack) + * + * Input Args: + * vm - Virtual Machine + * vcpuid - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); + +bool vm_is_unrestricted_guest(struct kvm_vm *vm); + +unsigned int vm_get_page_size(struct kvm_vm *vm); +unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); +uint64_t vm_get_max_gfn(struct kvm_vm *vm); +int vm_get_fd(struct kvm_vm *vm); + +unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); +unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); +unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); +static inline unsigned int +vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +{ + unsigned int n; + n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); +#ifdef __s390x__ + /* s390 requires 1M aligned guest sizes */ + n = (n + 255) & ~255; +#endif + return n; +} + +struct kvm_userspace_memory_region * +kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, + uint64_t end); + +struct kvm_dirty_log * +allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); + +int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); + +#define sync_global_to_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(_p, &(g), sizeof(g)); \ +}) + +#define sync_global_from_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(&(g), _p, sizeof(g)); \ +}) + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + +/* Common ucalls */ +enum { + UCALL_NONE, + UCALL_SYNC, + UCALL_ABORT, + UCALL_DONE, + UCALL_UNHANDLED, +}; + +#define UCALL_MAX_ARGS 6 + +struct ucall { + uint64_t cmd; + uint64_t args[UCALL_MAX_ARGS]; +}; + +void ucall_init(struct kvm_vm *vm, void *arg); +void ucall_uninit(struct kvm_vm *vm); +void ucall(uint64_t cmd, int nargs, ...); +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); + +#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ + ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) +#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_DONE() ucall(UCALL_DONE, 0) +#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) 
do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, 2 + _nargs, \ + "Failed guest assert: " \ + _condstr, __LINE__, _args); \ +} while (0) + +#define GUEST_ASSERT(_condition) \ + __GUEST_ASSERT(_condition, #_condition, 0, 0) + +#define GUEST_ASSERT_1(_condition, arg1) \ + __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) + +#define GUEST_ASSERT_2(_condition, arg1, arg2) \ + __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) + +#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ + __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) + +#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ + __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) + +#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) + +int vm_get_stats_fd(struct kvm_vm *vm); +int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index a86f953d8d..df9f1a3a3f 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -8,8 +8,6 @@ #ifndef SELFTEST_KVM_PERF_TEST_UTIL_H #define SELFTEST_KVM_PERF_TEST_UTIL_H -#include - #include "kvm_util.h" /* Default guest test virtual memory offset */ @@ -20,7 +18,6 @@ #define PERF_TEST_MEM_SLOT_INDEX 1 struct perf_test_vcpu_args { - uint64_t gpa; uint64_t gva; uint64_t pages; @@ -30,7 +27,7 @@ struct perf_test_vcpu_args { struct perf_test_args { struct kvm_vm *vm; - uint64_t gpa; + uint64_t host_page_size; uint64_t guest_page_size; int wr_fract; @@ -39,15 +36,19 @@ struct perf_test_args { extern struct perf_test_args perf_test_args; +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +extern uint64_t guest_test_phys_mem; + struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src, - bool partition_vcpu_memory_access); + enum vm_mem_backing_src_type backing_src); void perf_test_destroy_vm(struct kvm_vm *vm); - -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); - -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); -void perf_test_join_vcpu_threads(int vcpus); +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, + uint64_t vcpu_memory_bytes, + bool partition_vcpu_memory_access); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 99e0dcdc92..f8fddc84c0 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -104,7 +104,6 @@ size_t get_trans_hugepagesz(void); size_t get_def_hugetlb_pagesz(void); const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); size_t get_backing_src_pagesz(uint32_t i); -bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); @@ -118,29 +117,4 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } -/* Aligns x up to the next multiple of size. Size must be a power of 2. 
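- *
- * (Illustrative identity, not patch content: for a power-of-two size,
- * align_up(x, size) == ((x + size - 1) & ~(size - 1)) and
- * align_down(x, size) == (x & ~(size - 1)); the masking this patch
- * open-codes at the former call sites, e.g.
- * "guest_test_phys_mem &= ~(host_page_size - 1)", computes exactly that.)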
*/ -static inline uint64_t align_up(uint64_t x, uint64_t size) -{ - uint64_t mask = size - 1; - - TEST_ASSERT(size != 0 && !(size & (size - 1)), - "size not a power of 2: %lu", size); - return ((x + mask) & ~mask); -} - -static inline uint64_t align_down(uint64_t x, uint64_t size) -{ - uint64_t x_aligned_up = align_up(x, size); - - if (x == x_aligned_up) - return x; - else - return x_aligned_up - size; -} - -static inline void *align_ptr_up(void *x, size_t size) -{ - return (void *)align_up((unsigned long)x, size); -} - #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8a470da7b7..05e65ca1c3 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -10,10 +10,8 @@ #include #include -#include #include -#include #include "../kvm_util.h" @@ -94,21 +92,6 @@ struct desc_ptr { uint64_t address; } __attribute__((packed)); -struct kvm_x86_state { - struct kvm_xsave *xsave; - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; - struct kvm_regs regs; - struct kvm_xcrs xcrs; - struct kvm_sregs sregs; - struct kvm_debugregs debugregs; - union { - struct kvm_nested_state nested; - char nested_[16384]; - }; - struct kvm_msrs msrs; -}; - static inline uint64_t get_desc64_base(const struct desc64 *desc) { return ((uint64_t)desc->base3 << 32) | @@ -364,37 +347,17 @@ static inline unsigned long get_xmm(int n) } bool is_intel_cpu(void); -bool is_amd_cpu(void); - -static inline unsigned int x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_model(unsigned int eax) -{ - return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); -} +struct kvm_x86_state; struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); -void kvm_x86_state_cleanup(struct kvm_x86_state *state); struct kvm_msr_list *kvm_get_msr_index_list(void); uint64_t kvm_get_feature_msr(uint64_t msr_index); struct kvm_cpuid2 *kvm_get_supported_cpuid(void); struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid); -int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_cpuid2 *cpuid); void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); @@ -438,11 +401,6 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr); void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, uint64_t pte); -/* - * get_cpuid() - find matching CPUID entry and return pointer to it. - */ -struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index); /* * set_cpuid() - overwrites a matching cpuid entry with the provided value. * matches based on ent->function && ent->index. 
returns true @@ -458,7 +416,6 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); -void vm_xsave_req_perm(int bit); enum x86_page_size { X86_PAGE_SIZE_4K = 0, @@ -486,11 +443,4 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, /* VMX_EPT_VPID_CAP bits */ #define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) -#define XSTATE_XTILE_CFG_BIT 17 -#define XSTATE_XTILE_DATA_BIT 18 - -#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) -#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) -#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \ - XSTATE_XTILE_DATA_MASK) #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h index 587fbe408b..b7531c83b8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h @@ -46,6 +46,4 @@ static inline bool cpu_has_svm(void) return ecx & CPUID_SVM; } -int open_sev_dev_path_or_exit(void); - #endif /* SELFTEST_KVM_SVM_UTILS_H */ diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index aed9dc3ca1..aa3795cd7b 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -83,7 +83,7 @@ int main(int argc, char *argv[]) kvm_max_vcpu_id = kvm_max_vcpus; TEST_ASSERT(kvm_max_vcpu_id >= kvm_max_vcpus, - "KVM_MAX_VCPU_IDS (%d) must be at least as large as KVM_MAX_VCPUS (%d).", + "KVM_MAX_VCPU_ID (%d) must be at least as large as KVM_MAX_VCPUS (%d).", kvm_max_vcpu_id, kvm_max_vcpus); test_vcpu_creation(0, kvm_max_vcpus); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index ba1fdc3dcf..36407cb0ec 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ alignment = max(0x100000, alignment); #endif - guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); + guest_test_phys_mem &= ~(alignment - 1); /* Set up the shared data structure test_args */ test_args.vm = vm; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 9343d82519..632b74d6b3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -8,7 +8,6 @@ #include #include -#include "guest_modes.h" #include "kvm_util.h" #include "../kvm_util_internal.h" #include "processor.h" @@ -213,7 +212,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } -void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init) +void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; uint64_t sctlr_el1, tcr_el1; @@ -233,12 +232,11 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. 
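	 *
	 * (Note, based on the architecture definition rather than this patch:
	 * the "3 << 20" below sets CPACR_EL1.FPEN, bits [21:20], to 0b11, so
	 * FP/SIMD instructions are not trapped at EL0 or EL1.)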
*/ - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20); - get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1); + get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1); - /* Configure base granule size */ switch (vm->mode) { case VM_MODE_P52V48_4K: TEST_FAIL("AArch64 does not support 4K sized pages " @@ -247,46 +245,24 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: - case VM_MODE_P48V48_64K: - case VM_MODE_P40V48_64K: - case VM_MODE_P36V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ - break; - case VM_MODE_P48V48_16K: - case VM_MODE_P40V48_16K: - case VM_MODE_P36V48_16K: - case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ - break; - case VM_MODE_P48V48_4K: - case VM_MODE_P40V48_4K: - case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ - break; - default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); - } - - /* Configure output size */ - switch (vm->mode) { - case VM_MODE_P52V48_64K: tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ break; case VM_MODE_P48V48_4K: - case VM_MODE_P48V48_16K: + tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + break; case VM_MODE_P48V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ break; case VM_MODE_P40V48_4K: - case VM_MODE_P40V48_16K: - case VM_MODE_P40V48_64K: + tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ break; - case VM_MODE_P36V48_4K: - case VM_MODE_P36V48_16K: - case VM_MODE_P36V48_64K: - case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + case VM_MODE_P40V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -297,11 +273,10 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpuid); + set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1); + set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1); + set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1); + set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd); } void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) @@ -387,7 +362,7 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) { extern char vectors; - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -451,52 +426,3 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, assert(vector < VECTOR_NUM); handlers->exception_handlers[vector][0] = handler; } - -uint32_t guest_get_vcpuid(void) -{ - return read_sysreg(tpidr_el1); -} - -void 
aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k) -{ - struct kvm_vcpu_init preferred_init; - int kvm_fd, vm_fd, vcpu_fd, err; - uint64_t val; - struct kvm_one_reg reg = { - .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), - .addr = (uint64_t)&val, - }; - - kvm_fd = open_kvm_dev_path_or_exit(); - vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa); - TEST_ASSERT(vm_fd >= 0, "Can't create VM"); - - vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); - TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu"); - - err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init); - TEST_ASSERT(err == 0, "Can't get target"); - err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init); - TEST_ASSERT(err == 0, "Can't get init vcpu"); - - err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); - TEST_ASSERT(err == 0, "Can't get MMFR0"); - - *ps4k = ((val >> 28) & 0xf) != 0xf; - *ps64k = ((val >> 24) & 0xf) == 0; - *ps16k = ((val >> 20) & 0xf) != 0; - - close(vcpu_fd); - close(vm_fd); - close(kvm_fd); -} - -/* - * arm64 doesn't have a true default mode, so start by computing the - * available IPA space and page sizes early. - */ -void __attribute__((constructor)) init_guest_modes(void) -{ - guest_modes_append_default(); -} diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 13e8e3dcf9..eac44f5d0d 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -157,7 +157,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, n1, (uint64_t) phdr.p_memsz); - vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); + vm_vaddr_t seg_vstart = phdr.p_vaddr; + seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1); vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 8784013b74..c330f414ef 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -4,59 +4,22 @@ */ #include "guest_modes.h" -#ifdef __aarch64__ -#include "processor.h" -enum vm_guest_mode vm_mode_default; -#endif - struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { -#ifndef __aarch64__ guest_mode_append(VM_MODE_DEFAULT, true, true); -#else + +#ifdef __aarch64__ + guest_mode_append(VM_MODE_P40V48_64K, true, true); { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - bool ps4k, ps16k, ps64k; - int i; - - aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k); - - vm_mode_default = NUM_VM_MODES; - if (limit >= 52) - guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k); + guest_mode_append(VM_MODE_P52V48_64K, true, true); if (limit >= 48) { - guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k); + guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_64K, true, true); } - if (limit >= 40) { - guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k); - if (ps4k) - vm_mode_default = VM_MODE_P40V48_4K; - } - if (limit >= 36) { - guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k); 
- guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k); - } - - /* - * Pick the first supported IPA size if the default - * isn't available. - */ - for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) { - if (guest_modes[i].supported && guest_modes[i].enabled) - vm_mode_default = i; - } - - TEST_ASSERT(vm_mode_default != NUM_VM_MODES, - "No supported mode!"); } #endif #ifdef __s390x__ @@ -75,16 +38,6 @@ void guest_modes_append_default(void) guest_mode_append(VM_MODE_P47V64_4K, true, true); } #endif -#ifdef __riscv - { - unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); - - if (sz >= 52) - guest_mode_append(VM_MODE_P52V48_4K, true, true); - if (sz >= 48) - guest_mode_append(VM_MODE_P48V48_4K, true, true); - } -#endif } void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d8cf851ab1..e9d0ab9567 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -22,17 +22,13 @@ static int vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +/* Aligns x up to the next multiple of size. Size must be a power of 2. */ +static void *align(void *x, size_t size) { - int fd; - - fd = open(path, flags); - if (fd < 0) { - print_skip("%s not available (errno: %d)", path, errno); - exit(KSFT_SKIP); - } - - return fd; + size_t mask = size - 1; + TEST_ASSERT(size != 0 && !(size & (size - 1)), + "size not a power of 2: %lu", size); + return (void *) (((size_t) x + mask) & ~mask); } /* @@ -46,7 +42,16 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + int fd; + + fd = open(KVM_DEV_PATH, flags); + if (fd < 0) { + print_skip("%s not available, is KVM loaded? (errno: %d)", + KVM_DEV_PATH, errno); + exit(KSFT_SKIP); + } + + return fd; } int open_kvm_dev_path_or_exit(void) @@ -85,33 +90,6 @@ int kvm_check_cap(long cap) return ret; } -/* VM Check Capability - * - * Input Args: - * vm - Virtual Machine - * cap - Capability - * - * Output Args: None - * - * Return: - * On success, the Value corresponding to the capability (KVM_CAP_*) - * specified by the value of cap. On failure a TEST_ASSERT failure - * is produced. - * - * Looks up and returns the value corresponding to the capability - * (KVM_CAP_*) given by cap. 
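- *
- * (Usage sketch for the helper being removed, hypothetical call site:
- *	int ring_size = vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING);
- * a zero return means the capability is absent.)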
- */ -int vm_check_cap(struct kvm_vm *vm, long cap) -{ - int ret; - - ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap); - TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n" - " rc: %i errno: %i", ret, errno); - - return ret; -} - /* VM Enable Capability * * Input Args: @@ -193,18 +171,12 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", - [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", - [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", - [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", - [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", - [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", - [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -215,21 +187,15 @@ const char *vm_guest_mode_string(uint32_t i) } const struct vm_guest_mode_params vm_guest_mode_params[] = { - [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, - [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, - [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, - [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, - [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, - [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, - [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, - [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, - [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, - [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, - [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, - [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, - [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, - [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, - [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, + { 52, 48, 0x1000, 12 }, + { 52, 48, 0x10000, 16 }, + { 48, 48, 0x1000, 12 }, + { 48, 48, 0x10000, 16 }, + { 40, 48, 0x1000, 12 }, + { 40, 48, 0x10000, 16 }, + { 0, 0, 0x1000, 12 }, + { 47, 64, 0x1000, 12 }, + { 44, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -291,19 +257,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->pgtable_levels = 3; break; case VM_MODE_P40V48_4K: - case VM_MODE_P36V48_4K: vm->pgtable_levels = 4; break; case VM_MODE_P40V48_64K: - case VM_MODE_P36V48_64K: - vm->pgtable_levels = 3; - break; - case VM_MODE_P48V48_16K: - case VM_MODE_P40V48_16K: - case VM_MODE_P36V48_16K: - vm->pgtable_levels = 4; - break; - case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; case VM_MODE_PXXV48_4K: @@ -492,11 +448,9 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, uint64_t first_page, uint32_t num_pages) { - struct kvm_clear_dirty_log args = { - .dirty_bitmap = log, .slot = slot, - .first_page = first_page, - .num_pages = num_pages - }; + struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot, + .first_page = first_page, + .num_pages = num_pages }; int ret; ret = ioctl(vm->fd, 
KVM_CLEAR_DIRTY_LOG, &args); @@ -921,17 +875,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, alignment = 1; #endif - /* - * When using THP mmap is not guaranteed to returned a hugepage aligned - * address so we have to pad the mmap. Padding is not needed for HugeTLB - * because mmap will always return an address aligned to the HugeTLB - * page size. - */ if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); - /* Add enough memory to align up if necessary */ if (alignment > 1) region->mmap_size += alignment; @@ -964,13 +910,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "test_malloc failed, mmap_start: %p errno: %i", region->mmap_start, errno); - TEST_ASSERT(!is_backing_src_hugetlb(src_type) || - region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), - "mmap_start %p is not aligned to HugeTLB page size 0x%lx", - region->mmap_start, backing_src_pagesz); - /* Align host address */ - region->host_mem = align_ptr_up(region->mmap_start, alignment); + region->host_mem = align(region->mmap_start, alignment); /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || @@ -1013,7 +954,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "mmap of alias failed, errno: %i", errno); /* Align host alias address */ - region->host_alias = align_ptr_up(region->mmap_alias, alignment); + region->host_alias = align(region->mmap_alias, alignment); } } @@ -1851,7 +1792,7 @@ void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { int ret = _vcpu_sregs_set(vm, vcpuid, sregs); - TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, " + TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " "rc: %i errno: %i", ret, errno); } @@ -2043,7 +1984,7 @@ int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) { int ret = _kvm_device_check_attr(dev_fd, group, attr); - TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); + TEST_ASSERT(ret >= 0, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); return ret; } @@ -2067,7 +2008,7 @@ int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test) ret = _kvm_create_device(vm, type, test, &fd); if (!test) { - TEST_ASSERT(!ret, + TEST_ASSERT(ret >= 0, "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno); return fd; } @@ -2095,120 +2036,10 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, { int ret = _kvm_device_access(dev_fd, group, attr, val, write); - TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); + TEST_ASSERT(ret >= 0, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); return ret; } -int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid); - - return _kvm_device_check_attr(vcpu->fd, group, attr); -} - -int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr) -{ - int ret = _vcpu_has_device_attr(vm, vcpuid, group, attr); - - TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); - return ret; -} - -int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - 
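
The comment removed in the THP hunk above documents why anonymous-THP regions are over-allocated: mmap() only guarantees base-page alignment, so the test pads the mapping by one backing page and rounds the start up, whereas HugeTLB mappings come back aligned already. A hedged sketch of that padding scheme, assuming a 2 MiB THP size:

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#define THP_SIZE (2UL << 20)	/* assumed transparent hugepage size */

int main(void)
{
	size_t len = 8UL << 20;
	/* Over-allocate by one alignment unit so a hugepage-aligned
	 * subrange is guaranteed to exist inside the mapping. */
	size_t padded = len + THP_SIZE;
	void *raw = mmap(NULL, padded, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	uintptr_t aligned;

	if (raw == MAP_FAILED)
		return 1;

	/* Round the start up to the hugepage boundary. */
	aligned = ((uintptr_t)raw + THP_SIZE - 1) & ~(THP_SIZE - 1);
	madvise((void *)aligned, len, MADV_HUGEPAGE);
	printf("raw %p, aligned %#lx\n", raw, (unsigned long)aligned);
	return 0;
}
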
TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid); - - return _kvm_device_access(vcpu->fd, group, attr, val, write); -} - -int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write) -{ - int ret = _vcpu_access_device_attr(vm, vcpuid, group, attr, val, write); - - TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); - return ret; -} - -/* - * IRQ related functions. - */ - -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) -{ - struct kvm_irq_level irq_level = { - .irq = irq, - .level = level, - }; - - return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); -} - -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) -{ - int ret = _kvm_irq_line(vm, irq, level); - - TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno); -} - -struct kvm_irq_routing *kvm_gsi_routing_create(void) -{ - struct kvm_irq_routing *routing; - size_t size; - - size = sizeof(struct kvm_irq_routing); - /* Allocate space for the max number of entries: this wastes 196 KBs. */ - size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); - routing = calloc(1, size); - assert(routing); - - return routing; -} - -void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin) -{ - int i; - - assert(routing); - assert(routing->nr < KVM_MAX_IRQ_ROUTES); - - i = routing->nr; - routing->entries[i].gsi = gsi; - routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; - routing->entries[i].flags = 0; - routing->entries[i].u.irqchip.irqchip = 0; - routing->entries[i].u.irqchip.pin = pin; - routing->nr++; -} - -int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) -{ - int ret; - - assert(routing); - ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing); - free(routing); - - return ret; -} - -void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) -{ - int ret; - - ret = _kvm_gsi_routing_write(vm, routing); - TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i", - ret, errno); -} - /* * VM Dump * diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 722df3a287..0ef80dbdc1 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -10,40 +10,21 @@ struct perf_test_args perf_test_args; +uint64_t guest_test_phys_mem; + /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; -struct vcpu_thread { - /* The id of the vCPU. */ - int vcpu_id; - - /* The pthread backing the vCPU. */ - pthread_t thread; - - /* Set to true once the vCPU thread is up and running. */ - bool running; -}; - -/* The vCPU threads involved in this test. */ -static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; - -/* The function run by each vCPU thread, as provided by the test. */ -static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); - -/* Set to true once all vCPU threads are up and running. */ -static bool all_vcpu_threads_running; - /* * Continuously write to the first 8 bytes of each page in the * specified region. 
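
kvm_gsi_routing_create(), removed above, sizes a single allocation for the kvm_irq_routing header plus KVM_MAX_IRQ_ROUTES entries because struct kvm_irq_routing ends in a flexible array: the entries live directly behind the header in the same block. A smaller sketch of the same construction with one irqchip route (one_irqchip_route() is an illustrative name):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <linux/kvm.h>

/* Build a routing table mapping one GSI to an irqchip pin. */
static struct kvm_irq_routing *one_irqchip_route(uint32_t gsi, uint32_t pin)
{
	struct kvm_irq_routing *r;

	/* Header plus one flexible-array entry in a single allocation. */
	r = calloc(1, sizeof(*r) + sizeof(r->entries[0]));
	assert(r);

	r->nr = 1;
	r->entries[0].gsi = gsi;
	r->entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
	r->entries[0].u.irqchip.irqchip = 0;
	r->entries[0].u.irqchip.pin = pin;
	return r;
}

int main(void)
{
	struct kvm_irq_routing *r = one_irqchip_route(32, 0);

	/* With a real VM: ioctl(vm_fd, KVM_SET_GSI_ROUTING, r); */
	free(r);
	return 0;
}
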
*/ static void guest_code(uint32_t vcpu_id) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id]; + struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; uint64_t gva; uint64_t pages; int i; @@ -56,9 +37,9 @@ static void guest_code(uint32_t vcpu_id) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * pta->guest_page_size); + uint64_t addr = gva + (i * perf_test_args.guest_page_size); - if (i % pta->wr_fract == 0) + if (i % perf_test_args.wr_fract == 0) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -68,81 +49,35 @@ static void guest_code(uint32_t vcpu_id) } } -void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, - uint64_t vcpu_memory_bytes, - bool partition_vcpu_memory_access) -{ - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args; - int vcpu_id; - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &pta->vcpu_args[vcpu_id]; - - vcpu_args->vcpu_id = vcpu_id; - if (partition_vcpu_memory_access) { - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes); - } else { - vcpu_args->gva = guest_test_virt_mem; - vcpu_args->pages = (vcpus * vcpu_memory_bytes) / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa; - } - - vcpu_args_set(vm, vcpu_id, 1, vcpu_id); - - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_args->gpa, vcpu_args->gpa + - (vcpu_args->pages * pta->guest_page_size)); - } -} - struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src, - bool partition_vcpu_memory_access) + enum vm_mem_backing_src_type backing_src) { - struct perf_test_args *pta = &perf_test_args; struct kvm_vm *vm; uint64_t guest_num_pages; - uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - /* By default vCPUs will write to memory. */ - pta->wr_fract = 1; - - /* - * Snapshot the non-huge page size. This is used by the guest code to - * access/dirty pages at the logging granularity. - */ - pta->guest_page_size = vm_guest_mode_params[mode].page_size; + perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / pta->guest_page_size); + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); - TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", slots); - /* - * Pass guest_num_pages to populate the page tables for test memory. - * The memory is also added to memslot 0, but that's a benign side - * effect as KVM allows aliasing HVAs in meslots. 
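
perf_test_setup_vcpus() above supports two layouts: partitioned access, where vCPU i gets a private slice starting at base + i * bytes, and shared access, where every vCPU walks the whole region. The partitioned arithmetic, as a standalone sketch with assumed sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base_gva = 0xc0000000;		/* assumed test VA base */
	uint64_t per_vcpu_bytes = 1ULL << 30;
	uint64_t page_size = 4096;
	int nr_vcpus = 4, i;

	/* vCPU i owns [base + i * bytes, base + (i + 1) * bytes). */
	for (i = 0; i < nr_vcpus; i++) {
		uint64_t gva = base_gva + i * per_vcpu_bytes;
		uint64_t pages = per_vcpu_bytes / page_size;

		printf("vCPU %d: gva %#lx, %lu pages\n", i,
		       (unsigned long)gva, (unsigned long)pages);
	}
	return 0;
}
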
- */ vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES, - guest_num_pages, 0, guest_code, NULL); + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); - pta->vm = vm; + perf_test_args.vm = vm; /* * If there should be more memory in the guest test region than there @@ -155,18 +90,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size; - pta->gpa = align_down(pta->gpa, backing_src_pagesz); + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); #ifdef __s390x__ /* Align to 1M (segment size) */ - pta->gpa = align_down(pta->gpa, 1 << 20); + guest_test_phys_mem &= ~((1 << 20) - 1); #endif - pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa); + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; + vm_paddr_t region_start = guest_test_phys_mem + + region_pages * perf_test_args.guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, PERF_TEST_MEM_SLOT_INDEX + i, @@ -174,15 +111,10 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); - - perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); ucall_init(vm, NULL); - /* Export the shared variables to the guest. */ - sync_global_to_guest(vm, perf_test_args); - return vm; } @@ -192,60 +124,36 @@ void perf_test_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) -{ - perf_test_args.wr_fract = wr_fract; - sync_global_to_guest(vm, perf_test_args); -} - -static void *vcpu_thread_main(void *data) -{ - struct vcpu_thread *vcpu = data; - - WRITE_ONCE(vcpu->running, true); - - /* - * Wait for all vCPU threads to be up and running before calling the test- - * provided vCPU thread function. This prevents thread creation (which - * requires taking the mmap_sem in write mode) from interfering with the - * guest faulting in its memory. 
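
vcpu_thread_main(), removed above, parks each thread on a flag until every vCPU thread exists, because pthread_create() takes mmap_sem for writing and would otherwise contend with the guest faulting in its memory. A minimal rendezvous sketch using C11 atomics in place of the kernel-style READ_ONCE/WRITE_ONCE:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_THREADS 4

static atomic_bool running[NR_THREADS];
static atomic_bool all_running;

static void *worker(void *arg)
{
	int id = (int)(long)arg;

	atomic_store(&running[id], true);
	/* Rendezvous: busy-wait until every thread has been created. */
	while (!atomic_load(&all_running))
		;
	printf("thread %d past the barrier\n", id);
	return NULL;
}

int main(void)
{
	pthread_t t[NR_THREADS];
	int i;

	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&t[i], NULL, worker, (void *)(long)i);
	/* Wait for each thread to check in before releasing any of them. */
	for (i = 0; i < NR_THREADS; i++)
		while (!atomic_load(&running[i]))
			;
	atomic_store(&all_running, true);
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(t[i], NULL);
	return 0;
}
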
- */ - while (!READ_ONCE(all_vcpu_threads_running)) - ; - - vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]); - - return NULL; -} - -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)) +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, + uint64_t vcpu_memory_bytes, + bool partition_vcpu_memory_access) { + vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; int vcpu_id; - vcpu_thread_fn = vcpu_fn; - WRITE_ONCE(all_vcpu_threads_running, false); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id]; + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - vcpu->vcpu_id = vcpu_id; - WRITE_ONCE(vcpu->running, false); + vcpu_args->vcpu_id = vcpu_id; + if (partition_vcpu_memory_access) { + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + vcpu_gpa = guest_test_phys_mem + + (vcpu_id * vcpu_memory_bytes); + } else { + vcpu_args->gva = guest_test_virt_mem; + vcpu_args->pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + vcpu_gpa = guest_test_phys_mem; + } - pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu); + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + + (vcpu_args->pages * perf_test_args.guest_page_size)); } - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - while (!READ_ONCE(vcpu_threads[vcpu_id].running)) - ; - } - - WRITE_ONCE(all_vcpu_threads_running, true); -} - -void perf_test_join_vcpu_threads(int vcpus) -{ - int vcpu_id; - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id].thread, NULL); } diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 50e0cf41a7..a0d0c83d83 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -1866,7 +1866,7 @@ void sparsebit_validate_internal(struct sparsebit *s) * of total bits set. 
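
perf_test_create_vm() above places the test region by backing off guest_num_pages from the top gfn and then aligning the base down to the backing-source page size (with an extra 1 MiB segment alignment on s390x). The same arithmetic with assumed parameters:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t max_gfn = (1ULL << (40 - 12)) - 1; /* assumed 40 PA bits, 4K pages */
	uint64_t guest_num_pages = 1ULL << 18;	    /* 1 GiB of 4K pages */
	uint64_t page_size = 1ULL << 12;
	uint64_t backing_pagesz = 2ULL << 20;	    /* e.g. THP backing */
	uint64_t gpa;

	/* Start just below the top of the guest physical address space... */
	gpa = (max_gfn - guest_num_pages) * page_size;
	/* ...then align down so the slot is backing-page aligned. */
	gpa &= ~(backing_pagesz - 1);
	printf("test memory at gpa %#llx\n", (unsigned long long)gpa);
	return 0;
}
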
*/ if (s->num_set != total_bits_set) { - fprintf(stderr, "Number of bits set mismatch,\n" + fprintf(stderr, "Number of bits set missmatch,\n" " s->num_set: 0x%lx total_bits_set: 0x%lx", s->num_set, total_bits_set); diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 6d23878bbf..b724291089 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -283,11 +283,6 @@ size_t get_backing_src_pagesz(uint32_t i) } } -bool is_backing_src_hugetlb(uint32_t i) -{ - return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB); -} - static void print_available_backing_src_types(const char *prefix) { int i; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 9f000dfb55..da73b97e1e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -650,60 +650,6 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid) vcpu_sregs_set(vm, vcpuid, &sregs); } -#define CPUID_XFD_BIT (1 << 4) -static bool is_xfd_supported(void) -{ - int eax, ebx, ecx, edx; - const int leaf = 0xd, subleaf = 0x1; - - __asm__ __volatile__( - "cpuid" - : /* output */ "=a"(eax), "=b"(ebx), - "=c"(ecx), "=d"(edx) - : /* input */ "0"(leaf), "2"(subleaf)); - - return !!(eax & CPUID_XFD_BIT); -} - -void vm_xsave_req_perm(int bit) -{ - int kvm_fd; - u64 bitmask; - long rc; - struct kvm_device_attr attr = { - .group = 0, - .attr = KVM_X86_XCOMP_GUEST_SUPP, - .addr = (unsigned long) &bitmask - }; - - kvm_fd = open_kvm_dev_path_or_exit(); - rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); - close(kvm_fd); - if (rc == -1 && (errno == ENXIO || errno == EINVAL)) - exit(KSFT_SKIP); - TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); - if (!(bitmask & (1ULL << bit))) - exit(KSFT_SKIP); - - if (!is_xfd_supported()) - exit(KSFT_SKIP); - - rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); - - /* - * The older kernel version(<5.15) can't support - * ARCH_REQ_XCOMP_GUEST_PERM and directly return. 
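
is_xfd_supported(), removed above, probes CPUID leaf 0xD sub-leaf 1 and tests EAX bit 4, the XFD feature flag. An equivalent standalone probe (x86-only, GCC inline asm; the function name is illustrative):

#include <stdbool.h>
#include <stdio.h>

#define CPUID_XFD_BIT (1u << 4)

/* CPUID.(EAX=0xD,ECX=1):EAX[4] advertises XFD support. */
static bool xfd_supported(void)
{
	unsigned int eax, ebx, ecx, edx;

	__asm__ __volatile__("cpuid"
			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
			     : "0"(0xd), "2"(1));
	return eax & CPUID_XFD_BIT;
}

int main(void)
{
	printf("XFD %ssupported\n", xfd_supported() ? "" : "not ");
	return 0;
}
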
- */ - if (rc) - return; - - rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); - TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & (1ULL << bit), - "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", - bitmask); -} - void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { struct kvm_mp_state mp_state; @@ -714,7 +660,6 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) /* Create VCPU */ vm_vcpu_add(vm, vcpuid); - vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); vcpu_setup(vm, vcpuid); /* Setup guest general purpose registers */ @@ -727,6 +672,9 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) /* Setup the MP state */ mp_state.mp_state = 0; vcpu_set_mp_state(vm, vcpuid, &mp_state); + + /* Setup supported CPUIDs */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); } /* @@ -901,17 +849,6 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) return entry; } - -int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_cpuid2 *cpuid) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); -} - /* * VM VCPU CPUID Set * @@ -929,9 +866,12 @@ int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid) { + struct vcpu *vcpu = vcpu_find(vm, vcpuid); int rc; - rc = __vcpu_set_cpuid(vm, vcpuid, cpuid); + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", rc, errno); @@ -1079,6 +1019,21 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) sregs_dump(stream, &sregs, indent + 4); } +struct kvm_x86_state { + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + struct kvm_regs regs; + struct kvm_xsave xsave; + struct kvm_xcrs xcrs; + struct kvm_sregs sregs; + struct kvm_debugregs debugregs; + union { + struct kvm_nested_state nested; + char nested_[16384]; + }; + struct kvm_msrs msrs; +}; + static int kvm_get_num_msrs_fd(int kvm_fd) { struct kvm_msr_list nmsrs; @@ -1116,22 +1071,6 @@ struct kvm_msr_list *kvm_get_msr_index_list(void) return list; } -static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu, - struct kvm_x86_state *state) -{ - int size; - - size = vm_check_cap(vm, KVM_CAP_XSAVE2); - if (!size) - size = sizeof(struct kvm_xsave); - - state->xsave = malloc(size); - if (size == sizeof(struct kvm_xsave)) - return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave); - else - return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave); -} - struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); @@ -1159,25 +1098,25 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); list->nmsrs = nmsrs; r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", + r); state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: 
%i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", + r); r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", + r); r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", + r); - r = vcpu_save_xsave_state(vm, vcpu, state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", - r); + r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", + r); if (kvm_check_cap(KVM_CAP_XCRS)) { r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); @@ -1186,17 +1125,17 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) } r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", + r); if (nested_size) { state->nested.size = sizeof(state->nested_); r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", - r); + r); TEST_ASSERT(state->nested.size <= nested_size, - "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", - state->nested.size, nested_size); + "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", + state->nested.size, nested_size); } else state->nested.size = 0; @@ -1204,12 +1143,12 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", - r, r == nmsrs ? -1 : list->indices[r]); + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", + r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", + r); free(list); return state; @@ -1220,14 +1159,9 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); - TEST_ASSERT(r == state->msrs.nmsrs, - "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", - r, r == state->msrs.nmsrs ? 
-1 : state->msrs.entries[r].index); + r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", + r); if (kvm_check_cap(KVM_CAP_XCRS)) { r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); @@ -1235,43 +1169,41 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s r); } - r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", - r); + r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); + TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", + r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", + r); if (state->nested.size) { r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); + r); } } -void kvm_x86_state_cleanup(struct kvm_x86_state *state) +bool is_intel_cpu(void) { - free(state->xsave); - free(state); -} - -static bool cpu_vendor_string_is(const char *vendor) -{ - const uint32_t *chunk = (const uint32_t *)vendor; int eax, ebx, ecx, edx; + const uint32_t *chunk; const int leaf = 0; __asm__ __volatile__( @@ -1280,22 +1212,10 @@ static bool cpu_vendor_string_is(const char *vendor) "=c"(ecx), "=d"(edx) : /* input */ "0"(leaf), "2"(0)); + chunk = (const uint32_t *)("GenuineIntel"); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } -bool is_intel_cpu(void) -{ - return cpu_vendor_string_is("GenuineIntel"); -} - -/* - * Exclude early K5 samples with a vendor string of "AMDisbetter!" 
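
cpu_vendor_string_is() above compares CPUID leaf 0 output against a vendor string chunk by chunk; the 12-byte string comes back split across EBX, EDX, ECX, in that order. A sketch that reassembles the registers into a C string instead (x86-only; cpu_vendor_is() is an illustrative name):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* CPUID leaf 0 returns the vendor string in EBX, EDX, ECX (in that order). */
static bool cpu_vendor_is(const char *vendor)
{
	unsigned int eax, regs[3];
	char buf[13];

	__asm__ __volatile__("cpuid"
			     : "=a"(eax), "=b"(regs[0]),
			       "=d"(regs[1]), "=c"(regs[2])
			     : "0"(0));
	memcpy(buf, regs, 12);
	buf[12] = '\0';
	return strcmp(buf, vendor) == 0;
}

int main(void)
{
	printf("Intel: %d, AMD: %d\n",
	       cpu_vendor_is("GenuineIntel"), cpu_vendor_is("AuthenticAMD"));
	return 0;
}
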
- */ -bool is_amd_cpu(void) -{ - return cpu_vendor_string_is("AuthenticAMD"); -} - uint32_t kvm_get_cpuid_max_basic(void) { return kvm_get_supported_cpuid_entry(0)->eax; @@ -1419,23 +1339,6 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) } } -struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index) -{ - int i; - - for (i = 0; i < cpuid->nent; i++) { - struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; - - if (cur->function == function && cur->index == index) - return cur; - } - - TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); - - return NULL; -} - bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent) { @@ -1531,6 +1434,22 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui return cpuid; } +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 + +static inline unsigned x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ @@ -1540,7 +1459,11 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ - if (!is_amd_cpu()) + eax = ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx || + ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx || + edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) return max_gfn; /* On parts with <40 physical address bits, the area is fully hidden */ @@ -1550,7 +1473,6 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. */ ht_gfn = (1 << 28) - num_ht_pages; eax = 1; - ecx = 0; cpuid(&eax, &ebx, &ecx, &edx); if (x86_family(eax) < 0x17) goto done; diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 0ebc03ce07..161eba7cd1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -13,8 +13,6 @@ #include "processor.h" #include "svm_util.h" -#define SEV_DEV_PATH "/dev/sev" - struct gpr64_regs guest_regs; u64 rflags; @@ -174,14 +172,3 @@ void nested_svm_check_supported(void) exit(KSFT_SKIP); } } - -/* - * Open SEV_DEV_PATH if available, otherwise exit the entire program. - * - * Return: - * The opened file descriptor of /dev/sev. 
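
x86_family(), added back above, decodes CPUID.1:EAX: the base family sits in bits 11:8, and the extended family (bits 27:20) contributes only when the base field saturates at 0xF. A standalone version with two worked values (the sample EAX values are illustrative):

#include <stdio.h>

/* CPUID.1:EAX layout: family in bits 11:8, extended family in bits 27:20.
 * The extended field is only added when the base family is 0xF. */
static unsigned int x86_family(unsigned int eax)
{
	unsigned int family = (eax >> 8) & 0xf;

	if (family == 0xf)
		family += (eax >> 20) & 0xff;
	return family;
}

int main(void)
{
	printf("%#x\n", x86_family(0x00800f12)); /* 0xf + 8 = 0x17 (Zen) */
	printf("%#x\n", x86_family(0x000506e3)); /* 0x6 (Skylake) */
	return 0;
}
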
- */ -int open_sev_dev_path_or_exit(void) -{ - return open_path_or_exit(SEV_DEV_PATH, 0); -} diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 1410d0a914..4cfcafea9f 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -36,9 +36,11 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static bool run_vcpus = true; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void *vcpu_worker(void *data) { int ret; + struct perf_test_vcpu_args *vcpu_args = + (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -57,6 +59,8 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); } + + return NULL; } struct memslot_antagonist_args { @@ -76,7 +80,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, * Add the dummy memslot just below the perf_test_util memslot, which is * at the top of the guest physical address space. */ - gpa = perf_test_args.gpa - pages * vm_get_page_size(vm); + gpa = guest_test_phys_mem - pages * vm_get_page_size(vm); for (i = 0; i < nr_modifications; i++) { usleep(delay); @@ -96,15 +100,29 @@ struct test_params { static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; + pthread_t *vcpu_threads; struct kvm_vm *vm; + int vcpu_id; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, - VM_MEM_SRC_ANONYMOUS, - p->partition_vcpu_memory_access); + VM_MEM_SRC_ANONYMOUS); + + perf_test_args.wr_fract = 1; + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, + p->partition_vcpu_memory_access); + + /* Export the shared variables to the guest */ + sync_global_to_guest(vm, perf_test_args); pr_info("Finished creating vCPUs\n"); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); pr_info("Started all vCPUs\n"); @@ -113,10 +131,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) run_vcpus = false; - perf_test_join_vcpu_threads(nr_vcpus); + /* Wait for the vcpu threads to quit */ + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + pr_info("All vCPU threads joined\n"); - perf_test_destroy_vm(vm); + ucall_uninit(vm); + kvm_vm_free(vm); + + free(vcpu_threads); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 1727f75e0c..d6e381e01d 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -127,54 +127,43 @@ static bool verbose; pr_info(__VA_ARGS__); \ } while (0) -static void check_mmio_access(struct vm_data *vm, struct kvm_run *run) -{ - TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit"); - TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read"); - TEST_ASSERT(run->mmio.len == 8, - "Unexpected exit mmio size = %u", run->mmio.len); - TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min && - run->mmio.phys_addr <= vm->mmio_gpa_max, - "Unexpected exit mmio address 
= 0x%llx", - run->mmio.phys_addr); -} - static void *vcpu_worker(void *data) { struct vm_data *vm = data; struct kvm_run *run; struct ucall uc; + uint64_t cmd; run = vcpu_state(vm->vm, VCPU_ID); while (1) { vcpu_run(vm->vm, VCPU_ID); - switch (get_ucall(vm->vm, VCPU_ID, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == 0, - "Unexpected sync ucall, got %lx", - (ulong)uc.args[1]); + if (run->exit_reason == KVM_EXIT_IO) { + cmd = get_ucall(vm->vm, VCPU_ID, &uc); + if (cmd != UCALL_SYNC) + break; + sem_post(&vcpu_ready); continue; - case UCALL_NONE: - if (run->exit_reason == KVM_EXIT_MMIO) - check_mmio_access(vm, run); - else - goto done; - break; - case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld, val = %lu", - (const char *)uc.args[0], - __FILE__, uc.args[1], uc.args[2]); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); } + + if (run->exit_reason != KVM_EXIT_MMIO) + break; + + TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit"); + TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read"); + TEST_ASSERT(run->mmio.len == 8, + "Unexpected exit mmio size = %u", run->mmio.len); + TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min && + run->mmio.phys_addr <= vm->mmio_gpa_max, + "Unexpected exit mmio address = 0x%llx", + run->mmio.phys_addr); } -done: + if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT) + TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2]); + return NULL; } @@ -279,7 +268,6 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, TEST_ASSERT(data->hva_slots, "malloc() fail"); data->vm = vm_create_default(VCPU_ID, mempages, guest_code); - ucall_init(data->vm, NULL); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", max_mem_slots - 1, data->pages_per_slot, rempages); diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 6f6fd189dd..f40fd097cb 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -109,7 +109,8 @@ int main(int argc, char *argv[]) } } -done: kvm_vm_free(vm); + +done: return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 4c7841dfd4..2b46dcca86 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -129,7 +129,7 @@ static void save_restore_vm(struct kvm_vm *vm) vcpu_set_hv_cpuid(vm, VCPU_ID); vcpu_enable_evmcs(vm, VCPU_ID); vcpu_load_state(vm, VCPU_ID, state); - kvm_x86_state_cleanup(state); + free(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 672915ce73..91d88aaa98 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid, vcpu_set_cpuid(vm, VCPU_ID, cpuid); } -static void guest_test_msrs_access(void) +static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, + struct kvm_cpuid2 *best) { struct kvm_run *run; - struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -180,34 +180,11 @@ static void guest_test_msrs_access(void) struct kvm_cpuid_entry2 dbg = { 
.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - struct kvm_cpuid2 *best; - vm_vaddr_t msr_gva; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, - .args = {1} - }; - struct msr_data *msr; + struct kvm_enable_cap cap = {0}; + + run = vcpu_state(vm, VCPU_ID); while (true) { - vm = vm_create_default(VCPU_ID, 0, guest_msr); - - msr_gva = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); - msr = addr_gva2hva(vm, msr_gva); - - vcpu_args_set(vm, VCPU_ID, 1, msr_gva); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); - - run = vcpu_state(vm, VCPU_ID); - switch (stage) { case 0: /* @@ -338,7 +315,6 @@ static void guest_test_msrs_access(void) * capability enabled and guest visible CPUID bit unset. */ cap.cap = KVM_CAP_HYPERV_SYNIC2; - cap.args[0] = 0; vcpu_enable_cap(vm, VCPU_ID, &cap); break; case 22: @@ -485,9 +461,9 @@ static void guest_test_msrs_access(void) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == 0, - "Unexpected stage: %ld (0 expected)\n", - uc.args[1]); + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -498,14 +474,13 @@ static void guest_test_msrs_access(void) } stage++; - kvm_vm_free(vm); } } -static void guest_test_hcalls_access(void) +static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall, + void *input, void *output, struct kvm_cpuid2 *best) { struct kvm_run *run; - struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -518,38 +493,10 @@ static void guest_test_hcalls_access(void) struct kvm_cpuid_entry2 dbg = { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, - .args = {1} - }; - vm_vaddr_t hcall_page, hcall_params; - struct hcall_data *hcall; - struct kvm_cpuid2 *best; + + run = vcpu_state(vm, VCPU_ID); while (true) { - vm = vm_create_default(VCPU_ID, 0, guest_hcall); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); - - /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); - hcall = addr_gva2hva(vm, hcall_page); - memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - - hcall_params = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); - - vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); - - run = vcpu_state(vm, VCPU_ID); - switch (stage) { case 0: hcall->control = 0xdeadbeef; @@ -659,9 +606,9 @@ static void guest_test_hcalls_access(void) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == 0, - "Unexpected stage: %ld (0 expected)\n", - uc.args[1]); + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -672,15 +619,66 @@ static void guest_test_hcalls_access(void) } stage++; - kvm_vm_free(vm); } } int main(void) { + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + 
vm_vaddr_t msr_gva, hcall_page, hcall_params; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + + /* Test MSRs */ + vm = vm_create_default(VCPU_ID, 0, guest_msr); + + msr_gva = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); + vcpu_args_set(vm, VCPU_ID, 1, msr_gva); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + pr_info("Testing access to Hyper-V specific MSRs\n"); - guest_test_msrs_access(); + guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), + best); + kvm_vm_free(vm); + + /* Test hypercalls */ + vm = vm_create_default(VCPU_ID, 0, guest_hcall); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + hcall_params = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); + + vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); pr_info("Testing access to Hyper-V hypercalls\n"); - guest_test_hcalls_access(); + guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params), + addr_gva2hva(vm, hcall_page), + addr_gva2hva(vm, hcall_page) + getpagesize(), + best); + + kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index a626d40fdb..db2a17559c 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -211,7 +211,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - kvm_x86_state_cleanup(state); + free(state); } done: diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 2e0a92da8f..32854c1462 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -218,7 +218,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - kvm_x86_state_cleanup(state); + free(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c index 30a81038df..df04f56ce8 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c @@ -75,7 +75,7 @@ static void l1_guest_code(struct svm_test_data *svm) vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; /* No intercepts for real and virtual interrupts */ - vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR)); + vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR); /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index 
a426078b16..5a6a662f2e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -77,8 +77,8 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) switch (get_ucall(vm, vcpuid, &uc)) { case UCALL_SYNC: TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", - stage + 1, (ulong)uc.args[1]); + uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); return; case UCALL_DONE: return; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index edac8839e7..2835a17f1b 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -30,8 +30,8 @@ static struct kvm_vm *vm; static void l2_guest_code(void) { /* Exit to L0 */ - asm volatile("inb %%dx, %%al" - : : [port] "d" (PORT_L0_EXIT) : "rax"); + asm volatile("inb %%dx, %%al" + : : [port] "d" (PORT_L0_EXIT) : "rax"); } static void l1_guest_code(struct vmx_pages *vmx_pages) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c index 2454a1f2ca..23051d84b9 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c @@ -110,5 +110,22 @@ int main(int argc, char *argv[]) ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT); TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + /* testcase 4, set capabilities when we don't have PDCM bit */ + entry_1_0->ecx &= ~X86_FEATURE_PDCM; + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + + /* testcase 5, set capabilities when we don't have PMU version bits */ + entry_1_0->ecx |= X86_FEATURE_PDCM; + eax.split.version_id = 0; + entry_1_0->ecx = eax.full; + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); + TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + + vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0); + ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0); + kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index ff92e25b6f..a07480aed3 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -244,7 +244,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - kvm_x86_state_cleanup(state); + free(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 865e171468..eda0d2a512 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -14,9 +14,6 @@ #include #include #include -#include - -#include #define VCPU_ID 5 @@ -25,19 +22,10 @@ #define SHINFO_REGION_SLOT 10 #define PAGE_SIZE 4096 -#define DUMMY_REGION_GPA 
(SHINFO_REGION_GPA + (2 * PAGE_SIZE)) -#define DUMMY_REGION_SLOT 11 - -#define SHINFO_ADDR (SHINFO_REGION_GPA) #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) -#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) -#define SHINFO_VADDR (SHINFO_REGION_GVA) #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) -#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) - -#define EVTCHN_VECTOR 0x10 static struct kvm_vm *vm; @@ -46,20 +34,20 @@ static struct kvm_vm *vm; #define MIN_STEAL_TIME 50000 struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 flags; - u8 pad[2]; + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; } __attribute__((__packed__)); /* 32 bytes */ struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; + u32 version; + u32 sec; + u32 nsec; } __attribute__((__packed__)); struct vcpu_runstate_info { @@ -68,66 +56,15 @@ struct vcpu_runstate_info { uint64_t time[4]; }; -struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ -}; - -struct vcpu_info { - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; - unsigned long evtchn_pending_sel; - struct arch_vcpu_info arch; - struct pvclock_vcpu_time_info time; -}; /* 64 bytes (x86) */ - -struct shared_info { - struct vcpu_info vcpu_info[32]; - unsigned long evtchn_pending[64]; - unsigned long evtchn_mask[64]; - struct pvclock_wall_clock wc; - uint32_t wc_sec_hi; - /* arch_shared_info here */ -}; - #define RUNSTATE_running 0 #define RUNSTATE_runnable 1 #define RUNSTATE_blocked 2 #define RUNSTATE_offline 3 -static const char *runstate_names[] = { - "running", - "runnable", - "blocked", - "offline" -}; - -struct { - struct kvm_irq_routing info; - struct kvm_irq_routing_entry entries[2]; -} irq_routes; - -static void evtchn_handler(struct ex_regs *regs) -{ - struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; - vi->evtchn_upcall_pending = 0; - vi->evtchn_pending_sel = 0; - - GUEST_SYNC(0x20); -} - static void guest_code(void) { struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; - __asm__ __volatile__( - "sti\n" - "nop\n" - ); - - /* Trigger an interrupt injection */ - GUEST_SYNC(0); - /* Test having the host set runstates manually */ GUEST_SYNC(RUNSTATE_runnable); GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); @@ -157,25 +94,7 @@ static void guest_code(void) GUEST_SYNC(6); GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); - /* Attempt to deliver a *masked* interrupt */ - GUEST_SYNC(7); - - /* Wait until we see the bit set */ - struct shared_info *si = (void *)SHINFO_VADDR; - while (!si->evtchn_pending[0]) - __asm__ __volatile__ ("rep nop" : : : "memory"); - - /* Now deliver an *unmasked* interrupt */ - GUEST_SYNC(8); - - while (!si->evtchn_pending[1]) - __asm__ __volatile__ ("rep nop" : : : "memory"); - - /* Change memslots and deliver an interrupt */ - GUEST_SYNC(9); - - for (;;) - __asm__ __volatile__ ("rep nop" : : : "memory"); + GUEST_DONE(); } static int cmp_timespec(struct timespec *a, struct timespec *b) @@ -192,18 +111,9 @@ static int cmp_timespec(struct timespec *a, struct timespec *b) return 0; } -static void handle_alrm(int sig) -{ - TEST_FAIL("IRQ delivery timed out"); -} - int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; - bool verbose; - - verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || - 
!strncmp(argv[1], "--verbose", 10)); int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { @@ -212,7 +122,6 @@ int main(int argc, char *argv[]) } bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); - bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); clock_gettime(CLOCK_REALTIME, &min_ts); @@ -224,11 +133,6 @@ int main(int argc, char *argv[]) SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); - struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); - - int zero_fd = open("/dev/zero", O_RDONLY); - TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); - struct kvm_xen_hvm_config hvmc = { .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, .msr = XEN_HYPERCALL_MSR, @@ -247,19 +151,9 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); - /* - * Test what happens when the HVA of the shinfo page is remapped after - * the kernel has a reference to it. But make sure we copy the clock - * info over since that's only set at setup time, and we test it later. - */ - struct pvclock_wall_clock wc_copy = shinfo->wc; - void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); - TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); - shinfo->wc = wc_copy; - struct kvm_xen_vcpu_attr vi = { .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, - .u.gpa = VCPU_INFO_ADDR, + .u.gpa = SHINFO_REGION_GPA + 0x40, }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi); @@ -269,16 +163,6 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); - struct kvm_xen_hvm_attr vec = { - .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, - .u.vector = EVTCHN_VECTOR, - }; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); - if (do_runstate_tests) { struct kvm_xen_vcpu_attr st = { .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, @@ -287,57 +171,9 @@ int main(int argc, char *argv[]) vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); } - int irq_fd[2] = { -1, -1 }; - - if (do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); - - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_eventfd_tests = false; - } - - if (do_eventfd_tests) { - irq_routes.info.nr = 2; - - irq_routes.entries[0].gsi = 32; - irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; - irq_routes.entries[0].u.xen_evtchn.port = 15; - irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID; - irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; - - irq_routes.entries[1].gsi = 33; - irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; - irq_routes.entries[1].u.xen_evtchn.port = 66; - irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID; - irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; - - vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes); - - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - struct sigaction sa = { }; - sa.sa_handler = handle_alrm; - sigaction(SIGALRM, &sa, NULL); - } - - struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); - vinfo->evtchn_upcall_pending = 0; - struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); 
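
The event-channel test torn out above routes Xen event channels through KVM's GSI machinery and then ties eventfds to GSIs with KVM_IRQFD, so a plain eventfd_write() from userspace injects the routed interrupt. A hedged sketch of the irqfd binding (bind_irqfd() is an illustrative helper; the main() assumes x86, /dev/kvm access, and an in-kernel irqchip):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Bind an eventfd to a GSI: afterwards, eventfd_write(fd, 1) makes KVM
 * inject whatever the routing table maps the GSI to. */
static int bind_irqfd(int vm_fd, uint32_t gsi)
{
	struct kvm_irqfd ifd;
	int fd = eventfd(0, 0);

	if (fd < 0)
		return -1;

	memset(&ifd, 0, sizeof(ifd));
	ifd.fd = fd;
	ifd.gsi = gsi;
	if (ioctl(vm_fd, KVM_IRQFD, &ifd) < 0)
		return -1;
	return fd;
}

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = kvm >= 0 ? ioctl(kvm, KVM_CREATE_VM, 0) : -1;

	if (vm < 0 || ioctl(vm, KVM_CREATE_IRQCHIP, 0) < 0)
		return 1;
	printf("irqfd for GSI 32: fd %d\n", bind_irqfd(vm, 32));
	return 0;
}
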
rs->state = 0x5a; - bool evtchn_irq_expected = false; - for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; @@ -357,33 +193,21 @@ int main(int argc, char *argv[]) struct kvm_xen_vcpu_attr rst; long rundelay; - if (do_runstate_tests) - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); + /* If no runstate support, bail out early */ + if (!do_runstate_tests) + goto done; + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); switch (uc.args[1]) { - case 0: - if (verbose) - printf("Delivering evtchn upcall\n"); - evtchn_irq_expected = true; - vinfo->evtchn_upcall_pending = 1; - break; - - case RUNSTATE_runnable...RUNSTATE_offline: - TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); - if (!do_runstate_tests) - goto done; - if (verbose) - printf("Testing runstate %s\n", runstate_names[uc.args[1]]); + case RUNSTATE_running...RUNSTATE_offline: rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; rst.u.runstate.state = uc.args[1]; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); break; - case 4: - if (verbose) - printf("Testing RUNSTATE_ADJUST\n"); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; memset(&rst.u, 0, sizeof(rst.u)); rst.u.runstate.state = (uint64_t)-1; @@ -397,8 +221,6 @@ int main(int argc, char *argv[]) break; case 5: - if (verbose) - printf("Testing RUNSTATE_DATA\n"); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; memset(&rst.u, 0, sizeof(rst.u)); rst.u.runstate.state = RUNSTATE_running; @@ -407,55 +229,13 @@ int main(int argc, char *argv[]) rst.u.runstate.time_offline = 0x5a; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); break; - case 6: - if (verbose) - printf("Testing steal time\n"); /* Yield until scheduler delay exceeds target */ rundelay = get_run_delay() + MIN_STEAL_TIME; do { sched_yield(); } while (get_run_delay() < rundelay); break; - - case 7: - if (!do_eventfd_tests) - goto done; - if (verbose) - printf("Testing masked event channel\n"); - shinfo->evtchn_mask[0] = 0x8000; - eventfd_write(irq_fd[0], 1UL); - alarm(1); - break; - - case 8: - if (verbose) - printf("Testing unmasked event channel\n"); - /* Unmask that, but deliver the other one */ - shinfo->evtchn_pending[0] = 0; - shinfo->evtchn_mask[0] = 0; - eventfd_write(irq_fd[1], 1UL); - evtchn_irq_expected = true; - alarm(1); - break; - - case 9: - if (verbose) - printf("Testing event channel after memslot change\n"); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); - eventfd_write(irq_fd[0], 1UL); - evtchn_irq_expected = true; - alarm(1); - break; - - case 0x20: - TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); - evtchn_irq_expected = false; - if (shinfo->evtchn_pending[1] && - shinfo->evtchn_pending[0]) - goto done; - break; } break; } @@ -481,19 +261,9 @@ int main(int argc, char *argv[]) ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); ti2 = addr_gpa2hva(vm, PVTIME_ADDR); - if (verbose) { - printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); - printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", - ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, - ti->tsc_shift, ti->flags); - printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", - ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, - 
ti2->tsc_shift, ti2->flags); - } - vm_ts.tv_sec = wc->sec; vm_ts.tv_nsec = wc->nsec; - TEST_ASSERT(wc->version && !(wc->version & 1), + TEST_ASSERT(wc->version && !(wc->version & 1), "Bad wallclock version %x", wc->version); TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); @@ -514,15 +284,6 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); - if (verbose) { - printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", - rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", - rs->state, rs->state_entry_time); - for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { - printf("State %s: %" PRIu64 " ns\n", - runstate_names[i], rs->time[i]); - } - } TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, "State entry time mismatch"); diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 183b7e8e1b..20e2a9286d 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -17,6 +17,10 @@ #include "../kselftest_harness.h" +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + /* * TEST_F_FORK() is useful when a test drop privileges but the corresponding * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index a26a3fa9e9..38edea2563 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -8,4 +8,3 @@ CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_UBSAN_BOUNDS=y CONFIG_UBSAN_TRAP=y -CONFIG_STACKPROTECTOR_STRONG=y diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index 95e9049592..e95e79bd31 100644 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -56,14 +56,8 @@ if echo "$test" | grep -q '^#' ; then fi # If no expected output given, assume an Oops with back trace is success. -repeat=1 if [ -z "$expect" ]; then expect="call trace:" -else - if echo "$expect" | grep -q '^repeat:' ; then - repeat=$(echo "$expect" | cut -d' ' -f1 | cut -d: -f2) - expect=$(echo "$expect" | cut -d' ' -f2-) - fi fi # Prepare log for report checking @@ -89,9 +83,7 @@ dmesg > "$DMESG" # the signal that killed the subprocess, we must ignore the failure and # continue. However we don't silence stderr since there might be other # useful details reported there in the case of other unexpected conditions. -for i in $(seq 1 $repeat); do - echo "$test" | cat >"$TRIGGER" || true -done +echo "$test" | cat >"$TRIGGER" || true # Record and dump the results dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh index 14fedeef76..1b4d95d575 100644 --- a/tools/testing/selftests/lkdtm/stack-entropy.sh +++ b/tools/testing/selftests/lkdtm/stack-entropy.sh @@ -4,27 +4,13 @@ # Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test. set -e samples="${1:-1000}" -TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT -KSELFTEST_SKIP_TEST=4 - -# Verify we have LKDTM available in the kernel. -if [ ! -r $TRIGGER ] ; then - /sbin/modprobe -q lkdtm || true - if [ ! 
-r $TRIGGER ] ; then - echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)" - else - echo "Cannot write $TRIGGER (need to run as root?)" - fi - # Skip this test - exit $KSELFTEST_SKIP_TEST -fi # Capture dmesg continuously since it may fill up depending on sample size. log=$(mktemp -t stack-entropy-XXXXXX) dmesg --follow >"$log" & pid=$! report=-1 for i in $(seq 1 $samples); do - echo "REPORT_STACK" > $TRIGGER + echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT if [ -t 1 ]; then percent=$(( 100 * $i / $samples )) if [ "$percent" -ne "$report" ]; then diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 6b36b7f5dc..09f7bfa383 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -12,7 +12,6 @@ CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_DEL list_del corruption STACK_GUARD_PAGE_LEADING STACK_GUARD_PAGE_TRAILING -REPORT_STACK_CANARY repeat:2 ok: stack canaries differ UNSET_SMEP pinned CR4 bits changed: DOUBLE_FAULT CORRUPT_PAC diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 94df2692e6..192a2899ba 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -455,7 +455,6 @@ static void mfd_fail_write(int fd) printf("mmap()+mprotect() didn't fail as expected\n"); abort(); } - munmap(p, mfd_def_size); } /* verify PUNCH_HOLE fails */ diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config index 1eef042a31..a7e8cd5bb2 100644 --- a/tools/testing/selftests/memory-hotplug/config +++ b/tools/testing/selftests/memory-hotplug/config @@ -1,4 +1,5 @@ CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTPLUG_SPARSE=y CONFIG_NOTIFIER_ERROR_INJECTION=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_MEMORY_HOTREMOVE=y diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c index d2917054fe..584dc6bc3b 100644 --- a/tools/testing/selftests/mount/unprivileged-remount-test.c +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -204,7 +204,7 @@ bool test_unpriv_remount(const char *fstype, const char *mount_options, if (!WIFEXITED(status)) { die("child did not terminate cleanly\n"); } - return WEXITSTATUS(status) == EXIT_SUCCESS; + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; } create_and_enter_userns(); @@ -282,7 +282,7 @@ static bool test_priv_mount_unpriv_remount(void) if (!WIFEXITED(status)) { die("child did not terminate cleanly\n"); } - return WEXITSTATUS(status) == EXIT_SUCCESS; + return WEXITSTATUS(status) == EXIT_SUCCESS ? 
true : false; } orig_mnt_flags = read_mnt_flags(orig_path); diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c index 50ed5d475d..860198f83a 100644 --- a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c +++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c @@ -191,7 +191,7 @@ static bool is_shared_mount(const char *path) #define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from" #define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to" -static bool move_mount_set_group_supported(void) +static int move_mount_set_group_supported(void) { int ret; @@ -222,7 +222,7 @@ static bool move_mount_set_group_supported(void) AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP); umount2("/tmp", MNT_DETACH); - return ret >= 0; + return ret < 0 ? false : true; } FIXTURE(move_mount_set_group) { @@ -232,7 +232,7 @@ FIXTURE(move_mount_set_group) { FIXTURE_SETUP(move_mount_set_group) { - bool ret; + int ret; ASSERT_EQ(prepare_unpriv_mountns(), 0); @@ -254,7 +254,7 @@ FIXTURE_SETUP(move_mount_set_group) FIXTURE_TEARDOWN(move_mount_set_group) { - bool ret; + int ret; ret = move_mount_set_group_supported(); ASSERT_GE(ret, 0); @@ -348,7 +348,7 @@ TEST_F(move_mount_set_group, complex_sharing_copying) .shared = false, }; pid_t pid; - bool ret; + int ret; ret = move_mount_set_group_supported(); ASSERT_GE(ret, 0); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 7581a7348e..19deb9cdf7 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -31,8 +31,3 @@ rxtimestamp timestamping txtimestamp so_netns_cookie -test_unix_oob -gro -ioam6_parser -toeplitz -cmsg_so_mark diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9897fa9ab9..6a953ec793 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -22,19 +22,16 @@ TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh -TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh -TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh -TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest @@ -52,7 +49,6 @@ TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls TEST_GEN_FILES += toeplitz -TEST_GEN_FILES += cmsg_so_mark TEST_FILES := settings diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index ead7963b9b..86ab429fe7 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -44,4 +44,3 @@ CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_CRYPTO_SM4=y -CONFIG_AMT=m diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 3f4c8cfe7a..aec9e784d0 100644 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh 
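# Editor's annotation, not part of the patch: the fcnal-test.sh hunks below
# drop the nonlocal-address coverage and the final exit-code plumbing. As
# read from the removed invocations, the nettest flags mean roughly:
# -s run as server, -R use a raw socket, -P <proto> select the protocol,
# -f permit binding a nonlocal (freebind-style) address, -l <addr> local
# address, -I <dev> bind to a device, -b perform the bind test. E.g. the
# removed "nettest -s -R -P icmp -f -l ${NL_IP} -I ${NSA_DEV} -b" bound a
# raw ICMP socket to 172.17.1.1, an address configured on no interface.
# These readings are inferred from the surrounding context, not re-verified
# against every nettest version.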
@@ -66,10 +66,6 @@ NSB_LO_IP=172.16.2.2 NSA_LO_IP6=2001:db8:2::1 NSB_LO_IP6=2001:db8:2::2 -# non-local addresses for freebind tests -NL_IP=172.17.1.1 -NL_IP6=2001:db8:4::1 - MD5_PW=abc123 MD5_WRONG_PW=abc1234 @@ -320,9 +316,6 @@ addr2str() ${NSB_LO_IP6}) echo "ns-B loopback IPv6";; ${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";; - ${NL_IP}) echo "nonlocal IP";; - ${NL_IP6}) echo "nonlocal IPv6";; - ${VRF_IP}) echo "VRF IP";; ${VRF_IP6}) echo "VRF IPv6";; @@ -1786,14 +1779,6 @@ ipv4_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done - # - # raw socket with nonlocal bind - # - a=${NL_IP} - log_start - run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b - log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind" - # # tcp sockets # @@ -1843,14 +1828,6 @@ ipv4_addr_bind_vrf() run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" - # - # raw socket with nonlocal bind - # - a=${NL_IP} - log_start - run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b - log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind" - # # tcp sockets # @@ -2001,7 +1978,6 @@ ipv4_rt() a=${NSA_IP} log_start - run_cmd nettest ${varg} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & @@ -3441,14 +3417,6 @@ ipv6_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done - # - # raw socket with nonlocal bind - # - a=${NL_IP6} - log_start - run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b - log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address" - # # tcp sockets # @@ -3493,14 +3461,6 @@ ipv6_addr_bind_vrf() run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind" - # - # raw socket with nonlocal bind - # - a=${NL_IP6} - log_start - run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${VRF} -b - log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind" - # # tcp sockets # @@ -4059,9 +4019,6 @@ usage: ${0##*/} OPTS -p Pause on fail -P Pause after each test -v Be verbose - -Tests: - $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER EOF } @@ -4134,6 +4091,8 @@ do # setup namespaces and config, but do not run any tests setup) setup; exit 0;; vrf_setup) setup "yes"; exit 0;; + + help) echo "Test names: $TESTS"; exit 0;; esac done @@ -4141,11 +4100,3 @@ cleanup 2>/dev/null printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} - -if [ $nfail -ne 0 ]; then - exit 1 # KSFT_FAIL -elif [ $nsuccess -eq 0 ]; then - exit $ksft_skip -fi - -exit 0 # KSFT_PASS diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d444ee6aa3..b5a69ad191 100644 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -629,66 +629,6 @@ ipv6_fcnal() log_test $? 0 "Nexthops removed on admin down" } -ipv6_grp_refs() -{ - if [ ! 
-x "$(command -v mausezahn)" ]; then - echo "SKIP: Could not run test; need mausezahn tool" - return - fi - - run_cmd "$IP link set dev veth1 up" - run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10" - run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20" - run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10" - run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20" - run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10" - run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20" - run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10" - run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20" - run_cmd "$IP nexthop add id 102 group 100" - run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" - - # create per-cpu dsts through nh 100 - run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" - - # remove nh 100 from the group to delete the route potentially leaving - # a stale per-cpu dst which holds a reference to the nexthop's net - # device and to the IPv6 route - run_cmd "$IP nexthop replace id 102 group 101" - run_cmd "$IP route del 2001:db8:101::1/128" - - # add both nexthops to the group so a reference is taken on them - run_cmd "$IP nexthop replace id 102 group 100/101" - - # if the bug described in commit "net: nexthop: release IPv6 per-cpu - # dsts when replacing a nexthop group" exists at this point we have - # an unlinked IPv6 route (but not freed due to stale dst) with a - # reference over the group so we delete the group which will again - # only unlink it due to the route reference - run_cmd "$IP nexthop del id 102" - - # delete the nexthop with stale dst, since we have an unlinked - # group with a ref to it and an unlinked IPv6 route with ref to the - # group, the nh will only be unlinked and not freed so the stale dst - # remains forever and we get a net device refcount imbalance - run_cmd "$IP nexthop del id 100" - - # if a reference was lost this command will hang because the net device - # cannot be removed - timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1 - - # we can't cleanup if the command is hung trying to delete the netdev - if [ $? -eq 137 ]; then - return 1 - fi - - # cleanup - run_cmd "$IP link del veth1.20" - run_cmd "$IP nexthop flush" - - return 0 -} - ipv6_grp_fcnal() { local rc @@ -794,9 +734,6 @@ ipv6_grp_fcnal() run_cmd "$IP nexthop add id 108 group 31/24" log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" - - ipv6_grp_refs - log_test $? 
0 "Nexthop group replace refcounts" } ipv6_res_grp_fcnal() diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 697994a927..a4bd1b0873 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,7 +6,6 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y -CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_ACT_VLAN=m diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index de9944d420..2c14a86ada 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -563,6 +563,12 @@ devlink_trap_group_policer_get() | jq '.[][][]["policer"]' } +devlink_trap_policer_ids_get() +{ + devlink -j -p trap policer show \ + | jq '.[]["'$DEVLINK_DEV'"][]["policer"]' +} + devlink_port_by_netdev() { local if_name=$1 diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b0980a2efa..e51def39fd 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -41,9 +41,5 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 -# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process -# filter by software/hardware -TC_FLAG=skip_hw # IPv6 traceroute utility name. TROUTE6=traceroute6 - diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7da783d6f4..92087d423b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -20,12 +20,9 @@ NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} MCD=${MCD:=smcrouted} MC_CLI=${MC_CLI:=smcroutectl} -PING_COUNT=${PING_COUNT:=10} PING_TIMEOUT=${PING_TIMEOUT:=5} WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} -REQUIRE_JQ=${REQUIRE_JQ:=yes} -REQUIRE_MZ=${REQUIRE_MZ:=yes} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -144,12 +141,8 @@ require_command() fi } -if [[ "$REQUIRE_JQ" = "yes" ]]; then - require_command jq -fi -if [[ "$REQUIRE_MZ" = "yes" ]]; then - require_command $MZ -fi +require_command jq +require_command $MZ if [[ ! 
-v NUM_NETIFS ]]; then echo "SKIP: importer does not define \"NUM_NETIFS\"" @@ -287,15 +280,6 @@ log_test() return 0 } -log_test_skip() -{ - local test_name=$1 - local opt_str=$2 - - printf "TEST: %-60s [SKIP]\n" "$test_name $opt_str" - return 0 -} - log_info() { local msg=$1 @@ -1112,8 +1096,7 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null } ping_test() @@ -1134,8 +1117,7 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING6 $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null } ping6_test() diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index 1b27f2b0f1..f8cda822c1 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -80,7 +80,7 @@ test_gretap() test_ip6gretap() { - test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \ + test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \ "mirror to ip6gretap" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index aff88f78e3..472bd023e2 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -74,7 +74,7 @@ test_span_gre_ttl() mirror_install $swp1 ingress $tundev "matchall $tcflags" tc filter add dev $h3 ingress pref 77 prot $prot \ - flower skip_hw ip_ttl 50 action pass + flower ip_ttl 50 action pass mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index c8a9b5bd84..880e3ab9d0 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -141,7 +141,7 @@ test_gretap() test_ip6gretap() { - test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \ + test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \ "mirror to ip6gretap" } @@ -218,7 +218,6 @@ test_ip6gretap_forbidden_egress() test_span_gre_untagged_egress() { local tundev=$1; shift - local ul_proto=$1; shift local what=$1; shift RET=0 @@ -226,7 +225,7 @@ test_span_gre_untagged_egress() mirror_install $swp1 ingress $tundev "matchall $tcflags" quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_vlan_dir $h3 555 ingress h3_addr_add_del del $h3.555 bridge vlan add dev $swp3 vid 555 pvid untagged @@ -234,7 +233,7 @@ test_span_gre_untagged_egress() sleep 5 quick_test_span_gre_dir $tundev ingress - fail_test_span_vlan_dir $h3 555 ingress "$ul_proto" + fail_test_span_vlan_dir $h3 555 ingress h3_addr_add_del del $h3 bridge vlan add dev $swp3 vid 555 @@ -242,7 +241,7 @@ test_span_gre_untagged_egress() sleep 5 quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_vlan_dir $h3 555 ingress mirror_uninstall $swp1 ingress @@ -251,12 +250,12 @@ test_span_gre_untagged_egress() test_gretap_untagged_egress() { - test_span_gre_untagged_egress gt4 ip "mirror to gretap" + 
test_span_gre_untagged_egress gt4 "mirror to gretap" } test_ip6gretap_untagged_egress() { - test_span_gre_untagged_egress gt6 ipv6 "mirror to ip6gretap" + test_span_gre_untagged_egress gt6 "mirror to ip6gretap" } test_span_gre_fdb_roaming() diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 3e8ebeff30..6406cd76a1 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -115,14 +115,13 @@ do_test_span_vlan_dir_ips() local dev=$1; shift local vid=$1; shift local direction=$1; shift - local ul_proto=$1; shift local ip1=$1; shift local ip2=$1; shift # Install the capture as skip_hw to avoid double-counting of packets. # The traffic is meant for local box anyway, so will be trapped to # kernel. - vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto" + vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect mirror_test v$h2 $ip2 $ip1 $dev 100 $expect vlan_capture_uninstall $dev diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh index 0b44e14823..9ab2ce77b3 100644 --- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -85,9 +85,9 @@ test_tagged_vlan_dir() RET=0 mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \ + do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \ 192.0.2.17 192.0.2.18 - do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \ + do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \ 192.0.2.17 192.0.2.18 mirror_uninstall $swp1 $direction diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index 75a37c189e..8bd85da190 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -4,12 +4,9 @@ ALL_TESTS=" ping_ipv4 tbf_test - tbf_root_test " source $lib_dir/sch_tbf_core.sh -QDISC_TYPE=${QDISC% *} - tbf_test_one() { local bs=$1; shift @@ -25,8 +22,6 @@ tbf_test_one() tbf_test() { - log_info "Testing root-$QDISC_TYPE-tbf" - # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. 
tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 @@ -34,29 +29,6 @@ tbf_test() tc qdisc del dev $swp2 root } -tbf_root_test() -{ - local bs=128K - - log_info "Testing root-tbf-$QDISC_TYPE" - - tc qdisc replace dev $swp2 root handle 1: \ - tbf rate 400Mbit burst $bs limit 1M - tc qdisc replace dev $swp2 parent 1:1 handle 10: \ - $QDISC 3 priomap 2 1 0 - tc qdisc replace dev $swp2 parent 10:3 handle 103: \ - bfifo limit 1M - tc qdisc replace dev $swp2 parent 10:2 handle 102: \ - bfifo limit 1M - tc qdisc replace dev $swp2 parent 10:1 handle 101: \ - bfifo limit 1M - - do_tbf_test 10 400 $bs - do_tbf_test 11 400 $bs - - tc qdisc del dev $swp2 root -} - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index de19eb6c38..d9eca22713 100644 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -3,7 +3,7 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ - gact_trap_test mirred_egress_to_ingress_test" + gact_trap_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -13,12 +13,10 @@ tcflags="skip_hw" h1_create() { simple_if_init $h1 192.0.2.1/24 - tc qdisc add dev $h1 clsact } h1_destroy() { - tc qdisc del dev $h1 clsact simple_if_fini $h1 192.0.2.1/24 } @@ -155,49 +153,6 @@ gact_trap_test() log_test "trap ($tcflags)" } -mirred_egress_to_ingress_test() -{ - RET=0 - - tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ - ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \ - ct commit nat src addr 192.0.2.2 pipe \ - ct clear pipe \ - ct commit nat dst addr 192.0.2.1 pipe \ - mirred ingress redirect dev $h1 - - tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \ - ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop - tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \ - ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass - - $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ - -t icmp "ping,id=42,seq=10" -q - - tc_check_packets "dev $h1 egress" 100 1 - check_err $? "didn't mirror first packet" - - tc_check_packets "dev $swp1 ingress" 111 1 - check_fail $? "didn't redirect first packet" - tc_check_packets "dev $swp1 ingress" 112 1 - check_err $? "didn't receive reply to first packet" - - ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1 - - tc_check_packets "dev $h1 egress" 100 2 - check_err $? "didn't mirror second packet" - tc_check_packets "dev $swp1 ingress" 111 1 - check_fail $? "didn't redirect second packet" - tc_check_packets "dev $swp1 ingress" 112 2 - check_err $? 
"didn't receive reply to second packet" - - tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower - tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower - tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower - - log_test "mirred_egress_to_ingress ($tcflags)" -} - setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index bce8bb8d2b..0e18e8be6e 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -16,16 +16,6 @@ tc_check_packets() tc_rule_handle_stats_get "$id" "$handle" > /dev/null } -tc_check_at_least_x_packets() -{ - local id=$1 - local handle=$2 - local count=$3 - - busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $count" \ - tc_rule_handle_stats_get "$id" "$handle" > /dev/null -} - tc_check_packets_hitting() { local id=$1 diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index a596bbf3ed..a5789721ba 100644 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -680,6 +680,26 @@ test_pvid() log_test "VXLAN: flood after vlan re-add" } +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." 
+} + __test_learning() { local -a expects=(0 0 0 0 0) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 30024d0ed3..cf37ce86b0 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -57,14 +57,17 @@ #include #include -#include "../kselftest.h" - #define DPORT 8000 #define SPORT 1500 #define PAYLOAD_LEN 100 +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define NUM_PACKETS 4 #define START_SEQ 100 #define START_ACK 100 +#define SIP6 "fdaa::2" +#define DIP6 "fdaa::1" +#define SIP4 "192.168.1.200" +#define DIP4 "192.168.1.100" #define ETH_P_NONE 0 #define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) #define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) @@ -72,10 +75,6 @@ #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) -static const char *addr6_src = "fdaa::2"; -static const char *addr6_dst = "fdaa::1"; -static const char *addr4_src = "192.168.1.200"; -static const char *addr4_dst = "192.168.1.100"; static int proto = -1; static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; static char *testname = "data"; @@ -179,18 +178,18 @@ static uint16_t tcp_checksum(void *buf, int payload_len) uint32_t sum = 0; if (proto == PF_INET6) { - if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1) + if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1) error(1, errno, "inet_pton6 source ip pseudo"); - if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1) + if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1) error(1, errno, "inet_pton6 dest ip pseudo"); ph6.protocol = htons(IPPROTO_TCP); ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); sum = checksum_nofold(&ph6, sizeof(ph6), 0); } else if (proto == PF_INET) { - if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1) + if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1) error(1, errno, "inet_pton source ip pseudo"); - if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1) + if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1) error(1, errno, "inet_pton dest ip pseudo"); ph4.protocol = htons(IPPROTO_TCP); ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); @@ -230,9 +229,9 @@ static void fill_networklayer(void *buf, int payload_len) ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); ip6h->nexthdr = IPPROTO_TCP; ip6h->hop_limit = 8; - if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) + if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1) error(1, errno, "inet_pton source ip6"); - if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1) + if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1) error(1, errno, "inet_pton dest ip6"); } else if (proto == PF_INET) { memset(iph, 0, sizeof(*iph)); @@ -244,9 +243,9 @@ static void fill_networklayer(void *buf, int payload_len) iph->tot_len = htons(sizeof(struct tcphdr) + payload_len + sizeof(struct iphdr)); iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ - if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1) + if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1) error(1, errno, "inet_pton source ip"); - if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1) + if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1) error(1, errno, "inet_pton dest ip"); iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); } @@ -732,7 +731,7 @@ static void set_timeout(int fd) { struct timeval timeout; - timeout.tv_sec = 3; + timeout.tv_sec = 120; timeout.tv_usec = 0; if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)) < 0) @@ 
-1024,13 +1023,11 @@ static void gro_receiver(void) static void parse_args(int argc, char **argv) { static const struct option opts[] = { - { "daddr", required_argument, NULL, 'd' }, { "dmac", required_argument, NULL, 'D' }, { "iface", required_argument, NULL, 'i' }, { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, { "rx", no_argument, NULL, 'r' }, - { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, { "test", required_argument, NULL, 't' }, { "verbose", no_argument, NULL, 'v' }, @@ -1038,7 +1035,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1048,9 +1045,6 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; - case 'd': - addr4_dst = addr6_dst = optarg; - break; case 'D': dmac = optarg; break; @@ -1060,9 +1054,6 @@ static void parse_args(int argc, char **argv) case 'r': tx_socket = false; break; - case 's': - addr4_src = addr6_src = optarg; - break; case 'S': smac = optarg; break; @@ -1100,7 +1091,5 @@ int main(int argc, char **argv) gro_sender(); else gro_receiver(); - - fprintf(stderr, "Gro::%s test passed.\n", testname); return 0; } diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index a2b9fad5a9..a2489ec398 100644 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -6,7 +6,7 @@ # This script evaluates the IOAM insertion for IPv6 by checking the IOAM data # consistency directly inside packets on the receiver side. Tests are divided # into three categories: OUTPUT (evaluates the IOAM processing by the sender), -# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates +# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates # wider use cases that do not fall into the other two categories). Both OUTPUT # and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL # tests use the entire three-node topology (alpha, beta, gamma). Each test is @@ -200,7 +200,7 @@ check_kernel_compatibility() ip -netns ioam-tmp-node link set veth0 up ip -netns ioam-tmp-node link set veth1 up - ip -netns ioam-tmp-node ioam namespace add 0 + ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null ns_ad=$? ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0" @@ -214,11 +214,11 @@ check_kernel_compatibility() exit 1 fi - ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \ + type 0x800000 ns 0 size 4 dev veth0 &>/dev/null tr_ad=$? - ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6" + ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace" tr_sh=$? if [[ $tr_ad != 0 || $tr_sh != 0 ]] @@ -232,30 +232,6 @@ check_kernel_compatibility() ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true - - lsmod | grep -q "ip6_tunnel" - ip6tnl_loaded=$? - - if [ $ip6tnl_loaded = 0 ] - then - encap_tests=0 - else - modprobe ip6_tunnel &>/dev/null - lsmod | grep -q "ip6_tunnel" - encap_tests=$? - - if [ $encap_tests != 0 ] - then - ip a | grep -q "ip6tnl0" - encap_tests=$? 
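-			# (Editor's annotation, not an original hunk line: the
-			# removed block probes for ip6_tunnel three ways --
-			# already loaded per lsmod, loadable via modprobe, or
-			# built in as indicated by an existing ip6tnl0 device --
-			# and omits the encap-mode tests only when all three
-			# checks fail.)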
- - if [ $encap_tests != 0 ] - then - echo "Note: ip6_tunnel not found neither as a module nor inside the" \ - "kernel, tests that require it (encap mode) will be omitted" - fi - fi - fi } cleanup() @@ -266,11 +242,6 @@ cleanup() ip netns del ioam-node-alpha || true ip netns del ioam-node-beta || true ip netns del ioam-node-gamma || true - - if [ $ip6tnl_loaded != 0 ] - then - modprobe -r ip6_tunnel 2>/dev/null || true - fi } setup() @@ -358,12 +329,6 @@ log_test_failed() printf "TEST: %-60s [FAIL]\n" "${desc}" } -log_results() -{ - echo "- Tests passed: ${npassed}" - echo "- Tests failed: ${nfailed}" -} - run_test() { local name=$1 @@ -384,26 +349,16 @@ run_test() ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null if [ $? != 0 ] then - nfailed=$((nfailed+1)) log_test_failed "${desc}" kill -2 $spid &>/dev/null else wait $spid - if [ $? = 0 ] - then - npassed=$((npassed+1)) - log_test_passed "${desc}" - else - nfailed=$((nfailed+1)) - log_test_failed "${desc}" - fi + [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" fi } run() { - echo - printf "%0.s-" {1..74} echo echo "OUTPUT tests" printf "%0.s-" {1..74} @@ -414,8 +369,7 @@ run() for t in $TESTS_OUTPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $t done # clean OUTPUT settings @@ -423,8 +377,6 @@ run() ip -netns ioam-node-alpha route change db01::/64 dev veth0 - echo - printf "%0.s-" {1..74} echo echo "INPUT tests" printf "%0.s-" {1..74} @@ -435,8 +387,7 @@ run() for t in $TESTS_INPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $t done # clean INPUT settings @@ -445,8 +396,7 @@ run() ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} ip -netns ioam-node-alpha route change db01::/64 dev veth0 - echo - printf "%0.s-" {1..74} + echo echo "GLOBAL tests" printf "%0.s-" {1..74} @@ -454,12 +404,8 @@ run() for t in $TESTS_GLOBAL do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $t done - - echo - log_results } bit2type=( @@ -485,16 +431,11 @@ out_undef_ns() ############################################################################## local desc="Unknown IOAM namespace" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0x800000 ns 0 size 4 dev veth0 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0x800000 0 - - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0x800000 0 } out_no_room() @@ -505,16 +446,11 @@ out_no_room() ############################################################################## local desc="Missing trace room" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xc00000 ns 123 size 4 dev veth0 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xc00000 123 - - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + run_test 
${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xc00000 123 } out_bits() @@ -529,13 +465,10 @@ out_bits() local tmp=${bit2size[22]} bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up - for i in {0..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ + prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ dev veth0 &>/dev/null local cmd_res=$? @@ -552,13 +485,11 @@ out_bits() log_test_failed "$descr" fi else - run_test "out_bit$i" "$descr ($1 mode)" ioam-node-alpha \ - ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "out_bit$i" "$descr" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 ${bit2type[$i]} 123 fi done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down - bit2size[22]=$tmp } @@ -570,16 +501,11 @@ out_full_supp_trace() ############################################################################## local desc="Full supported trace" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xfff002 ns 123 size 100 dev veth0 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 100 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xfff002 123 - - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xfff002 123 } @@ -600,16 +526,11 @@ in_undef_ns() ############################################################################## local desc="Unknown IOAM namespace" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0x800000 ns 0 size 4 dev veth0 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0x800000 0 - - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0x800000 0 } in_no_room() @@ -620,16 +541,11 @@ in_no_room() ############################################################################## local desc="Missing trace room" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xc00000 ns 123 size 4 dev veth0 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xc00000 123 - - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha 
ioam-node-beta db01::2 \ + db01::1 veth0 0xc00000 123 } in_bits() @@ -644,21 +560,15 @@ in_bits() local tmp=${bit2size[22]} bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up - for i in {0..11} {22..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ + prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 - run_test "in_bit$i" "${desc//$i} ($1 mode)" ioam-node-alpha \ - ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "in_bit$i" "${desc//$i}" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 ${bit2type[$i]} 123 done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down - bit2size[22]=$tmp } @@ -675,16 +585,11 @@ in_oflag() # back the IOAM namespace that was previously configured on the sender. ip -netns ioam-node-alpha ioam namespace add 123 - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xc00000 ns 123 size 4 dev veth0 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xc00000 123 - - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xc00000 123 # And we clean the exception for this test to get things back to normal for # other INPUT tests @@ -699,16 +604,11 @@ in_full_supp_trace() ############################################################################## local desc="Full supported trace" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xfff002 ns 123 size 80 dev veth0 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 80 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xfff002 123 - - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xfff002 123 } @@ -727,16 +627,11 @@ fwd_full_supp_trace() ############################################################################## local desc="Forward - Full supported trace" - [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 up + ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \ + type 0xfff002 ns 123 size 244 via db01::1 dev veth0 - ip -netns ioam-node-alpha route change db02::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 - - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-gamma \ - db01::2 db02::2 veth0 0xfff002 123 - - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 down + run_test ${FUNCNAME[0]} 
"${desc}" ioam-node-alpha ioam-node-gamma db01::2 \ + db02::2 veth0 0xfff002 123 } @@ -746,9 +641,6 @@ fwd_full_supp_trace() # # ################################################################################ -npassed=0 -nfailed=0 - if [ "$(id -u)" -ne 0 ] then echo "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d9d1d41901..8f6997d358 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -240,8 +240,11 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, *p += sizeof(__u32); } - if (ioam6h->type.bit6) + if (ioam6h->type.bit6) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; *p += sizeof(__u32); + } if (ioam6h->type.bit7) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index cc10c10c5e..3d7dde2c32 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -41,6 +41,7 @@ #define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 49daae73c4..260336d5f0 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect -mptcp_inq -mptcp_sockopt pm_nl_ctl *.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 0356c4501c..f1464f09b0 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -8,7 +8,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh -TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq +TEST_GEN_FILES = mptcp_connect pm_nl_ctl TEST_FILES := settings diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index d36b7da508..0faaccd214 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -9,13 +9,9 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m +CONFIG_NFT_COUNTER=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NF_TABLES_INET=y -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SOCKET=m -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 8628aa61b7..89c4753c27 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -14,9 +14,7 @@ #include #include #include -#include -#include #include #include #include @@ -29,7 +27,6 @@ #include #include -#include extern int optind; @@ -61,65 +58,40 @@ static enum cfg_peek cfg_peek = CFG_NONE_PEEK; static const char *cfg_host; static const char *cfg_port = "12000"; static int cfg_sock_proto = IPPROTO_MPTCP; +static bool tcpulp_audit; static 
int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; static bool cfg_remove; -static unsigned int cfg_time; static unsigned int cfg_do_w; static int cfg_wait; static uint32_t cfg_mark; -static char *cfg_input; -static int cfg_repeat = 1; struct cfg_cmsg_types { unsigned int cmsg_enabled:1; unsigned int timestampns:1; - unsigned int tcp_inq:1; }; -struct cfg_sockopt_types { - unsigned int transparent:1; -}; - -struct tcp_inq_state { - unsigned int last; - bool expect_eof; -}; - -static struct tcp_inq_state tcp_inq; - static struct cfg_cmsg_types cfg_cmsg_types; -static struct cfg_sockopt_types cfg_sockopt_types; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-i file] [-I num] [-j] [-l] " - "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] " - "[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n"); + fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" + "[-l] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); - fprintf(stderr, "\t-c cmsg -- test cmsg type \n"); - fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); - fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " - "incoming connections, in client mode, disconnect and reconnect to the server\n"); - fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " - "-- for MPJ tests\n"); - fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); + fprintf(stderr, "\t-t num -- set poll timeout to num\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-p num -- use port num\n"); + fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); fprintf(stderr, "\t-M mark -- set socket packet mark\n"); - fprintf(stderr, "\t-o option -- test sockopt